diff options
author | Frode Isaksen <fisaksen@baylibre.com> | 2017-12-19 11:15:35 +0000 |
---|---|---|
committer | Jan-Simon Moeller <jsmoeller@linuxfoundation.org> | 2018-02-07 11:47:29 +0000 |
commit | c4a6287185179732dfc1e903c195ff90c19f1065 (patch) | |
tree | d35f5010dbd952e40f5c178322026445b55757c1 /meta-eas/recipes-kernel/linux/linux-renesas | |
parent | 109dea1d5c5a38807b098b588584636ae636a302 (diff) |
This layer provides Energy Aware Scheduling (EAS) patches
(refs: eel_5.1.0, eel_5.0.3, eel_5.0.2, eel/5.1.0, eel/5.0.3, eel/5.0.2, 5.1.0, 5.0.3, 5.0.2, eel)
For the moment this is provided only for Renesas R-Car Gen3 SoCs.
It can be extended to other SoCs by setting the machine
feature biglittle and providing the relevant EAS patches.
Bug-AGL: SPEC-813
Change-Id: I2b5e69c515c33e57be19b30466fe208d7b8ac1a5
Signed-off-by: Frode Isaksen <fisaksen@baylibre.com>
Diffstat (limited to 'meta-eas/recipes-kernel/linux/linux-renesas')
97 files changed, 10742 insertions, 0 deletions
diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0001-arm64-dts-r8a7795-Update-cpu-capacity-dmips-mhz.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0001-arm64-dts-r8a7795-Update-cpu-capacity-dmips-mhz.patch new file mode 100644 index 0000000..11f927d --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0001-arm64-dts-r8a7795-Update-cpu-capacity-dmips-mhz.patch @@ -0,0 +1,59 @@ +From 59c433df0378f573a9928a8c94573fdf570138a6 Mon Sep 17 00:00:00 2001 +From: Gaku Inami <gaku.inami.xw@bp.renesas.com> +Date: Wed, 15 Nov 2017 11:28:32 +0900 +Subject: [PATCH 1/4] arm64: dts: r8a7795: Update cpu capacity-dmips-mhz + +Since the cpu_capacity for CA53 was set smaller than expected, the +behavior of scheduler may not be suitable a little. This patch fixes +the reasonable value to fit current implementation. This value should +be updated again when the turbo mode and big little architecture will +support with cpu capacity features. + +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + arch/arm64/boot/dts/renesas/r8a7795.dtsi | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/arch/arm64/boot/dts/renesas/r8a7795.dtsi b/arch/arm64/boot/dts/renesas/r8a7795.dtsi +index eff01d6..4e44a94 100644 +--- a/arch/arm64/boot/dts/renesas/r8a7795.dtsi ++++ b/arch/arm64/boot/dts/renesas/r8a7795.dtsi +@@ -165,7 +165,7 @@ + dynamic-power-coefficient = <277>; + clocks =<&cpg CPG_CORE R8A7795_CLK_Z2>; + operating-points-v2 = <&cluster1_opp_tb0>; +- capacity-dmips-mhz = <379>; ++ capacity-dmips-mhz = <540>; + }; + + a53_1: cpu@101 { +@@ -175,7 +175,7 @@ + next-level-cache = <&L2_CA53>; + enable-method = "psci"; + operating-points-v2 = <&cluster1_opp_tb0>; +- capacity-dmips-mhz = <379>; ++ capacity-dmips-mhz = <540>; + }; + + a53_2: cpu@102 { +@@ -185,7 +185,7 @@ + next-level-cache = <&L2_CA53>; + enable-method = "psci"; + operating-points-v2 = <&cluster1_opp_tb0>; +- capacity-dmips-mhz = <379>; ++ capacity-dmips-mhz = <540>; + }; + + 
a53_3: cpu@103 { +@@ -195,7 +195,7 @@ + next-level-cache = <&L2_CA53>; + enable-method = "psci"; + operating-points-v2 = <&cluster1_opp_tb0>; +- capacity-dmips-mhz = <379>; ++ capacity-dmips-mhz = <540>; + }; + + L2_CA57: cache-controller@0 { +-- +2.7.4 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0001-sched-fair-Kill-the-unused-sched_shares_window_ns-tu.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0001-sched-fair-Kill-the-unused-sched_shares_window_ns-tu.patch new file mode 100644 index 0000000..5477ade --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0001-sched-fair-Kill-the-unused-sched_shares_window_ns-tu.patch @@ -0,0 +1,80 @@ +From d1f3667faa908a76aeb6e54f38b1e1d4b2a3ce2b Mon Sep 17 00:00:00 2001 +From: Matt Fleming <matt@codeblueprint.co.uk> +Date: Wed, 19 Oct 2016 15:10:59 +0100 +Subject: [PATCH 01/92] sched/fair: Kill the unused 'sched_shares_window_ns' + tunable + +The last user of this tunable was removed in 2012 in commit: + + 82958366cfea ("sched: Replace update_shares weight distribution with per-entity computation") + +Delete it since its very existence confuses people. 
+ +Signed-off-by: Matt Fleming <matt@codeblueprint.co.uk> +Cc: Dietmar Eggemann <dietmar.eggemann@arm.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Mike Galbraith <umgwanakikbuti@gmail.com> +Cc: Paul Turner <pjt@google.com> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Link: http://lkml.kernel.org/r/20161019141059.26408-1-matt@codeblueprint.co.uk +Signed-off-by: Ingo Molnar <mingo@kernel.org> +(cherry picked from commit 3c3fcb45d524feb5d14a14f332e3eec7f2aff8f3) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + include/linux/sched/sysctl.h | 1 - + kernel/sched/fair.c | 7 ------- + kernel/sysctl.c | 7 ------- + 3 files changed, 15 deletions(-) + +diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h +index 22db1e6..4411453 100644 +--- a/include/linux/sched/sysctl.h ++++ b/include/linux/sched/sysctl.h +@@ -36,7 +36,6 @@ enum sched_tunable_scaling { + extern unsigned int sysctl_sched_migration_cost; + extern unsigned int sysctl_sched_nr_migrate; + extern unsigned int sysctl_sched_time_avg; +-extern unsigned int sysctl_sched_shares_window; + + int sched_proc_update_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *length, +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index c242944..3cf446c 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -93,13 +93,6 @@ enum sched_tunable_scaling sysctl_sched_tunable_scaling + + const_debug unsigned int sysctl_sched_migration_cost = 500000UL; + +-/* +- * The exponential sliding window over which load is averaged for shares +- * distribution. 
+- * (default: 10msec) +- */ +-unsigned int __read_mostly sysctl_sched_shares_window = 10000000UL; +- + #ifdef CONFIG_CFS_BANDWIDTH + /* + * Amount of runtime to allocate from global (tg) to local (per-cfs_rq) pool +diff --git a/kernel/sysctl.c b/kernel/sysctl.c +index 706309f..739fb17 100644 +--- a/kernel/sysctl.c ++++ b/kernel/sysctl.c +@@ -347,13 +347,6 @@ static int sysrq_sysctl_handler(struct ctl_table *table, int write, + .mode = 0644, + .proc_handler = proc_dointvec, + }, +- { +- .procname = "sched_shares_window_ns", +- .data = &sysctl_sched_shares_window, +- .maxlen = sizeof(unsigned int), +- .mode = 0644, +- .proc_handler = proc_dointvec, +- }, + #ifdef CONFIG_SCHEDSTATS + { + .procname = "sched_schedstats", +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0002-arm64-dts-r8a7796-Update-cpu-capacity-dmips-mhz.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0002-arm64-dts-r8a7796-Update-cpu-capacity-dmips-mhz.patch new file mode 100644 index 0000000..391ee27 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0002-arm64-dts-r8a7796-Update-cpu-capacity-dmips-mhz.patch @@ -0,0 +1,59 @@ +From a88ec6504f72d17ca2a0ae4da2e895462df7e23e Mon Sep 17 00:00:00 2001 +From: Gaku Inami <gaku.inami.xw@bp.renesas.com> +Date: Wed, 15 Nov 2017 11:33:36 +0900 +Subject: [PATCH 2/4] arm64: dts: r8a7796: Update cpu capacity-dmips-mhz + +Since the cpu_capacity for CA53 was set smaller than expected, the +behavior of scheduler may not be suitable a little. This patch fixes +the reasonable value to fit current implementation. This value should +be updated again when the turbo mode and big little architecture will +support with cpu capacity features. 
+ +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + arch/arm64/boot/dts/renesas/r8a7796.dtsi | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/arch/arm64/boot/dts/renesas/r8a7796.dtsi b/arch/arm64/boot/dts/renesas/r8a7796.dtsi +index da26f88..e35adbf 100644 +--- a/arch/arm64/boot/dts/renesas/r8a7796.dtsi ++++ b/arch/arm64/boot/dts/renesas/r8a7796.dtsi +@@ -126,7 +126,7 @@ + dynamic-power-coefficient = <277>; + clocks =<&cpg CPG_CORE R8A7796_CLK_Z2>; + operating-points-v2 = <&cluster1_opp_tb0>; +- capacity-dmips-mhz = <362>; ++ capacity-dmips-mhz = <543>; + }; + + a53_1: cpu@101 { +@@ -136,7 +136,7 @@ + next-level-cache = <&L2_CA53>; + enable-method = "psci"; + operating-points-v2 = <&cluster1_opp_tb0>; +- capacity-dmips-mhz = <362>; ++ capacity-dmips-mhz = <543>; + }; + + a53_2: cpu@102 { +@@ -146,7 +146,7 @@ + next-level-cache = <&L2_CA53>; + enable-method = "psci"; + operating-points-v2 = <&cluster1_opp_tb0>; +- capacity-dmips-mhz = <362>; ++ capacity-dmips-mhz = <543>; + }; + + a53_3: cpu@103 { +@@ -156,7 +156,7 @@ + next-level-cache = <&L2_CA53>; + enable-method = "psci"; + operating-points-v2 = <&cluster1_opp_tb0>; +- capacity-dmips-mhz = <362>; ++ capacity-dmips-mhz = <543>; + }; + + L2_CA57: cache-controller@0 { +-- +2.7.4 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0002-sched-fair-Compute-task-cpu-utilization-at-wake-up-c.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0002-sched-fair-Compute-task-cpu-utilization-at-wake-up-c.patch new file mode 100644 index 0000000..fccc8f3 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0002-sched-fair-Compute-task-cpu-utilization-at-wake-up-c.patch @@ -0,0 +1,123 @@ +From cb6859b76310842444b2123afc2a7dea03279b72 Mon Sep 17 00:00:00 2001 +From: Morten Rasmussen <morten.rasmussen@arm.com> +Date: Fri, 14 Oct 2016 14:41:07 +0100 +Subject: [PATCH 02/92] sched/fair: Compute task/cpu utilization at wake-up + correctly + +At task wake-up load-tracking 
isn't updated until the task is enqueued. +The task's own view of its utilization contribution may therefore not be +aligned with its contribution to the cfs_rq load-tracking which may have +been updated in the meantime. Basically, the task's own utilization +hasn't yet accounted for the sleep decay, while the cfs_rq may have +(partially). Estimating the cfs_rq utilization in case the task is +migrated at wake-up as task_rq(p)->cfs.avg.util_avg - p->se.avg.util_avg +is therefore incorrect as the two load-tracking signals aren't time +synchronized (different last update). + +To solve this problem, this patch synchronizes the task utilization with +its previous rq before the task utilization is used in the wake-up path. +Currently the update/synchronization is done _after_ the task has been +placed by select_task_rq_fair(). The synchronization is done without +having to take the rq lock using the existing mechanism used in +remove_entity_load_avg(). + +Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: dietmar.eggemann@arm.com +Cc: freedom.tan@mediatek.com +Cc: keita.kobayashi.ym@renesas.com +Cc: mgalbraith@suse.de +Cc: sgurrappadi@nvidia.com +Cc: vincent.guittot@linaro.org +Cc: yuyang.du@intel.com +Link: http://lkml.kernel.org/r/1476452472-24740-2-git-send-email-morten.rasmussen@arm.com +Signed-off-by: Ingo Molnar <mingo@kernel.org> +(cherry picked from commit 104cb16d9eb684f071d5bf3aa87c0d01af259b7c) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/fair.c | 39 +++++++++++++++++++++++++++++++++++---- + 1 file changed, 35 insertions(+), 4 deletions(-) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 3cf446c..b05d691 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -3199,13 +3199,25 @@ static inline 
u64 cfs_rq_last_update_time(struct cfs_rq *cfs_rq) + #endif + + /* ++ * Synchronize entity load avg of dequeued entity without locking ++ * the previous rq. ++ */ ++void sync_entity_load_avg(struct sched_entity *se) ++{ ++ struct cfs_rq *cfs_rq = cfs_rq_of(se); ++ u64 last_update_time; ++ ++ last_update_time = cfs_rq_last_update_time(cfs_rq); ++ __update_load_avg(last_update_time, cpu_of(rq_of(cfs_rq)), &se->avg, 0, 0, NULL); ++} ++ ++/* + * Task first catches up with cfs_rq, and then subtract + * itself from the cfs_rq (task must be off the queue now). + */ + void remove_entity_load_avg(struct sched_entity *se) + { + struct cfs_rq *cfs_rq = cfs_rq_of(se); +- u64 last_update_time; + + /* + * tasks cannot exit without having gone through wake_up_new_task() -> +@@ -3217,9 +3229,7 @@ void remove_entity_load_avg(struct sched_entity *se) + * calls this. + */ + +- last_update_time = cfs_rq_last_update_time(cfs_rq); +- +- __update_load_avg(last_update_time, cpu_of(rq_of(cfs_rq)), &se->avg, 0, 0, NULL); ++ sync_entity_load_avg(se); + atomic_long_add(se->avg.load_avg, &cfs_rq->removed_load_avg); + atomic_long_add(se->avg.util_avg, &cfs_rq->removed_util_avg); + } +@@ -5583,6 +5593,24 @@ static inline int task_util(struct task_struct *p) + } + + /* ++ * cpu_util_wake: Compute cpu utilization with any contributions from ++ * the waking task p removed. ++ */ ++static int cpu_util_wake(int cpu, struct task_struct *p) ++{ ++ unsigned long util, capacity; ++ ++ /* Task has no contribution or is new */ ++ if (cpu != task_cpu(p) || !p->se.avg.last_update_time) ++ return cpu_util(cpu); ++ ++ capacity = capacity_orig_of(cpu); ++ util = max_t(long, cpu_rq(cpu)->cfs.avg.util_avg - task_util(p), 0); ++ ++ return (util >= capacity) ? capacity : util; ++} ++ ++/* + * Disable WAKE_AFFINE in the case where task @p doesn't fit in the + * capacity of either the waking CPU @cpu or the previous CPU @prev_cpu. 
+ * +@@ -5600,6 +5628,9 @@ static int wake_cap(struct task_struct *p, int cpu, int prev_cpu) + if (max_cap - min_cap < max_cap >> 3) + return 0; + ++ /* Bring task utilization in sync with prev_cpu */ ++ sync_entity_load_avg(&p->se); ++ + return min_cap * 1024 < task_util(p) * capacity_margin; + } + +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0003-arm64-dts-r8a7795-es1-Update-cpu-capacity-dmips-mhz.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0003-arm64-dts-r8a7795-es1-Update-cpu-capacity-dmips-mhz.patch new file mode 100644 index 0000000..d779509 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0003-arm64-dts-r8a7795-es1-Update-cpu-capacity-dmips-mhz.patch @@ -0,0 +1,59 @@ +From 30b845df13f693baadc3c35b8719adb0a13a2a4a Mon Sep 17 00:00:00 2001 +From: Gaku Inami <gaku.inami.xw@bp.renesas.com> +Date: Wed, 15 Nov 2017 11:36:37 +0900 +Subject: [PATCH 3/4] arm64: dts: r8a7795-es1: Update cpu capacity-dmips-mhz + +Since the cpu_capacity for CA53 was set smaller than expected, the +behavior of scheduler may not be suitable a little. This patch fixes +the reasonable value to fit current implementation. This value should +be updated again when the turbo mode and big little architecture will +support with cpu capacity features. 
+ +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + arch/arm64/boot/dts/renesas/r8a7795-es1.dtsi | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/arch/arm64/boot/dts/renesas/r8a7795-es1.dtsi b/arch/arm64/boot/dts/renesas/r8a7795-es1.dtsi +index c2b5b8d..b8a1542 100644 +--- a/arch/arm64/boot/dts/renesas/r8a7795-es1.dtsi ++++ b/arch/arm64/boot/dts/renesas/r8a7795-es1.dtsi +@@ -170,7 +170,7 @@ + dynamic-power-coefficient = <277>; + clocks =<&cpg CPG_CORE R8A7795_CLK_Z2>; + operating-points-v2 = <&cluster1_opp_tb0>; +- capacity-dmips-mhz = <379>; ++ capacity-dmips-mhz = <540>; + }; + + a53_1: cpu@101 { +@@ -180,7 +180,7 @@ + next-level-cache = <&L2_CA53>; + enable-method = "psci"; + operating-points-v2 = <&cluster1_opp_tb0>; +- capacity-dmips-mhz = <379>; ++ capacity-dmips-mhz = <540>; + }; + + a53_2: cpu@102 { +@@ -190,7 +190,7 @@ + next-level-cache = <&L2_CA53>; + enable-method = "psci"; + operating-points-v2 = <&cluster1_opp_tb0>; +- capacity-dmips-mhz = <379>; ++ capacity-dmips-mhz = <540>; + }; + + a53_3: cpu@103 { +@@ -200,7 +200,7 @@ + next-level-cache = <&L2_CA53>; + enable-method = "psci"; + operating-points-v2 = <&cluster1_opp_tb0>; +- capacity-dmips-mhz = <379>; ++ capacity-dmips-mhz = <540>; + }; + + L2_CA57: cache-controller@0 { +-- +2.7.4 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0003-sched-fair-Consider-spare-capacity-in-find_idlest_gr.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0003-sched-fair-Consider-spare-capacity-in-find_idlest_gr.patch new file mode 100644 index 0000000..674ebb9 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0003-sched-fair-Consider-spare-capacity-in-find_idlest_gr.patch @@ -0,0 +1,140 @@ +From 575af3de702dae80c40bf510aaf7755374accb88 Mon Sep 17 00:00:00 2001 +From: Morten Rasmussen <morten.rasmussen@arm.com> +Date: Fri, 14 Oct 2016 14:41:08 +0100 +Subject: [PATCH 03/92] sched/fair: Consider spare capacity in + find_idlest_group() + +In 
low-utilization scenarios comparing relative loads in +find_idlest_group() doesn't always lead to the most optimum choice. +Systems with groups containing different numbers of cpus and/or cpus of +different compute capacity are significantly better off when considering +spare capacity rather than relative load in those scenarios. + +In addition to existing load based search an alternative spare capacity +based candidate sched_group is found and selected instead if sufficient +spare capacity exists. If not, existing behaviour is preserved. + +Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: dietmar.eggemann@arm.com +Cc: freedom.tan@mediatek.com +Cc: keita.kobayashi.ym@renesas.com +Cc: mgalbraith@suse.de +Cc: sgurrappadi@nvidia.com +Cc: vincent.guittot@linaro.org +Cc: yuyang.du@intel.com +Link: http://lkml.kernel.org/r/1476452472-24740-3-git-send-email-morten.rasmussen@arm.com +Signed-off-by: Ingo Molnar <mingo@kernel.org> +(cherry picked from commit 6a0b19c0f39a7a7b7fb77d3867a733136ff059a3) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/fair.c | 50 +++++++++++++++++++++++++++++++++++++++++++++----- + 1 file changed, 45 insertions(+), 5 deletions(-) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index b05d691..1ad3706 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -5202,6 +5202,14 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, + return 1; + } + ++static inline int task_util(struct task_struct *p); ++static int cpu_util_wake(int cpu, struct task_struct *p); ++ ++static unsigned long capacity_spare_wake(int cpu, struct task_struct *p) ++{ ++ return capacity_orig_of(cpu) - cpu_util_wake(cpu, p); ++} ++ + /* + * find_idlest_group finds and returns the least busy CPU group 
within the + * domain. +@@ -5211,7 +5219,9 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, + int this_cpu, int sd_flag) + { + struct sched_group *idlest = NULL, *group = sd->groups; ++ struct sched_group *most_spare_sg = NULL; + unsigned long min_load = ULONG_MAX, this_load = 0; ++ unsigned long most_spare = 0, this_spare = 0; + int load_idx = sd->forkexec_idx; + int imbalance = 100 + (sd->imbalance_pct-100)/2; + +@@ -5219,7 +5229,7 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, + load_idx = sd->wake_idx; + + do { +- unsigned long load, avg_load; ++ unsigned long load, avg_load, spare_cap, max_spare_cap; + int local_group; + int i; + +@@ -5231,8 +5241,12 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, + local_group = cpumask_test_cpu(this_cpu, + sched_group_cpus(group)); + +- /* Tally up the load of all CPUs in the group */ ++ /* ++ * Tally up the load of all CPUs in the group and find ++ * the group containing the CPU with most spare capacity. 
++ */ + avg_load = 0; ++ max_spare_cap = 0; + + for_each_cpu(i, sched_group_cpus(group)) { + /* Bias balancing toward cpus of our domain */ +@@ -5242,6 +5256,11 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, + load = target_load(i, load_idx); + + avg_load += load; ++ ++ spare_cap = capacity_spare_wake(i, p); ++ ++ if (spare_cap > max_spare_cap) ++ max_spare_cap = spare_cap; + } + + /* Adjust by relative CPU capacity of the group */ +@@ -5249,12 +5268,33 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, + + if (local_group) { + this_load = avg_load; +- } else if (avg_load < min_load) { +- min_load = avg_load; +- idlest = group; ++ this_spare = max_spare_cap; ++ } else { ++ if (avg_load < min_load) { ++ min_load = avg_load; ++ idlest = group; ++ } ++ ++ if (most_spare < max_spare_cap) { ++ most_spare = max_spare_cap; ++ most_spare_sg = group; ++ } + } + } while (group = group->next, group != sd->groups); + ++ /* ++ * The cross-over point between using spare capacity or least load ++ * is too conservative for high utilization tasks on partially ++ * utilized systems if we require spare_capacity > task_util(p), ++ * so we allow for some task stuffing by using ++ * spare_capacity > task_util(p)/2. 
++ */ ++ if (this_spare > task_util(p) / 2 && ++ imbalance*this_spare > 100*most_spare) ++ return NULL; ++ else if (most_spare > task_util(p) / 2) ++ return most_spare_sg; ++ + if (!idlest || 100*this_load < imbalance*min_load) + return NULL; + return idlest; +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0004-arm64-Update-the-energy-model-on-CA53-for-R-Car-Gen3.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0004-arm64-Update-the-energy-model-on-CA53-for-R-Car-Gen3.patch new file mode 100644 index 0000000..39e8d07 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0004-arm64-Update-the-energy-model-on-CA53-for-R-Car-Gen3.patch @@ -0,0 +1,59 @@ +From b3e079ba90ed7ee0e618a441844f3274c6e49e3b Mon Sep 17 00:00:00 2001 +From: Gaku Inami <gaku.inami.xw@bp.renesas.com> +Date: Wed, 15 Nov 2017 12:34:48 +0900 +Subject: [PATCH 4/4] arm64: Update the energy model on CA53 for R-Car Gen3 + +Since the cpu_capacity for CA53 was set smaller than expected, the +behavior of scheduler may not be suitable a little. This patch fixes +the reasonable value to fit current implementation. This value should +be updated again when the turbo mode will support with cpu capacity +features. 
+ +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + arch/arm64/kernel/energy_model.h | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/arch/arm64/kernel/energy_model.h b/arch/arm64/kernel/energy_model.h +index c2aae4f..f261080 100644 +--- a/arch/arm64/kernel/energy_model.h ++++ b/arch/arm64/kernel/energy_model.h +@@ -189,7 +189,7 @@ static struct idle_state idle_states_cluster_h3_a57[] = { + + static struct capacity_state cap_states_cluster_h3_a53[] = { + /* Power per cluster */ +- { .cap = 379, .power = 16, }, /* 1200 MHz */ ++ { .cap = 432, .power = 16, }, /* 1200 MHz */ + }; + + static struct capacity_state cap_states_cluster_h3_a57[] = { +@@ -231,7 +231,7 @@ static struct idle_state idle_states_core_h3_a57[] = { + + static struct capacity_state cap_states_core_h3_a53[] = { + /* Power per cpu */ +- { .cap = 379, .power = 131, }, /* 1200 MHz */ ++ { .cap = 432, .power = 131, }, /* 1200 MHz */ + }; + + static struct capacity_state cap_states_core_h3_a57[] = { +@@ -273,7 +273,7 @@ static struct idle_state idle_states_cluster_m3_a57[] = { + + static struct capacity_state cap_states_cluster_m3_a53[] = { + /* Power per cluster */ +- { .cap = 362, .power = 33, }, /* 1200 MHz */ ++ { .cap = 434, .power = 33, }, /* 1200 MHz */ + }; + + static struct capacity_state cap_states_cluster_m3_a57[] = { +@@ -316,7 +316,7 @@ static struct idle_state idle_states_core_m3_a57[] = { + + static struct capacity_state cap_states_core_m3_a53[] = { + /* Power per cpu */ +- { .cap = 362, .power = 131, }, /* 1200 MHz */ ++ { .cap = 434, .power = 131, }, /* 1200 MHz */ + }; + + static struct capacity_state cap_states_core_m3_a57[] = { +-- +2.7.4 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0004-sched-fair-Add-per-CPU-min-capacity-to-sched_group_c.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0004-sched-fair-Add-per-CPU-min-capacity-to-sched_group_c.patch new file mode 100644 index 0000000..7932e0b --- /dev/null +++ 
b/meta-eas/recipes-kernel/linux/linux-renesas/0004-sched-fair-Add-per-CPU-min-capacity-to-sched_group_c.patch @@ -0,0 +1,143 @@ +From 9a434b62bbd9621f05318569949b59d37a0f59ce Mon Sep 17 00:00:00 2001 +From: Morten Rasmussen <morten.rasmussen@arm.com> +Date: Fri, 14 Oct 2016 14:41:09 +0100 +Subject: [PATCH 04/92] sched/fair: Add per-CPU min capacity to + sched_group_capacity + +struct sched_group_capacity currently represents the compute capacity +sum of all CPUs in the sched_group. + +Unless it is divided by the group_weight to get the average capacity +per CPU, it hides differences in CPU capacity for mixed capacity systems +(e.g. high RT/IRQ utilization or ARM big.LITTLE). + +But even the average may not be sufficient if the group covers CPUs of +different capacities. + +Instead, by extending struct sched_group_capacity to indicate min per-CPU +capacity in the group a suitable group for a given task utilization can +more easily be found such that CPUs with reduced capacity can be avoided +for tasks with high utilization (not implemented by this patch). 
+ +Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: dietmar.eggemann@arm.com +Cc: freedom.tan@mediatek.com +Cc: keita.kobayashi.ym@renesas.com +Cc: mgalbraith@suse.de +Cc: sgurrappadi@nvidia.com +Cc: vincent.guittot@linaro.org +Cc: yuyang.du@intel.com +Link: http://lkml.kernel.org/r/1476452472-24740-4-git-send-email-morten.rasmussen@arm.com +Signed-off-by: Ingo Molnar <mingo@kernel.org> +(cherry picked from commit bf475ce0a3dd75b5d1df6c6c14ae25168caa15ac) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/core.c | 3 ++- + kernel/sched/fair.c | 17 ++++++++++++----- + kernel/sched/sched.h | 3 ++- + 3 files changed, 16 insertions(+), 7 deletions(-) + +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index 154fd68..e891e12 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -5707,7 +5707,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, + printk(KERN_CONT " %*pbl", + cpumask_pr_args(sched_group_cpus(group))); + if (group->sgc->capacity != SCHED_CAPACITY_SCALE) { +- printk(KERN_CONT " (cpu_capacity = %d)", ++ printk(KERN_CONT " (cpu_capacity = %lu)", + group->sgc->capacity); + } + +@@ -6184,6 +6184,7 @@ int group_balance_cpu(struct sched_group *sg) + * die on a /0 trap. 
+ */ + sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span); ++ sg->sgc->min_capacity = SCHED_CAPACITY_SCALE; + + /* + * Make sure the first group of this domain contains the +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 1ad3706..faf8f18 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -6909,13 +6909,14 @@ static void update_cpu_capacity(struct sched_domain *sd, int cpu) + + cpu_rq(cpu)->cpu_capacity = capacity; + sdg->sgc->capacity = capacity; ++ sdg->sgc->min_capacity = capacity; + } + + void update_group_capacity(struct sched_domain *sd, int cpu) + { + struct sched_domain *child = sd->child; + struct sched_group *group, *sdg = sd->groups; +- unsigned long capacity; ++ unsigned long capacity, min_capacity; + unsigned long interval; + + interval = msecs_to_jiffies(sd->balance_interval); +@@ -6928,6 +6929,7 @@ void update_group_capacity(struct sched_domain *sd, int cpu) + } + + capacity = 0; ++ min_capacity = ULONG_MAX; + + if (child->flags & SD_OVERLAP) { + /* +@@ -6952,11 +6954,12 @@ void update_group_capacity(struct sched_domain *sd, int cpu) + */ + if (unlikely(!rq->sd)) { + capacity += capacity_of(cpu); +- continue; ++ } else { ++ sgc = rq->sd->groups->sgc; ++ capacity += sgc->capacity; + } + +- sgc = rq->sd->groups->sgc; +- capacity += sgc->capacity; ++ min_capacity = min(capacity, min_capacity); + } + } else { + /* +@@ -6966,12 +6969,16 @@ void update_group_capacity(struct sched_domain *sd, int cpu) + + group = child->groups; + do { +- capacity += group->sgc->capacity; ++ struct sched_group_capacity *sgc = group->sgc; ++ ++ capacity += sgc->capacity; ++ min_capacity = min(sgc->min_capacity, min_capacity); + group = group->next; + } while (group != child->groups); + } + + sdg->sgc->capacity = capacity; ++ sdg->sgc->min_capacity = min_capacity; + } + + /* +diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h +index 055f935..345c1cc 100644 +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -892,7 
+892,8 @@ struct sched_group_capacity { + * CPU capacity of this group, SCHED_CAPACITY_SCALE being max capacity + * for a single CPU. + */ +- unsigned int capacity; ++ unsigned long capacity; ++ unsigned long min_capacity; /* Min per-CPU capacity in group */ + unsigned long next_update; + int imbalance; /* XXX unrelated to capacity but shared group state */ + +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0005-sched-fair-Avoid-pulling-tasks-from-non-overloaded-h.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0005-sched-fair-Avoid-pulling-tasks-from-non-overloaded-h.patch new file mode 100644 index 0000000..d4bd2bd --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0005-sched-fair-Avoid-pulling-tasks-from-non-overloaded-h.patch @@ -0,0 +1,81 @@ +From 3a86f1d4b2d288f765534c1763ed698029376098 Mon Sep 17 00:00:00 2001 +From: Morten Rasmussen <morten.rasmussen@arm.com> +Date: Fri, 14 Oct 2016 14:41:10 +0100 +Subject: [PATCH 05/92] sched/fair: Avoid pulling tasks from non-overloaded + higher capacity groups + +For asymmetric CPU capacity systems it is counter-productive for +throughput if low capacity CPUs are pulling tasks from non-overloaded +CPUs with higher capacity. The assumption is that higher CPU capacity is +preferred over running alone in a group with lower CPU capacity. + +This patch rejects higher CPU capacity groups with one or less task per +CPU as potential busiest group which could otherwise lead to a series of +failing load-balancing attempts leading to a force-migration. 
+ +Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: dietmar.eggemann@arm.com +Cc: freedom.tan@mediatek.com +Cc: keita.kobayashi.ym@renesas.com +Cc: mgalbraith@suse.de +Cc: sgurrappadi@nvidia.com +Cc: vincent.guittot@linaro.org +Cc: yuyang.du@intel.com +Link: http://lkml.kernel.org/r/1476452472-24740-5-git-send-email-morten.rasmussen@arm.com +Signed-off-by: Ingo Molnar <mingo@kernel.org> +(cherry picked from commit 9e0994c0a1c1f82c705f1f66388e1bcffcee8bb9) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/fair.c | 25 +++++++++++++++++++++++++ + 1 file changed, 25 insertions(+) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index faf8f18..ee39bfd 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -7073,6 +7073,17 @@ static inline int sg_imbalanced(struct sched_group *group) + return false; + } + ++/* ++ * group_smaller_cpu_capacity: Returns true if sched_group sg has smaller ++ * per-CPU capacity than sched_group ref. ++ */ ++static inline bool ++group_smaller_cpu_capacity(struct sched_group *sg, struct sched_group *ref) ++{ ++ return sg->sgc->min_capacity * capacity_margin < ++ ref->sgc->min_capacity * 1024; ++} ++ + static inline enum + group_type group_classify(struct sched_group *group, + struct sg_lb_stats *sgs) +@@ -7176,6 +7187,20 @@ static bool update_sd_pick_busiest(struct lb_env *env, + if (sgs->avg_load <= busiest->avg_load) + return false; + ++ if (!(env->sd->flags & SD_ASYM_CPUCAPACITY)) ++ goto asym_packing; ++ ++ /* ++ * Candidate sg has no more than one task per CPU and ++ * has higher per-CPU capacity. Migrating tasks to less ++ * capable CPUs may harm throughput. Maximize throughput, ++ * power/energy consequences are not considered. 
++ */ ++ if (sgs->sum_nr_running <= sgs->group_weight && ++ group_smaller_cpu_capacity(sds->local, sg)) ++ return false; ++ ++asym_packing: + /* This is the busiest node in its class. */ + if (!(env->sd->flags & SD_ASYM_PACKING)) + return true; +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0006-sched-fair-Fix-incorrect-comment-for-capacity_margin.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0006-sched-fair-Fix-incorrect-comment-for-capacity_margin.patch new file mode 100644 index 0000000..f8619de --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0006-sched-fair-Fix-incorrect-comment-for-capacity_margin.patch @@ -0,0 +1,47 @@ +From 05c05d839fb3a94d5e07822d06cf0ae8bb8b564d Mon Sep 17 00:00:00 2001 +From: Morten Rasmussen <morten.rasmussen@arm.com> +Date: Fri, 14 Oct 2016 14:41:12 +0100 +Subject: [PATCH 06/92] sched/fair: Fix incorrect comment for capacity_margin + +The comment for capacity_margin introduced in: + + 3273163c6775 ("sched/fair: Let asymmetric CPU configurations balance at wake-up") + +... got its usage the wrong way round - fix it. 
+ +Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: dietmar.eggemann@arm.com +Cc: freedom.tan@mediatek.com +Cc: keita.kobayashi.ym@renesas.com +Cc: mgalbraith@suse.de +Cc: sgurrappadi@nvidia.com +Cc: vincent.guittot@linaro.org +Cc: yuyang.du@intel.com +Link: http://lkml.kernel.org/r/1476452472-24740-7-git-send-email-morten.rasmussen@arm.com +Signed-off-by: Ingo Molnar <mingo@kernel.org> +(cherry picked from commit 893c5d2279041afeb593f1fa8edd9d02edf5b7cb) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/fair.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index ee39bfd..5e6c00a 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -109,7 +109,7 @@ enum sched_tunable_scaling sysctl_sched_tunable_scaling + + /* + * The margin used when comparing utilization with CPU capacity: +- * util * 1024 < capacity * margin ++ * util * margin < capacity * 1024 + */ + unsigned int capacity_margin = 1280; /* ~20% */ + +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0007-sched-fair-Factorize-attach-detach-entity.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0007-sched-fair-Factorize-attach-detach-entity.patch new file mode 100644 index 0000000..0773c2c --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0007-sched-fair-Factorize-attach-detach-entity.patch @@ -0,0 +1,142 @@ +From 3fa8e908c327f136ecfa353f7034b19c6b23f852 Mon Sep 17 00:00:00 2001 +From: Vincent Guittot <vincent.guittot@linaro.org> +Date: Tue, 8 Nov 2016 10:53:42 +0100 +Subject: [PATCH 07/92] sched/fair: Factorize attach/detach entity + +Factorize post_init_entity_util_avg() and part of attach_task_cfs_rq() +in one function attach_entity_cfs_rq(). 
+ +Create symmetric detach_entity_cfs_rq() function. + +Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Acked-by: Dietmar Eggemann <dietmar.eggemann@arm.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Morten.Rasmussen@arm.com +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: bsegall@google.com +Cc: kernellwp@gmail.com +Cc: pjt@google.com +Cc: yuyang.du@intel.com +Link: http://lkml.kernel.org/r/1478598827-32372-2-git-send-email-vincent.guittot@linaro.org +Signed-off-by: Ingo Molnar <mingo@kernel.org> +(cherry picked from commit df217913e72ec7e603d8b68cc4c70646cf7000db) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/fair.c | 53 +++++++++++++++++++++++++++++++---------------------- + 1 file changed, 31 insertions(+), 22 deletions(-) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 5e6c00a..0731aff 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -701,9 +701,7 @@ void init_entity_runnable_average(struct sched_entity *se) + } + + static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq); +-static int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq); +-static void update_tg_load_avg(struct cfs_rq *cfs_rq, int force); +-static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se); ++static void attach_entity_cfs_rq(struct sched_entity *se); + + /* + * With new tasks being created, their initial util_avgs are extrapolated +@@ -735,7 +733,6 @@ void post_init_entity_util_avg(struct sched_entity *se) + struct cfs_rq *cfs_rq = cfs_rq_of(se); + struct sched_avg *sa = &se->avg; + long cap = (long)(SCHED_CAPACITY_SCALE - cfs_rq->avg.util_avg) / 2; +- u64 now = cfs_rq_clock_task(cfs_rq); + + if (cap > 0) { + if (cfs_rq->avg.util_avg != 0) { +@@ -763,14 +760,12 @@ void post_init_entity_util_avg(struct sched_entity *se) + * such that the next 
switched_to_fair() has the + * expected state. + */ +- se->avg.last_update_time = now; ++ se->avg.last_update_time = cfs_rq_clock_task(cfs_rq); + return; + } + } + +- update_cfs_rq_load_avg(now, cfs_rq, false); +- attach_entity_load_avg(cfs_rq, se); +- update_tg_load_avg(cfs_rq, false); ++ attach_entity_cfs_rq(se); + } + + #else /* !CONFIG_SMP */ +@@ -8783,30 +8778,19 @@ static inline bool vruntime_normalized(struct task_struct *p) + return false; + } + +-static void detach_task_cfs_rq(struct task_struct *p) ++static void detach_entity_cfs_rq(struct sched_entity *se) + { +- struct sched_entity *se = &p->se; + struct cfs_rq *cfs_rq = cfs_rq_of(se); + u64 now = cfs_rq_clock_task(cfs_rq); + +- if (!vruntime_normalized(p)) { +- /* +- * Fix up our vruntime so that the current sleep doesn't +- * cause 'unlimited' sleep bonus. +- */ +- place_entity(cfs_rq, se, 0); +- se->vruntime -= cfs_rq->min_vruntime; +- } +- + /* Catch up with the cfs_rq and remove our load when we leave */ + update_cfs_rq_load_avg(now, cfs_rq, false); + detach_entity_load_avg(cfs_rq, se); + update_tg_load_avg(cfs_rq, false); + } + +-static void attach_task_cfs_rq(struct task_struct *p) ++static void attach_entity_cfs_rq(struct sched_entity *se) + { +- struct sched_entity *se = &p->se; + struct cfs_rq *cfs_rq = cfs_rq_of(se); + u64 now = cfs_rq_clock_task(cfs_rq); + +@@ -8818,10 +8802,35 @@ static void attach_task_cfs_rq(struct task_struct *p) + se->depth = se->parent ? se->parent->depth + 1 : 0; + #endif + +- /* Synchronize task with its cfs_rq */ ++ /* Synchronize entity with its cfs_rq */ + update_cfs_rq_load_avg(now, cfs_rq, false); + attach_entity_load_avg(cfs_rq, se); + update_tg_load_avg(cfs_rq, false); ++} ++ ++static void detach_task_cfs_rq(struct task_struct *p) ++{ ++ struct sched_entity *se = &p->se; ++ struct cfs_rq *cfs_rq = cfs_rq_of(se); ++ ++ if (!vruntime_normalized(p)) { ++ /* ++ * Fix up our vruntime so that the current sleep doesn't ++ * cause 'unlimited' sleep bonus. 
++ */ ++ place_entity(cfs_rq, se, 0); ++ se->vruntime -= cfs_rq->min_vruntime; ++ } ++ ++ detach_entity_cfs_rq(se); ++} ++ ++static void attach_task_cfs_rq(struct task_struct *p) ++{ ++ struct sched_entity *se = &p->se; ++ struct cfs_rq *cfs_rq = cfs_rq_of(se); ++ ++ attach_entity_cfs_rq(se); + + if (!vruntime_normalized(p)) + se->vruntime += cfs_rq->min_vruntime; +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0008-sched-fair-Fix-hierarchical-order-in-rq-leaf_cfs_rq_.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0008-sched-fair-Fix-hierarchical-order-in-rq-leaf_cfs_rq_.patch new file mode 100644 index 0000000..323a48f --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0008-sched-fair-Fix-hierarchical-order-in-rq-leaf_cfs_rq_.patch @@ -0,0 +1,171 @@ +From b351fc19a07fde281957aeb737cd32392c7bf5ba Mon Sep 17 00:00:00 2001 +From: Vincent Guittot <vincent.guittot@linaro.org> +Date: Tue, 8 Nov 2016 10:53:43 +0100 +Subject: [PATCH 08/92] sched/fair: Fix hierarchical order in + rq->leaf_cfs_rq_list + +Fix the insertion of cfs_rq in rq->leaf_cfs_rq_list to ensure that a +child will always be called before its parent. + +The hierarchical order in shares update list has been introduced by +commit: + + 67e86250f8ea ("sched: Introduce hierarchal order on shares update list") + +With the current implementation a child can be still put after its +parent. + +Lets take the example of: + + root + \ + b + /\ + c d* + | + e* + +with root -> b -> c already enqueued but not d -> e so the +leaf_cfs_rq_list looks like: head -> c -> b -> root -> tail + +The branch d -> e will be added the first time that they are enqueued, +starting with e then d. 
+ +When e is added, its parent is not already on the list so e is put at +the tail : head -> c -> b -> root -> e -> tail + +Then, d is added at the head because its parent is already on the +list: head -> d -> c -> b -> root -> e -> tail + +e is not placed at the right position and will be called the last +whereas it should be called at the beginning. + +Because it follows the bottom-up enqueue sequence, we are sure that we +will finish by adding either a cfs_rq without parent or a cfs_rq with a +parent that is already on the list. We can use this event to detect +when we have finished to add a new branch. For the others, whose +parents are not already added, we have to ensure that they will be +added after their children that have just been inserted the steps +before, and after any potential parents that are already in the list. +The easiest way is to put the cfs_rq just after the last inserted one +and to keep track of it until the branch is fully added. + +Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Acked-by: Dietmar Eggemann <dietmar.eggemann@arm.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Morten.Rasmussen@arm.com +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: bsegall@google.com +Cc: kernellwp@gmail.com +Cc: pjt@google.com +Cc: yuyang.du@intel.com +Link: http://lkml.kernel.org/r/1478598827-32372-3-git-send-email-vincent.guittot@linaro.org +Signed-off-by: Ingo Molnar <mingo@kernel.org> +(cherry picked from commit 9c2791f936ef5fd04a118b5c284f2c9a95f4a647) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/core.c | 1 + + kernel/sched/fair.c | 54 +++++++++++++++++++++++++++++++++++++++++++++------- + kernel/sched/sched.h | 1 + + 3 files changed, 49 insertions(+), 7 deletions(-) + +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index e891e12..391d6c9 100644 +--- a/kernel/sched/core.c ++++
b/kernel/sched/core.c +@@ -7603,6 +7603,7 @@ void __init sched_init(void) + #ifdef CONFIG_FAIR_GROUP_SCHED + root_task_group.shares = ROOT_TASK_GROUP_LOAD; + INIT_LIST_HEAD(&rq->leaf_cfs_rq_list); ++ rq->tmp_alone_branch = &rq->leaf_cfs_rq_list; + /* + * How much cpu bandwidth does root_task_group get? + * +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 0731aff..4a67026 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -283,19 +283,59 @@ static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp) + static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq) + { + if (!cfs_rq->on_list) { ++ struct rq *rq = rq_of(cfs_rq); ++ int cpu = cpu_of(rq); + /* + * Ensure we either appear before our parent (if already + * enqueued) or force our parent to appear after us when it is +- * enqueued. The fact that we always enqueue bottom-up +- * reduces this to two cases. ++ * enqueued. The fact that we always enqueue bottom-up ++ * reduces this to two cases and a special case for the root ++ * cfs_rq. Furthermore, it also means that we will always reset ++ * tmp_alone_branch either when the branch is connected ++ * to a tree or when we reach the beg of the tree + */ + if (cfs_rq->tg->parent && +- cfs_rq->tg->parent->cfs_rq[cpu_of(rq_of(cfs_rq))]->on_list) { +- list_add_rcu(&cfs_rq->leaf_cfs_rq_list, +- &rq_of(cfs_rq)->leaf_cfs_rq_list); +- } else { ++ cfs_rq->tg->parent->cfs_rq[cpu]->on_list) { ++ /* ++ * If parent is already on the list, we add the child ++ * just before. Thanks to circular linked property of ++ * the list, this means to put the child at the tail ++ * of the list that starts by parent. ++ */ ++ list_add_tail_rcu(&cfs_rq->leaf_cfs_rq_list, ++ &(cfs_rq->tg->parent->cfs_rq[cpu]->leaf_cfs_rq_list)); ++ /* ++ * The branch is now connected to its tree so we can ++ * reset tmp_alone_branch to the beginning of the ++ * list. 
++ */ ++ rq->tmp_alone_branch = &rq->leaf_cfs_rq_list; ++ } else if (!cfs_rq->tg->parent) { ++ /* ++ * cfs rq without parent should be put ++ * at the tail of the list. ++ */ + list_add_tail_rcu(&cfs_rq->leaf_cfs_rq_list, +- &rq_of(cfs_rq)->leaf_cfs_rq_list); ++ &rq->leaf_cfs_rq_list); ++ /* ++ * We have reach the beg of a tree so we can reset ++ * tmp_alone_branch to the beginning of the list. ++ */ ++ rq->tmp_alone_branch = &rq->leaf_cfs_rq_list; ++ } else { ++ /* ++ * The parent has not already been added so we want to ++ * make sure that it will be put after us. ++ * tmp_alone_branch points to the beg of the branch ++ * where we will add parent. ++ */ ++ list_add_rcu(&cfs_rq->leaf_cfs_rq_list, ++ rq->tmp_alone_branch); ++ /* ++ * update tmp_alone_branch to points to the new beg ++ * of the branch ++ */ ++ rq->tmp_alone_branch = &cfs_rq->leaf_cfs_rq_list; + } + + cfs_rq->on_list = 1; +diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h +index 345c1cc..36f30e0 100644 +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -623,6 +623,7 @@ struct rq { + #ifdef CONFIG_FAIR_GROUP_SCHED + /* list of leaf cfs_rq on this cpu: */ + struct list_head leaf_cfs_rq_list; ++ struct list_head *tmp_alone_branch; + #endif /* CONFIG_FAIR_GROUP_SCHED */ + + /* +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0009-sched-fair-Factorize-PELT-update.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0009-sched-fair-Factorize-PELT-update.patch new file mode 100644 index 0000000..b5c051c --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0009-sched-fair-Factorize-PELT-update.patch @@ -0,0 +1,242 @@ +From 5fce03ae44059401dcf219ca80a53850fed51932 Mon Sep 17 00:00:00 2001 +From: Vincent Guittot <vincent.guittot@linaro.org> +Date: Tue, 8 Nov 2016 10:53:44 +0100 +Subject: [PATCH 09/92] sched/fair: Factorize PELT update + +Every time we modify load/utilization of sched_entity, we start to +sync it with its cfs_rq. 
This update is done in different ways: + + - when attaching/detaching a sched_entity, we update cfs_rq and then + we sync the entity with the cfs_rq. + + - when enqueueing/dequeuing the sched_entity, we update both + sched_entity and cfs_rq metrics to now. + +Use update_load_avg() everytime we have to update and sync cfs_rq and +sched_entity before changing the state of a sched_enity. + +Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Acked-by: Dietmar Eggemann <dietmar.eggemann@arm.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Morten.Rasmussen@arm.com +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: bsegall@google.com +Cc: kernellwp@gmail.com +Cc: pjt@google.com +Cc: yuyang.du@intel.com +Link: http://lkml.kernel.org/r/1478598827-32372-4-git-send-email-vincent.guittot@linaro.org +Signed-off-by: Ingo Molnar <mingo@kernel.org> +(cherry picked from commit d31b1a66cbe0931733583ad9d9e8c6cfd710907d) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/fair.c | 76 ++++++++++++++++++----------------------------------- + 1 file changed, 25 insertions(+), 51 deletions(-) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 4a67026..d707ad0 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -3092,8 +3092,14 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq) + return decayed || removed_load; + } + ++/* ++ * Optional action to be done while updating the load average ++ */ ++#define UPDATE_TG 0x1 ++#define SKIP_AGE_LOAD 0x2 ++ + /* Update task and its cfs_rq load average */ +-static inline void update_load_avg(struct sched_entity *se, int update_tg) ++static inline void update_load_avg(struct sched_entity *se, int flags) + { + struct cfs_rq *cfs_rq = cfs_rq_of(se); + u64 now = cfs_rq_clock_task(cfs_rq); +@@ -3104,11 +3110,13 @@ static inline void update_load_avg(struct sched_entity *se, 
int update_tg) + * Track task load average for carrying it to new CPU after migrated, and + * track group sched_entity load average for task_h_load calc in migration + */ +- __update_load_avg(now, cpu, &se->avg, ++ if (se->avg.last_update_time && !(flags & SKIP_AGE_LOAD)) { ++ __update_load_avg(now, cpu, &se->avg, + se->on_rq * scale_load_down(se->load.weight), + cfs_rq->curr == se, NULL); ++ } + +- if (update_cfs_rq_load_avg(now, cfs_rq, true) && update_tg) ++ if (update_cfs_rq_load_avg(now, cfs_rq, true) && (flags & UPDATE_TG)) + update_tg_load_avg(cfs_rq, 0); + } + +@@ -3122,26 +3130,6 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg) + */ + static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) + { +- if (!sched_feat(ATTACH_AGE_LOAD)) +- goto skip_aging; +- +- /* +- * If we got migrated (either between CPUs or between cgroups) we'll +- * have aged the average right before clearing @last_update_time. +- * +- * Or we're fresh through post_init_entity_util_avg(). +- */ +- if (se->avg.last_update_time) { +- __update_load_avg(cfs_rq->avg.last_update_time, cpu_of(rq_of(cfs_rq)), +- &se->avg, 0, 0, NULL); +- +- /* +- * XXX: we could have just aged the entire load away if we've been +- * absent from the fair class for too long. 
+- */ +- } +- +-skip_aging: + se->avg.last_update_time = cfs_rq->avg.last_update_time; + cfs_rq->avg.load_avg += se->avg.load_avg; + cfs_rq->avg.load_sum += se->avg.load_sum; +@@ -3161,9 +3149,6 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s + */ + static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) + { +- __update_load_avg(cfs_rq->avg.last_update_time, cpu_of(rq_of(cfs_rq)), +- &se->avg, se->on_rq * scale_load_down(se->load.weight), +- cfs_rq->curr == se, NULL); + + sub_positive(&cfs_rq->avg.load_avg, se->avg.load_avg); + sub_positive(&cfs_rq->avg.load_sum, se->avg.load_sum); +@@ -3178,34 +3163,20 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s + enqueue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) + { + struct sched_avg *sa = &se->avg; +- u64 now = cfs_rq_clock_task(cfs_rq); +- int migrated, decayed; +- +- migrated = !sa->last_update_time; +- if (!migrated) { +- __update_load_avg(now, cpu_of(rq_of(cfs_rq)), sa, +- se->on_rq * scale_load_down(se->load.weight), +- cfs_rq->curr == se, NULL); +- } +- +- decayed = update_cfs_rq_load_avg(now, cfs_rq, !migrated); + + cfs_rq->runnable_load_avg += sa->load_avg; + cfs_rq->runnable_load_sum += sa->load_sum; + +- if (migrated) ++ if (!sa->last_update_time) { + attach_entity_load_avg(cfs_rq, se); +- +- if (decayed || migrated) + update_tg_load_avg(cfs_rq, 0); ++ } + } + + /* Remove the runnable load generated by se from cfs_rq's runnable load average */ + static inline void + dequeue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) + { +- update_load_avg(se, 1); +- + cfs_rq->runnable_load_avg = + max_t(long, cfs_rq->runnable_load_avg - se->avg.load_avg, 0); + cfs_rq->runnable_load_sum = +@@ -3289,7 +3260,10 @@ static inline unsigned long cfs_rq_load_avg(struct cfs_rq *cfs_rq) + return 0; + } + +-static inline void update_load_avg(struct sched_entity *se, int not_used) ++#define UPDATE_TG 
0x0 ++#define SKIP_AGE_LOAD 0x0 ++ ++static inline void update_load_avg(struct sched_entity *se, int not_used1) + { + cpufreq_update_util(rq_of(cfs_rq_of(se)), 0); + } +@@ -3434,6 +3408,7 @@ static inline void check_schedstat_required(void) + if (renorm && !curr) + se->vruntime += cfs_rq->min_vruntime; + ++ update_load_avg(se, UPDATE_TG); + enqueue_entity_load_avg(cfs_rq, se); + account_entity_enqueue(cfs_rq, se); + update_cfs_shares(cfs_rq); +@@ -3508,6 +3483,7 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) + * Update run-time statistics of the 'current'. + */ + update_curr(cfs_rq); ++ update_load_avg(se, UPDATE_TG); + dequeue_entity_load_avg(cfs_rq, se); + + update_stats_dequeue(cfs_rq, se, flags); +@@ -3595,7 +3571,7 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) + */ + update_stats_wait_end(cfs_rq, se); + __dequeue_entity(cfs_rq, se); +- update_load_avg(se, 1); ++ update_load_avg(se, UPDATE_TG); + } + + update_stats_curr_start(cfs_rq, se); +@@ -3713,7 +3689,7 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev) + /* + * Ensure that runnable average is periodically updated. 
+ */ +- update_load_avg(curr, 1); ++ update_load_avg(curr, UPDATE_TG); + update_cfs_shares(cfs_rq); + + #ifdef CONFIG_SCHED_HRTICK +@@ -4610,7 +4586,7 @@ static inline void hrtick_update(struct rq *rq) + if (cfs_rq_throttled(cfs_rq)) + break; + +- update_load_avg(se, 1); ++ update_load_avg(se, UPDATE_TG); + update_cfs_shares(cfs_rq); + } + +@@ -4669,7 +4645,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) + if (cfs_rq_throttled(cfs_rq)) + break; + +- update_load_avg(se, 1); ++ update_load_avg(se, UPDATE_TG); + update_cfs_shares(cfs_rq); + } + +@@ -8821,10 +8797,9 @@ static inline bool vruntime_normalized(struct task_struct *p) + static void detach_entity_cfs_rq(struct sched_entity *se) + { + struct cfs_rq *cfs_rq = cfs_rq_of(se); +- u64 now = cfs_rq_clock_task(cfs_rq); + + /* Catch up with the cfs_rq and remove our load when we leave */ +- update_cfs_rq_load_avg(now, cfs_rq, false); ++ update_load_avg(se, 0); + detach_entity_load_avg(cfs_rq, se); + update_tg_load_avg(cfs_rq, false); + } +@@ -8832,7 +8807,6 @@ static void detach_entity_cfs_rq(struct sched_entity *se) + static void attach_entity_cfs_rq(struct sched_entity *se) + { + struct cfs_rq *cfs_rq = cfs_rq_of(se); +- u64 now = cfs_rq_clock_task(cfs_rq); + + #ifdef CONFIG_FAIR_GROUP_SCHED + /* +@@ -8843,7 +8817,7 @@ static void attach_entity_cfs_rq(struct sched_entity *se) + #endif + + /* Synchronize entity with its cfs_rq */ +- update_cfs_rq_load_avg(now, cfs_rq, false); ++ update_load_avg(se, sched_feat(ATTACH_AGE_LOAD) ? 
0 : SKIP_AGE_LOAD); + attach_entity_load_avg(cfs_rq, se); + update_tg_load_avg(cfs_rq, false); + } +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0010-sched-fair-Propagate-load-during-synchronous-attach-.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0010-sched-fair-Propagate-load-during-synchronous-attach-.patch new file mode 100644 index 0000000..13bcd06 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0010-sched-fair-Propagate-load-during-synchronous-attach-.patch @@ -0,0 +1,319 @@ +From bf9dc711b0003a71003b9da414efa25ebae3e5f7 Mon Sep 17 00:00:00 2001 +From: Vincent Guittot <vincent.guittot@linaro.org> +Date: Tue, 8 Nov 2016 10:53:45 +0100 +Subject: [PATCH 10/92] sched/fair: Propagate load during synchronous + attach/detach + +When a task moves from/to a cfs_rq, we set a flag which is then used to +propagate the change at parent level (sched_entity and cfs_rq) during +next update. If the cfs_rq is throttled, the flag will stay pending until +the cfs_rq is unthrottled. + +For propagating the utilization, we copy the utilization of group cfs_rq to +the sched_entity. + +For propagating the load, we have to take into account the load of the +whole task group in order to evaluate the load of the sched_entity. +Similarly to what was done before the rewrite of PELT, we add a correction +factor in case the task group's load is greater than its share so it will +contribute the same load of a task of equal weight. 
+ +Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Acked-by: Dietmar Eggemann <dietmar.eggemann@arm.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Morten.Rasmussen@arm.com +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: bsegall@google.com +Cc: kernellwp@gmail.com +Cc: pjt@google.com +Cc: yuyang.du@intel.com +Link: http://lkml.kernel.org/r/1478598827-32372-5-git-send-email-vincent.guittot@linaro.org +Signed-off-by: Ingo Molnar <mingo@kernel.org> +(cherry picked from commit 09a43ace1f986b003c118fdf6ddf1fd685692d49) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/fair.c | 188 ++++++++++++++++++++++++++++++++++++++++++++++++++- + kernel/sched/sched.h | 1 + + 2 files changed, 188 insertions(+), 1 deletion(-) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index d707ad0..8cf26fd 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -2918,6 +2918,26 @@ static u32 __compute_runnable_contrib(u64 n) + return decayed; + } + ++/* ++ * Signed add and clamp on underflow. ++ * ++ * Explicitly do a load-store to ensure the intermediate value never hits ++ * memory. This allows lockless observations without ever seeing the negative ++ * values. 
++ */ ++#define add_positive(_ptr, _val) do { \ ++ typeof(_ptr) ptr = (_ptr); \ ++ typeof(_val) val = (_val); \ ++ typeof(*ptr) res, var = READ_ONCE(*ptr); \ ++ \ ++ res = var + val; \ ++ \ ++ if (val < 0 && res > var) \ ++ res = 0; \ ++ \ ++ WRITE_ONCE(*ptr, res); \ ++} while (0) ++ + #ifdef CONFIG_FAIR_GROUP_SCHED + /** + * update_tg_load_avg - update the tg's load avg +@@ -2997,8 +3017,138 @@ void set_task_rq_fair(struct sched_entity *se, + se->avg.last_update_time = n_last_update_time; + } + } ++ ++/* Take into account change of utilization of a child task group */ ++static inline void ++update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se) ++{ ++ struct cfs_rq *gcfs_rq = group_cfs_rq(se); ++ long delta = gcfs_rq->avg.util_avg - se->avg.util_avg; ++ ++ /* Nothing to update */ ++ if (!delta) ++ return; ++ ++ /* Set new sched_entity's utilization */ ++ se->avg.util_avg = gcfs_rq->avg.util_avg; ++ se->avg.util_sum = se->avg.util_avg * LOAD_AVG_MAX; ++ ++ /* Update parent cfs_rq utilization */ ++ add_positive(&cfs_rq->avg.util_avg, delta); ++ cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * LOAD_AVG_MAX; ++} ++ ++/* Take into account change of load of a child task group */ ++static inline void ++update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se) ++{ ++ struct cfs_rq *gcfs_rq = group_cfs_rq(se); ++ long delta, load = gcfs_rq->avg.load_avg; ++ ++ /* ++ * If the load of group cfs_rq is null, the load of the ++ * sched_entity will also be null so we can skip the formula ++ */ ++ if (load) { ++ long tg_load; ++ ++ /* Get tg's load and ensure tg_load > 0 */ ++ tg_load = atomic_long_read(&gcfs_rq->tg->load_avg) + 1; ++ ++ /* Ensure tg_load >= load and updated with current load*/ ++ tg_load -= gcfs_rq->tg_load_avg_contrib; ++ tg_load += load; ++ ++ /* ++ * We need to compute a correction term in the case that the ++ * task group is consuming more CPU than a task of equal ++ * weight. 
A task with a weight equals to tg->shares will have ++ * a load less or equal to scale_load_down(tg->shares). ++ * Similarly, the sched_entities that represent the task group ++ * at parent level, can't have a load higher than ++ * scale_load_down(tg->shares). And the Sum of sched_entities' ++ * load must be <= scale_load_down(tg->shares). ++ */ ++ if (tg_load > scale_load_down(gcfs_rq->tg->shares)) { ++ /* scale gcfs_rq's load into tg's shares*/ ++ load *= scale_load_down(gcfs_rq->tg->shares); ++ load /= tg_load; ++ } ++ } ++ ++ delta = load - se->avg.load_avg; ++ ++ /* Nothing to update */ ++ if (!delta) ++ return; ++ ++ /* Set new sched_entity's load */ ++ se->avg.load_avg = load; ++ se->avg.load_sum = se->avg.load_avg * LOAD_AVG_MAX; ++ ++ /* Update parent cfs_rq load */ ++ add_positive(&cfs_rq->avg.load_avg, delta); ++ cfs_rq->avg.load_sum = cfs_rq->avg.load_avg * LOAD_AVG_MAX; ++ ++ /* ++ * If the sched_entity is already enqueued, we also have to update the ++ * runnable load avg. 
++ */ ++ if (se->on_rq) { ++ /* Update parent cfs_rq runnable_load_avg */ ++ add_positive(&cfs_rq->runnable_load_avg, delta); ++ cfs_rq->runnable_load_sum = cfs_rq->runnable_load_avg * LOAD_AVG_MAX; ++ } ++} ++ ++static inline void set_tg_cfs_propagate(struct cfs_rq *cfs_rq) ++{ ++ cfs_rq->propagate_avg = 1; ++} ++ ++static inline int test_and_clear_tg_cfs_propagate(struct sched_entity *se) ++{ ++ struct cfs_rq *cfs_rq = group_cfs_rq(se); ++ ++ if (!cfs_rq->propagate_avg) ++ return 0; ++ ++ cfs_rq->propagate_avg = 0; ++ return 1; ++} ++ ++/* Update task and its cfs_rq load average */ ++static inline int propagate_entity_load_avg(struct sched_entity *se) ++{ ++ struct cfs_rq *cfs_rq; ++ ++ if (entity_is_task(se)) ++ return 0; ++ ++ if (!test_and_clear_tg_cfs_propagate(se)) ++ return 0; ++ ++ cfs_rq = cfs_rq_of(se); ++ ++ set_tg_cfs_propagate(cfs_rq); ++ ++ update_tg_cfs_util(cfs_rq, se); ++ update_tg_cfs_load(cfs_rq, se); ++ ++ return 1; ++} ++ + #else /* CONFIG_FAIR_GROUP_SCHED */ ++ + static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {} ++ ++static inline int propagate_entity_load_avg(struct sched_entity *se) ++{ ++ return 0; ++} ++ ++static inline void set_tg_cfs_propagate(struct cfs_rq *cfs_rq) {} ++ + #endif /* CONFIG_FAIR_GROUP_SCHED */ + + static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq) +@@ -3105,6 +3255,7 @@ static inline void update_load_avg(struct sched_entity *se, int flags) + u64 now = cfs_rq_clock_task(cfs_rq); + struct rq *rq = rq_of(cfs_rq); + int cpu = cpu_of(rq); ++ int decayed; + + /* + * Track task load average for carrying it to new CPU after migrated, and +@@ -3116,7 +3267,10 @@ static inline void update_load_avg(struct sched_entity *se, int flags) + cfs_rq->curr == se, NULL); + } + +- if (update_cfs_rq_load_avg(now, cfs_rq, true) && (flags & UPDATE_TG)) ++ decayed = update_cfs_rq_load_avg(now, cfs_rq, true); ++ decayed |= propagate_entity_load_avg(se); ++ ++ if (decayed && (flags & UPDATE_TG)) + 
update_tg_load_avg(cfs_rq, 0); + } + +@@ -3135,6 +3289,7 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s + cfs_rq->avg.load_sum += se->avg.load_sum; + cfs_rq->avg.util_avg += se->avg.util_avg; + cfs_rq->avg.util_sum += se->avg.util_sum; ++ set_tg_cfs_propagate(cfs_rq); + + cfs_rq_util_change(cfs_rq); + } +@@ -3154,6 +3309,7 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s + sub_positive(&cfs_rq->avg.load_sum, se->avg.load_sum); + sub_positive(&cfs_rq->avg.util_avg, se->avg.util_avg); + sub_positive(&cfs_rq->avg.util_sum, se->avg.util_sum); ++ set_tg_cfs_propagate(cfs_rq); + + cfs_rq_util_change(cfs_rq); + } +@@ -8794,6 +8950,31 @@ static inline bool vruntime_normalized(struct task_struct *p) + return false; + } + ++#ifdef CONFIG_FAIR_GROUP_SCHED ++/* ++ * Propagate the changes of the sched_entity across the tg tree to make it ++ * visible to the root ++ */ ++static void propagate_entity_cfs_rq(struct sched_entity *se) ++{ ++ struct cfs_rq *cfs_rq; ++ ++ /* Start to propagate at parent */ ++ se = se->parent; ++ ++ for_each_sched_entity(se) { ++ cfs_rq = cfs_rq_of(se); ++ ++ if (cfs_rq_throttled(cfs_rq)) ++ break; ++ ++ update_load_avg(se, UPDATE_TG); ++ } ++} ++#else ++static void propagate_entity_cfs_rq(struct sched_entity *se) { } ++#endif ++ + static void detach_entity_cfs_rq(struct sched_entity *se) + { + struct cfs_rq *cfs_rq = cfs_rq_of(se); +@@ -8802,6 +8983,7 @@ static void detach_entity_cfs_rq(struct sched_entity *se) + update_load_avg(se, 0); + detach_entity_load_avg(cfs_rq, se); + update_tg_load_avg(cfs_rq, false); ++ propagate_entity_cfs_rq(se); + } + + static void attach_entity_cfs_rq(struct sched_entity *se) +@@ -8820,6 +9002,7 @@ static void attach_entity_cfs_rq(struct sched_entity *se) + update_load_avg(se, sched_feat(ATTACH_AGE_LOAD) ? 
0 : SKIP_AGE_LOAD); + attach_entity_load_avg(cfs_rq, se); + update_tg_load_avg(cfs_rq, false); ++ propagate_entity_cfs_rq(se); + } + + static void detach_task_cfs_rq(struct task_struct *p) +@@ -8898,6 +9081,9 @@ void init_cfs_rq(struct cfs_rq *cfs_rq) + cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime; + #endif + #ifdef CONFIG_SMP ++#ifdef CONFIG_FAIR_GROUP_SCHED ++ cfs_rq->propagate_avg = 0; ++#endif + atomic_long_set(&cfs_rq->removed_load_avg, 0); + atomic_long_set(&cfs_rq->removed_util_avg, 0); + #endif +diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h +index 36f30e0..d7e3931 100644 +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -404,6 +404,7 @@ struct cfs_rq { + unsigned long runnable_load_avg; + #ifdef CONFIG_FAIR_GROUP_SCHED + unsigned long tg_load_avg_contrib; ++ unsigned long propagate_avg; + #endif + atomic_long_t removed_load_avg, removed_util_avg; + #ifndef CONFIG_64BIT +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0011-sched-fair-Propagate-asynchrous-detach.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0011-sched-fair-Propagate-asynchrous-detach.patch new file mode 100644 index 0000000..3d7412a --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0011-sched-fair-Propagate-asynchrous-detach.patch @@ -0,0 +1,72 @@ +From 02927c812e8cd4ebd635e567ccd935d9e87075b8 Mon Sep 17 00:00:00 2001 +From: Vincent Guittot <vincent.guittot@linaro.org> +Date: Tue, 8 Nov 2016 10:53:46 +0100 +Subject: [PATCH 11/92] sched/fair: Propagate asynchrous detach + +A task can be asynchronously detached from cfs_rq when migrating +between CPUs. The load of the migrated task is then removed from +source cfs_rq during its next update. We use this event to set +propagation flag. + +During the load balance, we take advantage of the update of blocked +load to propagate any pending changes. + +The propagation relies on patch: + + "sched: Fix hierarchical order in rq->leaf_cfs_rq_list" + +... 
which orders children and parents, to ensure that it's done in one pass. + +Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Acked-by: Dietmar Eggemann <dietmar.eggemann@arm.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Morten.Rasmussen@arm.com +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: bsegall@google.com +Cc: kernellwp@gmail.com +Cc: pjt@google.com +Cc: yuyang.du@intel.com +Link: http://lkml.kernel.org/r/1478598827-32372-6-git-send-email-vincent.guittot@linaro.org +Signed-off-by: Ingo Molnar <mingo@kernel.org> +(cherry picked from commit 4e5160766fcc9f41bbd38bac11f92dce993644aa) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/fair.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 8cf26fd..090a9bb 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -3219,6 +3219,7 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq) + sub_positive(&sa->load_avg, r); + sub_positive(&sa->load_sum, r * LOAD_AVG_MAX); + removed_load = 1; ++ set_tg_cfs_propagate(cfs_rq); + } + + if (atomic_long_read(&cfs_rq->removed_util_avg)) { +@@ -3226,6 +3227,7 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq) + sub_positive(&sa->util_avg, r); + sub_positive(&sa->util_sum, r * LOAD_AVG_MAX); + removed_util = 1; ++ set_tg_cfs_propagate(cfs_rq); + } + + decayed = __update_load_avg(now, cpu_of(rq_of(cfs_rq)), sa, +@@ -6872,6 +6874,10 @@ static void update_blocked_averages(int cpu) + + if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq, true)) + update_tg_load_avg(cfs_rq, 0); ++ ++ /* Propagate pending load changes to the parent */ ++ if (cfs_rq->tg->se[cpu]) ++ update_load_avg(cfs_rq->tg->se[cpu], 0); + } + raw_spin_unlock_irqrestore(&rq->lock, flags); + } +-- +1.9.1 + diff --git 
a/meta-eas/recipes-kernel/linux/linux-renesas/0012-sched-fair-Fix-task-group-initialization.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0012-sched-fair-Fix-task-group-initialization.patch new file mode 100644 index 0000000..e8703ba --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0012-sched-fair-Fix-task-group-initialization.patch @@ -0,0 +1,43 @@ +From c71758c426fd1c0e04b6f870b57c0434cb73246a Mon Sep 17 00:00:00 2001 +From: Vincent Guittot <vincent.guittot@linaro.org> +Date: Tue, 8 Nov 2016 10:53:47 +0100 +Subject: [PATCH 12/92] sched/fair: Fix task group initialization + +The moves of tasks are now propagated down to root and the utilization +of cfs_rq reflects reality so it doesn't need to be estimated at init. + +Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Acked-by: Dietmar Eggemann <dietmar.eggemann@arm.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Morten.Rasmussen@arm.com +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: bsegall@google.com +Cc: kernellwp@gmail.com +Cc: pjt@google.com +Cc: yuyang.du@intel.com +Link: http://lkml.kernel.org/r/1478598827-32372-7-git-send-email-vincent.guittot@linaro.org +Signed-off-by: Ingo Molnar <mingo@kernel.org> +(cherry picked from commit d03266910a533d874c01ef2ca8dc73009f2925fa) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/fair.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 090a9bb..02605f2 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -9198,7 +9198,7 @@ void online_fair_sched_group(struct task_group *tg) + se = tg->se[i]; + + raw_spin_lock_irq(&rq->lock); +- post_init_entity_util_avg(se); ++ attach_entity_cfs_rq(se); + sync_throttle(tg, i); + raw_spin_unlock_irq(&rq->lock); + } +-- +1.9.1 + diff --git 
a/meta-eas/recipes-kernel/linux/linux-renesas/0013-sched-dl-Fix-comment-in-pick_next_task_dl.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0013-sched-dl-Fix-comment-in-pick_next_task_dl.patch new file mode 100644 index 0000000..6458474 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0013-sched-dl-Fix-comment-in-pick_next_task_dl.patch @@ -0,0 +1,38 @@ +From a1ca4aea32f8ec66617aca32decdab5f0dd51182 Mon Sep 17 00:00:00 2001 +From: "T.Zhou" <t1zhou@163.com> +Date: Wed, 23 Nov 2016 08:48:32 +0800 +Subject: [PATCH 13/92] sched/dl: Fix comment in pick_next_task_dl() + +Fix cut & paste oversight: + + s/pull_rt_task/pull_dl_task + +Signed-off-by: T.Zhou <t1zhou@163.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: juri.lelli@gmail.com +Link: http://lkml.kernel.org/r/20161123004832.GA2983@geo +Signed-off-by: Ingo Molnar <mingo@kernel.org> +(cherry picked from commit 176cedc4ed143745708999155c11b5717cdebb35) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/deadline.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c +index 37e2449..0e4ad2a 100644 +--- a/kernel/sched/deadline.c ++++ b/kernel/sched/deadline.c +@@ -1137,7 +1137,7 @@ struct task_struct * + pull_dl_task(rq); + lockdep_repin_lock(&rq->lock, cookie); + /* +- * pull_rt_task() can drop (and re-acquire) rq->lock; this ++ * pull_dl_task() can drop (and re-acquire) rq->lock; this + * means a stop task can slip in, in which case we need to + * re-start task selection. 
+ */ +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0014-sched-fair-Clean-up-the-tunable-parameter-definition.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0014-sched-fair-Clean-up-the-tunable-parameter-definition.patch new file mode 100644 index 0000000..df0388a --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0014-sched-fair-Clean-up-the-tunable-parameter-definition.patch @@ -0,0 +1,141 @@ +From 05871df5280460c20d17434132329bebb27a2cfa Mon Sep 17 00:00:00 2001 +From: Ingo Molnar <mingo@kernel.org> +Date: Wed, 23 Nov 2016 07:37:00 +0100 +Subject: [PATCH 14/92] sched/fair: Clean up the tunable parameter definitions + +No change in functionality: + + - align the default values vertically to make them easier to scan + - standardize the 'default:' lines + - fix minor whitespace typos + +Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Mike Galbraith <efault@gmx.de> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: linux-kernel@vger.kernel.org +Signed-off-by: Ingo Molnar <mingo@kernel.org> +(cherry picked from commit 2b4d5b2582deffb77b3b4b48a59cd36e9e1e14d9) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/fair.c | 50 ++++++++++++++++++++++++++++---------------------- + 1 file changed, 28 insertions(+), 22 deletions(-) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 02605f2..aa47589 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -37,7 +37,6 @@ + + /* + * Targeted preemption latency for CPU-bound tasks: +- * (default: 6ms * (1 + ilog(ncpus)), units: nanoseconds) + * + * NOTE: this latency value is not the same as the concept of + * 'timeslice length' - timeslices in CFS are of variable length +@@ -46,31 +45,35 @@ + * + * (to see the precise effective timeslice length of your workload, + * run vmstat and monitor the context-switches (cs) field) ++ * ++ * (default: 6ms * (1 + ilog(ncpus)), units: 
nanoseconds) + */ +-unsigned int sysctl_sched_latency = 6000000ULL; +-unsigned int normalized_sysctl_sched_latency = 6000000ULL; ++unsigned int sysctl_sched_latency = 6000000ULL; ++unsigned int normalized_sysctl_sched_latency = 6000000ULL; + + /* + * The initial- and re-scaling of tunables is configurable +- * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus)) + * + * Options are: +- * SCHED_TUNABLESCALING_NONE - unscaled, always *1 +- * SCHED_TUNABLESCALING_LOG - scaled logarithmical, *1+ilog(ncpus) +- * SCHED_TUNABLESCALING_LINEAR - scaled linear, *ncpus ++ * ++ * SCHED_TUNABLESCALING_NONE - unscaled, always *1 ++ * SCHED_TUNABLESCALING_LOG - scaled logarithmical, *1+ilog(ncpus) ++ * SCHED_TUNABLESCALING_LINEAR - scaled linear, *ncpus ++ * ++ * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus)) + */ +-enum sched_tunable_scaling sysctl_sched_tunable_scaling +- = SCHED_TUNABLESCALING_LOG; ++enum sched_tunable_scaling sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG; + + /* + * Minimal preemption granularity for CPU-bound tasks: ++ * + * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds) + */ +-unsigned int sysctl_sched_min_granularity = 750000ULL; +-unsigned int normalized_sysctl_sched_min_granularity = 750000ULL; ++unsigned int sysctl_sched_min_granularity = 750000ULL; ++unsigned int normalized_sysctl_sched_min_granularity = 750000ULL; + + /* +- * is kept at sysctl_sched_latency / sysctl_sched_min_granularity ++ * This value is kept at sysctl_sched_latency/sysctl_sched_min_granularity + */ + static unsigned int sched_nr_latency = 8; + +@@ -82,16 +85,17 @@ enum sched_tunable_scaling sysctl_sched_tunable_scaling + + /* + * SCHED_OTHER wake-up granularity. +- * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds) + * + * This option delays the preemption effects of decoupled workloads + * and reduces their over-scheduling. Synchronous workloads will still + * have immediate wakeup/sleep latencies. 
++ * ++ * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds) + */ +-unsigned int sysctl_sched_wakeup_granularity = 1000000UL; +-unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL; ++unsigned int sysctl_sched_wakeup_granularity = 1000000UL; ++unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL; + +-const_debug unsigned int sysctl_sched_migration_cost = 500000UL; ++const_debug unsigned int sysctl_sched_migration_cost = 500000UL; + + #ifdef CONFIG_CFS_BANDWIDTH + /* +@@ -102,16 +106,18 @@ enum sched_tunable_scaling sysctl_sched_tunable_scaling + * to consumption or the quota being specified to be smaller than the slice) + * we will always only issue the remaining available time. + * +- * default: 5 msec, units: microseconds +- */ +-unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL; ++ * (default: 5 msec, units: microseconds) ++ */ ++unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL; + #endif + + /* + * The margin used when comparing utilization with CPU capacity: + * util * margin < capacity * 1024 ++ * ++ * (default: ~20%) + */ +-unsigned int capacity_margin = 1280; /* ~20% */ ++unsigned int capacity_margin = 1280; + + static inline void update_load_add(struct load_weight *lw, unsigned long inc) + { +@@ -7174,8 +7180,8 @@ void update_group_capacity(struct sched_domain *sd, int cpu) + * cpumask covering 1 cpu of the first group and 3 cpus of the second group. + * Something like: + * +- * { 0 1 2 3 } { 4 5 6 7 } +- * * * * * ++ * { 0 1 2 3 } { 4 5 6 7 } ++ * * * * * + * + * If we were to balance group-wise we'd place two tasks in the first group and + * two tasks in the second group. 
Clearly this is undesired as it will overload +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0015-sched-Extend-scheduler-s-asym-packing.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0015-sched-Extend-scheduler-s-asym-packing.patch new file mode 100644 index 0000000..6d2b9de --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0015-sched-Extend-scheduler-s-asym-packing.patch @@ -0,0 +1,208 @@ +From e467f39a4c96c4a6f96be66a9224807fe9b7346e Mon Sep 17 00:00:00 2001 +From: Tim Chen <tim.c.chen@linux.intel.com> +Date: Tue, 22 Nov 2016 12:23:53 -0800 +Subject: [PATCH 15/92] sched: Extend scheduler's asym packing + +We generalize the scheduler's asym packing to provide an ordering +of the cpu beyond just the cpu number. This allows the use of the +ASYM_PACKING scheduler machinery to move loads to preferred CPU in a +sched domain. The preference is defined with the cpu priority +given by arch_asym_cpu_priority(cpu). + +We also record the most preferred cpu in a sched group when +we build the cpu's capacity for fast lookup of preferred cpu +during load balancing. 
+ +Co-developed-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> +Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Cc: linux-pm@vger.kernel.org +Cc: jolsa@redhat.com +Cc: rjw@rjwysocki.net +Cc: linux-acpi@vger.kernel.org +Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com> +Cc: bp@suse.de +Link: http://lkml.kernel.org/r/0e73ae12737dfaafa46c07066cc7c5d3f1675e46.1479844244.git.tim.c.chen@linux.intel.com +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + +(cherry picked from commit afe06efdf07c12fd9370d5cce5383398cedf6c90) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + include/linux/sched.h | 2 ++ + kernel/sched/core.c | 15 +++++++++++++++ + kernel/sched/fair.c | 53 ++++++++++++++++++++++++++++++++++----------------- + kernel/sched/sched.h | 6 ++++++ + 4 files changed, 59 insertions(+), 17 deletions(-) + +diff --git a/include/linux/sched.h b/include/linux/sched.h +index e9c009d..2e3c2a1 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -1057,6 +1057,8 @@ static inline int cpu_numa_flags(void) + } + #endif + ++extern int arch_asym_cpu_priority(int cpu); ++ + struct sched_domain_attr { + int relax_domain_level; + }; +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index 391d6c9..209d2ea 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -6302,7 +6302,22 @@ static void init_sched_groups_capacity(int cpu, struct sched_domain *sd) + WARN_ON(!sg); + + do { ++ int cpu, max_cpu = -1; ++ + sg->group_weight = cpumask_weight(sched_group_cpus(sg)); ++ ++ if (!(sd->flags & SD_ASYM_PACKING)) ++ goto next; ++ ++ for_each_cpu(cpu, sched_group_cpus(sg)) { ++ if (max_cpu < 0) ++ max_cpu = cpu; ++ else if (sched_asym_prefer(cpu, max_cpu)) ++ max_cpu = cpu; ++ } ++ sg->asym_prefer_cpu = max_cpu; ++ ++next: + sg = sg->next; + } while (sg != sd->groups); + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index aa47589..18d9e75 100644 +--- 
a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -97,6 +97,16 @@ + + const_debug unsigned int sysctl_sched_migration_cost = 500000UL; + ++#ifdef CONFIG_SMP ++/* ++ * For asym packing, by default the lower numbered cpu has higher priority. ++ */ ++int __weak arch_asym_cpu_priority(int cpu) ++{ ++ return -cpu; ++} ++#endif ++ + #ifdef CONFIG_CFS_BANDWIDTH + /* + * Amount of runtime to allocate from global (tg) to local (per-cfs_rq) pool +@@ -7388,16 +7398,18 @@ static bool update_sd_pick_busiest(struct lb_env *env, + if (env->idle == CPU_NOT_IDLE) + return true; + /* +- * ASYM_PACKING needs to move all the work to the lowest +- * numbered CPUs in the group, therefore mark all groups +- * higher than ourself as busy. ++ * ASYM_PACKING needs to move all the work to the highest ++ * prority CPUs in the group, therefore mark all groups ++ * of lower priority than ourself as busy. + */ +- if (sgs->sum_nr_running && env->dst_cpu < group_first_cpu(sg)) { ++ if (sgs->sum_nr_running && ++ sched_asym_prefer(env->dst_cpu, sg->asym_prefer_cpu)) { + if (!sds->busiest) + return true; + +- /* Prefer to move from highest possible cpu's work */ +- if (group_first_cpu(sds->busiest) < group_first_cpu(sg)) ++ /* Prefer to move from lowest priority cpu's work */ ++ if (sched_asym_prefer(sds->busiest->asym_prefer_cpu, ++ sg->asym_prefer_cpu)) + return true; + } + +@@ -7549,8 +7561,8 @@ static int check_asym_packing(struct lb_env *env, struct sd_lb_stats *sds) + if (!sds->busiest) + return 0; + +- busiest_cpu = group_first_cpu(sds->busiest); +- if (env->dst_cpu > busiest_cpu) ++ busiest_cpu = sds->busiest->asym_prefer_cpu; ++ if (sched_asym_prefer(busiest_cpu, env->dst_cpu)) + return 0; + + env->imbalance = DIV_ROUND_CLOSEST( +@@ -7888,10 +7900,11 @@ static int need_active_balance(struct lb_env *env) + + /* + * ASYM_PACKING needs to force migrate tasks from busy but +- * higher numbered CPUs in order to pack all tasks in the +- * lowest numbered CPUs. 
++ * lower priority CPUs in order to pack all tasks in the ++ * highest priority CPUs. + */ +- if ((sd->flags & SD_ASYM_PACKING) && env->src_cpu > env->dst_cpu) ++ if ((sd->flags & SD_ASYM_PACKING) && ++ sched_asym_prefer(env->dst_cpu, env->src_cpu)) + return 1; + } + +@@ -8740,7 +8753,7 @@ static inline bool nohz_kick_needed(struct rq *rq) + unsigned long now = jiffies; + struct sched_domain_shared *sds; + struct sched_domain *sd; +- int nr_busy, cpu = rq->cpu; ++ int nr_busy, i, cpu = rq->cpu; + bool kick = false; + + if (unlikely(rq->idle_balance)) +@@ -8791,12 +8804,18 @@ static inline bool nohz_kick_needed(struct rq *rq) + } + + sd = rcu_dereference(per_cpu(sd_asym, cpu)); +- if (sd && (cpumask_first_and(nohz.idle_cpus_mask, +- sched_domain_span(sd)) < cpu)) { +- kick = true; +- goto unlock; +- } ++ if (sd) { ++ for_each_cpu(i, sched_domain_span(sd)) { ++ if (i == cpu || ++ !cpumask_test_cpu(i, nohz.idle_cpus_mask)) ++ continue; + ++ if (sched_asym_prefer(i, cpu)) { ++ kick = true; ++ goto unlock; ++ } ++ } ++ } + unlock: + rcu_read_unlock(); + return kick; +diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h +index d7e3931..7b34c78 100644 +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -540,6 +540,11 @@ struct dl_rq { + + #ifdef CONFIG_SMP + ++static inline bool sched_asym_prefer(int a, int b) ++{ ++ return arch_asym_cpu_priority(a) > arch_asym_cpu_priority(b); ++} ++ + /* + * We add the notion of a root-domain which will be used to define per-domain + * variables. Each exclusive cpuset essentially defines an island domain by +@@ -908,6 +913,7 @@ struct sched_group { + + unsigned int group_weight; + struct sched_group_capacity *sgc; ++ int asym_prefer_cpu; /* cpu of highest priority in group */ + + /* + * The CPUs this group covers. 
+-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0016-cpufreq-schedutil-Avoid-indented-labels.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0016-cpufreq-schedutil-Avoid-indented-labels.patch new file mode 100644 index 0000000..a086850 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0016-cpufreq-schedutil-Avoid-indented-labels.patch @@ -0,0 +1,44 @@ +From b56fd2c55df1136d076d89c88bb4db6877a58fea Mon Sep 17 00:00:00 2001 +From: Viresh Kumar <viresh.kumar@linaro.org> +Date: Tue, 15 Nov 2016 13:53:20 +0530 +Subject: [PATCH 16/92] cpufreq: schedutil: Avoid indented labels + +Switch to the more common practice of writing labels. + +Suggested-by: Peter Zijlstra <peterz@infradead.org> +Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org> +Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> +(cherry picked from commit 8e2ddb03643eb9d0bc4926946d7ce0d308eef0a5) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/cpufreq_schedutil.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c +index 69e0689..8c4e165 100644 +--- a/kernel/sched/cpufreq_schedutil.c ++++ b/kernel/sched/cpufreq_schedutil.c +@@ -454,17 +454,17 @@ static int sugov_init(struct cpufreq_policy *policy) + if (ret) + goto fail; + +- out: ++out: + mutex_unlock(&global_tunables_lock); + + cpufreq_enable_fast_switch(policy); + return 0; + +- fail: ++fail: + policy->governor_data = NULL; + sugov_tunables_free(tunables); + +- free_sg_policy: ++free_sg_policy: + mutex_unlock(&global_tunables_lock); + + sugov_policy_free(sg_policy); +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0017-cpufreq-schedutil-enable-fast-switch-earlier.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0017-cpufreq-schedutil-enable-fast-switch-earlier.patch new file mode 100644 index 0000000..487c01e --- /dev/null +++ 
b/meta-eas/recipes-kernel/linux/linux-renesas/0017-cpufreq-schedutil-enable-fast-switch-earlier.patch @@ -0,0 +1,79 @@ +From 9286a34be80e891aebc0193be28931a9f194b23d Mon Sep 17 00:00:00 2001 +From: Viresh Kumar <viresh.kumar@linaro.org> +Date: Tue, 15 Nov 2016 13:53:21 +0530 +Subject: [PATCH 17/92] cpufreq: schedutil: enable fast switch earlier + +The fast_switch_enabled flag will be used by both sugov_policy_alloc() +and sugov_policy_free() with a later patch. + +Prepare for that by moving the calls to enable and disable it to the +beginning of sugov_init() and end of sugov_exit(). + +Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org> +Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> +(cherry picked from commit 4a71ce4348bb61740d411822357061f8bf870f4c) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/cpufreq_schedutil.c | 17 +++++++++++------ + 1 file changed, 11 insertions(+), 6 deletions(-) + +diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c +index 8c4e165..68f21bb 100644 +--- a/kernel/sched/cpufreq_schedutil.c ++++ b/kernel/sched/cpufreq_schedutil.c +@@ -416,9 +416,13 @@ static int sugov_init(struct cpufreq_policy *policy) + if (policy->governor_data) + return -EBUSY; + ++ cpufreq_enable_fast_switch(policy); ++ + sg_policy = sugov_policy_alloc(policy); +- if (!sg_policy) +- return -ENOMEM; ++ if (!sg_policy) { ++ ret = -ENOMEM; ++ goto disable_fast_switch; ++ } + + mutex_lock(&global_tunables_lock); + +@@ -456,8 +460,6 @@ static int sugov_init(struct cpufreq_policy *policy) + + out: + mutex_unlock(&global_tunables_lock); +- +- cpufreq_enable_fast_switch(policy); + return 0; + + fail: +@@ -468,6 +470,10 @@ static int sugov_init(struct cpufreq_policy *policy) + mutex_unlock(&global_tunables_lock); + + sugov_policy_free(sg_policy); ++ ++disable_fast_switch: ++ cpufreq_disable_fast_switch(policy); ++ + pr_err("initialization failed (error %d)\n", ret); + return ret; + } +@@ -478,8 +484,6 
@@ static void sugov_exit(struct cpufreq_policy *policy) + struct sugov_tunables *tunables = sg_policy->tunables; + unsigned int count; + +- cpufreq_disable_fast_switch(policy); +- + mutex_lock(&global_tunables_lock); + + count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook); +@@ -490,6 +494,7 @@ static void sugov_exit(struct cpufreq_policy *policy) + mutex_unlock(&global_tunables_lock); + + sugov_policy_free(sg_policy); ++ cpufreq_disable_fast_switch(policy); + } + + static int sugov_start(struct cpufreq_policy *policy) +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0018-cpufreq-schedutil-move-slow-path-from-workqueue-to-S.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0018-cpufreq-schedutil-move-slow-path-from-workqueue-to-S.patch new file mode 100644 index 0000000..244cf82 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0018-cpufreq-schedutil-move-slow-path-from-workqueue-to-S.patch @@ -0,0 +1,228 @@ +From 389b1838b500ecc431c2421ee399b8f737e88cf5 Mon Sep 17 00:00:00 2001 +From: Viresh Kumar <viresh.kumar@linaro.org> +Date: Tue, 15 Nov 2016 13:53:22 +0530 +Subject: [PATCH 18/92] cpufreq: schedutil: move slow path from workqueue to + SCHED_FIFO task + +If slow path frequency changes are conducted in a SCHED_OTHER context +then they may be delayed for some amount of time, including +indefinitely, when real time or deadline activity is taking place. + +Move the slow path to a real time kernel thread. In the future the +thread should be made SCHED_DEADLINE. The RT priority is arbitrarily set +to 50 for now. + +Hackbench results on ARM Exynos, dual core A15 platform for 10 +iterations: + +$ hackbench -s 100 -l 100 -g 10 -f 20 + +Before After +--------------------------------- +1.808 1.603 +1.847 1.251 +2.229 1.590 +1.952 1.600 +1.947 1.257 +1.925 1.627 +2.694 1.620 +1.258 1.621 +1.919 1.632 +1.250 1.240 + +Average: + +1.8829 1.5041 + +Based on initial work by Steve Muckle. 
+ +Signed-off-by: Steve Muckle <smuckle.linux@gmail.com> +Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org> +Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> +(cherry picked from commit 02a7b1ee3baa15a98b541d8cfd156bbe1a091c20) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/cpufreq_schedutil.c | 85 ++++++++++++++++++++++++++++++++++++---- + 1 file changed, 78 insertions(+), 7 deletions(-) + +diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c +index 68f21bb..f165ba0 100644 +--- a/kernel/sched/cpufreq_schedutil.c ++++ b/kernel/sched/cpufreq_schedutil.c +@@ -12,11 +12,14 @@ + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + + #include <linux/cpufreq.h> ++#include <linux/kthread.h> + #include <linux/slab.h> + #include <trace/events/power.h> + + #include "sched.h" + ++#define SUGOV_KTHREAD_PRIORITY 50 ++ + struct sugov_tunables { + struct gov_attr_set attr_set; + unsigned int rate_limit_us; +@@ -35,8 +38,10 @@ struct sugov_policy { + + /* The next fields are only needed if fast switch cannot be used. 
*/ + struct irq_work irq_work; +- struct work_struct work; ++ struct kthread_work work; + struct mutex work_lock; ++ struct kthread_worker worker; ++ struct task_struct *thread; + bool work_in_progress; + + bool need_freq_update; +@@ -291,7 +296,7 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time, + raw_spin_unlock(&sg_policy->update_lock); + } + +-static void sugov_work(struct work_struct *work) ++static void sugov_work(struct kthread_work *work) + { + struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work); + +@@ -308,7 +313,21 @@ static void sugov_irq_work(struct irq_work *irq_work) + struct sugov_policy *sg_policy; + + sg_policy = container_of(irq_work, struct sugov_policy, irq_work); +- schedule_work_on(smp_processor_id(), &sg_policy->work); ++ ++ /* ++ * For Real Time and Deadline tasks, schedutil governor shoots the ++ * frequency to maximum. And special care must be taken to ensure that ++ * this kthread doesn't result in that. ++ * ++ * This is (mostly) guaranteed by the work_in_progress flag. The flag is ++ * updated only at the end of the sugov_work() and before that schedutil ++ * rejects all other frequency scaling requests. ++ * ++ * Though there is a very rare case where the RT thread yields right ++ * after the work_in_progress flag is cleared. The effects of that are ++ * neglected for now. 
++ */ ++ kthread_queue_work(&sg_policy->worker, &sg_policy->work); + } + + /************************** sysfs interface ************************/ +@@ -372,7 +391,6 @@ static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy) + + sg_policy->policy = policy; + init_irq_work(&sg_policy->irq_work, sugov_irq_work); +- INIT_WORK(&sg_policy->work, sugov_work); + mutex_init(&sg_policy->work_lock); + raw_spin_lock_init(&sg_policy->update_lock); + return sg_policy; +@@ -384,6 +402,51 @@ static void sugov_policy_free(struct sugov_policy *sg_policy) + kfree(sg_policy); + } + ++static int sugov_kthread_create(struct sugov_policy *sg_policy) ++{ ++ struct task_struct *thread; ++ struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO / 2 }; ++ struct cpufreq_policy *policy = sg_policy->policy; ++ int ret; ++ ++ /* kthread only required for slow path */ ++ if (policy->fast_switch_enabled) ++ return 0; ++ ++ kthread_init_work(&sg_policy->work, sugov_work); ++ kthread_init_worker(&sg_policy->worker); ++ thread = kthread_create(kthread_worker_fn, &sg_policy->worker, ++ "sugov:%d", ++ cpumask_first(policy->related_cpus)); ++ if (IS_ERR(thread)) { ++ pr_err("failed to create sugov thread: %ld\n", PTR_ERR(thread)); ++ return PTR_ERR(thread); ++ } ++ ++ ret = sched_setscheduler_nocheck(thread, SCHED_FIFO, &param); ++ if (ret) { ++ kthread_stop(thread); ++ pr_warn("%s: failed to set SCHED_FIFO\n", __func__); ++ return ret; ++ } ++ ++ sg_policy->thread = thread; ++ kthread_bind_mask(thread, policy->related_cpus); ++ wake_up_process(thread); ++ ++ return 0; ++} ++ ++static void sugov_kthread_stop(struct sugov_policy *sg_policy) ++{ ++ /* kthread only required for slow path */ ++ if (sg_policy->policy->fast_switch_enabled) ++ return; ++ ++ kthread_flush_worker(&sg_policy->worker); ++ kthread_stop(sg_policy->thread); ++} ++ + static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy) + { + struct sugov_tunables *tunables; +@@ -424,12 +487,16 @@
static int sugov_init(struct cpufreq_policy *policy) + goto disable_fast_switch; + } + ++ ret = sugov_kthread_create(sg_policy); ++ if (ret) ++ goto free_sg_policy; ++ + mutex_lock(&global_tunables_lock); + + if (global_tunables) { + if (WARN_ON(have_governor_per_policy())) { + ret = -EINVAL; +- goto free_sg_policy; ++ goto stop_kthread; + } + policy->governor_data = sg_policy; + sg_policy->tunables = global_tunables; +@@ -441,7 +508,7 @@ static int sugov_init(struct cpufreq_policy *policy) + tunables = sugov_tunables_alloc(sg_policy); + if (!tunables) { + ret = -ENOMEM; +- goto free_sg_policy; ++ goto stop_kthread; + } + + tunables->rate_limit_us = LATENCY_MULTIPLIER; +@@ -466,6 +533,9 @@ static int sugov_init(struct cpufreq_policy *policy) + policy->governor_data = NULL; + sugov_tunables_free(tunables); + ++stop_kthread: ++ sugov_kthread_stop(sg_policy); ++ + free_sg_policy: + mutex_unlock(&global_tunables_lock); + +@@ -493,6 +563,7 @@ static void sugov_exit(struct cpufreq_policy *policy) + + mutex_unlock(&global_tunables_lock); + ++ sugov_kthread_stop(sg_policy); + sugov_policy_free(sg_policy); + cpufreq_disable_fast_switch(policy); + } +@@ -541,7 +612,7 @@ static void sugov_stop(struct cpufreq_policy *policy) + synchronize_sched(); + + irq_work_sync(&sg_policy->irq_work); +- cancel_work_sync(&sg_policy->work); ++ kthread_cancel_work_sync(&sg_policy->work); + } + + static void sugov_limits(struct cpufreq_policy *policy) +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0019-cpufreq-schedutil-irq-work-and-mutex-are-only-used-i.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0019-cpufreq-schedutil-irq-work-and-mutex-are-only-used-i.patch new file mode 100644 index 0000000..94c768a --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0019-cpufreq-schedutil-irq-work-and-mutex-are-only-used-i.patch @@ -0,0 +1,71 @@ +From fad60e6f1b0de5e37cb4e0ae8a4a769ef821950f Mon Sep 17 00:00:00 2001 +From: Viresh Kumar <viresh.kumar@linaro.org> 
+Date: Tue, 15 Nov 2016 13:53:23 +0530 +Subject: [PATCH 19/92] cpufreq: schedutil: irq-work and mutex are only used in + slow path + +Execute the irq-work specific initialization/exit code only when the +fast path isn't available. + +Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org> +Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> +(cherry picked from commit 21ef57297b15a49b0c4dd4e7135c1a08e9a29a1c) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/cpufreq_schedutil.c | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c +index f165ba0..42a220e 100644 +--- a/kernel/sched/cpufreq_schedutil.c ++++ b/kernel/sched/cpufreq_schedutil.c +@@ -390,15 +390,12 @@ static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy) + return NULL; + + sg_policy->policy = policy; +- init_irq_work(&sg_policy->irq_work, sugov_irq_work); +- mutex_init(&sg_policy->work_lock); + raw_spin_lock_init(&sg_policy->update_lock); + return sg_policy; + } + + static void sugov_policy_free(struct sugov_policy *sg_policy) + { +- mutex_destroy(&sg_policy->work_lock); + kfree(sg_policy); + } + +@@ -432,6 +429,9 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy) + + sg_policy->thread = thread; + kthread_bind_mask(thread, policy->related_cpus); ++ init_irq_work(&sg_policy->irq_work, sugov_irq_work); ++ mutex_init(&sg_policy->work_lock); ++ + wake_up_process(thread); + + return 0; +@@ -445,6 +445,7 @@ static void sugov_kthread_stop(struct sugov_policy *sg_policy) + + kthread_flush_worker(&sg_policy->worker); + kthread_stop(sg_policy->thread); ++ mutex_destroy(&sg_policy->work_lock); + } + + static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy) +@@ -611,8 +612,10 @@ static void sugov_stop(struct cpufreq_policy *policy) + + synchronize_sched(); + +- irq_work_sync(&sg_policy->irq_work); +- 
kthread_cancel_work_sync(&sg_policy->work); ++ if (!policy->fast_switch_enabled) { ++ irq_work_sync(&sg_policy->irq_work); ++ kthread_cancel_work_sync(&sg_policy->work); ++ } + } + + static void sugov_limits(struct cpufreq_policy *policy) +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0020-x86-topology-Define-x86-s-arch_update_cpu_topology.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0020-x86-topology-Define-x86-s-arch_update_cpu_topology.patch new file mode 100644 index 0000000..338ea13 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0020-x86-topology-Define-x86-s-arch_update_cpu_topology.patch @@ -0,0 +1,67 @@ +From 32b4c21d163c96ff9fa1829c7a4f325aca0655a7 Mon Sep 17 00:00:00 2001 +From: Tim Chen <tim.c.chen@linux.intel.com> +Date: Tue, 22 Nov 2016 12:23:54 -0800 +Subject: [PATCH 20/92] x86/topology: Define x86's arch_update_cpu_topology + +The scheduler calls arch_update_cpu_topology() to check whether the +scheduler domains have to be rebuilt. + +So far x86 has no requirement for this, but the upcoming ITMT support +makes this necessary. + +Request the rebuild when the x86 internal update flag is set. 
+ +Suggested-by: Morten Rasmussen <morten.rasmussen@arm.com> +Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> +Cc: linux-pm@vger.kernel.org +Cc: peterz@infradead.org +Cc: jolsa@redhat.com +Cc: rjw@rjwysocki.net +Cc: linux-acpi@vger.kernel.org +Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com> +Cc: bp@suse.de +Link: http://lkml.kernel.org/r/bfbf5591276ec60b2af2da798adc1060df1e2a5f.1479844244.git.tim.c.chen@linux.intel.com +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + +(cherry picked from commit 7d25127cef44924f1013d119ba385095ca4b4a83) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + arch/x86/include/asm/topology.h | 1 + + arch/x86/kernel/smpboot.c | 11 +++++++++++ + 2 files changed, 12 insertions(+) + +diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h +index cf75871..a5ca88a 100644 +--- a/arch/x86/include/asm/topology.h ++++ b/arch/x86/include/asm/topology.h +@@ -146,4 +146,5 @@ static inline void arch_fix_phys_package_id(int num, u32 slot) + int x86_pci_root_bus_node(int bus); + void x86_pci_root_bus_resources(int bus, struct list_head *resources); + ++extern bool x86_topology_update; + #endif /* _ASM_X86_TOPOLOGY_H */ +diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c +index 42f5eb7..ac61ee7 100644 +--- a/arch/x86/kernel/smpboot.c ++++ b/arch/x86/kernel/smpboot.c +@@ -109,6 +109,17 @@ + /* Maximum number of SMT threads on any online core */ + int __max_smt_threads __read_mostly; + ++/* Flag to indicate if a complete sched domain rebuild is required */ ++bool x86_topology_update; ++ ++int arch_update_cpu_topology(void) ++{ ++ int retval = x86_topology_update; ++ ++ x86_topology_update = false; ++ return retval; ++} ++ + static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) + { + unsigned long flags; +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0021-sched-Prevent-unnecessary-active-balance-of-single-t.patch 
b/meta-eas/recipes-kernel/linux/linux-renesas/0021-sched-Prevent-unnecessary-active-balance-of-single-t.patch new file mode 100644 index 0000000..64ad147 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0021-sched-Prevent-unnecessary-active-balance-of-single-t.patch @@ -0,0 +1,63 @@ +From 67c41183d153dca79ae8073ce6f056b66ba384f2 Mon Sep 17 00:00:00 2001 +From: Morten Rasmussen <morten.rasmussen@arm.com> +Date: Thu, 2 Jul 2015 17:16:34 +0100 +Subject: [PATCH 21/92] sched: Prevent unnecessary active balance of single + task in sched group + +Scenarios with the busiest group having just one task and the local +being idle on topologies with sched groups with different numbers of +cpus manage to dodge all load-balance bailout conditions, resulting in +the nr_balance_failed counter being incremented. This eventually causes a +pointless active migration of the task. This patch prevents this by not +incrementing the counter when the busiest group only has one task. +ASYM_PACKING migrations and migrations due to reduced capacity should +still take place as these are explicitly captured by +need_active_balance(). + +A better solution would be to not attempt the load-balance in the first +place, but that requires significant changes to the order of bailout +conditions and statistics gathering. 
+ +cc: Ingo Molnar <mingo@redhat.com> +cc: Peter Zijlstra <peterz@infradead.org> +Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com> +(cherry picked from commit b1a7e8eae736c483b9ffed01b4dd35747f448232) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/fair.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 18d9e75..341fccd 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -6509,6 +6509,7 @@ struct lb_env { + int new_dst_cpu; + enum cpu_idle_type idle; + long imbalance; ++ unsigned int src_grp_nr_running; + /* The set of CPUs under consideration for load-balancing */ + struct cpumask *cpus; + +@@ -7517,6 +7518,8 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd + if (env->sd->flags & SD_NUMA) + env->fbq_type = fbq_classify_group(&sds->busiest_stat); + ++ env->src_grp_nr_running = sds->busiest_stat.sum_nr_running; ++ + if (!env->sd->parent) { + /* update overload indicator if we are at root domain */ + if (env->dst_rq->rd->overload != overload) +@@ -8133,7 +8136,8 @@ static int load_balance(int this_cpu, struct rq *this_rq, + * excessive cache_hot migrations and active balances. 
+ */ + if (idle != CPU_NEWLY_IDLE) +- sd->nr_balance_failed++; ++ if (env.src_grp_nr_running > 1) ++ sd->nr_balance_failed++; + + if (need_active_balance(&env)) { + raw_spin_lock_irqsave(&busiest->lock, flags); +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0022-sched-Enable-idle-balance-to-pull-single-task-toward.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0022-sched-Enable-idle-balance-to-pull-single-task-toward.patch new file mode 100644 index 0000000..d8d1877 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0022-sched-Enable-idle-balance-to-pull-single-task-toward.patch @@ -0,0 +1,55 @@ +From 23326036926c9495301467238107ab942286aa54 Mon Sep 17 00:00:00 2001 +From: Dietmar Eggemann <dietmar.eggemann@arm.com> +Date: Mon, 26 Jan 2015 19:47:28 +0000 +Subject: [PATCH 22/92] sched: Enable idle balance to pull single task towards + cpu with higher capacity + +We do not want to miss out on the ability to pull a single remaining +task from a potential source cpu towards an idle destination cpu. Add an +extra criteria to need_active_balance() to kick off active load balance +if the source cpu is over-utilized and has lower capacity than the +destination cpu. 
+ +cc: Ingo Molnar <mingo@redhat.com> +cc: Peter Zijlstra <peterz@infradead.org> +Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com> +Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com> +(cherry picked from commit 006d246f483211175d75b8a2b90000c36c486b7d) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/fair.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 341fccd..cb283ca 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -5859,6 +5859,11 @@ static int wake_cap(struct task_struct *p, int cpu, int prev_cpu) + return min_cap * 1024 < task_util(p) * capacity_margin; + } + ++static bool cpu_overutilized(int cpu) ++{ ++ return (capacity_of(cpu) * 1024) < (cpu_util(cpu) * capacity_margin); ++} ++ + /* + * select_task_rq_fair: Select target runqueue for the waking task in domains + * that have the 'sd_flag' flag set. In practice, this is SD_BALANCE_WAKE, +@@ -7924,6 +7929,13 @@ static int need_active_balance(struct lb_env *env) + return 1; + } + ++ if ((capacity_of(env->src_cpu) < capacity_of(env->dst_cpu)) && ++ env->src_rq->cfs.h_nr_running == 1 && ++ cpu_overutilized(env->src_cpu) && ++ !cpu_overutilized(env->dst_cpu)) { ++ return 1; ++ } ++ + return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2); + } + +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0023-Documentation-arm-define-DT-cpu-capacity-dmips-mhz-b.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0023-Documentation-arm-define-DT-cpu-capacity-dmips-mhz-b.patch new file mode 100644 index 0000000..771327c --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0023-Documentation-arm-define-DT-cpu-capacity-dmips-mhz-b.patch @@ -0,0 +1,309 @@ +From 908654b226b9b67b12a5c07434cd1854d9a848e8 Mon Sep 17 00:00:00 2001 +From: Juri Lelli <juri.lelli@arm.com> +Date: Thu, 15 Oct 2015 14:49:00 +0100 +Subject: [PATCH 23/92] Documentation: arm: define DT 
cpu capacity-dmips-mhz + bindings + +ARM systems may be configured to have cpus with different power/performance +characteristics within the same chip. In this case, additional information +has to be made available to the kernel (the scheduler in particular) for it +to be aware of such differences and take decisions accordingly. + +Therefore, this patch aims at standardizing cpu capacities device tree +bindings for ARM platforms. Bindings define cpu capacity-dmips-mhz +parameter, to allow operating systems to retrieve such information from +the device tree and initialize related kernel structures, paving the way +for common code in the kernel to deal with heterogeneity. + +Cc: Rob Herring <robh+dt@kernel.org> +Cc: Pawel Moll <pawel.moll@arm.com> +Cc: Mark Rutland <mark.rutland@arm.com> +Cc: Ian Campbell <ijc+devicetree@hellion.org.uk> +Cc: Kumar Gala <galak@codeaurora.org> +Cc: Maxime Ripard <maxime.ripard@free-electrons.com> +Cc: Olof Johansson <olof@lixom.net> +Cc: Gregory CLEMENT <gregory.clement@free-electrons.com> +Cc: Paul Walmsley <paul@pwsan.com> +Cc: Linus Walleij <linus.walleij@linaro.org> +Cc: Chen-Yu Tsai <wens@csie.org> +Cc: Thomas Petazzoni <thomas.petazzoni@free-electrons.com> +Cc: devicetree@vger.kernel.org +Acked-by: Rob Herring <robh@kernel.org> +Acked-by: Vincent Guittot <vincent.guittot@linaro.org> +Signed-off-by: Juri Lelli <juri.lelli@arm.com> +--- + .../devicetree/bindings/arm/cpu-capacity.txt | 236 +++++++++++++++++++++ + Documentation/devicetree/bindings/arm/cpus.txt | 10 + + 2 files changed, 246 insertions(+) + create mode 100644 Documentation/devicetree/bindings/arm/cpu-capacity.txt + +diff --git a/Documentation/devicetree/bindings/arm/cpu-capacity.txt b/Documentation/devicetree/bindings/arm/cpu-capacity.txt +new file mode 100644 +index 0000000..7809fbe +--- /dev/null ++++ b/Documentation/devicetree/bindings/arm/cpu-capacity.txt +@@ -0,0 +1,236 @@ ++========================================== ++ARM CPUs capacity bindings 
++========================================== ++ ++========================================== ++1 - Introduction ++========================================== ++ ++ARM systems may be configured to have cpus with different power/performance ++characteristics within the same chip. In this case, additional information has ++to be made available to the kernel for it to be aware of such differences and ++take decisions accordingly. ++ ++========================================== ++2 - CPU capacity definition ++========================================== ++ ++CPU capacity is a number that provides the scheduler information about CPUs ++heterogeneity. Such heterogeneity can come from micro-architectural differences ++(e.g., ARM big.LITTLE systems) or maximum frequency at which CPUs can run ++(e.g., SMP systems with multiple frequency domains). Heterogeneity in this ++context is about differing performance characteristics; this binding tries to ++capture a first-order approximation of the relative performance of CPUs. ++ ++CPU capacities are obtained by running a suitable benchmark. This binding makes ++no guarantees on the validity or suitability of any particular benchmark, the ++final capacity should, however, be: ++ ++* A "single-threaded" or CPU affine benchmark ++* Divided by the running frequency of the CPU executing the benchmark ++* Not subject to dynamic frequency scaling of the CPU ++ ++For the time being we however advise usage of the Dhrystone benchmark. What ++above thus becomes: ++ ++CPU capacities are obtained by running the Dhrystone benchmark on each CPU at ++max frequency (with caches enabled). The obtained DMIPS score is then divided ++by the frequency (in MHz) at which the benchmark has been run, so that ++DMIPS/MHz are obtained. Such values are then normalized w.r.t. the highest ++score obtained in the system. 
++ ++========================================== ++3 - capacity-dmips-mhz ++========================================== ++ ++capacity-dmips-mhz is an optional cpu node [1] property: u32 value ++representing CPU capacity expressed in normalized DMIPS/MHz. At boot time, the ++maximum frequency available to the cpu is then used to calculate the capacity ++value internally used by the kernel. ++ ++capacity-dmips-mhz property is all-or-nothing: if it is specified for a cpu ++node, it has to be specified for every other cpu node, or the system will ++fall back to the default capacity value for every CPU. If cpufreq is not ++available, final capacities are calculated by directly using capacity-dmips- ++mhz values (normalized w.r.t. the highest value found while parsing the DT). ++ ++=========================================== ++4 - Examples ++=========================================== ++ ++Example 1 (ARM 64-bit, 6-cpu system, two clusters): ++capacities-dmips-mhz are scaled w.r.t. 1024 (cpu@0 and cpu@1) ++supposing cluster0@max-freq=1100 and cluster1@max-freq=850, ++final capacities are 1024 for cluster0 and 446 for cluster1 ++ ++cpus { ++ #address-cells = <2>; ++ #size-cells = <0>; ++ ++ cpu-map { ++ cluster0 { ++ core0 { ++ cpu = <&A57_0>; ++ }; ++ core1 { ++ cpu = <&A57_1>; ++ }; ++ }; ++ ++ cluster1 { ++ core0 { ++ cpu = <&A53_0>; ++ }; ++ core1 { ++ cpu = <&A53_1>; ++ }; ++ core2 { ++ cpu = <&A53_2>; ++ }; ++ core3 { ++ cpu = <&A53_3>; ++ }; ++ }; ++ }; ++ ++ idle-states { ++ entry-method = "arm,psci"; ++ ++ CPU_SLEEP_0: cpu-sleep-0 { ++ compatible = "arm,idle-state"; ++ arm,psci-suspend-param = <0x0010000>; ++ local-timer-stop; ++ entry-latency-us = <100>; ++ exit-latency-us = <250>; ++ min-residency-us = <150>; ++ }; ++ ++ CLUSTER_SLEEP_0: cluster-sleep-0 { ++ compatible = "arm,idle-state"; ++ arm,psci-suspend-param = <0x1010000>; ++ local-timer-stop; ++ entry-latency-us = <800>; ++ exit-latency-us = <700>; ++ min-residency-us = <2500>; ++ }; ++ }; ++ ++ A57_0: 
cpu@0 { ++ compatible = "arm,cortex-a57","arm,armv8"; ++ reg = <0x0 0x0>; ++ device_type = "cpu"; ++ enable-method = "psci"; ++ next-level-cache = <&A57_L2>; ++ clocks = <&scpi_dvfs 0>; ++ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; ++ capacity-dmips-mhz = <1024>; ++ }; ++ ++ A57_1: cpu@1 { ++ compatible = "arm,cortex-a57","arm,armv8"; ++ reg = <0x0 0x1>; ++ device_type = "cpu"; ++ enable-method = "psci"; ++ next-level-cache = <&A57_L2>; ++ clocks = <&scpi_dvfs 0>; ++ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; ++ capacity-dmips-mhz = <1024>; ++ }; ++ ++ A53_0: cpu@100 { ++ compatible = "arm,cortex-a53","arm,armv8"; ++ reg = <0x0 0x100>; ++ device_type = "cpu"; ++ enable-method = "psci"; ++ next-level-cache = <&A53_L2>; ++ clocks = <&scpi_dvfs 1>; ++ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; ++ capacity-dmips-mhz = <578>; ++ }; ++ ++ A53_1: cpu@101 { ++ compatible = "arm,cortex-a53","arm,armv8"; ++ reg = <0x0 0x101>; ++ device_type = "cpu"; ++ enable-method = "psci"; ++ next-level-cache = <&A53_L2>; ++ clocks = <&scpi_dvfs 1>; ++ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; ++ capacity-dmips-mhz = <578>; ++ }; ++ ++ A53_2: cpu@102 { ++ compatible = "arm,cortex-a53","arm,armv8"; ++ reg = <0x0 0x102>; ++ device_type = "cpu"; ++ enable-method = "psci"; ++ next-level-cache = <&A53_L2>; ++ clocks = <&scpi_dvfs 1>; ++ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; ++ capacity-dmips-mhz = <578>; ++ }; ++ ++ A53_3: cpu@103 { ++ compatible = "arm,cortex-a53","arm,armv8"; ++ reg = <0x0 0x103>; ++ device_type = "cpu"; ++ enable-method = "psci"; ++ next-level-cache = <&A53_L2>; ++ clocks = <&scpi_dvfs 1>; ++ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; ++ capacity-dmips-mhz = <578>; ++ }; ++ ++ A57_L2: l2-cache0 { ++ compatible = "cache"; ++ }; ++ ++ A53_L2: l2-cache1 { ++ compatible = "cache"; ++ }; ++}; ++ ++Example 2 (ARM 32-bit, 4-cpu system, two clusters, ++ cpus 0,1@1GHz, cpus 2,3@500MHz): ++capacities-dmips-mhz are scaled w.r.t. 
2 (cpu@0 and cpu@1), this means that ++cpu@0 and cpu@1 are twice as fast as cpu@2 and cpu@3 (at the same frequency) ++ ++cpus { ++ #address-cells = <1>; ++ #size-cells = <0>; ++ ++ cpu0: cpu@0 { ++ device_type = "cpu"; ++ compatible = "arm,cortex-a15"; ++ reg = <0>; ++ capacity-dmips-mhz = <2>; ++ }; ++ ++ cpu1: cpu@1 { ++ device_type = "cpu"; ++ compatible = "arm,cortex-a15"; ++ reg = <1>; ++ capacity-dmips-mhz = <2>; ++ }; ++ ++ cpu2: cpu@2 { ++ device_type = "cpu"; ++ compatible = "arm,cortex-a15"; ++ reg = <0x100>; ++ capacity-dmips-mhz = <1>; ++ }; ++ ++ cpu3: cpu@3 { ++ device_type = "cpu"; ++ compatible = "arm,cortex-a15"; ++ reg = <0x101>; ++ capacity-dmips-mhz = <1>; ++ }; ++}; ++ ++=========================================== ++5 - References ++=========================================== ++ ++[1] ARM Linux Kernel documentation - CPUs bindings ++ Documentation/devicetree/bindings/arm/cpus.txt +diff --git a/Documentation/devicetree/bindings/arm/cpus.txt b/Documentation/devicetree/bindings/arm/cpus.txt +index e6782d5..c1dcf4c 100644 +--- a/Documentation/devicetree/bindings/arm/cpus.txt ++++ b/Documentation/devicetree/bindings/arm/cpus.txt +@@ -241,6 +241,14 @@ nodes to be present and contain the properties described below. + # List of phandles to idle state nodes supported + by this cpu [3]. + ++ - capacity-dmips-mhz ++ Usage: Optional ++ Value type: <u32> ++ Definition: ++ # u32 value representing CPU capacity [4] in ++ DMIPS/MHz, relative to highest capacity-dmips-mhz ++ in the system. 
++ + - rockchip,pmu + Usage: optional for systems that have an "enable-method" + property value of "rockchip,rk3066-smp" +@@ -464,3 +472,5 @@ cpus { + [2] arm/msm/qcom,kpss-acc.txt + [3] ARM Linux kernel documentation - idle states bindings + Documentation/devicetree/bindings/arm/idle-states.txt ++[4] ARM Linux kernel documentation - cpu capacity bindings ++ Documentation/devicetree/bindings/arm/cpu-capacity.txt +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0024-arm-parse-cpu-capacity-dmips-mhz-from-DT.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0024-arm-parse-cpu-capacity-dmips-mhz-from-DT.patch new file mode 100644 index 0000000..0a841b6 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0024-arm-parse-cpu-capacity-dmips-mhz-from-DT.patch @@ -0,0 +1,210 @@ +From c9441760c1171936fc561e364111866457fde987 Mon Sep 17 00:00:00 2001 +From: Juri Lelli <juri.lelli@arm.com> +Date: Wed, 11 Nov 2015 16:21:27 +0000 +Subject: [PATCH 24/92] arm: parse cpu capacity-dmips-mhz from DT + +With the introduction of cpu capacity-dmips-mhz bindings, CPU capacities +can now be calculated from values extracted from DT and information +coming from cpufreq. Add parsing of DT information at boot time, and +complement it with cpufreq information. We keep code that can produce +same information, based on different DT properties and hard-coded +values, as fall-back for backward compatibility. + +Caveat: the information provided by this patch will start to be used in +the future. We need to #define arch_scale_cpu_capacity to something +provided in arch, so that scheduler's default implementation (which gets +used if arch_scale_cpu_capacity is not defined) is overwritten. 
+ +Cc: Russell King <linux@arm.linux.org.uk> +Signed-off-by: Juri Lelli <juri.lelli@arm.com> +--- + arch/arm/kernel/topology.c | 145 ++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 144 insertions(+), 1 deletion(-) + +diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c +index ec279d1..b22868a 100644 +--- a/arch/arm/kernel/topology.c ++++ b/arch/arm/kernel/topology.c +@@ -78,6 +78,134 @@ struct cpu_efficiency { + #define cpu_capacity(cpu) __cpu_capacity[cpu] + + static unsigned long middle_capacity = 1; ++static bool cap_from_dt = true; ++static u32 *raw_capacity; ++static bool cap_parsing_failed; ++static u32 capacity_scale; ++ ++static int __init parse_cpu_capacity(struct device_node *cpu_node, int cpu) ++{ ++ int ret = 1; ++ u32 cpu_capacity; ++ ++ if (cap_parsing_failed) ++ return !ret; ++ ++ ret = of_property_read_u32(cpu_node, ++ "capacity-dmips-mhz", ++ &cpu_capacity); ++ if (!ret) { ++ if (!raw_capacity) { ++ raw_capacity = kcalloc(num_possible_cpus(), ++ sizeof(*raw_capacity), ++ GFP_KERNEL); ++ if (!raw_capacity) { ++ pr_err("cpu_capacity: failed to allocate memory for raw capacities\n"); ++ cap_parsing_failed = true; ++ return !ret; ++ } ++ } ++ capacity_scale = max(cpu_capacity, capacity_scale); ++ raw_capacity[cpu] = cpu_capacity; ++ pr_debug("cpu_capacity: %s cpu_capacity=%u (raw)\n", ++ cpu_node->full_name, raw_capacity[cpu]); ++ } else { ++ if (raw_capacity) { ++ pr_err("cpu_capacity: missing %s raw capacity\n", ++ cpu_node->full_name); ++ pr_err("cpu_capacity: partial information: fallback to 1024 for all CPUs\n"); ++ } ++ cap_parsing_failed = true; ++ kfree(raw_capacity); ++ } ++ ++ return !ret; ++} ++ ++static void normalize_cpu_capacity(void) ++{ ++ u64 capacity; ++ int cpu; ++ ++ if (!raw_capacity || cap_parsing_failed) ++ return; ++ ++ pr_debug("cpu_capacity: capacity_scale=%u\n", capacity_scale); ++ for_each_possible_cpu(cpu) { ++ capacity = (raw_capacity[cpu] << SCHED_CAPACITY_SHIFT) ++ / capacity_scale; ++ 
set_capacity_scale(cpu, capacity); ++ pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n", ++ cpu, arch_scale_cpu_capacity(NULL, cpu)); ++ } ++} ++ ++#ifdef CONFIG_CPU_FREQ ++static cpumask_var_t cpus_to_visit; ++static bool cap_parsing_done; ++ ++static int ++init_cpu_capacity_callback(struct notifier_block *nb, ++ unsigned long val, ++ void *data) ++{ ++ struct cpufreq_policy *policy = data; ++ int cpu; ++ ++ if (cap_parsing_failed || cap_parsing_done) ++ return 0; ++ ++ switch (val) { ++ case CPUFREQ_NOTIFY: ++ pr_debug("cpu_capacity: init cpu capacity for CPUs [%*pbl] (to_visit=%*pbl)\n", ++ cpumask_pr_args(policy->related_cpus), ++ cpumask_pr_args(cpus_to_visit)); ++ cpumask_andnot(cpus_to_visit, ++ cpus_to_visit, ++ policy->related_cpus); ++ for_each_cpu(cpu, policy->related_cpus) { ++ raw_capacity[cpu] = arch_scale_cpu_capacity(NULL, cpu) * ++ policy->cpuinfo.max_freq / 1000UL; ++ capacity_scale = max(raw_capacity[cpu], capacity_scale); ++ } ++ if (cpumask_empty(cpus_to_visit)) { ++ normalize_cpu_capacity(); ++ kfree(raw_capacity); ++ pr_debug("cpu_capacity: parsing done\n"); ++ cap_parsing_done = true; ++ } ++ } ++ return 0; ++} ++ ++static struct notifier_block init_cpu_capacity_notifier = { ++ .notifier_call = init_cpu_capacity_callback, ++}; ++ ++static int __init register_cpufreq_notifier(void) ++{ ++ if (cap_parsing_failed) ++ return -EINVAL; ++ ++ if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL)) { ++ pr_err("cpu_capacity: failed to allocate memory for cpus_to_visit\n"); ++ return -ENOMEM; ++ } ++ cpumask_copy(cpus_to_visit, cpu_possible_mask); ++ ++ return cpufreq_register_notifier(&init_cpu_capacity_notifier, ++ CPUFREQ_POLICY_NOTIFIER); ++} ++core_initcall(register_cpufreq_notifier); ++#else ++static int __init free_raw_capacity(void) ++{ ++ kfree(raw_capacity); ++ ++ return 0; ++} ++core_initcall(free_raw_capacity); ++#endif + + /* + * Iterate all CPUs' descriptor in DT and compute the efficiency +@@ -99,6 +227,12 @@ static void __init 
parse_dt_topology(void) + __cpu_capacity = kcalloc(nr_cpu_ids, sizeof(*__cpu_capacity), + GFP_NOWAIT); + ++ cn = of_find_node_by_path("/cpus"); ++ if (!cn) { ++ pr_err("No CPU information found in DT\n"); ++ return; ++ } ++ + for_each_possible_cpu(cpu) { + const u32 *rate; + int len; +@@ -110,6 +244,13 @@ static void __init parse_dt_topology(void) + continue; + } + ++ if (parse_cpu_capacity(cn, cpu)) { ++ of_node_put(cn); ++ continue; ++ } ++ ++ cap_from_dt = false; ++ + for (cpu_eff = table_efficiency; cpu_eff->compatible; cpu_eff++) + if (of_device_is_compatible(cn, cpu_eff->compatible)) + break; +@@ -151,6 +292,8 @@ static void __init parse_dt_topology(void) + middle_capacity = ((max_capacity / 3) + >> (SCHED_CAPACITY_SHIFT-1)) + 1; + ++ if (cap_from_dt && !cap_parsing_failed) ++ normalize_cpu_capacity(); + } + + /* +@@ -160,7 +303,7 @@ static void __init parse_dt_topology(void) + */ + static void update_cpu_capacity(unsigned int cpu) + { +- if (!cpu_capacity(cpu)) ++ if (!cpu_capacity(cpu) || cap_from_dt) + return; + + set_capacity_scale(cpu, cpu_capacity(cpu) / middle_capacity); +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0025-fixup-arm-parse-cpu-capacity-dmips-mhz-from-DT.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0025-fixup-arm-parse-cpu-capacity-dmips-mhz-from-DT.patch new file mode 100644 index 0000000..84dea53 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0025-fixup-arm-parse-cpu-capacity-dmips-mhz-from-DT.patch @@ -0,0 +1,49 @@ +From 2b32578a1c36deaddcbb61dc6a10a17356a0499f Mon Sep 17 00:00:00 2001 +From: Juri Lelli <juri.lelli@arm.com> +Date: Wed, 31 Aug 2016 18:04:46 +0100 +Subject: [PATCH 25/92] fixup! 
arm: parse cpu capacity-dmips-mhz from DT + +(cherry picked from commit 55df048a3a8e4940c78f2a24e6e1766af18a7577) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + arch/arm/kernel/topology.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c +index b22868a..a26e787 100644 +--- a/arch/arm/kernel/topology.c ++++ b/arch/arm/kernel/topology.c +@@ -143,6 +143,8 @@ static void normalize_cpu_capacity(void) + #ifdef CONFIG_CPU_FREQ + static cpumask_var_t cpus_to_visit; + static bool cap_parsing_done; ++static void parsing_done_workfn(struct work_struct *work); ++static DECLARE_WORK(parsing_done_work, parsing_done_workfn); + + static int + init_cpu_capacity_callback(struct notifier_block *nb, +@@ -173,6 +175,7 @@ static void normalize_cpu_capacity(void) + kfree(raw_capacity); + pr_debug("cpu_capacity: parsing done\n"); + cap_parsing_done = true; ++ schedule_work(&parsing_done_work); + } + } + return 0; +@@ -197,6 +200,13 @@ static int __init register_cpufreq_notifier(void) + CPUFREQ_POLICY_NOTIFIER); + } + core_initcall(register_cpufreq_notifier); ++ ++static void parsing_done_workfn(struct work_struct *work) ++{ ++ cpufreq_unregister_notifier(&init_cpu_capacity_notifier, ++ CPUFREQ_POLICY_NOTIFIER); ++} ++ + #else + static int __init free_raw_capacity(void) + { +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0026-arm-dts-add-TC2-cpu-capacity-dmips-mhz-information.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0026-arm-dts-add-TC2-cpu-capacity-dmips-mhz-information.patch new file mode 100644 index 0000000..bab5175 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0026-arm-dts-add-TC2-cpu-capacity-dmips-mhz-information.patch @@ -0,0 +1,70 @@ +From dbe8ad50cfc3117c8b7384f31be43a747911d368 Mon Sep 17 00:00:00 2001 +From: Juri Lelli <juri.lelli@arm.com> +Date: Tue, 13 Oct 2015 14:56:51 +0100 +Subject: [PATCH 26/92] arm, dts: add TC2 cpu 
capacity-dmips-mhz information + +Add TC2 cpu capacity information. + +Cc: Liviu Dudau <liviu.dudau@arm.com> +Cc: Sudeep Holla <sudeep.holla@arm.com> +Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> +Cc: Rob Herring <robh+dt@kernel.org> +Cc: Pawel Moll <pawel.moll@arm.com> +Cc: Mark Rutland <mark.rutland@arm.com> +Cc: Ian Campbell <ijc+devicetree@hellion.org.uk> +Cc: Kumar Gala <galak@codeaurora.org> +Cc: Russell King <linux@arm.linux.org.uk> +Cc: devicetree@vger.kernel.org +Acked-by: Sudeep Holla <sudeep.holla@arm.com> +Signed-off-by: Juri Lelli <juri.lelli@arm.com> +--- + arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts +index 0205c97..45d08cc 100644 +--- a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts ++++ b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts +@@ -39,6 +39,7 @@ + reg = <0>; + cci-control-port = <&cci_control1>; + cpu-idle-states = <&CLUSTER_SLEEP_BIG>; ++ capacity-dmips-mhz = <1024>; + }; + + cpu1: cpu@1 { +@@ -47,6 +48,7 @@ + reg = <1>; + cci-control-port = <&cci_control1>; + cpu-idle-states = <&CLUSTER_SLEEP_BIG>; ++ capacity-dmips-mhz = <1024>; + }; + + cpu2: cpu@2 { +@@ -55,6 +57,7 @@ + reg = <0x100>; + cci-control-port = <&cci_control2>; + cpu-idle-states = <&CLUSTER_SLEEP_LITTLE>; ++ capacity-dmips-mhz = <516>; + }; + + cpu3: cpu@3 { +@@ -63,6 +66,7 @@ + reg = <0x101>; + cci-control-port = <&cci_control2>; + cpu-idle-states = <&CLUSTER_SLEEP_LITTLE>; ++ capacity-dmips-mhz = <516>; + }; + + cpu4: cpu@4 { +@@ -71,6 +75,7 @@ + reg = <0x102>; + cci-control-port = <&cci_control2>; + cpu-idle-states = <&CLUSTER_SLEEP_LITTLE>; ++ capacity-dmips-mhz = <516>; + }; + + idle-states { +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0027-arm64-parse-cpu-capacity-dmips-mhz-from-DT.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0027-arm64-parse-cpu-capacity-dmips-mhz-from-DT.patch new file mode 
100644 index 0000000..0b35caf --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0027-arm64-parse-cpu-capacity-dmips-mhz-from-DT.patch @@ -0,0 +1,212 @@ +From f7b42d6b9a5d61382b11dfa52484bd86f41fc32e Mon Sep 17 00:00:00 2001 +From: Juri Lelli <juri.lelli@arm.com> +Date: Thu, 19 Nov 2015 16:18:38 +0000 +Subject: [PATCH 27/92] arm64: parse cpu capacity-dmips-mhz from DT + +With the introduction of cpu capacity-dmips-mhz bindings, CPU capacities +can now be calculated from values extracted from DT and information +coming from cpufreq. Add parsing of DT information at boot time, and +complement it with cpufreq information. Also, store such information +using per CPU variables, as we do for arm. + +Caveat: the information provided by this patch will start to be used in +the future. We need to #define arch_scale_cpu_capacity to something +provided in arch, so that scheduler's default implementation (which gets +used if arch_scale_cpu_capacity is not defined) is overwritten. + +Cc: Catalin Marinas <catalin.marinas@arm.com> +Cc: Will Deacon <will.deacon@arm.com> +Cc: Mark Brown <broonie@kernel.org> +Cc: Sudeep Holla <sudeep.holla@arm.com> +Signed-off-by: Juri Lelli <juri.lelli@arm.com> +--- + arch/arm64/kernel/topology.c | 149 ++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 148 insertions(+), 1 deletion(-) + +diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c +index 694f6de..c1277c19 100644 +--- a/arch/arm64/kernel/topology.c ++++ b/arch/arm64/kernel/topology.c +@@ -19,10 +19,152 @@ + #include <linux/nodemask.h> + #include <linux/of.h> + #include <linux/sched.h> ++#include <linux/slab.h> ++#include <linux/cpufreq.h> + + #include <asm/cputype.h> + #include <asm/topology.h> + ++static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE; ++ ++unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu) ++{ ++ return per_cpu(cpu_scale, cpu); ++} ++ ++static void set_capacity_scale(unsigned int cpu, 
unsigned long capacity) ++{ ++ per_cpu(cpu_scale, cpu) = capacity; ++} ++ ++static u32 capacity_scale; ++static u32 *raw_capacity; ++static bool cap_parsing_failed; ++ ++static void __init parse_cpu_capacity(struct device_node *cpu_node, int cpu) ++{ ++ int ret; ++ u32 cpu_capacity; ++ ++ if (cap_parsing_failed) ++ return; ++ ++ ret = of_property_read_u32(cpu_node, ++ "capacity-dmips-mhz", ++ &cpu_capacity); ++ if (!ret) { ++ if (!raw_capacity) { ++ raw_capacity = kcalloc(num_possible_cpus(), ++ sizeof(*raw_capacity), ++ GFP_KERNEL); ++ if (!raw_capacity) { ++ pr_err("cpu_capacity: failed to allocate memory for raw capacities\n"); ++ cap_parsing_failed = true; ++ return; ++ } ++ } ++ capacity_scale = max(cpu_capacity, capacity_scale); ++ raw_capacity[cpu] = cpu_capacity; ++ pr_debug("cpu_capacity: %s cpu_capacity=%u (raw)\n", ++ cpu_node->full_name, raw_capacity[cpu]); ++ } else { ++ if (raw_capacity) { ++ pr_err("cpu_capacity: missing %s raw capacity\n", ++ cpu_node->full_name); ++ pr_err("cpu_capacity: partial information: fallback to 1024 for all CPUs\n"); ++ } ++ cap_parsing_failed = true; ++ kfree(raw_capacity); ++ } ++} ++ ++static void normalize_cpu_capacity(void) ++{ ++ u64 capacity; ++ int cpu; ++ ++ if (!raw_capacity || cap_parsing_failed) ++ return; ++ ++ pr_debug("cpu_capacity: capacity_scale=%u\n", capacity_scale); ++ for_each_possible_cpu(cpu) { ++ pr_debug("cpu_capacity: cpu=%d raw_capacity=%u\n", ++ cpu, raw_capacity[cpu]); ++ capacity = (raw_capacity[cpu] << SCHED_CAPACITY_SHIFT) ++ / capacity_scale; ++ set_capacity_scale(cpu, capacity); ++ pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n", ++ cpu, arch_scale_cpu_capacity(NULL, cpu)); ++ } ++} ++ ++#ifdef CONFIG_CPU_FREQ ++static cpumask_var_t cpus_to_visit; ++static bool cap_parsing_done; ++ ++static int ++init_cpu_capacity_callback(struct notifier_block *nb, ++ unsigned long val, ++ void *data) ++{ ++ struct cpufreq_policy *policy = data; ++ int cpu; ++ ++ if (cap_parsing_failed || 
cap_parsing_done) ++ return 0; ++ ++ switch (val) { ++ case CPUFREQ_NOTIFY: ++ pr_debug("cpu_capacity: init cpu capacity for CPUs [%*pbl] (to_visit=%*pbl)\n", ++ cpumask_pr_args(policy->related_cpus), ++ cpumask_pr_args(cpus_to_visit)); ++ cpumask_andnot(cpus_to_visit, ++ cpus_to_visit, ++ policy->related_cpus); ++ for_each_cpu(cpu, policy->related_cpus) { ++ raw_capacity[cpu] = arch_scale_cpu_capacity(NULL, cpu) * ++ policy->cpuinfo.max_freq / 1000UL; ++ capacity_scale = max(raw_capacity[cpu], capacity_scale); ++ } ++ if (cpumask_empty(cpus_to_visit)) { ++ normalize_cpu_capacity(); ++ kfree(raw_capacity); ++ pr_debug("cpu_capacity: parsing done\n"); ++ cap_parsing_done = true; ++ } ++ } ++ return 0; ++} ++ ++static struct notifier_block init_cpu_capacity_notifier = { ++ .notifier_call = init_cpu_capacity_callback, ++}; ++ ++static int __init register_cpufreq_notifier(void) ++{ ++ if (cap_parsing_failed) ++ return -EINVAL; ++ ++ if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL)) { ++ pr_err("cpu_capacity: failed to allocate memory for cpus_to_visit\n"); ++ return -ENOMEM; ++ } ++ cpumask_copy(cpus_to_visit, cpu_possible_mask); ++ ++ return cpufreq_register_notifier(&init_cpu_capacity_notifier, ++ CPUFREQ_POLICY_NOTIFIER); ++} ++core_initcall(register_cpufreq_notifier); ++#else ++static int __init free_raw_capacity(void) ++{ ++ kfree(raw_capacity); ++ ++ return 0; ++} ++core_initcall(free_raw_capacity); ++#endif ++ + static int __init get_cpu_for_node(struct device_node *node) + { + struct device_node *cpu_node; +@@ -34,6 +176,7 @@ static int __init get_cpu_for_node(struct device_node *node) + + for_each_possible_cpu(cpu) { + if (of_get_cpu_node(cpu, NULL) == cpu_node) { ++ parse_cpu_capacity(cpu_node, cpu); + of_node_put(cpu_node); + return cpu; + } +@@ -178,13 +321,17 @@ static int __init parse_dt_topology(void) + * cluster with restricted subnodes. 
+ */ + map = of_get_child_by_name(cn, "cpu-map"); +- if (!map) ++ if (!map) { ++ cap_parsing_failed = true; + goto out; ++ } + + ret = parse_cluster(map, 0); + if (ret != 0) + goto out_map; + ++ normalize_cpu_capacity(); ++ + /* + * Check that all cores are in the topology; the SMP code will + * only mark cores described in the DT as possible. +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0028-fixup-arm64-parse-cpu-capacity-dmips-mhz-from-DT.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0028-fixup-arm64-parse-cpu-capacity-dmips-mhz-from-DT.patch new file mode 100644 index 0000000..2cf8c09 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0028-fixup-arm64-parse-cpu-capacity-dmips-mhz-from-DT.patch @@ -0,0 +1,49 @@ +From 7096f8cbba75b5a6a9ba440cae5c16092b16e492 Mon Sep 17 00:00:00 2001 +From: Juri Lelli <juri.lelli@arm.com> +Date: Wed, 31 Aug 2016 18:05:12 +0100 +Subject: [PATCH 28/92] fixup! arm64: parse cpu capacity-dmips-mhz from DT + +(cherry picked from commit 2aa41d080e07f93dc979a23bf4e40f91e3643fb0) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + arch/arm64/kernel/topology.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c +index c1277c19..b75b0ba 100644 +--- a/arch/arm64/kernel/topology.c ++++ b/arch/arm64/kernel/topology.c +@@ -101,6 +101,8 @@ static void normalize_cpu_capacity(void) + #ifdef CONFIG_CPU_FREQ + static cpumask_var_t cpus_to_visit; + static bool cap_parsing_done; ++static void parsing_done_workfn(struct work_struct *work); ++static DECLARE_WORK(parsing_done_work, parsing_done_workfn); + + static int + init_cpu_capacity_callback(struct notifier_block *nb, +@@ -131,6 +133,7 @@ static void normalize_cpu_capacity(void) + kfree(raw_capacity); + pr_debug("cpu_capacity: parsing done\n"); + cap_parsing_done = true; ++ schedule_work(&parsing_done_work); + } + } + return 0; +@@ -155,6 +158,13 @@ static int __init 
register_cpufreq_notifier(void) + CPUFREQ_POLICY_NOTIFIER); + } + core_initcall(register_cpufreq_notifier); ++ ++static void parsing_done_workfn(struct work_struct *work) ++{ ++ cpufreq_unregister_notifier(&init_cpu_capacity_notifier, ++ CPUFREQ_POLICY_NOTIFIER); ++} ++ + #else + static int __init free_raw_capacity(void) + { +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0029-arm64-dts-add-Juno-cpu-capacity-dmips-mhz-informatio.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0029-arm64-dts-add-Juno-cpu-capacity-dmips-mhz-informatio.patch new file mode 100644 index 0000000..ced0790 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0029-arm64-dts-add-Juno-cpu-capacity-dmips-mhz-informatio.patch @@ -0,0 +1,82 @@ +From 32b76aa4d389f58f621e704a4c67e6da9ee21df9 Mon Sep 17 00:00:00 2001 +From: Juri Lelli <juri.lelli@arm.com> +Date: Wed, 11 Nov 2015 18:13:19 +0000 +Subject: [PATCH 29/92] arm64, dts: add Juno cpu capacity-dmips-mhz information + +Add Juno cpu capacity-dmips-mhz information. 
+ +Cc: Rob Herring <robh+dt@kernel.org> +Cc: Pawel Moll <pawel.moll@arm.com> +Cc: Mark Rutland <mark.rutland@arm.com> +Cc: Ian Campbell <ijc+devicetree@hellion.org.uk> +Cc: Kumar Gala <galak@codeaurora.org> +Cc: Catalin Marinas <catalin.marinas@arm.com> +Cc: Will Deacon <will.deacon@arm.com> +Cc: Liviu Dudau <Liviu.Dudau@arm.com> +Cc: Sudeep Holla <sudeep.holla@arm.com> +Cc: Arnd Bergmann <arnd@arndb.de> +Cc: Jon Medhurst <tixy@linaro.org> +Cc: Olof Johansson <olof@lixom.net> +Cc: Robin Murphy <robin.murphy@arm.com> +Cc: devicetree@vger.kernel.org +Acked-by: Sudeep Holla <sudeep.holla@arm.com> +Signed-off-by: Juri Lelli <juri.lelli@arm.com> +--- + arch/arm64/boot/dts/arm/juno.dts | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/arch/arm64/boot/dts/arm/juno.dts b/arch/arm64/boot/dts/arm/juno.dts +index 6e154d9..ac5ceb7 100644 +--- a/arch/arm64/boot/dts/arm/juno.dts ++++ b/arch/arm64/boot/dts/arm/juno.dts +@@ -90,6 +90,7 @@ + next-level-cache = <&A57_L2>; + clocks = <&scpi_dvfs 0>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; ++ capacity-dmips-mhz = <1024>; + }; + + A57_1: cpu@1 { +@@ -100,6 +101,7 @@ + next-level-cache = <&A57_L2>; + clocks = <&scpi_dvfs 0>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; ++ capacity-dmips-mhz = <1024>; + }; + + A53_0: cpu@100 { +@@ -110,6 +112,7 @@ + next-level-cache = <&A53_L2>; + clocks = <&scpi_dvfs 1>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; ++ capacity-dmips-mhz = <578>; + }; + + A53_1: cpu@101 { +@@ -120,6 +123,7 @@ + next-level-cache = <&A53_L2>; + clocks = <&scpi_dvfs 1>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; ++ capacity-dmips-mhz = <578>; + }; + + A53_2: cpu@102 { +@@ -130,6 +134,7 @@ + next-level-cache = <&A53_L2>; + clocks = <&scpi_dvfs 1>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; ++ capacity-dmips-mhz = <578>; + }; + + A53_3: cpu@103 { +@@ -140,6 +145,7 @@ + next-level-cache = <&A53_L2>; + clocks = <&scpi_dvfs 1>; + cpu-idle-states = <&CPU_SLEEP_0 
&CLUSTER_SLEEP_0>; ++ capacity-dmips-mhz = <578>; + }; + + A57_L2: l2-cache0 { +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0030-arm64-dts-add-Juno-r2-cpu-capacity-dmips-mhz-informa.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0030-arm64-dts-add-Juno-r2-cpu-capacity-dmips-mhz-informa.patch new file mode 100644 index 0000000..79bd1a6 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0030-arm64-dts-add-Juno-r2-cpu-capacity-dmips-mhz-informa.patch @@ -0,0 +1,83 @@ +From 893313890fb7c18ba7e63626924f32deea273bec Mon Sep 17 00:00:00 2001 +From: Juri Lelli <juri.lelli@arm.com> +Date: Tue, 22 Mar 2016 15:45:16 +0000 +Subject: [PATCH 30/92] arm64, dts: add Juno r2 cpu capacity-dmips-mhz + information + +Add Juno r2 cpu capacity-dmips-mhz information. + +Cc: Rob Herring <robh+dt@kernel.org> +Cc: Pawel Moll <pawel.moll@arm.com> +Cc: Mark Rutland <mark.rutland@arm.com> +Cc: Ian Campbell <ijc+devicetree@hellion.org.uk> +Cc: Kumar Gala <galak@codeaurora.org> +Cc: Catalin Marinas <catalin.marinas@arm.com> +Cc: Will Deacon <will.deacon@arm.com> +Cc: Liviu Dudau <Liviu.Dudau@arm.com> +Cc: Sudeep Holla <sudeep.holla@arm.com> +Cc: Arnd Bergmann <arnd@arndb.de> +Cc: Jon Medhurst <tixy@linaro.org> +Cc: Olof Johansson <olof@lixom.net> +Cc: Robin Murphy <robin.murphy@arm.com> +Cc: devicetree@vger.kernel.org +Acked-by: Sudeep Holla <sudeep.holla@arm.com> +Signed-off-by: Juri Lelli <juri.lelli@arm.com> +--- + arch/arm64/boot/dts/arm/juno-r2.dts | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/arch/arm64/boot/dts/arm/juno-r2.dts b/arch/arm64/boot/dts/arm/juno-r2.dts +index 26aaa6a..28f40ec 100644 +--- a/arch/arm64/boot/dts/arm/juno-r2.dts ++++ b/arch/arm64/boot/dts/arm/juno-r2.dts +@@ -90,6 +90,7 @@ + next-level-cache = <&A72_L2>; + clocks = <&scpi_dvfs 0>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; ++ capacity-dmips-mhz = <1024>; + }; + + A72_1: cpu@1 { +@@ -100,6 +101,7 @@ + next-level-cache = <&A72_L2>; + clocks = 
<&scpi_dvfs 0>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; ++ capacity-dmips-mhz = <1024>; + }; + + A53_0: cpu@100 { +@@ -110,6 +112,7 @@ + next-level-cache = <&A53_L2>; + clocks = <&scpi_dvfs 1>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; ++ capacity-dmips-mhz = <485>; + }; + + A53_1: cpu@101 { +@@ -120,6 +123,7 @@ + next-level-cache = <&A53_L2>; + clocks = <&scpi_dvfs 1>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; ++ capacity-dmips-mhz = <485>; + }; + + A53_2: cpu@102 { +@@ -130,6 +134,7 @@ + next-level-cache = <&A53_L2>; + clocks = <&scpi_dvfs 1>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; ++ capacity-dmips-mhz = <485>; + }; + + A53_3: cpu@103 { +@@ -140,6 +145,7 @@ + next-level-cache = <&A53_L2>; + clocks = <&scpi_dvfs 1>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; ++ capacity-dmips-mhz = <485>; + }; + + A72_L2: l2-cache0 { +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0031-arm-add-sysfs-cpu_capacity-attribute.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0031-arm-add-sysfs-cpu_capacity-attribute.patch new file mode 100644 index 0000000..03c46e3 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0031-arm-add-sysfs-cpu_capacity-attribute.patch @@ -0,0 +1,127 @@ +From cecbf11db6cfccccc7019e2aeec37ead5d8b8a03 Mon Sep 17 00:00:00 2001 +From: Juri Lelli <juri.lelli@arm.com> +Date: Thu, 15 Oct 2015 13:53:37 +0100 +Subject: [PATCH 31/92] arm: add sysfs cpu_capacity attribute + +Add a sysfs cpu_capacity attribute with which it is possible to read and +write (thus over-writing default values) CPUs capacity. This might be +useful in situations where values needs changing after boot. 
+ +The new attribute shows up as: + + /sys/devices/system/cpu/cpu*/cpu_capacity + +Cc: Russell King <linux@arm.linux.org.uk> +Signed-off-by: Juri Lelli <juri.lelli@arm.com> +--- + arch/arm/kernel/topology.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 73 insertions(+) + +diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c +index a26e787..e79ac85 100644 +--- a/arch/arm/kernel/topology.c ++++ b/arch/arm/kernel/topology.c +@@ -41,6 +41,7 @@ + * updated during this sequence. + */ + static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE; ++static DEFINE_MUTEX(cpu_scale_mutex); + + unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu) + { +@@ -52,6 +53,76 @@ static void set_capacity_scale(unsigned int cpu, unsigned long capacity) + per_cpu(cpu_scale, cpu) = capacity; + } + ++#ifdef CONFIG_PROC_SYSCTL ++#include <asm/cpu.h> ++#include <linux/string.h> ++static ssize_t show_cpu_capacity(struct device *dev, ++ struct device_attribute *attr, ++ char *buf) ++{ ++ struct cpu *cpu = container_of(dev, struct cpu, dev); ++ ssize_t rc; ++ int cpunum = cpu->dev.id; ++ unsigned long capacity = arch_scale_cpu_capacity(NULL, cpunum); ++ ++ rc = sprintf(buf, "%lu\n", capacity); ++ ++ return rc; ++} ++ ++static ssize_t store_cpu_capacity(struct device *dev, ++ struct device_attribute *attr, ++ const char *buf, ++ size_t count) ++{ ++ struct cpu *cpu = container_of(dev, struct cpu, dev); ++ int this_cpu = cpu->dev.id, i; ++ unsigned long new_capacity; ++ ssize_t ret; ++ ++ if (count) { ++ char *p = (char *) buf; ++ ++ ret = kstrtoul(p, 0, &new_capacity); ++ if (ret) ++ return ret; ++ if (new_capacity > SCHED_CAPACITY_SCALE) ++ return -EINVAL; ++ ++ mutex_lock(&cpu_scale_mutex); ++ for_each_cpu(i, &cpu_topology[this_cpu].core_sibling) ++ set_capacity_scale(i, new_capacity); ++ mutex_unlock(&cpu_scale_mutex); ++ } ++ ++ return count; ++} ++ ++static DEVICE_ATTR(cpu_capacity, ++ 0644, ++ show_cpu_capacity, ++ 
store_cpu_capacity); ++ ++static int register_cpu_capacity_sysctl(void) ++{ ++ int i; ++ struct device *cpu; ++ ++ for_each_possible_cpu(i) { ++ cpu = get_cpu_device(i); ++ if (!cpu) { ++ pr_err("%s: too early to get CPU%d device!\n", ++ __func__, i); ++ continue; ++ } ++ device_create_file(cpu, &dev_attr_cpu_capacity); ++ } ++ ++ return 0; ++} ++late_initcall(register_cpu_capacity_sysctl); ++#endif ++ + #ifdef CONFIG_OF + struct cpu_efficiency { + const char *compatible; +@@ -131,6 +202,7 @@ static void normalize_cpu_capacity(void) + return; + + pr_debug("cpu_capacity: capacity_scale=%u\n", capacity_scale); ++ mutex_lock(&cpu_scale_mutex); + for_each_possible_cpu(cpu) { + capacity = (raw_capacity[cpu] << SCHED_CAPACITY_SHIFT) + / capacity_scale; +@@ -138,6 +210,7 @@ static void normalize_cpu_capacity(void) + pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n", + cpu, arch_scale_cpu_capacity(NULL, cpu)); + } ++ mutex_unlock(&cpu_scale_mutex); + } + + #ifdef CONFIG_CPU_FREQ +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0032-arm64-add-sysfs-cpu_capacity-attribute.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0032-arm64-add-sysfs-cpu_capacity-attribute.patch new file mode 100644 index 0000000..83f9e38 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0032-arm64-add-sysfs-cpu_capacity-attribute.patch @@ -0,0 +1,130 @@ +From 4454a804e1015b7699441e42d36dd8be8b0ee897 Mon Sep 17 00:00:00 2001 +From: Juri Lelli <juri.lelli@arm.com> +Date: Wed, 14 Oct 2015 12:02:05 +0100 +Subject: [PATCH 32/92] arm64: add sysfs cpu_capacity attribute + +Add a sysfs cpu_capacity attribute with which it is possible to read and +write (thus over-writing default values) CPUs capacity. This might be +useful in situations where values needs changing after boot. 
+ +The new attribute shows up as: + + /sys/devices/system/cpu/cpu*/cpu_capacity + +Cc: Catalin Marinas <catalin.marinas@arm.com> +Cc: Will Deacon <will.deacon@arm.com> +Cc: Mark Brown <broonie@kernel.org> +Cc: Sudeep Holla <sudeep.holla@arm.com> +Signed-off-by: Juri Lelli <juri.lelli@arm.com> +--- + arch/arm64/kernel/topology.c | 73 ++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 73 insertions(+) + +diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c +index b75b0ba..cff34cc 100644 +--- a/arch/arm64/kernel/topology.c ++++ b/arch/arm64/kernel/topology.c +@@ -26,6 +26,7 @@ + #include <asm/topology.h> + + static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE; ++static DEFINE_MUTEX(cpu_scale_mutex); + + unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu) + { +@@ -37,6 +38,76 @@ static void set_capacity_scale(unsigned int cpu, unsigned long capacity) + per_cpu(cpu_scale, cpu) = capacity; + } + ++#ifdef CONFIG_PROC_SYSCTL ++#include <asm/cpu.h> ++#include <linux/string.h> ++static ssize_t show_cpu_capacity(struct device *dev, ++ struct device_attribute *attr, ++ char *buf) ++{ ++ struct cpu *cpu = container_of(dev, struct cpu, dev); ++ ssize_t rc; ++ int cpunum = cpu->dev.id; ++ unsigned long capacity = arch_scale_cpu_capacity(NULL, cpunum); ++ ++ rc = sprintf(buf, "%lu\n", capacity); ++ ++ return rc; ++} ++ ++static ssize_t store_cpu_capacity(struct device *dev, ++ struct device_attribute *attr, ++ const char *buf, ++ size_t count) ++{ ++ struct cpu *cpu = container_of(dev, struct cpu, dev); ++ int this_cpu = cpu->dev.id, i; ++ unsigned long new_capacity; ++ ssize_t ret; ++ ++ if (count) { ++ char *p = (char *) buf; ++ ++ ret = kstrtoul(p, 0, &new_capacity); ++ if (ret) ++ return ret; ++ if (new_capacity > SCHED_CAPACITY_SCALE) ++ return -EINVAL; ++ ++ mutex_lock(&cpu_scale_mutex); ++ for_each_cpu(i, &cpu_topology[this_cpu].core_sibling) ++ set_capacity_scale(i, new_capacity); ++ 
mutex_unlock(&cpu_scale_mutex); ++ } ++ ++ return count; ++} ++ ++static DEVICE_ATTR(cpu_capacity, ++ 0644, ++ show_cpu_capacity, ++ store_cpu_capacity); ++ ++static int register_cpu_capacity_sysctl(void) ++{ ++ int i; ++ struct device *cpu; ++ ++ for_each_possible_cpu(i) { ++ cpu = get_cpu_device(i); ++ if (!cpu) { ++ pr_err("%s: too early to get CPU%d device!\n", ++ __func__, i); ++ continue; ++ } ++ device_create_file(cpu, &dev_attr_cpu_capacity); ++ } ++ ++ return 0; ++} ++late_initcall(register_cpu_capacity_sysctl); ++#endif ++ + static u32 capacity_scale; + static u32 *raw_capacity; + static bool cap_parsing_failed; +@@ -87,6 +158,7 @@ static void normalize_cpu_capacity(void) + return; + + pr_debug("cpu_capacity: capacity_scale=%u\n", capacity_scale); ++ mutex_lock(&cpu_scale_mutex); + for_each_possible_cpu(cpu) { + pr_debug("cpu_capacity: cpu=%d raw_capacity=%u\n", + cpu, raw_capacity[cpu]); +@@ -96,6 +168,7 @@ static void normalize_cpu_capacity(void) + pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n", + cpu, arch_scale_cpu_capacity(NULL, cpu)); + } ++ mutex_unlock(&cpu_scale_mutex); + } + + #ifdef CONFIG_CPU_FREQ +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0033-fixup-arm-include-linux-cpufreq.h.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0033-fixup-arm-include-linux-cpufreq.h.patch new file mode 100644 index 0000000..adce4b1 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0033-fixup-arm-include-linux-cpufreq.h.patch @@ -0,0 +1,27 @@ +From 98f9117448e3de1af4886892b3bf011aedebc4e8 Mon Sep 17 00:00:00 2001 +From: Dietmar Eggemann <dietmar.eggemann@arm.com> +Date: Mon, 21 Nov 2016 12:24:49 +0000 +Subject: [PATCH 33/92] fixup! 
arm: #include <linux/cpufreq.h> + +Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com> +(cherry picked from commit 9744ccb31b7c454f7355ca2f8545a23b8cb76b15) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + arch/arm/kernel/topology.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c +index e79ac85..46167bf 100644 +--- a/arch/arm/kernel/topology.c ++++ b/arch/arm/kernel/topology.c +@@ -21,6 +21,7 @@ + #include <linux/of.h> + #include <linux/sched.h> + #include <linux/slab.h> ++#include <linux/cpufreq.h> + + #include <asm/cputype.h> + #include <asm/topology.h> +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0034-arm-Re-wire-cpu-invariant-load-tracking-support-up-t.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0034-arm-Re-wire-cpu-invariant-load-tracking-support-up-t.patch new file mode 100644 index 0000000..3cc8cff --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0034-arm-Re-wire-cpu-invariant-load-tracking-support-up-t.patch @@ -0,0 +1,55 @@ +From a7770736a651a5e7ee12b2df95982ff7d76a6c5a Mon Sep 17 00:00:00 2001 +From: Morten Rasmussen <morten.rasmussen@arm.com> +Date: Tue, 14 Apr 2015 16:25:31 +0100 +Subject: [PATCH 34/92] arm: Re-wire cpu-invariant load-tracking support up to + the scheduler + +Since commit 8cd5601c5060 ("sched/fair: Convert arch_scale_cpu_capacity() +from weak function to #define") the wiring has to be done by associating +arch_scale_cpu_capacity with the actual implementation provided by the +arch. + +Define arch_scale_cpu_capacity to use the arch-specific function +scale_cpu_capacity(). 
+ +cc: Russell King <linux@arm.linux.org.uk> +Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com> +Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com> +(cherry picked from commit 496fddb12c6eeb54a2caa3413c0a591dc2762ab6) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + arch/arm/include/asm/topology.h | 4 ++++ + arch/arm/kernel/topology.c | 2 +- + 2 files changed, 5 insertions(+), 1 deletion(-) + +diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h +index 370f7a7..942a863 100644 +--- a/arch/arm/include/asm/topology.h ++++ b/arch/arm/include/asm/topology.h +@@ -24,6 +24,10 @@ struct cputopo_arm { + void store_cpu_topology(unsigned int cpuid); + const struct cpumask *cpu_coregroup_mask(int cpu); + ++struct sched_domain; ++#define arch_scale_cpu_capacity scale_cpu_capacity ++extern unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu); ++ + #else + + static inline void init_cpu_topology(void) { } +diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c +index 46167bf..f0957f3 100644 +--- a/arch/arm/kernel/topology.c ++++ b/arch/arm/kernel/topology.c +@@ -44,7 +44,7 @@ + static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE; + static DEFINE_MUTEX(cpu_scale_mutex); + +-unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu) ++unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu) + { + return per_cpu(cpu_scale, cpu); + } +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0035-arm-Make-cpufreq-policy-notifier-usable-for-frequenc.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0035-arm-Make-cpufreq-policy-notifier-usable-for-frequenc.patch new file mode 100644 index 0000000..7a004f0 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0035-arm-Make-cpufreq-policy-notifier-usable-for-frequenc.patch @@ -0,0 +1,83 @@ +From e869e7130e9fcb8b44fd20b11ba4eee166f3f958 Mon Sep 17 00:00:00 2001 +From: Dietmar Eggemann 
<dietmar.eggemann@arm.com> +Date: Mon, 21 Nov 2016 16:42:17 +0000 +Subject: [PATCH 35/92] arm: Make cpufreq policy notifier usable for + frequency-invariant load-tracking support + +The maximum supported frequency of a cpu (policy->cpuinfo.max_freq) has +to be retrieved for frequency-invariant load-tracking. + +This can be achieved by coding this functionality into the existing +cpufreq policy notifier (init_cpu_capacity_notifier) which is currently +only used for setting up dt-based cpu capacities (cpu node property +capacity-dmips-mhz). + +But frequency-invariant load-tracking has to work whether cpu capacity +dt-parsing succeeded or not. + +Change init_cpu_capacity_notifier in such a way that even if the parsing +of the cpu capacity information failed the notifier is called for each +cpufreq policy to be able to set the maximum supported frequency. + +The continue statement in init_cpu_capacity_callback() makes sure that we +don't go on calculating cap_scale in case the capacity parsing failed. The +whole implementation only makes sense as soon as the code to set the per-cpu +variable max_freq is introduced by the following patch ("arm: +Frequency-invariant load-tracking support"). 
+ +Cc: Russell King <linux@arm.linux.org.uk> +Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com> +(cherry picked from commit e47eb4cac39817612543425e7d64cd30a6baf684) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + arch/arm/kernel/topology.c | 15 ++++++++------- + 1 file changed, 8 insertions(+), 7 deletions(-) + +diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c +index f0957f3..b030d01 100644 +--- a/arch/arm/kernel/topology.c ++++ b/arch/arm/kernel/topology.c +@@ -228,7 +228,7 @@ static void normalize_cpu_capacity(void) + struct cpufreq_policy *policy = data; + int cpu; + +- if (cap_parsing_failed || cap_parsing_done) ++ if (cap_parsing_done) + return 0; + + switch (val) { +@@ -240,14 +240,18 @@ static void normalize_cpu_capacity(void) + cpus_to_visit, + policy->related_cpus); + for_each_cpu(cpu, policy->related_cpus) { ++ if (cap_parsing_failed) ++ continue; + raw_capacity[cpu] = arch_scale_cpu_capacity(NULL, cpu) * + policy->cpuinfo.max_freq / 1000UL; + capacity_scale = max(raw_capacity[cpu], capacity_scale); + } + if (cpumask_empty(cpus_to_visit)) { +- normalize_cpu_capacity(); +- kfree(raw_capacity); +- pr_debug("cpu_capacity: parsing done\n"); ++ if (!cap_parsing_failed) { ++ normalize_cpu_capacity(); ++ kfree(raw_capacity); ++ } ++ pr_debug("cpu_capacity: parsing done"); + cap_parsing_done = true; + schedule_work(&parsing_done_work); + } +@@ -261,9 +265,6 @@ static void normalize_cpu_capacity(void) + + static int __init register_cpufreq_notifier(void) + { +- if (cap_parsing_failed) +- return -EINVAL; +- + if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL)) { + pr_err("cpu_capacity: failed to allocate memory for cpus_to_visit\n"); + return -ENOMEM; +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0036-arm-Frequency-invariant-load-tracking-support.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0036-arm-Frequency-invariant-load-tracking-support.patch new file mode 100644 index 
0000000..5c8419e --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0036-arm-Frequency-invariant-load-tracking-support.patch @@ -0,0 +1,134 @@ +From f2b29716bba30b235f4c3a4292bb0ca0465e0031 Mon Sep 17 00:00:00 2001 +From: Dietmar Eggemann <dietmar.eggemann@arm.com> +Date: Wed, 16 Nov 2016 19:46:35 +0000 +Subject: [PATCH 36/92] arm: Frequency-invariant load-tracking support + +Implements arch-specific function scale_freq_capacity() which provides +the following frequency scaling factor: + + current_freq(cpu) << SCHED_CAPACITY_SHIFT / max_supported_freq(cpu) + +The debug output in init_cpu_capacity_callback() has been changed to be +able to distinguish whether cpu capacity and max frequency or only max +frequency has been parsed. The latter case happens on systems where there +is no or broken cpu capacity binding (cpu node property +capacity-dmips-mhz) information. + +One possible consumer of this is the Per-Entity Load Tracking (PELT) +mechanism of the scheduler's CFS class. + +The actual wiring up to the scheduler isn't provided by this patch. 
+ +Cc: Russell King <linux@arm.linux.org.uk> +Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com> +(cherry picked from commit fd3bdcb6e9f895ba22de4295e29de4f4e1ed3e77) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + arch/arm/kernel/topology.c | 57 +++++++++++++++++++++++++++++++++++++++++----- + 1 file changed, 51 insertions(+), 6 deletions(-) + +diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c +index b030d01..e521b07 100644 +--- a/arch/arm/kernel/topology.c ++++ b/arch/arm/kernel/topology.c +@@ -219,6 +219,8 @@ static void normalize_cpu_capacity(void) + static bool cap_parsing_done; + static void parsing_done_workfn(struct work_struct *work); + static DECLARE_WORK(parsing_done_work, parsing_done_workfn); ++static DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE; ++static DEFINE_PER_CPU(unsigned long, max_freq); + + static int + init_cpu_capacity_callback(struct notifier_block *nb, +@@ -233,13 +235,14 @@ static void normalize_cpu_capacity(void) + + switch (val) { + case CPUFREQ_NOTIFY: +- pr_debug("cpu_capacity: init cpu capacity for CPUs [%*pbl] (to_visit=%*pbl)\n", +- cpumask_pr_args(policy->related_cpus), +- cpumask_pr_args(cpus_to_visit)); ++ pr_debug("cpu_capacity: calling %s for CPUs [%*pbl] (to_visit=%*pbl)\n", ++ __func__, cpumask_pr_args(policy->related_cpus), ++ cpumask_pr_args(cpus_to_visit)); + cpumask_andnot(cpus_to_visit, + cpus_to_visit, + policy->related_cpus); + for_each_cpu(cpu, policy->related_cpus) { ++ per_cpu(max_freq, cpu) = policy->cpuinfo.max_freq; + if (cap_parsing_failed) + continue; + raw_capacity[cpu] = arch_scale_cpu_capacity(NULL, cpu) * +@@ -250,8 +253,10 @@ static void normalize_cpu_capacity(void) + if (!cap_parsing_failed) { + normalize_cpu_capacity(); + kfree(raw_capacity); ++ pr_debug("cpu_capacity: parsing done"); ++ } else { ++ pr_debug("cpu_capacity: max frequency parsing done"); + } +- pr_debug("cpu_capacity: parsing done"); + cap_parsing_done = true; + 
schedule_work(&parsing_done_work); + } +@@ -263,16 +268,56 @@ static void normalize_cpu_capacity(void) + .notifier_call = init_cpu_capacity_callback, + }; + ++unsigned long scale_freq_capacity(struct sched_domain *sd, int cpu) ++{ ++ return per_cpu(freq_scale, cpu); ++} ++ ++static void set_freq_scale(unsigned int cpu, unsigned long freq) ++{ ++ unsigned long max = per_cpu(max_freq, cpu); ++ ++ if (!max) ++ return; ++ ++ per_cpu(freq_scale, cpu) = (freq << SCHED_CAPACITY_SHIFT) / max; ++} ++ ++static int ++set_freq_scale_callback(struct notifier_block *nb, ++ unsigned long val, ++ void *data) ++{ ++ struct cpufreq_freqs *freq = data; ++ ++ switch (val) { ++ case CPUFREQ_PRECHANGE: ++ set_freq_scale(freq->cpu, freq->new); ++ } ++ return 0; ++} ++ ++static struct notifier_block set_freq_scale_notifier = { ++ .notifier_call = set_freq_scale_callback, ++}; + static int __init register_cpufreq_notifier(void) + { ++ int ret; ++ + if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL)) { + pr_err("cpu_capacity: failed to allocate memory for cpus_to_visit\n"); + return -ENOMEM; + } + cpumask_copy(cpus_to_visit, cpu_possible_mask); + +- return cpufreq_register_notifier(&init_cpu_capacity_notifier, +- CPUFREQ_POLICY_NOTIFIER); ++ ret = cpufreq_register_notifier(&init_cpu_capacity_notifier, ++ CPUFREQ_POLICY_NOTIFIER); ++ ++ if (ret) ++ return ret; ++ ++ return cpufreq_register_notifier(&set_freq_scale_notifier, ++ CPUFREQ_TRANSITION_NOTIFIER); + } + core_initcall(register_cpufreq_notifier); + +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0037-arm-Wire-frequency-invariant-load-tracking-support-u.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0037-arm-Wire-frequency-invariant-load-tracking-support-u.patch new file mode 100644 index 0000000..5ddb5aa --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0037-arm-Wire-frequency-invariant-load-tracking-support-u.patch @@ -0,0 +1,38 @@ +From d3d5aa80cf65f112c759fdd04ec6725361afa6a5 Mon Sep 17 
00:00:00 2001 +From: Dietmar Eggemann <dietmar.eggemann@arm.com> +Date: Fri, 18 Nov 2016 12:30:31 +0000 +Subject: [PATCH 37/92] arm: Wire frequency-invariant load-tracking support up + to the scheduler + +Define arch_scale_freq_capacity to use the arch-specific function +scale_freq_capacity(). + +Cc: Russell King <linux@arm.linux.org.uk> +Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com> +(cherry picked from commit 7049774f92be687318845bf633f314676285792f) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + arch/arm/include/asm/topology.h | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h +index 942a863..981e9c2 100644 +--- a/arch/arm/include/asm/topology.h ++++ b/arch/arm/include/asm/topology.h +@@ -28,6 +28,13 @@ struct cputopo_arm { + #define arch_scale_cpu_capacity scale_cpu_capacity + extern unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu); + ++#ifdef CONFIG_CPU_FREQ ++ ++#define arch_scale_freq_capacity scale_freq_capacity ++extern unsigned long scale_freq_capacity(struct sched_domain *sd, int cpu); ++ ++#endif /* CONFIG_CPU_FREQ */ ++ + #else + + static inline void init_cpu_topology(void) { } +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0038-arm64-Wire-cpu-invariant-load-tracking-support-up-to.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0038-arm64-Wire-cpu-invariant-load-tracking-support-up-to.patch new file mode 100644 index 0000000..7d3f07a --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0038-arm64-Wire-cpu-invariant-load-tracking-support-up-to.patch @@ -0,0 +1,51 @@ +From abdcda318be2337a46f118ffda2d377c95c12408 Mon Sep 17 00:00:00 2001 +From: Juri Lelli <juri.lelli@arm.com> +Date: Fri, 20 Nov 2015 11:47:48 +0000 +Subject: [PATCH 38/92] arm64: Wire cpu-invariant load-tracking support up to + the scheduler + +Define arch_scale_cpu_capacity to use the arch-specific function 
+scale_cpu_capacity(). + +Cc: Catalin Marinas <catalin.marinas@arm.com> +Cc: Will Deacon <will.deacon@arm.com> +Signed-off-by: Juri Lelli <juri.lelli@arm.com> +Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com> +(cherry picked from commit 94ad9dbcfe547eeed8115fab9f114be0d0487bd8) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + arch/arm64/include/asm/topology.h | 4 ++++ + arch/arm64/kernel/topology.c | 2 +- + 2 files changed, 5 insertions(+), 1 deletion(-) + +diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h +index 8b57339..5d576eb 100644 +--- a/arch/arm64/include/asm/topology.h ++++ b/arch/arm64/include/asm/topology.h +@@ -32,6 +32,10 @@ struct cpu_topology { + + #endif /* CONFIG_NUMA */ + ++struct sched_domain; ++#define arch_scale_cpu_capacity scale_cpu_capacity ++extern unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu); ++ + #include <asm-generic/topology.h> + + #endif /* _ASM_ARM_TOPOLOGY_H */ +diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c +index cff34cc..91de9dd 100644 +--- a/arch/arm64/kernel/topology.c ++++ b/arch/arm64/kernel/topology.c +@@ -28,7 +28,7 @@ + static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE; + static DEFINE_MUTEX(cpu_scale_mutex); + +-unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu) ++unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu) + { + return per_cpu(cpu_scale, cpu); + } +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0039-arm64-Make-cpufreq-policy-notifier-usable-for-freque.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0039-arm64-Make-cpufreq-policy-notifier-usable-for-freque.patch new file mode 100644 index 0000000..83670ea --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0039-arm64-Make-cpufreq-policy-notifier-usable-for-freque.patch @@ -0,0 +1,82 @@ +From c352f232654c79fbe6f7ae46ddffc2f7c267519c Mon Sep 17 00:00:00 2001 +From: 
Dietmar Eggemann <dietmar.eggemann@arm.com> +Date: Mon, 21 Nov 2016 16:38:09 +0000 +Subject: [PATCH 39/92] arm64: Make cpufreq policy notifier usable for + frequency-invariant load-tracking support + +The maximum supported frequency of a cpu (policy->cpuinfo.max_freq) has +to be retrieved for frequency-invariant load-tracking. + +This can be achieved by coding this functionality into the existing +cpufreq policy notifier (init_cpu_capacity_notifier) which is currently +only used for setting up dt-based cpu capacities (cpu node property +capacity-dmips-mhz). + +But frequency-invariant load-tracking has to work whether cpu capacity +dt-parsing succeeded or not. + +Change init_cpu_capacity_notifier in such a way that even if the parsing +of the cpu capacity information failed the notifier is called for each +cpufreq policy to be able to set the maximum supported frequency. + +The continue statement in init_cpu_capacity_callback() makes sure that we +don't go on calculating cap_scale in case the capacity parsing failed. The +whole implementation only makes sense as soon as the code to set the per-cpu +variable max_freq is introduced by the following patch +("arm64: Frequency-invariant load-tracking support"). 
+ +Cc: Catalin Marinas <catalin.marinas@arm.com> +Cc: Will Deacon <will.deacon@arm.com> +Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com> +(cherry picked from commit c4e6eb1f2e7ada58bd538af2243809416c6538aa) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + arch/arm64/kernel/topology.c | 13 +++++++------ + 1 file changed, 7 insertions(+), 6 deletions(-) + +diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c +index 91de9dd..3d27df1 100644 +--- a/arch/arm64/kernel/topology.c ++++ b/arch/arm64/kernel/topology.c +@@ -185,7 +185,7 @@ static void normalize_cpu_capacity(void) + struct cpufreq_policy *policy = data; + int cpu; + +- if (cap_parsing_failed || cap_parsing_done) ++ if (cap_parsing_done) + return 0; + + switch (val) { +@@ -197,13 +197,17 @@ static void normalize_cpu_capacity(void) + cpus_to_visit, + policy->related_cpus); + for_each_cpu(cpu, policy->related_cpus) { ++ if (cap_parsing_failed) ++ continue; + raw_capacity[cpu] = arch_scale_cpu_capacity(NULL, cpu) * + policy->cpuinfo.max_freq / 1000UL; + capacity_scale = max(raw_capacity[cpu], capacity_scale); + } + if (cpumask_empty(cpus_to_visit)) { +- normalize_cpu_capacity(); +- kfree(raw_capacity); ++ if (!cap_parsing_failed) { ++ normalize_cpu_capacity(); ++ kfree(raw_capacity); ++ } + pr_debug("cpu_capacity: parsing done\n"); + cap_parsing_done = true; + schedule_work(&parsing_done_work); +@@ -218,9 +222,6 @@ static void normalize_cpu_capacity(void) + + static int __init register_cpufreq_notifier(void) + { +- if (cap_parsing_failed) +- return -EINVAL; +- + if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL)) { + pr_err("cpu_capacity: failed to allocate memory for cpus_to_visit\n"); + return -ENOMEM; +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0040-arm64-Frequency-invariant-load-tracking-support.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0040-arm64-Frequency-invariant-load-tracking-support.patch new file mode 100644 index 
0000000..7651c1f --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0040-arm64-Frequency-invariant-load-tracking-support.patch @@ -0,0 +1,136 @@ +From 34e84963e8c1a9a808e1027a268db205da11174f Mon Sep 17 00:00:00 2001 +From: Dietmar Eggemann <dietmar.eggemann@arm.com> +Date: Wed, 16 Nov 2016 10:43:37 +0000 +Subject: [PATCH 40/92] arm64: Frequency-invariant load-tracking support + +Implements arch-specific function scale_freq_capacity() which provides the +following frequency scaling factor: + + (current_freq(cpu) << SCHED_CAPACITY_SHIFT) / max_supported_freq(cpu) + +The debug output in init_cpu_capacity_callback() has been changed to be +able to distinguish whether cpu capacity and max frequency or only max +frequency has been parsed. The latter case happens on systems where there +is no or broken cpu capacity binding (cpu node property +capacity-dmips-mhz) information. + +One possible consumer of this is the Per-Entity Load Tracking (PELT) +mechanism of the scheduler's CFS class. + +The actual wiring up to the scheduler isn't provided by this patch. 
+ +Cc: Catalin Marinas <catalin.marinas@arm.com> +Cc: Will Deacon <will.deacon@arm.com> +Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com> +(cherry picked from commit b13969222bdf073b5220e990e5bb923b458b5c48) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + arch/arm64/kernel/topology.c | 58 +++++++++++++++++++++++++++++++++++++++----- + 1 file changed, 52 insertions(+), 6 deletions(-) + +diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c +index 3d27df1..280319b 100644 +--- a/arch/arm64/kernel/topology.c ++++ b/arch/arm64/kernel/topology.c +@@ -176,6 +176,8 @@ static void normalize_cpu_capacity(void) + static bool cap_parsing_done; + static void parsing_done_workfn(struct work_struct *work); + static DECLARE_WORK(parsing_done_work, parsing_done_workfn); ++static DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE; ++static DEFINE_PER_CPU(unsigned long, max_freq); + + static int + init_cpu_capacity_callback(struct notifier_block *nb, +@@ -190,13 +192,14 @@ static void normalize_cpu_capacity(void) + + switch (val) { + case CPUFREQ_NOTIFY: +- pr_debug("cpu_capacity: init cpu capacity for CPUs [%*pbl] (to_visit=%*pbl)\n", +- cpumask_pr_args(policy->related_cpus), +- cpumask_pr_args(cpus_to_visit)); ++ pr_debug("cpu_capacity: calling %s for CPUs [%*pbl] (to_visit=[%*pbl])\n", ++ __func__, cpumask_pr_args(policy->related_cpus), ++ cpumask_pr_args(cpus_to_visit)); + cpumask_andnot(cpus_to_visit, + cpus_to_visit, + policy->related_cpus); + for_each_cpu(cpu, policy->related_cpus) { ++ per_cpu(max_freq, cpu) = policy->cpuinfo.max_freq; + if (cap_parsing_failed) + continue; + raw_capacity[cpu] = arch_scale_cpu_capacity(NULL, cpu) * +@@ -207,8 +210,10 @@ static void normalize_cpu_capacity(void) + if (!cap_parsing_failed) { + normalize_cpu_capacity(); + kfree(raw_capacity); ++ pr_debug("cpu_capacity: parsing done\n"); ++ } else { ++ pr_debug("cpu_capacity: max frequency parsing done\n"); + } +- pr_debug("cpu_capacity: 
parsing done\n"); + cap_parsing_done = true; + schedule_work(&parsing_done_work); + } +@@ -220,16 +225,57 @@ static void normalize_cpu_capacity(void) + .notifier_call = init_cpu_capacity_callback, + }; + ++unsigned long scale_freq_capacity(struct sched_domain *sd, int cpu) ++{ ++ return per_cpu(freq_scale, cpu); ++} ++ ++static void set_freq_scale(unsigned int cpu, unsigned long freq) ++{ ++ unsigned long max = per_cpu(max_freq, cpu); ++ ++ if (!max) ++ return; ++ ++ per_cpu(freq_scale, cpu) = (freq << SCHED_CAPACITY_SHIFT) / max; ++} ++ ++static int ++set_freq_scale_callback(struct notifier_block *nb, ++ unsigned long val, ++ void *data) ++{ ++ struct cpufreq_freqs *freq = data; ++ ++ switch (val) { ++ case CPUFREQ_PRECHANGE: ++ set_freq_scale(freq->cpu, freq->new); ++ } ++ return 0; ++} ++ ++static struct notifier_block set_freq_scale_notifier = { ++ .notifier_call = set_freq_scale_callback, ++}; ++ + static int __init register_cpufreq_notifier(void) + { ++ int ret; ++ + if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL)) { + pr_err("cpu_capacity: failed to allocate memory for cpus_to_visit\n"); + return -ENOMEM; + } + cpumask_copy(cpus_to_visit, cpu_possible_mask); + +- return cpufreq_register_notifier(&init_cpu_capacity_notifier, +- CPUFREQ_POLICY_NOTIFIER); ++ ret = cpufreq_register_notifier(&init_cpu_capacity_notifier, ++ CPUFREQ_POLICY_NOTIFIER); ++ ++ if (ret) ++ return ret; ++ ++ return cpufreq_register_notifier(&set_freq_scale_notifier, ++ CPUFREQ_TRANSITION_NOTIFIER); + } + core_initcall(register_cpufreq_notifier); + +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0041-arm64-Wire-frequency-invariant-load-tracking-support.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0041-arm64-Wire-frequency-invariant-load-tracking-support.patch new file mode 100644 index 0000000..ef066fd --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0041-arm64-Wire-frequency-invariant-load-tracking-support.patch @@ -0,0 +1,39 @@ +From 
527149044bd9ad829a841ad770dbad688d01f2cc Mon Sep 17 00:00:00 2001 +From: Dietmar Eggemann <dietmar.eggemann@arm.com> +Date: Fri, 25 Sep 2015 17:15:11 +0100 +Subject: [PATCH 41/92] arm64: Wire frequency-invariant load-tracking support + up to the scheduler + +Define arch_scale_freq_capacity to use the arch-specific function +scale_freq_capacity(). + +Cc: Catalin Marinas <catalin.marinas@arm.com> +Cc: Will Deacon <will.deacon@arm.com> +Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com> +(cherry picked from commit 3cf6bce5357e08c7876df57c923aa27bf864c00a) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + arch/arm64/include/asm/topology.h | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h +index 5d576eb..0f2c4dc 100644 +--- a/arch/arm64/include/asm/topology.h ++++ b/arch/arm64/include/asm/topology.h +@@ -36,6 +36,13 @@ struct cpu_topology { + #define arch_scale_cpu_capacity scale_cpu_capacity + extern unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu); + ++#ifdef CONFIG_CPU_FREQ ++ ++#define arch_scale_freq_capacity scale_freq_capacity ++extern unsigned long scale_freq_capacity(struct sched_domain *sd, int cpu); ++ ++#endif /* CONFIG_CPU_FREQ */ ++ + #include <asm-generic/topology.h> + + #endif /* _ASM_ARM_TOPOLOGY_H */ +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0042-arm-Set-SD_ASYM_CPUCAPACITY-for-big.LITTLE-platforms.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0042-arm-Set-SD_ASYM_CPUCAPACITY-for-big.LITTLE-platforms.patch new file mode 100644 index 0000000..9225712 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0042-arm-Set-SD_ASYM_CPUCAPACITY-for-big.LITTLE-platforms.patch @@ -0,0 +1,80 @@ +From 68c09523de80d3510269aa418c5c3814c4aae56b Mon Sep 17 00:00:00 2001 +From: Morten Rasmussen <morten.rasmussen@arm.com> +Date: Fri, 17 Jun 2016 15:03:34 +0100 +Subject: [PATCH 42/92] arm: Set 
SD_ASYM_CPUCAPACITY for big.LITTLE platforms + +Set the SD_ASYM_CPUCAPACITY flag for DIE level sched_domains +on big.LITTLE systems. + +cc: Russell King <linux@arm.linux.org.uk> + +Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com> +(cherry picked from commit 2954ca1a61107ff029d1668dadae88c5e43035e5) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + arch/arm/kernel/topology.c | 16 +++++++++++++++- + 1 file changed, 15 insertions(+), 1 deletion(-) + +diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c +index e521b07..69fb4b6 100644 +--- a/arch/arm/kernel/topology.c ++++ b/arch/arm/kernel/topology.c +@@ -154,6 +154,7 @@ struct cpu_efficiency { + static u32 *raw_capacity; + static bool cap_parsing_failed; + static u32 capacity_scale; ++static bool asym_cpucap; + + static int __init parse_cpu_capacity(struct device_node *cpu_node, int cpu) + { +@@ -210,6 +211,8 @@ static void normalize_cpu_capacity(void) + set_capacity_scale(cpu, capacity); + pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n", + cpu, arch_scale_cpu_capacity(NULL, cpu)); ++ if (capacity < capacity_scale) ++ asym_cpucap = true; + } + mutex_unlock(&cpu_scale_mutex); + } +@@ -422,6 +425,9 @@ static void __init parse_dt_topology(void) + middle_capacity = ((max_capacity / 3) + >> (SCHED_CAPACITY_SHIFT-1)) + 1; + ++ if (max_capacity && max_capacity != min_capacity) ++ asym_cpucap = true; ++ + if (cap_from_dt && !cap_parsing_failed) + normalize_cpu_capacity(); + } +@@ -438,6 +444,9 @@ static void update_cpu_capacity(unsigned int cpu) + + set_capacity_scale(cpu, cpu_capacity(cpu) / middle_capacity); + ++ if (scale_cpu_capacity(NULL, cpu) < SCHED_CAPACITY_SCALE) ++ asym_cpucap = true; ++ + pr_info("CPU%u: update cpu_capacity %lu\n", + cpu, arch_scale_cpu_capacity(NULL, cpu)); + } +@@ -553,12 +562,17 @@ static inline int cpu_corepower_flags(void) + return SD_SHARE_PKG_RESOURCES | SD_SHARE_POWERDOMAIN; + } + ++static inline int arm_cpu_cpu_flags(void) ++{ ++ return 
asym_cpucap ? SD_ASYM_CPUCAPACITY : 0; ++} ++ + static struct sched_domain_topology_level arm_topology[] = { + #ifdef CONFIG_SCHED_MC + { cpu_corepower_mask, cpu_corepower_flags, SD_INIT_NAME(GMC) }, + { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, + #endif +- { cpu_cpu_mask, SD_INIT_NAME(DIE) }, ++ { cpu_cpu_mask, arm_cpu_cpu_flags, SD_INIT_NAME(DIE) }, + { NULL, }, + }; + +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0043-arm-Update-SD_ASYM_CPUCAPACITY-topology-flag-when-cp.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0043-arm-Update-SD_ASYM_CPUCAPACITY-topology-flag-when-cp.patch new file mode 100644 index 0000000..6781716 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0043-arm-Update-SD_ASYM_CPUCAPACITY-topology-flag-when-cp.patch @@ -0,0 +1,151 @@ +From 82ae3c283c44fb74536b33dfeb3f23e228af869f Mon Sep 17 00:00:00 2001 +From: Morten Rasmussen <morten.rasmussen@arm.com> +Date: Wed, 22 Jun 2016 10:58:17 +0100 +Subject: [PATCH 43/92] arm: Update SD_ASYM_CPUCAPACITY topology flag when cpu + capacity is changed + +When cpu capacity information is provided through DT or sysfs we might +not know if the system has asymmetric cpu capacities when the system +topology information is initially passed to the scheduler. We therefore +have to enforce a rebuild of the sched_domain hierarchy if we later +discover that the flag was initially set wrong. + +It is mainly an issue for systems with same type of cores (dmips/mhz) +but different max frequencies, and asymmetric cpu capacity systems where +cpu capacity is not provided by DT but set through sysfs later. 
+ +cc: Russell King <linux@armlinux.org.uk> + +Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com> +(cherry picked from commit d02bb26e525bcbeb3c91345c4ee2a1fe1ca11e48) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + arch/arm/kernel/topology.c | 39 +++++++++++++++++++++++++++++++++++++-- + 1 file changed, 37 insertions(+), 2 deletions(-) + +diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c +index 69fb4b6..4d94639 100644 +--- a/arch/arm/kernel/topology.c ++++ b/arch/arm/kernel/topology.c +@@ -22,6 +22,7 @@ + #include <linux/sched.h> + #include <linux/slab.h> + #include <linux/cpufreq.h> ++#include <linux/cpuset.h> + + #include <asm/cputype.h> + #include <asm/topology.h> +@@ -43,6 +44,8 @@ + */ + static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE; + static DEFINE_MUTEX(cpu_scale_mutex); ++static bool asym_cpucap; ++static bool update_flags; + + unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu) + { +@@ -54,6 +57,14 @@ static void set_capacity_scale(unsigned int cpu, unsigned long capacity) + per_cpu(cpu_scale, cpu) = capacity; + } + ++static void update_sched_flags(void) ++{ ++ update_flags = true; ++ rebuild_sched_domains(); ++ update_flags = false; ++ pr_debug("cpu_capacity: Rebuilt sched_domain hierarchy.\n"); ++} ++ + #ifdef CONFIG_PROC_SYSCTL + #include <asm/cpu.h> + #include <linux/string.h> +@@ -83,6 +94,7 @@ static ssize_t store_cpu_capacity(struct device *dev, + + if (count) { + char *p = (char *) buf; ++ bool asym = false; + + ret = kstrtoul(p, 0, &new_capacity); + if (ret) +@@ -93,6 +105,17 @@ static ssize_t store_cpu_capacity(struct device *dev, + mutex_lock(&cpu_scale_mutex); + for_each_cpu(i, &cpu_topology[this_cpu].core_sibling) + set_capacity_scale(i, new_capacity); ++ ++ for_each_possible_cpu(i) { ++ if (per_cpu(cpu_scale, i) != new_capacity) ++ asym = true; ++ } ++ ++ if (asym != asym_cpucap) { ++ asym_cpucap = asym; ++ update_sched_flags(); ++ } ++ + 
mutex_unlock(&cpu_scale_mutex); + } + +@@ -154,7 +177,6 @@ struct cpu_efficiency { + static u32 *raw_capacity; + static bool cap_parsing_failed; + static u32 capacity_scale; +-static bool asym_cpucap; + + static int __init parse_cpu_capacity(struct device_node *cpu_node, int cpu) + { +@@ -199,6 +221,7 @@ static void normalize_cpu_capacity(void) + { + u64 capacity; + int cpu; ++ bool asym = false; + + if (!raw_capacity || cap_parsing_failed) + return; +@@ -212,9 +235,12 @@ static void normalize_cpu_capacity(void) + pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n", + cpu, arch_scale_cpu_capacity(NULL, cpu)); + if (capacity < capacity_scale) +- asym_cpucap = true; ++ asym = true; + } + mutex_unlock(&cpu_scale_mutex); ++ ++ if (asym != asym_cpucap) ++ asym_cpucap = asym; + } + + #ifdef CONFIG_CPU_FREQ +@@ -232,6 +258,7 @@ static void normalize_cpu_capacity(void) + { + struct cpufreq_policy *policy = data; + int cpu; ++ bool asym; + + if (cap_parsing_done) + return 0; +@@ -254,7 +281,10 @@ static void normalize_cpu_capacity(void) + } + if (cpumask_empty(cpus_to_visit)) { + if (!cap_parsing_failed) { ++ asym = asym_cpucap; + normalize_cpu_capacity(); ++ if (asym != asym_cpucap) ++ update_sched_flags(); + kfree(raw_capacity); + pr_debug("cpu_capacity: parsing done"); + } else { +@@ -502,6 +532,11 @@ static void update_siblings_masks(unsigned int cpuid) + smp_wmb(); + } + ++int arch_update_cpu_topology(void) ++{ ++ return update_flags ? 
1 : 0; ++} ++ + /* + * store_cpu_topology is called at boot when only one cpu is running + * and with the mutex cpu_hotplug.lock locked, when several cpus have booted, +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0044-arm64-Set-SD_ASYM_CPUCAPACITY-topology-flag-for-asym.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0044-arm64-Set-SD_ASYM_CPUCAPACITY-topology-flag-for-asym.patch new file mode 100644 index 0000000..819a5cf --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0044-arm64-Set-SD_ASYM_CPUCAPACITY-topology-flag-for-asym.patch @@ -0,0 +1,165 @@ +From bacbde41338ca493a4557fb33faf4ea4ab788bb5 Mon Sep 17 00:00:00 2001 +From: Morten Rasmussen <morten.rasmussen@arm.com> +Date: Wed, 22 Jun 2016 14:51:17 +0100 +Subject: [PATCH 44/92] arm64: Set SD_ASYM_CPUCAPACITY topology flag for + asymmetric cpu capacity systems + +In addition to adjusting the cpu capacities asymmetric cpu capacity +systems also have to set the SD_ASYM_CPUCAPACITY topology flag to inform +the scheduler to look harder when load balancing. This patch sets the +topology flag on DIE level for such systems based on the cpu capacities +provided through DT or sysfs. 
+ +cc: Catalin Marinas <catalin.marinas@arm.com> +cc: Will Deacon <will.deacon@arm.com> + +Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com> +(cherry picked from commit 8a8992543ec08e26f8a8ab3cdb4bb556283ce9ea) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + arch/arm64/kernel/topology.c | 52 ++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 52 insertions(+) + +diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c +index 280319b..eb2ad75 100644 +--- a/arch/arm64/kernel/topology.c ++++ b/arch/arm64/kernel/topology.c +@@ -21,12 +21,15 @@ + #include <linux/sched.h> + #include <linux/slab.h> + #include <linux/cpufreq.h> ++#include <linux/cpuset.h> + + #include <asm/cputype.h> + #include <asm/topology.h> + + static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE; + static DEFINE_MUTEX(cpu_scale_mutex); ++static bool asym_cpucap; ++static bool update_flags; + + unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu) + { +@@ -38,6 +41,14 @@ static void set_capacity_scale(unsigned int cpu, unsigned long capacity) + per_cpu(cpu_scale, cpu) = capacity; + } + ++static void update_sched_flags(void) ++{ ++ update_flags = true; ++ rebuild_sched_domains(); ++ update_flags = false; ++ pr_debug("cpu_capacity: Rebuilt sched_domain hierarchy.\n"); ++} ++ + #ifdef CONFIG_PROC_SYSCTL + #include <asm/cpu.h> + #include <linux/string.h> +@@ -67,6 +78,7 @@ static ssize_t store_cpu_capacity(struct device *dev, + + if (count) { + char *p = (char *) buf; ++ bool asym = false; + + ret = kstrtoul(p, 0, &new_capacity); + if (ret) +@@ -77,6 +89,16 @@ static ssize_t store_cpu_capacity(struct device *dev, + mutex_lock(&cpu_scale_mutex); + for_each_cpu(i, &cpu_topology[this_cpu].core_sibling) + set_capacity_scale(i, new_capacity); ++ ++ for_each_possible_cpu(i) { ++ if (per_cpu(cpu_scale, i) != new_capacity) ++ asym = true; ++ } ++ ++ if (asym != asym_cpucap) { ++ asym_cpucap = asym; ++ update_sched_flags(); ++ } + 
mutex_unlock(&cpu_scale_mutex); + } + +@@ -153,6 +175,7 @@ static void normalize_cpu_capacity(void) + { + u64 capacity; + int cpu; ++ bool asym = false; + + if (!raw_capacity || cap_parsing_failed) + return; +@@ -167,8 +190,13 @@ static void normalize_cpu_capacity(void) + set_capacity_scale(cpu, capacity); + pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n", + cpu, arch_scale_cpu_capacity(NULL, cpu)); ++ if (capacity < capacity_scale) ++ asym = true; + } + mutex_unlock(&cpu_scale_mutex); ++ ++ if (asym != asym_cpucap) ++ asym_cpucap = asym; + } + + #ifdef CONFIG_CPU_FREQ +@@ -186,6 +214,7 @@ static void normalize_cpu_capacity(void) + { + struct cpufreq_policy *policy = data; + int cpu; ++ bool asym; + + if (cap_parsing_done) + return 0; +@@ -208,7 +237,10 @@ static void normalize_cpu_capacity(void) + } + if (cpumask_empty(cpus_to_visit)) { + if (!cap_parsing_failed) { ++ asym = asym_cpucap; + normalize_cpu_capacity(); ++ if (asym != asym_cpucap) ++ update_sched_flags(); + kfree(raw_capacity); + pr_debug("cpu_capacity: parsing done\n"); + } else { +@@ -488,6 +520,11 @@ const struct cpumask *cpu_coregroup_mask(int cpu) + return &cpu_topology[cpu].core_sibling; + } + ++static int cpu_cpu_flags(void) ++{ ++ return asym_cpucap ? SD_ASYM_CPUCAPACITY : 0; ++} ++ + static void update_siblings_masks(unsigned int cpuid) + { + struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid]; +@@ -569,6 +606,19 @@ static void __init reset_cpu_topology(void) + } + } + ++static struct sched_domain_topology_level arm64_topology[] = { ++#ifdef CONFIG_SCHED_MC ++ { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, ++#endif ++ { cpu_cpu_mask, cpu_cpu_flags, SD_INIT_NAME(DIE) }, ++ { NULL, } ++}; ++ ++int arch_update_cpu_topology(void) ++{ ++ return update_flags ? 
1 : 0; ++} ++ + void __init init_cpu_topology(void) + { + reset_cpu_topology(); +@@ -579,4 +629,6 @@ void __init init_cpu_topology(void) + */ + if (of_have_populated_dt() && parse_dt_topology()) + reset_cpu_topology(); ++ else ++ set_sched_topology(arm64_topology); + } +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0045-sched-Documentation-for-scheduler-energy-cost-model.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0045-sched-Documentation-for-scheduler-energy-cost-model.patch new file mode 100644 index 0000000..273f0e8 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0045-sched-Documentation-for-scheduler-energy-cost-model.patch @@ -0,0 +1,389 @@ +From ffa86114448cb4f15e3774072964960c5c4d6b5e Mon Sep 17 00:00:00 2001 +From: Morten Rasmussen <morten.rasmussen@arm.com> +Date: Tue, 13 Jan 2015 13:43:28 +0000 +Subject: [PATCH 45/92] sched: Documentation for scheduler energy cost model + +This documentation patch provides an overview of the experimental +scheduler energy costing model, associated data structures, and a +reference recipe on how platforms can be characterized to derive energy +models. 
+ +Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com> +(cherry picked from commit 8de9277814e36b07759c65bce220c82584f80aa3) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + Documentation/scheduler/sched-energy.txt | 362 +++++++++++++++++++++++++++++++ + 1 file changed, 362 insertions(+) + create mode 100644 Documentation/scheduler/sched-energy.txt + +diff --git a/Documentation/scheduler/sched-energy.txt b/Documentation/scheduler/sched-energy.txt +new file mode 100644 +index 0000000..dab2f90 +--- /dev/null ++++ b/Documentation/scheduler/sched-energy.txt +@@ -0,0 +1,362 @@ ++Energy cost model for energy-aware scheduling (EXPERIMENTAL) ++ ++Introduction ++============= ++ ++The basic energy model uses platform energy data stored in sched_group_energy ++data structures attached to the sched_groups in the sched_domain hierarchy. The ++energy cost model offers two functions that can be used to guide scheduling ++decisions: ++ ++1. static unsigned int sched_group_energy(struct energy_env *eenv) ++2. static int energy_diff(struct energy_env *eenv) ++ ++sched_group_energy() estimates the energy consumed by all cpus in a specific ++sched_group including any shared resources owned exclusively by this group of ++cpus. Resources shared with other cpus are excluded (e.g. later level caches). ++ ++energy_diff() estimates the total energy impact of a utilization change. That ++is, adding, removing, or migrating utilization (tasks). ++ ++Both functions use a struct energy_env to specify the scenario to be evaluated: ++ ++ struct energy_env { ++ struct sched_group *sg_top; ++ struct sched_group *sg_cap; ++ int cap_idx; ++ int util_delta; ++ int src_cpu; ++ int dst_cpu; ++ int energy; ++ }; ++ ++sg_top: sched_group to be evaluated. Not used by energy_diff(). ++ ++sg_cap: sched_group covering the cpus in the same frequency domain. Set by ++sched_group_energy(). ++ ++cap_idx: Capacity state to be used for energy calculations. Set by ++find_new_capacity(). 
++ ++util_delta: Amount of utilization to be added, removed, or migrated. ++ ++src_cpu: Source cpu from where 'util_delta' utilization is removed. Should be ++-1 if no source (e.g. task wake-up). ++ ++dst_cpu: Destination cpu where 'util_delta' utilization is added. Should be -1 ++if utilization is removed (e.g. terminating tasks). ++ ++energy: Result of sched_group_energy(). ++ ++The metric used to represent utilization is the actual per-entity running time ++averaged over time using a geometric series. Very similar to the existing ++per-entity load-tracking, but _not_ scaled by task priority and capped by the ++capacity of the cpu. The latter property does mean that utilization may ++underestimate the compute requirements for tasks on fully/over utilized cpus. ++The greatest potential for energy savings without affecting performance too much ++is in scenarios where the system isn't fully utilized. If the system is deemed ++fully utilized, load-balancing should be done with task load (includes task ++priority) instead in the interest of fairness and performance. ++ ++ ++Background and Terminology ++=========================== ++ ++To make it clear from the start: ++ ++energy = [joule] (resource like a battery on powered devices) ++power = energy/time = [joule/second] = [watt] ++ ++The goal of energy-aware scheduling is to minimize energy, while still getting ++the job done. That is, we want to maximize: ++ ++ performance [inst/s] ++ -------------------- ++ power [W] ++ ++which is equivalent to minimizing: ++ ++ energy [J] ++ ----------- ++ instruction ++ ++while still getting 'good' performance. It is essentially an alternative ++optimization objective to the current performance-only objective for the ++scheduler. This alternative considers two objectives: energy-efficiency and ++performance. Hence, there needs to be a user controllable knob to switch the ++objective. Since it is early days, this is currently a sched_feature ++(ENERGY_AWARE). 
++ ++The idea behind introducing an energy cost model is to allow the scheduler to ++evaluate the implications of its decisions rather than applying energy-saving ++techniques blindly that may only have positive effects on some platforms. At ++the same time, the energy cost model must be as simple as possible to minimize ++the scheduler latency impact. ++ ++Platform topology ++------------------ ++ ++The system topology (cpus, caches, and NUMA information, not peripherals) is ++represented in the scheduler by the sched_domain hierarchy which has ++sched_groups attached at each level that covers one or more cpus (see ++sched-domains.txt for more details). To add energy awareness to the scheduler ++we need to consider power and frequency domains. ++ ++Power domain: ++ ++A power domain is a part of the system that can be powered on/off ++independently. Power domains are typically organized in a hierarchy where you ++may be able to power down just a cpu or a group of cpus along with any ++associated resources (e.g. shared caches). Powering up a cpu means that all ++power domains it is a part of in the hierarchy must be powered up. Hence, it is ++more expensive to power up the first cpu that belongs to a higher level power ++domain than powering up additional cpus in the same high level domain. Two ++level power domain hierarchy example: ++ ++ Power source ++ +-------------------------------+----... ++per group PD G G ++ | +----------+ | ++ +--------+-------| Shared | (other groups) ++per-cpu PD G G | resource | ++ | | +----------+ ++ +-------+ +-------+ ++ | CPU 0 | | CPU 1 | ++ +-------+ +-------+ ++ ++Frequency domain: ++ ++Frequency domains (P-states) typically cover the same group of cpus as one of ++the power domain levels. That is, there might be several smaller power domains ++sharing the same frequency (P-state) or there might be a power domain spanning ++multiple frequency domains. 
++ ++From a scheduling point of view there is no need to know the actual frequencies ++[Hz]. All the scheduler cares about is the compute capacity available at the ++current state (P-state) the cpu is in and any other available states. For that ++reason, and to also factor in any cpu micro-architecture differences, compute ++capacity scaling states are called 'capacity states' in this document. For SMP ++systems this is equivalent to P-states. For mixed micro-architecture systems ++(like ARM big.LITTLE) it is P-states scaled according to the micro-architecture ++performance relative to the other cpus in the system. ++ ++Energy modelling: ++------------------ ++ ++Due to the hierarchical nature of the power domains, the most obvious way to ++model energy costs is therefore to associate power and energy costs with ++domains (groups of cpus). Energy costs of shared resources are associated with ++the group of cpus that share the resources, only the cost of powering the ++cpu itself and any private resources (e.g. private L1 caches) is associated ++with the per-cpu groups (lowest level). ++ ++For example, for an SMP system with per-cpu power domains and a cluster level ++(group of cpus) power domain we get the overall energy costs to be: ++ ++ energy = energy_cluster + n * energy_cpu ++ ++where 'n' is the number of cpus powered up and energy_cluster is the cost paid ++as soon as any cpu in the cluster is powered up. ++ ++The power and frequency domains can naturally be mapped onto the existing ++sched_domain hierarchy and sched_groups by adding the necessary data to the ++existing data structures. ++ ++The energy model considers energy consumption from two contributors (shown in ++the illustration below): ++ ++1. Busy energy: Energy consumed while a cpu and the higher level groups that it ++belongs to are busy running tasks. Busy energy is associated with the state of ++the cpu, not an event. The time the cpu spends in this state varies. 
Thus, the ++most obvious platform parameter for this contribution is busy power ++(energy/time). ++ ++2. Idle energy: Energy consumed while a cpu and higher level groups that it ++belongs to are idle (in a C-state). Like busy energy, idle energy is associated ++with the state of the cpu. Thus, the platform parameter for this contribution ++is idle power (energy/time). ++ ++Energy consumed during transitions from an idle-state (C-state) to a busy state ++(P-state) or going the other way is ignored by the model to simplify the energy ++model calculations. ++ ++ ++ Power ++ ^ ++ | busy->idle idle->busy ++ | transition transition ++ | ++ | _ __ ++ | / \ / \__________________ ++ |______________/ \ / ++ | \ / ++ | Busy \ Idle / Busy ++ | low P-state \____________/ high P-state ++ | ++ +------------------------------------------------------------> time ++ ++Busy |--------------| |-----------------| ++ ++Wakeup |------| |------| ++ ++Idle |------------| ++ ++ ++The basic algorithm ++==================== ++ ++The basic idea is to determine the total energy impact when utilization is ++added or removed by estimating the impact at each level in the sched_domain ++hierarchy starting from the bottom (sched_group contains just a single cpu). ++The energy cost comes from busy time (sched_group is awake because one or more ++cpus are busy) and idle time (in an idle-state). Energy model numbers account ++for energy costs associated with all cpus in the sched_group as a group. ++ ++ for_each_domain(cpu, sd) { ++ sg = sched_group_of(cpu) ++ energy_before = curr_util(sg) * busy_power(sg) ++ + (1-curr_util(sg)) * idle_power(sg) ++ energy_after = new_util(sg) * busy_power(sg) ++ + (1-new_util(sg)) * idle_power(sg) ++ energy_diff += energy_before - energy_after ++ ++ } ++ ++ return energy_diff ++ ++{curr, new}_util: The cpu utilization at the lowest level and the overall ++non-idle time for the entire group for higher levels. Utilization is in the ++range 0.0 to 1.0 in the pseudo-code. 
++ ++busy_power: The power consumption of the sched_group. ++ ++idle_power: The power consumption of the sched_group when idle. ++ ++Note: It is a fundamental assumption that the utilization is (roughly) scale ++invariant. Task utilization tracking factors in any frequency scaling and ++performance scaling differences due to different cpu microarchitectures such ++that task utilization can be used across the entire system. ++ ++ ++Platform energy data ++===================== ++ ++struct sched_group_energy can be attached to sched_groups in the sched_domain ++hierarchy and has the following members: ++ ++cap_states: ++ List of struct capacity_state representing the supported capacity states ++ (P-states). struct capacity_state has two members: cap and power, which ++ represent the compute capacity and the busy_power of the state. The ++ list must be ordered by capacity low->high. ++ ++nr_cap_states: ++ Number of capacity states in cap_states list. ++ ++idle_states: ++ List of struct idle_state containing idle_state power cost for each ++ idle-state supported by the system ordered by shallowest state first. ++ All states must be included at all levels in the hierarchy, i.e. a ++ sched_group spanning just a single cpu must also include coupled ++ idle-states (cluster states). In addition to the cpuidle idle-states, ++ the list must also contain an entry for idling using the arch ++ default idle (arch_idle_cpu()). Although this state may not be a true ++ hardware idle-state, it is considered the shallowest idle-state in the ++ energy model and must be the first entry. cpus may enter this state ++ (possibly 'active idling') if cpuidle decides not to enter a cpuidle ++ idle-state. Default idle may not be used when cpuidle is enabled. ++ In this case, it should just be a copy of the first cpuidle idle-state. ++ ++nr_idle_states: ++ Number of idle states in idle_states list. ++ ++There are no unit requirements for the energy cost data. 
Data can be normalized ++with any reference, however, the normalization must be consistent across all ++energy cost data. That is, one bogo-joule/watt must be the same quantity for ++all data, but we don't care what it is. ++ ++A recipe for platform characterization ++======================================= ++ ++Obtaining the actual model data for a particular platform requires some way of ++measuring power/energy. There isn't a tool to help with this (yet). This ++section provides a recipe for use as reference. It covers the steps used to ++characterize the ARM TC2 development platform. This sort of measurement is ++expected to be done anyway when tuning cpuidle and cpufreq for a given ++platform. ++ ++The energy model needs two types of data (struct sched_group_energy holds ++these) for each sched_group where energy costs should be taken into account: ++ ++1. Capacity state information ++ ++A list containing the compute capacity and power consumption when fully ++utilized attributed to the group as a whole for each available capacity state. ++At the lowest level (group contains just a single cpu) this is the power of the ++cpu alone without including power consumed by resources shared with other cpus. ++It basically needs to fit the basic modelling approach described in the "Background ++and Terminology" section: ++ ++ energy_system = energy_shared + n * energy_cpu ++ ++for a system containing 'n' busy cpus. Only 'energy_cpu' should be included at ++the lowest level. 'energy_shared' is included at the next level which ++represents the group of cpus among which the resources are shared. ++ ++This model is, of course, a simplification of reality. Thus, power/energy ++attributions might not always exactly represent how the hardware is designed. ++Also, busy power is likely to depend on the workload. It is therefore ++recommended to use a representative mix of workloads when characterizing the ++capacity states. 
++ ++If the group has no capacity scaling support, the list will contain a single ++state where power is the busy power attributed to the group. The capacity ++should be set to a default value (1024). ++ ++When frequency domains include multiple power domains, the group representing ++the frequency domain and all child groups share capacity states. This must be ++indicated by setting the SD_SHARE_CAP_STATES sched_domain flag. All groups at ++all levels that share the capacity state must have the list of capacity states ++with the power set to the contribution of the individual group. ++ ++2. Idle power information ++ ++Stored in the idle_states list. The power number is the group idle power ++consumption in each idle state as well as when the group is idle but has not ++entered an idle-state ('active idle' as mentioned earlier). Due to the way the ++energy model is defined, the idle power of the deepest group idle state can ++alternatively be accounted for in the parent group busy power. In that case the ++group idle state power values are offset such that the idle power of the ++deepest state is zero. It is less intuitive, but it is easier to measure as ++idle power consumed by the group and the busy/idle power of the parent group ++cannot be distinguished without per group measurement points. ++ ++Measuring capacity states and idle power: ++ ++The capacity states' capacity and power can be estimated by running a benchmark ++workload at each available capacity state. By restricting the benchmark to run ++on subsets of cpus it is possible to extrapolate the power consumption of ++shared resources. ++ ++ARM TC2 has two clusters of two and three cpus respectively. Each cluster has a ++shared L2 cache. TC2 has on-chip energy counters per cluster. Running a ++benchmark workload on just one cpu in a cluster means that power is consumed in ++the cluster (higher level group) and a single cpu (lowest level group). 
Adding ++another benchmark task to another cpu increases the power consumption by the ++amount consumed by the additional cpu. Hence, it is possible to extrapolate the ++cluster busy power. ++ ++For platforms that don't have energy counters or equivalent instrumentation ++built-in, it may be possible to use an external DAQ to acquire similar data. ++ ++If the benchmark includes some performance score (for example sysbench cpu ++benchmark), this can be used to record the compute capacity. ++ ++Measuring idle power requires insight into the idle state implementation on the ++particular platform. Specifically, if the platform has coupled idle-states (or ++package states). To measure non-coupled per-cpu idle-states it is necessary to ++keep one cpu busy to keep any shared resources alive to isolate the idle power ++of the cpu from idle/busy power of the shared resources. The cpu can be tricked ++into different per-cpu idle states by disabling the other states. Based on ++various combinations of measurements with specific cpus busy and disabling ++idle-states it is possible to extrapolate the idle-state power. +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0046-sched-Make-energy-awareness-a-sched-feature.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0046-sched-Make-energy-awareness-a-sched-feature.patch new file mode 100644 index 0000000..9b9929b --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0046-sched-Make-energy-awareness-a-sched-feature.patch @@ -0,0 +1,57 @@ +From ac91525a35e79b82a09fb1ed75ee547802f3bed1 Mon Sep 17 00:00:00 2001 +From: Morten Rasmussen <morten.rasmussen@arm.com> +Date: Tue, 13 Jan 2015 13:45:51 +0000 +Subject: [PATCH 46/92] sched: Make energy awareness a sched feature + +This patch introduces the ENERGY_AWARE sched feature, which is +implemented using jump labels when SCHED_DEBUG is defined. It is +statically set false when SCHED_DEBUG is not defined. 
Hence this doesn't +allow energy awareness to be enabled without SCHED_DEBUG. This +sched_feature knob will be replaced later with a more appropriate +control knob when things have matured a bit. + +ENERGY_AWARE is based on per-entity load-tracking hence FAIR_GROUP_SCHED +must be enabled. This dependency isn't checked at compile time yet. + +cc: Ingo Molnar <mingo@redhat.com> +cc: Peter Zijlstra <peterz@infradead.org> +Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com> +(cherry picked from commit e3b80d193dad157c720ec29496104126dbfc67ce) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/fair.c | 5 +++++ + kernel/sched/features.h | 5 +++++ + 2 files changed, 10 insertions(+) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index cb283ca..fa393d9 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -5291,6 +5291,11 @@ static void record_wakee(struct task_struct *p) + } + } + ++static inline bool energy_aware(void) ++{ ++ return sched_feat(ENERGY_AWARE); ++} ++ + /* + * Detect M:N waker/wakee relationships via a switching-frequency heuristic. + * +diff --git a/kernel/sched/features.h b/kernel/sched/features.h +index 69631fa..b634151 100644 +--- a/kernel/sched/features.h ++++ b/kernel/sched/features.h +@@ -69,3 +69,8 @@ + SCHED_FEAT(LB_MIN, false) + SCHED_FEAT(ATTACH_AGE_LOAD, true) + ++/* ++ * Energy aware scheduling. Use platform energy model to guide scheduling ++ * decisions optimizing for energy efficiency. 
++ */ ++SCHED_FEAT(ENERGY_AWARE, false) +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0047-sched-Introduce-energy-data-structures.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0047-sched-Introduce-energy-data-structures.patch new file mode 100644 index 0000000..5590b2f --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0047-sched-Introduce-energy-data-structures.patch @@ -0,0 +1,107 @@ +From 5e91e54c76b1a29dff30f8c5bfb7741569b3a6f8 Mon Sep 17 00:00:00 2001 +From: Dietmar Eggemann <dietmar.eggemann@arm.com> +Date: Fri, 14 Nov 2014 16:08:45 +0000 +Subject: [PATCH 47/92] sched: Introduce energy data structures + +The struct sched_group_energy represents the per sched_group related +data which is needed for energy aware scheduling. It contains: + + (1) number of elements of the idle state array + (2) pointer to the idle state array which comprises 'power consumption' + for each idle state + (3) number of elements of the capacity state array + (4) pointer to the capacity state array which comprises 'compute + capacity and power consumption' tuples for each capacity state + +The struct sched_group obtains a pointer to a struct sched_group_energy. + +The function pointer sched_domain_energy_f is introduced into struct +sched_domain_topology_level which will allow the arch to pass a particular +struct sched_group_energy from the topology shim layer into the scheduler +core. + +The function pointer sched_domain_energy_f has an 'int cpu' parameter +since the folding of two adjacent sd levels via sd degenerate doesn't work +for all sd levels. I.e. it is not possible for example to use this feature +to provide per-cpu energy in sd level DIE on ARM's TC2 platform. + +It was discussed that the folding of sd levels approach is preferable +over the cpu parameter approach, simply because the user (the arch +specifying the sd topology table) can introduce less errors. 
But since +it is not working, the 'int cpu' parameter is the only way out. It's +possible to use the folding of sd levels approach for +sched_domain_flags_f and the cpu parameter approach for the +sched_domain_energy_f at the same time though. With the use of the +'int cpu' parameter, an extra check function has to be provided to make +sure that all cpus spanned by a sched group are provisioned with the same +energy data. + +cc: Ingo Molnar <mingo@redhat.com> +cc: Peter Zijlstra <peterz@infradead.org> +Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com> +(cherry picked from commit 0bab805ddc2bbf1dae5baf44f2812392789846c7) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + include/linux/sched.h | 19 +++++++++++++++++++ + kernel/sched/sched.h | 1 + + 2 files changed, 20 insertions(+) + +diff --git a/include/linux/sched.h b/include/linux/sched.h +index 2e3c2a1..0522149 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -1069,6 +1069,22 @@ struct sched_domain_attr { + + extern int sched_domain_level_max; + ++struct capacity_state { ++ unsigned long cap; /* compute capacity */ ++ unsigned long power; /* power consumption at this compute capacity */ ++}; ++ ++struct idle_state { ++ unsigned long power; /* power consumption in this idle state */ ++}; ++ ++struct sched_group_energy { ++ unsigned int nr_idle_states; /* number of idle states */ ++ struct idle_state *idle_states; /* ptr to idle state array */ ++ unsigned int nr_cap_states; /* number of capacity states */ ++ struct capacity_state *cap_states; /* ptr to capacity state array */ ++}; ++ + struct sched_group; + + struct sched_domain_shared { +@@ -1176,6 +1192,8 @@ extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], + + typedef const struct cpumask *(*sched_domain_mask_f)(int cpu); + typedef int (*sched_domain_flags_f)(void); ++typedef ++const struct sched_group_energy * const(*sched_domain_energy_f)(int cpu); + + #define SDTL_OVERLAP 0x01 + +@@ 
-1189,6 +1207,7 @@ struct sd_data { + struct sched_domain_topology_level { + sched_domain_mask_f mask; + sched_domain_flags_f sd_flags; ++ sched_domain_energy_f energy; + int flags; + int numa_level; + struct sd_data data; +diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h +index 7b34c78..46787bd 100644 +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -914,6 +914,7 @@ struct sched_group { + unsigned int group_weight; + struct sched_group_capacity *sgc; + int asym_prefer_cpu; /* cpu of highest priority in group */ ++ const struct sched_group_energy const *sge; + + /* + * The CPUs this group covers. +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0048-sched-Initialize-energy-data-structures.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0048-sched-Initialize-energy-data-structures.patch new file mode 100644 index 0000000..1107577 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0048-sched-Initialize-energy-data-structures.patch @@ -0,0 +1,126 @@ +From cd9cfc061f16a3664fc374802e6c014602b0c94f Mon Sep 17 00:00:00 2001 +From: Dietmar Eggemann <dietmar.eggemann@arm.com> +Date: Fri, 14 Nov 2014 16:20:20 +0000 +Subject: [PATCH 48/92] sched: Initialize energy data structures + +The sched_group_energy (sge) pointer of the first sched_group (sg) in +the sched_domain (sd) is initialized to point to the appropriate (in +terms of sd level and cpu) sge data defined in the arch and so to the +correct part of the Energy Model (EM). + +Energy-aware scheduling allows that a system has only EM data up to a +certain sd level (so called highest energy aware balancing sd level). +A check in init_sched_energy() enforces that all sd's below this sd +level contain EM data. + +The 'int cpu' parameter of sched_domain_energy_f requires that +check_sched_energy_data() makes sure that all cpus spanned by a sg +are provisioned with the same EM data. + +This patch has also been tested with feature FORCE_SD_OVERLAP enabled. 
+ +cc: Ingo Molnar <mingo@redhat.com> +cc: Peter Zijlstra <peterz@infradead.org> +Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com> +(cherry picked from commit 054c7ad511ad9e2b54e48d4e01d34be97fff104c) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/core.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 71 insertions(+), 1 deletion(-) + +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index 209d2ea..083b318 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -6327,6 +6327,73 @@ static void init_sched_groups_capacity(int cpu, struct sched_domain *sd) + update_group_capacity(sd, cpu); + } + ++#define energy_eff(e, n) \ ++ ((e->cap_states[n].cap << SCHED_CAPACITY_SHIFT)/e->cap_states[n].power) ++ ++static void init_sched_groups_energy(int cpu, struct sched_domain *sd, ++ sched_domain_energy_f fn) ++{ ++ struct sched_group *sg = sd->groups; ++ const struct sched_group_energy *sge; ++ int i; ++ ++ if (!(fn && fn(cpu))) ++ return; ++ ++ if (cpu != group_balance_cpu(sg)) ++ return; ++ ++ if (sd->flags & SD_OVERLAP) { ++ pr_err("BUG: EAS does not support overlapping sd spans\n"); ++#ifdef CONFIG_SCHED_DEBUG ++ pr_err(" the %s domain has SD_OVERLAP set\n", sd->name); ++#endif ++ return; ++ } ++ ++ if (sd->child && !sd->child->groups->sge) { ++ pr_err("BUG: EAS setup borken for CPU%d\n", cpu); ++#ifdef CONFIG_SCHED_DEBUG ++ pr_err(" energy data on %s but not on %s domain\n", ++ sd->name, sd->child->name); ++#endif ++ return; ++ } ++ ++ sge = fn(cpu); ++ ++ /* ++ * Check that the per-cpu provided sd energy data is consistent for all ++ * cpus within the mask. 
++ */ ++ if (cpumask_weight(sched_group_cpus(sg)) > 1) { ++ struct cpumask mask; ++ ++ cpumask_xor(&mask, sched_group_cpus(sg), get_cpu_mask(cpu)); ++ ++ for_each_cpu(i, &mask) ++ BUG_ON(sge != fn(i)); ++ } ++ ++ /* Check that energy efficiency (capacity/power) is monotonically ++ * decreasing in the capacity state vector with higher indexes ++ */ ++ for (i = 0; i < (sge->nr_cap_states - 1); i++) { ++ if (energy_eff(sge, i) > energy_eff(sge, i+1)) ++ continue; ++#ifdef CONFIG_SCHED_DEBUG ++ pr_warn("WARN: cpu=%d, domain=%s: incr. energy eff %lu[%d]->%lu[%d]\n", ++ cpu, sd->name, energy_eff(sge, i), i, ++ energy_eff(sge, i+1), i+1); ++#else ++ pr_warn("WARN: cpu=%d: incr. energy eff %lu[%d]->%lu[%d]\n", ++ cpu, energy_eff(sge, i), i, energy_eff(sge, i+1), i+1); ++#endif ++ } ++ ++ sd->groups->sge = fn(cpu); ++} ++ + /* + * Initializers for schedule domains + * Non-inlined to reduce accumulated stack pressure in build_sched_domains() +@@ -7052,10 +7119,13 @@ static int build_sched_domains(const struct cpumask *cpu_map, + + /* Calculate CPU capacity for physical packages and nodes */ + for (i = nr_cpumask_bits-1; i >= 0; i--) { ++ struct sched_domain_topology_level *tl = sched_domain_topology; ++ + if (!cpumask_test_cpu(i, cpu_map)) + continue; + +- for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) { ++ for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent, tl++) { ++ init_sched_groups_energy(i, sd, tl->energy); + claim_allocations(i, sd); + init_sched_groups_capacity(i, sd); + } +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0049-sched-EAS-cpu-hotplug-interoperability.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0049-sched-EAS-cpu-hotplug-interoperability.patch new file mode 100644 index 0000000..447e302 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0049-sched-EAS-cpu-hotplug-interoperability.patch @@ -0,0 +1,204 @@ +From 3477712d21ac713611aa74f9f5a6ff4b1e49a930 Mon Sep 17 00:00:00 2001 +From: Dietmar Eggemann 
<dietmar.eggemann@arm.com> +Date: Thu, 30 Jul 2015 16:53:30 +0100 +Subject: [PATCH 49/92] sched: EAS & cpu hotplug interoperability + +For Energy-Aware Scheduling (EAS) to work properly, even in the case that +cpus are hot-plugged out, the energy model (EM) data on all energy-aware +sched domains has to be present for all online cpus. + +Mainline sd hierarchy setup code will remove sd's which are not useful for +task scheduling e.g. in the following situations: + +1. Only one cpu remains in one cluster of a two cluster system. + + This remaining cpu only has DIE and no MC sd. + +2. A complete cluster in a two-cluster system is hot-plugged out. + + The cpus of the remaining cluster only have MC and no DIE sd. + +To make sure that all online cpus keep all their energy-aware sd's, +the sd degenerate functionality has been changed to not free sd's if +their first sg contains EM data in case: + +1. There is only one cpu left in the sd. + +2. There have to be at least 2 sg's if certain sd flags are set. + +Instead of freeing such an sd it now clears only its SD_LOAD_BALANCE +flag. + +This will make sure that the EAS functionality will always see all +energy-aware sd's for all online cpus. + +It will introduce a (small ?) performance degradation since the +hot-path macro for_each_domain() has to deal with sd's not +contributing to task-scheduling at all now. There is the handling +of newidle decay values before the SD_LOAD_BALANCE check in +rebalance_domains(). +But generally, code to make sure that task scheduling is not invoked +on these sd's is in place (if (!(sd->flags & SD_LOAD_BALANCE)) +already. + +This patch has been tested on a single (a7) cluster TC2 system. I.e. we +could abandon the SYS sd level patch and use this patch to solve all +problems related to sd topology setups and runtime changes. + +This patch should not be squashed. 
+ +Test (w/ CONFIG_SCHED_DEBUG): + +JUNO: + +$ cat /proc/cpuinfo | grep "^CPU part" +CPU part : 0xd03 +CPU part : 0xd07 +CPU part : 0xd07 +CPU part : 0xd03 +CPU part : 0xd03 +CPU part : 0xd03 + +$ cat /proc/sys/kernel/sched_domain/cpu*/domain*/{name,flags} + +MC +DIE +MC +DIE +MC +DIE +MC +DIE +MC +DIE +MC +DIE +33583 - 0x832f +4143 - 0x102f +33583 +4143 +33583 +4143 +33583 +4143 +33583 +4143 +33583 +4143 + +Hotplug-out A57 cluster + +$ echo 0 > /sys/devices/system/cpu/cpu1/online +$ echo 0 > /sys/devices/system/cpu/cpu2/online + +$ cat /proc/sys/kernel/sched_domain/cpu*/domain*/{name,flags} + +MC +DIE +MC +DIE +MC +DIE +MC +DIE +33583 +4142 - 0x102e <-- !SD_LOAD_BALANCE +33583 +4142 +33583 +4142 +33583 +4142 + +Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com> +(cherry picked from commit a5ebdd0fe5357fe125d9603d766adc69e7607981) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/core.c | 17 ++++++++++------- + kernel/sched/fair.c | 7 +++++-- + 2 files changed, 15 insertions(+), 9 deletions(-) + +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index 083b318..f31ea62 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -5663,9 +5663,6 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, + + if (!(sd->flags & SD_LOAD_BALANCE)) { + printk("does not load-balance\n"); +- if (sd->parent) +- printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain" +- " has parent"); + return -1; + } + +@@ -5760,8 +5757,12 @@ static inline bool sched_debug(void) + + static int sd_degenerate(struct sched_domain *sd) + { +- if (cpumask_weight(sched_domain_span(sd)) == 1) +- return 1; ++ if (cpumask_weight(sched_domain_span(sd)) == 1) { ++ if (sd->groups->sge) ++ sd->flags &= ~SD_LOAD_BALANCE; ++ else ++ return 1; ++ } + + /* Following flags need at least 2 groups */ + if (sd->flags & (SD_LOAD_BALANCE | +@@ -5805,6 +5806,10 @@ static int sd_degenerate(struct sched_domain *sd) + SD_SHARE_PKG_RESOURCES | + 
SD_PREFER_SIBLING | + SD_SHARE_POWERDOMAIN); ++ if (parent->groups->sge) { ++ parent->flags &= ~SD_LOAD_BALANCE; ++ return 0; ++ } + if (nr_node_ids == 1) + pflags &= ~SD_SERIALIZE; + } +@@ -7098,8 +7103,6 @@ static int build_sched_domains(const struct cpumask *cpu_map, + *per_cpu_ptr(d.sd, i) = sd; + if (tl->flags & SDTL_OVERLAP || sched_feat(FORCE_SD_OVERLAP)) + sd->flags |= SD_OVERLAP; +- if (cpumask_equal(cpu_map, sched_domain_span(sd))) +- break; + } + } + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index fa393d9..c7d9bbf 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -7457,6 +7457,9 @@ static inline enum fbq_type fbq_classify_rq(struct rq *rq) + } + #endif /* CONFIG_NUMA_BALANCING */ + ++#define lb_sd_parent(sd) \ ++ (sd->parent && sd->parent->groups != sd->parent->groups->next) ++ + /** + * update_sd_lb_stats - Update sched_domain's statistics for load balancing. + * @env: The load balancing environment. +@@ -7530,7 +7533,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd + + env->src_grp_nr_running = sds->busiest_stat.sum_nr_running; + +- if (!env->sd->parent) { ++ if (!lb_sd_parent(env->sd)) { + /* update overload indicator if we are at root domain */ + if (env->dst_rq->rd->overload != overload) + env->dst_rq->rd->overload = overload; +@@ -7989,7 +7992,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, + int *continue_balancing) + { + int ld_moved, cur_ld_moved, active_balance = 0; +- struct sched_domain *sd_parent = sd->parent; ++ struct sched_domain *sd_parent = lb_sd_parent(sd) ? 
sd->parent : NULL; + struct sched_group *group; + struct rq *busiest; + unsigned long flags; +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0050-sched-Introduce-SD_SHARE_CAP_STATES-sched_domain-fla.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0050-sched-Introduce-SD_SHARE_CAP_STATES-sched_domain-fla.patch new file mode 100644 index 0000000..a55cd11 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0050-sched-Introduce-SD_SHARE_CAP_STATES-sched_domain-fla.patch @@ -0,0 +1,83 @@ +From 1e2c379e10555b3efa8b7f8e4552b9ddae237491 Mon Sep 17 00:00:00 2001 +From: Morten Rasmussen <morten.rasmussen@arm.com> +Date: Tue, 13 Jan 2015 13:50:46 +0000 +Subject: [PATCH 50/92] sched: Introduce SD_SHARE_CAP_STATES sched_domain flag + +cpufreq is currently keeping it a secret which cpus are sharing +clock source. The scheduler needs to know about clock domains as well +to become more energy aware. The SD_SHARE_CAP_STATES domain flag +indicates whether cpus belonging to the sched_domain share capacity +states (P-states). + +There is no connection with cpufreq (yet). The flag must be set by +the arch specific topology code. 
+ +cc: Russell King <linux@arm.linux.org.uk> +cc: Ingo Molnar <mingo@redhat.com> +cc: Peter Zijlstra <peterz@infradead.org> + +Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com> +(cherry picked from commit d5cfb54bc10768bf25da557c1cd7c91df6ae6296) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + include/linux/sched.h | 1 + + kernel/sched/core.c | 10 +++++++--- + 2 files changed, 8 insertions(+), 3 deletions(-) + +diff --git a/include/linux/sched.h b/include/linux/sched.h +index 0522149..fad382a 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -1035,6 +1035,7 @@ extern void wake_q_add(struct wake_q_head *head, + #define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */ + #define SD_OVERLAP 0x2000 /* sched_domains of this level overlap */ + #define SD_NUMA 0x4000 /* cross-node balancing */ ++#define SD_SHARE_CAP_STATES 0x8000 /* Domain members share capacity state */ + + #ifdef CONFIG_SCHED_SMT + static inline int cpu_smt_flags(void) +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index f31ea62..b450329 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -5772,7 +5772,8 @@ static int sd_degenerate(struct sched_domain *sd) + SD_SHARE_CPUCAPACITY | + SD_ASYM_CPUCAPACITY | + SD_SHARE_PKG_RESOURCES | +- SD_SHARE_POWERDOMAIN)) { ++ SD_SHARE_POWERDOMAIN | ++ SD_SHARE_CAP_STATES)) { + if (sd->groups != sd->groups->next) + return 0; + } +@@ -5805,7 +5806,8 @@ static int sd_degenerate(struct sched_domain *sd) + SD_SHARE_CPUCAPACITY | + SD_SHARE_PKG_RESOURCES | + SD_PREFER_SIBLING | +- SD_SHARE_POWERDOMAIN); ++ SD_SHARE_POWERDOMAIN | ++ SD_SHARE_CAP_STATES); + if (parent->groups->sge) { + parent->flags &= ~SD_LOAD_BALANCE; + return 0; +@@ -6515,6 +6517,7 @@ static void claim_allocations(int cpu, struct sched_domain *sd) + * SD_NUMA - describes NUMA topologies + * SD_SHARE_POWERDOMAIN - describes shared power domain + * SD_ASYM_CPUCAPACITY - describes mixed capacity topologies ++ * 
SD_SHARE_CAP_STATES - describes shared capacity states + * + * Odd one out, which beside describing the topology has a quirk also + * prescribes the desired behaviour that goes along with it: +@@ -6527,7 +6530,8 @@ static void claim_allocations(int cpu, struct sched_domain *sd) + SD_NUMA | \ + SD_ASYM_PACKING | \ + SD_ASYM_CPUCAPACITY | \ +- SD_SHARE_POWERDOMAIN) ++ SD_SHARE_POWERDOMAIN | \ ++ SD_SHARE_CAP_STATES) + + static struct sched_domain * + sd_init(struct sched_domain_topology_level *tl, +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0051-sched-Relocated-cpu_util-and-change-return-type.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0051-sched-Relocated-cpu_util-and-change-return-type.patch new file mode 100644 index 0000000..dab8b97 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0051-sched-Relocated-cpu_util-and-change-return-type.patch @@ -0,0 +1,109 @@ +From 88ce1ba602ccc9479feb7837f2c7bc074379561c Mon Sep 17 00:00:00 2001 +From: Morten Rasmussen <morten.rasmussen@arm.com> +Date: Thu, 11 Dec 2014 15:25:29 +0000 +Subject: [PATCH 51/92] sched: Relocated cpu_util() and change return type + +Move cpu_util() to an earlier position in fair.c and change return +type to unsigned long as negative usage doesn't make much sense. All +other load and capacity related functions use unsigned long including +the caller of cpu_util(). 
+ +cc: Ingo Molnar <mingo@redhat.com> +cc: Peter Zijlstra <peterz@infradead.org> + +Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com> +(cherry picked from commit c5aa28c6e0792ee8a994d3e401fc619694d01f5f) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/fair.c | 68 ++++++++++++++++++++++++++--------------------------- + 1 file changed, 34 insertions(+), 34 deletions(-) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index c7d9bbf..0085d4f 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -5274,6 +5274,40 @@ static long effective_load(struct task_group *tg, int cpu, long wl, long wg) + + #endif + ++/* ++ * cpu_util returns the amount of capacity of a CPU that is used by CFS ++ * tasks. The unit of the return value must be the one of capacity so we can ++ * compare the utilization with the capacity of the CPU that is available for ++ * CFS task (ie cpu_capacity). ++ * ++ * cfs_rq.avg.util_avg is the sum of running time of runnable tasks plus the ++ * recent utilization of currently non-runnable tasks on a CPU. It represents ++ * the amount of utilization of a CPU in the range [0..capacity_orig] where ++ * capacity_orig is the cpu_capacity available at the highest frequency ++ * (arch_scale_freq_capacity()). ++ * The utilization of a CPU converges towards a sum equal to or less than the ++ * current capacity (capacity_curr <= capacity_orig) of the CPU because it is ++ * the running time on this CPU scaled by capacity_curr. ++ * ++ * Nevertheless, cfs_rq.avg.util_avg can be higher than capacity_curr or even ++ * higher than capacity_orig because of unfortunate rounding in ++ * cfs.avg.util_avg or just after migrating tasks and new task wakeups until ++ * the average stabilizes with the new running time. We need to check that the ++ * utilization stays within the range of [0..capacity_orig] and cap it if ++ * necessary. 
Without utilization capping, a group could be seen as overloaded ++ * (CPU0 utilization at 121% + CPU1 utilization at 80%) whereas CPU1 has 20% of ++ * available capacity. We allow utilization to overshoot capacity_curr (but not ++ * capacity_orig) as it useful for predicting the capacity required after task ++ * migrations (scheduler-driven DVFS). ++ */ ++static unsigned long cpu_util(int cpu) ++{ ++ unsigned long util = cpu_rq(cpu)->cfs.avg.util_avg; ++ unsigned long capacity = capacity_orig_of(cpu); ++ ++ return (util >= capacity) ? capacity : util; ++} ++ + static void record_wakee(struct task_struct *p) + { + /* +@@ -5783,40 +5817,6 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) + return target; + } + +-/* +- * cpu_util returns the amount of capacity of a CPU that is used by CFS +- * tasks. The unit of the return value must be the one of capacity so we can +- * compare the utilization with the capacity of the CPU that is available for +- * CFS task (ie cpu_capacity). +- * +- * cfs_rq.avg.util_avg is the sum of running time of runnable tasks plus the +- * recent utilization of currently non-runnable tasks on a CPU. It represents +- * the amount of utilization of a CPU in the range [0..capacity_orig] where +- * capacity_orig is the cpu_capacity available at the highest frequency +- * (arch_scale_freq_capacity()). +- * The utilization of a CPU converges towards a sum equal to or less than the +- * current capacity (capacity_curr <= capacity_orig) of the CPU because it is +- * the running time on this CPU scaled by capacity_curr. +- * +- * Nevertheless, cfs_rq.avg.util_avg can be higher than capacity_curr or even +- * higher than capacity_orig because of unfortunate rounding in +- * cfs.avg.util_avg or just after migrating tasks and new task wakeups until +- * the average stabilizes with the new running time. We need to check that the +- * utilization stays within the range of [0..capacity_orig] and cap it if +- * necessary. 
Without utilization capping, a group could be seen as overloaded +- * (CPU0 utilization at 121% + CPU1 utilization at 80%) whereas CPU1 has 20% of +- * available capacity. We allow utilization to overshoot capacity_curr (but not +- * capacity_orig) as it useful for predicting the capacity required after task +- * migrations (scheduler-driven DVFS). +- */ +-static int cpu_util(int cpu) +-{ +- unsigned long util = cpu_rq(cpu)->cfs.avg.util_avg; +- unsigned long capacity = capacity_orig_of(cpu); +- +- return (util >= capacity) ? capacity : util; +-} +- + static inline int task_util(struct task_struct *p) + { + return p->se.avg.util_avg; +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0052-sched-Highest-energy-aware-balancing-sched_domain-le.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0052-sched-Highest-energy-aware-balancing-sched_domain-le.patch new file mode 100644 index 0000000..546ac07 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0052-sched-Highest-energy-aware-balancing-sched_domain-le.patch @@ -0,0 +1,76 @@ +From a90c8ec93f298cccfbc4db29a8513675002da326 Mon Sep 17 00:00:00 2001 +From: Morten Rasmussen <morten.rasmussen@arm.com> +Date: Fri, 2 Jan 2015 17:08:52 +0000 +Subject: [PATCH 52/92] sched: Highest energy aware balancing sched_domain + level pointer + +Add another member to the family of per-cpu sched_domain shortcut +pointers. This one, sd_ea, points to the highest level at which energy +model is provided. At this level and all levels below all sched_groups +have energy model data attached. + +Partial energy model information is possible but restricted to providing +energy model data for lower level sched_domains (sd_ea and below) and +leaving load-balancing on levels above to non-energy-aware +load-balancing. For example, it is possible to apply energy-aware +scheduling within each socket on a multi-socket system and let normal +scheduling handle load-balancing between sockets. 
+ +cc: Ingo Molnar <mingo@redhat.com> +cc: Peter Zijlstra <peterz@infradead.org> +Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com> +(cherry picked from commit c4eb7a2b0a1908a377fb063ecae89d7bacd6aa78) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/core.c | 10 ++++++++++ + kernel/sched/sched.h | 1 + + 2 files changed, 11 insertions(+) + +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index b450329..f2642d4 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -6001,11 +6001,13 @@ static void destroy_sched_domains(struct sched_domain *sd) + DEFINE_PER_CPU(struct sched_domain_shared *, sd_llc_shared); + DEFINE_PER_CPU(struct sched_domain *, sd_numa); + DEFINE_PER_CPU(struct sched_domain *, sd_asym); ++DEFINE_PER_CPU(struct sched_domain *, sd_ea); + + static void update_top_cache_domain(int cpu) + { + struct sched_domain_shared *sds = NULL; + struct sched_domain *sd; ++ struct sched_domain *ea_sd = NULL; + int id = cpu; + int size = 1; + +@@ -6026,6 +6028,14 @@ static void update_top_cache_domain(int cpu) + + sd = highest_flag_domain(cpu, SD_ASYM_PACKING); + rcu_assign_pointer(per_cpu(sd_asym, cpu), sd); ++ ++ for_each_domain(cpu, sd) { ++ if (sd->groups->sge) ++ ea_sd = sd; ++ else ++ break; ++ } ++ rcu_assign_pointer(per_cpu(sd_ea, cpu), ea_sd); + } + + /* +diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h +index 46787bd..22b7cfd 100644 +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -892,6 +892,7 @@ static inline struct sched_domain *lowest_flag_domain(int cpu, int flag) + DECLARE_PER_CPU(struct sched_domain_shared *, sd_llc_shared); + DECLARE_PER_CPU(struct sched_domain *, sd_numa); + DECLARE_PER_CPU(struct sched_domain *, sd_asym); ++DECLARE_PER_CPU(struct sched_domain *, sd_ea); + + struct sched_group_capacity { + atomic_t ref; +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0053-sched-Calculate-energy-consumption-of-sched_group.patch 
b/meta-eas/recipes-kernel/linux/linux-renesas/0053-sched-Calculate-energy-consumption-of-sched_group.patch new file mode 100644 index 0000000..2f82044 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0053-sched-Calculate-energy-consumption-of-sched_group.patch @@ -0,0 +1,229 @@ +From 1e9ffadc70a89ede023b79139debe4f6a17c5c7f Mon Sep 17 00:00:00 2001 +From: Morten Rasmussen <morten.rasmussen@arm.com> +Date: Thu, 18 Dec 2014 14:47:18 +0000 +Subject: [PATCH 53/92] sched: Calculate energy consumption of sched_group + +For energy-aware load-balancing decisions it is necessary to know the +energy consumption estimates of groups of cpus. This patch introduces a +basic function, sched_group_energy(), which estimates the energy +consumption of the cpus in the group and any resources shared by the +members of the group. + +NOTE: The function has five levels of indentation and breaks the 80 +character limit. Refactoring is necessary. + +cc: Ingo Molnar <mingo@redhat.com> +cc: Peter Zijlstra <peterz@infradead.org> +Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com> +(cherry picked from commit 1e09b216da57376d8321459eda52699cef8f7e1a) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/core.c | 4 ++ + kernel/sched/fair.c | 156 +++++++++++++++++++++++++++++++++++++++++++++++++++ + kernel/sched/sched.h | 1 + + 3 files changed, 161 insertions(+) + +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index f2642d4..431c7e0 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -6002,6 +6002,7 @@ static void destroy_sched_domains(struct sched_domain *sd) + DEFINE_PER_CPU(struct sched_domain *, sd_numa); + DEFINE_PER_CPU(struct sched_domain *, sd_asym); + DEFINE_PER_CPU(struct sched_domain *, sd_ea); ++DEFINE_PER_CPU(struct sched_domain *, sd_scs); + + static void update_top_cache_domain(int cpu) + { +@@ -6036,6 +6037,9 @@ static void update_top_cache_domain(int cpu) + break; + } + rcu_assign_pointer(per_cpu(sd_ea, cpu), 
ea_sd); ++ ++ sd = highest_flag_domain(cpu, SD_SHARE_CAP_STATES); ++ rcu_assign_pointer(per_cpu(sd_scs, cpu), sd); + } + + /* +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 0085d4f..1b8dca7 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -5331,6 +5331,162 @@ static inline bool energy_aware(void) + } + + /* ++ * cpu_norm_util() returns the cpu util relative to a specific capacity, ++ * i.e. it's busy ratio, in the range [0..SCHED_CAPACITY_SCALE] which is useful ++ * for energy calculations. Using the scale-invariant util returned by ++ * cpu_util() and approximating scale-invariant util by: ++ * ++ * util ~ (curr_freq/max_freq)*1024 * capacity_orig/1024 * running_time/time ++ * ++ * the normalized util can be found using the specific capacity. ++ * ++ * capacity = capacity_orig * curr_freq/max_freq ++ * ++ * norm_util = running_time/time ~ util/capacity ++ */ ++static unsigned long cpu_norm_util(int cpu, unsigned long capacity) ++{ ++ int util = cpu_util(cpu); ++ ++ if (util >= capacity) ++ return SCHED_CAPACITY_SCALE; ++ ++ return (util << SCHED_CAPACITY_SHIFT)/capacity; ++} ++ ++static unsigned long group_max_util(struct sched_group *sg) ++{ ++ int i; ++ unsigned long max_util = 0; ++ ++ for_each_cpu(i, sched_group_cpus(sg)) ++ max_util = max(max_util, cpu_util(i)); ++ ++ return max_util; ++} ++ ++/* ++ * group_norm_util() returns the approximated group util relative to it's ++ * current capacity (busy ratio) in the range [0..SCHED_CAPACITY_SCALE] for use ++ * in energy calculations. Since task executions may or may not overlap in time ++ * in the group the true normalized util is between max(cpu_norm_util(i)) and ++ * sum(cpu_norm_util(i)) when iterating over all cpus in the group, i. The ++ * latter is used as the estimate as it leads to a more pessimistic energy ++ * estimate (more busy). 
++ */ ++static unsigned long group_norm_util(struct sched_group *sg, int cap_idx) ++{ ++ int i; ++ unsigned long util_sum = 0; ++ unsigned long capacity = sg->sge->cap_states[cap_idx].cap; ++ ++ for_each_cpu(i, sched_group_cpus(sg)) ++ util_sum += cpu_norm_util(i, capacity); ++ ++ if (util_sum > SCHED_CAPACITY_SCALE) ++ return SCHED_CAPACITY_SCALE; ++ return util_sum; ++} ++ ++static int find_new_capacity(struct sched_group *sg, ++ const struct sched_group_energy const *sge) ++{ ++ int idx; ++ unsigned long util = group_max_util(sg); ++ ++ for (idx = 0; idx < sge->nr_cap_states; idx++) { ++ if (sge->cap_states[idx].cap >= util) ++ return idx; ++ } ++ ++ return idx; ++} ++ ++/* ++ * sched_group_energy(): Computes the absolute energy consumption of cpus ++ * belonging to the sched_group including shared resources shared only by ++ * members of the group. Iterates over all cpus in the hierarchy below the ++ * sched_group starting from the bottom working it's way up before going to ++ * the next cpu until all cpus are covered at all levels. The current ++ * implementation is likely to gather the same util statistics multiple times. ++ * This can probably be done in a faster but more complex way. ++ * Note: sched_group_energy() may fail when racing with sched_domain updates. ++ */ ++static int sched_group_energy(struct sched_group *sg_top) ++{ ++ struct sched_domain *sd; ++ int cpu, total_energy = 0; ++ struct cpumask visit_cpus; ++ struct sched_group *sg; ++ ++ WARN_ON(!sg_top->sge); ++ ++ cpumask_copy(&visit_cpus, sched_group_cpus(sg_top)); ++ ++ while (!cpumask_empty(&visit_cpus)) { ++ struct sched_group *sg_shared_cap = NULL; ++ ++ cpu = cpumask_first(&visit_cpus); ++ ++ /* ++ * Is the group utilization affected by cpus outside this ++ * sched_group? ++ */ ++ sd = rcu_dereference(per_cpu(sd_scs, cpu)); ++ ++ if (!sd) ++ /* ++ * We most probably raced with hotplug; returning a ++ * wrong energy estimation is better than entering an ++ * infinite loop. 
++ */ ++ return -EINVAL; ++ ++ if (sd->parent) ++ sg_shared_cap = sd->parent->groups; ++ ++ for_each_domain(cpu, sd) { ++ sg = sd->groups; ++ ++ /* Has this sched_domain already been visited? */ ++ if (sd->child && group_first_cpu(sg) != cpu) ++ break; ++ ++ do { ++ struct sched_group *sg_cap_util; ++ unsigned long group_util; ++ int sg_busy_energy, sg_idle_energy, cap_idx; ++ ++ if (sg_shared_cap && sg_shared_cap->group_weight >= sg->group_weight) ++ sg_cap_util = sg_shared_cap; ++ else ++ sg_cap_util = sg; ++ ++ cap_idx = find_new_capacity(sg_cap_util, sg->sge); ++ group_util = group_norm_util(sg, cap_idx); ++ sg_busy_energy = (group_util * sg->sge->cap_states[cap_idx].power) ++ >> SCHED_CAPACITY_SHIFT; ++ sg_idle_energy = ((SCHED_CAPACITY_SCALE-group_util) * sg->sge->idle_states[0].power) ++ >> SCHED_CAPACITY_SHIFT; ++ ++ total_energy += sg_busy_energy + sg_idle_energy; ++ ++ if (!sd->child) ++ cpumask_xor(&visit_cpus, &visit_cpus, sched_group_cpus(sg)); ++ ++ if (cpumask_equal(sched_group_cpus(sg), sched_group_cpus(sg_top))) ++ goto next_cpu; ++ ++ } while (sg = sg->next, sg != sd->groups); ++ } ++next_cpu: ++ continue; ++ } ++ ++ return total_energy; ++} ++ ++/* + * Detect M:N waker/wakee relationships via a switching-frequency heuristic. 
+ * + * A waker of many should wake a different task than the one last awakened +diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h +index 22b7cfd..75dcd85 100644 +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -893,6 +893,7 @@ static inline struct sched_domain *lowest_flag_domain(int cpu, int flag) + DECLARE_PER_CPU(struct sched_domain *, sd_numa); + DECLARE_PER_CPU(struct sched_domain *, sd_asym); + DECLARE_PER_CPU(struct sched_domain *, sd_ea); ++DECLARE_PER_CPU(struct sched_domain *, sd_scs); + + struct sched_group_capacity { + atomic_t ref; +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0054-sched-Extend-sched_group_energy-to-test-load-balanci.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0054-sched-Extend-sched_group_energy-to-test-load-balanci.patch new file mode 100644 index 0000000..5617b5f --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0054-sched-Extend-sched_group_energy-to-test-load-balanci.patch @@ -0,0 +1,218 @@ +From f613a647e9c9dc9981b5fe277304fea79aaacfbb Mon Sep 17 00:00:00 2001 +From: Morten Rasmussen <morten.rasmussen@arm.com> +Date: Fri, 2 Jan 2015 14:21:56 +0000 +Subject: [PATCH 54/92] sched: Extend sched_group_energy to test load-balancing + decisions + +Extended sched_group_energy() to support energy prediction with usage +(tasks) added/removed from a specific cpu or migrated between a pair of +cpus. Useful for load-balancing decision making. 
+ +cc: Ingo Molnar <mingo@redhat.com> +cc: Peter Zijlstra <peterz@infradead.org> +Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com> +(cherry picked from commit 2ab0ed691531c9d04e07c80a91b38970b08c9477) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/fair.c | 90 +++++++++++++++++++++++++++++++++++++---------------- + 1 file changed, 63 insertions(+), 27 deletions(-) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 1b8dca7..dcc73d8 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -5300,12 +5300,21 @@ static long effective_load(struct task_group *tg, int cpu, long wl, long wg) + * capacity_orig) as it useful for predicting the capacity required after task + * migrations (scheduler-driven DVFS). + */ +-static unsigned long cpu_util(int cpu) ++static unsigned long __cpu_util(int cpu, int delta) + { + unsigned long util = cpu_rq(cpu)->cfs.avg.util_avg; + unsigned long capacity = capacity_orig_of(cpu); + +- return (util >= capacity) ? capacity : util; ++ delta += util; ++ if (delta < 0) ++ return 0; ++ ++ return (delta >= capacity) ? capacity : delta; ++} ++ ++static unsigned long cpu_util(int cpu) ++{ ++ return __cpu_util(cpu, 0); + } + + static void record_wakee(struct task_struct *p) +@@ -5330,8 +5339,18 @@ static inline bool energy_aware(void) + return sched_feat(ENERGY_AWARE); + } + ++struct energy_env { ++ struct sched_group *sg_top; ++ struct sched_group *sg_cap; ++ int cap_idx; ++ int util_delta; ++ int src_cpu; ++ int dst_cpu; ++ int energy; ++}; ++ + /* +- * cpu_norm_util() returns the cpu util relative to a specific capacity, ++ * __cpu_norm_util() returns the cpu util relative to a specific capacity, + * i.e. it's busy ratio, in the range [0..SCHED_CAPACITY_SCALE] which is useful + * for energy calculations. 
Using the scale-invariant util returned by + * cpu_util() and approximating scale-invariant util by: +@@ -5344,9 +5363,9 @@ static inline bool energy_aware(void) + * + * norm_util = running_time/time ~ util/capacity + */ +-static unsigned long cpu_norm_util(int cpu, unsigned long capacity) ++static unsigned long __cpu_norm_util(int cpu, unsigned long capacity, int delta) + { +- int util = cpu_util(cpu); ++ int util = __cpu_util(cpu, delta); + + if (util >= capacity) + return SCHED_CAPACITY_SCALE; +@@ -5354,13 +5373,25 @@ static unsigned long cpu_norm_util(int cpu, unsigned long capacity) + return (util << SCHED_CAPACITY_SHIFT)/capacity; + } + +-static unsigned long group_max_util(struct sched_group *sg) ++static int calc_util_delta(struct energy_env *eenv, int cpu) + { +- int i; ++ if (cpu == eenv->src_cpu) ++ return -eenv->util_delta; ++ if (cpu == eenv->dst_cpu) ++ return eenv->util_delta; ++ return 0; ++} ++ ++static ++unsigned long group_max_util(struct energy_env *eenv) ++{ ++ int i, delta; + unsigned long max_util = 0; + +- for_each_cpu(i, sched_group_cpus(sg)) +- max_util = max(max_util, cpu_util(i)); ++ for_each_cpu(i, sched_group_cpus(eenv->sg_cap)) { ++ delta = calc_util_delta(eenv, i); ++ max_util = max(max_util, __cpu_util(i, delta)); ++ } + + return max_util; + } +@@ -5374,31 +5405,36 @@ static unsigned long group_max_util(struct sched_group *sg) + * latter is used as the estimate as it leads to a more pessimistic energy + * estimate (more busy). 
+ */ +-static unsigned long group_norm_util(struct sched_group *sg, int cap_idx) ++static unsigned ++long group_norm_util(struct energy_env *eenv, struct sched_group *sg) + { +- int i; ++ int i, delta; + unsigned long util_sum = 0; +- unsigned long capacity = sg->sge->cap_states[cap_idx].cap; ++ unsigned long capacity = sg->sge->cap_states[eenv->cap_idx].cap; + +- for_each_cpu(i, sched_group_cpus(sg)) +- util_sum += cpu_norm_util(i, capacity); ++ for_each_cpu(i, sched_group_cpus(sg)) { ++ delta = calc_util_delta(eenv, i); ++ util_sum += __cpu_norm_util(i, capacity, delta); ++ } + + if (util_sum > SCHED_CAPACITY_SCALE) + return SCHED_CAPACITY_SCALE; + return util_sum; + } + +-static int find_new_capacity(struct sched_group *sg, ++static int find_new_capacity(struct energy_env *eenv, + const struct sched_group_energy const *sge) + { + int idx; +- unsigned long util = group_max_util(sg); ++ unsigned long util = group_max_util(eenv); + + for (idx = 0; idx < sge->nr_cap_states; idx++) { + if (sge->cap_states[idx].cap >= util) +- return idx; ++ break; + } + ++ eenv->cap_idx = idx; ++ + return idx; + } + +@@ -5412,16 +5448,16 @@ static int find_new_capacity(struct sched_group *sg, + * This can probably be done in a faster but more complex way. + * Note: sched_group_energy() may fail when racing with sched_domain updates. 
+ */ +-static int sched_group_energy(struct sched_group *sg_top) ++static int sched_group_energy(struct energy_env *eenv) + { + struct sched_domain *sd; + int cpu, total_energy = 0; + struct cpumask visit_cpus; + struct sched_group *sg; + +- WARN_ON(!sg_top->sge); ++ WARN_ON(!eenv->sg_top->sge); + +- cpumask_copy(&visit_cpus, sched_group_cpus(sg_top)); ++ cpumask_copy(&visit_cpus, sched_group_cpus(eenv->sg_top)); + + while (!cpumask_empty(&visit_cpus)) { + struct sched_group *sg_shared_cap = NULL; +@@ -5453,17 +5489,16 @@ static int sched_group_energy(struct sched_group *sg_top) + break; + + do { +- struct sched_group *sg_cap_util; + unsigned long group_util; + int sg_busy_energy, sg_idle_energy, cap_idx; + + if (sg_shared_cap && sg_shared_cap->group_weight >= sg->group_weight) +- sg_cap_util = sg_shared_cap; ++ eenv->sg_cap = sg_shared_cap; + else +- sg_cap_util = sg; ++ eenv->sg_cap = sg; + +- cap_idx = find_new_capacity(sg_cap_util, sg->sge); +- group_util = group_norm_util(sg, cap_idx); ++ cap_idx = find_new_capacity(eenv, sg->sge); ++ group_util = group_norm_util(eenv, sg); + sg_busy_energy = (group_util * sg->sge->cap_states[cap_idx].power) + >> SCHED_CAPACITY_SHIFT; + sg_idle_energy = ((SCHED_CAPACITY_SCALE-group_util) * sg->sge->idle_states[0].power) +@@ -5474,7 +5509,7 @@ static int sched_group_energy(struct sched_group *sg_top) + if (!sd->child) + cpumask_xor(&visit_cpus, &visit_cpus, sched_group_cpus(sg)); + +- if (cpumask_equal(sched_group_cpus(sg), sched_group_cpus(sg_top))) ++ if (cpumask_equal(sched_group_cpus(sg), sched_group_cpus(eenv->sg_top))) + goto next_cpu; + + } while (sg = sg->next, sg != sd->groups); +@@ -5483,7 +5518,8 @@ static int sched_group_energy(struct sched_group *sg_top) + continue; + } + +- return total_energy; ++ eenv->energy = total_energy; ++ return 0; + } + + /* +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0055-sched-Estimate-energy-impact-of-scheduling-decisions.patch 
b/meta-eas/recipes-kernel/linux/linux-renesas/0055-sched-Estimate-energy-impact-of-scheduling-decisions.patch new file mode 100644 index 0000000..f6fc44d --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0055-sched-Estimate-energy-impact-of-scheduling-decisions.patch @@ -0,0 +1,84 @@ +From 9994534fcf8c013f6dfbb017055a3be2c8f48a0f Mon Sep 17 00:00:00 2001 +From: Morten Rasmussen <morten.rasmussen@arm.com> +Date: Tue, 6 Jan 2015 17:34:05 +0000 +Subject: [PATCH 55/92] sched: Estimate energy impact of scheduling decisions + +Adds a generic energy-aware helper function, energy_diff(), that +calculates energy impact of adding, removing, and migrating utilization +in the system. + +cc: Ingo Molnar <mingo@redhat.com> +cc: Peter Zijlstra <peterz@infradead.org> +Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com> +(cherry picked from commit 6f19a2b0b118f6551e22cf21c1230040a3c4f8a1) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/fair.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 52 insertions(+) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index dcc73d8..dc1fca0 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -5522,6 +5522,58 @@ static int sched_group_energy(struct energy_env *eenv) + return 0; + } + ++static inline bool cpu_in_sg(struct sched_group *sg, int cpu) ++{ ++ return cpu != -1 && cpumask_test_cpu(cpu, sched_group_cpus(sg)); ++} ++ ++/* ++ * energy_diff(): Estimate the energy impact of changing the utilization ++ * distribution. eenv specifies the change: utilisation amount, source, and ++ * destination cpu. Source or destination cpu may be -1 in which case the ++ * utilization is removed from or added to the system (e.g. task wake-up). If ++ * both are specified, the utilization is migrated. 
++ */ ++static int energy_diff(struct energy_env *eenv) ++{ ++ struct sched_domain *sd; ++ struct sched_group *sg; ++ int sd_cpu = -1, energy_before = 0, energy_after = 0; ++ ++ struct energy_env eenv_before = { ++ .util_delta = 0, ++ .src_cpu = eenv->src_cpu, ++ .dst_cpu = eenv->dst_cpu, ++ }; ++ ++ if (eenv->src_cpu == eenv->dst_cpu) ++ return 0; ++ ++ sd_cpu = (eenv->src_cpu != -1) ? eenv->src_cpu : eenv->dst_cpu; ++ sd = rcu_dereference(per_cpu(sd_ea, sd_cpu)); ++ ++ if (!sd) ++ return 0; /* Error */ ++ ++ sg = sd->groups; ++ ++ do { ++ if (cpu_in_sg(sg, eenv->src_cpu) || cpu_in_sg(sg, eenv->dst_cpu)) { ++ eenv_before.sg_top = eenv->sg_top = sg; ++ ++ if (sched_group_energy(&eenv_before)) ++ return 0; /* Invalid result abort */ ++ energy_before += eenv_before.energy; ++ ++ if (sched_group_energy(eenv)) ++ return 0; /* Invalid result abort */ ++ energy_after += eenv->energy; ++ } ++ } while (sg = sg->next, sg != sd->groups); ++ ++ return energy_after-energy_before; ++} ++ + /* + * Detect M:N waker/wakee relationships via a switching-frequency heuristic. + * +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0056-sched-cpuidle-Track-cpuidle-state-index-in-the-sched.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0056-sched-cpuidle-Track-cpuidle-state-index-in-the-sched.patch new file mode 100644 index 0000000..db293ea --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0056-sched-cpuidle-Track-cpuidle-state-index-in-the-sched.patch @@ -0,0 +1,124 @@ +From 1f3a3c308f275c6aeef63c5db216ae869db66dbc Mon Sep 17 00:00:00 2001 +From: Morten Rasmussen <morten.rasmussen@arm.com> +Date: Tue, 27 Jan 2015 13:48:07 +0000 +Subject: [PATCH 56/92] sched, cpuidle: Track cpuidle state index in the + scheduler + +The idle-state of each cpu is currently pointed to by rq->idle_state but +there isn't any information in the struct cpuidle_state that can used to +look up the idle-state energy model data stored in struct +sched_group_energy. 
For this purpose it is necessary to store the idle +state index as well. Ideally, the idle-state data should be unified. + +cc: Ingo Molnar <mingo@redhat.com> +cc: Peter Zijlstra <peterz@infradead.org> +Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com> +(cherry picked from commit b3f2122ee08c05f9290f5d3664446bfb59b54691) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + drivers/cpuidle/cpuidle.c | 4 ++-- + include/linux/cpuidle.h | 2 +- + kernel/sched/idle.c | 3 ++- + kernel/sched/sched.h | 21 +++++++++++++++++++++ + 4 files changed, 26 insertions(+), 4 deletions(-) + +diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c +index c73207a..78ab946 100644 +--- a/drivers/cpuidle/cpuidle.c ++++ b/drivers/cpuidle/cpuidle.c +@@ -192,7 +192,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, + } + + /* Take note of the planned idle state. */ +- sched_idle_set_state(target_state); ++ sched_idle_set_state(target_state, index); + + trace_cpu_idle_rcuidle(index, dev->cpu); + time_start = ns_to_ktime(local_clock()); +@@ -205,7 +205,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, + trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu); + + /* The cpu is no longer idle or about to enter idle. 
*/ +- sched_idle_set_state(NULL); ++ sched_idle_set_state(NULL, -1); + + if (broadcast) { + if (WARN_ON_ONCE(!irqs_disabled())) +diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h +index bb31373..9a8eec9 100644 +--- a/include/linux/cpuidle.h ++++ b/include/linux/cpuidle.h +@@ -207,7 +207,7 @@ static inline int cpuidle_enter_freeze(struct cpuidle_driver *drv, + #endif + + /* kernel/sched/idle.c */ +-extern void sched_idle_set_state(struct cpuidle_state *idle_state); ++extern void sched_idle_set_state(struct cpuidle_state *idle_state, int index); + extern void default_idle_call(void); + + #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED +diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c +index 1d8718d..cf75f00 100644 +--- a/kernel/sched/idle.c ++++ b/kernel/sched/idle.c +@@ -23,9 +23,10 @@ + * sched_idle_set_state - Record idle state for the current CPU. + * @idle_state: State to record. + */ +-void sched_idle_set_state(struct cpuidle_state *idle_state) ++void sched_idle_set_state(struct cpuidle_state *idle_state, int index) + { + idle_set_state(this_rq(), idle_state); ++ idle_set_state_idx(this_rq(), index); + } + + static int __read_mostly cpu_idle_force_poll; +diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h +index 75dcd85..b24cefa 100644 +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -726,6 +726,7 @@ struct rq { + #ifdef CONFIG_CPU_IDLE + /* Must be inspected within a rcu lock section */ + struct cpuidle_state *idle_state; ++ int idle_state_idx; + #endif + }; + +@@ -1335,6 +1336,17 @@ static inline struct cpuidle_state *idle_get_state(struct rq *rq) + SCHED_WARN_ON(!rcu_read_lock_held()); + return rq->idle_state; + } ++ ++static inline void idle_set_state_idx(struct rq *rq, int idle_state_idx) ++{ ++ rq->idle_state_idx = idle_state_idx; ++} ++ ++static inline int idle_get_state_idx(struct rq *rq) ++{ ++ WARN_ON(!rcu_read_lock_held()); ++ return rq->idle_state_idx; ++} + #else + static inline void idle_set_state(struct rq *rq, + 
struct cpuidle_state *idle_state) +@@ -1345,6 +1357,15 @@ static inline struct cpuidle_state *idle_get_state(struct rq *rq) + { + return NULL; + } ++ ++static inline void idle_set_state_idx(struct rq *rq, int idle_state_idx) ++{ ++} ++ ++static inline int idle_get_state_idx(struct rq *rq) ++{ ++ return -1; ++} + #endif + + extern void sysrq_sched_debug_show(void); +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0057-sched-Determine-the-current-sched_group-idle-state.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0057-sched-Determine-the-current-sched_group-idle-state.patch new file mode 100644 index 0000000..e1eec60 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0057-sched-Determine-the-current-sched_group-idle-state.patch @@ -0,0 +1,76 @@ +From ee2bf33dc8d406cab7a0827d63ad3e41dd41c96e Mon Sep 17 00:00:00 2001 +From: Dietmar Eggemann <dietmar.eggemann@arm.com> +Date: Tue, 27 Jan 2015 14:04:17 +0000 +Subject: [PATCH 57/92] sched: Determine the current sched_group idle-state + +To estimate the energy consumption of a sched_group in +sched_group_energy() it is necessary to know which idle-state the group +is in when it is idle. For now, it is assumed that this is the current +idle-state (though it might be wrong). Based on the individual cpu +idle-states group_idle_state() finds the group idle-state. 
+ +cc: Ingo Molnar <mingo@redhat.com> +cc: Peter Zijlstra <peterz@infradead.org> +Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com> +Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com> +(cherry picked from commit 2e5d2cd2670f7564bb38ec3984e3693da3f0470d) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/fair.c | 25 +++++++++++++++++++++---- + 1 file changed, 21 insertions(+), 4 deletions(-) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index dc1fca0..799e2b5 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -5438,6 +5438,20 @@ static int find_new_capacity(struct energy_env *eenv, + return idx; + } + ++static int group_idle_state(struct sched_group *sg) ++{ ++ int i, state = INT_MAX; ++ ++ /* Find the shallowest idle state in the sched group. */ ++ for_each_cpu(i, sched_group_cpus(sg)) ++ state = min(state, idle_get_state_idx(cpu_rq(i))); ++ ++ /* Take non-cpuidle idling into account (active idle/arch_cpu_idle()) */ ++ state++; ++ ++ return state; ++} ++ + /* + * sched_group_energy(): Computes the absolute energy consumption of cpus + * belonging to the sched_group including shared resources shared only by +@@ -5490,7 +5504,8 @@ static int sched_group_energy(struct energy_env *eenv) + + do { + unsigned long group_util; +- int sg_busy_energy, sg_idle_energy, cap_idx; ++ int sg_busy_energy, sg_idle_energy; ++ int cap_idx, idle_idx; + + if (sg_shared_cap && sg_shared_cap->group_weight >= sg->group_weight) + eenv->sg_cap = sg_shared_cap; +@@ -5498,11 +5513,13 @@ static int sched_group_energy(struct energy_env *eenv) + eenv->sg_cap = sg; + + cap_idx = find_new_capacity(eenv, sg->sge); ++ idle_idx = group_idle_state(sg); + group_util = group_norm_util(eenv, sg); + sg_busy_energy = (group_util * sg->sge->cap_states[cap_idx].power) +- >> SCHED_CAPACITY_SHIFT; +- sg_idle_energy = ((SCHED_CAPACITY_SCALE-group_util) * sg->sge->idle_states[0].power) +- >> SCHED_CAPACITY_SHIFT; ++ >> SCHED_CAPACITY_SHIFT; ++ 
sg_idle_energy = ((SCHED_CAPACITY_SCALE-group_util) ++ * sg->sge->idle_states[idle_idx].power) ++ >> SCHED_CAPACITY_SHIFT; + + total_energy += sg_busy_energy + sg_idle_energy; + +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0058-sched-fair-Add-energy_diff-dead-zone-margin.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0058-sched-fair-Add-energy_diff-dead-zone-margin.patch new file mode 100644 index 0000000..163fb7c --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0058-sched-fair-Add-energy_diff-dead-zone-margin.patch @@ -0,0 +1,53 @@ +From 91820aa3145f93421c29834315616f338b4fcca2 Mon Sep 17 00:00:00 2001 +From: Morten Rasmussen <morten.rasmussen@arm.com> +Date: Wed, 30 Mar 2016 14:20:12 +0100 +Subject: [PATCH 58/92] sched/fair: Add energy_diff dead-zone margin + +It is not worth the overhead to migrate tasks for tiny insignificant +energy savings. To prevent this, an energy margin is introduced in +energy_diff() which effectively adds a dead-zone that rounds tiny energy +differences to zero. Since no scale is enforced for energy model data +the margin can't be absolute. Instead it is defined as +/-1.56% energy +saving compared to the current total estimated energy consumption. 
+ +(cherry picked from commit c36ecf27db72f60934ce2c9ca7e3d97a80926c1c) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/fair.c | 14 +++++++++++++- + 1 file changed, 13 insertions(+), 1 deletion(-) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 799e2b5..5e13787 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -5556,6 +5556,7 @@ static int energy_diff(struct energy_env *eenv) + struct sched_domain *sd; + struct sched_group *sg; + int sd_cpu = -1, energy_before = 0, energy_after = 0; ++ int diff, margin; + + struct energy_env eenv_before = { + .util_delta = 0, +@@ -5588,7 +5589,18 @@ static int energy_diff(struct energy_env *eenv) + } + } while (sg = sg->next, sg != sd->groups); + +- return energy_after-energy_before; ++ /* ++ * Dead-zone margin preventing too many migrations. ++ */ ++ ++ margin = energy_before >> 6; /* ~1.56% */ ++ ++ diff = energy_after-energy_before; ++ ++ if (abs(diff) < margin) ++ return 0; ++ ++ return diff; + } + + /* +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0059-sched-Add-over-utilization-tipping-point-indicator.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0059-sched-Add-over-utilization-tipping-point-indicator.patch new file mode 100644 index 0000000..d2d4d1a --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0059-sched-Add-over-utilization-tipping-point-indicator.patch @@ -0,0 +1,187 @@ +From 237ff0550a99ada0cffde12845e13e52e69143a1 Mon Sep 17 00:00:00 2001 +From: Morten Rasmussen <morten.rasmussen@arm.com> +Date: Sat, 9 May 2015 16:49:57 +0100 +Subject: [PATCH 59/92] sched: Add over-utilization/tipping point indicator + +Energy-aware scheduling is only meant to be active while the system is +_not_ over-utilized. That is, there are spare cycles available to shift +tasks around based on their actual utilization to get a more +energy-efficient task distribution without depriving any tasks. 
When +above the tipping point, task placement is done the traditional way based +on load_avg, spreading the tasks across as many cpus as possible based +on priority scaled load to preserve smp_nice. Below the tipping point we +want to use util_avg instead. We need to define a criterion for when we +make the switch. + +The util_avg for each cpu converges towards 100% (1024) regardless of +how many additional tasks we may put on it. If we define +over-utilized as: + +sum_{cpus}(rq.cfs.avg.util_avg) + margin > sum_{cpus}(rq.capacity) + +some individual cpus may be over-utilized running multiple tasks even +when the above condition is false. That should be okay as long as we try +to spread the tasks out to avoid per-cpu over-utilization as much as +possible and if all tasks have the _same_ priority. If the latter isn't +true, we have to consider priority to preserve smp_nice. + +For example, we could have n_cpus nice=-10 util_avg=55% tasks and +n_cpus/2 nice=0 util_avg=60% tasks. Balancing based on util_avg we are +likely to end up with nice=-10 tasks sharing cpus and nice=0 tasks +getting their own as we have 1.5*n_cpus tasks in total and 55%+55% is less +over-utilized than 55%+60% for those cpus that have to be shared. The +system utilization is only 85% of the system capacity, but we are +breaking smp_nice. + +To be sure not to break smp_nice, we have defined over-utilization +conservatively as when any cpu in the system is fully utilized at its +highest frequency instead: + +cpu_rq(any).cfs.avg.util_avg + margin > cpu_rq(any).capacity + +IOW, as soon as one cpu is (nearly) 100% utilized, we switch to load_avg +to factor in priority to preserve smp_nice. + +With this definition, we can skip periodic load-balance as no cpu has an +always-running task when the system is not over-utilized. All tasks will +be periodic and we can balance them at wake-up. 
This conservative +condition does however mean that some scenarios that could benefit from +energy-aware decisions even if one cpu is fully utilized would not get +those benefits. + +For systems where some cpus might have reduced capacity +(RT-pressure and/or big.LITTLE), we want periodic load-balance checks as +soon as just a single cpu is fully utilized as it might be one of those with +reduced capacity and in that case we want to migrate it. + +cc: Ingo Molnar <mingo@redhat.com> +cc: Peter Zijlstra <peterz@infradead.org> +Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com> +(cherry picked from commit e402a7e2f2a45377f32b2925197a747f04ca1668) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/fair.c | 31 +++++++++++++++++++++++++------ + kernel/sched/sched.h | 3 +++ + 2 files changed, 28 insertions(+), 6 deletions(-) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 5e13787..db732bd 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -4715,6 +4715,8 @@ static inline void hrtick_update(struct rq *rq) + } + #endif + ++static bool cpu_overutilized(int cpu); ++ + /* + * The enqueue_task method is called before nr_running is + * increased. Here we update the fair scheduling stats and +@@ -4725,6 +4727,7 @@ static inline void hrtick_update(struct rq *rq) + { + struct cfs_rq *cfs_rq; + struct sched_entity *se = &p->se; ++ int task_new = !(flags & ENQUEUE_WAKEUP); + + /* + * If in_iowait is set, the code below may not trigger any cpufreq +@@ -4764,9 +4767,12 @@ static inline void hrtick_update(struct rq *rq) + update_cfs_shares(cfs_rq); + } + +- if (!se) ++ if (!se) { + add_nr_running(rq, 1); +- ++ if (!task_new && !rq->rd->overutilized && ++ cpu_overutilized(rq->cpu)) ++ rq->rd->overutilized = true; ++ } + hrtick_update(rq); + } + +@@ -7578,11 +7584,12 @@ group_type group_classify(struct sched_group *group, + * @local_group: Does group contain this_cpu. 
+ * @sgs: variable to hold the statistics for this group. + * @overload: Indicate more than one runnable task for any CPU. ++ * @overutilized: Indicate overutilization for any CPU. + */ + static inline void update_sg_lb_stats(struct lb_env *env, + struct sched_group *group, int load_idx, + int local_group, struct sg_lb_stats *sgs, +- bool *overload) ++ bool *overload, bool *overutilized) + { + unsigned long load; + int i, nr_running; +@@ -7616,6 +7623,9 @@ static inline void update_sg_lb_stats(struct lb_env *env, + */ + if (!nr_running && idle_cpu(i)) + sgs->idle_cpus++; ++ ++ if (cpu_overutilized(i)) ++ *overutilized = true; + } + + /* Adjust by relative CPU capacity of the group */ +@@ -7744,7 +7754,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd + struct sched_group *sg = env->sd->groups; + struct sg_lb_stats tmp_sgs; + int load_idx, prefer_sibling = 0; +- bool overload = false; ++ bool overload = false, overutilized = false; + + if (child && child->flags & SD_PREFER_SIBLING) + prefer_sibling = 1; +@@ -7766,7 +7776,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd + } + + update_sg_lb_stats(env, sg, load_idx, local_group, sgs, +- &overload); ++ &overload, &overutilized); + + if (local_group) + goto next_group; +@@ -7810,8 +7820,14 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd + /* update overload indicator if we are at root domain */ + if (env->dst_rq->rd->overload != overload) + env->dst_rq->rd->overload = overload; +- } + ++ /* Update over-utilization (tipping point, U >= 0) indicator */ ++ if (env->dst_rq->rd->overutilized != overutilized) ++ env->dst_rq->rd->overutilized = overutilized; ++ } else { ++ if (!env->dst_rq->rd->overutilized && overutilized) ++ env->dst_rq->rd->overutilized = true; ++ } + } + + /** +@@ -9192,6 +9208,9 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) + + if 
(static_branch_unlikely(&sched_numa_balancing)) + task_tick_numa(rq, curr); ++ ++ if (!rq->rd->overutilized && cpu_overutilized(task_cpu(curr))) ++ rq->rd->overutilized = true; + } + + /* +diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h +index b24cefa..fa98ab3 100644 +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -563,6 +563,9 @@ struct root_domain { + /* Indicate more than one runnable task for any CPU */ + bool overload; + ++ /* Indicate one or more cpus over-utilized (tipping point) */ ++ bool overutilized; ++ + /* + * The bit corresponding to a CPU gets set here if such CPU has more + * than one runnable -deadline task (as it is below for RT tasks). +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0060-sched-fair-Energy-aware-wake-up-task-placement.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0060-sched-fair-Energy-aware-wake-up-task-placement.patch new file mode 100644 index 0000000..d85d6fe --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0060-sched-fair-Energy-aware-wake-up-task-placement.patch @@ -0,0 +1,104 @@ +From b0ba3376b094bcf111551ee7c5e14183fe15da79 Mon Sep 17 00:00:00 2001 +From: Morten Rasmussen <morten.rasmussen@arm.com> +Date: Wed, 30 Mar 2016 14:29:48 +0100 +Subject: [PATCH 60/92] sched/fair: Energy-aware wake-up task placement + +When the system is not overutilized, place waking tasks on the most +energy efficient cpu. Previous attempts reduced the search space by +matching task utilization to cpu capacity before consulting the energy +model as this is an expensive operation. The search heuristics didn't +work very well and lacking any better alternatives this patch takes the +brute-force route and tries all potential targets. + +This approach doesn't scale, but it might be sufficient for many +embedded applications while work is continuing on a heuristic that can +minimize the necessary computations. 
The heuristic must be derived from +the platform energy model rather than make additional assumptions, such +as lower capacity implies better energy efficiency. PeterZ mentioned in the +past that we might be able to derive some simpler deciding functions +using mathematical (modal?) analysis. + +Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com> +(cherry picked from commit 8491fd68555c31e596e1888912796184d42138cc) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/fair.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 56 insertions(+) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index db732bd..58b468c 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -6148,6 +6148,59 @@ static bool cpu_overutilized(int cpu) + return (capacity_of(cpu) * 1024) < (cpu_util(cpu) * capacity_margin); + } + ++static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu) ++{ ++ int i; ++ int min_diff = 0, energy_cpu = prev_cpu, spare_cpu = prev_cpu; ++ unsigned long max_spare = 0; ++ struct sched_domain *sd; ++ ++ rcu_read_lock(); ++ ++ sd = rcu_dereference(per_cpu(sd_ea, prev_cpu)); ++ ++ if (!sd) ++ return prev_cpu; ++ ++ for_each_cpu_and(i, tsk_cpus_allowed(p), sched_domain_span(sd)) { ++ int diff; ++ unsigned long spare; ++ ++ struct energy_env eenv = { ++ .util_delta = task_util(p), ++ .src_cpu = prev_cpu, ++ .dst_cpu = i, ++ }; ++ ++ spare = capacity_spare_wake(i, p); ++ ++ if (i == prev_cpu) ++ continue; ++ ++ if (spare > max_spare) { ++ max_spare = spare; ++ spare_cpu = i; ++ } ++ ++ if (spare * 1024 < capacity_margin * task_util(p)) ++ continue; ++ ++ diff = energy_diff(&eenv); ++ ++ if (diff < min_diff) { ++ min_diff = diff; ++ energy_cpu = i; ++ } ++ } ++ ++ rcu_read_unlock(); ++ ++ if (energy_cpu == prev_cpu && !cpu_overutilized(prev_cpu)) ++ return prev_cpu; ++ ++ return energy_cpu != prev_cpu ? 
energy_cpu : spare_cpu; ++} ++ + /* + * select_task_rq_fair: Select target runqueue for the waking task in domains + * that have the 'sd_flag' flag set. In practice, this is SD_BALANCE_WAKE, +@@ -6175,6 +6228,9 @@ static bool cpu_overutilized(int cpu) + && cpumask_test_cpu(cpu, tsk_cpus_allowed(p)); + } + ++ if (energy_aware() && !(cpu_rq(prev_cpu)->rd->overutilized)) ++ return select_energy_cpu_brute(p, prev_cpu); ++ + rcu_read_lock(); + for_each_domain(cpu, tmp) { + if (!(tmp->flags & SD_LOAD_BALANCE)) +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0061-sched-fair-Fix-select_task_rq_brute-rcu_unlock-when-.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0061-sched-fair-Fix-select_task_rq_brute-rcu_unlock-when-.patch new file mode 100644 index 0000000..4756f31 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0061-sched-fair-Fix-select_task_rq_brute-rcu_unlock-when-.patch @@ -0,0 +1,38 @@ +From f4e82adc562197bc0c6411d9db0ad4497bb689a5 Mon Sep 17 00:00:00 2001 +From: Morten Rasmussen <morten.rasmussen@arm.com> +Date: Thu, 14 Apr 2016 08:39:45 +0100 +Subject: [PATCH 61/92] sched/fair: Fix select_task_rq_brute rcu_unlock when + !sd + +To be merged with original patch + +(cherry picked from commit 149b57aaecf04f0e14a2d0cef807f0c499c6d6ba) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/fair.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 58b468c..3c193f5 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -6160,7 +6160,7 @@ static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu) + sd = rcu_dereference(per_cpu(sd_ea, prev_cpu)); + + if (!sd) +- return prev_cpu; ++ goto unlock; + + for_each_cpu_and(i, tsk_cpus_allowed(p), sched_domain_span(sd)) { + int diff; +@@ -6193,6 +6193,7 @@ static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu) + } + } + ++unlock: + rcu_read_unlock(); + + if 
(energy_cpu == prev_cpu && !cpu_overutilized(prev_cpu)) +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0062-sched-Consider-a-not-over-utilized-energy-aware-syst.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0062-sched-Consider-a-not-over-utilized-energy-aware-syst.patch new file mode 100644 index 0000000..70ac245 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0062-sched-Consider-a-not-over-utilized-energy-aware-syst.patch @@ -0,0 +1,49 @@ +From f02fa9f5a150721451b41d9677f9c5f5710c09a9 Mon Sep 17 00:00:00 2001 +From: Dietmar Eggemann <dietmar.eggemann@arm.com> +Date: Sun, 10 May 2015 15:17:32 +0100 +Subject: [PATCH 62/92] sched: Consider a not over-utilized energy-aware system + as balanced + +In case the system operates below the tipping point indicator, +introduced in ("sched: Add over-utilization/tipping point +indicator"), bail out in find_busiest_group after the dst and src +group statistics have been checked. + +There is simply no need to move usage around because all involved +cpus still have spare cycles available. + +For an energy-aware system below its tipping point, we rely on the +task placement of the wakeup path. This works well for short running +tasks. + +The existence of long running tasks on one of the involved cpus lets +the system operate over its tipping point. To be able to move such +a task (whose load can't be used to average the load among the cpus) +from a src cpu with lower capacity than the dst_cpu, an additional +rule has to be implemented in need_active_balance. 
+ +Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com> +(cherry picked from commit e604c981fa91a5e5522cd4a5a057880ab54d374c) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/fair.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 3c193f5..360e922 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -8103,6 +8103,10 @@ static struct sched_group *find_busiest_group(struct lb_env *env) + * this level. + */ + update_sd_lb_stats(env, &sds); ++ ++ if (energy_aware() && !env->dst_rq->rd->overutilized) ++ goto out_balanced; ++ + local = &sds.local_stat; + busiest = &sds.busiest_stat; + +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0063-sched-Disable-energy-unfriendly-nohz-kicks.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0063-sched-Disable-energy-unfriendly-nohz-kicks.patch new file mode 100644 index 0000000..8e96e3f --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0063-sched-Disable-energy-unfriendly-nohz-kicks.patch @@ -0,0 +1,43 @@ +From 64537223eb2a922fd872741df1a82b1bee325f3e Mon Sep 17 00:00:00 2001 +From: Morten Rasmussen <morten.rasmussen@arm.com> +Date: Tue, 3 Feb 2015 13:54:11 +0000 +Subject: [PATCH 63/92] sched: Disable energy-unfriendly nohz kicks + +With energy-aware scheduling enabled nohz_kick_needed() generates many +nohz idle-balance kicks which lead to nothing when multiple tasks get +packed on a single cpu to save energy. This causes unnecessary wake-ups +and hence wastes energy. Make these conditions depend on !energy_aware() +for now until the energy-aware nohz story gets sorted out. 
+ +cc: Ingo Molnar <mingo@redhat.com> +cc: Peter Zijlstra <peterz@infradead.org> +Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com> +(cherry picked from commit a5268e57a1b257dee1ffa9cd9d670ee747a4ec88) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/fair.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 360e922..bc347af 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -9150,12 +9150,13 @@ static inline bool nohz_kick_needed(struct rq *rq) + if (time_before(now, nohz.next_balance)) + return false; + +- if (rq->nr_running >= 2) ++ if (rq->nr_running >= 2 && ++ (!energy_aware() || cpu_overutilized(cpu))) + return true; + + rcu_read_lock(); + sds = rcu_dereference(per_cpu(sd_llc_shared, cpu)); +- if (sds) { ++ if (sds && !energy_aware()) { + /* + * XXX: write a coherent comment on why we do this. + * See also: http://lkml.kernel.org/r/20111202010832.602203411@sbsiddha-desk.sc.intel.com +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0064-arm-Set-SD_SHARE_CAP_STATES-sched_domain-flag-when-a.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0064-arm-Set-SD_SHARE_CAP_STATES-sched_domain-flag-when-a.patch new file mode 100644 index 0000000..aa3fe22 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0064-arm-Set-SD_SHARE_CAP_STATES-sched_domain-flag-when-a.patch @@ -0,0 +1,104 @@ +From b1238cc2a12dba910ff1f6af592196a2743bf594 Mon Sep 17 00:00:00 2001 +From: Morten Rasmussen <morten.rasmussen@arm.com> +Date: Fri, 12 Aug 2016 11:29:04 +0100 +Subject: [PATCH 64/92] arm: Set SD_SHARE_CAP_STATES sched_domain flag when + applicable + +Energy-aware scheduling relies on the SD_SHARE_CAP_STATES to identify +sharing the same clock source/frequency domain. The assumption is that a +sched_domain exists that match the clock/frequency domain, i.e. +policy->related_cpus in cpufreq terms. 
The flag is not set for systems +without frequency scaling or systems with per-cpu frequency scaling. + +cc: Russell King <linux@armlinux.org.uk> + +Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com> +(cherry picked from commit cca07e1a18915b6990c1cf464686bcb24d4d1472) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + arch/arm/kernel/topology.c | 33 ++++++++++++++++++++++++++++++--- + 1 file changed, 30 insertions(+), 3 deletions(-) + +diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c +index 4d94639..c77f39f 100644 +--- a/arch/arm/kernel/topology.c ++++ b/arch/arm/kernel/topology.c +@@ -45,6 +45,7 @@ + static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE; + static DEFINE_MUTEX(cpu_scale_mutex); + static bool asym_cpucap; ++static bool sd_mc_share_cap, sd_die_share_cap; + static bool update_flags; + + unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu) +@@ -268,6 +269,14 @@ static void normalize_cpu_capacity(void) + pr_debug("cpu_capacity: calling %s for CPUs [%*pbl] (to_visit=%*pbl)\n", + __func__, cpumask_pr_args(policy->related_cpus), + cpumask_pr_args(cpus_to_visit)); ++ ++ cpu = cpumask_first(policy->related_cpus); ++ ++ if (cpumask_subset(cpu_coregroup_mask(cpu), policy->related_cpus)) ++ sd_mc_share_cap = true; ++ else if (cpumask_subset(cpu_cpu_mask(cpu), policy->related_cpus)) ++ sd_die_share_cap = true; ++ + cpumask_andnot(cpus_to_visit, + cpus_to_visit, + policy->related_cpus); +@@ -279,11 +288,13 @@ static void normalize_cpu_capacity(void) + policy->cpuinfo.max_freq / 1000UL; + capacity_scale = max(raw_capacity[cpu], capacity_scale); + } ++ + if (cpumask_empty(cpus_to_visit)) { + if (!cap_parsing_failed) { + asym = asym_cpucap; + normalize_cpu_capacity(); +- if (asym != asym_cpucap) ++ if (asym != asym_cpucap || ++ sd_mc_share_cap || sd_die_share_cap) + update_sched_flags(); + kfree(raw_capacity); + pr_debug("cpu_capacity: parsing done"); +@@ -477,6 +488,9 @@ static void 
update_cpu_capacity(unsigned int cpu) + if (scale_cpu_capacity(NULL, cpu) < SCHED_CAPACITY_SCALE) + asym_cpucap = true; + ++ if (scale_cpu_capacity(NULL, cpu) < SCHED_CAPACITY_SCALE) ++ asym_cpucap = true; ++ + pr_info("CPU%u: update cpu_capacity %lu\n", + cpu, arch_scale_cpu_capacity(NULL, cpu)); + } +@@ -594,12 +608,25 @@ void store_cpu_topology(unsigned int cpuid) + + static inline int cpu_corepower_flags(void) + { +- return SD_SHARE_PKG_RESOURCES | SD_SHARE_POWERDOMAIN; ++ int mc_flags = SD_SHARE_PKG_RESOURCES | SD_SHARE_POWERDOMAIN; ++ ++ if (sd_mc_share_cap) ++ mc_flags |= SD_SHARE_CAP_STATES; ++ ++ return mc_flags; + } + + static inline int arm_cpu_cpu_flags(void) + { +- return asym_cpucap ? SD_ASYM_CPUCAPACITY : 0; ++ int die_flags = 0; ++ ++ if (asym_cpucap) ++ die_flags |= SD_ASYM_CPUCAPACITY; ++ ++ if (sd_die_share_cap) ++ die_flags |= SD_SHARE_CAP_STATES; ++ ++ return die_flags; + } + + static struct sched_domain_topology_level arm_topology[] = { +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0065-arm64-Set-SD_SHARE_CAP_STATES-sched_domain-flag-when.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0065-arm64-Set-SD_SHARE_CAP_STATES-sched_domain-flag-when.patch new file mode 100644 index 0000000..aac955a --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0065-arm64-Set-SD_SHARE_CAP_STATES-sched_domain-flag-when.patch @@ -0,0 +1,98 @@ +From 2a8f391ef5bb8d5c68b731743dd1f2b9f1fb0d46 Mon Sep 17 00:00:00 2001 +From: Morten Rasmussen <morten.rasmussen@arm.com> +Date: Fri, 12 Aug 2016 10:53:13 +0100 +Subject: [PATCH 65/92] arm64: Set SD_SHARE_CAP_STATES sched_domain flag when + applicable + +Energy-aware scheduling relies on the SD_SHARE_CAP_STATES to identify +sharing the same clock source/frequency domain. The assumption is that a +sched_domain exists that match the clock/frequency domain, i.e. +policy->related_cpus in cpufreq terms. 
The flag is not set for systems +without frequency scaling or systems with per-cpu frequency scaling. + +cc: Catalin Marinas <catalin.marinas@arm.com> +cc: Will Deacon <will.deacon@arm.com> + +Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com> +(cherry picked from commit 2c36006aa6a75c35269d26b08f2e86bad68d72bd) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + arch/arm64/kernel/topology.c | 34 +++++++++++++++++++++++++++++++--- + 1 file changed, 31 insertions(+), 3 deletions(-) + +diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c +index eb2ad75..a8d986e 100644 +--- a/arch/arm64/kernel/topology.c ++++ b/arch/arm64/kernel/topology.c +@@ -29,6 +29,7 @@ + static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE; + static DEFINE_MUTEX(cpu_scale_mutex); + static bool asym_cpucap; ++static bool sd_mc_share_cap, sd_die_share_cap; + static bool update_flags; + + unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu) +@@ -224,6 +225,14 @@ static void normalize_cpu_capacity(void) + pr_debug("cpu_capacity: calling %s for CPUs [%*pbl] (to_visit=[%*pbl])\n", + __func__, cpumask_pr_args(policy->related_cpus), + cpumask_pr_args(cpus_to_visit)); ++ ++ cpu = cpumask_first(policy->related_cpus); ++ ++ if (cpumask_subset(cpu_coregroup_mask(cpu), policy->related_cpus)) ++ sd_mc_share_cap = true; ++ else if (cpumask_subset(cpu_cpu_mask(cpu), policy->related_cpus)) ++ sd_die_share_cap = true; ++ + cpumask_andnot(cpus_to_visit, + cpus_to_visit, + policy->related_cpus); +@@ -239,7 +248,8 @@ static void normalize_cpu_capacity(void) + if (!cap_parsing_failed) { + asym = asym_cpucap; + normalize_cpu_capacity(); +- if (asym != asym_cpucap) ++ if (asym != asym_cpucap || ++ sd_mc_share_cap || sd_die_share_cap) + update_sched_flags(); + kfree(raw_capacity); + pr_debug("cpu_capacity: parsing done\n"); +@@ -522,7 +532,25 @@ const struct cpumask *cpu_coregroup_mask(int cpu) + + static int cpu_cpu_flags(void) + { +- return 
asym_cpucap ? SD_ASYM_CPUCAPACITY : 0; ++ int die_flags = 0; ++ ++ if (asym_cpucap) ++ die_flags |= SD_ASYM_CPUCAPACITY; ++ ++ if (sd_die_share_cap) ++ die_flags |= SD_SHARE_CAP_STATES; ++ ++ return die_flags; ++} ++ ++static int cpu_coregroup_flags(void) ++{ ++ int mc_flags = SD_SHARE_PKG_RESOURCES; ++ ++ if (sd_mc_share_cap) ++ mc_flags |= SD_SHARE_CAP_STATES; ++ ++ return mc_flags; + } + + static void update_siblings_masks(unsigned int cpuid) +@@ -608,7 +636,7 @@ static void __init reset_cpu_topology(void) + + static struct sched_domain_topology_level arm64_topology[] = { + #ifdef CONFIG_SCHED_MC +- { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, ++ { cpu_coregroup_mask, cpu_coregroup_flags, SD_INIT_NAME(MC) }, + #endif + { cpu_cpu_mask, cpu_cpu_flags, SD_INIT_NAME(DIE) }, + { NULL, } +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0066-arm-topology-Define-TC2-energy-and-provide-it-to-the.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0066-arm-topology-Define-TC2-energy-and-provide-it-to-the.patch new file mode 100644 index 0000000..8f7e877 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0066-arm-topology-Define-TC2-energy-and-provide-it-to-the.patch @@ -0,0 +1,180 @@ +From 709f084599231a964047d3af46465428b2b2f56a Mon Sep 17 00:00:00 2001 +From: Dietmar Eggemann <dietmar.eggemann@arm.com> +Date: Fri, 14 Nov 2014 17:16:41 +0000 +Subject: [PATCH 66/92] arm: topology: Define TC2 energy and provide it to the + scheduler + +This patch is only here to be able to test provisioning of energy related +data from an arch topology shim layer to the scheduler. Since there is no +code today which deals with extracting energy related data from the dtb or +acpi, and process it in the topology shim layer, the content of the +sched_group_energy structures as well as the idle_state and capacity_state +arrays are hard-coded here. 
+ +This patch defines the sched_group_energy structure as well as the +idle_state and capacity_state array for the cluster (relates to sched +groups (sgs) in DIE sched domain level) and for the core (relates to sgs +in MC sd level) for a Cortex A7 as well as for a Cortex A15. +It further provides related implementations of the sched_domain_energy_f +functions (cpu_cluster_energy() and cpu_core_energy()). + +To be able to propagate this information from the topology shim layer to +the scheduler, the elements of the arm_topology[] table have been +provisioned with the appropriate sched_domain_energy_f functions. + +cc: Russell King <linux@arm.linux.org.uk> +Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com> +(cherry picked from commit 28ef5dd535285f5edcc4892496925156f453c636) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + arch/arm/kernel/topology.c | 126 +++++++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 123 insertions(+), 3 deletions(-) + +diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c +index c77f39f..08c4749 100644 +--- a/arch/arm/kernel/topology.c ++++ b/arch/arm/kernel/topology.c +@@ -606,6 +606,127 @@ void store_cpu_topology(unsigned int cpuid) + cpu_topology[cpuid].socket_id, mpidr); + } + ++/* ++ * ARM TC2 specific energy cost model data. There are no unit requirements for ++ * the data. Data can be normalized to any reference point, but the ++ * normalization must be consistent. That is, one bogo-joule/watt must be the ++ * same quantity for all data, but we don't care what it is. 
++ */ ++static struct idle_state idle_states_cluster_a7[] = { ++ { .power = 25 }, /* arch_cpu_idle() (active idle) = WFI */ ++ { .power = 25 }, /* WFI */ ++ { .power = 10 }, /* cluster-sleep-l */ ++ }; ++ ++static struct idle_state idle_states_cluster_a15[] = { ++ { .power = 70 }, /* arch_cpu_idle() (active idle) = WFI */ ++ { .power = 70 }, /* WFI */ ++ { .power = 25 }, /* cluster-sleep-b */ ++ }; ++ ++static struct capacity_state cap_states_cluster_a7[] = { ++ /* Cluster only power */ ++ { .cap = 150, .power = 2967, }, /* 350 MHz */ ++ { .cap = 172, .power = 2792, }, /* 400 MHz */ ++ { .cap = 215, .power = 2810, }, /* 500 MHz */ ++ { .cap = 258, .power = 2815, }, /* 600 MHz */ ++ { .cap = 301, .power = 2919, }, /* 700 MHz */ ++ { .cap = 344, .power = 2847, }, /* 800 MHz */ ++ { .cap = 387, .power = 3917, }, /* 900 MHz */ ++ { .cap = 430, .power = 4905, }, /* 1000 MHz */ ++ }; ++ ++static struct capacity_state cap_states_cluster_a15[] = { ++ /* Cluster only power */ ++ { .cap = 426, .power = 7920, }, /* 500 MHz */ ++ { .cap = 512, .power = 8165, }, /* 600 MHz */ ++ { .cap = 597, .power = 8172, }, /* 700 MHz */ ++ { .cap = 682, .power = 8195, }, /* 800 MHz */ ++ { .cap = 768, .power = 8265, }, /* 900 MHz */ ++ { .cap = 853, .power = 8446, }, /* 1000 MHz */ ++ { .cap = 938, .power = 11426, }, /* 1100 MHz */ ++ { .cap = 1024, .power = 15200, }, /* 1200 MHz */ ++ }; ++ ++static struct sched_group_energy energy_cluster_a7 = { ++ .nr_idle_states = ARRAY_SIZE(idle_states_cluster_a7), ++ .idle_states = idle_states_cluster_a7, ++ .nr_cap_states = ARRAY_SIZE(cap_states_cluster_a7), ++ .cap_states = cap_states_cluster_a7, ++}; ++ ++static struct sched_group_energy energy_cluster_a15 = { ++ .nr_idle_states = ARRAY_SIZE(idle_states_cluster_a15), ++ .idle_states = idle_states_cluster_a15, ++ .nr_cap_states = ARRAY_SIZE(cap_states_cluster_a15), ++ .cap_states = cap_states_cluster_a15, ++}; ++ ++static struct idle_state idle_states_core_a7[] = { ++ { .power = 0 }, /* 
arch_cpu_idle (active idle) = WFI */ ++ { .power = 0 }, /* WFI */ ++ { .power = 0 }, /* cluster-sleep-l */ ++ }; ++ ++static struct idle_state idle_states_core_a15[] = { ++ { .power = 0 }, /* arch_cpu_idle (active idle) = WFI */ ++ { .power = 0 }, /* WFI */ ++ { .power = 0 }, /* cluster-sleep-b */ ++ }; ++ ++static struct capacity_state cap_states_core_a7[] = { ++ /* Power per cpu */ ++ { .cap = 150, .power = 187, }, /* 350 MHz */ ++ { .cap = 172, .power = 275, }, /* 400 MHz */ ++ { .cap = 215, .power = 334, }, /* 500 MHz */ ++ { .cap = 258, .power = 407, }, /* 600 MHz */ ++ { .cap = 301, .power = 447, }, /* 700 MHz */ ++ { .cap = 344, .power = 549, }, /* 800 MHz */ ++ { .cap = 387, .power = 761, }, /* 900 MHz */ ++ { .cap = 430, .power = 1024, }, /* 1000 MHz */ ++ }; ++ ++static struct capacity_state cap_states_core_a15[] = { ++ /* Power per cpu */ ++ { .cap = 426, .power = 2021, }, /* 500 MHz */ ++ { .cap = 512, .power = 2312, }, /* 600 MHz */ ++ { .cap = 597, .power = 2756, }, /* 700 MHz */ ++ { .cap = 682, .power = 3125, }, /* 800 MHz */ ++ { .cap = 768, .power = 3524, }, /* 900 MHz */ ++ { .cap = 853, .power = 3846, }, /* 1000 MHz */ ++ { .cap = 938, .power = 5177, }, /* 1100 MHz */ ++ { .cap = 1024, .power = 6997, }, /* 1200 MHz */ ++ }; ++ ++static struct sched_group_energy energy_core_a7 = { ++ .nr_idle_states = ARRAY_SIZE(idle_states_core_a7), ++ .idle_states = idle_states_core_a7, ++ .nr_cap_states = ARRAY_SIZE(cap_states_core_a7), ++ .cap_states = cap_states_core_a7, ++}; ++ ++static struct sched_group_energy energy_core_a15 = { ++ .nr_idle_states = ARRAY_SIZE(idle_states_core_a15), ++ .idle_states = idle_states_core_a15, ++ .nr_cap_states = ARRAY_SIZE(cap_states_core_a15), ++ .cap_states = cap_states_core_a15, ++}; ++ ++/* sd energy functions */ ++static inline ++const struct sched_group_energy * const cpu_cluster_energy(int cpu) ++{ ++ return cpu_topology[cpu].socket_id ? 
&energy_cluster_a7 : ++ &energy_cluster_a15; ++} ++ ++static inline ++const struct sched_group_energy * const cpu_core_energy(int cpu) ++{ ++ return cpu_topology[cpu].socket_id ? &energy_core_a7 : ++ &energy_core_a15; ++} ++ + static inline int cpu_corepower_flags(void) + { + int mc_flags = SD_SHARE_PKG_RESOURCES | SD_SHARE_POWERDOMAIN; +@@ -631,10 +752,9 @@ static inline int arm_cpu_cpu_flags(void) + + static struct sched_domain_topology_level arm_topology[] = { + #ifdef CONFIG_SCHED_MC +- { cpu_corepower_mask, cpu_corepower_flags, SD_INIT_NAME(GMC) }, +- { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, ++ { cpu_coregroup_mask, cpu_corepower_flags, cpu_core_energy, SD_INIT_NAME(MC) }, + #endif +- { cpu_cpu_mask, arm_cpu_cpu_flags, SD_INIT_NAME(DIE) }, ++ { cpu_cpu_mask, arm_cpu_cpu_flags, cpu_cluster_energy, SD_INIT_NAME(DIE) }, + { NULL, }, + }; + +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0067-arm-Cpu-invariant-scheduler-load-tracking-and-capaci.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0067-arm-Cpu-invariant-scheduler-load-tracking-and-capaci.patch new file mode 100644 index 0000000..ac96677 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0067-arm-Cpu-invariant-scheduler-load-tracking-and-capaci.patch @@ -0,0 +1,76 @@ +From facd4083a41b455eacc6ff7b89d5eb4eceb53b39 Mon Sep 17 00:00:00 2001 +From: Dietmar Eggemann <dietmar.eggemann@arm.com> +Date: Fri, 10 Jul 2015 13:57:19 +0100 +Subject: [PATCH 67/92] arm: Cpu invariant scheduler load-tracking and capacity + support + +Provides the scheduler with a cpu scaling correction factor for more +accurate load-tracking and cpu capacity handling. + +The Energy Model (EM) (in fact the capacity value of the last element +of the capacity states vector of the core (MC) level sched_group_energy +structure) is used instead of the arm arch specific cpu_efficiency and +dtb property 'clock-frequency' values as the source for this cpu +scaling factor. 
+ +The cpu capacity value depends on the micro-architecture and the +maximum frequency of the cpu. + +The maximum frequency part should not be confused with the frequency +invariant scheduler load-tracking support which deals with frequency +related scaling due to DVFS functionality. + +Signed-off-by: Juri Lelli <juri.lelli@arm.com> +Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com> +(cherry picked from commit e47ea7b6779f5b3b176fb5966c51e638c122656d) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + arch/arm/kernel/topology.c | 18 +++++++++++++++--- + 1 file changed, 15 insertions(+), 3 deletions(-) + +diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c +index 08c4749..c85e1d0 100644 +--- a/arch/arm/kernel/topology.c ++++ b/arch/arm/kernel/topology.c +@@ -218,12 +218,17 @@ static int __init parse_cpu_capacity(struct device_node *cpu_node, int cpu) + return !ret; + } + ++static const struct sched_group_energy * const cpu_core_energy(int cpu); ++ + static void normalize_cpu_capacity(void) + { + u64 capacity; + int cpu; + bool asym = false; + ++ if (cpu_core_energy(0)) ++ return; ++ + if (!raw_capacity || cap_parsing_failed) + return; + +@@ -480,10 +485,17 @@ static void __init parse_dt_topology(void) + */ + static void update_cpu_capacity(unsigned int cpu) + { +- if (!cpu_capacity(cpu) || cap_from_dt) +- return; ++ if (cpu_core_energy(cpu)) { ++ unsigned long capacity; ++ int max_cap_idx = cpu_core_energy(cpu)->nr_cap_states - 1; ++ capacity = cpu_core_energy(cpu)->cap_states[max_cap_idx].cap; ++ set_capacity_scale(cpu, capacity); ++ } else { ++ if (!cpu_capacity(cpu) || cap_from_dt) ++ return; + +- set_capacity_scale(cpu, cpu_capacity(cpu) / middle_capacity); ++ set_capacity_scale(cpu, cpu_capacity(cpu) / middle_capacity); ++ } + + if (scale_cpu_capacity(NULL, cpu) < SCHED_CAPACITY_SCALE) + asym_cpucap = true; +-- +1.9.1 + diff --git 
a/meta-eas/recipes-kernel/linux/linux-renesas/0068-arm64-topology-Define-JUNO-energy-and-provide-it-to-.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0068-arm64-topology-Define-JUNO-energy-and-provide-it-to-.patch new file mode 100644 index 0000000..809e37f --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0068-arm64-topology-Define-JUNO-energy-and-provide-it-to-.patch @@ -0,0 +1,171 @@ +From 5ef504e0ddfbc6856799fc6c342f36e0e4d18cbd Mon Sep 17 00:00:00 2001 +From: Juri Lelli <juri.lelli@arm.com> +Date: Tue, 10 Feb 2015 12:05:22 +0000 +Subject: [PATCH 68/92] arm64, topology: Define JUNO energy and provide it to + the scheduler + +This patch is only here to be able to test provisioning of energy related +data from an arch topology shim layer to the scheduler. Since there is no +code today which deals with extracting energy related data from the dtb or +acpi, and process it in the topology shim layer, the content of the +sched_group_energy structures as well as the idle_state and capacity_state +arrays are hard-coded here. + +This patch defines the sched_group_energy structure as well as the +idle_state and capacity_state array for the cluster (relates to sched +groups (sgs) in DIE sched domain level) and for the core (relates to sgs +in MC sd level) for a Cortex A53 as well as for a Cortex A57. +It further provides related implementations of the sched_domain_energy_f +functions (cpu_cluster_energy() and cpu_core_energy()). + +To be able to propagate this information from the topology shim layer to +the scheduler, the elements of the arm_topology[] table have been +provisioned with the appropriate sched_domain_energy_f functions. 
+ +Signed-off-by: Juri Lelli <juri.lelli@arm.com> +(cherry picked from commit 037aceb5c5b768295bdda8f111ac9b0c8f97cc79) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + arch/arm64/kernel/topology.c | 118 ++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 116 insertions(+), 2 deletions(-) + +diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c +index a8d986e..a75db2b 100644 +--- a/arch/arm64/kernel/topology.c ++++ b/arch/arm64/kernel/topology.c +@@ -525,6 +525,120 @@ static int __init parse_dt_topology(void) + struct cpu_topology cpu_topology[NR_CPUS]; + EXPORT_SYMBOL_GPL(cpu_topology); + ++/* ++ * ARM JUNO specific energy cost model data. There are no unit requirements for ++ * the data. Data can be normalized to any reference point, but the ++ * normalization must be consistent. That is, one bogo-joule/watt must be the ++ * same quantity for all data, but we don't care what it is. ++ */ ++ ++static struct idle_state idle_states_cluster_a53[] = { ++ { .power = 56 }, /* arch_cpu_idle() (active idle) = WFI */ ++ { .power = 56 }, /* WFI */ ++ { .power = 56 }, /* cpu-sleep-0 */ ++ { .power = 17 }, /* cluster-sleep-0 */ ++}; ++ ++static struct idle_state idle_states_cluster_a57[] = { ++ { .power = 65 }, /* arch_cpu_idle() (active idle) = WFI */ ++ { .power = 65 }, /* WFI */ ++ { .power = 65 }, /* cpu-sleep-0 */ ++ { .power = 24 }, /* cluster-sleep-0 */ ++}; ++ ++static struct capacity_state cap_states_cluster_a53[] = { ++ /* Power per cluster */ ++ { .cap = 235, .power = 26, }, /* 450 MHz */ ++ { .cap = 303, .power = 30, }, /* 575 MHz */ ++ { .cap = 368, .power = 39, }, /* 700 MHz */ ++ { .cap = 406, .power = 47, }, /* 775 MHz */ ++ { .cap = 447, .power = 57, }, /* 850 Mhz */ ++}; ++ ++static struct capacity_state cap_states_cluster_a57[] = { ++ /* Power per cluster */ ++ { .cap = 417, .power = 24, }, /* 450 MHz */ ++ { .cap = 579, .power = 32, }, /* 625 MHz */ ++ { .cap = 744, .power = 43, }, /* 800 MHz */ ++ { .cap = 883, 
.power = 49, }, /* 950 MHz */ ++ { .cap = 1024, .power = 64, }, /* 1100 MHz */ ++}; ++ ++static struct sched_group_energy energy_cluster_a53 = { ++ .nr_idle_states = ARRAY_SIZE(idle_states_cluster_a53), ++ .idle_states = idle_states_cluster_a53, ++ .nr_cap_states = ARRAY_SIZE(cap_states_cluster_a53), ++ .cap_states = cap_states_cluster_a53, ++}; ++ ++static struct sched_group_energy energy_cluster_a57 = { ++ .nr_idle_states = ARRAY_SIZE(idle_states_cluster_a57), ++ .idle_states = idle_states_cluster_a57, ++ .nr_cap_states = ARRAY_SIZE(cap_states_cluster_a57), ++ .cap_states = cap_states_cluster_a57, ++}; ++ ++static struct idle_state idle_states_core_a53[] = { ++ { .power = 6 }, /* arch_cpu_idle() (active idle) = WFI */ ++ { .power = 6 }, /* WFI */ ++ { .power = 0 }, /* cpu-sleep-0 */ ++ { .power = 0 }, /* cluster-sleep-0 */ ++}; ++ ++static struct idle_state idle_states_core_a57[] = { ++ { .power = 15 }, /* arch_cpu_idle() (active idle) = WFI */ ++ { .power = 15 }, /* WFI */ ++ { .power = 0 }, /* cpu-sleep-0 */ ++ { .power = 0 }, /* cluster-sleep-0 */ ++}; ++ ++static struct capacity_state cap_states_core_a53[] = { ++ /* Power per cpu */ ++ { .cap = 235, .power = 33, }, /* 450 MHz */ ++ { .cap = 302, .power = 46, }, /* 575 MHz */ ++ { .cap = 368, .power = 61, }, /* 700 MHz */ ++ { .cap = 406, .power = 76, }, /* 775 MHz */ ++ { .cap = 447, .power = 93, }, /* 850 Mhz */ ++}; ++ ++static struct capacity_state cap_states_core_a57[] = { ++ /* Power per cpu */ ++ { .cap = 417, .power = 168, }, /* 450 MHz */ ++ { .cap = 579, .power = 251, }, /* 625 MHz */ ++ { .cap = 744, .power = 359, }, /* 800 MHz */ ++ { .cap = 883, .power = 479, }, /* 950 MHz */ ++ { .cap = 1024, .power = 616, }, /* 1100 MHz */ ++}; ++ ++static struct sched_group_energy energy_core_a53 = { ++ .nr_idle_states = ARRAY_SIZE(idle_states_core_a53), ++ .idle_states = idle_states_core_a53, ++ .nr_cap_states = ARRAY_SIZE(cap_states_core_a53), ++ .cap_states = cap_states_core_a53, ++}; ++ ++static struct 
sched_group_energy energy_core_a57 = { ++ .nr_idle_states = ARRAY_SIZE(idle_states_core_a57), ++ .idle_states = idle_states_core_a57, ++ .nr_cap_states = ARRAY_SIZE(cap_states_core_a57), ++ .cap_states = cap_states_core_a57, ++}; ++ ++/* sd energy functions */ ++static inline ++const struct sched_group_energy * const cpu_cluster_energy(int cpu) ++{ ++ return cpu_topology[cpu].cluster_id ? &energy_cluster_a53 : ++ &energy_cluster_a57; ++} ++ ++static inline ++const struct sched_group_energy * const cpu_core_energy(int cpu) ++{ ++ return cpu_topology[cpu].cluster_id ? &energy_core_a53 : ++ &energy_core_a57; ++} ++ + const struct cpumask *cpu_coregroup_mask(int cpu) + { + return &cpu_topology[cpu].core_sibling; +@@ -636,9 +750,9 @@ static void __init reset_cpu_topology(void) + + static struct sched_domain_topology_level arm64_topology[] = { + #ifdef CONFIG_SCHED_MC +- { cpu_coregroup_mask, cpu_coregroup_flags, SD_INIT_NAME(MC) }, ++ { cpu_coregroup_mask, cpu_coregroup_flags, cpu_core_energy, SD_INIT_NAME(MC) }, + #endif +- { cpu_cpu_mask, cpu_cpu_flags, SD_INIT_NAME(DIE) }, ++ { cpu_cpu_mask, cpu_cpu_flags, cpu_cluster_energy, SD_INIT_NAME(DIE) }, + { NULL, } + }; + +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0069-arm64-Cpu-invariant-scheduler-load-tracking-and-capa.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0069-arm64-Cpu-invariant-scheduler-load-tracking-and-capa.patch new file mode 100644 index 0000000..50d6766 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0069-arm64-Cpu-invariant-scheduler-load-tracking-and-capa.patch @@ -0,0 +1,88 @@ +From 4e480c5d43dc3cee5c0b7664d0ee394d1206d994 Mon Sep 17 00:00:00 2001 +From: Juri Lelli <juri.lelli@arm.com> +Date: Thu, 30 Apr 2015 11:53:48 +0100 +Subject: [PATCH 69/92] arm64: Cpu invariant scheduler load-tracking and + capacity support + +Provides the scheduler with a cpu scaling correction factor for more +accurate load-tracking and cpu capacity handling. 
+ +The Energy Model (EM) (in fact the capacity value of the last element +of the capacity states vector of the core (MC) level sched_group_energy +structure) is used as the source for this cpu scaling factor. + +The cpu capacity value depends on the micro-architecture and the +maximum frequency of the cpu. + +The maximum frequency part should not be confused with the frequency +invariant scheduler load-tracking support which deals with frequency +related scaling due to DVFS functionality. + +Signed-off-by: Juri Lelli <juri.lelli@arm.com> +Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com> +(cherry picked from commit ea98c1ac6d9b31033caa1f29ed1b8b707f437ae8) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + arch/arm64/kernel/topology.c | 26 ++++++++++++++++++++++++++ + 1 file changed, 26 insertions(+) + +diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c +index a75db2b..761fcb6 100644 +--- a/arch/arm64/kernel/topology.c ++++ b/arch/arm64/kernel/topology.c +@@ -172,12 +172,17 @@ static void __init parse_cpu_capacity(struct device_node *cpu_node, int cpu) + } + } + ++static const struct sched_group_energy * const cpu_core_energy(int cpu); ++ + static void normalize_cpu_capacity(void) + { + u64 capacity; + int cpu; + bool asym = false; + ++ if (cpu_core_energy(0)) ++ return; ++ + if (!raw_capacity || cap_parsing_failed) + return; + +@@ -667,6 +672,26 @@ static int cpu_coregroup_flags(void) + return mc_flags; + } + ++static void update_cpu_capacity(unsigned int cpu) ++{ ++ unsigned long capacity; ++ int max_cap_idx; ++ ++ if (!cpu_core_energy(cpu)) ++ return; ++ ++ max_cap_idx = cpu_core_energy(cpu)->nr_cap_states - 1; ++ capacity = cpu_core_energy(cpu)->cap_states[max_cap_idx].cap; ++ ++ set_capacity_scale(cpu, capacity); ++ ++ if (capacity < SCHED_CAPACITY_SCALE) ++ asym_cpucap = true; ++ ++ pr_info("CPU%d: update cpu_capacity %lu\n", ++ cpu, arch_scale_cpu_capacity(NULL, cpu)); ++} ++ + static void 
update_siblings_masks(unsigned int cpuid) + { + struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid]; +@@ -728,6 +753,7 @@ void store_cpu_topology(unsigned int cpuid) + + topology_populated: + update_siblings_masks(cpuid); ++ update_cpu_capacity(cpuid); + } + + static void __init reset_cpu_topology(void) +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0070-arm64-Factor-out-energy-model-from-topology-shim-lay.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0070-arm64-Factor-out-energy-model-from-topology-shim-lay.patch new file mode 100644 index 0000000..f6ec5be --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0070-arm64-Factor-out-energy-model-from-topology-shim-lay.patch @@ -0,0 +1,353 @@ +From 19b9b07f001278198a4d3aae6744213beb1a75bd Mon Sep 17 00:00:00 2001 +From: Dietmar Eggemann <dietmar.eggemann@arm.com> +Date: Mon, 15 Aug 2016 22:38:20 +0100 +Subject: [PATCH 70/92] arm64: Factor out energy model from topology shim layer + +To be able to support multiple energy models before we have the +full-fledged dt solution in arm64 (e.g. for platform Arm Juno and +Hisilicon Hikey) factor out the static energy model data and the +appropriate access function into energy_model.h. + +The patch uses of_match_node() to match the compatible string with the +appropriate platform energy model data, i.e. the patch introduces a +dependency to CONFIG_OF_FLATTREE for propagating the energy model data +towards the task scheduler. 
+ +Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com> +(cherry picked from commit 6a48e61e481ef4952be07345efb10bc5cd47d22d) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + arch/arm64/kernel/energy_model.h | 177 +++++++++++++++++++++++++++++++++++++++ + arch/arm64/kernel/topology.c | 118 +------------------------- + 2 files changed, 181 insertions(+), 114 deletions(-) + create mode 100644 arch/arm64/kernel/energy_model.h + +diff --git a/arch/arm64/kernel/energy_model.h b/arch/arm64/kernel/energy_model.h +new file mode 100644 +index 0000000..170dd84 +--- /dev/null ++++ b/arch/arm64/kernel/energy_model.h +@@ -0,0 +1,177 @@ ++/* ++ * arch/arm64/kernel/energy_model.h ++ * ++ * Copyright (C) 2016 ARM Ltd. ++ * ++ * This file is subject to the terms and conditions of the GNU General Public ++ * License. See the file "COPYING" in the main directory of this archive ++ * for more details. ++ */ ++ ++#include <linux/of_fdt.h> ++ ++/* ++ * Energy cost model data. There are no unit requirements for the data. ++ * Data can be normalized to any reference point, but the normalization ++ * must be consistent. That is, one bogo-joule/watt must be the same ++ * quantity for all data, but we don't care what it is. 
++ */ ++ ++/* Juno (r0, r2) */ ++ ++static struct idle_state idle_states_cluster_juno_a53[] = { ++ { .power = 56 }, /* arch_cpu_idle() (active idle) = WFI */ ++ { .power = 56 }, /* WFI */ ++ { .power = 56 }, /* cpu-sleep-0 */ ++ { .power = 17 }, /* cluster-sleep-0 */ ++}; ++ ++static struct idle_state idle_states_cluster_juno_a57[] = { ++ { .power = 65 }, /* arch_cpu_idle() (active idle) = WFI */ ++ { .power = 65 }, /* WFI */ ++ { .power = 65 }, /* cpu-sleep-0 */ ++ { .power = 24 }, /* cluster-sleep-0 */ ++}; ++ ++static struct capacity_state cap_states_cluster_juno_a53[] = { ++ { .cap = 235, .power = 26, }, /* 450 MHz */ ++ { .cap = 303, .power = 30, }, /* 575 MHz */ ++ { .cap = 368, .power = 39, }, /* 700 MHz */ ++ { .cap = 406, .power = 47, }, /* 775 MHz */ ++ { .cap = 447, .power = 57, }, /* 850 Mhz */ ++}; ++ ++static struct capacity_state cap_states_cluster_juno_a57[] = { ++ { .cap = 417, .power = 24, }, /* 450 MHz */ ++ { .cap = 579, .power = 32, }, /* 625 MHz */ ++ { .cap = 744, .power = 43, }, /* 800 MHz */ ++ { .cap = 883, .power = 49, }, /* 950 MHz */ ++ { .cap = 1024, .power = 64, }, /* 1100 MHz */ ++}; ++ ++static struct sched_group_energy energy_cluster_juno_a53 = { ++ .nr_idle_states = ARRAY_SIZE(idle_states_cluster_juno_a53), ++ .idle_states = idle_states_cluster_juno_a53, ++ .nr_cap_states = ARRAY_SIZE(cap_states_cluster_juno_a53), ++ .cap_states = cap_states_cluster_juno_a53, ++}; ++ ++static struct sched_group_energy energy_cluster_juno_a57 = { ++ .nr_idle_states = ARRAY_SIZE(idle_states_cluster_juno_a57), ++ .idle_states = idle_states_cluster_juno_a57, ++ .nr_cap_states = ARRAY_SIZE(cap_states_cluster_juno_a57), ++ .cap_states = cap_states_cluster_juno_a57, ++}; ++ ++static struct idle_state idle_states_core_juno_a53[] = { ++ { .power = 6 }, /* arch_cpu_idle() (active idle) = WFI */ ++ { .power = 6 }, /* WFI */ ++ { .power = 0 }, /* cpu-sleep-0 */ ++ { .power = 0 }, /* cluster-sleep-0 */ ++}; ++ ++static struct idle_state 
idle_states_core_juno_a57[] = { ++ { .power = 15 }, /* arch_cpu_idle() (active idle) = WFI */ ++ { .power = 15 }, /* WFI */ ++ { .power = 0 }, /* cpu-sleep-0 */ ++ { .power = 0 }, /* cluster-sleep-0 */ ++}; ++ ++static struct capacity_state cap_states_core_juno_a53[] = { ++ { .cap = 235, .power = 33, }, /* 450 MHz */ ++ { .cap = 302, .power = 46, }, /* 575 MHz */ ++ { .cap = 368, .power = 61, }, /* 700 MHz */ ++ { .cap = 406, .power = 76, }, /* 775 MHz */ ++ { .cap = 447, .power = 93, }, /* 850 Mhz */ ++}; ++ ++static struct capacity_state cap_states_core_juno_a57[] = { ++ { .cap = 417, .power = 168, }, /* 450 MHz */ ++ { .cap = 579, .power = 251, }, /* 625 MHz */ ++ { .cap = 744, .power = 359, }, /* 800 MHz */ ++ { .cap = 883, .power = 479, }, /* 950 MHz */ ++ { .cap = 1024, .power = 616, }, /* 1100 MHz */ ++}; ++ ++static struct sched_group_energy energy_core_juno_a53 = { ++ .nr_idle_states = ARRAY_SIZE(idle_states_core_juno_a53), ++ .idle_states = idle_states_core_juno_a53, ++ .nr_cap_states = ARRAY_SIZE(cap_states_core_juno_a53), ++ .cap_states = cap_states_core_juno_a53, ++}; ++ ++static struct sched_group_energy energy_core_juno_a57 = { ++ .nr_idle_states = ARRAY_SIZE(idle_states_core_juno_a57), ++ .idle_states = idle_states_core_juno_a57, ++ .nr_cap_states = ARRAY_SIZE(cap_states_core_juno_a57), ++ .cap_states = cap_states_core_juno_a57, ++}; ++ ++/* An energy model contains core and cluster sched group energy for 2 ++ * clusters (cluster id 0 and 1). set_energy_model() relies on this ++ * feature. It is enforced by a BUG_ON in energy(). 
++ */ ++ ++struct energy_model { ++ struct sched_group_energy *core_energy[2]; ++ struct sched_group_energy *cluster_energy[2]; ++}; ++ ++static struct energy_model juno_model = { ++ { &energy_core_juno_a57, &energy_core_juno_a53, }, ++ { &energy_cluster_juno_a57, &energy_cluster_juno_a53, }, ++}; ++ ++static struct of_device_id model_matches[] = { ++ { .compatible = "arm,juno", .data = &juno_model }, ++ {}, ++}; ++ ++static struct sched_group_energy **core_energy, **cluster_energy; ++ ++static void __init set_energy_model(void) ++{ ++ const struct of_device_id *match; ++ struct energy_model *em; ++ ++ BUG_ON(core_energy || cluster_energy); ++ ++ match = of_match_node(model_matches, of_root); ++ ++ if (!match) ++ return; ++ ++ em = (struct energy_model *) match->data; ++ ++ core_energy = em->core_energy; ++ cluster_energy = em->cluster_energy; ++ ++ pr_debug("energy model core[0,1]=[%p,%p] cluster=[%p,%p]\n", ++ em->core_energy[0], em->core_energy[1], ++ em->cluster_energy[0], em->cluster_energy[1]); ++} ++ ++static inline ++struct sched_group_energy *energy(int cpu, struct sched_group_energy **sge) ++{ ++ int idx = cpu_topology[cpu].cluster_id; ++ ++ BUG_ON(idx != 0 && idx != 1); ++ ++ pr_debug("cpu=%d %s%s[%d]=%p\n", cpu, (sge == core_energy) ? ++ "core" : "cluster", "_energy", idx, sge[idx]); ++ ++ return sge[idx]; ++} ++ ++static inline ++const struct sched_group_energy * const cpu_core_energy(int cpu) ++{ ++ return core_energy ? energy(cpu, core_energy) : NULL; ++} ++ ++static inline ++const struct sched_group_energy * const cpu_cluster_energy(int cpu) ++{ ++ return cluster_energy ? 
energy(cpu, cluster_energy) : NULL; ++} +diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c +index 761fcb6..baaab53 100644 +--- a/arch/arm64/kernel/topology.c ++++ b/arch/arm64/kernel/topology.c +@@ -26,6 +26,8 @@ + #include <asm/cputype.h> + #include <asm/topology.h> + ++#include "energy_model.h" ++ + static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE; + static DEFINE_MUTEX(cpu_scale_mutex); + static bool asym_cpucap; +@@ -503,6 +505,8 @@ static int __init parse_dt_topology(void) + goto out; + } + ++ set_energy_model(); ++ + ret = parse_cluster(map, 0); + if (ret != 0) + goto out_map; +@@ -530,120 +534,6 @@ static int __init parse_dt_topology(void) + struct cpu_topology cpu_topology[NR_CPUS]; + EXPORT_SYMBOL_GPL(cpu_topology); + +-/* +- * ARM JUNO specific energy cost model data. There are no unit requirements for +- * the data. Data can be normalized to any reference point, but the +- * normalization must be consistent. That is, one bogo-joule/watt must be the +- * same quantity for all data, but we don't care what it is. 
+- */ +- +-static struct idle_state idle_states_cluster_a53[] = { +- { .power = 56 }, /* arch_cpu_idle() (active idle) = WFI */ +- { .power = 56 }, /* WFI */ +- { .power = 56 }, /* cpu-sleep-0 */ +- { .power = 17 }, /* cluster-sleep-0 */ +-}; +- +-static struct idle_state idle_states_cluster_a57[] = { +- { .power = 65 }, /* arch_cpu_idle() (active idle) = WFI */ +- { .power = 65 }, /* WFI */ +- { .power = 65 }, /* cpu-sleep-0 */ +- { .power = 24 }, /* cluster-sleep-0 */ +-}; +- +-static struct capacity_state cap_states_cluster_a53[] = { +- /* Power per cluster */ +- { .cap = 235, .power = 26, }, /* 450 MHz */ +- { .cap = 303, .power = 30, }, /* 575 MHz */ +- { .cap = 368, .power = 39, }, /* 700 MHz */ +- { .cap = 406, .power = 47, }, /* 775 MHz */ +- { .cap = 447, .power = 57, }, /* 850 Mhz */ +-}; +- +-static struct capacity_state cap_states_cluster_a57[] = { +- /* Power per cluster */ +- { .cap = 417, .power = 24, }, /* 450 MHz */ +- { .cap = 579, .power = 32, }, /* 625 MHz */ +- { .cap = 744, .power = 43, }, /* 800 MHz */ +- { .cap = 883, .power = 49, }, /* 950 MHz */ +- { .cap = 1024, .power = 64, }, /* 1100 MHz */ +-}; +- +-static struct sched_group_energy energy_cluster_a53 = { +- .nr_idle_states = ARRAY_SIZE(idle_states_cluster_a53), +- .idle_states = idle_states_cluster_a53, +- .nr_cap_states = ARRAY_SIZE(cap_states_cluster_a53), +- .cap_states = cap_states_cluster_a53, +-}; +- +-static struct sched_group_energy energy_cluster_a57 = { +- .nr_idle_states = ARRAY_SIZE(idle_states_cluster_a57), +- .idle_states = idle_states_cluster_a57, +- .nr_cap_states = ARRAY_SIZE(cap_states_cluster_a57), +- .cap_states = cap_states_cluster_a57, +-}; +- +-static struct idle_state idle_states_core_a53[] = { +- { .power = 6 }, /* arch_cpu_idle() (active idle) = WFI */ +- { .power = 6 }, /* WFI */ +- { .power = 0 }, /* cpu-sleep-0 */ +- { .power = 0 }, /* cluster-sleep-0 */ +-}; +- +-static struct idle_state idle_states_core_a57[] = { +- { .power = 15 }, /* arch_cpu_idle() 
(active idle) = WFI */ +- { .power = 15 }, /* WFI */ +- { .power = 0 }, /* cpu-sleep-0 */ +- { .power = 0 }, /* cluster-sleep-0 */ +-}; +- +-static struct capacity_state cap_states_core_a53[] = { +- /* Power per cpu */ +- { .cap = 235, .power = 33, }, /* 450 MHz */ +- { .cap = 302, .power = 46, }, /* 575 MHz */ +- { .cap = 368, .power = 61, }, /* 700 MHz */ +- { .cap = 406, .power = 76, }, /* 775 MHz */ +- { .cap = 447, .power = 93, }, /* 850 Mhz */ +-}; +- +-static struct capacity_state cap_states_core_a57[] = { +- /* Power per cpu */ +- { .cap = 417, .power = 168, }, /* 450 MHz */ +- { .cap = 579, .power = 251, }, /* 625 MHz */ +- { .cap = 744, .power = 359, }, /* 800 MHz */ +- { .cap = 883, .power = 479, }, /* 950 MHz */ +- { .cap = 1024, .power = 616, }, /* 1100 MHz */ +-}; +- +-static struct sched_group_energy energy_core_a53 = { +- .nr_idle_states = ARRAY_SIZE(idle_states_core_a53), +- .idle_states = idle_states_core_a53, +- .nr_cap_states = ARRAY_SIZE(cap_states_core_a53), +- .cap_states = cap_states_core_a53, +-}; +- +-static struct sched_group_energy energy_core_a57 = { +- .nr_idle_states = ARRAY_SIZE(idle_states_core_a57), +- .idle_states = idle_states_core_a57, +- .nr_cap_states = ARRAY_SIZE(cap_states_core_a57), +- .cap_states = cap_states_core_a57, +-}; +- +-/* sd energy functions */ +-static inline +-const struct sched_group_energy * const cpu_cluster_energy(int cpu) +-{ +- return cpu_topology[cpu].cluster_id ? &energy_cluster_a53 : +- &energy_cluster_a57; +-} +- +-static inline +-const struct sched_group_energy * const cpu_core_energy(int cpu) +-{ +- return cpu_topology[cpu].cluster_id ? 
&energy_core_a53 : +- &energy_core_a57; +-} +- + const struct cpumask *cpu_coregroup_mask(int cpu) + { + return &cpu_topology[cpu].core_sibling; +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0071-arm64-Add-Hikey-energy-model.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0071-arm64-Add-Hikey-energy-model.patch new file mode 100644 index 0000000..891ca92 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0071-arm64-Add-Hikey-energy-model.patch @@ -0,0 +1,87 @@ +From 0eeb3c8bc2f3436b44096bd0659050ce301f49ae Mon Sep 17 00:00:00 2001 +From: Dietmar Eggemann <dietmar.eggemann@arm.com> +Date: Tue, 16 Aug 2016 14:10:38 +0100 +Subject: [PATCH 71/92] arm64: Add Hikey energy model + +Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com> +(cherry picked from commit 75751f967eca10b6cced89831efa39d28699ba37) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + arch/arm64/kernel/energy_model.h | 52 ++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 52 insertions(+) + +diff --git a/arch/arm64/kernel/energy_model.h b/arch/arm64/kernel/energy_model.h +index 170dd84..a71c78d 100644 +--- a/arch/arm64/kernel/energy_model.h ++++ b/arch/arm64/kernel/energy_model.h +@@ -107,6 +107,52 @@ + .cap_states = cap_states_core_juno_a57, + }; + ++/* HiKey */ ++ ++static struct idle_state idle_states_cluster_hikey[] = { ++ { .power = 107 }, /* arch_cpu_idle() (active idle) = WFI */ ++ { .power = 107 }, /* WFI */ ++ { .power = 47 }, /* cpu-sleep */ ++ { .power = 0 }, /* cluster-sleep */ ++}; ++ ++static struct capacity_state cap_states_cluster_hikey[] = { ++ { .cap = 178, .power = 16, }, /* 208 MHz */ ++ { .cap = 369, .power = 29, }, /* 432 MHz */ ++ { .cap = 622, .power = 47, }, /* 729 MHz */ ++ { .cap = 819, .power = 75, }, /* 960 MHz */ ++ { .cap = 1024, .power = 112, }, /* 1200 MHz */ ++}; ++ ++static struct sched_group_energy energy_cluster_hikey = { ++ .nr_idle_states = ARRAY_SIZE(idle_states_cluster_hikey), ++ .idle_states = 
idle_states_cluster_hikey, ++ .nr_cap_states = ARRAY_SIZE(cap_states_cluster_hikey), ++ .cap_states = cap_states_cluster_hikey, ++}; ++ ++static struct idle_state idle_states_core_hikey[] = { ++ { .power = 15 }, /* arch_cpu_idle() (active idle) = WFI */ ++ { .power = 15 }, /* WFI */ ++ { .power = 0 }, /* cpu-sleep */ ++ { .power = 0 }, /* cluster-sleep */ ++}; ++ ++static struct capacity_state cap_states_core_hikey[] = { ++ { .cap = 178, .power = 69, }, /* 208 MHz */ ++ { .cap = 369, .power = 125, }, /* 432 MHz */ ++ { .cap = 622, .power = 224, }, /* 729 MHz */ ++ { .cap = 819, .power = 367, }, /* 960 MHz */ ++ { .cap = 1024, .power = 670, }, /* 1200 MHz */ ++}; ++ ++static struct sched_group_energy energy_core_hikey = { ++ .nr_idle_states = ARRAY_SIZE(idle_states_core_hikey), ++ .idle_states = idle_states_core_hikey, ++ .nr_cap_states = ARRAY_SIZE(cap_states_core_hikey), ++ .cap_states = cap_states_core_hikey, ++}; ++ + /* An energy model contains core and cluster sched group energy for 2 + * clusters (cluster id 0 and 1). set_energy_model() relies on this + * feature. It is enforced by a BUG_ON in energy(). 
+@@ -122,8 +168,14 @@ struct energy_model { + { &energy_cluster_juno_a57, &energy_cluster_juno_a53, }, + }; + ++static struct energy_model hikey_model = { ++ { &energy_core_hikey, &energy_core_hikey, }, ++ { &energy_cluster_hikey, &energy_cluster_hikey, }, ++}; ++ + static struct of_device_id model_matches[] = { + { .compatible = "arm,juno", .data = &juno_model }, ++ { .compatible = "hisilicon,hi6220-hikey", .data = &hikey_model }, + {}, + }; + +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0072-arm64-dts-Add-Hikey-cpu-capacity-dmips-mhz-informati.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0072-arm64-dts-Add-Hikey-cpu-capacity-dmips-mhz-informati.patch new file mode 100644 index 0000000..87a8aec --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0072-arm64-dts-Add-Hikey-cpu-capacity-dmips-mhz-informati.patch @@ -0,0 +1,93 @@ +From 9634026b856ae824ed7e2b714c81edd180a36ca3 Mon Sep 17 00:00:00 2001 +From: Dietmar Eggemann <dietmar.eggemann@arm.com> +Date: Sun, 14 Aug 2016 15:44:31 +0100 +Subject: [PATCH 72/92] arm64, dts: Add Hikey cpu capacity-dmips-mhz + information + +Hikey is an SMP platform, so this property would normally not be necessary. + +But since we drive the setting of the EAS specific sched domain flag +SD_SHARE_CAP_STATES via the init_cpu_capacity_callback() cpufreq notifier +we have to make sure that cap_parsing_failed is not set to true in +parse_cpu_capacity() so that init_cpu_capacity_callback() will bail out +before consuming the CPUFREQ_NOTIFY. The easiest way to achieve this is to +provide the dts file with this property. 
+ +Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com> +(cherry picked from commit 9cffd417af22705fbdafc3264d90e18b175af62b) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + arch/arm64/boot/dts/hisilicon/hi6220.dtsi | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi +index 17839db..78af87b 100644 +--- a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi ++++ b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi +@@ -92,6 +92,7 @@ + #cooling-cells = <2>; /* min followed by max */ + cpu-idle-states = <&CPU_SLEEP &CLUSTER_SLEEP>; + dynamic-power-coefficient = <311>; ++ capacity-dmips-mhz = <1024>; + }; + + cpu1: cpu@1 { +@@ -102,6 +103,7 @@ + next-level-cache = <&CLUSTER0_L2>; + operating-points-v2 = <&cpu_opp_table>; + cpu-idle-states = <&CPU_SLEEP &CLUSTER_SLEEP>; ++ capacity-dmips-mhz = <1024>; + }; + + cpu2: cpu@2 { +@@ -112,6 +114,7 @@ + next-level-cache = <&CLUSTER0_L2>; + operating-points-v2 = <&cpu_opp_table>; + cpu-idle-states = <&CPU_SLEEP &CLUSTER_SLEEP>; ++ capacity-dmips-mhz = <1024>; + }; + + cpu3: cpu@3 { +@@ -122,6 +125,7 @@ + next-level-cache = <&CLUSTER0_L2>; + operating-points-v2 = <&cpu_opp_table>; + cpu-idle-states = <&CPU_SLEEP &CLUSTER_SLEEP>; ++ capacity-dmips-mhz = <1024>; + }; + + cpu4: cpu@100 { +@@ -132,6 +136,7 @@ + next-level-cache = <&CLUSTER1_L2>; + operating-points-v2 = <&cpu_opp_table>; + cpu-idle-states = <&CPU_SLEEP &CLUSTER_SLEEP>; ++ capacity-dmips-mhz = <1024>; + }; + + cpu5: cpu@101 { +@@ -142,6 +147,7 @@ + next-level-cache = <&CLUSTER1_L2>; + operating-points-v2 = <&cpu_opp_table>; + cpu-idle-states = <&CPU_SLEEP &CLUSTER_SLEEP>; ++ capacity-dmips-mhz = <1024>; + }; + + cpu6: cpu@102 { +@@ -152,6 +158,7 @@ + next-level-cache = <&CLUSTER1_L2>; + operating-points-v2 = <&cpu_opp_table>; + cpu-idle-states = <&CPU_SLEEP &CLUSTER_SLEEP>; ++ capacity-dmips-mhz = <1024>; + }; + + cpu7: cpu@103 { +@@ -162,6 +169,7 @@ + next-level-cache 
= <&CLUSTER1_L2>; + operating-points-v2 = <&cpu_opp_table>; + cpu-idle-states = <&CPU_SLEEP &CLUSTER_SLEEP>; ++ capacity-dmips-mhz = <1024>; + }; + + CLUSTER0_L2: l2-cache0 { +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0073-arm64-Introduce-sys-sd-energy-model-infrastructure.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0073-arm64-Introduce-sys-sd-energy-model-infrastructure.patch new file mode 100644 index 0000000..700ecc0 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0073-arm64-Introduce-sys-sd-energy-model-infrastructure.patch @@ -0,0 +1,126 @@ +From dabca795d8793a16cbbce37b0cc187d2a65cd50e Mon Sep 17 00:00:00 2001 +From: Dietmar Eggemann <dietmar.eggemann@arm.com> +Date: Thu, 18 Aug 2016 12:06:07 +0100 +Subject: [PATCH 73/92] arm64: Introduce sys sd energy model infrastructure + +Allow the energy model to contain a system level besides the already +existing core and cluster level. + +This is necessary for platforms with frequency domains spanning all +cpus to let the EAS algorithm work properly. + +The whole idea of this system level has to be rethought once +the idea of the 'struct sched_domain_shared' gets more momentum: + +https://lkml.org/lkml/2016/6/16/209 + +Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com> +(cherry picked from commit 83a776c78d6e2885f2821a35c78e465c60726244) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + arch/arm64/kernel/energy_model.h | 28 ++++++++++++++++++++-------- + arch/arm64/kernel/topology.c | 1 + + 2 files changed, 21 insertions(+), 8 deletions(-) + +diff --git a/arch/arm64/kernel/energy_model.h b/arch/arm64/kernel/energy_model.h +index a71c78d..4a86fa3 100644 +--- a/arch/arm64/kernel/energy_model.h ++++ b/arch/arm64/kernel/energy_model.h +@@ -153,24 +153,27 @@ + .cap_states = cap_states_core_hikey, + }; + +-/* An energy model contains core and cluster sched group energy for 2 +- * clusters (cluster id 0 and 1). 
set_energy_model() relies on this +- * feature. It is enforced by a BUG_ON in energy(). ++/* An energy model contains core, cluster and system sched group energy ++ * for 2 clusters (cluster id 0 and 1). set_energy_model() relies on ++ * this feature. It is enforced by a BUG_ON in energy(). + */ + + struct energy_model { + struct sched_group_energy *core_energy[2]; + struct sched_group_energy *cluster_energy[2]; ++ struct sched_group_energy *system_energy[2]; + }; + + static struct energy_model juno_model = { + { &energy_core_juno_a57, &energy_core_juno_a53, }, + { &energy_cluster_juno_a57, &energy_cluster_juno_a53, }, ++ {}, + }; + + static struct energy_model hikey_model = { + { &energy_core_hikey, &energy_core_hikey, }, + { &energy_cluster_hikey, &energy_cluster_hikey, }, ++ {}, + }; + + static struct of_device_id model_matches[] = { +@@ -179,14 +182,14 @@ struct energy_model { + {}, + }; + +-static struct sched_group_energy **core_energy, **cluster_energy; ++struct sched_group_energy **core_energy, **cluster_energy, **system_energy; + + static void __init set_energy_model(void) + { + const struct of_device_id *match; + struct energy_model *em; + +- BUG_ON(core_energy || cluster_energy); ++ BUG_ON(core_energy || cluster_energy || system_energy); + + match = of_match_node(model_matches, of_root); + +@@ -197,10 +200,12 @@ static void __init set_energy_model(void) + + core_energy = em->core_energy; + cluster_energy = em->cluster_energy; ++ system_energy = em->system_energy; + +- pr_debug("energy model core[0,1]=[%p,%p] cluster=[%p,%p]\n", ++ pr_debug("energy model core[0,1]=[%p,%p] cluster=[%p,%p] system=[%p,%p]\n", + em->core_energy[0], em->core_energy[1], +- em->cluster_energy[0], em->cluster_energy[1]); ++ em->cluster_energy[0], em->cluster_energy[1], ++ em->system_energy[0], em->system_energy[1]); + } + + static inline +@@ -211,7 +216,8 @@ struct sched_group_energy *energy(int cpu, struct sched_group_energy **sge) + BUG_ON(idx != 0 && idx != 1); + + 
pr_debug("cpu=%d %s%s[%d]=%p\n", cpu, (sge == core_energy) ? +- "core" : "cluster", "_energy", idx, sge[idx]); ++ "core" : (sge == cluster_energy) ? "cluster" : ++ "system", "_energy", idx, sge[idx]); + + return sge[idx]; + } +@@ -227,3 +233,9 @@ const struct sched_group_energy * const cpu_cluster_energy(int cpu) + { + return cluster_energy ? energy(cpu, cluster_energy) : NULL; + } ++ ++static inline ++const struct sched_group_energy * const cpu_system_energy(int cpu) ++{ ++ return system_energy ? energy(cpu, system_energy) : NULL; ++} +diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c +index baaab53..4062786 100644 +--- a/arch/arm64/kernel/topology.c ++++ b/arch/arm64/kernel/topology.c +@@ -669,6 +669,7 @@ static void __init reset_cpu_topology(void) + { cpu_coregroup_mask, cpu_coregroup_flags, cpu_core_energy, SD_INIT_NAME(MC) }, + #endif + { cpu_cpu_mask, cpu_cpu_flags, cpu_cluster_energy, SD_INIT_NAME(DIE) }, ++ { cpu_cpu_mask, NULL, cpu_system_energy, SD_INIT_NAME(SYS) }, + { NULL, } + }; + +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0074-arm64-Define-Hi6220-SYS-sd-energy-model.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0074-arm64-Define-Hi6220-SYS-sd-energy-model.patch new file mode 100644 index 0000000..1afbf14 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0074-arm64-Define-Hi6220-SYS-sd-energy-model.patch @@ -0,0 +1,69 @@ +From 18204f2d5c286cde5c83e50c153ef678cb08ecf9 Mon Sep 17 00:00:00 2001 +From: Dietmar Eggemann <dietmar.eggemann@arm.com> +Date: Thu, 18 Aug 2016 14:27:09 +0100 +Subject: [PATCH 74/92] arm64: Define Hi6220 SYS sd energy model + +Hi6220 has a single frequency domain spanning the two clusters. It +needs the SYS sched domain (sd) to let the EAS algorithm work +properly. + +The SD_SHARE_CAP_STATES flag is not set on SYS sd. 
+ +This lets sd_ea (highest sd w/ energy model data) point to the SYS +sd whereas sd_scs (highest sd w/ SD_SHARE_CAP_STATES set) points to +the DIE sd. This setup allows the code in sched_group_energy() to +set sg_shared_cap to the single sched group of the SYS sd covering +all the cpus in the system as they are all part of the single +frequency domain. + +The capacity and idle state vectors only contain entries w/ power +values equal zero, so there is no system-wide energy contribution. + +Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com> +(cherry picked from commit 364845656d93e1722081038e09c4781fa7cfb961) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + arch/arm64/kernel/energy_model.h | 20 +++++++++++++++++++- + 1 file changed, 19 insertions(+), 1 deletion(-) + +diff --git a/arch/arm64/kernel/energy_model.h b/arch/arm64/kernel/energy_model.h +index 4a86fa3..0a7c134 100644 +--- a/arch/arm64/kernel/energy_model.h ++++ b/arch/arm64/kernel/energy_model.h +@@ -109,6 +109,24 @@ + + /* HiKey */ + ++static struct idle_state idle_states_system_hikey[] = { ++ { .power = 0 }, /* arch_cpu_idle() (active idle) = WFI */ ++ { .power = 0 }, /* WFI */ ++ { .power = 0 }, /* cpu-sleep */ ++ { .power = 0 }, /* cluster-sleep */ ++}; ++ ++static struct capacity_state cap_states_system_hikey[] = { ++ { .cap = 1024, .power = 0, }, ++}; ++ ++static struct sched_group_energy energy_system_hikey = { ++ .nr_idle_states = ARRAY_SIZE(idle_states_system_hikey), ++ .idle_states = idle_states_system_hikey, ++ .nr_cap_states = ARRAY_SIZE(cap_states_system_hikey), ++ .cap_states = cap_states_system_hikey, ++}; ++ + static struct idle_state idle_states_cluster_hikey[] = { + { .power = 107 }, /* arch_cpu_idle() (active idle) = WFI */ + { .power = 107 }, /* WFI */ +@@ -173,7 +191,7 @@ struct energy_model { + static struct energy_model hikey_model = { + { &energy_core_hikey, &energy_core_hikey, }, + { &energy_cluster_hikey, &energy_cluster_hikey, }, +- {}, ++ { 
&energy_system_hikey, &energy_system_hikey, }, + }; + + static struct of_device_id model_matches[] = { +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0075-sched-debug-Add-energy-procfs-interface.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0075-sched-debug-Add-energy-procfs-interface.patch new file mode 100644 index 0000000..7073d2b --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0075-sched-debug-Add-energy-procfs-interface.patch @@ -0,0 +1,161 @@ +From 066fa940b25dc58b6618b785d82c90cbaa9b54d5 Mon Sep 17 00:00:00 2001 +From: Dietmar Eggemann <dietmar.eggemann@arm.com> +Date: Fri, 14 Nov 2014 16:25:50 +0000 +Subject: [PATCH 75/92] sched/debug: Add energy procfs interface +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This patch makes the energy data available via procfs. The related files +are placed as sub-directory named 'energy' inside the +/proc/sys/kernel/sched_domain/cpuX/domainY/groupZ directory for those +cpu/domain/group tuples which have energy information. + +The following example depicts the contents of +/proc/sys/kernel/sched_domain/cpu0/domain0/group[01] for a system which +has energy information attached to domain level 0. 
+ +├── cpu0 +│ ├── domain0 +│ │ ├── busy_factor +│ │ ├── busy_idx +│ │ ├── cache_nice_tries +│ │ ├── flags +│ │ ├── forkexec_idx +│ │ ├── group0 +│ │ │ └── energy +│ │ │ ├── cap_states +│ │ │ ├── idle_states +│ │ │ ├── nr_cap_states +│ │ │ └── nr_idle_states +│ │ ├── group1 +│ │ │ └── energy +│ │ │ ├── cap_states +│ │ │ ├── idle_states +│ │ │ ├── nr_cap_states +│ │ │ └── nr_idle_states +│ │ ├── idle_idx +│ │ ├── imbalance_pct +│ │ ├── max_interval +│ │ ├── max_newidle_lb_cost +│ │ ├── min_interval +│ │ ├── name +│ │ ├── newidle_idx +│ │ └── wake_idx +│ └── domain1 +│ ├── busy_factor +│ ├── busy_idx +│ ├── cache_nice_tries +│ ├── flags +│ ├── forkexec_idx +│ ├── idle_idx +│ ├── imbalance_pct +│ ├── max_interval +│ ├── max_newidle_lb_cost +│ ├── min_interval +│ ├── name +│ ├── newidle_idx +│ └── wake_idx + +The files 'nr_idle_states' and 'nr_cap_states' contain a scalar value +whereas 'idle_states' and 'cap_states' contain a vector of power +consumption at this idle state respectively (compute capacity, power +consumption) at this capacity state. 
+ +Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com> +(cherry picked from commit 6bc112e3c581d16e3ebb7eb7abd396b7a2048169) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/debug.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 65 insertions(+), 2 deletions(-) + +diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c +index fa178b6..48a621c 100644 +--- a/kernel/sched/debug.c ++++ b/kernel/sched/debug.c +@@ -261,9 +261,60 @@ static void sd_free_ctl_entry(struct ctl_table **tablep) + } + + static struct ctl_table * ++sd_alloc_ctl_energy_table(struct sched_group_energy *sge) ++{ ++ struct ctl_table *table = sd_alloc_ctl_entry(5); ++ ++ if (table == NULL) ++ return NULL; ++ ++ set_table_entry(&table[0], "nr_idle_states", &sge->nr_idle_states, ++ sizeof(int), 0644, proc_dointvec_minmax, false); ++ set_table_entry(&table[1], "idle_states", &sge->idle_states[0].power, ++ sge->nr_idle_states*sizeof(struct idle_state), 0644, ++ proc_doulongvec_minmax, false); ++ set_table_entry(&table[2], "nr_cap_states", &sge->nr_cap_states, ++ sizeof(int), 0644, proc_dointvec_minmax, false); ++ set_table_entry(&table[3], "cap_states", &sge->cap_states[0].cap, ++ sge->nr_cap_states*sizeof(struct capacity_state), 0644, ++ proc_doulongvec_minmax, false); ++ ++ return table; ++} ++ ++static struct ctl_table * ++sd_alloc_ctl_group_table(struct sched_group *sg) ++{ ++ struct ctl_table *table = sd_alloc_ctl_entry(2); ++ ++ if (table == NULL) ++ return NULL; ++ ++ table->procname = kstrdup("energy", GFP_KERNEL); ++ table->mode = 0555; ++ table->child = sd_alloc_ctl_energy_table((struct sched_group_energy *)sg->sge); ++ ++ return table; ++} ++ ++static struct ctl_table * + sd_alloc_ctl_domain_table(struct sched_domain *sd) + { +- struct ctl_table *table = sd_alloc_ctl_entry(14); ++ struct ctl_table *table; ++ unsigned int nr_entries = 14; ++ ++ int i = 0; ++ struct sched_group *sg = sd->groups; ++ ++ if (sg->sge) { ++ int nr_sgs = 
0; ++ ++ do {} while (nr_sgs++, sg = sg->next, sg != sd->groups); ++ ++ nr_entries += nr_sgs; ++ } ++ ++ table = sd_alloc_ctl_entry(nr_entries); + + if (table == NULL) + return NULL; +@@ -296,7 +347,19 @@ static void sd_free_ctl_entry(struct ctl_table **tablep) + sizeof(long), 0644, proc_doulongvec_minmax, false); + set_table_entry(&table[12], "name", sd->name, + CORENAME_MAX_SIZE, 0444, proc_dostring, false); +- /* &table[13] is terminator */ ++ sg = sd->groups; ++ if (sg->sge) { ++ char buf[32]; ++ struct ctl_table *entry = &table[13]; ++ ++ do { ++ snprintf(buf, 32, "group%d", i); ++ entry->procname = kstrdup(buf, GFP_KERNEL); ++ entry->mode = 0555; ++ entry->child = sd_alloc_ctl_group_table(sg); ++ } while (entry++, i++, sg = sg->next, sg != sd->groups); ++ } ++ /* &table[nr_entries-1] is terminator */ + + return table; + } +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0076-sched-events-Introduce-cfs_rq-PELT-trace-event-sched.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0076-sched-events-Introduce-cfs_rq-PELT-trace-event-sched.patch new file mode 100644 index 0000000..950e7a9 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0076-sched-events-Introduce-cfs_rq-PELT-trace-event-sched.patch @@ -0,0 +1,121 @@ +From 1f93576e19b38a0df4203faaf07b656e5f24b6f8 Mon Sep 17 00:00:00 2001 +From: Dietmar Eggemann <dietmar.eggemann@arm.com> +Date: Mon, 9 Nov 2015 12:07:27 +0000 +Subject: [PATCH 76/92] sched/events: Introduce cfs_rq PELT trace event + sched_pelt_cfs_rq() + +The trace event is only defined if symmetric multi-processing +(CONFIG_SMP) is enabled. +To let this trace event work for configurations with and without group +scheduling support for SCHED_OTHER (CONFIG_FAIR_GROUP_SCHED) the +following special handling is necessary for non-existent key=value +pairs: + +id = -1 : In case of !CONFIG_FAIR_GROUP_SCHED the task group css id is + set to -1. 
+ +Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com> +(cherry picked from commit 1c61904cbc5998b19dc8c04bb0cfc99f9a8ec9db) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + include/trace/events/sched.h | 51 ++++++++++++++++++++++++++++++++++++++++++++ + kernel/sched/fair.c | 7 ++++++ + 2 files changed, 58 insertions(+) + +diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h +index 9b90c57..c9c3348 100644 +--- a/include/trace/events/sched.h ++++ b/include/trace/events/sched.h +@@ -562,6 +562,57 @@ static inline long __trace_sched_switch_state(bool preempt, struct task_struct * + + TP_printk("cpu=%d", __entry->cpu) + ); ++ ++#ifdef CONFIG_SMP ++/* ++ * Tracepoint for cfs_rq Per Entity Load Tracking (PELT). ++ */ ++TRACE_EVENT(sched_pelt_cfs_rq, ++ ++ TP_PROTO(struct cfs_rq *cfs_rq), ++ ++ TP_ARGS(cfs_rq), ++ ++ TP_STRUCT__entry( ++ __field( int, cpu ) ++ __field( int, id ) ++ __field( unsigned long, load_avg ) ++ __field( unsigned long, util_avg ) ++ __field( u64, load_sum ) ++ __field( u32, util_sum ) ++ __field( u32, period_contrib ) ++ __field( u64, last_update_time ) ++ __field( unsigned long, runnable_load_avg ) ++ __field( u64, runnable_load_sum ) ++ ), ++ ++ TP_fast_assign( ++#ifdef CONFIG_FAIR_GROUP_SCHED ++ __entry->cpu = cfs_rq->rq->cpu; ++ __entry->id = cfs_rq->tg->css.id; ++#else ++ __entry->cpu = (container_of(cfs_rq, struct rq, cfs))->cpu; ++ __entry->id = -1; ++#endif ++ __entry->load_avg = cfs_rq->avg.load_avg; ++ __entry->util_avg = cfs_rq->avg.util_avg; ++ __entry->load_sum = cfs_rq->avg.load_sum; ++ __entry->util_sum = cfs_rq->avg.util_sum; ++ __entry->period_contrib = cfs_rq->avg.period_contrib; ++ __entry->last_update_time = cfs_rq->avg.last_update_time; ++ __entry->runnable_load_avg = cfs_rq->runnable_load_avg; ++ __entry->runnable_load_sum = cfs_rq->runnable_load_sum; ++ ), ++ ++ TP_printk("cpu=%d tg_css_id=%d load_avg=%lu util_avg=%lu" ++ " load_sum=%llu util_sum=%u period_contrib=%u 
last_update_time=%llu" ++ " runnable_load_avg=%lu runnable_load_sum=%llu", ++ __entry->cpu, __entry->id, __entry->load_avg, ++ __entry->util_avg, __entry->load_sum, __entry->util_sum, ++ __entry->period_contrib, __entry->last_update_time, ++ __entry->runnable_load_avg, __entry->runnable_load_sum) ++); ++#endif /* CONFIG_SMP */ + #endif /* _TRACE_SCHED_H */ + + /* This part must be outside protection */ +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index bc347af..03b4666 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -2931,6 +2931,9 @@ static u32 __compute_runnable_contrib(u64 n) + sa->util_avg = sa->util_sum / LOAD_AVG_MAX; + } + ++ if (cfs_rq) ++ trace_sched_pelt_cfs_rq(cfs_rq); ++ + return decayed; + } + +@@ -3310,6 +3313,8 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s + set_tg_cfs_propagate(cfs_rq); + + cfs_rq_util_change(cfs_rq); ++ ++ trace_sched_pelt_cfs_rq(cfs_rq); + } + + /** +@@ -3330,6 +3335,8 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s + set_tg_cfs_propagate(cfs_rq); + + cfs_rq_util_change(cfs_rq); ++ ++ trace_sched_pelt_cfs_rq(cfs_rq); + } + + /* Add the load generated by se into cfs_rq's load average */ +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0077-sched-events-Introduce-sched-entity-PELT-trace-event.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0077-sched-events-Introduce-sched-entity-PELT-trace-event.patch new file mode 100644 index 0000000..5e7d128 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0077-sched-events-Introduce-sched-entity-PELT-trace-event.patch @@ -0,0 +1,146 @@ +From f815820c55e552529885b5c541be9e2e3000c198 Mon Sep 17 00:00:00 2001 +From: Dietmar Eggemann <dietmar.eggemann@arm.com> +Date: Mon, 25 Apr 2016 00:48:09 +0100 +Subject: [PATCH 77/92] sched/events: Introduce sched entity PELT trace event + sched_pelt_se() + +The trace event is only defined if symmetric multi-processing 
+(CONFIG_SMP) is enabled. +To let this trace event work for configurations with and without group +scheduling support for SCHED_OTHER (CONFIG_FAIR_GROUP_SCHED) the +following special handling is necessary for non-existent key=value +pairs: + +id = -1 : In case sched entity is representing a task + : In case CONFIG_FAIR_GROUP_SCHED is not set + +pid = -1 : In case sched entity is representing a task group + +comm = "n/a" : In case sched entity is representing a task group + +Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com> +(cherry picked from commit 31f5aeb31289ec28c5831b62a2176da50b3c7fa4) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + include/trace/events/sched.h | 54 ++++++++++++++++++++++++++++++++++++++++++++ + kernel/sched/fair.c | 9 ++++++-- + 2 files changed, 61 insertions(+), 2 deletions(-) + +diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h +index c9c3348..58ba3d5 100644 +--- a/include/trace/events/sched.h ++++ b/include/trace/events/sched.h +@@ -565,6 +565,60 @@ static inline long __trace_sched_switch_state(bool preempt, struct task_struct * + + #ifdef CONFIG_SMP + /* ++ * Tracepoint for sched entity Per Entity Load Tracking (PELT). ++ */ ++TRACE_EVENT(sched_pelt_se, ++ ++ TP_PROTO(struct sched_entity *se), ++ ++ TP_ARGS(se), ++ ++ TP_STRUCT__entry( ++ __field( int, cpu ) ++ __field( int, id ) ++ __array( char, comm, TASK_COMM_LEN ) ++ __field( pid_t, pid ) ++ __field( unsigned long, load_avg ) ++ __field( unsigned long, util_avg ) ++ __field( u64, load_sum ) ++ __field( u32, util_sum ) ++ __field( u32, period_contrib ) ++ __field( u64, last_update_time ) ++ ), ++ ++ TP_fast_assign( ++ struct task_struct *p = container_of(se, struct task_struct, se); ++ ++#ifdef CONFIG_FAIR_GROUP_SCHED ++ __entry->cpu = se->my_q ? cpu_of(se->cfs_rq->rq) : task_cpu(p); ++ __entry->id = se->my_q ? se->my_q->tg->css.id : -1; ++ memcpy(__entry->comm, se->my_q ? 
"n/a" : p->comm, ++ TASK_COMM_LEN); ++ __entry->pid = se->my_q ? -1 : p->pid; ++#else ++ __entry->cpu = task_cpu(p); ++ __entry->id = -1; ++ memcpy(__entry->comm, p->comm, TASK_COMM_LEN); ++ __entry->pid = p->pid; ++#endif ++ __entry->load_avg = se->avg.load_avg; ++ __entry->util_avg = se->avg.util_avg; ++ __entry->load_sum = se->avg.load_sum; ++ __entry->util_sum = se->avg.util_sum; ++ __entry->period_contrib = se->avg.period_contrib; ++ __entry->last_update_time = se->avg.last_update_time; ++ ), ++ ++ TP_printk("cpu=%d tg_css_id=%d comm=%s pid=%d load_avg=%lu util_avg=%lu" ++ " load_sum=%llu util_sum=%u period_contrib=%u" ++ " last_update_time=%llu", ++ __entry->cpu, __entry->id, __entry->comm, __entry->pid, ++ __entry->load_avg, __entry->util_avg, __entry->load_sum, ++ __entry->util_sum, __entry->period_contrib, ++ __entry->last_update_time) ++); ++ ++/* + * Tracepoint for cfs_rq Per Entity Load Tracking (PELT). + */ + TRACE_EVENT(sched_pelt_cfs_rq, +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 03b4666..fffe7cb 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -2845,7 +2845,7 @@ static u32 __compute_runnable_contrib(u64 n) + */ + if ((s64)delta < 0) { + sa->last_update_time = now; +- return 0; ++ goto trace; + } + + /* +@@ -2854,7 +2854,7 @@ static u32 __compute_runnable_contrib(u64 n) + */ + delta >>= 10; + if (!delta) +- return 0; ++ goto trace; + sa->last_update_time = now; + + scale_freq = arch_scale_freq_capacity(NULL, cpu); +@@ -2931,8 +2931,11 @@ static u32 __compute_runnable_contrib(u64 n) + sa->util_avg = sa->util_sum / LOAD_AVG_MAX; + } + ++trace: + if (cfs_rq) + trace_sched_pelt_cfs_rq(cfs_rq); ++ else ++ trace_sched_pelt_se(container_of(sa, struct sched_entity, avg)); + + return decayed; + } +@@ -3315,6 +3318,7 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s + cfs_rq_util_change(cfs_rq); + + trace_sched_pelt_cfs_rq(cfs_rq); ++ trace_sched_pelt_se(se); + } + + /** +@@ -3337,6 +3341,7 
@@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s + cfs_rq_util_change(cfs_rq); + + trace_sched_pelt_cfs_rq(cfs_rq); ++ trace_sched_pelt_se(se); + } + + /* Add the load generated by se into cfs_rq's load average */ +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0078-sched-events-Introduce-task-group-PELT-trace-event-s.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0078-sched-events-Introduce-task-group-PELT-trace-event-s.patch new file mode 100644 index 0000000..970f2e5 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0078-sched-events-Introduce-task-group-PELT-trace-event-s.patch @@ -0,0 +1,91 @@ +From cf1dd9ac6f0b61e88c4dbeb57aaee06de5f6a250 Mon Sep 17 00:00:00 2001 +From: Dietmar Eggemann <dietmar.eggemann@arm.com> +Date: Thu, 13 Oct 2016 12:20:12 +0100 +Subject: [PATCH 78/92] sched/events: Introduce task group PELT trace event + sched_pelt_tg() + +The trace event is only defined if symmetric multi-processing +(CONFIG_SMP) and group scheduling support for SCHED_OTHER +(CONFIG_FAIR_GROUP_SCHED) are enabled. + +The cfs_rq owned by the task group is used as the only parameter for the +trace event because it has a reference to the task group and the cpu. +Using the task group as a parameter instead would require the cpu as a +second parameter. 
+ +Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com> +(cherry picked from commit 9099eec7854949cf30adf1b5ee026821e1d53e83) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + include/trace/events/sched.h | 33 +++++++++++++++++++++++++++++++++ + kernel/sched/fair.c | 5 ++++- + 2 files changed, 37 insertions(+), 1 deletion(-) + +diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h +index 58ba3d5..e269b04 100644 +--- a/include/trace/events/sched.h ++++ b/include/trace/events/sched.h +@@ -666,6 +666,39 @@ static inline long __trace_sched_switch_state(bool preempt, struct task_struct * + __entry->period_contrib, __entry->last_update_time, + __entry->runnable_load_avg, __entry->runnable_load_sum) + ); ++ ++/* ++ * Tracepoint for task group Per Entity Load Tracking (PELT). ++ */ ++#ifdef CONFIG_FAIR_GROUP_SCHED ++TRACE_EVENT(sched_pelt_tg, ++ ++ TP_PROTO(struct cfs_rq *cfs_rq), ++ ++ TP_ARGS(cfs_rq), ++ ++ TP_STRUCT__entry( ++ __field( int, cpu ) ++ __field( int, id ) ++ __field( long, load_avg ) ++ __field( unsigned long, shares ) ++ __field( unsigned long, tg_load_avg_contrib ) ++ ), ++ ++ TP_fast_assign( ++ __entry->cpu = cfs_rq->rq->cpu; ++ __entry->id = cfs_rq->tg->css.id; ++ __entry->load_avg = atomic_long_read(&cfs_rq->tg->load_avg); ++ __entry->shares = scale_load_down(cfs_rq->tg->shares); ++ __entry->tg_load_avg_contrib = cfs_rq->tg_load_avg_contrib; ++ ), ++ ++ TP_printk("cpu=%d tg_css_id=%d load_avg=%ld shares=%lu" ++ " tg_load_avg_contrib=%lu", ++ __entry->cpu, __entry->id, __entry->load_avg, ++ __entry->shares, __entry->tg_load_avg_contrib) ++); ++#endif /* CONFIG_FAIR_GROUP_SCHED */ + #endif /* CONFIG_SMP */ + #endif /* _TRACE_SCHED_H */ + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index fffe7cb..3563486 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -2985,12 +2985,15 @@ static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) + * No need to update load_avg for 
root_task_group as it is not used. + */ + if (cfs_rq->tg == &root_task_group) +- return; ++ goto trace; + + if (force || abs(delta) > cfs_rq->tg_load_avg_contrib / 64) { + atomic_long_add(delta, &cfs_rq->tg->load_avg); + cfs_rq->tg_load_avg_contrib = cfs_rq->avg.load_avg; + } ++ ++trace: ++ trace_sched_pelt_tg(cfs_rq); + } + + /* +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0079-sched-Call-cfs_rq-and-sched_entity-PELT-tracepoints-.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0079-sched-Call-cfs_rq-and-sched_entity-PELT-tracepoints-.patch new file mode 100644 index 0000000..8666804 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0079-sched-Call-cfs_rq-and-sched_entity-PELT-tracepoints-.patch @@ -0,0 +1,30 @@ +From 1dc0f447d22d781d4ff4d5fa8f789bf41fd56161 Mon Sep 17 00:00:00 2001 +From: Dietmar Eggemann <dietmar.eggemann@arm.com> +Date: Wed, 12 Oct 2016 17:36:41 +0100 +Subject: [PATCH 79/92] sched: Call cfs_rq and sched_entity PELT tracepoints + also in propagate_entity_load_avg() + +Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com> +(cherry picked from commit 20ab5631c366a28b5c7e598107337ac622c41e31) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/fair.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 3563486..489f6d3 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -3160,6 +3160,9 @@ static inline int propagate_entity_load_avg(struct sched_entity *se) + update_tg_cfs_util(cfs_rq, se); + update_tg_cfs_load(cfs_rq, se); + ++ trace_sched_pelt_cfs_rq(cfs_rq); ++ trace_sched_pelt_se(se); ++ + return 1; + } + +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0080-sched-core-Fix-find_idlest_group-for-fork.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0080-sched-core-Fix-find_idlest_group-for-fork.patch new file mode 100644 index 0000000..ccf260c --- /dev/null +++ 
b/meta-eas/recipes-kernel/linux/linux-renesas/0080-sched-core-Fix-find_idlest_group-for-fork.patch @@ -0,0 +1,86 @@ +From 4e4846aafb09473f1ca69fa061e72437b55f78c7 Mon Sep 17 00:00:00 2001 +From: Vincent Guittot <vincent.guittot@linaro.org> +Date: Thu, 8 Dec 2016 17:56:53 +0100 +Subject: [PATCH 80/92] sched/core: Fix find_idlest_group() for fork + +During fork, the utilization of a task is initialized once the rq has been +selected because the current utilization level of the rq is used to +set the utilization of the fork task. As the task's utilization is +still 0 at this step of the fork sequence, it doesn't make sense to +look for some spare capacity that can fit the task's utilization. +Furthermore, I can see perf regressions for the test: + + hackbench -P -g 1 + +because the least loaded policy is always bypassed and tasks are not +spread during fork. + +With this patch and the fix below, we are back to the same performance as +for v4.8. The fix below is only a temporary one used for the test +until a smarter solution is found because we can't simply remove the +test which is useful for other benchmarks + +| @@ -5708,13 +5708,6 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t +| +| avg_cost = this_sd->avg_scan_cost; +| +| - /* +| - * Due to large variance we need a large fuzz factor; hackbench in +| - * particularly is sensitive here. 
+| - */ +| - if ((avg_idle / 512) < avg_cost) +| - return -1; +| - +| time = local_clock(); +| +| for_each_cpu_wrap(cpu, sched_domain_span(sd), target, wrap) { + +Tested-by: Matt Fleming <matt@codeblueprint.co.uk> +Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Reviewed-by: Matt Fleming <matt@codeblueprint.co.uk> +Acked-by: Morten Rasmussen <morten.rasmussen@arm.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: dietmar.eggemann@arm.com +Cc: kernellwp@gmail.com +Cc: umgwanakikbuti@gmail.com +Cc: yuyang.du@intel.comc +Link: http://lkml.kernel.org/r/1481216215-24651-2-git-send-email-vincent.guittot@linaro.org +Signed-off-by: Ingo Molnar <mingo@kernel.org> +(cherry picked from commit f2e4ac502860c2adcc5ce56e0064fc4bcab90af4) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/fair.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 489f6d3..fea3ca8 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -5809,13 +5809,21 @@ static unsigned long capacity_spare_wake(int cpu, struct task_struct *p) + * utilized systems if we require spare_capacity > task_util(p), + * so we allow for some task stuffing by using + * spare_capacity > task_util(p)/2. ++ * ++ * Spare capacity can't be used for fork because the utilization has ++ * not been set yet, we must first select a rq to compute the initial ++ * utilization. 
+ */ ++ if (sd_flag & SD_BALANCE_FORK) ++ goto skip_spare; ++ + if (this_spare > task_util(p) / 2 && + imbalance*this_spare > 100*most_spare) + return NULL; + else if (most_spare > task_util(p) / 2) + return most_spare_sg; + ++skip_spare: + if (!idlest || 100*this_load < imbalance*min_load) + return NULL; + return idlest; +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0081-SPLIT-sched-fair-Don-t-load-balance-from-group_other.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0081-SPLIT-sched-fair-Don-t-load-balance-from-group_other.patch new file mode 100644 index 0000000..1258e93 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0081-SPLIT-sched-fair-Don-t-load-balance-from-group_other.patch @@ -0,0 +1,38 @@ +From 28e38963fbe539c9804d9132aa3613ead32ee330 Mon Sep 17 00:00:00 2001 +From: Brendan Jackman <brendan.jackman@arm.com> +Date: Wed, 15 Feb 2017 17:17:26 +0000 +Subject: [PATCH 81/92] SPLIT: sched/fair: Don't load balance from group_other + to groups without spare capacity + +This is split out from 885677858fd "sched: Consider misfit tasks when +load-balancing" from lsk-4.4-armlt. The rest of that patch will follow, this is +separated for ease of examination and discussion. + +(cherry picked from commit a3e504284acbf3dd12ceec8ebcabaee1892f876e) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/fair.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index fea3ca8..cf60d93 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -7750,6 +7750,14 @@ static bool update_sd_pick_busiest(struct lb_env *env, + if (sgs->group_type < busiest->group_type) + return false; + ++ /* ++ * Candidate sg doesn't face any serious load-balance problems ++ * so don't pick it if the local sg is already filled up. 
++ */ ++ if (sgs->group_type == group_other && ++ !group_has_capacity(env, &sds->local_stat)) ++ return false; ++ + if (sgs->avg_load <= busiest->avg_load) + return false; + +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0082-sched-Add-group_misfit_task-load-balance-type.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0082-sched-Add-group_misfit_task-load-balance-type.patch new file mode 100644 index 0000000..d020473 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0082-sched-Add-group_misfit_task-load-balance-type.patch @@ -0,0 +1,141 @@ +From b58a2c49fdbb556dcc51f750f77424063d92db3c Mon Sep 17 00:00:00 2001 +From: Morten Rasmussen <morten.rasmussen@arm.com> +Date: Thu, 25 Feb 2016 12:47:54 +0000 +Subject: [PATCH 82/92] sched: Add group_misfit_task load-balance type + +To maximize throughput in systems with reduced capacity cpus (e.g. +high RT/IRQ load and/or ARM big.LITTLE) load-balancing has to consider +task and cpu utilization as well as per-cpu compute capacity when +load-balancing in addition to the current average load based +load-balancing policy. Tasks that are scheduled on a reduced capacity +cpu need to be identified and migrated to a higher capacity cpu if +possible. + +To implement this additional policy an additional group_type +(load-balance scenario) is added: group_misfit_task. This represents +scenarios where a sched_group has tasks that are not suitable for its +per-cpu capacity. group_misfit_task is only considered if the system is +not overloaded in any other way (group_imbalanced or group_overloaded). + +Identifying misfit tasks requires the rq lock to be held. To avoid +taking remote rq locks to examine source sched_groups for misfit tasks, +each cpu is responsible for tracking misfit tasks themselves and update +the rq->misfit_task flag. This means checking task utilization when +tasks are scheduled and on sched_tick. 
+ +Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com> +(cherry picked from commit a1a42660d90a077df8412b6ca763ba76506da8e6) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/fair.c | 29 ++++++++++++++++++++++------- + kernel/sched/sched.h | 1 + + 2 files changed, 23 insertions(+), 7 deletions(-) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index cf60d93..e7fd15e 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -6627,6 +6627,8 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ + if (hrtick_enabled(rq)) + hrtick_start_fair(rq, p); + ++ rq->misfit_task = !task_fits_max(p, rq->cpu); ++ + return p; + simple: + cfs_rq = &rq->cfs; +@@ -6648,9 +6650,12 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ + if (hrtick_enabled(rq)) + hrtick_start_fair(rq, p); + ++ rq->misfit_task = !task_fits_max(p, rq->cpu); ++ + return p; + + idle: ++ rq->misfit_task = 0; + /* + * This is OK, because current is on_cpu, which avoids it being picked + * for load-balance and preemption/IRQs are still disabled avoiding +@@ -6863,6 +6868,13 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp + + enum fbq_type { regular, remote, all }; + ++enum group_type { ++ group_other = 0, ++ group_misfit_task, ++ group_imbalanced, ++ group_overloaded, ++}; ++ + #define LBF_ALL_PINNED 0x01 + #define LBF_NEED_BREAK 0x02 + #define LBF_DST_PINNED 0x04 +@@ -7338,12 +7350,6 @@ static unsigned long task_h_load(struct task_struct *p) + + /********** Helpers for find_busiest_group ************************/ + +-enum group_type { +- group_other = 0, +- group_imbalanced, +- group_overloaded, +-}; +- + /* + * sg_lb_stats - stats of a sched_group required for load_balancing + */ +@@ -7359,6 +7365,7 @@ struct sg_lb_stats { + unsigned int group_weight; + enum group_type group_type; + int group_no_capacity; ++ int group_misfit_task; /* A cpu has a task too big for 
its capacity */ + #ifdef CONFIG_NUMA_BALANCING + unsigned int nr_numa_running; + unsigned int nr_preferred_running; +@@ -7656,6 +7663,9 @@ group_type group_classify(struct sched_group *group, + if (sg_imbalanced(group)) + return group_imbalanced; + ++ if (sgs->group_misfit_task) ++ return group_misfit_task; ++ + return group_other; + } + +@@ -7707,8 +7717,11 @@ static inline void update_sg_lb_stats(struct lb_env *env, + if (!nr_running && idle_cpu(i)) + sgs->idle_cpus++; + +- if (cpu_overutilized(i)) ++ if (cpu_overutilized(i)) { + *overutilized = true; ++ if (!sgs->group_misfit_task && rq->misfit_task) ++ sgs->group_misfit_task = capacity_of(i); ++ } + } + + /* Adjust by relative CPU capacity of the group */ +@@ -9307,6 +9320,8 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) + + if (!rq->rd->overutilized && cpu_overutilized(task_cpu(curr))) + rq->rd->overutilized = true; ++ ++ rq->misfit_task = !task_fits_max(curr, rq->cpu); + } + + /* +diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h +index fa98ab3..704d8a4 100644 +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -611,6 +611,7 @@ struct rq { + #endif + #define CPU_LOAD_IDX_MAX 5 + unsigned long cpu_load[CPU_LOAD_IDX_MAX]; ++ unsigned int misfit_task; + #ifdef CONFIG_NO_HZ_COMMON + #ifdef CONFIG_SMP + unsigned long last_load_update_tick; +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0083-sched-Consider-misfit-tasks-when-load-balancing.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0083-sched-Consider-misfit-tasks-when-load-balancing.patch new file mode 100644 index 0000000..db57a58 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0083-sched-Consider-misfit-tasks-when-load-balancing.patch @@ -0,0 +1,180 @@ +From 488802cf1168582adc4fdd039ecd5b3dfe652085 Mon Sep 17 00:00:00 2001 +From: Morten Rasmussen <morten.rasmussen@arm.com> +Date: Tue, 28 Jul 2015 15:42:47 +0100 +Subject: [PATCH 83/92] sched: Consider misfit tasks 
when load-balancing + +With the new group_misfit_task load-balancing scenario additional policy +conditions are needed when load-balancing. Misfit task balancing only +makes sense between a source group with lower capacity than the target +group. If capacities are the same, fall back to normal group_other +balancing. The aim is to balance tasks such that no task has its +throughput hindered by compute capacity if a cpu with more capacity is +available. Load-balancing is generally based on average load in the +sched_groups, but for misfitting tasks it is necessary to introduce +exceptions to migrate tasks against usual metrics and optimize +throughput. + +This patch ensures the following load-balance for mixed capacity systems +(e.g. ARM big.LITTLE) for always-running tasks: + +1. Place a task on each cpu starting in order from cpus with highest +capacity to lowest until all cpus are in use (i.e. one task on each +cpu). + +2. Once all cpus are in use balance according to compute capacity such +that load per capacity is approximately the same regardless of the +compute capacity (i.e. big cpus get more tasks than little cpus). + +Necessary changes are introduced in find_busiest_group(), +calculate_imbalance(), and find_busiest_queue(). This includes passing +the group_type on to find_busiest_queue() through struct lb_env, which +currently only considers imbalance and not the imbalance situation +(group_type). + +To avoid taking remote rq locks to examine source sched_groups for +misfit tasks, each cpu is responsible for tracking its own misfit +tasks and updating the rq->misfit_task flag. This means checking task +utilization when tasks are scheduled and on sched_tick. 
+ +Change-Id: I458461cebf269d6d4eeac6f83e4c84f4e4d7a9dd +Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com> +(cherry picked from commit bcd0dfada3e8abb6f1d5cff73134fc54ab76f409) +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/fair.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 62 insertions(+), 2 deletions(-) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index e7fd15e..966af0b 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -6169,6 +6169,27 @@ static int wake_cap(struct task_struct *p, int cpu, int prev_cpu) + return min_cap * 1024 < task_util(p) * capacity_margin; + } + ++static inline bool __task_fits(struct task_struct *p, int cpu, int util) ++{ ++ unsigned long capacity = capacity_of(cpu); ++ ++ return (capacity * 1024) > (util * capacity_margin); ++} ++ ++static inline bool task_fits_max(struct task_struct *p, int cpu) ++{ ++ unsigned long capacity = capacity_of(cpu); ++ unsigned long max_capacity = cpu_rq(cpu)->rd->max_cpu_capacity; ++ ++ if (capacity == max_capacity) ++ return true; ++ ++ if (capacity * capacity_margin > max_capacity * 1024) ++ return true; ++ ++ return __task_fits(p, cpu, 0); ++} ++ + static bool cpu_overutilized(int cpu) + { + return (capacity_of(cpu) * 1024) < (cpu_util(cpu) * capacity_margin); +@@ -6904,6 +6925,7 @@ struct lb_env { + unsigned int loop_max; + + enum fbq_type fbq_type; ++ enum group_type busiest_group_type; + struct list_head tasks; + }; + +@@ -7902,6 +7924,15 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd + sgs->group_type = group_classify(sg, sgs); + } + ++ /* ++ * Ignore task groups with misfit tasks if local group has no ++ * capacity or if per-cpu capacity isn't higher. 
++ */ ++ if (sgs->group_type == group_misfit_task && ++ (!group_has_capacity(env, &sds->local_stat) || ++ !group_smaller_cpu_capacity(sg, sds->local))) ++ sgs->group_type = group_other; ++ + if (update_sd_pick_busiest(env, sds, sg, sgs)) { + sds->busiest = sg; + sds->busiest_stat = *sgs; +@@ -8082,6 +8113,22 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s + */ + if (busiest->avg_load <= sds->avg_load || + local->avg_load >= sds->avg_load) { ++ /* Misfitting tasks should be migrated in any case */ ++ if (busiest->group_type == group_misfit_task) { ++ env->imbalance = busiest->group_misfit_task; ++ return; ++ } ++ ++ /* ++ * Busiest group is overloaded, local is not, use the spare ++ * cycles to maximize throughput ++ */ ++ if (busiest->group_type == group_overloaded && ++ local->group_type <= group_misfit_task) { ++ env->imbalance = busiest->load_per_task; ++ return; ++ } ++ + env->imbalance = 0; + return fix_small_imbalance(env, sds); + } +@@ -8115,6 +8162,11 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s + (sds->avg_load - local->avg_load) * local->group_capacity + ) / SCHED_CAPACITY_SCALE; + ++ /* Boost imbalance to allow misfit task to be balanced. */ ++ if (busiest->group_type == group_misfit_task) ++ env->imbalance = max_t(long, env->imbalance, ++ busiest->group_misfit_task); ++ + /* + * if *imbalance is less than the average load per runnable task + * there is no guarantee that any tasks will be moved so we'll have +@@ -8181,6 +8233,11 @@ static struct sched_group *find_busiest_group(struct lb_env *env) + busiest->group_no_capacity) + goto force_balance; + ++ /* Misfitting tasks should be dealt with regardless of the avg load */ ++ if (busiest->group_type == group_misfit_task) { ++ goto force_balance; ++ } ++ + /* + * If the local group is busier than the selected busiest group + * don't try and pull any tasks. 
+@@ -8204,7 +8261,8 @@ static struct sched_group *find_busiest_group(struct lb_env *env) + * might end up to just move the imbalance on another group + */ + if ((busiest->group_type != group_overloaded) && +- (local->idle_cpus <= (busiest->idle_cpus + 1))) ++ (local->idle_cpus <= (busiest->idle_cpus + 1)) && ++ !group_smaller_cpu_capacity(sds.busiest, sds.local)) + goto out_balanced; + } else { + /* +@@ -8217,6 +8275,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env) + } + + force_balance: ++ env->busiest_group_type = busiest->group_type; + /* Looks like there is an imbalance. Compute it */ + calculate_imbalance(env, &sds); + return sds.busiest; +@@ -8275,7 +8334,8 @@ static struct rq *find_busiest_queue(struct lb_env *env, + */ + + if (rq->nr_running == 1 && wl > env->imbalance && +- !check_cpu_capacity(rq, env->sd)) ++ !check_cpu_capacity(rq, env->sd) && ++ env->busiest_group_type != group_misfit_task) + continue; + + /* +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0084-arm64-dts-r8a7795-Add-multi-cluster-definition.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0084-arm64-dts-r8a7795-Add-multi-cluster-definition.patch new file mode 100644 index 0000000..bf83a88 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0084-arm64-dts-r8a7795-Add-multi-cluster-definition.patch @@ -0,0 +1,59 @@ +From 0fe892a26802ebd96f9e565760c4cb215babb96a Mon Sep 17 00:00:00 2001 +From: Gaku Inami <gaku.inami.xw@bp.renesas.com> +Date: Thu, 1 Oct 2015 14:50:38 +0900 +Subject: [PATCH 84/92] arm64: dts: r8a7795: Add multi-cluster definition + +This patch adds the "cpu-map" for multi-cluster into r8a7795 +device-tree. This definition is used to parse the cpu topology. 
+ +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + arch/arm64/boot/dts/renesas/r8a7795.dtsi | 32 ++++++++++++++++++++++++++++++++ + 1 file changed, 32 insertions(+) + +diff --git a/arch/arm64/boot/dts/renesas/r8a7795.dtsi b/arch/arm64/boot/dts/renesas/r8a7795.dtsi +index 94262a1..22df16ba 100644 +--- a/arch/arm64/boot/dts/renesas/r8a7795.dtsi ++++ b/arch/arm64/boot/dts/renesas/r8a7795.dtsi +@@ -55,6 +55,38 @@ + #address-cells = <1>; + #size-cells = <0>; + ++ cpu-map { ++ cluster0 { ++ core0 { ++ cpu = <&a57_0>; ++ }; ++ core1 { ++ cpu = <&a57_1>; ++ }; ++ core2 { ++ cpu = <&a57_2>; ++ }; ++ core3 { ++ cpu = <&a57_3>; ++ }; ++ }; ++ ++ cluster1 { ++ core0 { ++ cpu = <&a53_0>; ++ }; ++ core1 { ++ cpu = <&a53_1>; ++ }; ++ core2 { ++ cpu = <&a53_2>; ++ }; ++ core3 { ++ cpu = <&a53_3>; ++ }; ++ }; ++ }; ++ + a57_0: cpu@0 { + compatible = "arm,cortex-a57", "arm,armv8"; + reg = <0x0>; +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0085-arm64-dts-r8a7796-Add-multi-cluster-definition.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0085-arm64-dts-r8a7796-Add-multi-cluster-definition.patch new file mode 100644 index 0000000..039b5fe --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0085-arm64-dts-r8a7796-Add-multi-cluster-definition.patch @@ -0,0 +1,53 @@ +From 5a7311047ea788378333397c2e4659a1623315f5 Mon Sep 17 00:00:00 2001 +From: Gaku Inami <gaku.inami.xw@bp.renesas.com> +Date: Thu, 25 Aug 2016 14:14:07 +0900 +Subject: [PATCH 85/92] arm64: dts: r8a7796: Add multi-cluster definition + +This patch adds the "cpu-map" for multi-cluster into r8a7796 +device-tree. This definition is used to parse the cpu topology. 
+ +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + arch/arm64/boot/dts/renesas/r8a7796.dtsi | 26 ++++++++++++++++++++++++++ + 1 file changed, 26 insertions(+) + +diff --git a/arch/arm64/boot/dts/renesas/r8a7796.dtsi b/arch/arm64/boot/dts/renesas/r8a7796.dtsi +index afdd69d..de8e219 100644 +--- a/arch/arm64/boot/dts/renesas/r8a7796.dtsi ++++ b/arch/arm64/boot/dts/renesas/r8a7796.dtsi +@@ -54,6 +54,32 @@ + #address-cells = <1>; + #size-cells = <0>; + ++ cpu-map { ++ cluster0 { ++ core0 { ++ cpu = <&a57_0>; ++ }; ++ core1 { ++ cpu = <&a57_1>; ++ }; ++ }; ++ ++ cluster1 { ++ core0 { ++ cpu = <&a53_0>; ++ }; ++ core1 { ++ cpu = <&a53_1>; ++ }; ++ core2 { ++ cpu = <&a53_2>; ++ }; ++ core3 { ++ cpu = <&a53_3>; ++ }; ++ }; ++ }; ++ + a57_0: cpu@0 { + compatible = "arm,cortex-a57", "arm,armv8"; + reg = <0x0>; +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0086-arm64-dts-r8a7795-es1-Add-multi-cluster-definition.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0086-arm64-dts-r8a7795-es1-Add-multi-cluster-definition.patch new file mode 100644 index 0000000..b0b60f3 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0086-arm64-dts-r8a7795-es1-Add-multi-cluster-definition.patch @@ -0,0 +1,59 @@ +From 60138899758aaa6b794325dd933911765a8e7ed2 Mon Sep 17 00:00:00 2001 +From: Gaku Inami <gaku.inami.xw@bp.renesas.com> +Date: Fri, 24 Mar 2017 18:40:28 +0900 +Subject: [PATCH 86/92] arm64: dts: r8a7795-es1: Add multi-cluster definition + +This patch adds the "cpu-map" for multi-cluster into r8a7795-es1 +device-tree. This definition is used to parse the cpu topology. 
+ +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + arch/arm64/boot/dts/renesas/r8a7795-es1.dtsi | 32 ++++++++++++++++++++++++++++ + 1 file changed, 32 insertions(+) + +diff --git a/arch/arm64/boot/dts/renesas/r8a7795-es1.dtsi b/arch/arm64/boot/dts/renesas/r8a7795-es1.dtsi +index 2bf5911..a8dae60a 100644 +--- a/arch/arm64/boot/dts/renesas/r8a7795-es1.dtsi ++++ b/arch/arm64/boot/dts/renesas/r8a7795-es1.dtsi +@@ -60,6 +60,38 @@ + #address-cells = <1>; + #size-cells = <0>; + ++ cpu-map { ++ cluster0 { ++ core0 { ++ cpu = <&a57_0>; ++ }; ++ core1 { ++ cpu = <&a57_1>; ++ }; ++ core2 { ++ cpu = <&a57_2>; ++ }; ++ core3 { ++ cpu = <&a57_3>; ++ }; ++ }; ++ ++ cluster1 { ++ core0 { ++ cpu = <&a53_0>; ++ }; ++ core1 { ++ cpu = <&a53_1>; ++ }; ++ core2 { ++ cpu = <&a53_2>; ++ }; ++ core3 { ++ cpu = <&a53_3>; ++ }; ++ }; ++ }; ++ + a57_0: cpu@0 { + compatible = "arm,cortex-a57", "arm,armv8"; + reg = <0x0>; +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0087-arm64-dts-r8a7795-Add-cpu-capacity-dmips-mhz.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0087-arm64-dts-r8a7795-Add-cpu-capacity-dmips-mhz.patch new file mode 100644 index 0000000..a9b9c41 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0087-arm64-dts-r8a7795-Add-cpu-capacity-dmips-mhz.patch @@ -0,0 +1,84 @@ +From eb679d47b12f08b67c3a1046bd956add4848697f Mon Sep 17 00:00:00 2001 +From: Gaku Inami <gaku.inami.xw@bp.renesas.com> +Date: Fri, 24 Mar 2017 19:13:27 +0900 +Subject: [PATCH 87/92] arm64: dts: r8a7795: Add cpu capacity-dmips-mhz + +Set the capacity-dmips-mhz for R-CAR H3(ES2.0). +This value is based on the result of the evaluation. 
+ +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + arch/arm64/boot/dts/renesas/r8a7795.dtsi | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/arch/arm64/boot/dts/renesas/r8a7795.dtsi b/arch/arm64/boot/dts/renesas/r8a7795.dtsi +index 3a27bdb..eff01d6 100644 +--- a/arch/arm64/boot/dts/renesas/r8a7795.dtsi ++++ b/arch/arm64/boot/dts/renesas/r8a7795.dtsi +@@ -105,6 +105,7 @@ + <&cluster0_opp_tb3>, <&cluster0_opp_tb4>, + <&cluster0_opp_tb5>, <&cluster0_opp_tb6>, + <&cluster0_opp_tb7>; ++ capacity-dmips-mhz = <1024>; + }; + + a57_1: cpu@1 { +@@ -120,6 +121,7 @@ + <&cluster0_opp_tb3>, <&cluster0_opp_tb4>, + <&cluster0_opp_tb5>, <&cluster0_opp_tb6>, + <&cluster0_opp_tb7>; ++ capacity-dmips-mhz = <1024>; + }; + + a57_2: cpu@2 { +@@ -135,6 +137,7 @@ + <&cluster0_opp_tb3>, <&cluster0_opp_tb4>, + <&cluster0_opp_tb5>, <&cluster0_opp_tb6>, + <&cluster0_opp_tb7>; ++ capacity-dmips-mhz = <1024>; + }; + + a57_3: cpu@3 { +@@ -150,6 +153,7 @@ + <&cluster0_opp_tb3>, <&cluster0_opp_tb4>, + <&cluster0_opp_tb5>, <&cluster0_opp_tb6>, + <&cluster0_opp_tb7>; ++ capacity-dmips-mhz = <1024>; + }; + + a53_0: cpu@100 { +@@ -161,6 +165,7 @@ + dynamic-power-coefficient = <277>; + clocks =<&cpg CPG_CORE R8A7795_CLK_Z2>; + operating-points-v2 = <&cluster1_opp_tb0>; ++ capacity-dmips-mhz = <379>; + }; + + a53_1: cpu@101 { +@@ -170,6 +175,7 @@ + next-level-cache = <&L2_CA53>; + enable-method = "psci"; + operating-points-v2 = <&cluster1_opp_tb0>; ++ capacity-dmips-mhz = <379>; + }; + + a53_2: cpu@102 { +@@ -179,6 +185,7 @@ + next-level-cache = <&L2_CA53>; + enable-method = "psci"; + operating-points-v2 = <&cluster1_opp_tb0>; ++ capacity-dmips-mhz = <379>; + }; + + a53_3: cpu@103 { +@@ -188,6 +195,7 @@ + next-level-cache = <&L2_CA53>; + enable-method = "psci"; + operating-points-v2 = <&cluster1_opp_tb0>; ++ capacity-dmips-mhz = <379>; + }; + + L2_CA57: cache-controller@0 { +-- +1.9.1 + diff --git 
a/meta-eas/recipes-kernel/linux/linux-renesas/0088-arm64-dts-r8a7796-Add-cpu-capacity-dmips-mhz.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0088-arm64-dts-r8a7796-Add-cpu-capacity-dmips-mhz.patch new file mode 100644 index 0000000..59e00ef --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0088-arm64-dts-r8a7796-Add-cpu-capacity-dmips-mhz.patch @@ -0,0 +1,68 @@ +From b97005ea6b5891f1d2251828f38011971545c4f9 Mon Sep 17 00:00:00 2001 +From: Gaku Inami <gaku.inami.xw@bp.renesas.com> +Date: Fri, 24 Mar 2017 19:56:34 +0900 +Subject: [PATCH 88/92] arm64: dts: r8a7796: Add cpu capacity-dmips-mhz + +Set the capacity-dmips-mhz for R-CAR M3. +This value is based on the result of the evaluation. + +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + arch/arm64/boot/dts/renesas/r8a7796.dtsi | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/arch/arm64/boot/dts/renesas/r8a7796.dtsi b/arch/arm64/boot/dts/renesas/r8a7796.dtsi +index 47b8af0..da26f88 100644 +--- a/arch/arm64/boot/dts/renesas/r8a7796.dtsi ++++ b/arch/arm64/boot/dts/renesas/r8a7796.dtsi +@@ -98,6 +98,7 @@ + <&cluster0_opp_tb3>, <&cluster0_opp_tb4>, + <&cluster0_opp_tb5>, <&cluster0_opp_tb6>, + <&cluster0_opp_tb7>; ++ capacity-dmips-mhz = <1024>; + }; + + a57_1: cpu@1 { +@@ -113,6 +114,7 @@ + <&cluster0_opp_tb3>, <&cluster0_opp_tb4>, + <&cluster0_opp_tb5>, <&cluster0_opp_tb6>, + <&cluster0_opp_tb7>; ++ capacity-dmips-mhz = <1024>; + }; + + a53_0: cpu@100 { +@@ -124,6 +126,7 @@ + dynamic-power-coefficient = <277>; + clocks =<&cpg CPG_CORE R8A7796_CLK_Z2>; + operating-points-v2 = <&cluster1_opp_tb0>; ++ capacity-dmips-mhz = <362>; + }; + + a53_1: cpu@101 { +@@ -133,6 +136,7 @@ + next-level-cache = <&L2_CA53>; + enable-method = "psci"; + operating-points-v2 = <&cluster1_opp_tb0>; ++ capacity-dmips-mhz = <362>; + }; + + a53_2: cpu@102 { +@@ -142,6 +146,7 @@ + next-level-cache = <&L2_CA53>; + enable-method = "psci"; + operating-points-v2 = <&cluster1_opp_tb0>; ++ 
capacity-dmips-mhz = <362>; + }; + + a53_3: cpu@103 { +@@ -151,6 +156,7 @@ + next-level-cache = <&L2_CA53>; + enable-method = "psci"; + operating-points-v2 = <&cluster1_opp_tb0>; ++ capacity-dmips-mhz = <362>; + }; + + L2_CA57: cache-controller@0 { +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0089-arm64-dts-r8a7795-es1-Add-cpu-capacity-dmips-mhz.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0089-arm64-dts-r8a7795-es1-Add-cpu-capacity-dmips-mhz.patch new file mode 100644 index 0000000..5ef1bf8 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0089-arm64-dts-r8a7795-es1-Add-cpu-capacity-dmips-mhz.patch @@ -0,0 +1,84 @@ +From 4461e893a07445bcbb54a6bbf4919955c1d22e5e Mon Sep 17 00:00:00 2001 +From: Gaku Inami <gaku.inami.xw@bp.renesas.com> +Date: Fri, 24 Mar 2017 19:59:17 +0900 +Subject: [PATCH 89/92] arm64: dts: r8a7795-es1: Add cpu capacity-dmips-mhz + +Set the capacity-dmips-mhz for R-CAR H3(ES1.x). +This value is based on the result of the evaluation. 
+ +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + arch/arm64/boot/dts/renesas/r8a7795-es1.dtsi | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/arch/arm64/boot/dts/renesas/r8a7795-es1.dtsi b/arch/arm64/boot/dts/renesas/r8a7795-es1.dtsi +index 172007a..c2b5b8d 100644 +--- a/arch/arm64/boot/dts/renesas/r8a7795-es1.dtsi ++++ b/arch/arm64/boot/dts/renesas/r8a7795-es1.dtsi +@@ -110,6 +110,7 @@ + <&cluster0_opp_tb3>, <&cluster0_opp_tb4>, + <&cluster0_opp_tb5>, <&cluster0_opp_tb6>, + <&cluster0_opp_tb7>; ++ capacity-dmips-mhz = <1024>; + }; + + a57_1: cpu@1 { +@@ -125,6 +126,7 @@ + <&cluster0_opp_tb3>, <&cluster0_opp_tb4>, + <&cluster0_opp_tb5>, <&cluster0_opp_tb6>, + <&cluster0_opp_tb7>; ++ capacity-dmips-mhz = <1024>; + }; + + a57_2: cpu@2 { +@@ -140,6 +142,7 @@ + <&cluster0_opp_tb3>, <&cluster0_opp_tb4>, + <&cluster0_opp_tb5>, <&cluster0_opp_tb6>, + <&cluster0_opp_tb7>; ++ capacity-dmips-mhz = <1024>; + }; + + a57_3: cpu@3 { +@@ -155,6 +158,7 @@ + <&cluster0_opp_tb3>, <&cluster0_opp_tb4>, + <&cluster0_opp_tb5>, <&cluster0_opp_tb6>, + <&cluster0_opp_tb7>; ++ capacity-dmips-mhz = <1024>; + }; + + a53_0: cpu@100 { +@@ -166,6 +170,7 @@ + dynamic-power-coefficient = <277>; + clocks =<&cpg CPG_CORE R8A7795_CLK_Z2>; + operating-points-v2 = <&cluster1_opp_tb0>; ++ capacity-dmips-mhz = <379>; + }; + + a53_1: cpu@101 { +@@ -175,6 +180,7 @@ + next-level-cache = <&L2_CA53>; + enable-method = "psci"; + operating-points-v2 = <&cluster1_opp_tb0>; ++ capacity-dmips-mhz = <379>; + }; + + a53_2: cpu@102 { +@@ -184,6 +190,7 @@ + next-level-cache = <&L2_CA53>; + enable-method = "psci"; + operating-points-v2 = <&cluster1_opp_tb0>; ++ capacity-dmips-mhz = <379>; + }; + + a53_3: cpu@103 { +@@ -193,6 +200,7 @@ + next-level-cache = <&L2_CA53>; + enable-method = "psci"; + operating-points-v2 = <&cluster1_opp_tb0>; ++ capacity-dmips-mhz = <379>; + }; + + L2_CA57: cache-controller@0 { +-- +1.9.1 + diff --git 
a/meta-eas/recipes-kernel/linux/linux-renesas/0090-arm64-Define-R-CAR-Gen3-energy-and-provide-it-to-the.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0090-arm64-Define-R-CAR-Gen3-energy-and-provide-it-to-the.patch new file mode 100644 index 0000000..c1d7ef8 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0090-arm64-Define-R-CAR-Gen3-energy-and-provide-it-to-the.patch @@ -0,0 +1,224 @@ +From 5881cc9ff86977d3d506f9f9549f2ffcca0fb932 Mon Sep 17 00:00:00 2001 +From: Gaku Inami <gaku.inami.xw@bp.renesas.com> +Date: Fri, 24 Mar 2017 20:10:51 +0900 +Subject: [PATCH 90/92] arm64: Define R-CAR Gen3 energy and provide it to the + scheduler + +Set the capacity and the power for R-CAR H3 and M3. +This value is based on the result of the evaluation. + +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + arch/arm64/kernel/energy_model.h | 186 +++++++++++++++++++++++++++++++++++++++ + 1 file changed, 186 insertions(+) + +diff --git a/arch/arm64/kernel/energy_model.h b/arch/arm64/kernel/energy_model.h +index 0a7c134..c2aae4f 100644 +--- a/arch/arm64/kernel/energy_model.h ++++ b/arch/arm64/kernel/energy_model.h +@@ -171,6 +171,178 @@ + .cap_states = cap_states_core_hikey, + }; + ++/* R-Car Gen3 */ ++ ++static struct idle_state idle_states_cluster_h3_a53[] = { ++ { .power = 17 }, /* arch_cpu_idle() (active idle) = WFI */ ++ { .power = 17 }, /* WFI */ ++ { .power = 0 }, /* cpu-sleep-0 */ ++ { .power = 0 }, /* cluster-sleep-0 */ ++}; ++ ++static struct idle_state idle_states_cluster_h3_a57[] = { ++ { .power = 112 }, /* arch_cpu_idle() (active idle) = WFI */ ++ { .power = 112 }, /* WFI */ ++ { .power = 0 }, /* cpu-sleep-0 */ ++ { .power = 0 }, /* cluster-sleep-0 */ ++}; ++ ++static struct capacity_state cap_states_cluster_h3_a53[] = { ++ /* Power per cluster */ ++ { .cap = 379, .power = 16, }, /* 1200 MHz */ ++}; ++ ++static struct capacity_state cap_states_cluster_h3_a57[] = { ++ /* Power per cluster */ ++ { .cap = 308, .power = 33, }, /* 500 MHz */ 
++ { .cap = 596, .power = 71, }, /* 1000 MHz */ ++ { .cap = 898, .power = 112, }, /* 1500 MHz */ ++ { .cap = 965, .power = 126, }, /* 1600 MHz */ ++ { .cap = 1024, .power = 154, }, /* 1700 MHz */ ++}; ++ ++static struct sched_group_energy energy_cluster_h3_a53 = { ++ .nr_idle_states = ARRAY_SIZE(idle_states_cluster_h3_a53), ++ .idle_states = idle_states_cluster_h3_a53, ++ .nr_cap_states = ARRAY_SIZE(cap_states_cluster_h3_a53), ++ .cap_states = cap_states_cluster_h3_a53, ++}; ++ ++static struct sched_group_energy energy_cluster_h3_a57 = { ++ .nr_idle_states = ARRAY_SIZE(idle_states_cluster_h3_a57), ++ .idle_states = idle_states_cluster_h3_a57, ++ .nr_cap_states = ARRAY_SIZE(cap_states_cluster_h3_a57), ++ .cap_states = cap_states_cluster_h3_a57, ++}; ++ ++static struct idle_state idle_states_core_h3_a53[] = { ++ { .power = 17 }, /* arch_cpu_idle() (active idle) = WFI */ ++ { .power = 17 }, /* WFI */ ++ { .power = 0 }, /* cpu-sleep-0 */ ++ { .power = 0 }, /* cluster-sleep-0 */ ++}; ++ ++static struct idle_state idle_states_core_h3_a57[] = { ++ { .power = 71 }, /* arch_cpu_idle() (active idle) = WFI */ ++ { .power = 71 }, /* WFI */ ++ { .power = 0 }, /* cpu-sleep-0 */ ++ { .power = 0 }, /* cluster-sleep-0 */ ++}; ++ ++static struct capacity_state cap_states_core_h3_a53[] = { ++ /* Power per cpu */ ++ { .cap = 379, .power = 131, }, /* 1200 MHz */ ++}; ++ ++static struct capacity_state cap_states_core_h3_a57[] = { ++ /* Power per cpu */ ++ { .cap = 308, .power = 315, }, /* 500 MHz */ ++ { .cap = 596, .power = 618, }, /* 1000 MHz */ ++ { .cap = 898, .power = 934, }, /* 1500 MHz */ ++ { .cap = 965, .power = 1044, }, /* 1600 MHz */ ++ { .cap = 1024, .power = 1286, }, /* 1700 MHz */ ++}; ++ ++static struct sched_group_energy energy_core_h3_a53 = { ++ .nr_idle_states = ARRAY_SIZE(idle_states_core_h3_a53), ++ .idle_states = idle_states_core_h3_a53, ++ .nr_cap_states = ARRAY_SIZE(cap_states_core_h3_a53), ++ .cap_states = cap_states_core_h3_a53, ++}; ++ ++static struct 
sched_group_energy energy_core_h3_a57 = { ++ .nr_idle_states = ARRAY_SIZE(idle_states_core_h3_a57), ++ .idle_states = idle_states_core_h3_a57, ++ .nr_cap_states = ARRAY_SIZE(cap_states_core_h3_a57), ++ .cap_states = cap_states_core_h3_a57, ++}; ++ ++static struct idle_state idle_states_cluster_m3_a53[] = { ++ { .power = 33 }, /* arch_cpu_idle() (active idle) = WFI */ ++ { .power = 33 }, /* WFI */ ++ { .power = 0 }, /* cpu-sleep-0 */ ++ { .power = 0 }, /* cluster-sleep-0 */ ++}; ++ ++static struct idle_state idle_states_cluster_m3_a57[] = { ++ { .power = 66 }, /* arch_cpu_idle() (active idle) = WFI */ ++ { .power = 66 }, /* WFI */ ++ { .power = 0 }, /* cpu-sleep-0 */ ++ { .power = 0 }, /* cluster-sleep-0 */ ++}; ++ ++static struct capacity_state cap_states_cluster_m3_a53[] = { ++ /* Power per cluster */ ++ { .cap = 362, .power = 33, }, /* 1200 MHz */ ++}; ++ ++static struct capacity_state cap_states_cluster_m3_a57[] = { ++ /* Power per cluster */ ++ { .cap = 293, .power = 17, }, /* 500 MHz */ ++ { .cap = 560, .power = 42, }, /* 1000 MHz */ ++ { .cap = 853, .power = 66, }, /* 1500 MHz */ ++ { .cap = 910, .power = 72, }, /* 1600 MHz */ ++ { .cap = 967, .power = 90, }, /* 1700 MHz */ ++ { .cap = 1024, .power = 115, }, /* 1800 MHz */ ++}; ++ ++static struct sched_group_energy energy_cluster_m3_a53 = { ++ .nr_idle_states = ARRAY_SIZE(idle_states_cluster_m3_a53), ++ .idle_states = idle_states_cluster_m3_a53, ++ .nr_cap_states = ARRAY_SIZE(cap_states_cluster_m3_a53), ++ .cap_states = cap_states_cluster_m3_a53, ++}; ++ ++static struct sched_group_energy energy_cluster_m3_a57 = { ++ .nr_idle_states = ARRAY_SIZE(idle_states_cluster_m3_a57), ++ .idle_states = idle_states_cluster_m3_a57, ++ .nr_cap_states = ARRAY_SIZE(cap_states_cluster_m3_a57), ++ .cap_states = cap_states_cluster_m3_a57, ++}; ++ ++static struct idle_state idle_states_core_m3_a53[] = { ++ { .power = 16 }, /* arch_cpu_idle() (active idle) = WFI */ ++ { .power = 16 }, /* WFI */ ++ { .power = 0 }, /* cpu-sleep-0 
*/ ++ { .power = 0 }, /* cluster-sleep-0 */ ++}; ++ ++static struct idle_state idle_states_core_m3_a57[] = { ++ { .power = 83 }, /* arch_cpu_idle() (active idle) = WFI */ ++ { .power = 83 }, /* WFI */ ++ { .power = 0 }, /* cpu-sleep-0 */ ++ { .power = 0 }, /* cluster-sleep-0 */ ++}; ++ ++static struct capacity_state cap_states_core_m3_a53[] = { ++ /* Power per cpu */ ++ { .cap = 362, .power = 131, }, /* 1200 MHz */ ++}; ++ ++static struct capacity_state cap_states_core_m3_a57[] = { ++ /* Power per cpu */ ++ { .cap = 293, .power = 299, }, /* 500 MHz */ ++ { .cap = 560, .power = 581, }, /* 1000 MHz */ ++ { .cap = 853, .power = 888, }, /* 1500 MHz */ ++ { .cap = 910, .power = 1098, }, /* 1600 MHz */ ++ { .cap = 967, .power = 1179, }, /* 1700 MHz */ ++ { .cap = 1024, .power = 1421, }, /* 1800 MHz */ ++}; ++ ++static struct sched_group_energy energy_core_m3_a53 = { ++ .nr_idle_states = ARRAY_SIZE(idle_states_core_m3_a53), ++ .idle_states = idle_states_core_m3_a53, ++ .nr_cap_states = ARRAY_SIZE(cap_states_core_m3_a53), ++ .cap_states = cap_states_core_m3_a53, ++}; ++ ++static struct sched_group_energy energy_core_m3_a57 = { ++ .nr_idle_states = ARRAY_SIZE(idle_states_core_m3_a57), ++ .idle_states = idle_states_core_m3_a57, ++ .nr_cap_states = ARRAY_SIZE(cap_states_core_m3_a57), ++ .cap_states = cap_states_core_m3_a57, ++}; ++ + /* An energy model contains core, cluster and system sched group energy + * for 2 clusters (cluster id 0 and 1). set_energy_model() relies on + * this feature. It is enforced by a BUG_ON in energy(). 
+@@ -194,9 +366,23 @@ struct energy_model { + { &energy_system_hikey, &energy_system_hikey, }, + }; + ++static struct energy_model r8a7795_model = { ++ { &energy_core_h3_a57, &energy_core_h3_a53, }, ++ { &energy_cluster_h3_a57, &energy_cluster_h3_a53, }, ++ {}, ++}; ++ ++static struct energy_model r8a7796_model = { ++ { &energy_core_m3_a57, &energy_core_m3_a53, }, ++ { &energy_cluster_m3_a57, &energy_cluster_m3_a53, }, ++ {}, ++}; ++ + static struct of_device_id model_matches[] = { + { .compatible = "arm,juno", .data = &juno_model }, + { .compatible = "hisilicon,hi6220-hikey", .data = &hikey_model }, ++ { .compatible = "renesas,r8a7795", .data = &r8a7795_model }, ++ { .compatible = "renesas,r8a7796", .data = &r8a7796_model }, + {}, + }; + +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0091-sched-Enable-energy-awareness-a-sched-feature-by-def.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0091-sched-Enable-energy-awareness-a-sched-feature-by-def.patch new file mode 100644 index 0000000..a4aa860 --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0091-sched-Enable-energy-awareness-a-sched-feature-by-def.patch @@ -0,0 +1,27 @@ +From 68769e6bf0143899b9d1559a6afc2d45b8c0bc43 Mon Sep 17 00:00:00 2001 +From: Gaku Inami <gaku.inami.xw@bp.renesas.com> +Date: Thu, 25 Aug 2016 15:21:45 +0900 +Subject: [PATCH 91/92] sched: Enable energy awareness a sched feature by + default + +This patch sets ENERGY_AWARE by default. If you want to change +a sched feature, CONFIG_SCHED_DEBUG should be enabled. + +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + kernel/sched/features.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/features.h b/kernel/sched/features.h +index b634151..9ca672e 100644 +--- a/kernel/sched/features.h ++++ b/kernel/sched/features.h +@@ -73,4 +73,4 @@ + * Energy aware scheduling. Use platform energy model to guide scheduling + * decisions optimizing for energy efficiency. 
+ */ +-SCHED_FEAT(ENERGY_AWARE, false) ++SCHED_FEAT(ENERGY_AWARE, true) +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/0092-cpufreq-schedutil-Use-policy-dependent-transition-de.patch b/meta-eas/recipes-kernel/linux/linux-renesas/0092-cpufreq-schedutil-Use-policy-dependent-transition-de.patch new file mode 100644 index 0000000..29fdd5c --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/0092-cpufreq-schedutil-Use-policy-dependent-transition-de.patch @@ -0,0 +1,93 @@ +From fd991617b51a628a72bdb45b20fbc5c01d666b70 Mon Sep 17 00:00:00 2001 +From: Gaku Inami <gaku.inami.xw@bp.renesas.com> +Date: Fri, 7 Jul 2017 15:04:31 +0900 +Subject: [PATCH 92/92] cpufreq: schedutil: Use policy-dependent transition + delays for R-Car + +Make the schedutil governor take the initial (default) value of the +rate_limit_us sysfs attribute from the (new) transition_delay_us +policy parameter (to be set by the scaling driver). + +That will allow scaling drivers to make schedutil use smaller default +values of rate_limit_us and reduce the default average time interval +between consecutive frequency changes. + +The cpufreq-dt driver for R-Car H3/M3 sets transition_delay_us to the +same value as transition_latency. 
+ +Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com> +--- + drivers/cpufreq/cpufreq-dt.c | 4 ++++ + include/linux/cpufreq.h | 7 +++++++ + kernel/sched/cpufreq_schedutil.c | 15 ++++++++++----- + 3 files changed, 21 insertions(+), 5 deletions(-) + +diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c +index 5c07ae0..57cd54e 100644 +--- a/drivers/cpufreq/cpufreq-dt.c ++++ b/drivers/cpufreq/cpufreq-dt.c +@@ -276,6 +276,10 @@ static int cpufreq_init(struct cpufreq_policy *policy) + + policy->cpuinfo.transition_latency = transition_latency; + ++#if IS_ENABLED(CONFIG_ARCH_R8A7795) || IS_ENABLED(CONFIG_ARCH_R8A7796) ++ policy->transition_delay_us = transition_latency; ++#endif ++ + return 0; + + out_free_cpufreq_table: +diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h +index 32dc0cbd..7df5d88 100644 +--- a/include/linux/cpufreq.h ++++ b/include/linux/cpufreq.h +@@ -120,6 +120,13 @@ struct cpufreq_policy { + bool fast_switch_possible; + bool fast_switch_enabled; + ++ /* ++ * Preferred average time interval between consecutive invocations of ++ * the driver to set the frequency for this policy. To be set by the ++ * scaling driver (0, which is the default, means no preference). ++ */ ++ unsigned int transition_delay_us; ++ + /* Cached frequency lookup from cpufreq_driver_resolve_freq. 
*/ + unsigned int cached_target_freq; + int cached_resolved_idx; +diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c +index 42a220e..9668e0e 100644 +--- a/kernel/sched/cpufreq_schedutil.c ++++ b/kernel/sched/cpufreq_schedutil.c +@@ -473,7 +473,6 @@ static int sugov_init(struct cpufreq_policy *policy) + { + struct sugov_policy *sg_policy; + struct sugov_tunables *tunables; +- unsigned int lat; + int ret = 0; + + /* State should be equivalent to EXIT */ +@@ -512,10 +511,16 @@ static int sugov_init(struct cpufreq_policy *policy) + goto stop_kthread; + } + +- tunables->rate_limit_us = LATENCY_MULTIPLIER; +- lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC; +- if (lat) +- tunables->rate_limit_us *= lat; ++ if (policy->transition_delay_us) { ++ tunables->rate_limit_us = policy->transition_delay_us; ++ } else { ++ unsigned int lat; ++ ++ tunables->rate_limit_us = LATENCY_MULTIPLIER; ++ lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC; ++ if (lat) ++ tunables->rate_limit_us *= lat; ++ } + + policy->governor_data = sg_policy; + sg_policy->tunables = tunables; +-- +1.9.1 + diff --git a/meta-eas/recipes-kernel/linux/linux-renesas/eas.cfg b/meta-eas/recipes-kernel/linux/linux-renesas/eas.cfg new file mode 100644 index 0000000..e8b09aa --- /dev/null +++ b/meta-eas/recipes-kernel/linux/linux-renesas/eas.cfg @@ -0,0 +1 @@ +CONFIG_SCHED_DEBUG=y |