From f613a647e9c9dc9981b5fe277304fea79aaacfbb Mon Sep 17 00:00:00 2001
From: Morten Rasmussen <morten.rasmussen@arm.com>
Date: Fri, 2 Jan 2015 14:21:56 +0000
Subject: [PATCH 54/92] sched: Extend sched_group_energy to test load-balancing
 decisions

Extend sched_group_energy() to support energy prediction when usage
(tasks) is added to or removed from a specific cpu, or migrated between
a pair of cpus. This is useful for load-balancing decision making.

cc: Ingo Molnar <mingo@redhat.com>
cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com>
(cherry picked from commit 2ab0ed691531c9d04e07c80a91b38970b08c9477)
Signed-off-by: Gaku Inami <gaku.inami.xw@bp.renesas.com>
---
 kernel/sched/fair.c | 90 +++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 63 insertions(+), 27 deletions(-)

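A minimal standalone sketch of the prediction scheme this patch
introduces, placed here in the notes area so the diff itself is
unaffected. This is not kernel code: the struct layout and all
capacity/power numbers below are invented for illustration; only the
arithmetic mirrors __cpu_util(), calc_util_delta(), __cpu_norm_util()
and the busy/idle energy sums in the hunks below.

#include <stdio.h>

#define SCHED_CAPACITY_SHIFT	10
#define SCHED_CAPACITY_SCALE	(1 << SCHED_CAPACITY_SHIFT)

struct cpu {		/* hypothetical stand-in for rq/capacity state */
	long util;	/* current util_avg of the cpu's cfs rq */
	long cap;	/* capacity at the candidate capacity state */
	long cap_orig;	/* capacity_orig_of(cpu) */
};

/* Mirrors __cpu_util(): current util plus a signed delta, clamped. */
static long cpu_util_delta(const struct cpu *c, long delta)
{
	long util = c->util + delta;

	if (util < 0)
		return 0;
	return util >= c->cap_orig ? c->cap_orig : util;
}

/* Mirrors calc_util_delta(): only src/dst cpus see a non-zero delta. */
static long util_delta(int cpu, int src, int dst, long task_util)
{
	if (cpu == src)
		return -task_util;
	if (cpu == dst)
		return task_util;
	return 0;
}

int main(void)
{
	/* Two-cpu group; all numbers are made up. */
	struct cpu cpus[2] = {
		{ .util = 600, .cap = 768, .cap_orig = 1024 },
		{ .util = 100, .cap = 768, .cap_orig = 1024 },
	};
	long task_util = 300;			/* task being migrated */
	long busy_power = 500, idle_power = 50;	/* invented numbers */
	long util_sum = 0;

	/* Predict group util with the task moved from cpu0 to cpu1. */
	for (int i = 0; i < 2; i++) {
		long d = util_delta(i, /*src*/0, /*dst*/1, task_util);
		long u = cpu_util_delta(&cpus[i], d);

		/* Mirrors __cpu_norm_util(): busy ratio vs. capacity. */
		util_sum += u >= cpus[i].cap ? SCHED_CAPACITY_SCALE :
			(u << SCHED_CAPACITY_SHIFT) / cpus[i].cap;
	}
	if (util_sum > SCHED_CAPACITY_SCALE)
		util_sum = SCHED_CAPACITY_SCALE;

	/* Busy/idle split as in sched_group_energy(). */
	printf("norm util %ld/1024, energy %ld\n", util_sum,
	       ((util_sum * busy_power) >> SCHED_CAPACITY_SHIFT) +
	       (((SCHED_CAPACITY_SCALE - util_sum) * idle_power)
						>> SCHED_CAPACITY_SHIFT));
	return 0;
}

With these made-up numbers the sketch prints "norm util 933/1024,
energy 459" (arbitrary units): removing the task from cpu0 and adding
it to cpu1 changes each cpu's predicted busy ratio, and the group
energy is the capped busy ratio weighted by busy power plus the
residual idle time weighted by idle power.
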
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 1b8dca7..dcc73d8 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5300,12 +5300,21 @@ static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
  * capacity_orig) as it useful for predicting the capacity required after task
  * migrations (scheduler-driven DVFS).
  */
-static unsigned long cpu_util(int cpu)
+static unsigned long __cpu_util(int cpu, int delta)
 {
 	unsigned long util = cpu_rq(cpu)->cfs.avg.util_avg;
 	unsigned long capacity = capacity_orig_of(cpu);
 
-	return (util >= capacity) ? capacity : util;
+	delta += util;
+	if (delta < 0)
+		return 0;
+
+	return (delta >= capacity) ? capacity : delta;
+}
+
+static unsigned long cpu_util(int cpu)
+{
+	return __cpu_util(cpu, 0);
 }
 
 static void record_wakee(struct task_struct *p)
@@ -5330,8 +5339,18 @@ static inline bool energy_aware(void)
 	return sched_feat(ENERGY_AWARE);
 }
 
+struct energy_env {
+	struct sched_group	*sg_top;
+	struct sched_group	*sg_cap;
+	int			cap_idx;
+	int			util_delta;
+	int			src_cpu;
+	int			dst_cpu;
+	int			energy;
+};
+
 /*
- * cpu_norm_util() returns the cpu util relative to a specific capacity,
+ * __cpu_norm_util() returns the cpu util relative to a specific capacity,
  * i.e. it's busy ratio, in the range [0..SCHED_CAPACITY_SCALE] which is useful
  * for energy calculations. Using the scale-invariant util returned by
  * cpu_util() and approximating scale-invariant util by:
@@ -5344,9 +5363,9 @@ static inline bool energy_aware(void)
  *
  *   norm_util = running_time/time ~ util/capacity
  */
-static unsigned long cpu_norm_util(int cpu, unsigned long capacity)
+static unsigned long __cpu_norm_util(int cpu, unsigned long capacity, int delta)
 {
-	int util = cpu_util(cpu);
+	int util = __cpu_util(cpu, delta);
 
 	if (util >= capacity)
 		return SCHED_CAPACITY_SCALE;
@@ -5354,13 +5373,25 @@ static unsigned long cpu_norm_util(int cpu, unsigned long capacity)
 	return (util << SCHED_CAPACITY_SHIFT)/capacity;
 }
 
-static unsigned long group_max_util(struct sched_group *sg)
+static int calc_util_delta(struct energy_env *eenv, int cpu)
 {
-	int i;
+	if (cpu == eenv->src_cpu)
+		return -eenv->util_delta;
+	if (cpu == eenv->dst_cpu)
+		return eenv->util_delta;
+	return 0;
+}
+
+static
+unsigned long group_max_util(struct energy_env *eenv)
+{
+	int i, delta;
 	unsigned long max_util = 0;
 
-	for_each_cpu(i, sched_group_cpus(sg))
-		max_util = max(max_util, cpu_util(i));
+	for_each_cpu(i, sched_group_cpus(eenv->sg_cap)) {
+		delta = calc_util_delta(eenv, i);
+		max_util = max(max_util, __cpu_util(i, delta));
+	}
 
 	return max_util;
 }
@@ -5374,31 +5405,36 @@ static unsigned long group_max_util(struct sched_group *sg)
  * latter is used as the estimate as it leads to a more pessimistic energy
  * estimate (more busy).
  */
-static unsigned long group_norm_util(struct sched_group *sg, int cap_idx)
+static unsigned
+long group_norm_util(struct energy_env *eenv, struct sched_group *sg)
 {
-	int i;
+	int i, delta;
 	unsigned long util_sum = 0;
-	unsigned long capacity = sg->sge->cap_states[cap_idx].cap;
+	unsigned long capacity = sg->sge->cap_states[eenv->cap_idx].cap;
 
-	for_each_cpu(i, sched_group_cpus(sg))
-		util_sum += cpu_norm_util(i, capacity);
+	for_each_cpu(i, sched_group_cpus(sg)) {
+		delta = calc_util_delta(eenv, i);
+		util_sum += __cpu_norm_util(i, capacity, delta);
+	}
 
 	if (util_sum > SCHED_CAPACITY_SCALE)
 		return SCHED_CAPACITY_SCALE;
 	return util_sum;
 }
 
-static int find_new_capacity(struct sched_group *sg,
+static int find_new_capacity(struct energy_env *eenv,
 	const struct sched_group_energy const *sge)
 {
 	int idx;
-	unsigned long util = group_max_util(sg);
+	unsigned long util = group_max_util(eenv);
 
 	for (idx = 0; idx < sge->nr_cap_states; idx++) {
 		if (sge->cap_states[idx].cap >= util)
-			return idx;
+			break;
 	}
 
+	eenv->cap_idx = idx;
+
 	return idx;
 }
 
@@ -5412,16 +5448,16 @@ static int find_new_capacity(struct sched_group *sg,
  * This can probably be done in a faster but more complex way.
  * Note: sched_group_energy() may fail when racing with sched_domain updates.
  */
-static int sched_group_energy(struct sched_group *sg_top)
+static int sched_group_energy(struct energy_env *eenv)
 {
 	struct sched_domain *sd;
 	int cpu, total_energy = 0;
 	struct cpumask visit_cpus;
 	struct sched_group *sg;
 
-	WARN_ON(!sg_top->sge);
+	WARN_ON(!eenv->sg_top->sge);
 
-	cpumask_copy(&visit_cpus, sched_group_cpus(sg_top));
+	cpumask_copy(&visit_cpus, sched_group_cpus(eenv->sg_top));
 
 	while (!cpumask_empty(&visit_cpus)) {
 		struct sched_group *sg_shared_cap = NULL;
@@ -5453,17 +5489,16 @@ static int sched_group_energy(struct sched_group *sg_top)
 				break;
 
 			do {
-				struct sched_group *sg_cap_util;
 				unsigned long group_util;
 				int sg_busy_energy, sg_idle_energy, cap_idx;
 
 				if (sg_shared_cap && sg_shared_cap->group_weight >= sg->group_weight)
-					sg_cap_util = sg_shared_cap;
+					eenv->sg_cap = sg_shared_cap;
 				else
-					sg_cap_util = sg;
+					eenv->sg_cap = sg;
 
-				cap_idx = find_new_capacity(sg_cap_util, sg->sge);
-				group_util = group_norm_util(sg, cap_idx);
+				cap_idx = find_new_capacity(eenv, sg->sge);
+				group_util = group_norm_util(eenv, sg);
 				sg_busy_energy = (group_util * sg->sge->cap_states[cap_idx].power)
 										>> SCHED_CAPACITY_SHIFT;
 				sg_idle_energy = ((SCHED_CAPACITY_SCALE-group_util) * sg->sge->idle_states[0].power)
@@ -5474,7 +5509,7 @@ static int sched_group_energy(struct sched_group *sg_top)
 				if (!sd->child)
 					cpumask_xor(&visit_cpus, &visit_cpus, sched_group_cpus(sg));
 
-				if (cpumask_equal(sched_group_cpus(sg), sched_group_cpus(sg_top)))
+				if (cpumask_equal(sched_group_cpus(sg), sched_group_cpus(eenv->sg_top)))
 					goto next_cpu;
 
 			} while (sg = sg->next, sg != sd->groups);
@@ -5483,7 +5518,8 @@ static int sched_group_energy(struct sched_group *sg_top)
 		continue;
 	}
 
-	return total_energy;
+	eenv->energy = total_energy;
+	return 0;
 }
 
 /*
-- 
1.9.1