Add powersave settings

Add an optional heuristic to allow cpus to not service interrupts during periods
of low activity, to help power conservation. If more than power_thresh cpus are
more than a standard deviation below the average load, and no cpus are
overloaded by more than a standard deviation and have more than one irq on them,
then we stop balancing to a single cpu.  If at any time we have a cpu go over a
standard deviation of load, we re-enable all the cpus for balancing.
This commit is contained in:
Neil Horman 2011-10-10 15:59:15 -04:00
parent 38b3bb825d
commit e2f6588bd4
6 changed files with 58 additions and 61 deletions

View File

@ -40,9 +40,9 @@ int debug_mode;
int numa_avail;
int need_cpu_rescan;
extern cpumask_t banned_cpus;
static int counter;
enum hp_e hint_policy = HINT_POLICY_SUBSET;
unsigned long power_thresh = ULONG_MAX;
unsigned long long cycle_count = 0;
void sleep_approx(int seconds)
{
@ -63,12 +63,14 @@ struct option lopts[] = {
{"oneshot", 0, NULL, 'o'},
{"debug", 0, NULL, 'd'},
{"hintpolicy", 1, NULL, 'h'},
{"powerthresh", 1, NULL, 'p'},
{0, 0, 0, 0}
};
static void usage(void)
{
printf("irqbalance [--oneshot | -o] [--debug | -d] [--hintpolicy= | -h [exact|subset|ignore]]");
printf("irqbalance [--oneshot | -o] [--debug | -d] [--hintpolicy= | -h [exact|subset|ignore]]\n");
printf(" [--powerthresh= | -p <off> | <n>]\n");
}
static void parse_command_line(int argc, char **argv)
@ -77,7 +79,7 @@ static void parse_command_line(int argc, char **argv)
int longind;
while ((opt = getopt_long(argc, argv,
"odh:",
"odh:p:",
lopts, &longind)) != -1) {
switch(opt) {
@ -99,6 +101,17 @@ static void parse_command_line(int argc, char **argv)
exit(1);
}
break;
case 'p':
if (!strncmp(optarg, "off", strlen(optarg)))
power_thresh = ULONG_MAX;
else {
power_thresh = strtoull(optarg, NULL, 10);
if (power_thresh == ULONG_MAX) {
usage();
exit(1);
}
}
break;
case 'o':
one_shot_mode=1;
break;
@ -153,7 +166,6 @@ static void force_rebalance_irq(struct irq_info *info, void *data __attribute__(
int main(int argc, char** argv)
{
int compute_migration_status=0;
#ifdef HAVE_GETOPT_LONG
parse_command_line(argc, argv);
@ -214,7 +226,6 @@ int main(int argc, char** argv)
printf("\n\n\n-----------------------------------------------------------------------------\n");
check_power_mode();
parse_proc_interrupts();
parse_proc_stat();
@ -231,14 +242,11 @@ int main(int argc, char** argv)
free_object_tree();
build_object_tree();
for_each_irq(NULL, force_rebalance_irq, NULL);
compute_migration_status=0;
cycle_count=0;
}
if (compute_migration_status)
if (cycle_count)
update_migration_status();
else
compute_migration_status=1;
calculate_placement();
activate_mappings();
@ -248,7 +256,7 @@ int main(int argc, char** argv)
if (one_shot_mode)
break;
clear_work_stats();
counter++;
cycle_count++;
}
free_object_tree();

View File

@ -60,6 +60,8 @@ extern int one_shot_mode;
extern int power_mode;
extern int need_cpu_rescan;
extern enum hp_e hint_policy;
extern unsigned long long cycle_count;
extern unsigned long power_thresh;
/*
* Numa node access routines

View File

@ -42,6 +42,10 @@ struct load_balance_info {
int load_sources;
unsigned long long int deviations;
long double std_deviation;
unsigned int num_within;
unsigned int num_over;
unsigned int num_under;
struct topo_obj *powersave;
};
static void gather_load_stats(struct topo_obj *obj, void *data)
@ -102,13 +106,21 @@ static void migrate_overloaded_irqs(struct topo_obj *obj, void *data)
/*
* Don't rebalance irqs on objects whose load is below the average
*/
if (obj->load <= info->avg_load)
if (obj->load <= info->avg_load) {
if ((obj->load + info->std_deviation) <= info->avg_load) {
info->num_under++;
info->powersave = obj;
} else
info->num_within++;
return;
}
deviation = obj->load - info->avg_load;
if ((deviation > info->std_deviation) &&
(g_list_length(obj->interrupts) > 1)) {
info->num_over++;
/*
* We have a cpu that is overloaded and
* has irqs that can be moved to fix that
@ -124,10 +136,21 @@ static void migrate_overloaded_irqs(struct topo_obj *obj, void *data)
* difference reaches zero
*/
for_each_irq(obj->interrupts, move_candidate_irqs, &deviation);
}
} else
info->num_within++;
}
/*
 * for_each_irq() callback: hands info to migrate_irq() together with the
 * interrupt list of the object the irq is currently assigned to and
 * rebalance_irq_list.  Presumably (judging by the argument order) this moves
 * the irq off its current object and queues it for placement again --
 * confirm against migrate_irq().  The data argument is unused.
 */
static void force_irq_migration(struct irq_info *info, void *data __attribute__((unused)))
{
migrate_irq(&info->assigned_obj->interrupts, &rebalance_irq_list, info);
}
/*
 * for_each_object() callback: clears the powersave_mode flag on obj.
 * The data argument is unused.
 */
static void clear_powersave_mode(struct topo_obj *obj, void *data __attribute__((unused)))
{
obj->powersave_mode = 0;
}
#define find_overloaded_objs(name, info) do {\
int ___load_sources;\
memset(&(info), 0, sizeof(struct load_balance_info));\
@ -145,6 +168,13 @@ void update_migration_status(void)
struct load_balance_info info;
find_overloaded_objs(cpus, info);
if (cycle_count > 5) {
if (!info.num_over && (info.num_under >= power_thresh)) {
info.powersave->powersave_mode = 1;
for_each_irq(info.powersave->interrupts, force_irq_migration, NULL);
} else if (info.num_over)
for_each_object(cpus, clear_powersave_mode, NULL);
}
find_overloaded_objs(cache_domains, info);
find_overloaded_objs(packages, info);
find_overloaded_objs(numa_nodes, info);

View File

@ -58,6 +58,9 @@ static void find_best_object(struct topo_obj *d, void *data)
}
}
if (d->powersave_mode)
return;
newload = d->load;
if (newload < best->best_cost) {
best->best = d;

View File

@ -28,54 +28,7 @@
#include "irqbalance.h"
extern int power_mode;
static uint64_t previous;
static unsigned int hysteresis;
/*
 * Sample the summed irq and softirq counters from the first line of
 * /proc/stat and update power_mode from how far they advanced since the
 * previous call.
 *
 * While the delta stays below POWER_MODE_SOFTIRQ_THRESHOLD a hysteresis
 * counter is incremented; once it exceeds POWER_MODE_HYSTERESIS, power_mode
 * is set to 1.  A delta at or above the threshold resets the counter and
 * sets power_mode back to 0.  If /proc/stat cannot be opened or its first
 * line cannot be read, nothing is updated.
 */
void check_power_mode(void)
{
	FILE *file;
	char *line = NULL;
	size_t size = 0;
	char *c;
	uint64_t irq, softirq, delta;
	int have_line;
	int i;

	file = fopen("/proc/stat", "r");
	if (!file)
		return;
	/*
	 * getline() signals failure with -1 (never 0).  The line must also be
	 * long enough for the 4-character label skipped below to lie inside
	 * the string.
	 */
	have_line = getline(&line, &size, file) >= 4;
	fclose(file);
	if (!have_line) {
		/* getline() may have allocated the buffer even when it failed */
		free(line);
		return;
	}

	/* step over the 4-character label at the start of the line */
	c = &line[4];
	/* skip the user, nice, system, idle and iowait fields */
	for (i = 0; i < 5; i++)
		(void)strtoull(c, &c, 10);
	irq = strtoull(c, &c, 10); /* irq */
	softirq = strtoull(c, &c, 10); /* softirq */

	irq += softirq;
	delta = irq - previous;

	/* NOTE(review): printed on every call, not gated by debug_mode -- confirm intended */
	printf("IRQ delta is %lu \n", (unsigned long)delta);
	if (delta < POWER_MODE_SOFTIRQ_THRESHOLD) {
		hysteresis++;
		if (hysteresis > POWER_MODE_HYSTERESIS) {
			if (debug_mode && !power_mode)
				printf("IRQ delta is %lu, switching to power mode \n", (unsigned long)delta);
			power_mode = 1;
		}
	} else {
		if (debug_mode && power_mode)
			printf("IRQ delta is %lu, switching to performance mode \n", (unsigned long)delta);
		power_mode = 0;
		hysteresis = 0;
	}
	previous = irq;
	free(line);
}

View File

@ -37,6 +37,7 @@ struct topo_obj {
uint64_t load;
enum obj_type_e obj_type;
int number;
int powersave_mode;
cpumask_t mask;
GList *interrupts;
struct topo_obj *parent;