diff --git a/irqbalance.c b/irqbalance.c index c229b9d..c87953c 100644 --- a/irqbalance.c +++ b/irqbalance.c @@ -40,9 +40,9 @@ int debug_mode; int numa_avail; int need_cpu_rescan; extern cpumask_t banned_cpus; -static int counter; enum hp_e hint_policy = HINT_POLICY_SUBSET; - +unsigned long power_thresh = ULONG_MAX; +unsigned long long cycle_count = 0; void sleep_approx(int seconds) { @@ -63,12 +63,14 @@ struct option lopts[] = { {"oneshot", 0, NULL, 'o'}, {"debug", 0, NULL, 'd'}, {"hintpolicy", 1, NULL, 'h'}, + {"powerthresh", 1, NULL, 'p'}, {0, 0, 0, 0} }; static void usage(void) { - printf("irqbalance [--oneshot | -o] [--debug | -d] [--hintpolicy= | -h [exact|subset|ignore]]"); + printf("irqbalance [--oneshot | -o] [--debug | -d] [--hintpolicy= | -h [exact|subset|ignore]]\n"); + printf(" [--powerthresh= | -p <off> | <n>]\n"); } static void parse_command_line(int argc, char **argv) @@ -77,7 +79,7 @@ static void parse_command_line(int argc, char **argv) int longind; while ((opt = getopt_long(argc, argv, - "odh:", + "odh:p:", lopts, &longind)) != -1) { switch(opt) { @@ -99,6 +101,17 @@ static void parse_command_line(int argc, char **argv) exit(1); } break; + case 'p': + if (!strncmp(optarg, "off", strlen(optarg))) + power_thresh = ULONG_MAX; + else { + power_thresh = strtoul(optarg, NULL, 10); /* strtoul matches the unsigned long target; overflow yields ULONG_MAX */ + if (power_thresh == ULONG_MAX) { + usage(); + exit(1); + } + } + break; case 'o': one_shot_mode=1; break; @@ -153,7 +166,6 @@ static void force_rebalance_irq(struct irq_info *info, void *data __attribute__( int main(int argc, char** argv) { - int compute_migration_status=0; #ifdef HAVE_GETOPT_LONG parse_command_line(argc, argv); @@ -214,7 +226,6 @@ int main(int argc, char** argv) printf("\n\n\n-----------------------------------------------------------------------------\n"); - check_power_mode(); parse_proc_interrupts(); parse_proc_stat(); @@ -231,14 +242,11 @@ int main(int argc, char** argv) free_object_tree(); build_object_tree(); for_each_irq(NULL, force_rebalance_irq, NULL); - 
compute_migration_status=0; + cycle_count=0; } - if (compute_migration_status) + if (cycle_count) update_migration_status(); - else - compute_migration_status=1; - calculate_placement(); activate_mappings(); @@ -248,7 +256,7 @@ int main(int argc, char** argv) if (one_shot_mode) break; clear_work_stats(); - counter++; + cycle_count++; } free_object_tree(); diff --git a/irqbalance.h b/irqbalance.h index acf7e0e..8849d0e 100644 --- a/irqbalance.h +++ b/irqbalance.h @@ -60,6 +60,8 @@ extern int one_shot_mode; extern int power_mode; extern int need_cpu_rescan; extern enum hp_e hint_policy; +extern unsigned long long cycle_count; +extern unsigned long power_thresh; /* * Numa node access routines diff --git a/irqlist.c b/irqlist.c index 5a83f4b..d57049a 100644 --- a/irqlist.c +++ b/irqlist.c @@ -42,6 +42,10 @@ struct load_balance_info { int load_sources; unsigned long long int deviations; long double std_deviation; + unsigned int num_within; + unsigned int num_over; + unsigned int num_under; + struct topo_obj *powersave; }; static void gather_load_stats(struct topo_obj *obj, void *data) @@ -102,13 +106,21 @@ static void migrate_overloaded_irqs(struct topo_obj *obj, void *data) /* * Don't rebalance irqs on objects whos load is below the average */ - if (obj->load <= info->avg_load) + if (obj->load <= info->avg_load) { + if ((obj->load + info->std_deviation) <= info->avg_load) { + info->num_under++; + info->powersave = obj; + } else + info->num_within++; return; + } deviation = obj->load - info->avg_load; if ((deviation > info->std_deviation) && (g_list_length(obj->interrupts) > 1)) { + + info->num_over++; /* * We have a cpu that is overloaded and * has irqs that can be moved to fix that @@ -124,10 +136,21 @@ static void migrate_overloaded_irqs(struct topo_obj *obj, void *data) * difference reaches zero */ for_each_irq(obj->interrupts, move_candidate_irqs, &deviation); - } + } else + info->num_within++; } +static void force_irq_migration(struct irq_info *info, void *data 
__attribute__((unused))) +{ + migrate_irq(&info->assigned_obj->interrupts, &rebalance_irq_list, info); +} + +static void clear_powersave_mode(struct topo_obj *obj, void *data __attribute__((unused))) +{ + obj->powersave_mode = 0; +} + #define find_overloaded_objs(name, info) do {\ int ___load_sources;\ memset(&(info), 0, sizeof(struct load_balance_info));\ @@ -145,6 +168,13 @@ void update_migration_status(void) struct load_balance_info info; find_overloaded_objs(cpus, info); + if (cycle_count > 5) { + if (!info.num_over && info.powersave && (info.num_under >= power_thresh)) { /* powersave stays NULL when no cpu was counted as under-loaded (reachable with a threshold of 0) */ + info.powersave->powersave_mode = 1; + for_each_irq(info.powersave->interrupts, force_irq_migration, NULL); + } else if (info.num_over) + for_each_object(cpus, clear_powersave_mode, NULL); + } find_overloaded_objs(cache_domains, info); find_overloaded_objs(packages, info); find_overloaded_objs(numa_nodes, info); diff --git a/placement.c b/placement.c index 0e0a998..cfa419e 100644 --- a/placement.c +++ b/placement.c @@ -58,6 +58,9 @@ static void find_best_object(struct topo_obj *d, void *data) } } + if (d->powersave_mode) + return; + newload = d->load; if (newload < best->best_cost) { best->best = d; diff --git a/powermode.c b/powermode.c index 809cae8..82ba490 100644 --- a/powermode.c +++ b/powermode.c @@ -28,54 +28,7 @@ #include "irqbalance.h" -extern int power_mode; - -static uint64_t previous; - -static unsigned int hysteresis; - void check_power_mode(void) { - FILE *file; - char *line = NULL; - size_t size = 0; - char *c; - uint64_t dummy __attribute__((unused)); - uint64_t irq, softirq; - file = fopen("/proc/stat", "r"); - if (!file) - return; - if (getline(&line, &size, file)==0) - size=0; - fclose(file); - if (!line) - return; - c=&line[4]; - dummy = strtoull(c, &c, 10); /* user */ - dummy = strtoull(c, &c, 10); /* nice */ - dummy = strtoull(c, &c, 10); /* system */ - dummy = strtoull(c, &c, 10); /* idle */ - dummy = strtoull(c, &c, 10); /* iowait */ - irq = strtoull(c, &c, 10); /* irq */ - softirq = 
strtoull(c, &c, 10); /* softirq */ - - - irq += softirq; - printf("IRQ delta is %lu \n", (unsigned long)(irq - previous) ); - if (irq - previous < POWER_MODE_SOFTIRQ_THRESHOLD) { - hysteresis++; - if (hysteresis > POWER_MODE_HYSTERESIS) { - if (debug_mode && !power_mode) - printf("IRQ delta is %lu, switching to power mode \n", (unsigned long)(irq - previous) ); - power_mode = 1; - } - } else { - if (debug_mode && power_mode) - printf("IRQ delta is %lu, switching to performance mode \n", (unsigned long)(irq - previous) ); - power_mode = 0; - hysteresis = 0; - } - previous = irq; - free(line); } diff --git a/types.h b/types.h index c26617f..1fd2be9 100644 --- a/types.h +++ b/types.h @@ -37,6 +37,7 @@ struct topo_obj { uint64_t load; enum obj_type_e obj_type; int number; + int powersave_mode; cpumask_t mask; GList *interrupts; struct topo_obj *parent;