Add back improved affinity_hint handling

The new balancer can now deal with affinity hinting again, this time in a
reasonably sane manner.  Whereas before, having an affinity hint caused
irqbalance to just assign that hint as the affinity, we now have a policy-based
operation, controlled by the hintpolicy option.  The policy can be one of:

exact - affinity_hint is applied for that irq without balancing consideration
subset - balancing takes place, but assigned affinity will be a subset of the
	 hint
ignore - affinity_hint is ignored entirely
This commit is contained in:
Neil Horman 2011-10-06 13:53:07 -04:00
parent 80157dd69a
commit 3252189949
8 changed files with 70 additions and 15 deletions

View file

@ -36,6 +36,7 @@ static void activate_mapping(struct irq_info *info, void *data __attribute__((un
{ {
char buf[PATH_MAX]; char buf[PATH_MAX];
FILE *file; FILE *file;
cpumask_t applied_mask;
/* /*
* only activate mappings for irqs that have moved * only activate mappings for irqs that have moved
@ -52,7 +53,13 @@ static void activate_mapping(struct irq_info *info, void *data __attribute__((un
if (!file) if (!file)
return; return;
cpumask_scnprintf(buf, PATH_MAX, info->assigned_obj->mask); if ((hint_policy == HINT_POLICY_EXACT) &&
(!cpus_empty(info->affinity_hint)))
applied_mask = info->affinity_hint;
else
applied_mask = info->assigned_obj->mask;
cpumask_scnprintf(buf, PATH_MAX, applied_mask);
fprintf(file, "%s", buf); fprintf(file, "%s", buf);
fclose(file); fclose(file);
info->moved = 0; /*migration is done*/ info->moved = 0; /*migration is done*/

View file

@ -25,8 +25,6 @@ int map_class_to_level[7] =
{ BALANCE_PACKAGE, BALANCE_CACHE, BALANCE_CACHE, BALANCE_NONE, BALANCE_CORE, BALANCE_CORE, BALANCE_CORE }; { BALANCE_PACKAGE, BALANCE_CACHE, BALANCE_CACHE, BALANCE_NONE, BALANCE_CORE, BALANCE_CORE, BALANCE_CORE };
int class_counts[7];
#define MAX_CLASS 0x12 #define MAX_CLASS 0x12
/* /*
* Class codes lifted from pci spec, appendix D. * Class codes lifted from pci spec, appendix D.

View file

@ -41,6 +41,19 @@ Causes irqbalance to be run once, after which the daemon exits
.B --debug .B --debug
Causes irqbalance to run in the foreground and extra debug information to be printed Causes irqbalance to run in the foreground and extra debug information to be printed
.TP
.B --hintpolicy=[exact | subset | ignore]
Set the policy for how irq kernel affinity hinting is treated. Can be one of:
.P
.I exact
irq affinity hint is applied unilaterally and never violated
.P
.I subset
irq is balanced, but the assigned object will be a subset of the affinity hint
.P
.I ignore
irq affinity hint value is completely ignored
.SH "ENVIRONMENT VARIABLES" .SH "ENVIRONMENT VARIABLES"
.TP .TP
.B IRQBALANCE_ONESHOT .B IRQBALANCE_ONESHOT

View file

@ -38,12 +38,10 @@
int one_shot_mode; int one_shot_mode;
int debug_mode; int debug_mode;
int numa_avail; int numa_avail;
int need_cpu_rescan; int need_cpu_rescan;
extern cpumask_t banned_cpus; extern cpumask_t banned_cpus;
static int counter; static int counter;
enum hp_e hint_policy = HINT_POLICY_SUBSET;
void sleep_approx(int seconds) void sleep_approx(int seconds)
@ -64,12 +62,13 @@ void sleep_approx(int seconds)
struct option lopts[] = { struct option lopts[] = {
{"oneshot", 0, NULL, 'o'}, {"oneshot", 0, NULL, 'o'},
{"debug", 0, NULL, 'd'}, {"debug", 0, NULL, 'd'},
{"hintpolicy", 1, NULL, 'h'},
{0, 0, 0, 0} {0, 0, 0, 0}
}; };
static void usage(void) static void usage(void)
{ {
printf("irqbalance [--oneshot | -o] [--debug | -d]"); printf("irqbalance [--oneshot | -o] [--debug | -d] [--hintpolicy= | -h [exact|subset|ignore]]");
} }
static void parse_command_line(int argc, char **argv) static void parse_command_line(int argc, char **argv)
@ -78,7 +77,7 @@ static void parse_command_line(int argc, char **argv)
int longind; int longind;
while ((opt = getopt_long(argc, argv, while ((opt = getopt_long(argc, argv,
"", "odh:",
lopts, &longind)) != -1) { lopts, &longind)) != -1) {
switch(opt) { switch(opt) {
@ -88,6 +87,18 @@ static void parse_command_line(int argc, char **argv)
case 'd': case 'd':
debug_mode=1; debug_mode=1;
break; break;
case 'h':
if (!strncmp(optarg, "exact", strlen(optarg)))
hint_policy = HINT_POLICY_EXACT;
else if (!strncmp(optarg, "subset", strlen(optarg)))
hint_policy = HINT_POLICY_SUBSET;
else if (!strncmp(optarg, "ignore", strlen(optarg)))
hint_policy = HINT_POLICY_IGNORE;
else {
usage();
exit(1);
}
break;
case 'o': case 'o':
one_shot_mode=1; one_shot_mode=1;
break; break;

View file

@ -16,12 +16,6 @@ extern int package_count;
extern int cache_domain_count; extern int cache_domain_count;
extern int core_count; extern int core_count;
extern char *classes[]; extern char *classes[];
extern int map_class_to_level[7];
extern int class_counts[7];
extern int debug_mode;
extern int power_mode;
extern int need_cpu_rescan;
extern int one_shot_mode;
extern void parse_cpu_tree(void); extern void parse_cpu_tree(void);
extern void clear_work_stats(void); extern void clear_work_stats(void);
@ -46,6 +40,18 @@ void clear_cpu_tree(void);
void pci_numa_scan(void); void pci_numa_scan(void);
/*===================NEW BALANCER FUNCTIONS============================*/ /*===================NEW BALANCER FUNCTIONS============================*/
/*
 * Policy for how an irq's affinity_hint is treated.  Selected on the
 * command line with --hintpolicy=[exact | subset | ignore] and stored in
 * the global hint_policy.
 */
enum hp_e {
/* ignore: the affinity hint is not taken into account */
HINT_POLICY_IGNORE,
/*
 * subset: the irq is still balanced, but when it has a non-empty hint
 * only objects whose mask intersects that hint are considered
 */
HINT_POLICY_SUBSET,
/*
 * exact: an irq with a non-empty hint gets the hint itself written as
 * its affinity, and is never picked as a candidate for migration
 */
HINT_POLICY_EXACT
};
extern int debug_mode;
extern int one_shot_mode;
extern int power_mode;
extern int need_cpu_rescan;
extern enum hp_e hint_policy;
/* /*
* Numa node access routines * Numa node access routines
*/ */

View file

@ -68,6 +68,13 @@ static void move_candidate_irqs(struct irq_info *info, void *data)
{ {
int *remaining_deviation = (int *)data; int *remaining_deviation = (int *)data;
/* never move an irq that has an affinity hint when
* hint_policy is HINT_POLICY_EXACT
*/
if (hint_policy == HINT_POLICY_EXACT)
if (!cpus_empty(info->affinity_hint))
return;
/* Don't rebalance irqs that don't want it */ /* Don't rebalance irqs that don't want it */
if (info->level == BALANCE_NONE) if (info->level == BALANCE_NONE)
return; return;

View file

@ -43,6 +43,20 @@ static void find_best_object(struct common_obj_data *d, void *data)
{ {
struct obj_placement *best = (struct obj_placement *)data; struct obj_placement *best = (struct obj_placement *)data;
uint64_t newload; uint64_t newload;
cpumask_t subset;
/*
* If the hint policy is subset, then we only want
* to consider objects that are within the irqs hint, but
* only if that irq in fact has published a hint
*/
if (hint_policy == HINT_POLICY_SUBSET) {
if (!cpus_empty(best->info->affinity_hint)) {
cpus_and(subset, best->info->affinity_hint, d->mask);
if (cpus_empty(subset))
return;
}
}
newload = d->load; newload = d->load;
if (newload < best->best_cost) { if (newload < best->best_cost) {

View file

@ -26,7 +26,6 @@
#define IRQ_TYPE_MSI 1 #define IRQ_TYPE_MSI 1
#define IRQ_TYPE_MSIX 2 #define IRQ_TYPE_MSIX 2
struct common_obj_data { struct common_obj_data {
uint64_t load; uint64_t load;
int number; int number;