Add back improved affinity_hint handling

The new balancer can now deal with affinity hinting again, this time in a
reasonably sane manner.  Whereas before having an affinity hint caused
irqbalance to just assign that hint as the affinity, we now have a policy-based
operation, controlled by the hintpolicy option.  The policy can be one of:

exact - affinity_hint is applied for that irq without balancing consideration
subset - balancing takes place, but assigned affinity will be a subset of the
	 hint
ignore - affinity_hint is ignored entirely
This commit is contained in:
Neil Horman 2011-10-06 13:53:07 -04:00
parent 80157dd69a
commit 3252189949
8 changed files with 70 additions and 15 deletions

View File

@ -36,6 +36,7 @@ static void activate_mapping(struct irq_info *info, void *data __attribute__((un
{
char buf[PATH_MAX];
FILE *file;
cpumask_t applied_mask;
/*
* only activate mappings for irqs that have moved
@ -52,7 +53,13 @@ static void activate_mapping(struct irq_info *info, void *data __attribute__((un
if (!file)
return;
cpumask_scnprintf(buf, PATH_MAX, info->assigned_obj->mask);
if ((hint_policy == HINT_POLICY_EXACT) &&
(!cpus_empty(info->affinity_hint)))
applied_mask = info->affinity_hint;
else
applied_mask = info->assigned_obj->mask;
cpumask_scnprintf(buf, PATH_MAX, applied_mask);
fprintf(file, "%s", buf);
fclose(file);
info->moved = 0; /*migration is done*/

View File

@ -25,8 +25,6 @@ int map_class_to_level[7] =
{ BALANCE_PACKAGE, BALANCE_CACHE, BALANCE_CACHE, BALANCE_NONE, BALANCE_CORE, BALANCE_CORE, BALANCE_CORE };
int class_counts[7];
#define MAX_CLASS 0x12
/*
* Class codes lifted from pci spec, appendix D.

View File

@ -41,6 +41,19 @@ Causes irqbalance to be run once, after which the daemon exits
.B --debug
Causes irqbalance to run in the foreground and extra debug information to be printed
.TP
.B --hintpolicy=[exact | subset | ignore]
Set the policy for how irq kernel affinity hinting is treated. Can be one of:
.P
.I exact
irq affinity hint is applied unilaterally and never violated
.P
.I subset
irq is balanced, but the assigned object will be a subset of the affinity hint
.P
.I ignore
irq affinity hint value is completely ignored
.SH "ENVIRONMENT VARIABLES"
.TP
.B IRQBALANCE_ONESHOT

View File

@ -38,12 +38,10 @@
int one_shot_mode;
int debug_mode;
int numa_avail;
int need_cpu_rescan;
extern cpumask_t banned_cpus;
static int counter;
enum hp_e hint_policy = HINT_POLICY_SUBSET;
void sleep_approx(int seconds)
@ -64,12 +62,13 @@ void sleep_approx(int seconds)
/*
 * Long-option table handed to getopt_long().  Each entry pairs a --name
 * with the short option character stored in .val; only "hintpolicy"
 * takes an argument.  The all-zero entry terminates the table.
 */
struct option lopts[] = {
	{ .name = "oneshot",    .has_arg = no_argument,       .flag = NULL, .val = 'o' },
	{ .name = "debug",      .has_arg = no_argument,       .flag = NULL, .val = 'd' },
	{ .name = "hintpolicy", .has_arg = required_argument, .flag = NULL, .val = 'h' },
	{ .name = NULL,         .has_arg = 0,                 .flag = NULL, .val = 0 }
};
/*
 * Print the command-line synopsis to stdout.
 *
 * Emits a single newline-terminated line listing every supported option,
 * including --hintpolicy / -h and the values it accepts.  The older
 * synopsis without the hintpolicy option is no longer printed in front
 * of it.
 */
static void usage(void)
{
	printf("irqbalance [--oneshot | -o] [--debug | -d] [--hintpolicy= | -h [exact|subset|ignore]]\n");
}
static void parse_command_line(int argc, char **argv)
@ -78,7 +77,7 @@ static void parse_command_line(int argc, char **argv)
int longind;
while ((opt = getopt_long(argc, argv,
"",
"odh:",
lopts, &longind)) != -1) {
switch(opt) {
@ -88,6 +87,18 @@ static void parse_command_line(int argc, char **argv)
case 'd':
debug_mode=1;
break;
case 'h':
if (!strncmp(optarg, "exact", strlen(optarg)))
hint_policy = HINT_POLICY_EXACT;
else if (!strncmp(optarg, "subset", strlen(optarg)))
hint_policy = HINT_POLICY_SUBSET;
else if (!strncmp(optarg, "ignore", strlen(optarg)))
hint_policy = HINT_POLICY_IGNORE;
else {
usage();
exit(1);
}
break;
case 'o':
one_shot_mode=1;
break;

View File

@ -16,12 +16,6 @@ extern int package_count;
extern int cache_domain_count;
extern int core_count;
extern char *classes[];
extern int map_class_to_level[7];
extern int class_counts[7];
extern int debug_mode;
extern int power_mode;
extern int need_cpu_rescan;
extern int one_shot_mode;
extern void parse_cpu_tree(void);
extern void clear_work_stats(void);
@ -46,6 +40,18 @@ void clear_cpu_tree(void);
void pci_numa_scan(void);
/*===================NEW BALANCER FUNCTIONS============================*/
/*
 * How a published affinity_hint influences where an irq is placed.
 * Selected on the command line with --hintpolicy.
 */
enum hp_e {
	HINT_POLICY_IGNORE = 0,	/* affinity_hint is ignored entirely */
	HINT_POLICY_SUBSET = 1,	/* balance, but only onto objects overlapping the hint */
	HINT_POLICY_EXACT = 2	/* apply the hint as the affinity, without balancing */
};
extern int debug_mode;
extern int one_shot_mode;
extern int power_mode;
extern int need_cpu_rescan;
extern enum hp_e hint_policy;
/*
* Numa node access routines
*/

View File

@ -68,6 +68,13 @@ static void move_candidate_irqs(struct irq_info *info, void *data)
{
int *remaining_deviation = (int *)data;
/* never move an irq that has an affinity hint when
* hint_policy is HINT_POLICY_EXACT
*/
if (hint_policy == HINT_POLICY_EXACT)
if (!cpus_empty(info->affinity_hint))
return;
/* Don't rebalance irqs that don't want it */
if (info->level == BALANCE_NONE)
return;

View File

@ -43,6 +43,20 @@ static void find_best_object(struct common_obj_data *d, void *data)
{
struct obj_placement *best = (struct obj_placement *)data;
uint64_t newload;
cpumask_t subset;
/*
* If the hint policy is subset, then we only want
* to consider objects that are within the irq's hint, but
* only if that irq has in fact published a hint
*/
if (hint_policy == HINT_POLICY_SUBSET) {
if (!cpus_empty(best->info->affinity_hint)) {
cpus_and(subset, best->info->affinity_hint, d->mask);
if (cpus_empty(subset))
return;
}
}
newload = d->load;
if (newload < best->best_cost) {

View File

@ -26,7 +26,6 @@
#define IRQ_TYPE_MSI 1
#define IRQ_TYPE_MSIX 2
struct common_obj_data {
uint64_t load;
int number;