irqbalance: add support for affinity_hint hook

This support is found in kernels 2.6.35 and beyond.  The new
affinity_hint hook found in /proc/irq/<num>/ can be populated
by underlying device drivers.  The cpumask in this file will
be used as a preferred affinity, allowing devices with
multiple interrupt sources (e.g. MSI-X enabled network devices)
to better control the NUMA locality of their data flows.

Signed-off-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>


git-svn-id: https://irqbalance.googlecode.com/svn/trunk@25 46b42954-3823-0410-bd82-eb80b452c9b5
pjwaskiewicz 2010-05-04 20:26:34 +00:00
parent 66da32e3da
commit 7cd766c0f4
4 changed files with 63 additions and 4 deletions
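
For context (not part of this change): the hint is published from the driver side with irq_set_affinity_hint(), which the kernel exports starting with 2.6.35, and whatever mask the driver passes is what shows up in /proc/irq/<irq>/affinity_hint. Below is a minimal sketch of that driver side, assuming a hypothetical MSI-X device with one vector per queue; the function names and the naive queue-to-CPU mapping are illustrative only.

#include <linux/interrupt.h>
#include <linux/cpumask.h>

/* Hypothetical per-queue IRQ setup for an MSI-X device (sketch only). */
static int example_setup_queue_irq(unsigned int irq, int queue,
                                   irq_handler_t handler, void *data)
{
    int cpu = queue % num_online_cpus();    /* naive queue -> CPU spread */
    int err;

    err = request_irq(irq, handler, 0, "example-queue", data);
    if (err)
        return err;

    /* Publish the preferred CPU; visible as /proc/irq/<irq>/affinity_hint */
    irq_set_affinity_hint(irq, cpumask_of(cpu));
    return 0;
}

static void example_teardown_queue_irq(unsigned int irq, void *data)
{
    /* Drop the hint before freeing the vector */
    irq_set_affinity_hint(irq, NULL);
    free_irq(irq, data);
}

A driver that keeps a queue's descriptor rings and buffers on a particular NUMA node can hint at CPUs on that node; the placement change below then honors that preference.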

View file

@@ -80,7 +80,6 @@ int main(int argc, char** argv)
     if (cache_domain_count==1)
         one_shot_mode = 1;
     if (!debug_mode)
         if (daemon(0,0))
             exit(EXIT_FAILURE);

View file

@@ -36,6 +36,26 @@ GList *interrupts;
+void get_affinity_hint(struct interrupt *irq, int number)
+{
+    char buf[PATH_MAX];
+    char *line = NULL;
+    size_t size = 0;
+    FILE *file;
+    sprintf(buf, "/proc/irq/%i/affinity_hint", number);
+    file = fopen(buf, "r");
+    if (!file)
+        return;
+    if (getline(&line, &size, file)==0) {
+        free(line);
+        fclose(file);
+        return;
+    }
+    cpumask_parse_user(line, strlen(line), irq->node_mask);
+    fclose(file);
+    free(line);
+}
+
 /*
  * This function classifies and reads various things from /proc about a specific irq
  */
@@ -84,6 +104,8 @@ static void investigate(struct interrupt *irq, int number)
             cpumask_parse_user(line, strlen(line), irq->allowed_mask);
             fclose(file);
             free(line);
+        } else if (strcmp(entry->d_name,"affinity_hint")==0) {
+            get_affinity_hint(irq, number);
         } else {
             irq->class = find_class(irq, entry->d_name);
         }
@@ -134,6 +156,8 @@ void set_interrupt_count(int number, uint64_t count)
         if (irq->number == number) {
             irq->count = count;
+            /* see if affinity_hint changed */
+            get_affinity_hint(irq, number);
             return;
         }
         item = g_list_next(item);

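For reference (not part of the diff): the hint file holds a kernel cpumask in the usual comma-separated hex-word format (e.g. "00000000,00000030"), most significant word first; a zero mask means the driver provided no hint. Below is a standalone userspace sketch that reads and decodes it, assuming at most 64 CPUs; the helper name and that limit are illustrative, not anything irqbalance uses.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <limits.h>

/* Decode "00000000,00000030" (most significant word first) into a 64-bit mask. */
static uint64_t parse_cpumask64(const char *s)
{
    uint64_t mask = 0;
    char *copy = strdup(s), *save = NULL, *tok;

    if (!copy)
        return 0;
    for (tok = strtok_r(copy, ",\n", &save); tok;
         tok = strtok_r(NULL, ",\n", &save))
        mask = (mask << 32) | strtoul(tok, NULL, 16);

    free(copy);
    return mask;
}

int main(int argc, char **argv)
{
    char path[PATH_MAX], line[256];
    FILE *f;
    uint64_t hint;

    if (argc < 2) {
        fprintf(stderr, "usage: %s <irq>\n", argv[0]);
        return 1;
    }
    snprintf(path, sizeof(path), "/proc/irq/%s/affinity_hint", argv[1]);
    f = fopen(path, "r");
    if (!f) {
        perror(path);
        return 1;
    }
    if (!fgets(line, sizeof(line), f)) {
        fclose(f);
        return 1;
    }
    fclose(f);

    hint = parse_cpumask64(line);
    if (hint)
        printf("irq %s hint mask: 0x%llx\n", argv[1], (unsigned long long)hint);
    else
        printf("irq %s: no affinity hint set\n", argv[1]);
    return 0;
}

irqbalance itself does the equivalent with cpumask_parse_user() into irq->node_mask, as the get_affinity_hint() hunk above shows.
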
View file

@@ -257,6 +257,30 @@ static void place_packages(GList *list)
 }
+
+static void place_affinity_hint(GList *list)
+{
+    /* still need to balance best workload within the affinity_hint mask */
+    GList *iter;
+    struct interrupt *irq;
+
+    iter = g_list_first(list);
+    while (iter) {
+        irq = iter->data;
+        if (irq->balance_level == BALANCE_NONE) {
+            iter = g_list_next(iter);
+            continue;
+        }
+        if ((!cpus_empty(irq->node_mask)) &&
+            (!cpus_equal(irq->mask, irq->node_mask)) &&
+            (!cpus_full(irq->node_mask))) {
+            irq->old_mask = irq->mask;
+            irq->mask = irq->node_mask;
+        }
+        iter = g_list_next(iter);
+    }
+}
+
 static void do_unroutables(void)
 {
@@ -276,7 +300,8 @@ static void do_unroutables(void)
         iter = g_list_first(packages);
         while (iter) {
             package = iter->data;
-            if (cpus_intersects(package->mask, irq->mask))
+            if (cpus_intersects(package->mask, irq->node_mask) ||
+                cpus_intersects(package->mask, irq->mask))
                 package->workload += irq->workload;
             iter = g_list_next(iter);
         }
@@ -284,14 +309,16 @@ static void do_unroutables(void)
         iter = g_list_first(cache_domains);
         while (iter) {
             cache_domain = iter->data;
-            if (cpus_intersects(cache_domain->mask, irq->mask))
+            if (cpus_intersects(cache_domain->mask, irq->node_mask)
+                || cpus_intersects(cache_domain->mask, irq->mask))
                 cache_domain->workload += irq->workload;
             iter = g_list_next(iter);
         }
         iter = g_list_first(cpus);
         while (iter) {
             cpu = iter->data;
-            if (cpus_intersects(cpu->mask, irq->mask))
+            if (cpus_intersects(cpu->mask, irq->node_mask) ||
+                cpus_intersects(cpu->mask, irq->mask))
                 cpu->workload += irq->workload;
             iter = g_list_next(iter);
         }
@@ -323,4 +350,10 @@ void calculate_placement(void)
         place_core(cache_domain);
         iter = g_list_next(iter);
     }
+
+    /*
+     * if affinity_hint is populated on irq and is not set to
+     * all CPUs (meaning it's initialized), honor that above
+     * anything in the package locality/workload.
+     */
+    place_affinity_hint(interrupts);
 }

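For clarity (not part of the diff): the override rule in place_affinity_hint() boils down to a three-part test on the hint. A sketch of that predicate with a plain 64-bit mask standing in for cpumask_t (an assumed simplification; irqbalance's real cpumask type is not limited to 64 CPUs):

#include <stdbool.h>
#include <stdint.h>

/* Follow the driver's hint only when it is populated, would actually change
 * the current assignment, and is not the "all CPUs" (no real preference) value. */
static bool should_follow_hint(uint64_t cur_mask, uint64_t hint_mask,
                               uint64_t all_cpus_mask)
{
    return hint_mask != 0 &&
           hint_mask != cur_mask &&
           hint_mask != all_cpus_mask;
}

This mirrors the cpus_empty() / cpus_equal() / cpus_full() checks in the hunk above.
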
View file

@@ -81,6 +81,9 @@ struct interrupt {
     cpumask_t numa_mask;
     cpumask_t allowed_mask;
+
+    /* user/driver provided for smarter balancing */
+    cpumask_t node_mask;
 };