From c1af476ab18961f04e386c54ef604436e85288e2 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Wed, 13 Feb 2013 15:17:39 -0500 Subject: [PATCH] irqbalance: add numa_node setting to polscript Often systems don't populate ACPI correctly, and inform the OS that a particular device isn't local to any node (numa_node == -1). This property in polscript allows you to override that setting, so that numa nodes can be properly specified based on documentation Signed-off-by: Neil Horman --- classify.c | 16 +++++++++++++++- irqbalance.1 | 10 +++++++++- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/classify.c b/classify.c index f6c1ae2..84a576a 100644 --- a/classify.c +++ b/classify.c @@ -54,6 +54,8 @@ static short class_codes[MAX_CLASS] = { struct user_irq_policy { int ban; int level; + int numa_node_set; + int numa_node; }; static GList *interrupts_db; @@ -176,7 +178,10 @@ get_numa_node: fclose(fd); assign_node: - new->numa_node = get_numa_node(numa_node); + if (pol->numa_node_set) + new->numa_node = get_numa_node(pol->numa_node); + else + new->numa_node = get_numa_node(numa_node); sprintf(path, "%s/local_cpus", devpath); fd = fopen(path, "r"); @@ -253,6 +258,15 @@ static void parse_user_policy_key(char *buf, struct user_irq_policy *pol) log(TO_ALL, LOG_WARNING, "Bad value for balance_level policy: %s\n", value); else pol->level = idx; + } else if (!strcasecmp("numa_node", key)) { + idx = strtoul(value, NULL, 10); + if (!get_numa_node(idx)) { + log(TO_ALL, LOG_WARNING, "NUMA node %d doesn't exist\n", + idx); + return; + } + pol->numa_node = idx; + pol->numa_node_set = 1; } else log(TO_ALL, LOG_WARNING, "Unknown key returned, ignoring: %s\n", key); diff --git a/irqbalance.1 b/irqbalance.1 index 4601d48..5c20507 100644 --- a/irqbalance.1 +++ b/irqbalance.1 @@ -108,7 +108,15 @@ Directs irqbalance to exclude the passed in irq from balancing This allows a user to override the balance level of a given irq. 
By default the balance level is determined automatically based on the pci device class of the device that owns the irq. - +.TP +.I numa_node= +This allows a user to override the numa node that sysfs indicates a given device +irq is local to. Often, systems will not specify this information in ACPI, and as a +result devices are considered equidistant from all numa nodes in a system. +This option allows for that hardware provided information to be overridden, so +that irqbalance can bias irq affinity for these devices toward their most local +node. Note that specifying a -1 here forces irqbalance to consider an interrupt +from a device to be equidistant from all nodes. .TP .B --pid= Have irqbalance write its process id to the specified file. By default no