Implement user policy script callout

Issue 35:
http://code.google.com/p/irqbalance/issues/detail?id=35

Indicated that the --banscript option wasn't really sufficient for user policy
needs in irqbalance.  As such, I've improved the implementation and added the
--policyscript option, which allows for a script to return an arbitrary number
of key=value pairs that can direct the behavior of irqbalance for individual
irqs.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
This commit is contained in:
Neil Horman 2012-10-26 16:24:07 -04:00
parent 29ff67c301
commit a8adea2e95
4 changed files with 153 additions and 9 deletions

View file

@ -51,6 +51,11 @@ static short class_codes[MAX_CLASS] = {
IRQ_OTHER,
};
/*
 * Per-irq policy as reported by the user policy script.  Each field holds
 * -1 when the script gave no directive for it, in which case the built-in
 * default is used.
 */
struct user_irq_policy {
	/* -1: no directive, 0: "ban=false", 1: "ban=true" */
	int ban;

	/*
	 * -1: no directive, otherwise the index of the requested
	 * balance_level (none=0, package=1, cache=2, core=3)
	 */
	int level;
};
static GList *interrupts_db;
static GList *new_irq_list;
static GList *banned_irqs;
@ -94,7 +99,7 @@ void add_banned_irq(int irq)
* devpath points to the device directory in sysfs for the
* related device
*/
static struct irq_info *add_one_irq_to_db(const char *devpath, int irq)
static struct irq_info *add_one_irq_to_db(const char *devpath, int irq, struct user_irq_policy *pol)
{
int class = 0;
int rc;
@ -158,7 +163,10 @@ static struct irq_info *add_one_irq_to_db(const char *devpath, int irq)
goto get_numa_node;
new->class = class_codes[class];
new->level = map_class_to_level[class_codes[class]];
if (pol->level >= 0)
new->level = pol->level;
else
new->level = map_class_to_level[class_codes[class]];
get_numa_node:
numa_node = -1;
@ -208,6 +216,103 @@ out:
return new;
}
/*
 * Parse one "key=value" line produced by the user policy script and record
 * the result in *pol.
 *
 * buf is modified in place: a trailing newline (lines read with fgets() carry
 * one) is removed and the '=' separator is overwritten with a NUL so that the
 * key and the value become two separate strings.  Keys and values are matched
 * case-insensitively.
 *
 * Recognized keys:
 *   ban=[true | false]                          sets pol->ban to 1 / 0
 *   balance_level=[none | package | cache | core]
 *                                               sets pol->level to 0..3
 *
 * Lines without '=', unknown keys and unknown values are reported (on stdout
 * in debug mode, through syslog otherwise) and leave *pol unchanged.
 */
static void parse_user_policy_key(char *buf, struct user_irq_policy *pol)
{
	/* The position of a name in this table is the value stored in pol->level */
	static const char *const levelvals[] = { "none", "package", "cache", "core" };
	const int nr_levels = (int)(sizeof(levelvals) / sizeof(levelvals[0]));
	char *key, *value, *end;
	int idx;

	/* Drop the line terminator first so it never leaks into a message */
	end = strchr(buf, '\n');
	if (end)
		*end = '\0';

	key = buf;
	value = strchr(buf, '=');
	if (!value) {
		if (!debug_mode)
			syslog(LOG_WARNING, "Bad format for policy, ignoring: %s\n", buf);
		else
			printf("Bad format for policy, ignoring: %s\n", buf);
		return;
	}

	/* NULL terminate the key and advance value to the start of the value
	 * string
	 */
	*value = '\0';
	value++;

	if (!strcasecmp("ban", key)) {
		if (!strcasecmp("false", value)) {
			pol->ban = 0;
		} else if (!strcasecmp("true", value)) {
			pol->ban = 1;
		} else {
			if (!debug_mode)
				syslog(LOG_WARNING, "Unknown value for ban policy: %s\n", value);
			else
				printf("Unknown value for ban policy: %s\n", value);
		}
		return;
	}

	if (!strcasecmp("balance_level", key)) {
		for (idx = 0; idx < nr_levels; idx++) {
			if (!strcasecmp(levelvals[idx], value))
				break;
		}

		if (idx < nr_levels) {
			pol->level = idx;
		} else {
			if (!debug_mode)
				syslog(LOG_WARNING, "Bad value for balance_level policy: %s\n", value);
			else
				printf("Bad value for balance_level policy: %s\n", value);
		}
		return;
	}

	if (!debug_mode)
		syslog(LOG_WARNING, "Unknown key returned, ignoring: %s\n", key);
	else
		printf("Unknown key returned, ignoring: %s\n", key);
}
/*
 * Calls out to a possibly user defined script to get user assigned policy
 * aspects for a given irq.  A value of -1 in a given field indicates no
 * policy was given and that system defaults should be used.
 *
 * path - passed to the script as its first argument
 * irq  - irq number, passed to the script as its second argument
 * pol  - receives the policy; it is always reset to "no policy" (-1 in every
 *        field) first, so it is valid even when no script is configured or
 *        the script cannot be started
 *
 * The script is started through popen() as "exec <polscript> <path> <irq>"
 * and every line it prints on stdout is handed to parse_user_policy_key().
 */
static void get_irq_user_policy(char *path, int irq, struct user_irq_policy *pol)
{
	char *cmd;
	size_t cmdlen;
	FILE *output;
	char buffer[128];

	pol->ban = -1;
	pol->level = -1;

	/* Return defaults if no script was given */
	if (!polscript)
		return;

	/*
	 * The 64 bytes of slack cover "exec ", the separators, the decimal
	 * irq number and the terminating NUL.  alloca() has no failure
	 * return, so there is nothing to check here.
	 */
	cmdlen = strlen(path) + strlen(polscript) + 64;
	cmd = alloca(cmdlen);
	snprintf(cmd, cmdlen, "exec %s %s %d", polscript, path, irq);

	output = popen(cmd, "r");
	if (!output) {
		if (debug_mode)
			printf("Unable to execute user policy script %s\n", polscript);
		else
			syslog(LOG_WARNING, "Unable to execute user policy script %s\n", polscript);
		return;
	}

	/*
	 * Stop at the first failed read, be it end-of-file or an error.
	 * Testing only feof() would spin forever after a read error, since
	 * the end-of-file indicator is never set in that case.
	 */
	while (fgets(buffer, sizeof(buffer), output))
		parse_user_policy_key(buffer, pol);

	pclose(output);
}
static int check_for_irq_ban(char *path, int irq)
{
char *cmd;
@ -220,7 +325,7 @@ static int check_for_irq_ban(char *path, int irq)
if (!cmd)
return 0;
sprintf(cmd, "%s %s %d",banscript, path, irq);
sprintf(cmd, "%s %s %d > /dev/null",banscript, path, irq);
rc = system(cmd);
/*
@ -256,6 +361,7 @@ static void build_one_dev_entry(const char *dirname)
int irqnum;
struct irq_info *new;
char path[PATH_MAX];
struct user_irq_policy pol;
sprintf(path, "%s/%s/msi_irqs", SYSDEV_DIR, dirname);
@ -268,12 +374,13 @@ static void build_one_dev_entry(const char *dirname)
break;
irqnum = strtol(entry->d_name, NULL, 10);
if (irqnum) {
get_irq_user_policy(path, irqnum, &pol);
sprintf(path, "%s/%s", SYSDEV_DIR, dirname);
if (check_for_irq_ban(path, irqnum)) {
if ((pol.ban == 1) || (check_for_irq_ban(path, irqnum))) {
add_banned_irq(irqnum);
continue;
}
new = add_one_irq_to_db(path, irqnum);
new = add_one_irq_to_db(path, irqnum, &pol);
if (!new)
continue;
new->type = IRQ_TYPE_MSIX;
@ -295,12 +402,13 @@ static void build_one_dev_entry(const char *dirname)
*/
if (irqnum) {
sprintf(path, "%s/%s", SYSDEV_DIR, dirname);
if (check_for_irq_ban(path, irqnum)) {
get_irq_user_policy(path, irqnum, &pol);
if ((pol.ban == 1) || (check_for_irq_ban(path, irqnum))) {
add_banned_irq(irqnum);
goto done;
}
new = add_one_irq_to_db(path, irqnum);
new = add_one_irq_to_db(path, irqnum, &pol);
if (!new)
goto done;
new->type = IRQ_TYPE_LEGACY;

View file

@ -86,6 +86,28 @@ should be ignored by irqbalance completely (see --banirq above). Use of this
script provides users the ability to dynamically select which irqs get excluded
from balancing, and provides an opportunity for manual affinity setting in one
single code point.
.B NOTE: The banscript option is deprecated. Please use the
.B --policyscript option instead
.TP
.B --policyscript=<script>
A superset of the functionality found in the --banscript option. When
specified, the referenced script will execute once for each discovered irq. The
script may specify zero or more key=value pairs that will guide irqbalance in
the management of that irq. The script will be passed the path to the sysfs
device that owns the irq and the irq number itself as the first and second
arguments. Key=value pairs are printed by the script on stdout, and will be
captured and interpreted by irqbalance. Irqbalance expects a zero exit code
from the provided utility. Recognized key=value pairs are:
.TP
.I ban=[true | false]
Directs irqbalance to exclude the passed in irq from balancing
.TP
.I balance_level=[none | package | cache | core]
This allows a user to override the balance level of a given irq. By default the
balance level is determined automatically based on the pci device class of the
device that owns the irq.
.TP
.B --pid=<file>

View file

@ -53,6 +53,7 @@ unsigned long power_thresh = ULONG_MAX;
unsigned long long cycle_count = 0;
char *pidfile = NULL;
char *banscript = NULL;
char *polscript = NULL;
void sleep_approx(int seconds)
{
@ -77,6 +78,7 @@ struct option lopts[] = {
{"powerthresh", 1, NULL, 'p'},
{"banirq", 1 , NULL, 'i'},
{"banscript", 1, NULL, 'b'},
{"policyscript", 1, NULL, 'l'},
{"pid", 1, NULL, 's'},
{0, 0, 0, 0}
};
@ -84,7 +86,7 @@ struct option lopts[] = {
static void usage(void)
{
printf("irqbalance [--oneshot | -o] [--debug | -d] [--foreground | -f] [--hintpolicy= | -h [exact|subset|ignore]]\n");
printf(" [--powerthresh= | -p <off> | <n>] [--banirq= | -i <n>]\n");
printf(" [--powerthresh= | -p <off> | <n>] [--banirq= | -i <n>] [--policyscript=<script>]\n");
}
static void parse_command_line(int argc, char **argv)
@ -94,7 +96,7 @@ static void parse_command_line(int argc, char **argv)
unsigned long val;
while ((opt = getopt_long(argc, argv,
"odfh:i:p:s:b:",
"odfh:i:p:s:b:l:",
lopts, &longind)) != -1) {
switch(opt) {
@ -132,6 +134,9 @@ static void parse_command_line(int argc, char **argv)
}
add_banned_irq((int)val);
break;
case 'l':
polscript = strdup(optarg);
break;
case 'p':
if (!strncmp(optarg, "off", strlen(optarg)))
power_thresh = ULONG_MAX;
@ -249,6 +254,14 @@ int main(int argc, char** argv)
printf("This machine seems not NUMA capable.\n");
}
if (banscript) {
char *note = "Please note that --banscript is deprecated, please use --policyscript instead";
if (debug_mode)
printf("%s\n", note);
else
syslog(LOG_WARNING, "%s\n", note);
}
action.sa_handler = handler;
sigemptyset(&action.sa_mask);
action.sa_flags = 0;

View file

@ -69,6 +69,7 @@ extern enum hp_e hint_policy;
extern unsigned long long cycle_count;
extern unsigned long power_thresh;
extern char *banscript;
extern char *polscript;
/*
* Numa node access routines