856435ee32
Original code used an old PCI-SIG spec for PCI class code definition: "PCI Local Bus Specification Revision 3.0". In fact, the PCI class code definition in this spec has been obsoleted for a long time. Today, PCI-SIG uses a separate spec for PCI class code definition: "PCI Code and ID Assignment Specification v1.5". Without the fix, it is possible that new PCI devices are classified incorrectly by irqbalance code. Two major codes and one sub-class code were added in the change. Signed-off-by: Yong Yang <yangoliver@gmail.com>
683 lines
13 KiB
C
683 lines
13 KiB
C
#include "config.h"
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <unistd.h>
|
|
#include <sys/types.h>
|
|
#include <dirent.h>
|
|
#include <assert.h>
|
|
|
|
#include "irqbalance.h"
|
|
#include "types.h"
|
|
|
|
|
|
/*
 * Human-readable names of the interrupt classes.  The table ends with a
 * NULL sentinel so it can be walked without knowing its length.
 */
char *classes[] = {
	"other",
	"legacy",
	"storage",
	"video",
	"ethernet",
	"gbit-ethernet",
	"10gbit-ethernet",
	"virt-event",
	NULL
};
|
|
|
|
static int map_class_to_level[8] =
|
|
{ BALANCE_PACKAGE, BALANCE_CACHE, BALANCE_CORE, BALANCE_CORE, BALANCE_CORE, BALANCE_CORE, BALANCE_CORE, BALANCE_CORE };
|
|
|
|
struct user_irq_policy {
|
|
int ban;
|
|
int level;
|
|
int numa_node_set;
|
|
int numa_node;
|
|
enum hp_e hintpolicy;
|
|
};
|
|
|
|
/* Every irq currently tracked for balancing (struct irq_info entries). */
static GList *interrupts_db = NULL;

/* Irqs excluded by policy or ban script; emptied by free_irq_db(). */
static GList *banned_irqs = NULL;

/* Irqs banned through add_cl_banned_irq(); not touched by free_irq_db(). */
static GList *cl_banned_irqs = NULL;
|
|
|
|
/* sysfs directory holding one sub-directory per PCI device */
#define SYSDEV_DIR "/sys/bus/pci/devices"

/* number of entries in the PCI base class lookup table */
#define PCI_MAX_CLASS 0x14

/* number of entries in the serial bus sub-class lookup table */
#define PCI_MAX_SERIAL_SUBCLASS 0x81
|
|
|
|
static int get_pci_irq_class(int pci_class)
|
|
{
|
|
int major = pci_class >> 16;
|
|
int sub = (pci_class & 0xFF00) >> 8;
|
|
short irq_class = IRQ_NODEF;
|
|
/*
|
|
* Class codes lifted from below PCI-SIG spec:
|
|
*
|
|
* PCI Code and ID Assignment Specification v1.5
|
|
*
|
|
* and mapped to irqbalance types here.
|
|
*
|
|
* IRQ_NODEF will go through classification by PCI sub-class code.
|
|
*/
|
|
static short major_class_codes[PCI_MAX_CLASS] = {
|
|
IRQ_OTHER,
|
|
IRQ_SCSI,
|
|
IRQ_ETH,
|
|
IRQ_VIDEO,
|
|
IRQ_OTHER,
|
|
IRQ_OTHER,
|
|
IRQ_LEGACY,
|
|
IRQ_OTHER,
|
|
IRQ_OTHER,
|
|
IRQ_LEGACY,
|
|
IRQ_OTHER,
|
|
IRQ_OTHER,
|
|
IRQ_NODEF,
|
|
IRQ_ETH,
|
|
IRQ_SCSI,
|
|
IRQ_OTHER,
|
|
IRQ_OTHER,
|
|
IRQ_OTHER,
|
|
IRQ_LEGACY,
|
|
IRQ_LEGACY,
|
|
};
|
|
|
|
/*
|
|
* All sub-class code for serial bus controllers.
|
|
* The major class code is 0xc.
|
|
*/
|
|
static short serial_sub_codes[PCI_MAX_SERIAL_SUBCLASS] = {
|
|
IRQ_LEGACY,
|
|
IRQ_LEGACY,
|
|
IRQ_LEGACY,
|
|
IRQ_LEGACY,
|
|
IRQ_SCSI,
|
|
IRQ_LEGACY,
|
|
IRQ_SCSI,
|
|
IRQ_LEGACY,
|
|
IRQ_LEGACY,
|
|
IRQ_LEGACY,
|
|
[0xa ... 0x7f] = IRQ_NODEF,
|
|
IRQ_LEGACY,
|
|
};
|
|
|
|
/*
|
|
* Check major class code first
|
|
*/
|
|
|
|
if (major >= PCI_MAX_CLASS)
|
|
return IRQ_NODEF;
|
|
|
|
switch (major) {
|
|
case 0xc: /* Serial bus class */
|
|
if (sub >= PCI_MAX_SERIAL_SUBCLASS)
|
|
return IRQ_NODEF;
|
|
irq_class = serial_sub_codes[sub];
|
|
break;
|
|
default: /* All other PCI classes */
|
|
irq_class = major_class_codes[major];
|
|
break;
|
|
}
|
|
|
|
return irq_class;
|
|
}
|
|
|
|
/*
 * GCompareFunc-style comparator for struct irq_info entries, keyed on the
 * irq number only.  Yields zero when both entries have the same irq, and
 * the difference of the two irq numbers otherwise.
 */
static gint compare_ints(gconstpointer a, gconstpointer b)
{
	return ((const struct irq_info *)a)->irq - ((const struct irq_info *)b)->irq;
}
|
|
|
|
/*
 * Record irq as banned in *list.
 *
 * Does nothing when the irq is already present.  Otherwise a zeroed
 * irq_info is allocated, flagged IRQ_FLAG_BANNED and appended; *list is
 * updated to the new list head.  An allocation failure is logged and the
 * list is left untouched.
 */
static void add_banned_irq(int irq, GList **list)
{
	struct irq_info key;
	struct irq_info *banned;

	/* only the irq member is consulted by compare_ints() */
	key.irq = irq;
	if (g_list_find_custom(*list, &key, compare_ints) != NULL)
		return;

	banned = calloc(1, sizeof(struct irq_info));
	if (banned == NULL) {
		log(TO_CONSOLE, LOG_WARNING, "No memory to ban irq %d\n", irq);
		return;
	}

	banned->irq = irq;
	banned->flags |= IRQ_FLAG_BANNED;
	banned->hint_policy = HINT_POLICY_EXACT;

	*list = g_list_append(*list, banned);
}
|
|
|
|
void add_cl_banned_irq(int irq)
|
|
{
|
|
add_banned_irq(irq, &cl_banned_irqs);
|
|
}
|
|
|
|
|
|
static int is_banned_irq(int irq)
|
|
{
|
|
GList *entry;
|
|
struct irq_info find;
|
|
|
|
find.irq = irq;
|
|
|
|
entry = g_list_find_custom(banned_irqs, &find, compare_ints);
|
|
return entry ? 1:0;
|
|
}
|
|
|
|
|
|
/*
|
|
* Inserts an irq_info struct into the intterupts_db list
|
|
* devpath points to the device directory in sysfs for the
|
|
* related device
|
|
*/
|
|
static struct irq_info *add_one_irq_to_db(const char *devpath, int irq, struct user_irq_policy *pol)
|
|
{
|
|
int pci_class = 0;
|
|
int irq_class = IRQ_OTHER;
|
|
int rc;
|
|
struct irq_info *new, find;
|
|
int numa_node;
|
|
char path[PATH_MAX];
|
|
FILE *fd;
|
|
char *lcpu_mask;
|
|
GList *entry;
|
|
ssize_t ret;
|
|
size_t blen;
|
|
|
|
/*
|
|
* First check to make sure this isn't a duplicate entry
|
|
*/
|
|
find.irq = irq;
|
|
entry = g_list_find_custom(interrupts_db, &find, compare_ints);
|
|
if (entry) {
|
|
log(TO_CONSOLE, LOG_INFO, "DROPPING DUPLICATE ENTRY FOR IRQ %d on path %s\n", irq, devpath);
|
|
return NULL;
|
|
}
|
|
|
|
if (is_banned_irq(irq)) {
|
|
log(TO_ALL, LOG_INFO, "SKIPPING BANNED IRQ %d\n", irq);
|
|
return NULL;
|
|
}
|
|
|
|
new = calloc(sizeof(struct irq_info), 1);
|
|
if (!new)
|
|
return NULL;
|
|
|
|
new->irq = irq;
|
|
new->class = IRQ_OTHER;
|
|
new->hint_policy = pol->hintpolicy;
|
|
|
|
interrupts_db = g_list_append(interrupts_db, new);
|
|
|
|
sprintf(path, "%s/class", devpath);
|
|
|
|
fd = fopen(path, "r");
|
|
|
|
if (!fd) {
|
|
perror("Can't open class file: ");
|
|
goto get_numa_node;
|
|
}
|
|
|
|
rc = fscanf(fd, "%x", &pci_class);
|
|
fclose(fd);
|
|
|
|
if (!rc)
|
|
goto get_numa_node;
|
|
|
|
|
|
/*
|
|
* Map PCI class code to irq class
|
|
*/
|
|
irq_class = get_pci_irq_class(pci_class);
|
|
|
|
if (irq_class < 0) {
|
|
log(TO_CONSOLE, LOG_WARNING, "Invalid PCI class code %d\n", pci_class);
|
|
goto get_numa_node;
|
|
}
|
|
|
|
new->class = irq_class;
|
|
if (pol->level >= 0)
|
|
new->level = pol->level;
|
|
else
|
|
new->level = map_class_to_level[irq_class];
|
|
|
|
get_numa_node:
|
|
numa_node = -1;
|
|
if (numa_avail) {
|
|
sprintf(path, "%s/numa_node", devpath);
|
|
fd = fopen(path, "r");
|
|
if (fd) {
|
|
rc = fscanf(fd, "%d", &numa_node);
|
|
fclose(fd);
|
|
}
|
|
}
|
|
|
|
if (pol->numa_node_set == 1)
|
|
new->numa_node = get_numa_node(pol->numa_node);
|
|
else
|
|
new->numa_node = get_numa_node(numa_node);
|
|
|
|
sprintf(path, "%s/local_cpus", devpath);
|
|
fd = fopen(path, "r");
|
|
if (!fd) {
|
|
cpus_setall(new->cpumask);
|
|
goto assign_affinity_hint;
|
|
}
|
|
lcpu_mask = NULL;
|
|
ret = getline(&lcpu_mask, &blen, fd);
|
|
fclose(fd);
|
|
if (ret <= 0) {
|
|
cpus_setall(new->cpumask);
|
|
} else {
|
|
cpumask_parse_user(lcpu_mask, ret, new->cpumask);
|
|
}
|
|
free(lcpu_mask);
|
|
|
|
assign_affinity_hint:
|
|
cpus_clear(new->affinity_hint);
|
|
sprintf(path, "/proc/irq/%d/affinity_hint", irq);
|
|
fd = fopen(path, "r");
|
|
if (!fd)
|
|
goto out;
|
|
lcpu_mask = NULL;
|
|
ret = getline(&lcpu_mask, &blen, fd);
|
|
fclose(fd);
|
|
if (ret <= 0)
|
|
goto out;
|
|
cpumask_parse_user(lcpu_mask, ret, new->affinity_hint);
|
|
free(lcpu_mask);
|
|
out:
|
|
log(TO_CONSOLE, LOG_INFO, "Adding IRQ %d to database\n", irq);
|
|
return new;
|
|
}
|
|
|
|
static void parse_user_policy_key(char *buf, int irq, struct user_irq_policy *pol)
|
|
{
|
|
char *key, *value, *end;
|
|
char *levelvals[] = { "none", "package", "cache", "core" };
|
|
int idx;
|
|
int key_set = 1;
|
|
|
|
key = buf;
|
|
value = strchr(buf, '=');
|
|
|
|
if (!value) {
|
|
log(TO_SYSLOG, LOG_WARNING, "Bad format for policy, ignoring: %s\n", buf);
|
|
return;
|
|
}
|
|
|
|
/* NULL terminate the key and advance value to the start of the value
|
|
* string
|
|
*/
|
|
*value = '\0';
|
|
value++;
|
|
end = strchr(value, '\n');
|
|
if (end)
|
|
*end = '\0';
|
|
|
|
if (!strcasecmp("ban", key)) {
|
|
if (!strcasecmp("false", value))
|
|
pol->ban = 0;
|
|
else if (!strcasecmp("true", value))
|
|
pol->ban = 1;
|
|
else {
|
|
key_set = 0;
|
|
log(TO_ALL, LOG_WARNING, "Unknown value for ban poilcy: %s\n", value);
|
|
}
|
|
} else if (!strcasecmp("balance_level", key)) {
|
|
for (idx=0; idx<4; idx++) {
|
|
if (!strcasecmp(levelvals[idx], value))
|
|
break;
|
|
}
|
|
|
|
if (idx>3) {
|
|
key_set = 0;
|
|
log(TO_ALL, LOG_WARNING, "Bad value for balance_level policy: %s\n", value);
|
|
} else
|
|
pol->level = idx;
|
|
} else if (!strcasecmp("numa_node", key)) {
|
|
idx = strtoul(value, NULL, 10);
|
|
if (!get_numa_node(idx)) {
|
|
log(TO_ALL, LOG_WARNING, "NUMA node %d doesn't exist\n",
|
|
idx);
|
|
return;
|
|
}
|
|
pol->numa_node = idx;
|
|
pol->numa_node_set = 1;
|
|
} else if (!strcasecmp("hintpolicy", key)) {
|
|
if (!strcasecmp("exact", value))
|
|
pol->hintpolicy = HINT_POLICY_EXACT;
|
|
else if (!strcasecmp("subset", value))
|
|
pol->hintpolicy = HINT_POLICY_SUBSET;
|
|
else if (!strcasecmp("ignore", value))
|
|
pol->hintpolicy = HINT_POLICY_IGNORE;
|
|
else {
|
|
key_set = 0;
|
|
log(TO_ALL, LOG_WARNING, "Unknown value for hitpolicy: %s\n", value);
|
|
}
|
|
} else {
|
|
key_set = 0;
|
|
log(TO_ALL, LOG_WARNING, "Unknown key returned, ignoring: %s\n", key);
|
|
}
|
|
|
|
if (key_set)
|
|
log(TO_ALL, LOG_INFO, "IRQ %d: Override %s to %s\n", irq, key, value);
|
|
|
|
|
|
}
|
|
|
|
/*
|
|
* Calls out to a possibly user defined script to get user assigned poilcy
|
|
* aspects for a given irq. A value of -1 in a given field indicates no
|
|
* policy was given and that system defaults should be used
|
|
*/
|
|
static void get_irq_user_policy(char *path, int irq, struct user_irq_policy *pol)
|
|
{
|
|
char *cmd;
|
|
FILE *output;
|
|
char buffer[128];
|
|
char *brc;
|
|
|
|
memset(pol, -1, sizeof(struct user_irq_policy));
|
|
pol->hintpolicy = global_hint_policy;
|
|
|
|
/* Return defaults if no script was given */
|
|
if (!polscript)
|
|
return;
|
|
|
|
cmd = alloca(strlen(path)+strlen(polscript)+64);
|
|
if (!cmd)
|
|
return;
|
|
|
|
sprintf(cmd, "exec %s %s %d", polscript, path, irq);
|
|
output = popen(cmd, "r");
|
|
if (!output) {
|
|
log(TO_ALL, LOG_WARNING, "Unable to execute user policy script %s\n", polscript);
|
|
return;
|
|
}
|
|
|
|
while(!feof(output)) {
|
|
brc = fgets(buffer, 128, output);
|
|
if (brc)
|
|
parse_user_policy_key(brc, irq, pol);
|
|
}
|
|
pclose(output);
|
|
}
|
|
|
|
static int check_for_irq_ban(char *path, int irq)
|
|
{
|
|
char *cmd;
|
|
int rc;
|
|
struct irq_info find;
|
|
GList *entry;
|
|
|
|
/*
|
|
* Check to see if we banned this irq on the command line
|
|
*/
|
|
find.irq = irq;
|
|
entry = g_list_find_custom(cl_banned_irqs, &find, compare_ints);
|
|
if (entry)
|
|
return 1;
|
|
|
|
if (!banscript)
|
|
return 0;
|
|
|
|
if (!path)
|
|
return 0;
|
|
|
|
cmd = alloca(strlen(path)+strlen(banscript)+32);
|
|
if (!cmd)
|
|
return 0;
|
|
|
|
sprintf(cmd, "%s %s %d > /dev/null",banscript, path, irq);
|
|
rc = system(cmd);
|
|
|
|
/*
|
|
* The system command itself failed
|
|
*/
|
|
if (rc == -1) {
|
|
log(TO_ALL, LOG_WARNING, "%s failed, please check the --banscript option\n", cmd);
|
|
return 0;
|
|
}
|
|
|
|
if (WEXITSTATUS(rc)) {
|
|
log(TO_ALL, LOG_INFO, "irq %d is baned by %s\n", irq, banscript);
|
|
return 1;
|
|
}
|
|
return 0;
|
|
|
|
}
|
|
|
|
/*
|
|
* Figures out which interrupt(s) relate to the device we're looking at in dirname
|
|
*/
|
|
static void build_one_dev_entry(const char *dirname)
|
|
{
|
|
struct dirent *entry;
|
|
DIR *msidir;
|
|
FILE *fd;
|
|
int irqnum;
|
|
struct irq_info *new;
|
|
char path[PATH_MAX];
|
|
char devpath[PATH_MAX];
|
|
struct user_irq_policy pol;
|
|
|
|
sprintf(path, "%s/%s/msi_irqs", SYSDEV_DIR, dirname);
|
|
sprintf(devpath, "%s/%s", SYSDEV_DIR, dirname);
|
|
|
|
msidir = opendir(path);
|
|
|
|
if (msidir) {
|
|
do {
|
|
entry = readdir(msidir);
|
|
if (!entry)
|
|
break;
|
|
irqnum = strtol(entry->d_name, NULL, 10);
|
|
if (irqnum) {
|
|
new = get_irq_info(irqnum);
|
|
if (new)
|
|
continue;
|
|
get_irq_user_policy(devpath, irqnum, &pol);
|
|
if ((pol.ban == 1) || (check_for_irq_ban(devpath, irqnum))) {
|
|
add_banned_irq(irqnum, &banned_irqs);
|
|
continue;
|
|
}
|
|
new = add_one_irq_to_db(devpath, irqnum, &pol);
|
|
if (!new)
|
|
continue;
|
|
new->type = IRQ_TYPE_MSIX;
|
|
}
|
|
} while (entry != NULL);
|
|
closedir(msidir);
|
|
return;
|
|
}
|
|
|
|
sprintf(path, "%s/%s/irq", SYSDEV_DIR, dirname);
|
|
fd = fopen(path, "r");
|
|
if (!fd)
|
|
return;
|
|
if (fscanf(fd, "%d", &irqnum) < 0)
|
|
goto done;
|
|
|
|
/*
|
|
* no pci device has irq 0
|
|
*/
|
|
if (irqnum) {
|
|
new = get_irq_info(irqnum);
|
|
if (new)
|
|
goto done;
|
|
get_irq_user_policy(devpath, irqnum, &pol);
|
|
if ((pol.ban == 1) || (check_for_irq_ban(path, irqnum))) {
|
|
add_banned_irq(irqnum, &banned_irqs);
|
|
goto done;
|
|
}
|
|
|
|
new = add_one_irq_to_db(devpath, irqnum, &pol);
|
|
if (!new)
|
|
goto done;
|
|
new->type = IRQ_TYPE_LEGACY;
|
|
}
|
|
|
|
done:
|
|
fclose(fd);
|
|
return;
|
|
}
|
|
|
|
/*
 * for_each_irq() callback that releases one irq_info entry.
 * The second argument is not used.
 */
static void free_irq(struct irq_info *info, void *data __attribute__((unused)))
{
	void *victim = info;

	free(victim);
}
|
|
|
|
void free_irq_db(void)
|
|
{
|
|
for_each_irq(NULL, free_irq, NULL);
|
|
g_list_free(interrupts_db);
|
|
interrupts_db = NULL;
|
|
for_each_irq(banned_irqs, free_irq, NULL);
|
|
g_list_free(banned_irqs);
|
|
banned_irqs = NULL;
|
|
g_list_free(rebalance_irq_list);
|
|
rebalance_irq_list = NULL;
|
|
}
|
|
|
|
static void add_new_irq(int irq, struct irq_info *hint)
|
|
{
|
|
struct irq_info *new;
|
|
struct user_irq_policy pol;
|
|
|
|
new = get_irq_info(irq);
|
|
if (new)
|
|
return;
|
|
|
|
get_irq_user_policy("/sys", irq, &pol);
|
|
if ((pol.ban == 1) || check_for_irq_ban(NULL, irq)) {
|
|
add_banned_irq(irq, &banned_irqs);
|
|
new = get_irq_info(irq);
|
|
} else
|
|
new = add_one_irq_to_db("/sys", irq, &pol);
|
|
|
|
if (!new) {
|
|
log(TO_CONSOLE, LOG_WARNING, "add_new_irq: Failed to add irq %d\n", irq);
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Override some of the new irq defaults here
|
|
*/
|
|
if (hint) {
|
|
new->type = hint->type;
|
|
new->class = hint->class;
|
|
}
|
|
|
|
new->level = map_class_to_level[new->class];
|
|
}
|
|
|
|
static void add_missing_irq(struct irq_info *info, void *unused __attribute__((unused)))
|
|
{
|
|
struct irq_info *lookup = get_irq_info(info->irq);
|
|
|
|
if (!lookup)
|
|
add_new_irq(info->irq, info);
|
|
|
|
}
|
|
|
|
|
|
void rebuild_irq_db(void)
|
|
{
|
|
DIR *devdir;
|
|
struct dirent *entry;
|
|
GList *tmp_irqs = NULL;
|
|
|
|
free_irq_db();
|
|
|
|
tmp_irqs = collect_full_irq_list();
|
|
|
|
devdir = opendir(SYSDEV_DIR);
|
|
if (!devdir)
|
|
goto free;
|
|
|
|
do {
|
|
entry = readdir(devdir);
|
|
|
|
if (!entry)
|
|
break;
|
|
|
|
build_one_dev_entry(entry->d_name);
|
|
|
|
} while (entry != NULL);
|
|
|
|
closedir(devdir);
|
|
|
|
|
|
for_each_irq(tmp_irqs, add_missing_irq, NULL);
|
|
|
|
free:
|
|
g_list_free_full(tmp_irqs, free);
|
|
|
|
}
|
|
|
|
/*
 * Invoke cb(entry, data) for every entry of list, starting from the first
 * element.  A NULL list selects interrupts_db.
 *
 * The successor of a node is fetched before the callback runs, so the
 * callback's handling of the current entry does not disturb the walk.
 */
void for_each_irq(GList *list, void (*cb)(struct irq_info *info, void *data), void *data)
{
	GList *cur;
	GList *following;

	for (cur = g_list_first(list ? list : interrupts_db); cur; cur = following) {
		following = g_list_next(cur);
		cb(cur->data, data);
	}
}
|
|
|
|
struct irq_info *get_irq_info(int irq)
|
|
{
|
|
GList *entry;
|
|
struct irq_info find;
|
|
|
|
find.irq = irq;
|
|
entry = g_list_find_custom(interrupts_db, &find, compare_ints);
|
|
|
|
if (!entry)
|
|
entry = g_list_find_custom(banned_irqs, &find, compare_ints);
|
|
|
|
return entry ? entry->data : NULL;
|
|
}
|
|
|
|
/*
 * Move the entry whose irq number matches info->irq from *from to the end
 * of *to and set info->moved.  Both list heads are updated.  Nothing
 * happens, and moved is left alone, when *from holds no such entry.
 */
void migrate_irq(GList **from, GList **to, struct irq_info *info)
{
	struct irq_info key;
	struct irq_info *moving;
	GList *link;

	key.irq = info->irq;
	link = g_list_find_custom(*from, &key, compare_ints);
	if (link == NULL)
		return;

	/* keep hold of the payload before its list node is released */
	moving = link->data;
	*from = g_list_delete_link(*from, link);
	*to = g_list_append(*to, moving);

	info->moved = 1;
}
|
|
|
|
/*
 * Comparator used to sort irq_info entries in descending order: by class
 * first, then by load, and finally by address so that distinct entries
 * never compare equal.
 *
 * The keys are evaluated strictly one after another.  OR-ing the three
 * tests together can return 1 for both (a, b) and (b, a) when class and
 * load disagree, which is not a valid ordering for a sort.
 */
static gint sort_irqs(gconstpointer A, gconstpointer B)
{
	const struct irq_info *a = A;
	const struct irq_info *b = B;

	if (a->class < b->class)
		return 1;
	if (a->class > b->class)
		return -1;

	if (a->load < b->load)
		return 1;
	if (a->load > b->load)
		return -1;

	/* same class and load: fall back to the address as tie-breaker */
	if (a < b)
		return 1;
	if (a > b)
		return -1;

	return 0;
}
|
|
|
|
/*
 * Sort *list with the sort_irqs() comparator and store the new list head
 * back through list.
 */
void sort_irq_list(GList **list)
{
	GList *sorted = g_list_sort(*list, sort_irqs);

	*list = sorted;
}
|