irqbalance/types.h
Krister Johansen 7bc1244fbf Teach irqbalance about Intel CoD.
This originally surfaced as a bug in placing network interrupts.  In
the case that this submitter observed, the NIC card was in NUMA domain
0, but each RSS interrupt was getting an affinity list for all CPUs in
the domain.  The expected behavior is for a single cpu to be chosen when
attempting to fan out NIC interrupts.  Due to other implementation
details of interrupt placement, this effectively caused all interrupt
mappings for this NIC to end up on CPU 0.

The bug turns out to have been caused by Intel Cluster on Die breaking
an assumption in irqbalance about the design of the component hierarchy.
The CoD topology allows a CPU package to belong to more than one NUMA
node, which is not expected.

The RCA was that when the second NUMA node was wired up to the existing
physical package, it overwrote the mappings that were placed there by
the first.

This patch attempts to solve that problem by permitting a package to
have multiple NUMA nodes.  The CPU component hierarchy is preserved, in
case other parts of the code depend upon walking it.  When a CoD
topology is detected, the NUMA node -> CPU component mapping is moved
down a level, so that the nodes point to the first level where the
affinity becomes distinct.  In practice, this has been observed to be
the LLC.

A quick illustration (now, with COD, it looks like this):

                 +-----------+
                 | NUMA Node |
                 |     0     |
                 +-----------+
                       |
                       |        +-------+
                      \|/     / | CPU 0 |
                   +---------+  +-------+
                   | Cache 0 |
                   +---------+  +-------+
                   /          \ | CPU 1 |
      +-----------+             +-------+
      | Package 0 |
      +-----------+             +-------+
                  \           / | CPU 2 |
                   +---------+  +-------+
                   | Cache 1 |
                   +---------+
                       ^      \ +-------+
                       |        | CPU 3 |
                       |        +-------+
                 +-----------+
                 | NUMA Node |
                 |     1     |
                 +-----------+

Whereas, previously only NUMA Node 1 would end up pointing to package 0.
The topology should not be different on platforms that do not enable
CoD.

Signed-off-by: Krister Johansen <kjlx@templeofstupid.com>
2017-07-11 09:21:04 -07:00

81 lines
1.3 KiB
C

#ifndef _INCLUDE_GUARD_TYPES_H
#define _INCLUDE_GUARD_TYPES_H
#include <glib.h>
#include "cpumask.h"
/*
 * Balance levels
 *
 * Numbered 0 (none) through 3 (core), with package and cache in between.
 * NOTE(review): presumably these are the values kept in irq_info.level --
 * confirm against the callers.
 */
#define BALANCE_NONE		0
#define BALANCE_PACKAGE		1
#define BALANCE_CACHE		2
#define BALANCE_CORE		3
/*
 * IRQ Classes
 *
 * IRQ_NODEF is the only negative class value.  Its replacement list is
 * parenthesized so the expansion always stays a single operand; a bare -1
 * placed after a parenthesized expression would silently parse as a
 * subtraction.
 * NOTE(review): presumably these are the values kept in irq_info.class --
 * confirm against the callers.
 */
#define IRQ_NODEF		(-1)
#define IRQ_OTHER		0
#define IRQ_LEGACY		1
#define IRQ_SCSI		2
#define IRQ_VIDEO		3
#define IRQ_ETH			4
#define IRQ_GBETH		5
#define IRQ_10GBETH		6
#define IRQ_VIRT_EVENT		7
/*
 * IRQ Types
 *
 * Numbered consecutively from 0: legacy, MSI, MSI-X, virtual event.
 * NOTE(review): presumably these are the values kept in irq_info.type --
 * confirm against the callers.
 */
#define IRQ_TYPE_LEGACY		0
#define IRQ_TYPE_MSI		1
#define IRQ_TYPE_MSIX		2
#define IRQ_TYPE_VIRT_EVENT	3
/*
 * IRQ Internal tracking flags
 *
 * Only one flag is defined here; its value is 1.
 * NOTE(review): presumably tested against irq_info.flags -- confirm
 * against the callers.
 */
#define IRQ_FLAG_BANNED		0x1
/*
 * Kind of element a struct topo_obj stands for.  The values run 0..3 in
 * declaration order: CPU, cache, package, NUMA node.
 */
enum obj_type_e {
	OBJ_TYPE_CPU = 0,
	OBJ_TYPE_CACHE = 1,
	OBJ_TYPE_PACKAGE = 2,
	OBJ_TYPE_NODE = 3
};
/*
 * One element of the topology tree.  obj_type tells which kind of element
 * it is (CPU, cache, package or NUMA node); parent and children link it to
 * the neighbouring levels.
 */
struct topo_obj {
	/* Accounting counters; how they are maintained is not visible here. */
	uint64_t load;
	uint64_t last_load;
	uint64_t irq_count;

	/* Which kind of element this is, plus an integer identifier. */
	enum obj_type_e obj_type;
	int number;

	int powersave_mode;

	/* NOTE(review): presumably the CPUs covered by this object -- confirm. */
	cpumask_t mask;

	/* NOTE(review): presumably a list of struct irq_info -- confirm. */
	GList *interrupts;

	/* Tree links: one parent, a list of children. */
	struct topo_obj *parent;
	GList *children;

	/*
	 * NOTE(review): presumably the NUMA node object(s) this element
	 * belongs to; a list because more than one may apply -- confirm.
	 */
	GList *numa_nodes;

	/*
	 * Pointer to a GList head pointer.
	 * NOTE(review): presumably the list holding all objects of this
	 * obj_type -- confirm.
	 */
	GList **obj_type_list;
};
/*
 * Per-interrupt bookkeeping record.
 * NOTE(review): the class/type/level/flags members presumably take the
 * IRQ_*, IRQ_TYPE_*, BALANCE_* and IRQ_FLAG_* constants respectively --
 * confirm against the callers.
 */
struct irq_info {
	/* Interrupt number and its classification. */
	int irq;
	int class;
	int type;
	int level;
	int flags;

	/* Topology object recorded as this interrupt's NUMA node. */
	struct topo_obj *numa_node;

	/* CPU masks associated with the interrupt. */
	cpumask_t cpumask;
	cpumask_t affinity_hint;
	int hint_policy;

	/* Counters; irq_count has a last_irq_count companion. */
	uint64_t irq_count;
	uint64_t last_irq_count;
	uint64_t load;

	int moved;

	/* Topology object this interrupt is currently assigned to. */
	struct topo_obj *assigned_obj;

	unsigned int warned;

	/* Name string; ownership is not visible in this header. */
	char *name;
};
#endif