irqbalance/irqbalance.c

265 lines
5.9 KiB
C

/*
* Copyright (C) 2006, Intel Corporation
*
* This file is part of irqbalance
*
* This program file is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; version 2 of the License.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program in a file named COPYING; if not, write to the
* Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301 USA
*/
#include "config.h"
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>
#include <sys/time.h>
#include <syslog.h>
#include <unistd.h>
#ifdef HAVE_GETOPT_LONG
#include <getopt.h>
#endif
#ifdef HAVE_LIBCAP_NG
#include <cap-ng.h>
#endif
#include "irqbalance.h"
int one_shot_mode;
int debug_mode;
int numa_avail;
int need_cpu_rescan;
extern cpumask_t banned_cpus;
enum hp_e hint_policy = HINT_POLICY_SUBSET;
unsigned long power_thresh = ULONG_MAX;
unsigned long long cycle_count = 0;
void sleep_approx(int seconds)
{
struct timespec ts;
struct timeval tv;
gettimeofday(&tv, NULL);
ts.tv_sec = seconds;
ts.tv_nsec = -tv.tv_usec*1000;
while (ts.tv_nsec < 0) {
ts.tv_sec--;
ts.tv_nsec += 1000000000;
}
nanosleep(&ts, NULL);
}
#ifdef HAVE_GETOPT_LONG
struct option lopts[] = {
{"oneshot", 0, NULL, 'o'},
{"debug", 0, NULL, 'd'},
{"hintpolicy", 1, NULL, 'h'},
{"powerthresh", 1, NULL, 'p'},
{0, 0, 0, 0}
};
static void usage(void)
{
printf("irqbalance [--oneshot | -o] [--debug | -d] [--hintpolicy= | -h [exact|subset|ignore]]\n");
printf(" [--powerthresh= | -p <off> | <n>]\n");
}
static void parse_command_line(int argc, char **argv)
{
int opt;
int longind;
while ((opt = getopt_long(argc, argv,
"odh:p:",
lopts, &longind)) != -1) {
switch(opt) {
case '?':
usage();
exit(1);
case 'd':
debug_mode=1;
break;
case 'h':
if (!strncmp(optarg, "exact", strlen(optarg)))
hint_policy = HINT_POLICY_EXACT;
else if (!strncmp(optarg, "subset", strlen(optarg)))
hint_policy = HINT_POLICY_SUBSET;
else if (!strncmp(optarg, "ignore", strlen(optarg)))
hint_policy = HINT_POLICY_IGNORE;
else {
usage();
exit(1);
}
break;
case 'p':
if (!strncmp(optarg, "off", strlen(optarg)))
power_thresh = ULONG_MAX;
else {
power_thresh = strtoull(optarg, NULL, 10);
if (power_thresh == ULONG_MAX) {
usage();
exit(1);
}
}
break;
case 'o':
one_shot_mode=1;
break;
}
}
}
#endif
/*
* This builds our object tree. The Heirarchy is pretty straightforward
* At the top are numa_nodes
* All CPU packages belong to a single numa_node
* All Cache domains belong to a CPU package
* All CPU cores belong to a cache domain
*
* Objects are built in that order (top down)
*
* Object workload is the aggregate sum of the
* workload of the objects below it
*/
static void build_object_tree()
{
build_numa_node_list();
parse_cpu_tree();
rebuild_irq_db();
}
static void free_object_tree()
{
free_numa_node_list();
clear_cpu_tree();
free_irq_db();
}
static void dump_object_tree()
{
for_each_object(numa_nodes, dump_numa_node_info, NULL);
}
static void force_rebalance_irq(struct irq_info *info, void *data __attribute__((unused)))
{
if (info->level == BALANCE_NONE)
return;
if (info->assigned_obj == NULL)
rebalance_irq_list = g_list_append(rebalance_irq_list, info);
else
migrate_irq(&info->assigned_obj->interrupts, &rebalance_irq_list, info);
info->assigned_obj = NULL;
}
int main(int argc, char** argv)
{
#ifdef HAVE_GETOPT_LONG
parse_command_line(argc, argv);
#else
if (argc>1 && strstr(argv[1],"--debug"))
debug_mode=1;
if (argc>1 && strstr(argv[1],"--oneshot"))
one_shot_mode=1;
#endif
if (getenv("IRQBALANCE_BANNED_CPUS")) {
cpumask_parse_user(getenv("IRQBALANCE_BANNED_CPUS"), strlen(getenv("IRQBALANCE_BANNED_CPUS")), banned_cpus);
}
if (getenv("IRQBALANCE_ONESHOT"))
one_shot_mode=1;
if (getenv("IRQBALANCE_DEBUG"))
debug_mode=1;
if (numa_available() > -1) {
numa_avail = 1;
} else {
if (debug_mode)
printf("This machine seems not NUMA capable.\n");
}
build_object_tree();
if (debug_mode)
dump_object_tree();
/* On single core UP systems irqbalance obviously has no work to do */
if (core_count<2)
exit(EXIT_SUCCESS);
/* On dual core/hyperthreading shared cache systems just do a one shot setup */
if (cache_domain_count==1)
one_shot_mode = 1;
if (!debug_mode)
if (daemon(0,0))
exit(EXIT_FAILURE);
openlog(argv[0], 0, LOG_DAEMON);
#ifdef HAVE_LIBCAP_NG
// Drop capabilities
capng_clear(CAPNG_SELECT_BOTH);
capng_lock();
capng_apply(CAPNG_SELECT_BOTH);
#endif
for_each_irq(NULL, force_rebalance_irq, NULL);
while (1) {
sleep_approx(SLEEP_INTERVAL);
if (debug_mode)
printf("\n\n\n-----------------------------------------------------------------------------\n");
parse_proc_interrupts();
parse_proc_stat();
/* cope with cpu hotplug -- detected during /proc/interrupts parsing */
if (need_cpu_rescan) {
need_cpu_rescan = 0;
/* if there's a hotplug event we better turn off power mode for a bit until things settle */
power_mode = 0;
if (debug_mode)
printf("Rescanning cpu topology \n");
reset_counts();
clear_work_stats();
free_object_tree();
build_object_tree();
for_each_irq(NULL, force_rebalance_irq, NULL);
cycle_count=0;
}
if (cycle_count)
update_migration_status();
calculate_placement();
activate_mappings();
if (debug_mode)
dump_tree();
if (one_shot_mode)
break;
clear_work_stats();
cycle_count++;
}
free_object_tree();
return EXIT_SUCCESS;
}