From 95f9881ff8d1032ae903bf700c17d6855bb1dacf Mon Sep 17 00:00:00 2001 From: arjanvandeven Date: Sat, 9 Dec 2006 15:59:16 +0000 Subject: [PATCH] initial import git-svn-id: https://irqbalance.googlecode.com/svn/trunk@2 46b42954-3823-0410-bd82-eb80b452c9b5 --- Makefile | 17 ++ activate.c | 57 +++++ bitmap.c | 366 ++++++++++++++++++++++++++++++ bitmap.h | 356 +++++++++++++++++++++++++++++ classify.c | 126 +++++++++++ constants.h | 30 +++ cpumask.h | 400 +++++++++++++++++++++++++++++++++ cputree.c | 371 ++++++++++++++++++++++++++++++ irqbalance.c | 138 ++++++++++++ irqbalance.h | 50 +++++ irqlist.c | 252 +++++++++++++++++++++ network.c | 175 +++++++++++++++ non-atomic.h | 115 ++++++++++ numa.c | 101 +++++++++ placement.c | 315 ++++++++++++++++++++++++++ powermode.c | 75 +++++++ procinterrupts.c | 88 ++++++++ strace | 573 +++++++++++++++++++++++++++++++++++++++++++++++ types.h | 84 +++++++ 19 files changed, 3689 insertions(+) create mode 100644 Makefile create mode 100644 activate.c create mode 100644 bitmap.c create mode 100644 bitmap.h create mode 100644 classify.c create mode 100644 constants.h create mode 100644 cpumask.h create mode 100644 cputree.c create mode 100644 irqbalance.c create mode 100644 irqbalance.h create mode 100644 irqlist.c create mode 100644 network.c create mode 100644 non-atomic.h create mode 100644 numa.c create mode 100644 placement.c create mode 100644 powermode.c create mode 100644 procinterrupts.c create mode 100644 strace create mode 100644 types.h diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..2bb41b1 --- /dev/null +++ b/Makefile @@ -0,0 +1,17 @@ +CFLAGS+=-g -Os -D_FORTIFY_SOURCE=2 -Wall -W `pkg-config --cflags glib-2.0` + +all: irqbalance + +LIBS=bitmap.o irqbalance.o cputree.o procinterrupts.o irqlist.o placement.o activate.o network.o powermode.o numa.o classify.o + +irqbalance: .depend $(LIBS) + gcc -g -O2 -D_FORTIFY_SOURCE=2 -Wall `pkg-config --libs glib-2.0` $(LIBS) -o irqbalance + +clean: + rm -f irqbalance *~ 
*.o .depend + +# rule for building dependency lists, and writing them to a file +# named ".depend". +.depend: + rm -f .depend + gccmakedep -f- -- $(CFLAGS) -- *.c > .depend diff --git a/activate.c b/activate.c new file mode 100644 index 0000000..9384577 --- /dev/null +++ b/activate.c @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2006, Intel Corporation + * + * This file is part of irqbalance + * + * This program file is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program in a file named COPYING; if not, write to the + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301 USA + */ + +/* + * This file contains the code to communicate a selected distribution / mapping + * of interrupts to the kernel. 
+ */ +#include +#include +#include +#include + +#include "irqbalance.h" + + +void activate_mapping(void) +{ + struct interrupt *irq; + GList *iter; + + iter = g_list_first(interrupts); + while (iter) { + irq = iter->data; + iter = g_list_next(iter); + + if (!cpus_equal(irq->mask, irq->old_mask)) { + char buf[PATH_MAX]; + FILE *file; + sprintf(buf, "/proc/irq/%i/smp_affinity", irq->number); + file = fopen(buf, "w"); + if (!file) + continue; + cpumask_scnprintf(buf, PATH_MAX, irq->mask); + fprintf(file,"%s", buf); + fclose(file); + irq->old_mask = irq->mask; + } + } +} diff --git a/bitmap.c b/bitmap.c new file mode 100644 index 0000000..0c57759 --- /dev/null +++ b/bitmap.c @@ -0,0 +1,366 @@ +/* + +This file is taken from the Linux kernel and minimally adapted for use in userspace + +*/ + +/* + * lib/bitmap.c + * Helper functions for bitmap.h. + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include +#include +#include +#include +#include +#include "bitmap.h" +#include "non-atomic.h" + +/* + * bitmaps provide an array of bits, implemented using an an + * array of unsigned longs. The number of valid bits in a + * given bitmap does _not_ need to be an exact multiple of + * BITS_PER_LONG. + * + * The possible unused bits in the last, partially used word + * of a bitmap are 'don't care'. The implementation makes + * no particular effort to keep them zero. It ensures that + * their value will not affect the results of any operation. + * The bitmap operations that return Boolean (bitmap_empty, + * for example) or scalar (bitmap_weight, for example) results + * carefully filter out these unused bits from impacting their + * results. + * + * These operations actually hold to a slightly stronger rule: + * if you don't input any bitmaps to these ops that have some + * unused bits set, then they won't output any set unused bits + * in output bitmaps. 
+ * + * The byte ordering of bitmaps is more natural on little + * endian architectures. See the big-endian headers + * include/asm-ppc64/bitops.h and include/asm-s390/bitops.h + * for the best explanations of this ordering. + */ + +int __bitmap_empty(const unsigned long *bitmap, int bits) +{ + int k, lim = bits/BITS_PER_LONG; + for (k = 0; k < lim; ++k) + if (bitmap[k]) + return 0; + + if (bits % BITS_PER_LONG) + if (bitmap[k] & BITMAP_LAST_WORD_MASK(bits)) + return 0; + + return 1; +} + +int __bitmap_full(const unsigned long *bitmap, int bits) +{ + int k, lim = bits/BITS_PER_LONG; + for (k = 0; k < lim; ++k) + if (~bitmap[k]) + return 0; + + if (bits % BITS_PER_LONG) + if (~bitmap[k] & BITMAP_LAST_WORD_MASK(bits)) + return 0; + + return 1; +} + +int __bitmap_equal(const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits) +{ + int k, lim = bits/BITS_PER_LONG; + for (k = 0; k < lim; ++k) + if (bitmap1[k] != bitmap2[k]) + return 0; + + if (bits % BITS_PER_LONG) + if ((bitmap1[k] ^ bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) + return 0; + + return 1; +} + +void __bitmap_complement(unsigned long *dst, const unsigned long *src, int bits) +{ + int k, lim = bits/BITS_PER_LONG; + for (k = 0; k < lim; ++k) + dst[k] = ~src[k]; + + if (bits % BITS_PER_LONG) + dst[k] = ~src[k] & BITMAP_LAST_WORD_MASK(bits); +} + +/* + * __bitmap_shift_right - logical right shift of the bits in a bitmap + * @dst - destination bitmap + * @src - source bitmap + * @nbits - shift by this many bits + * @bits - bitmap size, in bits + * + * Shifting right (dividing) means moving bits in the MS -> LS bit + * direction. Zeros are fed into the vacated MS positions and the + * LS bits shifted off the bottom are lost. 
+ */ +void __bitmap_shift_right(unsigned long *dst, + const unsigned long *src, int shift, int bits) +{ + int k, lim = BITS_TO_LONGS(bits), left = bits % BITS_PER_LONG; + int off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG; + unsigned long mask = (1UL << left) - 1; + for (k = 0; off + k < lim; ++k) { + unsigned long upper, lower; + + /* + * If shift is not word aligned, take lower rem bits of + * word above and make them the top rem bits of result. + */ + if (!rem || off + k + 1 >= lim) + upper = 0; + else { + upper = src[off + k + 1]; + if (off + k + 1 == lim - 1 && left) + upper &= mask; + } + lower = src[off + k]; + if (left && off + k == lim - 1) + lower &= mask; + dst[k] = upper << (BITS_PER_LONG - rem) | lower >> rem; + if (left && k == lim - 1) + dst[k] &= mask; + } + if (off) + memset(&dst[lim - off], 0, off*sizeof(unsigned long)); +} + + +/* + * __bitmap_shift_left - logical left shift of the bits in a bitmap + * @dst - destination bitmap + * @src - source bitmap + * @nbits - shift by this many bits + * @bits - bitmap size, in bits + * + * Shifting left (multiplying) means moving bits in the LS -> MS + * direction. Zeros are fed into the vacated LS bit positions + * and those MS bits shifted off the top are lost. + */ + +void __bitmap_shift_left(unsigned long *dst, + const unsigned long *src, int shift, int bits) +{ + int k, lim = BITS_TO_LONGS(bits), left = bits % BITS_PER_LONG; + int off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG; + for (k = lim - off - 1; k >= 0; --k) { + unsigned long upper, lower; + + /* + * If shift is not word aligned, take upper rem bits of + * word below and make them the bottom rem bits of result. 
+ */ + if (rem && k > 0) + lower = src[k - 1]; + else + lower = 0; + upper = src[k]; + if (left && k == lim - 1) + upper &= (1UL << left) - 1; + dst[k + off] = lower >> (BITS_PER_LONG - rem) | upper << rem; + if (left && k + off == lim - 1) + dst[k + off] &= (1UL << left) - 1; + } + if (off) + memset(dst, 0, off*sizeof(unsigned long)); +} + +void __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits) +{ + int k; + int nr = BITS_TO_LONGS(bits); + + for (k = 0; k < nr; k++) + dst[k] = bitmap1[k] & bitmap2[k]; +} + +void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits) +{ + int k; + int nr = BITS_TO_LONGS(bits); + + for (k = 0; k < nr; k++) + dst[k] = bitmap1[k] | bitmap2[k]; +} + +void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits) +{ + int k; + int nr = BITS_TO_LONGS(bits); + + for (k = 0; k < nr; k++) + dst[k] = bitmap1[k] ^ bitmap2[k]; +} + +void __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits) +{ + int k; + int nr = BITS_TO_LONGS(bits); + + for (k = 0; k < nr; k++) + dst[k] = bitmap1[k] & ~bitmap2[k]; +} + +int __bitmap_intersects(const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits) +{ + int k, lim = bits/BITS_PER_LONG; + for (k = 0; k < lim; ++k) + if (bitmap1[k] & bitmap2[k]) + return 1; + + if (bits % BITS_PER_LONG) + if ((bitmap1[k] & bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) + return 1; + return 0; +} + +/* + * Bitmap printing & parsing functions: first version by Bill Irwin, + * second version by Paul Jackson, third by Joe Korty. + */ + +#define CHUNKSZ 32 +#define nbits_to_hold_value(val) fls(val) +#define unhex(c) (isdigit(c) ? (c - '0') : (toupper(c) - 'A' + 10)) +#define BASEDEC 10 /* fancier cpuset lists input in decimal */ + +/** + * bitmap_scnprintf - convert bitmap to an ASCII hex string. 
+ * @buf: byte buffer into which string is placed + * @buflen: reserved size of @buf, in bytes + * @maskp: pointer to bitmap to convert + * @nmaskbits: size of bitmap, in bits + * + * Exactly @nmaskbits bits are displayed. Hex digits are grouped into + * comma-separated sets of eight digits per set. + */ +int bitmap_scnprintf(char *buf, unsigned int buflen, + const unsigned long *maskp, int nmaskbits) +{ + int i, word, bit, len = 0; + unsigned long val; + const char *sep = ""; + int chunksz; + uint32_t chunkmask; + int first = 1; + + chunksz = nmaskbits & (CHUNKSZ - 1); + if (chunksz == 0) + chunksz = CHUNKSZ; + + i = ALIGN(nmaskbits, CHUNKSZ) - CHUNKSZ; + for (; i >= 0; i -= CHUNKSZ) { + chunkmask = ((1ULL << chunksz) - 1); + word = i / BITS_PER_LONG; + bit = i % BITS_PER_LONG; + val = (maskp[word] >> bit) & chunkmask; + if (val!=0 || !first) { + len += snprintf(buf+len, buflen-len, "%s%0*lx", sep, + (chunksz+3)/4, val); + chunksz = CHUNKSZ; + sep = ","; + first = 0; + } + } + return len; +} + +/** + * __bitmap_parse - convert an ASCII hex string into a bitmap. + * @buf: pointer to buffer containing string. + * @buflen: buffer size in bytes. If string is smaller than this + * then it must be terminated with a \0. + * @is_user: location of buffer, 0 indicates kernel space + * @maskp: pointer to bitmap array that will contain result. + * @nmaskbits: size of bitmap, in bits. + * + * Commas group hex digits into chunks. Each chunk defines exactly 32 + * bits of the resultant bitmask. No chunk may specify a value larger + * than 32 bits (%-EOVERFLOW), and if a chunk specifies a smaller value + * then leading 0-bits are prepended. %-EINVAL is returned for illegal + * characters and for grouping errors such as "1,,5", ",44", "," and "". + * Leading and trailing whitespace accepted, but not embedded whitespace. 
+ */ +int __bitmap_parse(const char *buf, unsigned int buflen, + int is_user __attribute((unused)), unsigned long *maskp, + int nmaskbits) +{ + int c, old_c, totaldigits, ndigits, nchunks, nbits; + uint32_t chunk; + + bitmap_zero(maskp, nmaskbits); + + nchunks = nbits = totaldigits = c = 0; + do { + chunk = ndigits = 0; + + /* Get the next chunk of the bitmap */ + while (buflen) { + old_c = c; + c = *buf++; + buflen--; + if (isspace(c)) + continue; + + /* + * If the last character was a space and the current + * character isn't '\0', we've got embedded whitespace. + * This is a no-no, so throw an error. + */ + if (totaldigits && c && isspace(old_c)) + return 0; + + /* A '\0' or a ',' signal the end of the chunk */ + if (c == '\0' || c == ',') + break; + + if (!isxdigit(c)) + return -EINVAL; + + /* + * Make sure there are at least 4 free bits in 'chunk'. + * If not, this hexdigit will overflow 'chunk', so + * throw an error. + */ + if (chunk & ~((1UL << (CHUNKSZ - 4)) - 1)) + return -EOVERFLOW; + + chunk = (chunk << 4) | unhex(c); + ndigits++; totaldigits++; + } + if (ndigits == 0) + return -EINVAL; + if (nchunks == 0 && chunk == 0) + continue; + + __bitmap_shift_left(maskp, maskp, CHUNKSZ, nmaskbits); + *maskp |= chunk; + nchunks++; + nbits += (nchunks == 1) ? 
nbits_to_hold_value(chunk) : CHUNKSZ; + if (nbits > nmaskbits) + return -EOVERFLOW; + } while (buflen && c == ','); + + return 0; +} diff --git a/bitmap.h b/bitmap.h new file mode 100644 index 0000000..91ed499 --- /dev/null +++ b/bitmap.h @@ -0,0 +1,356 @@ +#ifndef __LINUX_BITMAP_H +#define __LINUX_BITMAP_H + +#ifndef __ASSEMBLY__ + +#include +#include +#include + + +#define BITS_PER_LONG ((int)sizeof(unsigned long)*8) + +#define BITS_TO_LONGS(bits) \ + (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG) +#define DECLARE_BITMAP(name,bits) \ + unsigned long name[BITS_TO_LONGS(bits)] +#define ALIGN(x,a) (((x)+(a)-1UL)&~((a)-1UL)) + + +#include "non-atomic.h" + +static inline unsigned int hweight32(unsigned int w) +{ + unsigned int res = w - ((w >> 1) & 0x55555555); + res = (res & 0x33333333) + ((res >> 2) & 0x33333333); + res = (res + (res >> 4)) & 0x0F0F0F0F; + res = res + (res >> 8); + return (res + (res >> 16)) & 0x000000FF; +} + +static inline unsigned long hweight64(uint64_t w) +{ + if (BITS_PER_LONG == 32) + return hweight32((unsigned int)(w >> 32)) + hweight32((unsigned int)w); + + w -= (w >> 1) & 0x5555555555555555ull; + w = (w & 0x3333333333333333ull) + ((w >> 2) & 0x3333333333333333ull); + w = (w + (w >> 4)) & 0x0f0f0f0f0f0f0f0full; + return (w * 0x0101010101010101ull) >> 56; +} + + +static inline int fls(int x) +{ + int r = 32; + + if (!x) + return 0; + if (!(x & 0xffff0000u)) { + x <<= 16; + r -= 16; + } + if (!(x & 0xff000000u)) { + x <<= 8; + r -= 8; + } + if (!(x & 0xf0000000u)) { + x <<= 4; + r -= 4; + } + if (!(x & 0xc0000000u)) { + x <<= 2; + r -= 2; + } + if (!(x & 0x80000000u)) { + x <<= 1; + r -= 1; + } + return r; +} + +static inline unsigned long hweight_long(unsigned long w) +{ + return sizeof(w) == 4 ? hweight32(w) : hweight64(w); +} + +#define min(x,y) ({ \ + typeof(x) _x = (x); \ + typeof(y) _y = (y); \ + (void) (&_x == &_y); \ + _x < _y ? _x : _y; }) + + +/* + * bitmaps provide bit arrays that consume one or more unsigned + * longs. 
The bitmap interface and available operations are listed + * here, in bitmap.h + * + * Function implementations generic to all architectures are in + * lib/bitmap.c. Functions implementations that are architecture + * specific are in various include/asm-/bitops.h headers + * and other arch/ specific files. + * + * See lib/bitmap.c for more details. + */ + +/* + * The available bitmap operations and their rough meaning in the + * case that the bitmap is a single unsigned long are thus: + * + * Note that nbits should be always a compile time evaluable constant. + * Otherwise many inlines will generate horrible code. + * + * bitmap_zero(dst, nbits) *dst = 0UL + * bitmap_fill(dst, nbits) *dst = ~0UL + * bitmap_copy(dst, src, nbits) *dst = *src + * bitmap_and(dst, src1, src2, nbits) *dst = *src1 & *src2 + * bitmap_or(dst, src1, src2, nbits) *dst = *src1 | *src2 + * bitmap_xor(dst, src1, src2, nbits) *dst = *src1 ^ *src2 + * bitmap_andnot(dst, src1, src2, nbits) *dst = *src1 & ~(*src2) + * bitmap_complement(dst, src, nbits) *dst = ~(*src) + * bitmap_equal(src1, src2, nbits) Are *src1 and *src2 equal? + * bitmap_intersects(src1, src2, nbits) Do *src1 and *src2 overlap? + * bitmap_subset(src1, src2, nbits) Is *src1 a subset of *src2? + * bitmap_empty(src, nbits) Are all bits zero in *src? + * bitmap_full(src, nbits) Are all bits set in *src? 
+ * bitmap_weight(src, nbits) Hamming Weight: number set bits + * bitmap_shift_right(dst, src, n, nbits) *dst = *src >> n + * bitmap_shift_left(dst, src, n, nbits) *dst = *src << n + * bitmap_remap(dst, src, old, new, nbits) *dst = map(old, new)(src) + * bitmap_bitremap(oldbit, old, new, nbits) newbit = map(old, new)(oldbit) + * bitmap_scnprintf(buf, len, src, nbits) Print bitmap src to buf + * bitmap_parse(buf, buflen, dst, nbits) Parse bitmap dst from kernel buf + * bitmap_parse_user(ubuf, ulen, dst, nbits) Parse bitmap dst from user buf + * bitmap_scnlistprintf(buf, len, src, nbits) Print bitmap src as list to buf + * bitmap_parselist(buf, dst, nbits) Parse bitmap dst from list + * bitmap_find_free_region(bitmap, bits, order) Find and allocate bit region + * bitmap_release_region(bitmap, pos, order) Free specified bit region + * bitmap_allocate_region(bitmap, pos, order) Allocate specified bit region + */ + +/* + * Also the following operations in asm/bitops.h apply to bitmaps. + * + * set_bit(bit, addr) *addr |= bit + * clear_bit(bit, addr) *addr &= ~bit + * change_bit(bit, addr) *addr ^= bit + * test_bit(bit, addr) Is bit set in *addr? + * test_and_set_bit(bit, addr) Set bit and return old value + * test_and_clear_bit(bit, addr) Clear bit and return old value + * test_and_change_bit(bit, addr) Change bit and return old value + * find_first_zero_bit(addr, nbits) Position first zero bit in *addr + * find_first_bit(addr, nbits) Position first set bit in *addr + * find_next_zero_bit(addr, nbits, bit) Position next zero bit in *addr >= bit + * find_next_bit(addr, nbits, bit) Position next set bit in *addr >= bit + */ + +/* + * The DECLARE_BITMAP(name,bits) macro, in linux/types.h, can be used + * to declare an array named 'name' of just enough unsigned longs to + * contain all bit positions from 0 to 'bits' - 1. 
+ */ + +/* + * lib/bitmap.c provides these functions: + */ + +extern int __bitmap_empty(const unsigned long *bitmap, int bits); +extern int __bitmap_full(const unsigned long *bitmap, int bits); +extern int __bitmap_equal(const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits); +extern void __bitmap_complement(unsigned long *dst, const unsigned long *src, + int bits); +extern void __bitmap_shift_right(unsigned long *dst, + const unsigned long *src, int shift, int bits); +extern void __bitmap_shift_left(unsigned long *dst, + const unsigned long *src, int shift, int bits); +extern void __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits); +extern void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits); +extern void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits); +extern void __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits); +extern int __bitmap_intersects(const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits); +extern int __bitmap_subset(const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits); +extern int __bitmap_weight(const unsigned long *bitmap, int bits); + +extern int bitmap_scnprintf(char *buf, unsigned int len, + const unsigned long *src, int nbits); +extern int __bitmap_parse(const char *buf, unsigned int buflen, int is_user, + unsigned long *dst, int nbits); +extern int bitmap_scnlistprintf(char *buf, unsigned int len, + const unsigned long *src, int nbits); +extern int bitmap_parselist(const char *buf, unsigned long *maskp, + int nmaskbits); +extern void bitmap_remap(unsigned long *dst, const unsigned long *src, + const unsigned long *old, const unsigned long *new, int bits); +extern int bitmap_bitremap(int oldbit, + const unsigned long *old, const unsigned long *new, int bits); +extern int 
bitmap_find_free_region(unsigned long *bitmap, int bits, int order); +extern void bitmap_release_region(unsigned long *bitmap, int pos, int order); +extern int bitmap_allocate_region(unsigned long *bitmap, int pos, int order); + +#define BITMAP_LAST_WORD_MASK(nbits) \ +( \ + ((nbits) % BITS_PER_LONG) ? \ + (1UL<<((nbits) % BITS_PER_LONG))-1 : ~0UL \ +) + +static inline void bitmap_zero(unsigned long *dst, int nbits) +{ + if (nbits <= BITS_PER_LONG) + *dst = 0UL; + else { + int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); + memset(dst, 0, len); + } +} + +static inline void bitmap_fill(unsigned long *dst, int nbits) +{ + size_t nlongs = BITS_TO_LONGS(nbits); + if (nlongs > 1) { + int len = (nlongs - 1) * sizeof(unsigned long); + memset(dst, 0xff, len); + } + dst[nlongs - 1] = BITMAP_LAST_WORD_MASK(nbits); +} + +static inline void bitmap_copy(unsigned long *dst, const unsigned long *src, + int nbits) +{ + if (nbits <= BITS_PER_LONG) + *dst = *src; + else { + int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); + memcpy(dst, src, len); + } +} + +static inline void bitmap_and(unsigned long *dst, const unsigned long *src1, + const unsigned long *src2, int nbits) +{ + if (nbits <= BITS_PER_LONG) + *dst = *src1 & *src2; + else + __bitmap_and(dst, src1, src2, nbits); +} + +static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, + const unsigned long *src2, int nbits) +{ + if (nbits <= BITS_PER_LONG) + *dst = *src1 | *src2; + else + __bitmap_or(dst, src1, src2, nbits); +} + +static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1, + const unsigned long *src2, int nbits) +{ + if (nbits <= BITS_PER_LONG) + *dst = *src1 ^ *src2; + else + __bitmap_xor(dst, src1, src2, nbits); +} + +static inline void bitmap_andnot(unsigned long *dst, const unsigned long *src1, + const unsigned long *src2, int nbits) +{ + if (nbits <= BITS_PER_LONG) + *dst = *src1 & ~(*src2); + else + __bitmap_andnot(dst, src1, src2, nbits); +} + +static inline 
void bitmap_complement(unsigned long *dst, const unsigned long *src, + int nbits) +{ + if (nbits <= BITS_PER_LONG) + *dst = ~(*src) & BITMAP_LAST_WORD_MASK(nbits); + else + __bitmap_complement(dst, src, nbits); +} + +static inline int bitmap_equal(const unsigned long *src1, + const unsigned long *src2, int nbits) +{ + if (nbits <= BITS_PER_LONG) + return ! ((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits)); + else + return __bitmap_equal(src1, src2, nbits); +} + +static inline int bitmap_intersects(const unsigned long *src1, + const unsigned long *src2, int nbits) +{ + if (nbits <= BITS_PER_LONG) + return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0; + else + return __bitmap_intersects(src1, src2, nbits); +} + +static inline int bitmap_subset(const unsigned long *src1, + const unsigned long *src2, int nbits) +{ + if (nbits <= BITS_PER_LONG) + return ! ((*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits)); + else + return __bitmap_subset(src1, src2, nbits); +} + +static inline int bitmap_empty(const unsigned long *src, int nbits) +{ + if (nbits <= BITS_PER_LONG) + return ! (*src & BITMAP_LAST_WORD_MASK(nbits)); + else + return __bitmap_empty(src, nbits); +} + +static inline int bitmap_full(const unsigned long *src, int nbits) +{ + if (nbits <= BITS_PER_LONG) + return ! 
(~(*src) & BITMAP_LAST_WORD_MASK(nbits)); + else + return __bitmap_full(src, nbits); +} + +static inline int bitmap_weight(const unsigned long *src, int nbits) +{ + if (nbits <= BITS_PER_LONG) + return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits)); + return __bitmap_weight(src, nbits); +} + +static inline void bitmap_shift_right(unsigned long *dst, + const unsigned long *src, int n, int nbits) +{ + if (nbits <= BITS_PER_LONG) + *dst = *src >> n; + else + __bitmap_shift_right(dst, src, n, nbits); +} + +static inline void bitmap_shift_left(unsigned long *dst, + const unsigned long *src, int n, int nbits) +{ + if (nbits <= BITS_PER_LONG) + *dst = (*src << n) & BITMAP_LAST_WORD_MASK(nbits); + else + __bitmap_shift_left(dst, src, n, nbits); +} + +static inline int bitmap_parse(const char *buf, unsigned int buflen, + unsigned long *maskp, int nmaskbits) +{ + return __bitmap_parse(buf, buflen, 0, maskp, nmaskbits); +} + +#endif /* __ASSEMBLY__ */ + +#endif /* __LINUX_BITMAP_H */ diff --git a/classify.c b/classify.c new file mode 100644 index 0000000..a1c8114 --- /dev/null +++ b/classify.c @@ -0,0 +1,126 @@ +#include +#include +#include + +#include "irqbalance.h" +#include "types.h" + + +char *classes[] = { + "other", + "legacy", + "storage", + "timer", + "ethernet", + "fasteth", + 0 +}; + +int map_class_to_level[7] = +{ BALANCE_PACKAGE, BALANCE_CACHE, BALANCE_CACHE, BALANCE_NONE, BALANCE_CORE, BALANCE_CORE }; + + +int class_counts[7]; + +/* + +NOTE NOTE although that this file has a hard-coded list of modules, something missing is not + a big deal; the types are also set based on PCI class information when available. + +*/ + +/* + + Based on the original irqbalance code which is: + + Copyright (C) 2003 Red Hat, Inc. All rights reserved. 
+ + Usage and distribution of this file are subject to the Gnu General Public License Version 2 + that can be found at http://www.gnu.org/licenses/gpl.txt and the COPYING file as + distributed together with this file is included herein by reference. + + Author: Arjan van de Ven + +*/ + +static char *legacy_modules[] = { + "PS/2", + "serial", + "i8042", + "acpi", + "floppy", + "parport", + "keyboard", + "usb-ohci", + "usb-uhci", + "uhci_hcd", + "ohci_hcd", + "ehci_hcd", + "EMU10K1", + 0 +}; + +static char *timer_modules[] = { + "rtc", + "timer", + 0 +}; + +static char *storage_modules[] = { + "aic7xxx", + "aic79xx", + "ide", + "cciss", + "cpqarray", + "qla2", + "megaraid", + "fusion", + "libata", + "ohci1394", + "sym53c8xx", + 0 +}; + +static char *ethernet_modules[] = { + "eth", + "e100", + "eepro100", + "orinico_cs", + "wvlan_cs", + "3c5", + "HiSax", + 0 +}; + + +int find_class(struct interrupt *irq, char *moduletext) +{ + int guess = IRQ_OTHER; + int i; + + if (moduletext == NULL) + return guess; + + for (i=0; legacy_modules[i]; i++) + if (strstr(moduletext, legacy_modules[i])) + guess = IRQ_LEGACY; + + for (i=0; storage_modules[i]; i++) + if (strstr(moduletext, storage_modules[i])) + guess = IRQ_SCSI; + + for (i=0; timer_modules[i]; i++) + if (strstr(moduletext, timer_modules[i])) + guess = IRQ_TIMER; + + for (i=0; ethernet_modules[i]; i++) + if (strstr(moduletext, ethernet_modules[i])) + guess = IRQ_ETH; + + if (guess == IRQ_OTHER && irq->number==0) + guess = IRQ_TIMER; + + if (guess > irq->class) + return guess; + return irq->class; +} diff --git a/constants.h b/constants.h new file mode 100644 index 0000000..ae669eb --- /dev/null +++ b/constants.h @@ -0,0 +1,30 @@ +#ifndef __INCLUDE_GUARD_CONSTANTS_H +#define __INCLUDE_GUARD_CONSTANTS_H + +/* interval between rebalance attempts in seconds */ +#define SLEEP_INTERVAL 10 + +/* NUMA topology refresh intervals, in units of SLEEP_INTERVAL */ +#define NUMA_REFRESH_INTERVAL 32 +/* NIC interrupt refresh interval, in 
units of SLEEP_INTERVAL */ +#define NIC_REFRESH_INTERVAL 32 + +/* minimum number of interrupts since boot for an interrupt to matter */ +#define MIN_IRQ_COUNT 20 + + +/* balancing tunings */ + +#define CROSS_PACKAGE_PENALTY 3000 +#define NUMA_PENALTY 250 +#define POWER_MODE_PACKAGE_THRESHOLD 10000 +#define CLASS_VIOLATION_PENTALTY 6000 +#define CORE_SPECIFIC_THRESHOLD 5000 + +/* power mode */ + +#define POWER_MODE_SOFTIRQ_THRESHOLD 20 +#define POWER_MODE_HYSTERESIS 3 + + +#endif diff --git a/cpumask.h b/cpumask.h new file mode 100644 index 0000000..8c6606a --- /dev/null +++ b/cpumask.h @@ -0,0 +1,400 @@ +#ifndef __LINUX_CPUMASK_H +#define __LINUX_CPUMASK_H + +#define NR_CPUS 256 +/* + * Cpumasks provide a bitmap suitable for representing the + * set of CPU's in a system, one bit position per CPU number. + * + * See detailed comments in the file linux/bitmap.h describing the + * data type on which these cpumasks are based. + * + * For details of cpumask_scnprintf() and cpumask_parse_user(), + * see bitmap_scnprintf() and bitmap_parse_user() in lib/bitmap.c. + * For details of cpulist_scnprintf() and cpulist_parse(), see + * bitmap_scnlistprintf() and bitmap_parselist(), also in bitmap.c. + * For details of cpu_remap(), see bitmap_bitremap in lib/bitmap.c + * For details of cpus_remap(), see bitmap_remap in lib/bitmap.c. 
+ * + * The available cpumask operations are: + * + * void cpu_set(cpu, mask) turn on bit 'cpu' in mask + * void cpu_clear(cpu, mask) turn off bit 'cpu' in mask + * void cpus_setall(mask) set all bits + * void cpus_clear(mask) clear all bits + * int cpu_isset(cpu, mask) true iff bit 'cpu' set in mask + * int cpu_test_and_set(cpu, mask) test and set bit 'cpu' in mask + * + * void cpus_and(dst, src1, src2) dst = src1 & src2 [intersection] + * void cpus_or(dst, src1, src2) dst = src1 | src2 [union] + * void cpus_xor(dst, src1, src2) dst = src1 ^ src2 + * void cpus_andnot(dst, src1, src2) dst = src1 & ~src2 + * void cpus_complement(dst, src) dst = ~src + * + * int cpus_equal(mask1, mask2) Does mask1 == mask2? + * int cpus_intersects(mask1, mask2) Do mask1 and mask2 intersect? + * int cpus_subset(mask1, mask2) Is mask1 a subset of mask2? + * int cpus_empty(mask) Is mask empty (no bits sets)? + * int cpus_full(mask) Is mask full (all bits sets)? + * int cpus_weight(mask) Hamming weigh - number of set bits + * + * void cpus_shift_right(dst, src, n) Shift right + * void cpus_shift_left(dst, src, n) Shift left + * + * int first_cpu(mask) Number lowest set bit, or NR_CPUS + * int next_cpu(cpu, mask) Next cpu past 'cpu', or NR_CPUS + * + * cpumask_t cpumask_of_cpu(cpu) Return cpumask with bit 'cpu' set + * CPU_MASK_ALL Initializer - all bits set + * CPU_MASK_NONE Initializer - no bits set + * unsigned long *cpus_addr(mask) Array of unsigned long's in mask + * + * int cpumask_scnprintf(buf, len, mask) Format cpumask for printing + * int cpumask_parse_user(ubuf, ulen, mask) Parse ascii string as cpumask + * int cpulist_scnprintf(buf, len, mask) Format cpumask as list for printing + * int cpulist_parse(buf, map) Parse ascii string as cpulist + * int cpu_remap(oldbit, old, new) newbit = map(old, new)(oldbit) + * int cpus_remap(dst, src, old, new) *dst = map(old, new)(src) + * + * for_each_cpu_mask(cpu, mask) for-loop cpu over mask + * + * int num_online_cpus() Number of online 
#include "bitmap.h"

/* A CPU set: one bit per CPU number, NR_CPUS bits in total. */
typedef struct { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t;
/* Only declared here; used by cpumask_of_cpu() to name the mask type. */
extern cpumask_t _unused_cpumask_arg_;

/* cpu_set(cpu, mask) - turn on bit 'cpu' in mask (mask is passed by name) */
#define cpu_set(cpu, dst) __cpu_set((cpu), &(dst))
static inline void __cpu_set(int cpu, volatile cpumask_t *dstp)
{
	set_bit(cpu, dstp->bits);
}

/* cpu_clear(cpu, mask) - turn off bit 'cpu' in mask */
#define cpu_clear(cpu, dst) __cpu_clear((cpu), &(dst))
static inline void __cpu_clear(int cpu, volatile cpumask_t *dstp)
{
	clear_bit(cpu, dstp->bits);
}

/* cpus_setall(mask) - set all NR_CPUS bits */
#define cpus_setall(dst) __cpus_setall(&(dst), NR_CPUS)
static inline void __cpus_setall(cpumask_t *dstp, int nbits)
{
	bitmap_fill(dstp->bits, nbits);
}

/* cpus_clear(mask) - clear all bits */
#define cpus_clear(dst) __cpus_clear(&(dst), NR_CPUS)
static inline void __cpus_clear(cpumask_t *dstp, int nbits)
{
	bitmap_zero(dstp->bits, nbits);
}
*/ +#define cpu_isset(cpu, cpumask) test_bit((cpu), (cpumask).bits) + +#define cpus_and(dst, src1, src2) __cpus_and(&(dst), &(src1), &(src2), NR_CPUS) +static inline void __cpus_and(cpumask_t *dstp, const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits); +} + +#define cpus_or(dst, src1, src2) __cpus_or(&(dst), &(src1), &(src2), NR_CPUS) +static inline void __cpus_or(cpumask_t *dstp, const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + bitmap_or(dstp->bits, src1p->bits, src2p->bits, nbits); +} + +#define cpus_xor(dst, src1, src2) __cpus_xor(&(dst), &(src1), &(src2), NR_CPUS) +static inline void __cpus_xor(cpumask_t *dstp, const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + bitmap_xor(dstp->bits, src1p->bits, src2p->bits, nbits); +} + +#define cpus_andnot(dst, src1, src2) \ + __cpus_andnot(&(dst), &(src1), &(src2), NR_CPUS) +static inline void __cpus_andnot(cpumask_t *dstp, const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits); +} + +#define cpus_complement(dst, src) __cpus_complement(&(dst), &(src), NR_CPUS) +static inline void __cpus_complement(cpumask_t *dstp, + const cpumask_t *srcp, int nbits) +{ + bitmap_complement(dstp->bits, srcp->bits, nbits); +} + +#define cpus_equal(src1, src2) __cpus_equal(&(src1), &(src2), NR_CPUS) +static inline int __cpus_equal(const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + return bitmap_equal(src1p->bits, src2p->bits, nbits); +} + +#define cpus_intersects(src1, src2) __cpus_intersects(&(src1), &(src2), NR_CPUS) +static inline int __cpus_intersects(const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + return bitmap_intersects(src1p->bits, src2p->bits, nbits); +} + +#define cpus_subset(src1, src2) __cpus_subset(&(src1), &(src2), NR_CPUS) +static inline int __cpus_subset(const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + return 
bitmap_subset(src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_empty(src) __cpus_empty(&(src), NR_CPUS)
+static inline int __cpus_empty(const cpumask_t *srcp, int nbits)
+{
+	return bitmap_empty(srcp->bits, nbits);
+}
+
+#define cpus_full(cpumask) __cpus_full(&(cpumask), NR_CPUS)
+static inline int __cpus_full(const cpumask_t *srcp, int nbits)
+{
+	return bitmap_full(srcp->bits, nbits);
+}
+
+#define cpus_weight(cpumask) __cpus_weight(&(cpumask), NR_CPUS)
+static inline int __cpus_weight(const cpumask_t *srcp, int nbits)
+{
+	return bitmap_weight(srcp->bits, nbits);
+}
+
+#define cpus_shift_right(dst, src, n) \
+	__cpus_shift_right(&(dst), &(src), (n), NR_CPUS)
+static inline void __cpus_shift_right(cpumask_t *dstp,
+					const cpumask_t *srcp, int n, int nbits)
+{
+	bitmap_shift_right(dstp->bits, srcp->bits, n, nbits);
+}
+
+#define cpus_shift_left(dst, src, n) \
+	__cpus_shift_left(&(dst), &(src), (n), NR_CPUS)
+static inline void __cpus_shift_left(cpumask_t *dstp,
+					const cpumask_t *srcp, int n, int nbits)
+{
+	bitmap_shift_left(dstp->bits, srcp->bits, n, nbits);
+}
+
+/*
+ * Return the number of the lowest set bit in *srcp, or NR_CPUS when no bit
+ * is set (the contract for_each_cpu_mask() relies on to terminate).
+ *
+ * ffs() must not be used here: it takes an int, so it never sees bits above
+ * bit 31 of the first word, and it yields -1 rather than NR_CPUS when that
+ * word is empty.  Scan the bits explicitly instead.
+ */
+static inline int __first_cpu(const cpumask_t *srcp)
+{
+	int cpu;
+
+	for (cpu = 0; cpu < NR_CPUS; cpu++)
+		if (test_bit(cpu, srcp->bits))
+			return cpu;
+	return NR_CPUS;
+}
+
+#define first_cpu(src) __first_cpu(&(src))
+int __next_cpu(int n, const cpumask_t *srcp);
+#define next_cpu(n, src) __next_cpu((n), &(src))
+
+#define cpumask_of_cpu(cpu) \
+({ \
+	typeof(_unused_cpumask_arg_) m; \
+	if (sizeof(m) == sizeof(unsigned long)) { \
+		m.bits[0] = 1UL<<(cpu); \
+	} else { \
+		cpus_clear(m); \
+		cpu_set((cpu), m); \
+	} \
+	m; \
+})
+
+#define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS)
+
+#if 0
+
+#define CPU_MASK_ALL \
+(cpumask_t) { { \
+	[BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \
+} }
+
+#else
+
+#define CPU_MASK_ALL \
+(cpumask_t) { { \
+	[0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \
+	[BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \
+} }
+
+#endif
+
+#define CPU_MASK_NONE \
+(cpumask_t) { { \
+	[0 ... 
BITS_TO_LONGS(NR_CPUS)-1] = 0UL \ +} } + +#define CPU_MASK_CPU0 \ +(cpumask_t) { { \ + [0] = 1UL \ +} } + +#define cpus_addr(src) ((src).bits) + +#define cpumask_scnprintf(buf, len, src) \ + __cpumask_scnprintf((buf), (len), &(src), NR_CPUS) +static inline int __cpumask_scnprintf(char *buf, int len, + const cpumask_t *srcp, int nbits) +{ + return bitmap_scnprintf(buf, len, srcp->bits, nbits); +} + +#define cpumask_parse_user(ubuf, ulen, dst) \ + __cpumask_parse_user((ubuf), (ulen), &(dst), NR_CPUS) +static inline int __cpumask_parse_user(const char *buf, int len, + cpumask_t *dstp, int nbits) +{ + return bitmap_parse(buf, len, dstp->bits, nbits); +} + +#define cpulist_scnprintf(buf, len, src) \ + __cpulist_scnprintf((buf), (len), &(src), NR_CPUS) +static inline int __cpulist_scnprintf(char *buf, int len, + const cpumask_t *srcp, int nbits) +{ + return bitmap_scnlistprintf(buf, len, srcp->bits, nbits); +} + +#define cpulist_parse(buf, dst) __cpulist_parse((buf), &(dst), NR_CPUS) +static inline int __cpulist_parse(const char *buf, cpumask_t *dstp, int nbits) +{ + return bitmap_parselist(buf, dstp->bits, nbits); +} + +#define cpu_remap(oldbit, old, new) \ + __cpu_remap((oldbit), &(old), &(new), NR_CPUS) +static inline int __cpu_remap(int oldbit, + const cpumask_t *oldp, const cpumask_t *newp, int nbits) +{ + return bitmap_bitremap(oldbit, oldp->bits, newp->bits, nbits); +} + +#define cpus_remap(dst, src, old, new) \ + __cpus_remap(&(dst), &(src), &(old), &(new), NR_CPUS) +static inline void __cpus_remap(cpumask_t *dstp, const cpumask_t *srcp, + const cpumask_t *oldp, const cpumask_t *newp, int nbits) +{ + bitmap_remap(dstp->bits, srcp->bits, oldp->bits, newp->bits, nbits); +} + +#if NR_CPUS > 1 +#define for_each_cpu_mask(cpu, mask) \ + for ((cpu) = first_cpu(mask); \ + (cpu) < NR_CPUS; \ + (cpu) = next_cpu((cpu), (mask))) +#else /* NR_CPUS == 1 */ +#define for_each_cpu_mask(cpu, mask) \ + for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) +#endif /* NR_CPUS */ + +/* + * 
The following particular system cpumasks and operations manage + * possible, present and online cpus. Each of them is a fixed size + * bitmap of size NR_CPUS. + * + * #ifdef CONFIG_HOTPLUG_CPU + * cpu_possible_map - has bit 'cpu' set iff cpu is populatable + * cpu_present_map - has bit 'cpu' set iff cpu is populated + * cpu_online_map - has bit 'cpu' set iff cpu available to scheduler + * #else + * cpu_possible_map - has bit 'cpu' set iff cpu is populated + * cpu_present_map - copy of cpu_possible_map + * cpu_online_map - has bit 'cpu' set iff cpu available to scheduler + * #endif + * + * In either case, NR_CPUS is fixed at compile time, as the static + * size of these bitmaps. The cpu_possible_map is fixed at boot + * time, as the set of CPU id's that it is possible might ever + * be plugged in at any time during the life of that system boot. + * The cpu_present_map is dynamic(*), representing which CPUs + * are currently plugged in. And cpu_online_map is the dynamic + * subset of cpu_present_map, indicating those CPUs available + * for scheduling. + * + * If HOTPLUG is enabled, then cpu_possible_map is forced to have + * all NR_CPUS bits set, otherwise it is just the set of CPUs that + * ACPI reports present at boot. + * + * If HOTPLUG is enabled, then cpu_present_map varies dynamically, + * depending on what ACPI reports as currently plugged in, otherwise + * cpu_present_map is just a copy of cpu_possible_map. + * + * (*) Well, cpu_present_map is dynamic in the hotplug case. If not + * hotplug, it's a copy of cpu_possible_map, hence fixed at boot. + * + * Subtleties: + * 1) UP arch's (NR_CPUS == 1, CONFIG_SMP not defined) hardcode + * assumption that their single CPU is online. The UP + * cpu_{online,possible,present}_maps are placebos. Changing them + * will have no useful effect on the following num_*_cpus() + * and cpu_*() macros in the UP case. 
This ugliness is a UP + * optimization - don't waste any instructions or memory references + * asking if you're online or how many CPUs there are if there is + * only one CPU. + * 2) Most SMP arch's #define some of these maps to be some + * other map specific to that arch. Therefore, the following + * must be #define macros, not inlines. To see why, examine + * the assembly code produced by the following. Note that + * set1() writes phys_x_map, but set2() writes x_map: + * int x_map, phys_x_map; + * #define set1(a) x_map = a + * inline void set2(int a) { x_map = a; } + * #define x_map phys_x_map + * main(){ set1(3); set2(5); } + */ + +extern cpumask_t cpu_possible_map; +extern cpumask_t cpu_online_map; +extern cpumask_t cpu_present_map; + +#if NR_CPUS > 1 +#define num_online_cpus() cpus_weight(cpu_online_map) +#define num_possible_cpus() cpus_weight(cpu_possible_map) +#define num_present_cpus() cpus_weight(cpu_present_map) +#define cpu_online(cpu) cpu_isset((cpu), cpu_online_map) +#define cpu_possible(cpu) cpu_isset((cpu), cpu_possible_map) +#define cpu_present(cpu) cpu_isset((cpu), cpu_present_map) +#else +#define num_online_cpus() 1 +#define num_possible_cpus() 1 +#define num_present_cpus() 1 +#define cpu_online(cpu) ((cpu) == 0) +#define cpu_possible(cpu) ((cpu) == 0) +#define cpu_present(cpu) ((cpu) == 0) +#endif + +int highest_possible_processor_id(void); +#define any_online_cpu(mask) __any_online_cpu(&(mask)) +int __any_online_cpu(const cpumask_t *mask); + +#define for_each_possible_cpu(cpu) for_each_cpu_mask((cpu), cpu_possible_map) +#define for_each_online_cpu(cpu) for_each_cpu_mask((cpu), cpu_online_map) +#define for_each_present_cpu(cpu) for_each_cpu_mask((cpu), cpu_present_map) + +#endif /* __LINUX_CPUMASK_H */ diff --git a/cputree.c b/cputree.c new file mode 100644 index 0000000..36170f2 --- /dev/null +++ b/cputree.c @@ -0,0 +1,371 @@ +/* + * Copyright (C) 2006, Intel Corporation + * + * This file is part of irqbalance + * + * This program file is free 
software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program in a file named COPYING; if not, write to the + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301 USA + */ + +/* + * This file contains the code to construct and manipulate a hierarchy of processors, + * cache domains and processor cores. + */ +#include +#include +#include +#include +#include + +#include + +#include "irqbalance.h" + + +GList *cpus; +GList *cache_domains; +GList *packages; + +int package_count; +int cache_domain_count; +int core_count; + +/* Users want to be able to keep interrupts away from some cpus; store these in a cpumask_t */ +cpumask_t banned_cpus; + + +/* + it's convenient to have the complement of banned_cpus available so that + the AND operator can be used to mask out unwanted cpus +*/ +static cpumask_t unbanned_cpus; + +static void fill_packages(void) +{ + GList *entry; + + entry = g_list_first(cache_domains); + while (entry) { + struct package *package; + struct cache_domain *cache = NULL; + GList *entry2; + + cache = entry->data; + entry2 = entry; + entry = g_list_next(entry); + if (cache->marker) + continue; + package = malloc(sizeof(struct package)); + if (!package) + break; + memset(package, 0, sizeof(struct package)); + package->mask = cache->package_mask; + package->number = cache->number; + while (entry2) { + struct cache_domain *cache2; + cache2 = entry2->data; + if (cpus_equal(cache->package_mask, cache2->package_mask)) { + cache2->marker = 1; + 
package->cache_domains = g_list_append(package->cache_domains, cache2); + if (package->number > cache2->number) + package->number = cache2->number; + } + entry2 = g_list_next(entry2); + } + packages = g_list_append(packages, package); + package_count++; + } +} + +static void fill_cache_domain(void) +{ + GList *entry; + + entry = g_list_first(cpus); + while (entry) { + struct cache_domain *cache = NULL; + struct cpu_core *cpu; + GList *entry2; + cpu = entry->data; + entry2 = entry; + entry = g_list_next(entry); + if (cpu->marker) + continue; + cache = malloc(sizeof(struct cache_domain)); + if (!cache) + break; + memset(cache, 0, sizeof(struct cache_domain)); + cache->mask = cpu->cache_mask; + cache->package_mask = cpu->package_mask; + cache->number = cpu->number; + cache_domains = g_list_append(cache_domains, cache); + cache_domain_count++; + while (entry2) { + struct cpu_core *cpu2; + cpu2 = entry2->data; + if (cpus_equal(cpu->cache_mask, cpu2->cache_mask) && + cpus_equal(cpu->package_mask, cpu2->package_mask)) { + cpu2->marker = 1; + cache->cpu_cores = g_list_append(cache->cpu_cores, cpu2); + if (cpu2->number < cache->number) + cache->number = cpu2->number; + } + entry2 = g_list_next(entry2); + } + } +} + + +static void do_one_cpu(char *path) +{ + struct cpu_core *cpu; + FILE *file; + char new_path[PATH_MAX]; + + /* skip offline cpus */ + snprintf(new_path, PATH_MAX, "%s/online", path); + file = fopen(new_path, "r"); + if (file) { + char line[4096]; + line[4095]=0; + if (fgets(line, 4095, file)==NULL) + line[0]='1'; + fclose(file); + if (line[0]=='0') + return; + } + + cpu = malloc(sizeof(struct cpu_core)); + if (!cpu) + return; + memset(cpu, 0, sizeof(struct cpu_core)); + + cpu->number = strtoul(&path[27], NULL, 10); + + cpu_set(cpu->number, cpu->mask); + + /* if the cpu is on the banned list, just don't add it */ + if (cpus_intersects(cpu->mask, banned_cpus)) { + free(cpu); + /* even though we don't use the cpu we do need to count it */ + core_count++; + return; 
+	}
+
+
+	/* try to read the package mask; if it doesn't exist assume solitary */
+	snprintf(new_path, PATH_MAX, "%s/topology/core_siblings", path);
+	file = fopen(new_path, "r");
+	cpu_set(cpu->number, cpu->package_mask);
+	if (file) {
+		char line[4096];
+		line[4095]=0;
+		if (fgets(line, 4095, file))
+			cpumask_parse_user(line, strlen(line), cpu->package_mask);
+		fclose(file);
+	}
+
+	/* try to read the cache mask; if it doesn't exist assume solitary */
+	/* We want the deepest cache level available so try index1 first, then index2 */
+	cpu_set(cpu->number, cpu->cache_mask);
+	snprintf(new_path, PATH_MAX, "%s/cache/index1/shared_cpu_map", path);
+	file = fopen(new_path, "r");
+	if (file) {
+		char line[4096];
+		line[4095]=0;
+		if (fgets(line, 4095, file))
+			cpumask_parse_user(line, strlen(line), cpu->cache_mask);
+		fclose(file);
+	}
+	snprintf(new_path, PATH_MAX, "%s/cache/index2/shared_cpu_map", path);
+	file = fopen(new_path, "r");
+	if (file) {
+		char line[4096];
+		line[4095]=0;
+		if (fgets(line, 4095, file))
+			cpumask_parse_user(line, strlen(line), cpu->cache_mask);
+		fclose(file);
+	}
+
+	/*
+	   blank out the banned cpus from the various masks so that interrupts
+	   will never be told to go there
+	 */
+	cpus_and(cpu->cache_mask, cpu->cache_mask, unbanned_cpus);
+	cpus_and(cpu->package_mask, cpu->package_mask, unbanned_cpus);
+	cpus_and(cpu->mask, cpu->mask, unbanned_cpus);
+
+	cpus = g_list_append(cpus, cpu);
+	core_count++;
+}
+
+/*
+ * Print one line per interrupt in the list, each indented by 'spaces'
+ * blanks so the output nests under the tree level that owns the list.
+ */
+static void dump_irqs(int spaces, GList *interrupts)
+{
+	struct interrupt *irq;
+	while (interrupts) {
+		int i;
+		/* indentation first, then the interrupt itself */
+		for (i=0; i<spaces; i++)
+			printf(" ");
+		irq = interrupts->data;
+		printf("Interrupt %i (%s/%u) \n", irq->number, classes[irq->class], (unsigned int)irq->workload);
+		interrupts = g_list_next(interrupts);
+	}
+}
+
+void dump_tree(void)
+{
+	GList *p_iter, *c_iter, *cp_iter;
+	struct package *package;
+	struct cache_domain *cache_domain;
+	struct cpu_core *cpu;
+
+	char buffer[4096];
+	p_iter = g_list_first(packages);
+	while (p_iter) {
+		package = p_iter->data;
+ 
cpumask_scnprintf(buffer, 4096, package->mask); + printf("Package %i: cpu mask is %s (workload %lu)\n", package->number, buffer, (unsigned long)package->workload); + c_iter = g_list_first(package->cache_domains); + while (c_iter) { + cache_domain = c_iter->data; + c_iter = g_list_next(c_iter); + cpumask_scnprintf(buffer, 4095, cache_domain->mask); + printf(" Cache domain %i: cpu mask is %s (workload %lu) \n", cache_domain->number, buffer, (unsigned long)cache_domain->workload); + cp_iter = cache_domain->cpu_cores; + while (cp_iter) { + cpu = cp_iter->data; + cp_iter = g_list_next(cp_iter); + printf(" CPU number %i (workload %lu)\n", cpu->number, (unsigned long)cpu->workload); + dump_irqs(18, cpu->interrupts); + } + dump_irqs(10, cache_domain->interrupts); + } + dump_irqs(2, package->interrupts); + p_iter = g_list_next(p_iter); + } +} + +/* + * this function removes previous state from the cpu tree, such as + * which level does how much work and the actual lists of interrupts + * assigned to each component + */ +void clear_work_stats(void) +{ + GList *p_iter, *c_iter, *cp_iter; + struct package *package; + struct cache_domain *cache_domain; + struct cpu_core *cpu; + + p_iter = g_list_first(packages); + while (p_iter) { + package = p_iter->data; + package->workload = 0; + g_list_free(package->interrupts); + package->interrupts = NULL; + c_iter = g_list_first(package->cache_domains); + memset(package->class_count, 0, sizeof(package->class_count)); + while (c_iter) { + cache_domain = c_iter->data; + c_iter = g_list_next(c_iter); + cache_domain->workload = 0; + cp_iter = cache_domain->cpu_cores; + g_list_free(cache_domain->interrupts); + cache_domain->interrupts = NULL; + memset(cache_domain->class_count, 0, sizeof(cache_domain->class_count)); + while (cp_iter) { + cpu = cp_iter->data; + cp_iter = g_list_next(cp_iter); + cpu->workload = 0; + g_list_free(cpu->interrupts); + cpu->interrupts = NULL; + memset(cpu->class_count, 0, sizeof(cpu->class_count)); + } + } + p_iter 
= g_list_next(p_iter);
+	}
+}
+
+
+/*
+ * Build the cpu/cache-domain/package hierarchy from sysfs: every "cpu<N>"
+ * directory below /sys/devices/system/cpu is handed to do_one_cpu(), after
+ * which the cache domains and packages are derived from the collected cpus.
+ * Does nothing when the sysfs directory cannot be opened.
+ */
+void parse_cpu_tree(void)
+{
+	DIR *dir;
+	struct dirent *entry;
+
+	cpus_complement(unbanned_cpus, banned_cpus);
+
+	dir = opendir("/sys/devices/system/cpu");
+	if (!dir)
+		return;
+	while ((entry = readdir(dir)) != NULL) {
+		char new_path[PATH_MAX];
+		const char *name = entry->d_name;
+
+		/*
+		 * Only "cpu" followed by a digit names a processor; other
+		 * entries that merely contain "cpu" (cpufreq, cpuidle, ...)
+		 * would be parsed by do_one_cpu() as a bogus cpu 0.
+		 */
+		if (strncmp(name, "cpu", 3) != 0 || name[3] < '0' || name[3] > '9')
+			continue;
+		snprintf(new_path, PATH_MAX, "/sys/devices/system/cpu/%s", name);
+		do_one_cpu(new_path);
+	}
+	closedir(dir);
+
+	fill_cache_domain();
+	fill_packages();
+
+	if (debug_mode)
+		dump_tree();
+
+}
+
+
+/*
+ * This function frees all memory related to a cpu tree so that a new tree
+ * can be read
+ */
+void clear_cpu_tree(void)
+{
+	GList *item;
+	struct cpu_core *cpu;
+	struct cache_domain *cache_domain;
+	struct package *package;
+
+	while (packages) {
+		item = g_list_first(packages);
+		package = item->data;
+		g_list_free(package->cache_domains);
+		g_list_free(package->interrupts);
+		free(package);
+		packages = g_list_delete_link(packages, item);
+	}
+	package_count = 0;
+
+	while (cache_domains) {
+		item = g_list_first(cache_domains);
+		cache_domain = item->data;
+		g_list_free(cache_domain->cpu_cores);
+		g_list_free(cache_domain->interrupts);
+		free(cache_domain);
+		cache_domains = g_list_delete_link(cache_domains, item);
+	}
+	cache_domain_count = 0;
+
+
+	while (cpus) {
+		item = g_list_first(cpus);
+		cpu = item->data;
+		g_list_free(cpu->interrupts);
+		free(cpu);
+		cpus = g_list_delete_link(cpus, item);
+	}
+	core_count = 0;
+
+}
diff --git a/irqbalance.c b/irqbalance.c
new file mode 100644
index 0000000..c37d9d4
--- /dev/null
+++ b/irqbalance.c
@@ -0,0 +1,138 @@
+/*
+ * Copyright (C) 2006, Intel Corporation
+ *
+ * This file is part of irqbalance
+ *
+ * This program file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2 of the License. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program in a file named COPYING; if not, write to the + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301 USA + */ +#include +#include +#include +#include +#include + +#include "irqbalance.h" + +int one_shot_mode; +int debug_mode; + +int need_cpu_rescan; + +extern cpumask_t banned_cpus; + +static int counter; + + +void sleep_approx(int seconds) +{ + struct timespec ts; + struct timeval tv; + gettimeofday(&tv, NULL); + ts.tv_sec = seconds; + ts.tv_nsec = -tv.tv_usec*1000; + while (ts.tv_nsec < 0) { + ts.tv_sec--; + ts.tv_nsec += 1000000000; + } + nanosleep(&ts, NULL); +} + +int main(int argc, char** argv) +{ + if (argc>1 && strstr(argv[1],"debug")) + debug_mode=1; + if (argc>1 && strstr(argv[1],"oneshot")) + one_shot_mode=1; + + if (getenv("IRQBALANCE_BANNED_CPUS")) { + cpumask_parse_user(getenv("IRQBALANCE_BANNED_CPUS"), strlen(getenv("IRQBALANCE_BANNED_CPUS")), banned_cpus); + } + + if (getenv("IRQBALANCE_ONESHOT")) + one_shot_mode=1; + + if (getenv("IRQBALANCE_DEBUG")) + debug_mode=1; + + parse_cpu_tree(); + + + /* On single core UP systems irqbalance obviously has no work to do */ + if (core_count<2) + exit(EXIT_SUCCESS); + /* On dual core/hyperthreading shared cache systems just do a one shot setup */ + if (cache_domain_count==1) + one_shot_mode = 1; + + + if (!debug_mode) + if (daemon(0,0)) + exit(EXIT_FAILURE); + + parse_proc_interrupts(); + sleep(SLEEP_INTERVAL/4); + reset_counts(); + parse_proc_interrupts(); + pci_numa_scan(); + calculate_workload(); + sort_irq_list(); + if (debug_mode) + dump_workloads(); + + while (1) { + sleep_approx(SLEEP_INTERVAL); + if (debug_mode) 
+ printf("\n\n\n-----------------------------------------------------------------------------\n"); + + + check_power_mode(); + parse_proc_interrupts(); + + /* cope with cpu hotplug -- detected during /proc/interrupts parsing */ + if (need_cpu_rescan) { + need_cpu_rescan = 0; + /* if there's a hotplug event we better turn off power mode for a bit until things settle */ + power_mode = 0; + if (debug_mode) + printf("Rescanning cpu topology \n"); + reset_counts(); + clear_work_stats(); + + clear_cpu_tree(); + parse_cpu_tree(); + } + + /* deal with NAPI */ + account_for_nic_stats(); + calculate_workload(); + + /* to cope with dynamic configurations we scan for new numa information + * once every 5 minutes + */ + if (counter % NUMA_REFRESH_INTERVAL == 16) + pci_numa_scan(); + + calculate_placement(); + activate_mapping(); + + if (debug_mode) + dump_tree(); + if (one_shot_mode) + break; + counter++; + } + return EXIT_SUCCESS; +} diff --git a/irqbalance.h b/irqbalance.h new file mode 100644 index 0000000..e9c1ad4 --- /dev/null +++ b/irqbalance.h @@ -0,0 +1,50 @@ +#ifndef __INCLUDE_GUARD_IRQBALANCE_H_ +#define __INCLUDE_GUARD_IRQBALANCE_H_ + + +#include "constants.h" + +#include "cpumask.h" + +#include +#include + +#include "types.h" + +struct interrupt; + +extern int package_count; +extern int cache_domain_count; +extern int core_count; +extern char *classes[]; +extern int map_class_to_level[7]; +extern int class_counts[7]; +extern int debug_mode; +extern int power_mode; +extern int need_cpu_rescan; +extern int one_shot_mode; +extern GList *interrupts; + + +extern void parse_cpu_tree(void); +extern void clear_work_stats(void); +extern void parse_proc_interrupts(void); +extern void set_interrupt_count(int number, uint64_t count, cpumask_t *mask); +extern void add_interrupt_count(int number, uint64_t count, int type); +extern int find_class(struct interrupt *irq, char *string); +extern void add_interrupt_numa(int number, cpumask_t mask, int type); + +void 
calculate_workload(void); +void reset_counts(void); +void dump_workloads(void); +void sort_irq_list(void); +void calculate_placement(void); +void dump_tree(void); + +void activate_mapping(void); +void account_for_nic_stats(void); +void check_power_mode(void); +void clear_cpu_tree(void); +void pci_numa_scan(void); + +#endif diff --git a/irqlist.c b/irqlist.c new file mode 100644 index 0000000..a434d20 --- /dev/null +++ b/irqlist.c @@ -0,0 +1,252 @@ +/* + * Copyright (C) 2006, Intel Corporation + * + * This file is part of irqbalance + * + * This program file is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program in a file named COPYING; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301 USA
+ */
+
+/*
+ * This file has the basic functions to manipulate interrupt metadata
+ */
+#include
+#include
+#include
+#include
+#include
+
+#include "types.h"
+#include "irqbalance.h"
+
+GList *interrupts;
+
+
+
+/*
+ * This function classifies and reads various things from /proc about a specific irq:
+ *  - the current affinity mask is read from /proc/irq/<number>/smp_affinity
+ *  - every other directory entry is offered to find_class() to pick the class
+ *  - the balance level follows from the class, unless the irq number is
+ *    listed in the IRQBALANCE_BANNED_INTERRUPTS environment variable, in
+ *    which case it becomes BALANCE_NONE
+ */
+static void investigate(struct interrupt *irq, int number)
+{
+	DIR *dir;
+	struct dirent *entry;
+	char *c, *c2;
+	int nr;
+	char buf[PATH_MAX];
+	sprintf(buf, "/proc/irq/%i", number);
+	dir = opendir(buf);
+	/* opendir() returns NULL on failure; readdir(NULL) must be avoided */
+	while (dir && (entry = readdir(dir)) != NULL) {
+		if (strcmp(entry->d_name,"smp_affinity")==0) {
+			FILE *file;
+			sprintf(buf, "/proc/irq/%i/smp_affinity", number);
+			file = fopen(buf, "r");
+			if (!file)
+				continue;
+			if (fgets(buf, PATH_MAX, file)==NULL) {
+				fclose(file);
+				continue;
+			}
+			cpumask_parse_user(buf, strlen(buf), irq->mask);
+			fclose(file);
+		} else {
+			irq->class = find_class(irq, entry->d_name);
+		}
+	}
+	if (dir)
+		closedir(dir);
+	irq->balance_level = map_class_to_level[irq->class];
+
+	/* next, check the IRQBALANCE_BANNED_INTERRUPTS env variable for blacklisted irqs */
+	c = getenv("IRQBALANCE_BANNED_INTERRUPTS");
+	if (!c)
+		return;
+
+	/*
+	 * Walk every number in the list.  The former loop compared c with c2
+	 * right after assigning c = c2 and therefore stopped after the first
+	 * entry.
+	 */
+	while (*c) {
+		nr = strtoul(c, &c2, 10);
+		if (c == c2) {
+			/* not a number here: step over one separator character */
+			c++;
+			continue;
+		}
+		if (nr == number)
+			irq->balance_level = BALANCE_NONE;
+		c = c2;
+	}
+}
+
+
+/*
+ * Set the number of interrupts received for a specific irq;
+ * create the irq metadata if there is none yet
+ */
+void set_interrupt_count(int number, uint64_t count, cpumask_t *mask)
+{
+	GList *item;
+	struct interrupt *irq;
+
+	if (count < MIN_IRQ_COUNT && !one_shot_mode)
+		return; /* no need to track or set interrupts sources without any activity since boot
+			   but allow for a few (20) 
boot-time-only interrupts */ + + item = g_list_first(interrupts); + while (item) { + irq = item->data; + + if (irq->number == number) { + irq->count = count; + return; + } + item = g_list_next(item); + } + /* new interrupt */ + irq = malloc(sizeof(struct interrupt)); + if (!irq) + return; + memset(irq, 0, sizeof(struct interrupt)); + irq->number = number; + irq->count = count; + investigate(irq, number); + if (irq->balance_level == BALANCE_NONE) + irq->mask = *mask; + + interrupts = g_list_append(interrupts, irq); +} + +/* + * Add extra irqs to a specific irq metadata structure; + * if no such metadata exists, do nothing at all + */ +void add_interrupt_count(int number, uint64_t count, int type) +{ + GList *item; + struct interrupt *irq; + + if (!count) + return; + + item = g_list_first(interrupts); + while (item) { + irq = item->data; + item = g_list_next(item); + + if (irq->number == number) { + irq->extra += count; + if (irq->class < type && irq->balance_level != BALANCE_NONE) { + irq->class = type; + irq->balance_level = map_class_to_level[irq->class]; + } + return; + } + } +} + +/* + * Set the numa affinity mask for a specific interrupt if there + * is metadata for the interrupt; do nothing if no such data + * exists. 
+ */
+void add_interrupt_numa(int number, cpumask_t mask, int type)
+{
+	GList *item;
+	struct interrupt *irq;
+
+	item = g_list_first(interrupts);
+	while (item) {
+		irq = item->data;
+		item = g_list_next(item);
+
+		if (irq->number == number) {
+			cpus_or(irq->numa_mask, irq->numa_mask, mask);
+			if (irq->class < type && irq->balance_level != BALANCE_NONE) {
+				irq->class = type;
+				irq->balance_level = map_class_to_level[irq->class];
+			}
+			return;
+		}
+	}
+}
+
+void calculate_workload(void)
+{
+	int i;
+	GList *item;
+	struct interrupt *irq;
+
+	for (i=0; i<7; i++)
+		class_counts[i]=0;
+	item = g_list_first(interrupts);
+	while (item) {
+		irq = item->data;
+		item = g_list_next(item);
+
+		irq->workload = irq->count - irq->old_count + irq->workload/3 + irq->extra;
+		class_counts[irq->class]++;
+		irq->old_count = irq->count;
+		irq->extra = 0;
+	}
+}
+
+void reset_counts(void)
+{
+	GList *item;
+	struct interrupt *irq;
+	item = g_list_first(interrupts);
+	while (item) {
+		irq = item->data;
+		item = g_list_next(item);
+		irq->old_count = irq->count;
+		irq->extra = 0;
+
+	}
+}
+
+void dump_workloads(void)
+{
+	GList *item;
+	struct interrupt *irq;
+	item = g_list_first(interrupts);
+	while (item) {
+		irq = item->data;
+		item = g_list_next(item);
+
+		printf("Interrupt %i (class %s) has workload %lu \n", irq->number, classes[irq->class], (unsigned long)irq->workload);
+
+	}
+}
+
+
+/*
+ * GList comparison callback: higher class sorts first, then higher
+ * workload; remaining ties are broken by object address.
+ * NOTE(review): the address tie-break and the head of sort_irq_list()
+ * were restored from a damaged copy of this file - confirm against the
+ * original source.
+ */
+static gint sort_irqs(gconstpointer A, gconstpointer B)
+{
+	struct interrupt *a, *b;
+	a = (struct interrupt*)A;
+	b = (struct interrupt*)B;
+
+	if (a->class < b->class)
+		return 1;
+	if (a->class > b->class)
+		return -1;
+	if (a->workload < b->workload)
+		return 1;
+	if (a->workload > b->workload)
+		return -1;
+	if (a<b)
+		return 1;
+	return -1;
+
+}
+
+void sort_irq_list(void)
+{
+	/* sort by class first (high->low) and then by workload (high->low) */
+	interrupts = g_list_sort(interrupts, sort_irqs);
+}
diff --git a/network.c b/network.c
new file mode 100644
index 0000000..7cb8439
--- /dev/null
+++ b/network.c
@@ -0,0 +1,175 @@
+/*
+ * Copyright (C) 2006, Intel Corporation
+ *
+ * This 
file is part of irqbalance
+ *
+ * This program file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program in a file named COPYING; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301 USA
+ */
+
+/*
+ * Due to NAPI, the actual number of interrupts for a network NIC is usually low
+ * even though the amount of work is high; this file is there to compensate for this
+ * by adding actual packet counts to the calculated amount of work of interrupts
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+#include "irqbalance.h"
+
+struct nic {
+	char ethname[64];
+	int irq;
+	uint64_t prev_pkt;
+	int counter;
+};
+
+static GList *nics;
+
+
+/*
+ * Map a network device name to its irq number: ask the driver for its bus
+ * address with the ETHTOOL_GDRVINFO ioctl, then read the irq from
+ * /sys/bus/pci/devices/<bus address>/irq.  Returns 0 whenever any step
+ * fails.
+ */
+static int dev_to_irq(char *devname)
+{
+	int sock, ret;
+	struct ifreq ifr;
+	struct ethtool_value ethtool;
+	struct ethtool_drvinfo driver;
+	FILE *file;
+
+	char buffer[PATH_MAX];
+
+	memset(&ifr, 0, sizeof(struct ifreq));
+	memset(&ethtool, 0, sizeof(struct ethtool_value));
+	memset(&driver, 0, sizeof(struct ethtool_drvinfo));
+
+	sock = socket(AF_INET, SOCK_DGRAM, 0);
+	if (sock<0)
+		return 0;
+
+	/* bounded copy: ifr_name is a fixed-size array */
+	snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", devname);
+
+	driver.cmd = ETHTOOL_GDRVINFO;
+	ifr.ifr_data = (void*) &driver;
+	ret = ioctl(sock, SIOCETHTOOL, &ifr);
+	close(sock);
+	if (ret<0)
+		return 0;
+	sprintf(buffer,"/sys/bus/pci/devices/%s/irq", driver.bus_info);
+	file = fopen(buffer, "r");
+	if (!file)
+		return 0;
+	if (fgets(buffer, PATH_MAX, file)==NULL)
+		strcpy(buffer,"0");
+	fclose(file);
+ 
return strtoul(buffer, NULL, 10);
+}
+
+/*
+ * Allocate the bookkeeping for a newly seen network device, look up its irq
+ * and append it to the nics list.  Returns NULL when out of memory.
+ */
+static struct nic *new_nic(char *name)
+{
+	struct nic *nic;
+	nic = malloc(sizeof(struct nic));
+	if (!nic)
+		return NULL;
+	memset(nic, 0, sizeof(struct nic));
+	/* bounded copy: ethname is a fixed 64 byte array */
+	snprintf(nic->ethname, sizeof(nic->ethname), "%s", name);
+	nic->irq = dev_to_irq(name);
+	nics = g_list_append(nics, nic);
+	return nic;
+}
+
+static struct nic *find_nic(char *name)
+{
+	GList *item;
+	struct nic *nic;
+	item = g_list_first(nics);
+	while (item) {
+		nic = item->data;
+		item = g_list_next(item);
+		if (strcmp(nic->ethname, name)==0) {
+			nic->counter++;
+			/* refresh irq information once in a while; ifup/down
+			 * can make this info go stale over time
+			 */
+			if ((nic->counter % NIC_REFRESH_INTERVAL) == 0)
+				nic->irq = dev_to_irq(nic->ethname);
+			return nic;
+		}
+	}
+	nic = new_nic(name);
+	return nic;
+}
+
+/*
+ * Read the per-device packet counters from /proc/net/dev and credit half of
+ * the packets seen since the previous call to the irq of each device.
+ */
+void account_for_nic_stats(void)
+{
+	struct nic *nic;
+	FILE *file;
+	char line[8192];
+	file = fopen("/proc/net/dev", "r");
+	if (!file)
+		return;
+	/* first two lines are headers; close the file when they are missing */
+	if (fgets(line, 8191, file)==NULL) {
+		fclose(file);
+		return;
+	}
+	if (fgets(line, 8191, file)==NULL) {
+		fclose(file);
+		return;
+	}
+
+	while (!feof(file)) {
+		uint64_t rxcount;
+		uint64_t txcount;
+		uint64_t delta;
+		int dummy;
+		char *c, *c2;
+		if (fgets(line, 8191, file)==NULL)
+			break;
+		c = strchr(line, ':');
+		if (c==NULL) /* header line */
+			continue;
+		*c = 0;
+		c++;
+		c2 = &line[0];
+		while (*c2==' ') c2++;
+		nic = find_nic(c2);
+		if (!nic)
+			continue;
+		dummy = strtoul(c, &c, 10);
+		rxcount = strtoull(c, &c, 10);
+		dummy = strtoul(c, &c, 10);
+		dummy = strtoul(c, &c, 10);
+		dummy = strtoul(c, &c, 10);
+		dummy = strtoul(c, &c, 10);
+		dummy = strtoul(c, &c, 10);
+		dummy = strtoul(c, &c, 10);
+		dummy = strtoul(c, &c, 10);
+		txcount = strtoull(c, &c, 10);
+		delta = (txcount+rxcount-nic->prev_pkt)/2;
+		/* add the RX and TX packets to the irq count, but only for 50%;
+		   many packets generate another IRQ anyway and we don't want to
+		   overweigh this too much */
+		if (delta>0 && nic->prev_pkt != 0)
+			add_interrupt_count(nic->irq, 
delta, IRQ_ETH); + nic->prev_pkt = rxcount + txcount; + + + } + fclose(file); +} diff --git a/non-atomic.h b/non-atomic.h new file mode 100644 index 0000000..943501a --- /dev/null +++ b/non-atomic.h @@ -0,0 +1,115 @@ +/* + +This file is copied from the Linux kernel and mildly adjusted for use in userspace + + +*/ +#ifndef _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ +#define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ + +#define BITOP_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) +#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) + +/** + * set_bit - Set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * Unlike the Linux kernel's set_bit(), this userspace version is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. + */ +static inline void set_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + + *p |= mask; +} + +static inline void clear_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + + *p &= ~mask; +} + +/** + * __change_bit - Toggle a bit in memory + * @nr: the bit to change + * @addr: the address to start counting from + * + * Unlike change_bit(), this function is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. + */ +static inline void __change_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + + *p ^= mask; +} + +/** + * __test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is non-atomic and can be reordered. + * If two examples of this operation race, one can appear to succeed + * but actually fail. 
You must protect multiple accesses with a lock. + */ +static inline int __test_and_set_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + unsigned long old = *p; + + *p = old | mask; + return (old & mask) != 0; +} + +/** + * __test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to clear + * @addr: Address to count from + * + * This operation is non-atomic and can be reordered. + * If two examples of this operation race, one can appear to succeed + * but actually fail. You must protect multiple accesses with a lock. + */ +static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + unsigned long old = *p; + + *p = old & ~mask; + return (old & mask) != 0; +} + +/* WARNING: non atomic and it can be reordered! */ +static inline int __test_and_change_bit(int nr, + volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + unsigned long old = *p; + + *p = old ^ mask; + return (old & mask) != 0; +} + +/** + * test_bit - Determine whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static inline int test_bit(int nr, const volatile unsigned long *addr) +{ + return 1UL & (addr[BITOP_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); +} + +#endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */ diff --git a/numa.c b/numa.c new file mode 100644 index 0000000..6d8e48a --- /dev/null +++ b/numa.c @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2006, Intel Corporation + * + * This file is part of irqbalance + * + * This program file is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program in a file named COPYING; if not, write to the + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301 USA + */ + +/* + * This file tries to map numa affinity of pci devices to their interrupts + * In addition the PCI class information is used to refine the classification + * of interrupt sources + */ +#include +#include +#include +#include +#include + +#include "irqbalance.h" + +void pci_numa_scan(void) +{ + DIR *dir; + struct dirent *entry; + cpumask_t mask; + char line[PATH_MAX]; + FILE *file; + int irq; + unsigned int class; + + dir = opendir("/sys/bus/pci/devices"); + if (!dir) + return; + do { + int type; + entry = readdir(dir); + if (!entry) + return; + if (strlen(entry->d_name)<3) + continue; + + sprintf(line,"/sys/bus/pci/devices/%s/irq", entry->d_name); + file = fopen(line, "r"); + if (!file) + continue; + if (fgets(line, PATH_MAX, file)==NULL) + line[0]=0; + fclose(file); + irq = strtoul(line, NULL, 10); + if (!irq) + continue; + + sprintf(line,"/sys/bus/pci/devices/%s/class", entry->d_name); + file = fopen(line, "r"); + if (!file) + continue; + if (fgets(line, PATH_MAX, file)==NULL) + line[0]=0; + fclose(file); + class = strtoul(line, NULL, 16); + + sprintf(line,"/sys/bus/pci/devices/%s/local_cpus", entry->d_name); + file = fopen(line, "r"); + if (!file) + continue; + if (fgets(line, PATH_MAX, file)==NULL) + line[0]=0; + fclose(file); + cpumask_parse_user(line, strlen(line), mask); + + type = IRQ_OTHER; + if ((class>>16) == 0x01) + type = IRQ_SCSI; +/* + * Ethernet gets the type via /proc/net/dev; in addition down'd interfaces + * shouldn't boost interrupts + if 
((class>>16) == 0x02) + type = IRQ_ETH; +*/ + if ((class>>16) >= 0x03 && (class>>16) <= 0x0C) + type = IRQ_LEGACY; + + add_interrupt_numa(irq, mask, type); + + } while (entry); + closedir(dir); +} diff --git a/placement.c b/placement.c new file mode 100644 index 0000000..5d6e2bb --- /dev/null +++ b/placement.c @@ -0,0 +1,315 @@ +/* + * Copyright (C) 2006, Intel Corporation + * + * This file is part of irqbalance + * + * This program file is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program in a file named COPYING; if not, write to the + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301 USA + */ +#include +#include +#include +#include + +#include "types.h" +#include "irqbalance.h" + + +int power_mode; + +extern GList *interrupts, *packages, *cache_domains, *cpus; + +static uint64_t package_cost_func(struct interrupt *irq, struct package *package) +{ + int bonus = 0; + int maxcount; + /* moving to a cold package/cache/etc gets you a 3000 penalty */ + if (!cpus_intersects(irq->old_mask, package->mask)) + bonus = CROSS_PACKAGE_PENALTY; + + /* do a little numa affinity */ + if (!cpus_intersects(irq->numa_mask, package->mask)) + bonus += NUMA_PENALTY; + + /* but if the irq has had 0 interrupts for a while move it about more easily */ + if (irq->workload==0) + bonus = bonus / 10; + + /* in power save mode, you better be on package 0, with overflow to the next package if really needed */ + if (power_mode) + bonus += POWER_MODE_PACKAGE_THRESHOLD 
* package->number; + + /* if we're out of whack in terms of per class counts.. just block (except in power mode) */ + maxcount = (class_counts[irq->class] + package_count -1 ) / package_count; + if (package->class_count[irq->class]>=maxcount && !power_mode) + bonus += 300000; + + return irq->workload + bonus; +} + +static uint64_t cache_domain_cost_func(struct interrupt *irq, struct cache_domain *cache_domain) +{ + int bonus = 0; + /* moving to a cold cache gets you a 1500 penalty */ + if (!cpus_intersects(irq->old_mask, cache_domain->mask)) + bonus = CROSS_PACKAGE_PENALTY/2; + + /* do a little numa affinity */ + if (!cpus_intersects(irq->numa_mask, cache_domain->mask)) + bonus += NUMA_PENALTY; + + /* but if the irq has had 0 interrupts for a while move it about more easily */ + if (irq->workload==0) + bonus = bonus / 10; + + + /* pay 6000 for each previous interrupt of the same class */ + bonus += CLASS_VIOLATION_PENTALTY * cache_domain->class_count[irq->class]; + + return irq->workload + bonus; +} + +static uint64_t cpu_cost_func(struct interrupt *irq, struct cpu_core *cpu) +{ + int bonus = 0; + /* moving to a colder core gets you a 1000 penalty */ + if (!cpus_intersects(irq->old_mask, cpu->mask)) + bonus = CROSS_PACKAGE_PENALTY/3; + + /* do a little numa affinity */ + if (!cpus_intersects(irq->numa_mask, cpu->mask)) + bonus += NUMA_PENALTY; + + /* but if the irq has had 0 interrupts for a while move it about more easily */ + if (irq->workload==0) + bonus = bonus / 10; + + /* + * since some chipsets only place at the first cpu, give a tiny preference to non-first + * cpus for specifically placed interrupts + */ + if (first_cpu(cpu->cache_mask)==cpu->number) + bonus++; + + + + /* pay 6000 for each previous interrupt of the same class */ + bonus += CLASS_VIOLATION_PENTALTY * cpu->class_count[irq->class]; + + return irq->workload + bonus; +} + + +static void place_cache_domain(struct package *package) +{ + GList *iter, *next; + GList *pkg; + struct interrupt *irq; + 
struct cache_domain *cache_domain; + + + iter = g_list_first(package->interrupts); + while (iter) { + struct cache_domain *best = NULL; + uint64_t best_cost = INT_MAX; + irq = iter->data; + + if (irq->balance_level <= BALANCE_PACKAGE) { + iter = g_list_next(iter); + continue; + } + pkg = g_list_first(package->cache_domains); + while (pkg) { + uint64_t newload; + + cache_domain = pkg->data; + newload = cache_domain->workload + cache_domain_cost_func(irq, cache_domain); + if (newload < best_cost) { + best = cache_domain; + best_cost = newload; + } + + pkg = g_list_next(pkg); + } + if (best) { + next = g_list_next(iter); + package->interrupts = g_list_delete_link(package->interrupts, iter); + + best->workload += irq->workload + 1; + best->interrupts=g_list_append(best->interrupts, irq); + best->class_count[irq->class]++; + irq->mask = best->mask; + iter = next; + } else + iter = g_list_next(iter); + } +} + + +static void place_core(struct cache_domain *cache_domain) +{ + GList *iter, *next; + GList *pkg; + struct interrupt *irq; + struct cpu_core *cpu; + + + iter = g_list_first(cache_domain->interrupts); + while (iter) { + struct cpu_core *best = NULL; + uint64_t best_cost = INT_MAX; + irq = iter->data; + + /* if the irq isn't per-core policy and is not very busy, leave it at cache domain level */ + if (irq->balance_level <= BALANCE_CACHE && irq->workload < CORE_SPECIFIC_THRESHOLD && !one_shot_mode) { + iter = g_list_next(iter); + continue; + } + pkg = g_list_first(cache_domain->cpu_cores); + while (pkg) { + uint64_t newload; + + cpu = pkg->data; + newload = cpu->workload + cpu_cost_func(irq, cpu); + if (newload < best_cost) { + best = cpu; + best_cost = newload; + } + + pkg = g_list_next(pkg); + } + if (best) { + next = g_list_next(iter); + cache_domain->interrupts = g_list_delete_link(cache_domain->interrupts, iter); + + best->workload += irq->workload + 1; + best->interrupts=g_list_append(best->interrupts, irq); + best->class_count[irq->class]++; + irq->mask = 
best->mask; + iter = next; + } else + iter = g_list_next(iter); + } +} + + +static void place_packages(GList *list) +{ + GList *iter; + GList *pkg; + struct interrupt *irq; + struct package *package; + + + iter = g_list_first(list); + while (iter) { + struct package *best = NULL; + uint64_t best_cost = INT_MAX; + irq = iter->data; + if (irq->balance_level == BALANCE_NONE) { + iter = g_list_next(iter); + continue; + } + pkg = g_list_first(packages); + while (pkg) { + uint64_t newload; + + package = pkg->data; + newload = package->workload + package_cost_func(irq, package); + if (newload < best_cost) { + best = package; + best_cost = newload; + } + + pkg = g_list_next(pkg); + } + if (best) { + best->workload += irq->workload + 1; + best->interrupts=g_list_append(best->interrupts, irq); + best->class_count[irq->class]++; + irq->mask = best->mask; + } + iter = g_list_next(iter); + } +} + + + +static void do_unroutables(void) +{ + struct package *package; + struct cache_domain *cache_domain; + struct cpu_core *cpu; + struct interrupt *irq; + GList *iter, *inter; + + inter = g_list_first(interrupts); + while (inter) { + irq = inter->data; + inter = g_list_next(inter); + if (irq->balance_level != BALANCE_NONE) + continue; + + iter = g_list_first(packages); + while (iter) { + package = iter->data; + if (cpus_intersects(package->mask, irq->mask)) + package->workload += irq->workload; + iter = g_list_next(iter); + } + + iter = g_list_first(cache_domains); + while (iter) { + cache_domain = iter->data; + if (cpus_intersects(cache_domain->mask, irq->mask)) + cache_domain->workload += irq->workload; + iter = g_list_next(iter); + } + iter = g_list_first(cpus); + while (iter) { + cpu = iter->data; + if (cpus_intersects(cpu->mask, irq->mask)) + cpu->workload += irq->workload; + iter = g_list_next(iter); + } + } +} + + +void calculate_placement(void) +{ + struct package *package; + struct cache_domain *cache_domain; + GList *iter; + /* first clear old data */ + clear_work_stats(); + 
sort_irq_list(); + do_unroutables(); + + place_packages(interrupts); + iter = g_list_first(packages); + while (iter) { + package = iter->data; + place_cache_domain(package); + iter = g_list_next(iter); + } + + iter = g_list_first(cache_domains); + while (iter) { + cache_domain = iter->data; + place_core(cache_domain); + iter = g_list_next(iter); + } +} diff --git a/powermode.c b/powermode.c new file mode 100644 index 0000000..acf8bb5 --- /dev/null +++ b/powermode.c @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2006, Intel Corporation + * + * This file is part of irqbalance + * + * This program file is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program in a file named COPYING; if not, write to the + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301 USA + */ +#include +#include +#include +#include +#include + +#include "irqbalance.h" + + +extern int power_mode; + +static uint64_t previous; + +static unsigned int hysteresis; + +void check_power_mode(void) +{ + FILE *file; + char line[4096]; + char *c; + uint64_t dummy, irq, softirq; + line[0]=0; + line[4095]=0; + file = fopen("/proc/stat", "r"); + if (!file) + return; + if (fgets(line, 4095, file)==NULL) + memset(line,0, 4096); + fclose(file); + c=&line[4]; + dummy = strtoull(c, &c, 10); /* user */ + dummy = strtoull(c, &c, 10); /* nice */ + dummy = strtoull(c, &c, 10); /* system */ + dummy = strtoull(c, &c, 10); /* idle */ + dummy = strtoull(c, &c, 10); /* iowait */ + irq = strtoull(c, &c, 10); /* irq */ + softirq = strtoull(c, &c, 10); /* softirq */ + + irq += softirq; + if (irq - previous < POWER_MODE_SOFTIRQ_THRESHOLD) { + hysteresis++; + if (hysteresis > POWER_MODE_HYSTERESIS) { + if (debug_mode && !power_mode) + printf("IRQ delta is %lu, switching to power mode \n", (unsigned long)(irq - previous) ); + power_mode = 1; + } + } else { + if (debug_mode && power_mode) + printf("IRQ delta is %lu, switching to performance mode \n", (unsigned long)(irq - previous) ); + power_mode = 0; + hysteresis = 0; + } + previous = irq; +} + diff --git a/procinterrupts.c b/procinterrupts.c new file mode 100644 index 0000000..3d84b01 --- /dev/null +++ b/procinterrupts.c @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2006, Intel Corporation + * + * This file is part of irqbalance + * + * This program file is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program in a file named COPYING; if not, write to the + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301 USA + */ +#include +#include +#include +#include +#include + +#include "cpumask.h" +#include "irqbalance.h" + +#define LINESIZE 4096 + +void parse_proc_interrupts(void) +{ + FILE *file; + char line[LINESIZE+1]; + + line[LINESIZE] = 0; + file = fopen("/proc/interrupts", "r"); + if (!file) + return; + + /* first line is the header we don't need; nuke it */ + if (fgets(line, LINESIZE, file)==NULL) + return; + + while (!feof(file)) { + cpumask_t present; + int cpunr; + int number; + uint64_t count; + char *c, *c2; + + if (fgets(line, LINESIZE, file)==NULL) + break; + + + /* lines with letters in front are special, like NMI count. 
Ignore */ + if (!(line[0]==' ' || (line[0]>='0' && line[0]<='9'))) + break; + c = strchr(line, ':'); + if (!c) + continue; + *c = 0; + c++; + number = strtoul(line, NULL, 10); + cpus_clear(present); + count = 0; + cpunr = 0; + + c2=NULL; + while (1) { + uint64_t C; + C = strtoull(c, &c2, 10); + if (c==c2) /* end of numbers */ + break; + count += C; + c=c2; + if (C) + cpu_set(cpunr, present); + cpunr++; + } + if (cpunr != core_count) + need_cpu_rescan = 1; + + set_interrupt_count(number, count, &present); + } + fclose(file); +} diff --git a/strace b/strace new file mode 100644 index 0000000..662d98e --- /dev/null +++ b/strace @@ -0,0 +1,573 @@ +execve("./irqbalance", ["./irqbalance", "debug"], [/* 32 vars */]) = 0 +brk(0) = 0x605000 +mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b3a8b1b5000 +uname({sys="Linux", node="benny", ...}) = 0 +access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory) +open("/etc/ld.so.cache", O_RDONLY) = 3 +fstat(3, {st_mode=S_IFREG|0644, st_size=171080, ...}) = 0 +mmap(NULL, 171080, PROT_READ, MAP_PRIVATE, 3, 0) = 0x2b3a8b1b6000 +close(3) = 0 +open("/lib64/libglib-2.0.so.0", O_RDONLY) = 3 +read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0`\0!\347"..., 832) = 832 +fstat(3, {st_mode=S_IFREG|0755, st_size=1839568, ...}) = 0 +mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b3a8b1e0000 +mmap(0x38e7200000, 2743240, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x38e7200000 +mprotect(0x38e729d000, 2093056, PROT_NONE) = 0 +mmap(0x38e749c000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x9c000) = 0x38e749c000 +close(3) = 0 +open("/lib64/libc.so.6", O_RDONLY) = 3 +read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0@\333\1\0"..., 832) = 832 +fstat(3, {st_mode=S_IFREG|0755, st_size=1672888, ...}) = 0 +mmap(NULL, 3461304, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x2b3a8b3b6000 +mprotect(0x2b3a8b4fa000, 2097152, 
PROT_NONE) = 0 +mmap(0x2b3a8b6fa000, 20480, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x144000) = 0x2b3a8b6fa000 +mmap(0x2b3a8b6ff000, 16568, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x2b3a8b6ff000 +close(3) = 0 +open("/lib64/librt.so.1", O_RDONLY) = 3 +read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\300#\0\0"..., 832) = 832 +fstat(3, {st_mode=S_IFREG|0755, st_size=50320, ...}) = 0 +mmap(NULL, 2132968, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x2b3a8b704000 +mprotect(0x2b3a8b70c000, 2093056, PROT_NONE) = 0 +mmap(0x2b3a8b90b000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x7000) = 0x2b3a8b90b000 +close(3) = 0 +open("/lib64/libpthread.so.0", O_RDONLY) = 3 +read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0`X\0\0\0"..., 832) = 832 +fstat(3, {st_mode=S_IFREG|0755, st_size=138080, ...}) = 0 +mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b3a8b90d000 +mmap(NULL, 2200432, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x2b3a8b90e000 +mprotect(0x2b3a8b923000, 2093056, PROT_NONE) = 0 +mmap(0x2b3a8bb22000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x14000) = 0x2b3a8bb22000 +mmap(0x2b3a8bb24000, 13168, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x2b3a8bb24000 +close(3) = 0 +mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b3a8bb28000 +arch_prctl(ARCH_SET_FS, 0x2b3a8bb286f0) = 0 +mprotect(0x2b3a8bb22000, 4096, PROT_READ) = 0 +mprotect(0x2b3a8b90b000, 4096, PROT_READ) = 0 +mprotect(0x2b3a8b6fa000, 16384, PROT_READ) = 0 +mprotect(0x2b3a8b3b4000, 4096, PROT_READ) = 0 +munmap(0x2b3a8b1b6000, 171080) = 0 +set_tid_address(0x2b3a8bb28780) = 3829 +syscall_273(0x2b3a8bb28790, 0x18, 0x7fff1f90e200, 0x2b3a8b19b4a0, 0x2b3a8bb286f0, 0x2b3a8b1b52b8, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 
0x2) = 0 +rt_sigaction(SIGRTMIN, {0x2b3a8b9134a0, [], SA_RESTORER|SA_SIGINFO, 0x2b3a8b91bde0}, NULL, 8) = 0 +rt_sigaction(SIGRT_1, {0x2b3a8b9133f0, [], SA_RESTORER|SA_RESTART|SA_SIGINFO, 0x2b3a8b91bde0}, NULL, 8) = 0 +rt_sigprocmask(SIG_UNBLOCK, [RTMIN RT_1], NULL, 8) = 0 +getrlimit(RLIMIT_STACK, {rlim_cur=8192*1024, rlim_max=RLIM_INFINITY}) = 0 +open("/sys/devices/system/cpu", O_RDONLY|O_NONBLOCK|O_DIRECTORY) = 3 +fstat(3, {st_mode=S_IFDIR|0755, st_size=0, ...}) = 0 +fcntl(3, F_SETFD, FD_CLOEXEC) = 0 +brk(0) = 0x605000 +brk(0x627000) = 0x627000 +getdents(3, /* 11 entries */, 4096) = 288 +open("/sys/devices/system/cpu/cpu7/online", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0600, st_size=4096, ...}) = 0 +read(4, "1\n", 4096) = 2 +close(4) = 0 +open("/sys/devices/system/cpu/cpu7/topology/core_siblings", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu7/cache/index1/shared_cpu_map", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu7/cache/index2/shared_cpu_map", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu6/online", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0600, st_size=4096, ...}) = 0 +read(4, "1\n", 4096) = 2 +close(4) = 0 +open("/sys/devices/system/cpu/cpu6/topology/core_siblings", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu6/cache/index1/shared_cpu_map", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 
+open("/sys/devices/system/cpu/cpu6/cache/index2/shared_cpu_map", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu5/online", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0600, st_size=4096, ...}) = 0 +read(4, "1\n", 4096) = 2 +close(4) = 0 +open("/sys/devices/system/cpu/cpu5/topology/core_siblings", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu5/cache/index1/shared_cpu_map", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu5/cache/index2/shared_cpu_map", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu4/online", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0600, st_size=4096, ...}) = 0 +read(4, "1\n", 4096) = 2 +close(4) = 0 +open("/sys/devices/system/cpu/cpu4/topology/core_siblings", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu4/cache/index1/shared_cpu_map", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu4/cache/index2/shared_cpu_map", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu3/online", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0600, st_size=4096, ...}) = 0 +read(4, "1\n", 4096) = 2 +close(4) = 0 +open("/sys/devices/system/cpu/cpu3/topology/core_siblings", O_RDONLY) 
= 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu3/cache/index1/shared_cpu_map", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu3/cache/index2/shared_cpu_map", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu2/online", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0600, st_size=4096, ...}) = 0 +read(4, "1\n", 4096) = 2 +close(4) = 0 +open("/sys/devices/system/cpu/cpu2/topology/core_siblings", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu2/cache/index1/shared_cpu_map", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu2/cache/index2/shared_cpu_map", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu1/online", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0600, st_size=4096, ...}) = 0 +read(4, "1\n", 4096) = 2 +close(4) = 0 +open("/sys/devices/system/cpu/cpu1/topology/core_siblings", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu1/cache/index1/shared_cpu_map", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu1/cache/index2/shared_cpu_map", O_RDONLY) = 4 +fstat(4, 
{st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu0/online", O_RDONLY) = -1 ENOENT (No such file or directory) +open("/sys/devices/system/cpu/cpu0/topology/core_siblings", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu0/cache/index1/shared_cpu_map", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu0/cache/index2/shared_cpu_map", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +getdents(3, /* 0 entries */, 4096) = 0 +close(3) = 0 +fstat(1, {st_mode=S_IFREG|0644, st_size=11425, ...}) = 0 +open("/proc/interrupts", O_RDONLY) = 3 +fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0 +read(3, " CPU0 CPU1 "..., 1024) = 1024 +open("/proc/irq/0", O_RDONLY|O_NONBLOCK|O_DIRECTORY) = 4 +fstat(4, {st_mode=S_IFDIR|0555, st_size=0, ...}) = 0 +fcntl(4, F_SETFD, FD_CLOEXEC) = 0 +getdents(4, /* 3 entries */, 1024) = 80 +open("/proc/irq/0/smp_affinity", O_RDONLY) = 5 +fstat(5, {st_mode=S_IFREG|0600, st_size=0, ...}) = 0 +read(5, "00000000,00000000,00000000,00000"..., 1024) = 72 +close(5) = 0 +getdents(4, /* 0 entries */, 1024) = 0 +close(4) = 0 +read(3, " 0 IO-APIC-edge i8042\n 1"..., 1024) = 1024 +open("/proc/irq/12", O_RDONLY|O_NONBLOCK|O_DIRECTORY) = 4 +fstat(4, {st_mode=S_IFDIR|0555, st_size=0, ...}) = 0 +fcntl(4, F_SETFD, FD_CLOEXEC) = 0 +getdents(4, /* 4 entries */, 1024) = 112 +open("/proc/irq/12/smp_affinity", O_RDONLY) = 5 +fstat(5, {st_mode=S_IFREG|0600, st_size=0, ...}) = 0 +read(5, "00000000,00000000,00000000,00000"..., 1024) = 72 +close(5) = 0 +getdents(4, /* 0 entries */, 1024) = 0 +close(4) = 0 +open("/proc/irq/14", 
O_RDONLY|O_NONBLOCK|O_DIRECTORY) = 4 +fstat(4, {st_mode=S_IFDIR|0555, st_size=0, ...}) = 0 +fcntl(4, F_SETFD, FD_CLOEXEC) = 0 +getdents(4, /* 4 entries */, 1024) = 104 +open("/proc/irq/14/smp_affinity", O_RDONLY) = 5 +fstat(5, {st_mode=S_IFREG|0600, st_size=0, ...}) = 0 +read(5, "00000000,00000000,00000000,00000"..., 1024) = 72 +close(5) = 0 +getdents(4, /* 0 entries */, 1024) = 0 +close(4) = 0 +open("/proc/irq/16", O_RDONLY|O_NONBLOCK|O_DIRECTORY) = 4 +fstat(4, {st_mode=S_IFDIR|0555, st_size=0, ...}) = 0 +fcntl(4, F_SETFD, FD_CLOEXEC) = 0 +getdents(4, /* 4 entries */, 1024) = 112 +open("/proc/irq/16/smp_affinity", O_RDONLY) = 5 +fstat(5, {st_mode=S_IFREG|0600, st_size=0, ...}) = 0 +read(5, "00000000,00000000,00000000,00000"..., 1024) = 72 +close(5) = 0 +getdents(4, /* 0 entries */, 1024) = 0 +close(4) = 0 +open("/proc/irq/19", O_RDONLY|O_NONBLOCK|O_DIRECTORY) = 4 +fstat(4, {st_mode=S_IFDIR|0555, st_size=0, ...}) = 0 +fcntl(4, F_SETFD, FD_CLOEXEC) = 0 +getdents(4, /* 5 entries */, 1024) = 152 +open("/proc/irq/19/smp_affinity", O_RDONLY) = 5 +fstat(5, {st_mode=S_IFREG|0600, st_size=0, ...}) = 0 +read(5, "00000000,00000000,00000000,00000"..., 1024) = 72 +close(5) = 0 +getdents(4, /* 0 entries */, 1024) = 0 +close(4) = 0 +open("/proc/irq/8408", O_RDONLY|O_NONBLOCK|O_DIRECTORY) = 4 +fstat(4, {st_mode=S_IFDIR|0555, st_size=0, ...}) = 0 +fcntl(4, F_SETFD, FD_CLOEXEC) = 0 +getdents(4, /* 4 entries */, 1024) = 104 +open("/proc/irq/8408/smp_affinity", O_RDONLY) = 5 +fstat(5, {st_mode=S_IFREG|0600, st_size=0, ...}) = 0 +read(5, "00000000,00000000,00000000,00000"..., 1024) = 72 +close(5) = 0 +getdents(4, /* 0 entries */, 1024) = 0 +close(4) = 0 +open("/proc/irq/8409", O_RDONLY|O_NONBLOCK|O_DIRECTORY) = 4 +fstat(4, {st_mode=S_IFDIR|0555, st_size=0, ...}) = 0 +fcntl(4, F_SETFD, FD_CLOEXEC) = 0 +getdents(4, /* 4 entries */, 1024) = 104 +open("/proc/irq/8409/smp_affinity", O_RDONLY) = 5 +fstat(5, {st_mode=S_IFREG|0600, st_size=0, ...}) = 0 +read(5, 
"00000000,00000000,00000000,00000"..., 1024) = 72 +close(5) = 0 +getdents(4, /* 0 entries */, 1024) = 0 +close(4) = 0 +close(3) = 0 +rt_sigprocmask(SIG_BLOCK, [CHLD], [], 8) = 0 +rt_sigaction(SIGCHLD, NULL, {SIG_DFL}, 8) = 0 +rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0 +nanosleep({2, 0}, {2, 0}) = 0 +open("/proc/interrupts", O_RDONLY) = 3 +fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0 +read(3, " CPU0 CPU1 "..., 1024) = 1024 +read(3, " 0 IO-APIC-edge i8042\n 1"..., 1024) = 1024 +close(3) = 0 +open("/sys/bus/pci/devices", O_RDONLY|O_NONBLOCK|O_DIRECTORY) = 3 +fstat(3, {st_mode=S_IFDIR|0755, st_size=0, ...}) = 0 +fcntl(3, F_SETFD, FD_CLOEXEC) = 0 +getdents(3, /* 35 entries */, 4096) = 1104 +open("/sys/bus/pci/devices/0000:0b:01.0/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "11\n", 4096) = 3 +close(4) = 0 +open("/sys/bus/pci/devices/0000:0b:01.0/class", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0x030000\n", 4096) = 9 +close(4) = 0 +open("/sys/bus/pci/devices/0000:0b:01.0/local_cpus", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/bus/pci/devices/0000:06:00.1/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "8408\n", 4096) = 5 +close(4) = 0 +open("/sys/bus/pci/devices/0000:06:00.1/class", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0x020000\n", 4096) = 9 +close(4) = 0 +open("/sys/bus/pci/devices/0000:06:00.1/local_cpus", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/bus/pci/devices/0000:06:00.0/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "8409\n", 4096) = 5 +close(4) = 0 +open("/sys/bus/pci/devices/0000:06:00.0/class", O_RDONLY) = 4 +fstat(4, 
{st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0x020000\n", 4096) = 9 +close(4) = 0 +open("/sys/bus/pci/devices/0000:06:00.0/local_cpus", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/bus/pci/devices/0000:04:02.1/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "17\n", 4096) = 3 +close(4) = 0 +open("/sys/bus/pci/devices/0000:04:02.1/class", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0x010000\n", 4096) = 9 +close(4) = 0 +open("/sys/bus/pci/devices/0000:04:02.1/local_cpus", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/bus/pci/devices/0000:04:02.0/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "16\n", 4096) = 3 +close(4) = 0 +open("/sys/bus/pci/devices/0000:04:02.0/class", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0x010000\n", 4096) = 9 +close(4) = 0 +open("/sys/bus/pci/devices/0000:04:02.0/local_cpus", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/bus/pci/devices/0000:03:00.2/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0\n", 4096) = 2 +close(4) = 0 +open("/sys/bus/pci/devices/0000:03:00.0/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0\n", 4096) = 2 +close(4) = 0 +open("/sys/bus/pci/devices/0000:02:02.0/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "8410\n", 4096) = 5 +close(4) = 0 +open("/sys/bus/pci/devices/0000:02:02.0/class", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0x060400\n", 4096) = 9 +close(4) = 0 
+open("/sys/bus/pci/devices/0000:02:02.0/local_cpus", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/bus/pci/devices/0000:02:00.0/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "8411\n", 4096) = 5 +close(4) = 0 +open("/sys/bus/pci/devices/0000:02:00.0/class", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0x060400\n", 4096) = 9 +close(4) = 0 +open("/sys/bus/pci/devices/0000:02:00.0/local_cpus", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/bus/pci/devices/0000:01:00.3/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0\n", 4096) = 2 +close(4) = 0 +open("/sys/bus/pci/devices/0000:01:00.0/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "16\n", 4096) = 3 +close(4) = 0 +open("/sys/bus/pci/devices/0000:01:00.0/class", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0x060400\n", 4096) = 9 +close(4) = 0 +open("/sys/bus/pci/devices/0000:01:00.0/local_cpus", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1f.3/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "19\n", 4096) = 3 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1f.3/class", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0x0c0500\n", 4096) = 9 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1f.3/local_cpus", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1f.2/irq", O_RDONLY) = 4 
+fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "19\n", 4096) = 3 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1f.2/class", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0x010601\n", 4096) = 9 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1f.2/local_cpus", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1f.1/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "18\n", 4096) = 3 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1f.1/class", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0x01018a\n", 4096) = 9 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1f.1/local_cpus", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1f.0/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0\n", 4096) = 2 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1e.0/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0\n", 4096) = 2 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1d.7/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "17\n", 4096) = 3 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1d.7/class", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0x0c0320\n", 4096) = 9 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1d.7/local_cpus", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1d.2/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "18\n", 4096) = 3 +close(4) = 0 
+open("/sys/bus/pci/devices/0000:00:1d.2/class", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0x0c0300\n", 4096) = 9 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1d.2/local_cpus", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1d.1/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "19\n", 4096) = 3 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1d.1/class", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0x0c0300\n", 4096) = 9 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1d.1/local_cpus", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1d.0/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "17\n", 4096) = 3 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1d.0/class", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0x0c0300\n", 4096) = 9 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1d.0/local_cpus", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1c.0/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "8412\n", 4096) = 5 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1c.0/class", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0x060400\n", 4096) = 9 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1c.0/local_cpus", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:16.0/irq", O_RDONLY) = 4 
+fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0\n", 4096) = 2 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:15.0/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0\n", 4096) = 2 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:13.0/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0\n", 4096) = 2 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:11.0/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0\n", 4096) = 2 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:10.2/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0\n", 4096) = 2 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:10.1/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0\n", 4096) = 2 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:10.0/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0\n", 4096) = 2 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:08.0/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "9\n", 4096) = 2 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:08.0/class", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0x088000\n", 4096) = 9 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:08.0/local_cpus", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:06.0/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "8413\n", 4096) = 5 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:06.0/class", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0x060400\n", 4096) = 9 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:06.0/local_cpus", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, 
st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:04.0/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "8414\n", 4096) = 5 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:04.0/class", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0x060400\n", 4096) = 9 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:04.0/local_cpus", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:02.0/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "8415\n", 4096) = 5 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:02.0/class", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0x060400\n", 4096) = 9 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:02.0/local_cpus", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:00.0/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0\n", 4096) = 2 +close(4) = 0 +getdents(3, /* 0 entries */, 4096) = 0 +rt_sigprocmask(SIG_BLOCK, [CHLD], [], 8) = 0 +rt_sigaction(SIGCHLD, NULL, {SIG_DFL}, 8) = 0 +rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0 +nanosleep({10, 0},
diff --git a/types.h b/types.h
new file mode 100644
index 0000000..763924d
--- /dev/null
+++ b/types.h
@@ -0,0 +1,84 @@
+#ifndef _INCLUDE_GUARD_TYPES_H
+#define _INCLUDE_GUARD_TYPES_H
+
+#include <stdint.h>	/* uint64_t */
+#include <glib.h>	/* GList */
+
+#include "cpumask.h"	/* expected to provide cpumask_t */
+
+/* BALANCE_*: presumably the values of interrupt.balance_level; confirm. */
+#define BALANCE_NONE	0
+#define BALANCE_PACKAGE	1
+#define BALANCE_CACHE	2
+#define BALANCE_CORE	3
+
+/* IRQ_*: presumably the values of interrupt.class; confirm. */
+#define IRQ_OTHER	0
+#define IRQ_LEGACY	1
+#define IRQ_SCSI	2
+#define IRQ_TIMER	3
+#define IRQ_ETH		4
+
+/* Per-package record with GList fields for cache domains and interrupts. */
+struct package {
+	uint64_t workload;
+	int number;
+
+	cpumask_t mask;
+
+	/* 7 slots, although only IRQ_* values 0..4 are defined in this header. */
+	int class_count[7];
+
+	GList *cache_domains;	/* presumably struct cache_domain items; confirm */
+	GList *interrupts;	/* presumably struct interrupt items; confirm */
+};
+
+/* Per-cache-domain record with GList fields for cpu cores and interrupts. */
+struct cache_domain {
+	uint64_t workload;
+	int number;
+	int marker;
+
+	cpumask_t mask;
+	cpumask_t package_mask;
+
+	int class_count[7];
+
+	GList *cpu_cores;	/* presumably struct cpu_core items; confirm */
+	GList *interrupts;	/* presumably struct interrupt items; confirm */
+};
+
+/* Per-core record: core, cache and package masks plus an interrupt list. */
+struct cpu_core {
+	uint64_t workload;
+	int number;
+	int marker;
+
+	int class_count[7];
+
+	cpumask_t package_mask;
+	cpumask_t cache_mask;
+	cpumask_t mask;
+
+	GList *interrupts;	/* presumably struct interrupt items; confirm */
+};
+
+/* Per-interrupt record: number, class, balance level, counters, cpu masks. */
+struct interrupt {
+	uint64_t workload;
+	int balance_level;
+
+	int number;
+	int class;	/* "class" is a C++ keyword: this header is C-only */
+
+	uint64_t count;
+	uint64_t old_count;
+	uint64_t extra;
+
+	cpumask_t mask;
+	cpumask_t old_mask;
+
+	cpumask_t numa_mask;
+};
+
+#endif /* _INCLUDE_GUARD_TYPES_H */