diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..2bb41b1 --- /dev/null +++ b/Makefile @@ -0,0 +1,17 @@ +CFLAGS+=-g -Os -D_FORTIFY_SOURCE=2 -Wall -W `pkg-config --cflags glib-2.0` + +all: irqbalance + +LIBS=bitmap.o irqbalance.o cputree.o procinterrupts.o irqlist.o placement.o activate.o network.o powermode.o numa.o classify.o + +irqbalance: .depend $(LIBS) + gcc -g -O2 -D_FORTIFY_SOURCE=2 -Wall `pkg-config --libs glib-2.0` $(LIBS) -o irqbalance + +clean: + rm -f irqbalance *~ *.o .depend + +# rule for building dependency lists, and writing them to a file +# named ".depend". +.depend: + rm -f .depend + gccmakedep -f- -- $(CFLAGS) -- *.c > .depend diff --git a/activate.c b/activate.c new file mode 100644 index 0000000..9384577 --- /dev/null +++ b/activate.c @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2006, Intel Corporation + * + * This file is part of irqbalance + * + * This program file is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program in a file named COPYING; if not, write to the + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301 USA + */ + +/* + * This file contains the code to communicate a selected distribution / mapping + * of interrupts to the kernel. 
+ */ +#include +#include +#include +#include + +#include "irqbalance.h" + + +void activate_mapping(void) +{ + struct interrupt *irq; + GList *iter; + + iter = g_list_first(interrupts); + while (iter) { + irq = iter->data; + iter = g_list_next(iter); + + if (!cpus_equal(irq->mask, irq->old_mask)) { + char buf[PATH_MAX]; + FILE *file; + sprintf(buf, "/proc/irq/%i/smp_affinity", irq->number); + file = fopen(buf, "w"); + if (!file) + continue; + cpumask_scnprintf(buf, PATH_MAX, irq->mask); + fprintf(file,"%s", buf); + fclose(file); + irq->old_mask = irq->mask; + } + } +} diff --git a/bitmap.c b/bitmap.c new file mode 100644 index 0000000..0c57759 --- /dev/null +++ b/bitmap.c @@ -0,0 +1,366 @@ +/* + +This file is taken from the Linux kernel and minimally adapted for use in userspace + +*/ + +/* + * lib/bitmap.c + * Helper functions for bitmap.h. + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include +#include +#include +#include +#include +#include "bitmap.h" +#include "non-atomic.h" + +/* + * bitmaps provide an array of bits, implemented using an an + * array of unsigned longs. The number of valid bits in a + * given bitmap does _not_ need to be an exact multiple of + * BITS_PER_LONG. + * + * The possible unused bits in the last, partially used word + * of a bitmap are 'don't care'. The implementation makes + * no particular effort to keep them zero. It ensures that + * their value will not affect the results of any operation. + * The bitmap operations that return Boolean (bitmap_empty, + * for example) or scalar (bitmap_weight, for example) results + * carefully filter out these unused bits from impacting their + * results. + * + * These operations actually hold to a slightly stronger rule: + * if you don't input any bitmaps to these ops that have some + * unused bits set, then they won't output any set unused bits + * in output bitmaps. 
+ * + * The byte ordering of bitmaps is more natural on little + * endian architectures. See the big-endian headers + * include/asm-ppc64/bitops.h and include/asm-s390/bitops.h + * for the best explanations of this ordering. + */ + +int __bitmap_empty(const unsigned long *bitmap, int bits) +{ + int k, lim = bits/BITS_PER_LONG; + for (k = 0; k < lim; ++k) + if (bitmap[k]) + return 0; + + if (bits % BITS_PER_LONG) + if (bitmap[k] & BITMAP_LAST_WORD_MASK(bits)) + return 0; + + return 1; +} + +int __bitmap_full(const unsigned long *bitmap, int bits) +{ + int k, lim = bits/BITS_PER_LONG; + for (k = 0; k < lim; ++k) + if (~bitmap[k]) + return 0; + + if (bits % BITS_PER_LONG) + if (~bitmap[k] & BITMAP_LAST_WORD_MASK(bits)) + return 0; + + return 1; +} + +int __bitmap_equal(const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits) +{ + int k, lim = bits/BITS_PER_LONG; + for (k = 0; k < lim; ++k) + if (bitmap1[k] != bitmap2[k]) + return 0; + + if (bits % BITS_PER_LONG) + if ((bitmap1[k] ^ bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) + return 0; + + return 1; +} + +void __bitmap_complement(unsigned long *dst, const unsigned long *src, int bits) +{ + int k, lim = bits/BITS_PER_LONG; + for (k = 0; k < lim; ++k) + dst[k] = ~src[k]; + + if (bits % BITS_PER_LONG) + dst[k] = ~src[k] & BITMAP_LAST_WORD_MASK(bits); +} + +/* + * __bitmap_shift_right - logical right shift of the bits in a bitmap + * @dst - destination bitmap + * @src - source bitmap + * @nbits - shift by this many bits + * @bits - bitmap size, in bits + * + * Shifting right (dividing) means moving bits in the MS -> LS bit + * direction. Zeros are fed into the vacated MS positions and the + * LS bits shifted off the bottom are lost. 
+ */ +void __bitmap_shift_right(unsigned long *dst, + const unsigned long *src, int shift, int bits) +{ + int k, lim = BITS_TO_LONGS(bits), left = bits % BITS_PER_LONG; + int off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG; + unsigned long mask = (1UL << left) - 1; + for (k = 0; off + k < lim; ++k) { + unsigned long upper, lower; + + /* + * If shift is not word aligned, take lower rem bits of + * word above and make them the top rem bits of result. + */ + if (!rem || off + k + 1 >= lim) + upper = 0; + else { + upper = src[off + k + 1]; + if (off + k + 1 == lim - 1 && left) + upper &= mask; + } + lower = src[off + k]; + if (left && off + k == lim - 1) + lower &= mask; + dst[k] = upper << (BITS_PER_LONG - rem) | lower >> rem; + if (left && k == lim - 1) + dst[k] &= mask; + } + if (off) + memset(&dst[lim - off], 0, off*sizeof(unsigned long)); +} + + +/* + * __bitmap_shift_left - logical left shift of the bits in a bitmap + * @dst - destination bitmap + * @src - source bitmap + * @nbits - shift by this many bits + * @bits - bitmap size, in bits + * + * Shifting left (multiplying) means moving bits in the LS -> MS + * direction. Zeros are fed into the vacated LS bit positions + * and those MS bits shifted off the top are lost. + */ + +void __bitmap_shift_left(unsigned long *dst, + const unsigned long *src, int shift, int bits) +{ + int k, lim = BITS_TO_LONGS(bits), left = bits % BITS_PER_LONG; + int off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG; + for (k = lim - off - 1; k >= 0; --k) { + unsigned long upper, lower; + + /* + * If shift is not word aligned, take upper rem bits of + * word below and make them the bottom rem bits of result. 
+ */ + if (rem && k > 0) + lower = src[k - 1]; + else + lower = 0; + upper = src[k]; + if (left && k == lim - 1) + upper &= (1UL << left) - 1; + dst[k + off] = lower >> (BITS_PER_LONG - rem) | upper << rem; + if (left && k + off == lim - 1) + dst[k + off] &= (1UL << left) - 1; + } + if (off) + memset(dst, 0, off*sizeof(unsigned long)); +} + +void __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits) +{ + int k; + int nr = BITS_TO_LONGS(bits); + + for (k = 0; k < nr; k++) + dst[k] = bitmap1[k] & bitmap2[k]; +} + +void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits) +{ + int k; + int nr = BITS_TO_LONGS(bits); + + for (k = 0; k < nr; k++) + dst[k] = bitmap1[k] | bitmap2[k]; +} + +void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits) +{ + int k; + int nr = BITS_TO_LONGS(bits); + + for (k = 0; k < nr; k++) + dst[k] = bitmap1[k] ^ bitmap2[k]; +} + +void __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits) +{ + int k; + int nr = BITS_TO_LONGS(bits); + + for (k = 0; k < nr; k++) + dst[k] = bitmap1[k] & ~bitmap2[k]; +} + +int __bitmap_intersects(const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits) +{ + int k, lim = bits/BITS_PER_LONG; + for (k = 0; k < lim; ++k) + if (bitmap1[k] & bitmap2[k]) + return 1; + + if (bits % BITS_PER_LONG) + if ((bitmap1[k] & bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) + return 1; + return 0; +} + +/* + * Bitmap printing & parsing functions: first version by Bill Irwin, + * second version by Paul Jackson, third by Joe Korty. + */ + +#define CHUNKSZ 32 +#define nbits_to_hold_value(val) fls(val) +#define unhex(c) (isdigit(c) ? (c - '0') : (toupper(c) - 'A' + 10)) +#define BASEDEC 10 /* fancier cpuset lists input in decimal */ + +/** + * bitmap_scnprintf - convert bitmap to an ASCII hex string. 
+ * @buf: byte buffer into which string is placed + * @buflen: reserved size of @buf, in bytes + * @maskp: pointer to bitmap to convert + * @nmaskbits: size of bitmap, in bits + * + * Exactly @nmaskbits bits are displayed. Hex digits are grouped into + * comma-separated sets of eight digits per set. + */ +int bitmap_scnprintf(char *buf, unsigned int buflen, + const unsigned long *maskp, int nmaskbits) +{ + int i, word, bit, len = 0; + unsigned long val; + const char *sep = ""; + int chunksz; + uint32_t chunkmask; + int first = 1; + + chunksz = nmaskbits & (CHUNKSZ - 1); + if (chunksz == 0) + chunksz = CHUNKSZ; + + i = ALIGN(nmaskbits, CHUNKSZ) - CHUNKSZ; + for (; i >= 0; i -= CHUNKSZ) { + chunkmask = ((1ULL << chunksz) - 1); + word = i / BITS_PER_LONG; + bit = i % BITS_PER_LONG; + val = (maskp[word] >> bit) & chunkmask; + if (val!=0 || !first) { + len += snprintf(buf+len, buflen-len, "%s%0*lx", sep, + (chunksz+3)/4, val); + chunksz = CHUNKSZ; + sep = ","; + first = 0; + } + } + return len; +} + +/** + * __bitmap_parse - convert an ASCII hex string into a bitmap. + * @buf: pointer to buffer containing string. + * @buflen: buffer size in bytes. If string is smaller than this + * then it must be terminated with a \0. + * @is_user: location of buffer, 0 indicates kernel space + * @maskp: pointer to bitmap array that will contain result. + * @nmaskbits: size of bitmap, in bits. + * + * Commas group hex digits into chunks. Each chunk defines exactly 32 + * bits of the resultant bitmask. No chunk may specify a value larger + * than 32 bits (%-EOVERFLOW), and if a chunk specifies a smaller value + * then leading 0-bits are prepended. %-EINVAL is returned for illegal + * characters and for grouping errors such as "1,,5", ",44", "," and "". + * Leading and trailing whitespace accepted, but not embedded whitespace. 
+ */ +int __bitmap_parse(const char *buf, unsigned int buflen, + int is_user __attribute((unused)), unsigned long *maskp, + int nmaskbits) +{ + int c, old_c, totaldigits, ndigits, nchunks, nbits; + uint32_t chunk; + + bitmap_zero(maskp, nmaskbits); + + nchunks = nbits = totaldigits = c = 0; + do { + chunk = ndigits = 0; + + /* Get the next chunk of the bitmap */ + while (buflen) { + old_c = c; + c = *buf++; + buflen--; + if (isspace(c)) + continue; + + /* + * If the last character was a space and the current + * character isn't '\0', we've got embedded whitespace. + * This is a no-no, so throw an error. + */ + if (totaldigits && c && isspace(old_c)) + return 0; + + /* A '\0' or a ',' signal the end of the chunk */ + if (c == '\0' || c == ',') + break; + + if (!isxdigit(c)) + return -EINVAL; + + /* + * Make sure there are at least 4 free bits in 'chunk'. + * If not, this hexdigit will overflow 'chunk', so + * throw an error. + */ + if (chunk & ~((1UL << (CHUNKSZ - 4)) - 1)) + return -EOVERFLOW; + + chunk = (chunk << 4) | unhex(c); + ndigits++; totaldigits++; + } + if (ndigits == 0) + return -EINVAL; + if (nchunks == 0 && chunk == 0) + continue; + + __bitmap_shift_left(maskp, maskp, CHUNKSZ, nmaskbits); + *maskp |= chunk; + nchunks++; + nbits += (nchunks == 1) ? 
nbits_to_hold_value(chunk) : CHUNKSZ; + if (nbits > nmaskbits) + return -EOVERFLOW; + } while (buflen && c == ','); + + return 0; +} diff --git a/bitmap.h b/bitmap.h new file mode 100644 index 0000000..91ed499 --- /dev/null +++ b/bitmap.h @@ -0,0 +1,356 @@ +#ifndef __LINUX_BITMAP_H +#define __LINUX_BITMAP_H + +#ifndef __ASSEMBLY__ + +#include +#include +#include + + +#define BITS_PER_LONG ((int)sizeof(unsigned long)*8) + +#define BITS_TO_LONGS(bits) \ + (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG) +#define DECLARE_BITMAP(name,bits) \ + unsigned long name[BITS_TO_LONGS(bits)] +#define ALIGN(x,a) (((x)+(a)-1UL)&~((a)-1UL)) + + +#include "non-atomic.h" + +static inline unsigned int hweight32(unsigned int w) +{ + unsigned int res = w - ((w >> 1) & 0x55555555); + res = (res & 0x33333333) + ((res >> 2) & 0x33333333); + res = (res + (res >> 4)) & 0x0F0F0F0F; + res = res + (res >> 8); + return (res + (res >> 16)) & 0x000000FF; +} + +static inline unsigned long hweight64(uint64_t w) +{ + if (BITS_PER_LONG == 32) + return hweight32((unsigned int)(w >> 32)) + hweight32((unsigned int)w); + + w -= (w >> 1) & 0x5555555555555555ull; + w = (w & 0x3333333333333333ull) + ((w >> 2) & 0x3333333333333333ull); + w = (w + (w >> 4)) & 0x0f0f0f0f0f0f0f0full; + return (w * 0x0101010101010101ull) >> 56; +} + + +static inline int fls(int x) +{ + int r = 32; + + if (!x) + return 0; + if (!(x & 0xffff0000u)) { + x <<= 16; + r -= 16; + } + if (!(x & 0xff000000u)) { + x <<= 8; + r -= 8; + } + if (!(x & 0xf0000000u)) { + x <<= 4; + r -= 4; + } + if (!(x & 0xc0000000u)) { + x <<= 2; + r -= 2; + } + if (!(x & 0x80000000u)) { + x <<= 1; + r -= 1; + } + return r; +} + +static inline unsigned long hweight_long(unsigned long w) +{ + return sizeof(w) == 4 ? hweight32(w) : hweight64(w); +} + +#define min(x,y) ({ \ + typeof(x) _x = (x); \ + typeof(y) _y = (y); \ + (void) (&_x == &_y); \ + _x < _y ? _x : _y; }) + + +/* + * bitmaps provide bit arrays that consume one or more unsigned + * longs. 
The bitmap interface and available operations are listed + * here, in bitmap.h + * + * Function implementations generic to all architectures are in + * lib/bitmap.c. Function implementations that are architecture + * specific are in various include/asm-<arch>/bitops.h headers + * and other arch/<arch> specific files. + * + * See lib/bitmap.c for more details. + */ + +/* + * The available bitmap operations and their rough meaning in the + * case that the bitmap is a single unsigned long are thus: + * + * Note that nbits should be always a compile time evaluable constant. + * Otherwise many inlines will generate horrible code. + * + * bitmap_zero(dst, nbits) *dst = 0UL + * bitmap_fill(dst, nbits) *dst = ~0UL + * bitmap_copy(dst, src, nbits) *dst = *src + * bitmap_and(dst, src1, src2, nbits) *dst = *src1 & *src2 + * bitmap_or(dst, src1, src2, nbits) *dst = *src1 | *src2 + * bitmap_xor(dst, src1, src2, nbits) *dst = *src1 ^ *src2 + * bitmap_andnot(dst, src1, src2, nbits) *dst = *src1 & ~(*src2) + * bitmap_complement(dst, src, nbits) *dst = ~(*src) + * bitmap_equal(src1, src2, nbits) Are *src1 and *src2 equal? + * bitmap_intersects(src1, src2, nbits) Do *src1 and *src2 overlap? + * bitmap_subset(src1, src2, nbits) Is *src1 a subset of *src2? + * bitmap_empty(src, nbits) Are all bits zero in *src? + * bitmap_full(src, nbits) Are all bits set in *src? 
+ * bitmap_weight(src, nbits) Hamming Weight: number set bits + * bitmap_shift_right(dst, src, n, nbits) *dst = *src >> n + * bitmap_shift_left(dst, src, n, nbits) *dst = *src << n + * bitmap_remap(dst, src, old, new, nbits) *dst = map(old, new)(src) + * bitmap_bitremap(oldbit, old, new, nbits) newbit = map(old, new)(oldbit) + * bitmap_scnprintf(buf, len, src, nbits) Print bitmap src to buf + * bitmap_parse(buf, buflen, dst, nbits) Parse bitmap dst from kernel buf + * bitmap_parse_user(ubuf, ulen, dst, nbits) Parse bitmap dst from user buf + * bitmap_scnlistprintf(buf, len, src, nbits) Print bitmap src as list to buf + * bitmap_parselist(buf, dst, nbits) Parse bitmap dst from list + * bitmap_find_free_region(bitmap, bits, order) Find and allocate bit region + * bitmap_release_region(bitmap, pos, order) Free specified bit region + * bitmap_allocate_region(bitmap, pos, order) Allocate specified bit region + */ + +/* + * Also the following operations in asm/bitops.h apply to bitmaps. + * + * set_bit(bit, addr) *addr |= bit + * clear_bit(bit, addr) *addr &= ~bit + * change_bit(bit, addr) *addr ^= bit + * test_bit(bit, addr) Is bit set in *addr? + * test_and_set_bit(bit, addr) Set bit and return old value + * test_and_clear_bit(bit, addr) Clear bit and return old value + * test_and_change_bit(bit, addr) Change bit and return old value + * find_first_zero_bit(addr, nbits) Position first zero bit in *addr + * find_first_bit(addr, nbits) Position first set bit in *addr + * find_next_zero_bit(addr, nbits, bit) Position next zero bit in *addr >= bit + * find_next_bit(addr, nbits, bit) Position next set bit in *addr >= bit + */ + +/* + * The DECLARE_BITMAP(name,bits) macro, in linux/types.h, can be used + * to declare an array named 'name' of just enough unsigned longs to + * contain all bit positions from 0 to 'bits' - 1. 
+ */ + +/* + * lib/bitmap.c provides these functions: + */ + +extern int __bitmap_empty(const unsigned long *bitmap, int bits); +extern int __bitmap_full(const unsigned long *bitmap, int bits); +extern int __bitmap_equal(const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits); +extern void __bitmap_complement(unsigned long *dst, const unsigned long *src, + int bits); +extern void __bitmap_shift_right(unsigned long *dst, + const unsigned long *src, int shift, int bits); +extern void __bitmap_shift_left(unsigned long *dst, + const unsigned long *src, int shift, int bits); +extern void __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits); +extern void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits); +extern void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits); +extern void __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits); +extern int __bitmap_intersects(const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits); +extern int __bitmap_subset(const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits); +extern int __bitmap_weight(const unsigned long *bitmap, int bits); + +extern int bitmap_scnprintf(char *buf, unsigned int len, + const unsigned long *src, int nbits); +extern int __bitmap_parse(const char *buf, unsigned int buflen, int is_user, + unsigned long *dst, int nbits); +extern int bitmap_scnlistprintf(char *buf, unsigned int len, + const unsigned long *src, int nbits); +extern int bitmap_parselist(const char *buf, unsigned long *maskp, + int nmaskbits); +extern void bitmap_remap(unsigned long *dst, const unsigned long *src, + const unsigned long *old, const unsigned long *new, int bits); +extern int bitmap_bitremap(int oldbit, + const unsigned long *old, const unsigned long *new, int bits); +extern int 
bitmap_find_free_region(unsigned long *bitmap, int bits, int order); +extern void bitmap_release_region(unsigned long *bitmap, int pos, int order); +extern int bitmap_allocate_region(unsigned long *bitmap, int pos, int order); + +#define BITMAP_LAST_WORD_MASK(nbits) \ +( \ + ((nbits) % BITS_PER_LONG) ? \ + (1UL<<((nbits) % BITS_PER_LONG))-1 : ~0UL \ +) + +static inline void bitmap_zero(unsigned long *dst, int nbits) +{ + if (nbits <= BITS_PER_LONG) + *dst = 0UL; + else { + int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); + memset(dst, 0, len); + } +} + +static inline void bitmap_fill(unsigned long *dst, int nbits) +{ + size_t nlongs = BITS_TO_LONGS(nbits); + if (nlongs > 1) { + int len = (nlongs - 1) * sizeof(unsigned long); + memset(dst, 0xff, len); + } + dst[nlongs - 1] = BITMAP_LAST_WORD_MASK(nbits); +} + +static inline void bitmap_copy(unsigned long *dst, const unsigned long *src, + int nbits) +{ + if (nbits <= BITS_PER_LONG) + *dst = *src; + else { + int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); + memcpy(dst, src, len); + } +} + +static inline void bitmap_and(unsigned long *dst, const unsigned long *src1, + const unsigned long *src2, int nbits) +{ + if (nbits <= BITS_PER_LONG) + *dst = *src1 & *src2; + else + __bitmap_and(dst, src1, src2, nbits); +} + +static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, + const unsigned long *src2, int nbits) +{ + if (nbits <= BITS_PER_LONG) + *dst = *src1 | *src2; + else + __bitmap_or(dst, src1, src2, nbits); +} + +static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1, + const unsigned long *src2, int nbits) +{ + if (nbits <= BITS_PER_LONG) + *dst = *src1 ^ *src2; + else + __bitmap_xor(dst, src1, src2, nbits); +} + +static inline void bitmap_andnot(unsigned long *dst, const unsigned long *src1, + const unsigned long *src2, int nbits) +{ + if (nbits <= BITS_PER_LONG) + *dst = *src1 & ~(*src2); + else + __bitmap_andnot(dst, src1, src2, nbits); +} + +static inline 
void bitmap_complement(unsigned long *dst, const unsigned long *src, + int nbits) +{ + if (nbits <= BITS_PER_LONG) + *dst = ~(*src) & BITMAP_LAST_WORD_MASK(nbits); + else + __bitmap_complement(dst, src, nbits); +} + +static inline int bitmap_equal(const unsigned long *src1, + const unsigned long *src2, int nbits) +{ + if (nbits <= BITS_PER_LONG) + return ! ((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits)); + else + return __bitmap_equal(src1, src2, nbits); +} + +static inline int bitmap_intersects(const unsigned long *src1, + const unsigned long *src2, int nbits) +{ + if (nbits <= BITS_PER_LONG) + return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0; + else + return __bitmap_intersects(src1, src2, nbits); +} + +static inline int bitmap_subset(const unsigned long *src1, + const unsigned long *src2, int nbits) +{ + if (nbits <= BITS_PER_LONG) + return ! ((*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits)); + else + return __bitmap_subset(src1, src2, nbits); +} + +static inline int bitmap_empty(const unsigned long *src, int nbits) +{ + if (nbits <= BITS_PER_LONG) + return ! (*src & BITMAP_LAST_WORD_MASK(nbits)); + else + return __bitmap_empty(src, nbits); +} + +static inline int bitmap_full(const unsigned long *src, int nbits) +{ + if (nbits <= BITS_PER_LONG) + return ! 
(~(*src) & BITMAP_LAST_WORD_MASK(nbits)); + else + return __bitmap_full(src, nbits); +} + +static inline int bitmap_weight(const unsigned long *src, int nbits) +{ + if (nbits <= BITS_PER_LONG) + return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits)); + return __bitmap_weight(src, nbits); +} + +static inline void bitmap_shift_right(unsigned long *dst, + const unsigned long *src, int n, int nbits) +{ + if (nbits <= BITS_PER_LONG) + *dst = *src >> n; + else + __bitmap_shift_right(dst, src, n, nbits); +} + +static inline void bitmap_shift_left(unsigned long *dst, + const unsigned long *src, int n, int nbits) +{ + if (nbits <= BITS_PER_LONG) + *dst = (*src << n) & BITMAP_LAST_WORD_MASK(nbits); + else + __bitmap_shift_left(dst, src, n, nbits); +} + +static inline int bitmap_parse(const char *buf, unsigned int buflen, + unsigned long *maskp, int nmaskbits) +{ + return __bitmap_parse(buf, buflen, 0, maskp, nmaskbits); +} + +#endif /* __ASSEMBLY__ */ + +#endif /* __LINUX_BITMAP_H */ diff --git a/classify.c b/classify.c new file mode 100644 index 0000000..a1c8114 --- /dev/null +++ b/classify.c @@ -0,0 +1,126 @@ +#include +#include +#include + +#include "irqbalance.h" +#include "types.h" + + +char *classes[] = { + "other", + "legacy", + "storage", + "timer", + "ethernet", + "fasteth", + 0 +}; + +int map_class_to_level[7] = +{ BALANCE_PACKAGE, BALANCE_CACHE, BALANCE_CACHE, BALANCE_NONE, BALANCE_CORE, BALANCE_CORE }; + + +int class_counts[7]; + +/* + +NOTE NOTE although that this file has a hard-coded list of modules, something missing is not + a big deal; the types are also set based on PCI class information when available. + +*/ + +/* + + Based on the original irqbalance code which is: + + Copyright (C) 2003 Red Hat, Inc. All rights reserved. 
+ + Usage and distribution of this file are subject to the Gnu General Public License Version 2 + that can be found at http://www.gnu.org/licenses/gpl.txt and the COPYING file as + distributed together with this file is included herein by reference. + + Author: Arjan van de Ven + +*/ + +static char *legacy_modules[] = { + "PS/2", + "serial", + "i8042", + "acpi", + "floppy", + "parport", + "keyboard", + "usb-ohci", + "usb-uhci", + "uhci_hcd", + "ohci_hcd", + "ehci_hcd", + "EMU10K1", + 0 +}; + +static char *timer_modules[] = { + "rtc", + "timer", + 0 +}; + +static char *storage_modules[] = { + "aic7xxx", + "aic79xx", + "ide", + "cciss", + "cpqarray", + "qla2", + "megaraid", + "fusion", + "libata", + "ohci1394", + "sym53c8xx", + 0 +}; + +static char *ethernet_modules[] = { + "eth", + "e100", + "eepro100", + "orinico_cs", + "wvlan_cs", + "3c5", + "HiSax", + 0 +}; + + +int find_class(struct interrupt *irq, char *moduletext) +{ + int guess = IRQ_OTHER; + int i; + + if (moduletext == NULL) + return guess; + + for (i=0; legacy_modules[i]; i++) + if (strstr(moduletext, legacy_modules[i])) + guess = IRQ_LEGACY; + + for (i=0; storage_modules[i]; i++) + if (strstr(moduletext, storage_modules[i])) + guess = IRQ_SCSI; + + for (i=0; timer_modules[i]; i++) + if (strstr(moduletext, timer_modules[i])) + guess = IRQ_TIMER; + + for (i=0; ethernet_modules[i]; i++) + if (strstr(moduletext, ethernet_modules[i])) + guess = IRQ_ETH; + + if (guess == IRQ_OTHER && irq->number==0) + guess = IRQ_TIMER; + + if (guess > irq->class) + return guess; + return irq->class; +} diff --git a/constants.h b/constants.h new file mode 100644 index 0000000..ae669eb --- /dev/null +++ b/constants.h @@ -0,0 +1,30 @@ +#ifndef __INCLUDE_GUARD_CONSTANTS_H +#define __INCLUDE_GUARD_CONSTANTS_H + +/* interval between rebalance attempts in seconds */ +#define SLEEP_INTERVAL 10 + +/* NUMA topology refresh intervals, in units of SLEEP_INTERVAL */ +#define NUMA_REFRESH_INTERVAL 32 +/* NIC interrupt refresh interval, in 
units of SLEEP_INTERVAL */ +#define NIC_REFRESH_INTERVAL 32 + +/* minimum number of interrupts since boot for an interrupt to matter */ +#define MIN_IRQ_COUNT 20 + + +/* balancing tunings */ + +#define CROSS_PACKAGE_PENALTY 3000 +#define NUMA_PENALTY 250 +#define POWER_MODE_PACKAGE_THRESHOLD 10000 +#define CLASS_VIOLATION_PENTALTY 6000 +#define CORE_SPECIFIC_THRESHOLD 5000 + +/* power mode */ + +#define POWER_MODE_SOFTIRQ_THRESHOLD 20 +#define POWER_MODE_HYSTERESIS 3 + + +#endif diff --git a/cpumask.h b/cpumask.h new file mode 100644 index 0000000..8c6606a --- /dev/null +++ b/cpumask.h @@ -0,0 +1,400 @@ +#ifndef __LINUX_CPUMASK_H +#define __LINUX_CPUMASK_H + +#define NR_CPUS 256 +/* + * Cpumasks provide a bitmap suitable for representing the + * set of CPU's in a system, one bit position per CPU number. + * + * See detailed comments in the file linux/bitmap.h describing the + * data type on which these cpumasks are based. + * + * For details of cpumask_scnprintf() and cpumask_parse_user(), + * see bitmap_scnprintf() and bitmap_parse_user() in lib/bitmap.c. + * For details of cpulist_scnprintf() and cpulist_parse(), see + * bitmap_scnlistprintf() and bitmap_parselist(), also in bitmap.c. + * For details of cpu_remap(), see bitmap_bitremap in lib/bitmap.c + * For details of cpus_remap(), see bitmap_remap in lib/bitmap.c. 
+ * + * The available cpumask operations are: + * + * void cpu_set(cpu, mask) turn on bit 'cpu' in mask + * void cpu_clear(cpu, mask) turn off bit 'cpu' in mask + * void cpus_setall(mask) set all bits + * void cpus_clear(mask) clear all bits + * int cpu_isset(cpu, mask) true iff bit 'cpu' set in mask + * int cpu_test_and_set(cpu, mask) test and set bit 'cpu' in mask + * + * void cpus_and(dst, src1, src2) dst = src1 & src2 [intersection] + * void cpus_or(dst, src1, src2) dst = src1 | src2 [union] + * void cpus_xor(dst, src1, src2) dst = src1 ^ src2 + * void cpus_andnot(dst, src1, src2) dst = src1 & ~src2 + * void cpus_complement(dst, src) dst = ~src + * + * int cpus_equal(mask1, mask2) Does mask1 == mask2? + * int cpus_intersects(mask1, mask2) Do mask1 and mask2 intersect? + * int cpus_subset(mask1, mask2) Is mask1 a subset of mask2? + * int cpus_empty(mask) Is mask empty (no bits sets)? + * int cpus_full(mask) Is mask full (all bits sets)? + * int cpus_weight(mask) Hamming weigh - number of set bits + * + * void cpus_shift_right(dst, src, n) Shift right + * void cpus_shift_left(dst, src, n) Shift left + * + * int first_cpu(mask) Number lowest set bit, or NR_CPUS + * int next_cpu(cpu, mask) Next cpu past 'cpu', or NR_CPUS + * + * cpumask_t cpumask_of_cpu(cpu) Return cpumask with bit 'cpu' set + * CPU_MASK_ALL Initializer - all bits set + * CPU_MASK_NONE Initializer - no bits set + * unsigned long *cpus_addr(mask) Array of unsigned long's in mask + * + * int cpumask_scnprintf(buf, len, mask) Format cpumask for printing + * int cpumask_parse_user(ubuf, ulen, mask) Parse ascii string as cpumask + * int cpulist_scnprintf(buf, len, mask) Format cpumask as list for printing + * int cpulist_parse(buf, map) Parse ascii string as cpulist + * int cpu_remap(oldbit, old, new) newbit = map(old, new)(oldbit) + * int cpus_remap(dst, src, old, new) *dst = map(old, new)(src) + * + * for_each_cpu_mask(cpu, mask) for-loop cpu over mask + * + * int num_online_cpus() Number of online 
CPUs + * int num_possible_cpus() Number of all possible CPUs + * int num_present_cpus() Number of present CPUs + * + * int cpu_online(cpu) Is some cpu online? + * int cpu_possible(cpu) Is some cpu possible? + * int cpu_present(cpu) Is some cpu present (can schedule)? + * + * int any_online_cpu(mask) First online cpu in mask + * + * for_each_possible_cpu(cpu) for-loop cpu over cpu_possible_map + * for_each_online_cpu(cpu) for-loop cpu over cpu_online_map + * for_each_present_cpu(cpu) for-loop cpu over cpu_present_map + * + * Subtlety: + * 1) The 'type-checked' form of cpu_isset() causes gcc (3.3.2, anyway) + * to generate slightly worse code. Note for example the additional + * 40 lines of assembly code compiling the "for each possible cpu" + * loops buried in the disk_stat_read() macros calls when compiling + * drivers/block/genhd.c (arch i386, CONFIG_SMP=y). So use a simple + * one-line #define for cpu_isset(), instead of wrapping an inline + * inside a macro, the way we do the other calls. + */ + +#include "bitmap.h" + +typedef struct { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; +extern cpumask_t _unused_cpumask_arg_; + +#define cpu_set(cpu, dst) __cpu_set((cpu), &(dst)) +static inline void __cpu_set(int cpu, volatile cpumask_t *dstp) +{ + set_bit(cpu, dstp->bits); +} + +#define cpu_clear(cpu, dst) __cpu_clear((cpu), &(dst)) +static inline void __cpu_clear(int cpu, volatile cpumask_t *dstp) +{ + clear_bit(cpu, dstp->bits); +} + +#define cpus_setall(dst) __cpus_setall(&(dst), NR_CPUS) +static inline void __cpus_setall(cpumask_t *dstp, int nbits) +{ + bitmap_fill(dstp->bits, nbits); +} + +#define cpus_clear(dst) __cpus_clear(&(dst), NR_CPUS) +static inline void __cpus_clear(cpumask_t *dstp, int nbits) +{ + bitmap_zero(dstp->bits, nbits); +} + +/* No static inline type checking - see Subtlety (1) above. 
*/ +#define cpu_isset(cpu, cpumask) test_bit((cpu), (cpumask).bits) + +#define cpus_and(dst, src1, src2) __cpus_and(&(dst), &(src1), &(src2), NR_CPUS) +static inline void __cpus_and(cpumask_t *dstp, const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits); +} + +#define cpus_or(dst, src1, src2) __cpus_or(&(dst), &(src1), &(src2), NR_CPUS) +static inline void __cpus_or(cpumask_t *dstp, const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + bitmap_or(dstp->bits, src1p->bits, src2p->bits, nbits); +} + +#define cpus_xor(dst, src1, src2) __cpus_xor(&(dst), &(src1), &(src2), NR_CPUS) +static inline void __cpus_xor(cpumask_t *dstp, const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + bitmap_xor(dstp->bits, src1p->bits, src2p->bits, nbits); +} + +#define cpus_andnot(dst, src1, src2) \ + __cpus_andnot(&(dst), &(src1), &(src2), NR_CPUS) +static inline void __cpus_andnot(cpumask_t *dstp, const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits); +} + +#define cpus_complement(dst, src) __cpus_complement(&(dst), &(src), NR_CPUS) +static inline void __cpus_complement(cpumask_t *dstp, + const cpumask_t *srcp, int nbits) +{ + bitmap_complement(dstp->bits, srcp->bits, nbits); +} + +#define cpus_equal(src1, src2) __cpus_equal(&(src1), &(src2), NR_CPUS) +static inline int __cpus_equal(const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + return bitmap_equal(src1p->bits, src2p->bits, nbits); +} + +#define cpus_intersects(src1, src2) __cpus_intersects(&(src1), &(src2), NR_CPUS) +static inline int __cpus_intersects(const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + return bitmap_intersects(src1p->bits, src2p->bits, nbits); +} + +#define cpus_subset(src1, src2) __cpus_subset(&(src1), &(src2), NR_CPUS) +static inline int __cpus_subset(const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + return 
bitmap_subset(src1p->bits, src2p->bits, nbits); +} + +#define cpus_empty(src) __cpus_empty(&(src), NR_CPUS) +static inline int __cpus_empty(const cpumask_t *srcp, int nbits) +{ + return bitmap_empty(srcp->bits, nbits); +} + +#define cpus_full(cpumask) __cpus_full(&(cpumask), NR_CPUS) +static inline int __cpus_full(const cpumask_t *srcp, int nbits) +{ + return bitmap_full(srcp->bits, nbits); +} + +#define cpus_weight(cpumask) __cpus_weight(&(cpumask), NR_CPUS) +static inline int __cpus_weight(const cpumask_t *srcp, int nbits) +{ + return bitmap_weight(srcp->bits, nbits); +} + +#define cpus_shift_right(dst, src, n) \ + __cpus_shift_right(&(dst), &(src), (n), NR_CPUS) +static inline void __cpus_shift_right(cpumask_t *dstp, + const cpumask_t *srcp, int n, int nbits) +{ + bitmap_shift_right(dstp->bits, srcp->bits, n, nbits); +} + +#define cpus_shift_left(dst, src, n) \ + __cpus_shift_left(&(dst), &(src), (n), NR_CPUS) +static inline void __cpus_shift_left(cpumask_t *dstp, + const cpumask_t *srcp, int n, int nbits) +{ + bitmap_shift_left(dstp->bits, srcp->bits, n, nbits); +} + +static inline int __first_cpu(const cpumask_t *srcp) +{ + return ffs(*srcp->bits)-1; +} + +#define first_cpu(src) __first_cpu(&(src)) +int __next_cpu(int n, const cpumask_t *srcp); +#define next_cpu(n, src) __next_cpu((n), &(src)) + +#define cpumask_of_cpu(cpu) \ +({ \ + typeof(_unused_cpumask_arg_) m; \ + if (sizeof(m) == sizeof(unsigned long)) { \ + m.bits[0] = 1UL<<(cpu); \ + } else { \ + cpus_clear(m); \ + cpu_set((cpu), m); \ + } \ + m; \ +}) + +#define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS) + +#if 0 + +#define CPU_MASK_ALL \ +(cpumask_t) { { \ + [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ +} } + +#else + +#define CPU_MASK_ALL \ +(cpumask_t) { { \ + [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \ + [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ +} } + +#endif + +#define CPU_MASK_NONE \ +(cpumask_t) { { \ + [0 ... 
BITS_TO_LONGS(NR_CPUS)-1] = 0UL \ +} } + +#define CPU_MASK_CPU0 \ +(cpumask_t) { { \ + [0] = 1UL \ +} } + +#define cpus_addr(src) ((src).bits) + +#define cpumask_scnprintf(buf, len, src) \ + __cpumask_scnprintf((buf), (len), &(src), NR_CPUS) +static inline int __cpumask_scnprintf(char *buf, int len, + const cpumask_t *srcp, int nbits) +{ + return bitmap_scnprintf(buf, len, srcp->bits, nbits); +} + +#define cpumask_parse_user(ubuf, ulen, dst) \ + __cpumask_parse_user((ubuf), (ulen), &(dst), NR_CPUS) +static inline int __cpumask_parse_user(const char *buf, int len, + cpumask_t *dstp, int nbits) +{ + return bitmap_parse(buf, len, dstp->bits, nbits); +} + +#define cpulist_scnprintf(buf, len, src) \ + __cpulist_scnprintf((buf), (len), &(src), NR_CPUS) +static inline int __cpulist_scnprintf(char *buf, int len, + const cpumask_t *srcp, int nbits) +{ + return bitmap_scnlistprintf(buf, len, srcp->bits, nbits); +} + +#define cpulist_parse(buf, dst) __cpulist_parse((buf), &(dst), NR_CPUS) +static inline int __cpulist_parse(const char *buf, cpumask_t *dstp, int nbits) +{ + return bitmap_parselist(buf, dstp->bits, nbits); +} + +#define cpu_remap(oldbit, old, new) \ + __cpu_remap((oldbit), &(old), &(new), NR_CPUS) +static inline int __cpu_remap(int oldbit, + const cpumask_t *oldp, const cpumask_t *newp, int nbits) +{ + return bitmap_bitremap(oldbit, oldp->bits, newp->bits, nbits); +} + +#define cpus_remap(dst, src, old, new) \ + __cpus_remap(&(dst), &(src), &(old), &(new), NR_CPUS) +static inline void __cpus_remap(cpumask_t *dstp, const cpumask_t *srcp, + const cpumask_t *oldp, const cpumask_t *newp, int nbits) +{ + bitmap_remap(dstp->bits, srcp->bits, oldp->bits, newp->bits, nbits); +} + +#if NR_CPUS > 1 +#define for_each_cpu_mask(cpu, mask) \ + for ((cpu) = first_cpu(mask); \ + (cpu) < NR_CPUS; \ + (cpu) = next_cpu((cpu), (mask))) +#else /* NR_CPUS == 1 */ +#define for_each_cpu_mask(cpu, mask) \ + for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) +#endif /* NR_CPUS */ + +/* + * 
The following particular system cpumasks and operations manage + * possible, present and online cpus. Each of them is a fixed size + * bitmap of size NR_CPUS. + * + * #ifdef CONFIG_HOTPLUG_CPU + * cpu_possible_map - has bit 'cpu' set iff cpu is populatable + * cpu_present_map - has bit 'cpu' set iff cpu is populated + * cpu_online_map - has bit 'cpu' set iff cpu available to scheduler + * #else + * cpu_possible_map - has bit 'cpu' set iff cpu is populated + * cpu_present_map - copy of cpu_possible_map + * cpu_online_map - has bit 'cpu' set iff cpu available to scheduler + * #endif + * + * In either case, NR_CPUS is fixed at compile time, as the static + * size of these bitmaps. The cpu_possible_map is fixed at boot + * time, as the set of CPU id's that it is possible might ever + * be plugged in at any time during the life of that system boot. + * The cpu_present_map is dynamic(*), representing which CPUs + * are currently plugged in. And cpu_online_map is the dynamic + * subset of cpu_present_map, indicating those CPUs available + * for scheduling. + * + * If HOTPLUG is enabled, then cpu_possible_map is forced to have + * all NR_CPUS bits set, otherwise it is just the set of CPUs that + * ACPI reports present at boot. + * + * If HOTPLUG is enabled, then cpu_present_map varies dynamically, + * depending on what ACPI reports as currently plugged in, otherwise + * cpu_present_map is just a copy of cpu_possible_map. + * + * (*) Well, cpu_present_map is dynamic in the hotplug case. If not + * hotplug, it's a copy of cpu_possible_map, hence fixed at boot. + * + * Subtleties: + * 1) UP arch's (NR_CPUS == 1, CONFIG_SMP not defined) hardcode + * assumption that their single CPU is online. The UP + * cpu_{online,possible,present}_maps are placebos. Changing them + * will have no useful effect on the following num_*_cpus() + * and cpu_*() macros in the UP case.
This ugliness is a UP + * optimization - don't waste any instructions or memory references + * asking if you're online or how many CPUs there are if there is + * only one CPU. + * 2) Most SMP arch's #define some of these maps to be some + * other map specific to that arch. Therefore, the following + * must be #define macros, not inlines. To see why, examine + * the assembly code produced by the following. Note that + * set1() writes phys_x_map, but set2() writes x_map: + * int x_map, phys_x_map; + * #define set1(a) x_map = a + * inline void set2(int a) { x_map = a; } + * #define x_map phys_x_map + * main(){ set1(3); set2(5); } + */ + +extern cpumask_t cpu_possible_map; +extern cpumask_t cpu_online_map; +extern cpumask_t cpu_present_map; + +#if NR_CPUS > 1 +#define num_online_cpus() cpus_weight(cpu_online_map) +#define num_possible_cpus() cpus_weight(cpu_possible_map) +#define num_present_cpus() cpus_weight(cpu_present_map) +#define cpu_online(cpu) cpu_isset((cpu), cpu_online_map) +#define cpu_possible(cpu) cpu_isset((cpu), cpu_possible_map) +#define cpu_present(cpu) cpu_isset((cpu), cpu_present_map) +#else +#define num_online_cpus() 1 +#define num_possible_cpus() 1 +#define num_present_cpus() 1 +#define cpu_online(cpu) ((cpu) == 0) +#define cpu_possible(cpu) ((cpu) == 0) +#define cpu_present(cpu) ((cpu) == 0) +#endif + +int highest_possible_processor_id(void); +#define any_online_cpu(mask) __any_online_cpu(&(mask)) +int __any_online_cpu(const cpumask_t *mask); + +#define for_each_possible_cpu(cpu) for_each_cpu_mask((cpu), cpu_possible_map) +#define for_each_online_cpu(cpu) for_each_cpu_mask((cpu), cpu_online_map) +#define for_each_present_cpu(cpu) for_each_cpu_mask((cpu), cpu_present_map) + +#endif /* __LINUX_CPUMASK_H */ diff --git a/cputree.c b/cputree.c new file mode 100644 index 0000000..36170f2 --- /dev/null +++ b/cputree.c @@ -0,0 +1,371 @@ +/* + * Copyright (C) 2006, Intel Corporation + * + * This file is part of irqbalance + * + * This program file is free 
software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program in a file named COPYING; if not, write to the + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301 USA + */ + +/* + * This file contains the code to construct and manipulate a hierarchy of processors, + * cache domains and processor cores. + */ +#include +#include +#include +#include +#include + +#include + +#include "irqbalance.h" + + +GList *cpus; +GList *cache_domains; +GList *packages; + +int package_count; +int cache_domain_count; +int core_count; + +/* Users want to be able to keep interrupts away from some cpus; store these in a cpumask_t */ +cpumask_t banned_cpus; + + +/* + it's convenient to have the complement of banned_cpus available so that + the AND operator can be used to mask out unwanted cpus +*/ +static cpumask_t unbanned_cpus; + +static void fill_packages(void) +{ + GList *entry; + + entry = g_list_first(cache_domains); + while (entry) { + struct package *package; + struct cache_domain *cache = NULL; + GList *entry2; + + cache = entry->data; + entry2 = entry; + entry = g_list_next(entry); + if (cache->marker) + continue; + package = malloc(sizeof(struct package)); + if (!package) + break; + memset(package, 0, sizeof(struct package)); + package->mask = cache->package_mask; + package->number = cache->number; + while (entry2) { + struct cache_domain *cache2; + cache2 = entry2->data; + if (cpus_equal(cache->package_mask, cache2->package_mask)) { + cache2->marker = 1; + 
package->cache_domains = g_list_append(package->cache_domains, cache2); + if (package->number > cache2->number) + package->number = cache2->number; + } + entry2 = g_list_next(entry2); + } + packages = g_list_append(packages, package); + package_count++; + } +} + +static void fill_cache_domain(void) +{ + GList *entry; + + entry = g_list_first(cpus); + while (entry) { + struct cache_domain *cache = NULL; + struct cpu_core *cpu; + GList *entry2; + cpu = entry->data; + entry2 = entry; + entry = g_list_next(entry); + if (cpu->marker) + continue; + cache = malloc(sizeof(struct cache_domain)); + if (!cache) + break; + memset(cache, 0, sizeof(struct cache_domain)); + cache->mask = cpu->cache_mask; + cache->package_mask = cpu->package_mask; + cache->number = cpu->number; + cache_domains = g_list_append(cache_domains, cache); + cache_domain_count++; + while (entry2) { + struct cpu_core *cpu2; + cpu2 = entry2->data; + if (cpus_equal(cpu->cache_mask, cpu2->cache_mask) && + cpus_equal(cpu->package_mask, cpu2->package_mask)) { + cpu2->marker = 1; + cache->cpu_cores = g_list_append(cache->cpu_cores, cpu2); + if (cpu2->number < cache->number) + cache->number = cpu2->number; + } + entry2 = g_list_next(entry2); + } + } +} + + +static void do_one_cpu(char *path) +{ + struct cpu_core *cpu; + FILE *file; + char new_path[PATH_MAX]; + + /* skip offline cpus */ + snprintf(new_path, PATH_MAX, "%s/online", path); + file = fopen(new_path, "r"); + if (file) { + char line[4096]; + line[4095]=0; + if (fgets(line, 4095, file)==NULL) + line[0]='1'; + fclose(file); + if (line[0]=='0') + return; + } + + cpu = malloc(sizeof(struct cpu_core)); + if (!cpu) + return; + memset(cpu, 0, sizeof(struct cpu_core)); + + cpu->number = strtoul(&path[27], NULL, 10); + + cpu_set(cpu->number, cpu->mask); + + /* if the cpu is on the banned list, just don't add it */ + if (cpus_intersects(cpu->mask, banned_cpus)) { + free(cpu); + /* even though we don't use the cpu we do need to count it */ + core_count++; + return; 
+	}
+
+
+	/* try to read the package mask; if it doesn't exist assume solitary */
+	snprintf(new_path, PATH_MAX, "%s/topology/core_siblings", path);
+	file = fopen(new_path, "r");
+	cpu_set(cpu->number, cpu->package_mask);
+	if (file) {
+		char line[4096];
+		line[4095]=0;
+		if (fgets(line, 4095, file))
+			cpumask_parse_user(line, strlen(line), cpu->package_mask);
+		fclose(file);
+	}
+
+	/* try to read the cache mask; if it doesn't exist assume solitary */
+	/* We want the deepest cache level available so try index1 first, then index2 */
+	cpu_set(cpu->number, cpu->cache_mask);
+	snprintf(new_path, PATH_MAX, "%s/cache/index1/shared_cpu_map", path);
+	file = fopen(new_path, "r");
+	if (file) {
+		char line[4096];
+		line[4095]=0;
+		if (fgets(line, 4095, file))
+			cpumask_parse_user(line, strlen(line), cpu->cache_mask);
+		fclose(file);
+	}
+	snprintf(new_path, PATH_MAX, "%s/cache/index2/shared_cpu_map", path);
+	file = fopen(new_path, "r");
+	if (file) {
+		char line[4096];
+		line[4095]=0;
+		if (fgets(line, 4095, file))
+			cpumask_parse_user(line, strlen(line), cpu->cache_mask);
+		fclose(file);
+	}
+
+	/*
+	   blank out the banned cpus from the various masks so that interrupts
+	   will never be told to go there
+	 */
+	cpus_and(cpu->cache_mask, cpu->cache_mask, unbanned_cpus);
+	cpus_and(cpu->package_mask, cpu->package_mask, unbanned_cpus);
+	cpus_and(cpu->mask, cpu->mask, unbanned_cpus);
+
+	cpus = g_list_append(cpus, cpu);
+	core_count++;
+}
+
+/*
+ * Print one line per interrupt in the list, each line preceded by
+ * `spaces` blanks so that dump_tree() can show the nesting level.
+ * The line gives the irq number, its class name and its workload.
+ *
+ * NOTE(review): the indent loop and the "irq = interrupts->data" load had
+ * been lost from this copy of the file (everything between '<' and '>'
+ * was stripped); they are restored here -- confirm against the upstream
+ * source.
+ */
+static void dump_irqs(int spaces, GList *interrupts)
+{
+	struct interrupt *irq;
+	while (interrupts) {
+		int i;
+		for (i=0; i<spaces; i++)
+			printf(" ");
+		irq = interrupts->data;
+		printf("Interrupt %i (%s/%u) \n", irq->number, classes[irq->class], (unsigned int)irq->workload);
+		interrupts = g_list_next(interrupts);
+	}
+}
+
+void dump_tree(void)
+{
+	GList *p_iter, *c_iter, *cp_iter;
+	struct package *package;
+	struct cache_domain *cache_domain;
+	struct cpu_core *cpu;
+
+	char buffer[4096];
+	p_iter = g_list_first(packages);
+	while (p_iter) {
+		package = p_iter->data;
+
cpumask_scnprintf(buffer, 4096, package->mask); + printf("Package %i: cpu mask is %s (workload %lu)\n", package->number, buffer, (unsigned long)package->workload); + c_iter = g_list_first(package->cache_domains); + while (c_iter) { + cache_domain = c_iter->data; + c_iter = g_list_next(c_iter); + cpumask_scnprintf(buffer, 4095, cache_domain->mask); + printf(" Cache domain %i: cpu mask is %s (workload %lu) \n", cache_domain->number, buffer, (unsigned long)cache_domain->workload); + cp_iter = cache_domain->cpu_cores; + while (cp_iter) { + cpu = cp_iter->data; + cp_iter = g_list_next(cp_iter); + printf(" CPU number %i (workload %lu)\n", cpu->number, (unsigned long)cpu->workload); + dump_irqs(18, cpu->interrupts); + } + dump_irqs(10, cache_domain->interrupts); + } + dump_irqs(2, package->interrupts); + p_iter = g_list_next(p_iter); + } +} + +/* + * this function removes previous state from the cpu tree, such as + * which level does how much work and the actual lists of interrupts + * assigned to each component + */ +void clear_work_stats(void) +{ + GList *p_iter, *c_iter, *cp_iter; + struct package *package; + struct cache_domain *cache_domain; + struct cpu_core *cpu; + + p_iter = g_list_first(packages); + while (p_iter) { + package = p_iter->data; + package->workload = 0; + g_list_free(package->interrupts); + package->interrupts = NULL; + c_iter = g_list_first(package->cache_domains); + memset(package->class_count, 0, sizeof(package->class_count)); + while (c_iter) { + cache_domain = c_iter->data; + c_iter = g_list_next(c_iter); + cache_domain->workload = 0; + cp_iter = cache_domain->cpu_cores; + g_list_free(cache_domain->interrupts); + cache_domain->interrupts = NULL; + memset(cache_domain->class_count, 0, sizeof(cache_domain->class_count)); + while (cp_iter) { + cpu = cp_iter->data; + cp_iter = g_list_next(cp_iter); + cpu->workload = 0; + g_list_free(cpu->interrupts); + cpu->interrupts = NULL; + memset(cpu->class_count, 0, sizeof(cpu->class_count)); + } + } + p_iter 
= g_list_next(p_iter); + } +} + + +void parse_cpu_tree(void) +{ + DIR *dir; + struct dirent *entry; + + cpus_complement(unbanned_cpus, banned_cpus); + + dir = opendir("/sys/devices/system/cpu"); + if (!dir) + return; + do { + entry = readdir(dir); + if (entry && strlen(entry->d_name)>3 && strstr(entry->d_name,"cpu")) { + char new_path[PATH_MAX]; + sprintf(new_path, "/sys/devices/system/cpu/%s", entry->d_name); + do_one_cpu(new_path); + } + } while (entry); + closedir(dir); + + fill_cache_domain(); + fill_packages(); + + if (debug_mode) + dump_tree(); + +} + + +/* + * This function frees all memory related to a cpu tree so that a new tree + * can be read + */ +void clear_cpu_tree(void) +{ + GList *item; + struct cpu_core *cpu; + struct cache_domain *cache_domain; + struct package *package; + + while (packages) { + item = g_list_first(packages); + package = item->data; + g_list_free(package->cache_domains); + g_list_free(package->interrupts); + free(package); + packages = g_list_delete_link(packages, item); + } + package_count = 0; + + while (cache_domains) { + item = g_list_first(cache_domains); + cache_domain = item->data; + g_list_free(cache_domain->cpu_cores); + g_list_free(cache_domain->interrupts); + free(cache_domain); + cache_domains = g_list_delete_link(cache_domains, item); + } + cache_domain_count = 0; + + + while (cpus) { + item = g_list_first(cpus); + cpu = item->data; + g_list_free(cpu->interrupts); + free(cpu); + cpus = g_list_delete_link(cpus, item); + } + core_count = 0; + +} diff --git a/irqbalance.c b/irqbalance.c new file mode 100644 index 0000000..c37d9d4 --- /dev/null +++ b/irqbalance.c @@ -0,0 +1,138 @@ +/* + * Copyright (C) 2006, Intel Corporation + * + * This file is part of irqbalance + * + * This program file is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program in a file named COPYING; if not, write to the + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301 USA + */ +#include +#include +#include +#include +#include + +#include "irqbalance.h" + +int one_shot_mode; +int debug_mode; + +int need_cpu_rescan; + +extern cpumask_t banned_cpus; + +static int counter; + + +void sleep_approx(int seconds) +{ + struct timespec ts; + struct timeval tv; + gettimeofday(&tv, NULL); + ts.tv_sec = seconds; + ts.tv_nsec = -tv.tv_usec*1000; + while (ts.tv_nsec < 0) { + ts.tv_sec--; + ts.tv_nsec += 1000000000; + } + nanosleep(&ts, NULL); +} + +int main(int argc, char** argv) +{ + if (argc>1 && strstr(argv[1],"debug")) + debug_mode=1; + if (argc>1 && strstr(argv[1],"oneshot")) + one_shot_mode=1; + + if (getenv("IRQBALANCE_BANNED_CPUS")) { + cpumask_parse_user(getenv("IRQBALANCE_BANNED_CPUS"), strlen(getenv("IRQBALANCE_BANNED_CPUS")), banned_cpus); + } + + if (getenv("IRQBALANCE_ONESHOT")) + one_shot_mode=1; + + if (getenv("IRQBALANCE_DEBUG")) + debug_mode=1; + + parse_cpu_tree(); + + + /* On single core UP systems irqbalance obviously has no work to do */ + if (core_count<2) + exit(EXIT_SUCCESS); + /* On dual core/hyperthreading shared cache systems just do a one shot setup */ + if (cache_domain_count==1) + one_shot_mode = 1; + + + if (!debug_mode) + if (daemon(0,0)) + exit(EXIT_FAILURE); + + parse_proc_interrupts(); + sleep(SLEEP_INTERVAL/4); + reset_counts(); + parse_proc_interrupts(); + pci_numa_scan(); + calculate_workload(); + sort_irq_list(); + if (debug_mode) + dump_workloads(); + + while (1) { + sleep_approx(SLEEP_INTERVAL); + if (debug_mode) 
+ printf("\n\n\n-----------------------------------------------------------------------------\n"); + + + check_power_mode(); + parse_proc_interrupts(); + + /* cope with cpu hotplug -- detected during /proc/interrupts parsing */ + if (need_cpu_rescan) { + need_cpu_rescan = 0; + /* if there's a hotplug event we better turn off power mode for a bit until things settle */ + power_mode = 0; + if (debug_mode) + printf("Rescanning cpu topology \n"); + reset_counts(); + clear_work_stats(); + + clear_cpu_tree(); + parse_cpu_tree(); + } + + /* deal with NAPI */ + account_for_nic_stats(); + calculate_workload(); + + /* to cope with dynamic configurations we scan for new numa information + * once every 5 minutes + */ + if (counter % NUMA_REFRESH_INTERVAL == 16) + pci_numa_scan(); + + calculate_placement(); + activate_mapping(); + + if (debug_mode) + dump_tree(); + if (one_shot_mode) + break; + counter++; + } + return EXIT_SUCCESS; +} diff --git a/irqbalance.h b/irqbalance.h new file mode 100644 index 0000000..e9c1ad4 --- /dev/null +++ b/irqbalance.h @@ -0,0 +1,50 @@ +#ifndef __INCLUDE_GUARD_IRQBALANCE_H_ +#define __INCLUDE_GUARD_IRQBALANCE_H_ + + +#include "constants.h" + +#include "cpumask.h" + +#include +#include + +#include "types.h" + +struct interrupt; + +extern int package_count; +extern int cache_domain_count; +extern int core_count; +extern char *classes[]; +extern int map_class_to_level[7]; +extern int class_counts[7]; +extern int debug_mode; +extern int power_mode; +extern int need_cpu_rescan; +extern int one_shot_mode; +extern GList *interrupts; + + +extern void parse_cpu_tree(void); +extern void clear_work_stats(void); +extern void parse_proc_interrupts(void); +extern void set_interrupt_count(int number, uint64_t count, cpumask_t *mask); +extern void add_interrupt_count(int number, uint64_t count, int type); +extern int find_class(struct interrupt *irq, char *string); +extern void add_interrupt_numa(int number, cpumask_t mask, int type); + +void 
calculate_workload(void); +void reset_counts(void); +void dump_workloads(void); +void sort_irq_list(void); +void calculate_placement(void); +void dump_tree(void); + +void activate_mapping(void); +void account_for_nic_stats(void); +void check_power_mode(void); +void clear_cpu_tree(void); +void pci_numa_scan(void); + +#endif diff --git a/irqlist.c b/irqlist.c new file mode 100644 index 0000000..a434d20 --- /dev/null +++ b/irqlist.c @@ -0,0 +1,252 @@ +/* + * Copyright (C) 2006, Intel Corporation + * + * This file is part of irqbalance + * + * This program file is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program in a file named COPYING; if not, write to the + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301 USA + */ + +/* + * This file has the basic functions to manipulate interrupt metadata + */ +#include +#include +#include +#include +#include + +#include "types.h" +#include "irqbalance.h" + +GList *interrupts; + + + +/* + * This function classifies and reads various things from /proc about a specific irq + */ +static void investigate(struct interrupt *irq, int number) +{ + DIR *dir; + struct dirent *entry; + char *c, *c2; + int nr; + char buf[PATH_MAX]; + sprintf(buf, "/proc/irq/%i", number); + dir = opendir(buf); + do { + entry = readdir(dir); + if (!entry) + break; + if (strcmp(entry->d_name,"smp_affinity")==0) { + FILE *file; + sprintf(buf, "/proc/irq/%i/smp_affinity", number); + file = fopen(buf, "r"); + if (!file) + continue; + if (fgets(buf, PATH_MAX, file)==NULL) { + fclose(file); + continue; + } + cpumask_parse_user(buf, strlen(buf), irq->mask); + fclose(file); + } else { + irq->class = find_class(irq, entry->d_name); + } + + } while (entry); + closedir(dir); + irq->balance_level = map_class_to_level[irq->class]; + + /* next, check the IRQBALANCE_BANNED_INTERRUPTS env variable for blacklisted irqs */ + c = getenv("IRQBALANCE_BANNED_INTERRUPTS"); + if (!c) + return; + + do { + nr = strtoul(c, &c2, 10); + if (c!=c2 && nr == number) + irq->balance_level = BALANCE_NONE; + c = c2; + } while (c!=c2 && c2!=NULL); +} + + +/* + * Set the number of interrupts received for a specific irq; + * create the irq metadata if there is none yet + */ +void set_interrupt_count(int number, uint64_t count, cpumask_t *mask) +{ + GList *item; + struct interrupt *irq; + + if (count < MIN_IRQ_COUNT && !one_shot_mode) + return; /* no need to track or set interrupts sources without any activity since boot + but allow for a few (20) 
boot-time-only interrupts */ + + item = g_list_first(interrupts); + while (item) { + irq = item->data; + + if (irq->number == number) { + irq->count = count; + return; + } + item = g_list_next(item); + } + /* new interrupt */ + irq = malloc(sizeof(struct interrupt)); + if (!irq) + return; + memset(irq, 0, sizeof(struct interrupt)); + irq->number = number; + irq->count = count; + investigate(irq, number); + if (irq->balance_level == BALANCE_NONE) + irq->mask = *mask; + + interrupts = g_list_append(interrupts, irq); +} + +/* + * Add extra irqs to a specific irq metadata structure; + * if no such metadata exists, do nothing at all + */ +void add_interrupt_count(int number, uint64_t count, int type) +{ + GList *item; + struct interrupt *irq; + + if (!count) + return; + + item = g_list_first(interrupts); + while (item) { + irq = item->data; + item = g_list_next(item); + + if (irq->number == number) { + irq->extra += count; + if (irq->class < type && irq->balance_level != BALANCE_NONE) { + irq->class = type; + irq->balance_level = map_class_to_level[irq->class]; + } + return; + } + } +} + +/* + * Set the numa affinity mask for a specific interrupt if there + * is metadata for the interrupt; do nothing if no such data + * exists. 
+ */ +void add_interrupt_numa(int number, cpumask_t mask, int type) +{ + GList *item; + struct interrupt *irq; + + item = g_list_first(interrupts); + while (item) { + irq = item->data; + item = g_list_next(item); + + if (irq->number == number) { + cpus_or(irq->numa_mask, irq->numa_mask, mask); + if (irq->class < type && irq->balance_level != BALANCE_NONE) { + irq->class = type; + irq->balance_level = map_class_to_level[irq->class]; + } + return; + } + } +} + +void calculate_workload(void) +{ + int i; + GList *item; + struct interrupt *irq; + + for (i=0; i<7; i++) + class_counts[i]=0; + item = g_list_first(interrupts); + while (item) { + irq = item->data; + item = g_list_next(item); + + irq->workload = irq->count - irq->old_count + irq->workload/3 + irq->extra; + class_counts[irq->class]++; + irq->old_count = irq->count; + irq->extra = 0; + } +} + +void reset_counts(void) +{ + GList *item; + struct interrupt *irq; + item = g_list_first(interrupts); + while (item) { + irq = item->data; + item = g_list_next(item); + irq->old_count = irq->count; + irq->extra = 0; + + } +} + +void dump_workloads(void) +{ + GList *item; + struct interrupt *irq; + item = g_list_first(interrupts); + while (item) { + irq = item->data; + item = g_list_next(item); + + printf("Interrupt %i (class %s) has workload %lu \n", irq->number, classes[irq->class], (unsigned long)irq->workload); + + } +} + + +static gint sort_irqs(gconstpointer A, gconstpointer B) +{ + struct interrupt *a, *b; + a = (struct interrupt*)A; + b = (struct interrupt*)B; + + if (a->class < b->class) + return 1; + if (a->class > b->class) + return -1; + if (a->workload < b->workload) + return 1; + if (a->workload > b->workload) + return -1; + if (alow) and then by workload (high->low) */ + interrupts = g_list_sort(interrupts, sort_irqs); +} diff --git a/network.c b/network.c new file mode 100644 index 0000000..7cb8439 --- /dev/null +++ b/network.c @@ -0,0 +1,175 @@ +/* + * Copyright (C) 2006, Intel Corporation + * + * This 
file is part of irqbalance + * + * This program file is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program in a file named COPYING; if not, write to the + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301 USA + */ + +/* + * Due to NAPI, the actual number of interrupts for a network NIC is usually low + * even though the amount of work is high; this file is there to compensate for this + * by adding actual package counts to the calculated amount of work of interrupts + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include "irqbalance.h" + +struct nic { + char ethname[64]; + int irq; + uint64_t prev_pkt; + int counter; +}; + +static GList *nics; + + +static int dev_to_irq(char *devname) +{ + int sock, ret; + struct ifreq ifr; + struct ethtool_value ethtool; + struct ethtool_drvinfo driver; + FILE *file; + + char buffer[PATH_MAX]; + + memset(&ifr, 0, sizeof(struct ifreq)); + memset(ðtool, 0, sizeof(struct ethtool_value)); + + sock = socket(AF_INET, SOCK_DGRAM, 0); + if (sock<0) + return 0; + + strcpy(ifr.ifr_name, devname); + + driver.cmd = ETHTOOL_GDRVINFO; + ifr.ifr_data = (void*) &driver; + ret = ioctl(sock, SIOCETHTOOL, &ifr); + close(sock); + if (ret<0) + return 0; + sprintf(buffer,"/sys/bus/pci/devices/%s/irq", driver.bus_info); + file = fopen(buffer, "r"); + if (!file) + return 0; + if (fgets(buffer, PATH_MAX, file)==NULL) + strcpy(buffer,"0"); + fclose(file); + 
return strtoul(buffer, NULL, 10); +} + +static struct nic *new_nic(char *name) +{ + struct nic *nic; + nic = malloc(sizeof(struct nic)); + if (!nic) + return NULL; + memset(nic, 0, sizeof(struct nic)); + strcpy(nic->ethname, name); + nic->irq = dev_to_irq(name); + nics = g_list_append(nics, nic); + return nic; +} + +static struct nic *find_nic(char *name) +{ + GList *item; + struct nic *nic; + item = g_list_first(nics); + while (item) { + nic = item->data; + item = g_list_next(item); + if (strcmp(nic->ethname, name)==0) { + nic->counter++; + /* refresh irq information once in a while; ifup/down + * can make this info go stale over time + */ + if ((nic->counter % NIC_REFRESH_INTERVAL) == 0) + nic->irq = dev_to_irq(nic->ethname); + return nic; + } + } + nic = new_nic(name); + return nic; +} + +void account_for_nic_stats(void) +{ + struct nic *nic; + FILE *file; + char line[8192]; + file = fopen("/proc/net/dev", "r"); + if (!file) + return; + /* first two lines are headers */ + if (fgets(line, 8191, file)==NULL) + return; + if (fgets(line, 8191, file)==NULL) + return; + + while (!feof(file)) { + uint64_t rxcount; + uint64_t txcount; + uint64_t delta; + int dummy; + char *c, *c2; + if (fgets(line, 8191, file)==NULL) + break; + c = strchr(line, ':'); + if (c==NULL) /* header line */ + continue; + *c = 0; + c++; + c2 = &line[0]; + while (*c2==' ') c2++; + nic = find_nic(c2); + if (!nic) + continue; + dummy = strtoul(c, &c, 10); + rxcount = strtoull(c, &c, 10); + dummy = strtoul(c, &c, 10); + dummy = strtoul(c, &c, 10); + dummy = strtoul(c, &c, 10); + dummy = strtoul(c, &c, 10); + dummy = strtoul(c, &c, 10); + dummy = strtoul(c, &c, 10); + dummy = strtoul(c, &c, 10); + txcount = strtoull(c, &c, 10); + delta = (txcount+rxcount-nic->prev_pkt)/2; + /* add the RX and TX packets to the irq count, but only for 50%; + many packets generate another IRQ anyway and we don't want to + overweigh this too much */ + if (delta>0 && nic->prev_pkt != 0) + add_interrupt_count(nic->irq, 
delta, IRQ_ETH); + nic->prev_pkt = rxcount + txcount; + + + } + fclose(file); +} diff --git a/non-atomic.h b/non-atomic.h new file mode 100644 index 0000000..943501a --- /dev/null +++ b/non-atomic.h @@ -0,0 +1,115 @@ +/* + +This file is copied from the Linux kernel and mildly adjusted for use in userspace + + +*/ +#ifndef _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ +#define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ + +#define BITOP_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) +#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) + +/** + * __set_bit - Set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * Unlike set_bit(), this function is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. + */ +static inline void set_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + + *p |= mask; +} + +static inline void clear_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + + *p &= ~mask; +} + +/** + * __change_bit - Toggle a bit in memory + * @nr: the bit to change + * @addr: the address to start counting from + * + * Unlike change_bit(), this function is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. + */ +static inline void __change_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + + *p ^= mask; +} + +/** + * __test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is non-atomic and can be reordered. + * If two examples of this operation race, one can appear to succeed + * but actually fail. 
You must protect multiple accesses with a lock. + */ +static inline int __test_and_set_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + unsigned long old = *p; + + *p = old | mask; + return (old & mask) != 0; +} + +/** + * __test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to clear + * @addr: Address to count from + * + * This operation is non-atomic and can be reordered. + * If two examples of this operation race, one can appear to succeed + * but actually fail. You must protect multiple accesses with a lock. + */ +static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + unsigned long old = *p; + + *p = old & ~mask; + return (old & mask) != 0; +} + +/* WARNING: non atomic and it can be reordered! */ +static inline int __test_and_change_bit(int nr, + volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + unsigned long old = *p; + + *p = old ^ mask; + return (old & mask) != 0; +} + +/** + * test_bit - Determine whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static inline int test_bit(int nr, const volatile unsigned long *addr) +{ + return 1UL & (addr[BITOP_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); +} + +#endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */ diff --git a/numa.c b/numa.c new file mode 100644 index 0000000..6d8e48a --- /dev/null +++ b/numa.c @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2006, Intel Corporation + * + * This file is part of irqbalance + * + * This program file is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program in a file named COPYING; if not, write to the + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301 USA + */ + +/* + * This file tries to map numa affinity of pci devices to their interrupts + * In addition the PCI class information is used to refine the classification + * of interrupt sources + */ +#include +#include +#include +#include +#include + +#include "irqbalance.h" + +void pci_numa_scan(void) +{ + DIR *dir; + struct dirent *entry; + cpumask_t mask; + char line[PATH_MAX]; + FILE *file; + int irq; + unsigned int class; + + dir = opendir("/sys/bus/pci/devices"); + if (!dir) + return; + do { + int type; + entry = readdir(dir); + if (!entry) + return; + if (strlen(entry->d_name)<3) + continue; + + sprintf(line,"/sys/bus/pci/devices/%s/irq", entry->d_name); + file = fopen(line, "r"); + if (!file) + continue; + if (fgets(line, PATH_MAX, file)==NULL) + line[0]=0; + fclose(file); + irq = strtoul(line, NULL, 10); + if (!irq) + continue; + + sprintf(line,"/sys/bus/pci/devices/%s/class", entry->d_name); + file = fopen(line, "r"); + if (!file) + continue; + if (fgets(line, PATH_MAX, file)==NULL) + line[0]=0; + fclose(file); + class = strtoul(line, NULL, 16); + + sprintf(line,"/sys/bus/pci/devices/%s/local_cpus", entry->d_name); + file = fopen(line, "r"); + if (!file) + continue; + if (fgets(line, PATH_MAX, file)==NULL) + line[0]=0; + fclose(file); + cpumask_parse_user(line, strlen(line), mask); + + type = IRQ_OTHER; + if ((class>>16) == 0x01) + type = IRQ_SCSI; +/* + * Ethernet gets the type via /proc/net/dev; in addition down'd interfaces + * shouldn't boost interrupts + if 
((class>>16) == 0x02) + type = IRQ_ETH; +*/ + if ((class>>16) >= 0x03 && (class>>16) <= 0x0C) + type = IRQ_LEGACY; + + add_interrupt_numa(irq, mask, type); + + } while (entry); + closedir(dir); +} diff --git a/placement.c b/placement.c new file mode 100644 index 0000000..5d6e2bb --- /dev/null +++ b/placement.c @@ -0,0 +1,315 @@ +/* + * Copyright (C) 2006, Intel Corporation + * + * This file is part of irqbalance + * + * This program file is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program in a file named COPYING; if not, write to the + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301 USA + */ +#include +#include +#include +#include + +#include "types.h" +#include "irqbalance.h" + + +int power_mode; + +extern GList *interrupts, *packages, *cache_domains, *cpus; + +static uint64_t package_cost_func(struct interrupt *irq, struct package *package) +{ + int bonus = 0; + int maxcount; + /* moving to a cold package/cache/etc gets you a 3000 penalty */ + if (!cpus_intersects(irq->old_mask, package->mask)) + bonus = CROSS_PACKAGE_PENALTY; + + /* do a little numa affinity */ + if (!cpus_intersects(irq->numa_mask, package->mask)) + bonus += NUMA_PENALTY; + + /* but if the irq has had 0 interrupts for a while move it about more easily */ + if (irq->workload==0) + bonus = bonus / 10; + + /* in power save mode, you better be on package 0, with overflow to the next package if really needed */ + if (power_mode) + bonus += POWER_MODE_PACKAGE_THRESHOLD 
* package->number; + + /* if we're out of whack in terms of per class counts.. just block (except in power mode) */ + maxcount = (class_counts[irq->class] + package_count -1 ) / package_count; + if (package->class_count[irq->class]>=maxcount && !power_mode) + bonus += 300000; + + return irq->workload + bonus; +} + +static uint64_t cache_domain_cost_func(struct interrupt *irq, struct cache_domain *cache_domain) +{ + int bonus = 0; + /* moving to a cold cache gets you a 1500 penalty */ + if (!cpus_intersects(irq->old_mask, cache_domain->mask)) + bonus = CROSS_PACKAGE_PENALTY/2; + + /* do a little numa affinity */ + if (!cpus_intersects(irq->numa_mask, cache_domain->mask)) + bonus += NUMA_PENALTY; + + /* but if the irq has had 0 interrupts for a while move it about more easily */ + if (irq->workload==0) + bonus = bonus / 10; + + + /* pay 6000 for each previous interrupt of the same class */ + bonus += CLASS_VIOLATION_PENTALTY * cache_domain->class_count[irq->class]; + + return irq->workload + bonus; +} + +static uint64_t cpu_cost_func(struct interrupt *irq, struct cpu_core *cpu) +{ + int bonus = 0; + /* moving to a colder core gets you a 1000 penalty */ + if (!cpus_intersects(irq->old_mask, cpu->mask)) + bonus = CROSS_PACKAGE_PENALTY/3; + + /* do a little numa affinity */ + if (!cpus_intersects(irq->numa_mask, cpu->mask)) + bonus += NUMA_PENALTY; + + /* but if the irq has had 0 interrupts for a while move it about more easily */ + if (irq->workload==0) + bonus = bonus / 10; + + /* + * since some chipsets only place at the first cpu, give a tiny preference to non-first + * cpus for specifically placed interrupts + */ + if (first_cpu(cpu->cache_mask)==cpu->number) + bonus++; + + + + /* pay 6000 for each previous interrupt of the same class */ + bonus += CLASS_VIOLATION_PENTALTY * cpu->class_count[irq->class]; + + return irq->workload + bonus; +} + + +static void place_cache_domain(struct package *package) +{ + GList *iter, *next; + GList *pkg; + struct interrupt *irq; + 
struct cache_domain *cache_domain; + + + iter = g_list_first(package->interrupts); + while (iter) { + struct cache_domain *best = NULL; + uint64_t best_cost = INT_MAX; + irq = iter->data; + + if (irq->balance_level <= BALANCE_PACKAGE) { + iter = g_list_next(iter); + continue; + } + pkg = g_list_first(package->cache_domains); + while (pkg) { + uint64_t newload; + + cache_domain = pkg->data; + newload = cache_domain->workload + cache_domain_cost_func(irq, cache_domain); + if (newload < best_cost) { + best = cache_domain; + best_cost = newload; + } + + pkg = g_list_next(pkg); + } + if (best) { + next = g_list_next(iter); + package->interrupts = g_list_delete_link(package->interrupts, iter); + + best->workload += irq->workload + 1; + best->interrupts=g_list_append(best->interrupts, irq); + best->class_count[irq->class]++; + irq->mask = best->mask; + iter = next; + } else + iter = g_list_next(iter); + } +} + + +static void place_core(struct cache_domain *cache_domain) +{ + GList *iter, *next; + GList *pkg; + struct interrupt *irq; + struct cpu_core *cpu; + + + iter = g_list_first(cache_domain->interrupts); + while (iter) { + struct cpu_core *best = NULL; + uint64_t best_cost = INT_MAX; + irq = iter->data; + + /* if the irq isn't per-core policy and is not very busy, leave it at cache domain level */ + if (irq->balance_level <= BALANCE_CACHE && irq->workload < CORE_SPECIFIC_THRESHOLD && !one_shot_mode) { + iter = g_list_next(iter); + continue; + } + pkg = g_list_first(cache_domain->cpu_cores); + while (pkg) { + uint64_t newload; + + cpu = pkg->data; + newload = cpu->workload + cpu_cost_func(irq, cpu); + if (newload < best_cost) { + best = cpu; + best_cost = newload; + } + + pkg = g_list_next(pkg); + } + if (best) { + next = g_list_next(iter); + cache_domain->interrupts = g_list_delete_link(cache_domain->interrupts, iter); + + best->workload += irq->workload + 1; + best->interrupts=g_list_append(best->interrupts, irq); + best->class_count[irq->class]++; + irq->mask = 
best->mask; + iter = next; + } else + iter = g_list_next(iter); + } +} + + +static void place_packages(GList *list) +{ + GList *iter; + GList *pkg; + struct interrupt *irq; + struct package *package; + + + iter = g_list_first(list); + while (iter) { + struct package *best = NULL; + uint64_t best_cost = INT_MAX; + irq = iter->data; + if (irq->balance_level == BALANCE_NONE) { + iter = g_list_next(iter); + continue; + } + pkg = g_list_first(packages); + while (pkg) { + uint64_t newload; + + package = pkg->data; + newload = package->workload + package_cost_func(irq, package); + if (newload < best_cost) { + best = package; + best_cost = newload; + } + + pkg = g_list_next(pkg); + } + if (best) { + best->workload += irq->workload + 1; + best->interrupts=g_list_append(best->interrupts, irq); + best->class_count[irq->class]++; + irq->mask = best->mask; + } + iter = g_list_next(iter); + } +} + + + +static void do_unroutables(void) +{ + struct package *package; + struct cache_domain *cache_domain; + struct cpu_core *cpu; + struct interrupt *irq; + GList *iter, *inter; + + inter = g_list_first(interrupts); + while (inter) { + irq = inter->data; + inter = g_list_next(inter); + if (irq->balance_level != BALANCE_NONE) + continue; + + iter = g_list_first(packages); + while (iter) { + package = iter->data; + if (cpus_intersects(package->mask, irq->mask)) + package->workload += irq->workload; + iter = g_list_next(iter); + } + + iter = g_list_first(cache_domains); + while (iter) { + cache_domain = iter->data; + if (cpus_intersects(cache_domain->mask, irq->mask)) + cache_domain->workload += irq->workload; + iter = g_list_next(iter); + } + iter = g_list_first(cpus); + while (iter) { + cpu = iter->data; + if (cpus_intersects(cpu->mask, irq->mask)) + cpu->workload += irq->workload; + iter = g_list_next(iter); + } + } +} + + +void calculate_placement(void) +{ + struct package *package; + struct cache_domain *cache_domain; + GList *iter; + /* first clear old data */ + clear_work_stats(); + 
sort_irq_list(); + do_unroutables(); + + place_packages(interrupts); + iter = g_list_first(packages); + while (iter) { + package = iter->data; + place_cache_domain(package); + iter = g_list_next(iter); + } + + iter = g_list_first(cache_domains); + while (iter) { + cache_domain = iter->data; + place_core(cache_domain); + iter = g_list_next(iter); + } +} diff --git a/powermode.c b/powermode.c new file mode 100644 index 0000000..acf8bb5 --- /dev/null +++ b/powermode.c @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2006, Intel Corporation + * + * This file is part of irqbalance + * + * This program file is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program in a file named COPYING; if not, write to the + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301 USA + */ +#include +#include +#include +#include +#include + +#include "irqbalance.h" + + +extern int power_mode; + +static uint64_t previous; + +static unsigned int hysteresis; + +void check_power_mode(void) +{ + FILE *file; + char line[4096]; + char *c; + uint64_t dummy, irq, softirq; + line[0]=0; + line[4095]=0; + file = fopen("/proc/stat", "r"); + if (!file) + return; + if (fgets(line, 4095, file)==NULL) + memset(line,0, 4096); + fclose(file); + c=&line[4]; + dummy = strtoull(c, &c, 10); /* user */ + dummy = strtoull(c, &c, 10); /* nice */ + dummy = strtoull(c, &c, 10); /* system */ + dummy = strtoull(c, &c, 10); /* idle */ + dummy = strtoull(c, &c, 10); /* iowait */ + irq = strtoull(c, &c, 10); /* irq */ + softirq = strtoull(c, &c, 10); /* softirq */ + + irq += softirq; + if (irq - previous < POWER_MODE_SOFTIRQ_THRESHOLD) { + hysteresis++; + if (hysteresis > POWER_MODE_HYSTERESIS) { + if (debug_mode && !power_mode) + printf("IRQ delta is %lu, switching to power mode \n", (unsigned long)(irq - previous) ); + power_mode = 1; + } + } else { + if (debug_mode && power_mode) + printf("IRQ delta is %lu, switching to performance mode \n", (unsigned long)(irq - previous) ); + power_mode = 0; + hysteresis = 0; + } + previous = irq; +} + diff --git a/procinterrupts.c b/procinterrupts.c new file mode 100644 index 0000000..3d84b01 --- /dev/null +++ b/procinterrupts.c @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2006, Intel Corporation + * + * This file is part of irqbalance + * + * This program file is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program in a file named COPYING; if not, write to the + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301 USA + */ +#include +#include +#include +#include +#include + +#include "cpumask.h" +#include "irqbalance.h" + +#define LINESIZE 4096 + +void parse_proc_interrupts(void) +{ + FILE *file; + char line[LINESIZE+1]; + + line[LINESIZE] = 0; + file = fopen("/proc/interrupts", "r"); + if (!file) + return; + + /* first line is the header we don't need; nuke it */ + if (fgets(line, LINESIZE, file)==NULL) + return; + + while (!feof(file)) { + cpumask_t present; + int cpunr; + int number; + uint64_t count; + char *c, *c2; + + if (fgets(line, LINESIZE, file)==NULL) + break; + + + /* lines with letters in front are special, like NMI count. 
Ignore */ + if (!(line[0]==' ' || (line[0]>='0' && line[0]<='9'))) + break; + c = strchr(line, ':'); + if (!c) + continue; + *c = 0; + c++; + number = strtoul(line, NULL, 10); + cpus_clear(present); + count = 0; + cpunr = 0; + + c2=NULL; + while (1) { + uint64_t C; + C = strtoull(c, &c2, 10); + if (c==c2) /* end of numbers */ + break; + count += C; + c=c2; + if (C) + cpu_set(cpunr, present); + cpunr++; + } + if (cpunr != core_count) + need_cpu_rescan = 1; + + set_interrupt_count(number, count, &present); + } + fclose(file); +} diff --git a/strace b/strace new file mode 100644 index 0000000..662d98e --- /dev/null +++ b/strace @@ -0,0 +1,573 @@ +execve("./irqbalance", ["./irqbalance", "debug"], [/* 32 vars */]) = 0 +brk(0) = 0x605000 +mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b3a8b1b5000 +uname({sys="Linux", node="benny", ...}) = 0 +access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory) +open("/etc/ld.so.cache", O_RDONLY) = 3 +fstat(3, {st_mode=S_IFREG|0644, st_size=171080, ...}) = 0 +mmap(NULL, 171080, PROT_READ, MAP_PRIVATE, 3, 0) = 0x2b3a8b1b6000 +close(3) = 0 +open("/lib64/libglib-2.0.so.0", O_RDONLY) = 3 +read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0`\0!\347"..., 832) = 832 +fstat(3, {st_mode=S_IFREG|0755, st_size=1839568, ...}) = 0 +mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b3a8b1e0000 +mmap(0x38e7200000, 2743240, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x38e7200000 +mprotect(0x38e729d000, 2093056, PROT_NONE) = 0 +mmap(0x38e749c000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x9c000) = 0x38e749c000 +close(3) = 0 +open("/lib64/libc.so.6", O_RDONLY) = 3 +read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0@\333\1\0"..., 832) = 832 +fstat(3, {st_mode=S_IFREG|0755, st_size=1672888, ...}) = 0 +mmap(NULL, 3461304, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x2b3a8b3b6000 +mprotect(0x2b3a8b4fa000, 2097152, 
PROT_NONE) = 0 +mmap(0x2b3a8b6fa000, 20480, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x144000) = 0x2b3a8b6fa000 +mmap(0x2b3a8b6ff000, 16568, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x2b3a8b6ff000 +close(3) = 0 +open("/lib64/librt.so.1", O_RDONLY) = 3 +read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\300#\0\0"..., 832) = 832 +fstat(3, {st_mode=S_IFREG|0755, st_size=50320, ...}) = 0 +mmap(NULL, 2132968, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x2b3a8b704000 +mprotect(0x2b3a8b70c000, 2093056, PROT_NONE) = 0 +mmap(0x2b3a8b90b000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x7000) = 0x2b3a8b90b000 +close(3) = 0 +open("/lib64/libpthread.so.0", O_RDONLY) = 3 +read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0`X\0\0\0"..., 832) = 832 +fstat(3, {st_mode=S_IFREG|0755, st_size=138080, ...}) = 0 +mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b3a8b90d000 +mmap(NULL, 2200432, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x2b3a8b90e000 +mprotect(0x2b3a8b923000, 2093056, PROT_NONE) = 0 +mmap(0x2b3a8bb22000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x14000) = 0x2b3a8bb22000 +mmap(0x2b3a8bb24000, 13168, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x2b3a8bb24000 +close(3) = 0 +mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b3a8bb28000 +arch_prctl(ARCH_SET_FS, 0x2b3a8bb286f0) = 0 +mprotect(0x2b3a8bb22000, 4096, PROT_READ) = 0 +mprotect(0x2b3a8b90b000, 4096, PROT_READ) = 0 +mprotect(0x2b3a8b6fa000, 16384, PROT_READ) = 0 +mprotect(0x2b3a8b3b4000, 4096, PROT_READ) = 0 +munmap(0x2b3a8b1b6000, 171080) = 0 +set_tid_address(0x2b3a8bb28780) = 3829 +syscall_273(0x2b3a8bb28790, 0x18, 0x7fff1f90e200, 0x2b3a8b19b4a0, 0x2b3a8bb286f0, 0x2b3a8b1b52b8, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 
0x2) = 0 +rt_sigaction(SIGRTMIN, {0x2b3a8b9134a0, [], SA_RESTORER|SA_SIGINFO, 0x2b3a8b91bde0}, NULL, 8) = 0 +rt_sigaction(SIGRT_1, {0x2b3a8b9133f0, [], SA_RESTORER|SA_RESTART|SA_SIGINFO, 0x2b3a8b91bde0}, NULL, 8) = 0 +rt_sigprocmask(SIG_UNBLOCK, [RTMIN RT_1], NULL, 8) = 0 +getrlimit(RLIMIT_STACK, {rlim_cur=8192*1024, rlim_max=RLIM_INFINITY}) = 0 +open("/sys/devices/system/cpu", O_RDONLY|O_NONBLOCK|O_DIRECTORY) = 3 +fstat(3, {st_mode=S_IFDIR|0755, st_size=0, ...}) = 0 +fcntl(3, F_SETFD, FD_CLOEXEC) = 0 +brk(0) = 0x605000 +brk(0x627000) = 0x627000 +getdents(3, /* 11 entries */, 4096) = 288 +open("/sys/devices/system/cpu/cpu7/online", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0600, st_size=4096, ...}) = 0 +read(4, "1\n", 4096) = 2 +close(4) = 0 +open("/sys/devices/system/cpu/cpu7/topology/core_siblings", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu7/cache/index1/shared_cpu_map", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu7/cache/index2/shared_cpu_map", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu6/online", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0600, st_size=4096, ...}) = 0 +read(4, "1\n", 4096) = 2 +close(4) = 0 +open("/sys/devices/system/cpu/cpu6/topology/core_siblings", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu6/cache/index1/shared_cpu_map", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 
+open("/sys/devices/system/cpu/cpu6/cache/index2/shared_cpu_map", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu5/online", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0600, st_size=4096, ...}) = 0 +read(4, "1\n", 4096) = 2 +close(4) = 0 +open("/sys/devices/system/cpu/cpu5/topology/core_siblings", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu5/cache/index1/shared_cpu_map", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu5/cache/index2/shared_cpu_map", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu4/online", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0600, st_size=4096, ...}) = 0 +read(4, "1\n", 4096) = 2 +close(4) = 0 +open("/sys/devices/system/cpu/cpu4/topology/core_siblings", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu4/cache/index1/shared_cpu_map", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu4/cache/index2/shared_cpu_map", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu3/online", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0600, st_size=4096, ...}) = 0 +read(4, "1\n", 4096) = 2 +close(4) = 0 +open("/sys/devices/system/cpu/cpu3/topology/core_siblings", O_RDONLY) 
= 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu3/cache/index1/shared_cpu_map", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu3/cache/index2/shared_cpu_map", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu2/online", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0600, st_size=4096, ...}) = 0 +read(4, "1\n", 4096) = 2 +close(4) = 0 +open("/sys/devices/system/cpu/cpu2/topology/core_siblings", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu2/cache/index1/shared_cpu_map", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu2/cache/index2/shared_cpu_map", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu1/online", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0600, st_size=4096, ...}) = 0 +read(4, "1\n", 4096) = 2 +close(4) = 0 +open("/sys/devices/system/cpu/cpu1/topology/core_siblings", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu1/cache/index1/shared_cpu_map", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu1/cache/index2/shared_cpu_map", O_RDONLY) = 4 +fstat(4, 
{st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu0/online", O_RDONLY) = -1 ENOENT (No such file or directory) +open("/sys/devices/system/cpu/cpu0/topology/core_siblings", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu0/cache/index1/shared_cpu_map", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/devices/system/cpu/cpu0/cache/index2/shared_cpu_map", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +getdents(3, /* 0 entries */, 4096) = 0 +close(3) = 0 +fstat(1, {st_mode=S_IFREG|0644, st_size=11425, ...}) = 0 +open("/proc/interrupts", O_RDONLY) = 3 +fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0 +read(3, " CPU0 CPU1 "..., 1024) = 1024 +open("/proc/irq/0", O_RDONLY|O_NONBLOCK|O_DIRECTORY) = 4 +fstat(4, {st_mode=S_IFDIR|0555, st_size=0, ...}) = 0 +fcntl(4, F_SETFD, FD_CLOEXEC) = 0 +getdents(4, /* 3 entries */, 1024) = 80 +open("/proc/irq/0/smp_affinity", O_RDONLY) = 5 +fstat(5, {st_mode=S_IFREG|0600, st_size=0, ...}) = 0 +read(5, "00000000,00000000,00000000,00000"..., 1024) = 72 +close(5) = 0 +getdents(4, /* 0 entries */, 1024) = 0 +close(4) = 0 +read(3, " 0 IO-APIC-edge i8042\n 1"..., 1024) = 1024 +open("/proc/irq/12", O_RDONLY|O_NONBLOCK|O_DIRECTORY) = 4 +fstat(4, {st_mode=S_IFDIR|0555, st_size=0, ...}) = 0 +fcntl(4, F_SETFD, FD_CLOEXEC) = 0 +getdents(4, /* 4 entries */, 1024) = 112 +open("/proc/irq/12/smp_affinity", O_RDONLY) = 5 +fstat(5, {st_mode=S_IFREG|0600, st_size=0, ...}) = 0 +read(5, "00000000,00000000,00000000,00000"..., 1024) = 72 +close(5) = 0 +getdents(4, /* 0 entries */, 1024) = 0 +close(4) = 0 +open("/proc/irq/14", 
O_RDONLY|O_NONBLOCK|O_DIRECTORY) = 4 +fstat(4, {st_mode=S_IFDIR|0555, st_size=0, ...}) = 0 +fcntl(4, F_SETFD, FD_CLOEXEC) = 0 +getdents(4, /* 4 entries */, 1024) = 104 +open("/proc/irq/14/smp_affinity", O_RDONLY) = 5 +fstat(5, {st_mode=S_IFREG|0600, st_size=0, ...}) = 0 +read(5, "00000000,00000000,00000000,00000"..., 1024) = 72 +close(5) = 0 +getdents(4, /* 0 entries */, 1024) = 0 +close(4) = 0 +open("/proc/irq/16", O_RDONLY|O_NONBLOCK|O_DIRECTORY) = 4 +fstat(4, {st_mode=S_IFDIR|0555, st_size=0, ...}) = 0 +fcntl(4, F_SETFD, FD_CLOEXEC) = 0 +getdents(4, /* 4 entries */, 1024) = 112 +open("/proc/irq/16/smp_affinity", O_RDONLY) = 5 +fstat(5, {st_mode=S_IFREG|0600, st_size=0, ...}) = 0 +read(5, "00000000,00000000,00000000,00000"..., 1024) = 72 +close(5) = 0 +getdents(4, /* 0 entries */, 1024) = 0 +close(4) = 0 +open("/proc/irq/19", O_RDONLY|O_NONBLOCK|O_DIRECTORY) = 4 +fstat(4, {st_mode=S_IFDIR|0555, st_size=0, ...}) = 0 +fcntl(4, F_SETFD, FD_CLOEXEC) = 0 +getdents(4, /* 5 entries */, 1024) = 152 +open("/proc/irq/19/smp_affinity", O_RDONLY) = 5 +fstat(5, {st_mode=S_IFREG|0600, st_size=0, ...}) = 0 +read(5, "00000000,00000000,00000000,00000"..., 1024) = 72 +close(5) = 0 +getdents(4, /* 0 entries */, 1024) = 0 +close(4) = 0 +open("/proc/irq/8408", O_RDONLY|O_NONBLOCK|O_DIRECTORY) = 4 +fstat(4, {st_mode=S_IFDIR|0555, st_size=0, ...}) = 0 +fcntl(4, F_SETFD, FD_CLOEXEC) = 0 +getdents(4, /* 4 entries */, 1024) = 104 +open("/proc/irq/8408/smp_affinity", O_RDONLY) = 5 +fstat(5, {st_mode=S_IFREG|0600, st_size=0, ...}) = 0 +read(5, "00000000,00000000,00000000,00000"..., 1024) = 72 +close(5) = 0 +getdents(4, /* 0 entries */, 1024) = 0 +close(4) = 0 +open("/proc/irq/8409", O_RDONLY|O_NONBLOCK|O_DIRECTORY) = 4 +fstat(4, {st_mode=S_IFDIR|0555, st_size=0, ...}) = 0 +fcntl(4, F_SETFD, FD_CLOEXEC) = 0 +getdents(4, /* 4 entries */, 1024) = 104 +open("/proc/irq/8409/smp_affinity", O_RDONLY) = 5 +fstat(5, {st_mode=S_IFREG|0600, st_size=0, ...}) = 0 +read(5, 
"00000000,00000000,00000000,00000"..., 1024) = 72 +close(5) = 0 +getdents(4, /* 0 entries */, 1024) = 0 +close(4) = 0 +close(3) = 0 +rt_sigprocmask(SIG_BLOCK, [CHLD], [], 8) = 0 +rt_sigaction(SIGCHLD, NULL, {SIG_DFL}, 8) = 0 +rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0 +nanosleep({2, 0}, {2, 0}) = 0 +open("/proc/interrupts", O_RDONLY) = 3 +fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0 +read(3, " CPU0 CPU1 "..., 1024) = 1024 +read(3, " 0 IO-APIC-edge i8042\n 1"..., 1024) = 1024 +close(3) = 0 +open("/sys/bus/pci/devices", O_RDONLY|O_NONBLOCK|O_DIRECTORY) = 3 +fstat(3, {st_mode=S_IFDIR|0755, st_size=0, ...}) = 0 +fcntl(3, F_SETFD, FD_CLOEXEC) = 0 +getdents(3, /* 35 entries */, 4096) = 1104 +open("/sys/bus/pci/devices/0000:0b:01.0/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "11\n", 4096) = 3 +close(4) = 0 +open("/sys/bus/pci/devices/0000:0b:01.0/class", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0x030000\n", 4096) = 9 +close(4) = 0 +open("/sys/bus/pci/devices/0000:0b:01.0/local_cpus", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/bus/pci/devices/0000:06:00.1/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "8408\n", 4096) = 5 +close(4) = 0 +open("/sys/bus/pci/devices/0000:06:00.1/class", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0x020000\n", 4096) = 9 +close(4) = 0 +open("/sys/bus/pci/devices/0000:06:00.1/local_cpus", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/bus/pci/devices/0000:06:00.0/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "8409\n", 4096) = 5 +close(4) = 0 +open("/sys/bus/pci/devices/0000:06:00.0/class", O_RDONLY) = 4 +fstat(4, 
{st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0x020000\n", 4096) = 9 +close(4) = 0 +open("/sys/bus/pci/devices/0000:06:00.0/local_cpus", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/bus/pci/devices/0000:04:02.1/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "17\n", 4096) = 3 +close(4) = 0 +open("/sys/bus/pci/devices/0000:04:02.1/class", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0x010000\n", 4096) = 9 +close(4) = 0 +open("/sys/bus/pci/devices/0000:04:02.1/local_cpus", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/bus/pci/devices/0000:04:02.0/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "16\n", 4096) = 3 +close(4) = 0 +open("/sys/bus/pci/devices/0000:04:02.0/class", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0x010000\n", 4096) = 9 +close(4) = 0 +open("/sys/bus/pci/devices/0000:04:02.0/local_cpus", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/bus/pci/devices/0000:03:00.2/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0\n", 4096) = 2 +close(4) = 0 +open("/sys/bus/pci/devices/0000:03:00.0/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0\n", 4096) = 2 +close(4) = 0 +open("/sys/bus/pci/devices/0000:02:02.0/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "8410\n", 4096) = 5 +close(4) = 0 +open("/sys/bus/pci/devices/0000:02:02.0/class", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0x060400\n", 4096) = 9 +close(4) = 0 
+open("/sys/bus/pci/devices/0000:02:02.0/local_cpus", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/bus/pci/devices/0000:02:00.0/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "8411\n", 4096) = 5 +close(4) = 0 +open("/sys/bus/pci/devices/0000:02:00.0/class", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0x060400\n", 4096) = 9 +close(4) = 0 +open("/sys/bus/pci/devices/0000:02:00.0/local_cpus", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/bus/pci/devices/0000:01:00.3/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0\n", 4096) = 2 +close(4) = 0 +open("/sys/bus/pci/devices/0000:01:00.0/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "16\n", 4096) = 3 +close(4) = 0 +open("/sys/bus/pci/devices/0000:01:00.0/class", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0x060400\n", 4096) = 9 +close(4) = 0 +open("/sys/bus/pci/devices/0000:01:00.0/local_cpus", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1f.3/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "19\n", 4096) = 3 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1f.3/class", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0x0c0500\n", 4096) = 9 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1f.3/local_cpus", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1f.2/irq", O_RDONLY) = 4 
+fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "19\n", 4096) = 3 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1f.2/class", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0x010601\n", 4096) = 9 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1f.2/local_cpus", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1f.1/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "18\n", 4096) = 3 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1f.1/class", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0x01018a\n", 4096) = 9 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1f.1/local_cpus", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1f.0/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0\n", 4096) = 2 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1e.0/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0\n", 4096) = 2 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1d.7/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "17\n", 4096) = 3 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1d.7/class", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0x0c0320\n", 4096) = 9 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1d.7/local_cpus", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1d.2/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "18\n", 4096) = 3 +close(4) = 0 
+open("/sys/bus/pci/devices/0000:00:1d.2/class", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0x0c0300\n", 4096) = 9 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1d.2/local_cpus", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1d.1/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "19\n", 4096) = 3 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1d.1/class", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0x0c0300\n", 4096) = 9 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1d.1/local_cpus", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1d.0/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "17\n", 4096) = 3 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1d.0/class", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0x0c0300\n", 4096) = 9 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1d.0/local_cpus", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1c.0/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "8412\n", 4096) = 5 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1c.0/class", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0x060400\n", 4096) = 9 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:1c.0/local_cpus", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:16.0/irq", O_RDONLY) = 4 
+fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0\n", 4096) = 2 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:15.0/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0\n", 4096) = 2 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:13.0/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0\n", 4096) = 2 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:11.0/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0\n", 4096) = 2 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:10.2/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0\n", 4096) = 2 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:10.1/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0\n", 4096) = 2 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:10.0/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0\n", 4096) = 2 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:08.0/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "9\n", 4096) = 2 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:08.0/class", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0x088000\n", 4096) = 9 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:08.0/local_cpus", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:06.0/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "8413\n", 4096) = 5 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:06.0/class", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0x060400\n", 4096) = 9 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:06.0/local_cpus", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, 
st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:04.0/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "8414\n", 4096) = 5 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:04.0/class", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0x060400\n", 4096) = 9 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:04.0/local_cpus", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:02.0/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "8415\n", 4096) = 5 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:02.0/class", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0x060400\n", 4096) = 9 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:02.0/local_cpus", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "00000000,00000000,00000000,00000"..., 4096) = 72 +close(4) = 0 +open("/sys/bus/pci/devices/0000:00:00.0/irq", O_RDONLY) = 4 +fstat(4, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0 +read(4, "0\n", 4096) = 2 +close(4) = 0 +getdents(3, /* 0 entries */, 4096) = 0 +rt_sigprocmask(SIG_BLOCK, [CHLD], [], 8) = 0 +rt_sigaction(SIGCHLD, NULL, {SIG_DFL}, 8) = 0 +rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0 +nanosleep({10, 0}, diff --git a/types.h b/types.h new file mode 100644 index 0000000..763924d --- /dev/null +++ b/types.h @@ -0,0 +1,84 @@ +#ifndef _INCLUDE_GUARD_TYPES_H /* Declares the package, cache_domain, cpu_core and interrupt structures plus the BALANCE_xxx and IRQ_xxx constants. */ +#define _INCLUDE_GUARD_TYPES_H + +#include /* NOTE(review): the header name after #include is missing in this copy; presumably glib.h, since GList is used below - confirm against the original file. */ + +#include "cpumask.h" + +#define BALANCE_NONE 0 /* BALANCE_xxx: presumably the values held in interrupt.balance_level - TODO confirm. */ +#define BALANCE_PACKAGE 1 +#define BALANCE_CACHE 2 +#define BALANCE_CORE 3 + +#define IRQ_OTHER 0 /* IRQ_xxx: presumably the values held in interrupt.class - TODO confirm. */ +#define IRQ_LEGACY 1 +#define IRQ_SCSI 2 +#define IRQ_TIMER 3 +#define IRQ_ETH 4 + + +struct package { /* Per-package record: a workload figure, an id, a CPU mask, per-class counters and two GLists. */ + uint64_t workload; + int number; + +
cpumask_t mask; + + int class_count[7]; /* NOTE(review): the size 7 is unnamed; presumably one counter per IRQ class, yet only five IRQ_xxx values are defined above - confirm. */ + + GList *cache_domains; /* presumably a list of struct cache_domain - confirm against the code that fills it */ + GList *interrupts; /* presumably a list of struct interrupt - confirm against the code that fills it */ +}; + +struct cache_domain { /* Per-cache-domain record; has the same leading fields as struct package plus a marker and the mask of its package. */ + uint64_t workload; + int number; + + int marker; /* NOTE(review): purpose not visible in this header - confirm with the users of this field */ + + cpumask_t mask; + + cpumask_t package_mask; + + int class_count[7]; /* same unnamed size as package.class_count */ + + GList *cpu_cores; /* presumably a list of struct cpu_core - confirm */ + GList *interrupts; +}; + + +struct cpu_core { /* Per-core record; keeps its own mask alongside the masks named for its cache domain and package. */ + uint64_t workload; + int number; + + int marker; /* NOTE(review): purpose not visible in this header - confirm with the users of this field */ + + int class_count[7]; /* same unnamed size as package.class_count */ + + cpumask_t package_mask; + cpumask_t cache_mask; + cpumask_t mask; + + GList *interrupts; +}; + +struct interrupt { /* One record per IRQ; activate_mapping() walks a GList of these. */ + uint64_t workload; + + int balance_level; /* presumably one of the BALANCE_xxx values - TODO confirm */ + + int number; /* IRQ number; activate_mapping() formats it into the /proc/irq/NUMBER/smp_affinity path */ + int class; /* presumably one of the IRQ_xxx values - TODO confirm */ + + uint64_t count; + uint64_t old_count; /* presumably the previous sample of count - confirm */ + uint64_t extra; + + cpumask_t mask; /* affinity mask that activate_mapping() writes out whenever it differs from old_mask */ + cpumask_t old_mask; /* set to mask by activate_mapping() after it has written the new affinity */ + + + cpumask_t numa_mask; /* NOTE(review): presumably a NUMA-locality mask for the IRQ - confirm */ +}; + + +#endif