Adjusting load computation to be fair at all levels

Previously we treated all load as originating from a single cpu.  That's not
true in the event an interrupt is assigned to a higher level object, like a
cache domain.  This change spreads the load out, using a heuristic
whereby we assume that a fair share of interrupts from parent objects is handled
by this cpu, so we only attribute a fraction of the total shared load to each
single cpu.  This allows us to come up with a reasonable load value for
interrupts assigned to cache domains, packages, nodes, etc.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Neil Horman 2011-10-10 15:04:26 -04:00
parent 4fd799cbd1
commit 38b3bb825d
4 changed files with 52 additions and 26 deletions
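
To illustrate the heuristic (a standalone sketch, not the irqbalance code; the names and numbers below are invented), a parent object's interrupt count is split evenly across the objects at the next level down, recursively, and each object adds the interrupts assigned directly to it:

#include <stdio.h>
#include <stdint.h>

/* Hypothetical miniature topology object: knows its parent, how many
 * peer objects split that parent's interrupts, and its own irq count. */
struct obj {
        struct obj *parent;
        unsigned int n_peers;      /* objects at this level splitting the parent's share */
        uint64_t local_irq_count;  /* interrupts from irqs assigned directly here */
};

/* Fair-share estimate: this object is assumed to handle 1/n_peers of the
 * interrupts counted against its parent branch, plus its own interrupts. */
static uint64_t branch_irq_share(struct obj *o)
{
        uint64_t count = 0;

        if (o->parent)
                count = branch_irq_share(o->parent) / o->n_peers;

        return count + o->local_irq_count;
}

int main(void)
{
        /* A cache domain with 1200 shared interrupts feeding 4 cpus, plus
         * 100 interrupts pinned to this cpu: 1200 / 4 + 100 = 400. */
        struct obj cache = { NULL, 1, 1200 };
        struct obj cpu   = { &cache, 4, 100 };

        printf("estimated share for this cpu: %llu\n",
               (unsigned long long)branch_irq_share(&cpu));
        return 0;
}

In the commit itself this kind of estimate is what get_parent_branch_irq_count_share() computes, dividing by the number of objects present at each level of the topology.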


@@ -313,13 +313,10 @@ void migrate_irq(GList **from, GList **to, struct irq_info *info)
 	GList *entry;
 	struct irq_info find, *tmp;;

-	if (from != NULL) {
-		find.irq = info->irq;
-		entry = g_list_find_custom(*from, &find, compare_ints);
-		tmp = entry->data;
-		*from = g_list_delete_link(*from, entry);
-	} else
-		tmp = info;
+	find.irq = info->irq;
+	entry = g_list_find_custom(*from, &find, compare_ints);
+	tmp = entry->data;
+	*from = g_list_delete_link(*from, entry);

 	*to = g_list_append(*to, tmp);


@@ -143,8 +143,11 @@ static void force_rebalance_irq(struct irq_info *info, void *data __attribute__((unused)))
 	if (info->level == BALANCE_NONE)
 		return;

-	migrate_irq((info->assigned_obj ? &info->assigned_obj->interrupts : NULL),
-		    &rebalance_irq_list, info);
+	if (info->assigned_obj == NULL)
+		rebalance_irq_list = g_list_append(rebalance_irq_list, info);
+	else
+		migrate_irq(&info->assigned_obj->interrupts, &rebalance_irq_list, info);
 	info->assigned_obj = NULL;
 }


@@ -181,16 +181,13 @@ static void validate_object_tree_placement()
 void calculate_placement(void)
 {
 	/* first clear old data */
 	clear_work_stats();

 	sort_irq_list(&rebalance_irq_list);
-	for_each_irq(rebalance_irq_list, place_irq_in_node, NULL);
-	for_each_object(numa_nodes, place_irq_in_object, NULL);
-	for_each_object(packages, place_irq_in_object, NULL);
-	for_each_object(cache_domains, place_irq_in_object, NULL);
+	if (g_list_length(rebalance_irq_list) > 0) {
+		for_each_irq(rebalance_irq_list, place_irq_in_node, NULL);
+		for_each_object(numa_nodes, place_irq_in_object, NULL);
+		for_each_object(packages, place_irq_in_object, NULL);
+		for_each_object(cache_domains, place_irq_in_object, NULL);
+	}

 	if (debug_mode)
 		validate_object_tree_placement();
 }


@@ -128,16 +128,45 @@ static void assign_load_slice(struct irq_info *info, void *data)
 	info->load = (info->irq_count - info->last_irq_count) * *load_slice;
 }

-static void compute_irq_load_share(struct topo_obj *d, void *data __attribute__((unused)))
+/*
+ * Recursive helper to estimate the number of irqs shared between
+ * multiple topology objects that was handled by this particular object
+ */
+static uint64_t get_parent_branch_irq_count_share(struct topo_obj *d)
+{
+	uint64_t total_irq_count = 0;
+
+	if (d->parent) {
+		total_irq_count = get_parent_branch_irq_count_share(d->parent);
+		total_irq_count /= g_list_length(*d->obj_type_list);
+	}
+
+	if (g_list_length(d->interrupts) > 0)
+		for_each_irq(d->interrupts, accumulate_irq_count, &total_irq_count);
+
+	return total_irq_count;
+}
+
+static void compute_irq_branch_load_share(struct topo_obj *d, void *data __attribute__((unused)))
 {
 	uint64_t total_irq_counts = 0;
+	uint64_t local_irq_counts = 0;
 	uint64_t load_slice;

-	for_each_irq(d->interrupts, accumulate_irq_count, &total_irq_counts);
+	total_irq_counts = get_parent_branch_irq_count_share(d);

-	load_slice = total_irq_counts ? (d->load / total_irq_counts) : 1;
+	load_slice = local_irq_counts ? (d->load / local_irq_counts) : 1;

-	for_each_irq(d->interrupts, assign_load_slice, &load_slice);
+	if (g_list_length(d->interrupts) > 0) {
+		for_each_irq(d->interrupts, accumulate_irq_count, &local_irq_counts);
+		for_each_irq(d->interrupts, assign_load_slice, &load_slice);
+	}
+
+	if (d->parent) {
+		load_slice = total_irq_counts ? (d->load / total_irq_counts) : 1;
+		d->parent->load += (total_irq_counts - local_irq_counts) * load_slice;
+	}
 }

 void parse_proc_stat()
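
To put rough numbers on get_parent_branch_irq_count_share (figures invented for illustration): if a package counted 4000 interrupts over the interval and the system has 4 cache domains, each cache domain is assumed to have serviced 1000 of them; a cache domain that also handled 200 interrupts of its own then reports a branch share of 1200, and with 4 cpus at the next level each cpu is assumed to account for 1200 / 4 = 300 of those, plus whatever is assigned to the cpu directly. compute_irq_branch_load_share then divides the object's measured load among its locally assigned irqs and credits the load corresponding to the non-local remainder of the branch share back to the parent object.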
@@ -189,9 +218,6 @@ void parse_proc_stat()
		 * all the way up the device tree
		 */
		cpu->load = irq_load + softirq_load;
-		cpu_cache_domain(cpu)->load += cpu->load;
-		cpu_package(cpu)->load += cpu->load;
-		cpu_numa_node(cpu)->load += cpu->load;
	}

	fclose(file);
@@ -204,6 +230,9 @@ void parse_proc_stat()
	 * Now that we have load for each cpu attribute a fair share of the load
	 * to each irq on that cpu
	 */
-	for_each_object(cpus, compute_irq_load_share, NULL);
+	for_each_object(cpus, compute_irq_branch_load_share, NULL);
+	for_each_object(cache_domains, compute_irq_branch_load_share, NULL);
+	for_each_object(packages, compute_irq_branch_load_share, NULL);
+	for_each_object(numa_nodes, compute_irq_branch_load_share, NULL);
 }
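
The ordering of the for_each_object() calls above matters: each level pushes the load it attributes to shared interrupts into its parent's load field, so children must be processed before their parents. A minimal standalone sketch of that bottom-up pass (hypothetical structures and numbers, not the irqbalance code):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical sketch: each object charges its own irqs for the locally
 * handled part of its load and forwards the rest to its parent, so lower
 * levels have to be processed before higher ones. */
struct node {
        struct node *parent;
        uint64_t load;         /* load measured at (or forwarded to) this level */
        uint64_t local_irqs;   /* interrupts from irqs assigned to this object  */
        uint64_t branch_irqs;  /* local irqs plus fair share of parent's irqs   */
};

static void share_load(struct node *n)
{
        uint64_t slice = n->branch_irqs ? n->load / n->branch_irqs : 1;

        /* load belonging to interrupts this object did not own moves up */
        if (n->parent)
                n->parent->load += (n->branch_irqs - n->local_irqs) * slice;
}

int main(void)
{
        struct node cache = { NULL,   0,    200, 200 };
        struct node cpu0  = { &cache, 3000, 500, 600 };
        struct node cpu1  = { &cache, 1800, 300, 400 };

        /* children first, then the parent, mirroring the cpus ->
         * cache_domains -> packages -> numa_nodes ordering above */
        share_load(&cpu0);
        share_load(&cpu1);
        share_load(&cache);

        printf("cache domain load after sharing: %llu\n",
               (unsigned long long)cache.load);
        return 0;
}

Run as shown, the two cpus together credit 900 units of load to their cache domain, which a pass over the cache_domains level would in turn split among its own irqs or forward toward the package.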