The following commit has been merged in the linux branch:
commit 608221fdf9a2170962295dcfbea53dc5c50d1a74
Merge: 72cc129e8dae988d2a132467cfd0ecd7623c35fb b84ff7d6f1b7f8a43414e74d972ec4c8f3361db4
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Thu Nov 5 10:56:47 2009 -0800
Merge branch 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  sched: Fix kthread_bind() by moving the body of kthread_bind() to sched.c
  sched: Disable SD_PREFER_LOCAL at node level
  sched: Fix boot crash by zalloc()ing most of the cpu masks
  sched: Strengthen buddies and mitigate buddy induced latencies
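[Editorial note, not part of the commit: the first fix above moves kthread_bind() into sched.c; the function must be called on a thread that has not run yet, i.e. straight after kthread_create(). A minimal, illustrative sketch of that calling pattern follows; my_worker_fn and start_bound_worker are made-up names.]

#include <linux/err.h>
#include <linux/kthread.h>
#include <linux/sched.h>

/* Hypothetical thread function: sleep until someone calls kthread_stop(). */
static int my_worker_fn(void *data)
{
	set_current_state(TASK_INTERRUPTIBLE);
	while (!kthread_should_stop()) {
		schedule();
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);
	return 0;
}

/* Create a kthread and pin it to @cpu before it ever runs. */
static struct task_struct *start_bound_worker(unsigned int cpu)
{
	struct task_struct *p;

	p = kthread_create(my_worker_fn, NULL, "my_worker/%u", cpu);
	if (IS_ERR(p))
		return p;

	kthread_bind(p, cpu);	/* thread is still stopped; cpu need not be online */
	wake_up_process(p);
	return p;
}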
diff --combined kernel/sched.c
index a455dca,5cb7d63..28dd4f4
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@@ -1564,7 -1564,11 +1564,7 @@@ static unsigned long cpu_avg_load_per_t
  #ifdef CONFIG_FAIR_GROUP_SCHED
  
 -struct update_shares_data {
 -	unsigned long rq_weight[NR_CPUS];
 -};
 -
 -static DEFINE_PER_CPU(struct update_shares_data, update_shares_data);
 +static __read_mostly unsigned long *update_shares_data;
  
  static void __set_se_shares(struct sched_entity *se, unsigned long shares);
@@@ -1574,12 -1578,12 +1574,12 @@@
  static void update_group_shares_cpu(struct task_group *tg, int cpu,
  				    unsigned long sd_shares,
  				    unsigned long sd_rq_weight,
 -				    struct update_shares_data *usd)
 +				    unsigned long *usd_rq_weight)
  {
  	unsigned long shares, rq_weight;
  	int boost = 0;
 -	rq_weight = usd->rq_weight[cpu];
 +	rq_weight = usd_rq_weight[cpu];
  	if (!rq_weight) {
  		boost = 1;
  		rq_weight = NICE_0_LOAD;
  	}
@@@ -1614,7 -1618,7 +1614,7 @@@
  static int tg_shares_up(struct task_group *tg, void *data)
  {
  	unsigned long weight, rq_weight = 0, shares = 0;
 -	struct update_shares_data *usd;
 +	unsigned long *usd_rq_weight;
  	struct sched_domain *sd = data;
  	unsigned long flags;
  	int i;
@@@ -1623,11 -1627,11 +1623,11 @@@
  		return 0;
  
  	local_irq_save(flags);
 -	usd = &__get_cpu_var(update_shares_data);
 +	usd_rq_weight = per_cpu_ptr(update_shares_data, smp_processor_id());
  
  	for_each_cpu(i, sched_domain_span(sd)) {
  		weight = tg->cfs_rq[i]->load.weight;
 -		usd->rq_weight[i] = weight;
 +		usd_rq_weight[i] = weight;
  
  		/*
  		 * If there are currently no tasks on the cpu pretend there
@@@ -1648,7 -1652,7 +1648,7 @@@
  		shares = tg->shares;
  
  	for_each_cpu(i, sched_domain_span(sd))
 -		update_group_shares_cpu(tg, i, shares, rq_weight, usd);
 +		update_group_shares_cpu(tg, i, shares, rq_weight, usd_rq_weight);
  
  	local_irq_restore(flags);
@@@ -1992,6 -1996,38 +1992,38 @@@ static inline void check_class_changed(
  		p->sched_class->prio_changed(rq, p, oldprio, running);
  }
  
+ /**
+  * kthread_bind - bind a just-created kthread to a cpu.
+  * @k: thread created by kthread_create().
+  * @cpu: cpu (might not be online, must be possible) for @k to run on.
+  *
+  * Description: This function is equivalent to set_cpus_allowed(),
+  * except that @cpu doesn't need to be online, and the thread must be
+  * stopped (i.e., just returned from kthread_create()).
+  *
+  * Function lives here instead of kthread.c because it messes with
+  * scheduler internals which require locking.
+  */
+ void kthread_bind(struct task_struct *p, unsigned int cpu)
+ {
+ 	struct rq *rq = cpu_rq(cpu);
+ 	unsigned long flags;
+ 
+ 	/* Must have done schedule() in kthread() before we set_task_cpu */
+ 	if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) {
+ 		WARN_ON(1);
+ 		return;
+ 	}
+ 
+ 	spin_lock_irqsave(&rq->lock, flags);
+ 	set_task_cpu(p, cpu);
+ 	p->cpus_allowed = cpumask_of_cpu(cpu);
+ 	p->rt.nr_cpus_allowed = 1;
+ 	p->flags |= PF_THREAD_BOUND;
+ 	spin_unlock_irqrestore(&rq->lock, flags);
+ }
+ EXPORT_SYMBOL(kthread_bind);
+ 
  #ifdef CONFIG_SMP
  /*
   * Is this task likely cache-hot:
@@@ -2004,7 -2040,7 +2036,7 @@@ task_hot(struct task_struct *p, u64 now
  	/*
  	 * Buddy candidates are cache hot:
  	 */
- 	if (sched_feat(CACHE_HOT_BUDDY) &&
+ 	if (sched_feat(CACHE_HOT_BUDDY) && this_rq()->nr_running &&
  			(&p->se == cfs_rq_of(&p->se)->next ||
  			 &p->se == cfs_rq_of(&p->se)->last))
  		return 1;
@@@ -6720,6 -6756,9 +6752,6 @@@ EXPORT_SYMBOL(yield)
  /*
   * This task is about to go to sleep on IO. Increment rq->nr_iowait so
   * that process accounting knows that this is a task in IO wait state.
 - *
 - * But don't do that if it is a deliberate, throttling IO wait (this task
 - * has set its backing_dev_info: the queue against which it should throttle)
   */
  void __sched io_schedule(void)
  {
@@@ -9403,10 -9442,6 +9435,10 @@@ void __init sched_init(void
  #endif /* CONFIG_USER_SCHED */
  #endif /* CONFIG_GROUP_SCHED */
  
 +#if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP
 +	update_shares_data = __alloc_percpu(nr_cpu_ids * sizeof(unsigned long),
 +					    __alignof__(unsigned long));
 +#endif
  	for_each_possible_cpu(i) {
  		struct rq *rq;
  
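[Editorial note, not part of the commit: the sched_init() hunk above pairs with the per_cpu_ptr() access in tg_shares_up(): instead of a DEFINE_PER_CPU array sized by the compile-time NR_CPUS, each CPU now gets a dynamically allocated array of nr_cpu_ids weights, presumably so the static percpu area no longer grows with NR_CPUS. A rough stand-alone sketch of that allocate/access pattern, with hypothetical names (my_weights, my_weights_init, my_weights_record):]

#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/percpu.h>
#include <linux/smp.h>

static unsigned long *my_weights;	/* hypothetical per-cpu weight arrays */

static int __init my_weights_init(void)
{
	/* One array of nr_cpu_ids longs per possible CPU. */
	my_weights = __alloc_percpu(nr_cpu_ids * sizeof(unsigned long),
				    __alignof__(unsigned long));
	return my_weights ? 0 : -ENOMEM;
}

static void my_weights_record(int target_cpu, unsigned long w)
{
	unsigned long *buf;

	/* This CPU's copy; caller keeps preemption/IRQs off, as tg_shares_up()
	 * does around local_irq_save()/local_irq_restore(). */
	buf = per_cpu_ptr(my_weights, smp_processor_id());
	buf[target_cpu] = w;
}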
@@@ -9532,13 -9567,13 +9564,13 @@@
  	current->sched_class = &fair_sched_class;
  
  	/* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */
- 	alloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT);
+ 	zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT);
  #ifdef CONFIG_SMP
  #ifdef CONFIG_NO_HZ
- 	alloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT);
+ 	zalloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT);
  	alloc_cpumask_var(&nohz.ilb_grp_nohz_mask, GFP_NOWAIT);
  #endif
- 	alloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
+ 	zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
  #endif /* SMP */
  
  	perf_event_init();
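[Editorial note, not part of the commit: on the zalloc_cpumask_var() changes in the last hunk: with CONFIG_CPUMASK_OFFSTACK the cpumask is heap-allocated, so alloc_cpumask_var() returns uninitialized bits, and a mask that is read before every bit has been explicitly set (nohz_cpu_mask, cpu_isolated_map, ...) has to start out zeroed. A small illustrative sketch, with my_mask and my_mask_init as made-up names:]

#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/gfp.h>

static cpumask_var_t my_mask;	/* hypothetical mask that is read before it is fully populated */

static int __init my_mask_init(void)
{
	/* zalloc_cpumask_var() behaves like alloc_cpumask_var() + cpumask_clear() */
	if (!zalloc_cpumask_var(&my_mask, GFP_KERNEL))
		return -ENOMEM;

	/* my_mask now reads as empty even in the CONFIG_CPUMASK_OFFSTACK case. */
	return 0;
}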