Attachment 'omip-ecrts13.patch'
1 diff --git a/include/litmus/debug_trace.h b/include/litmus/debug_trace.h
2 index 48d086d..99be3af 100644
3 --- a/include/litmus/debug_trace.h
4 +++ b/include/litmus/debug_trace.h
5 @@ -4,10 +4,18 @@
6 #ifdef CONFIG_SCHED_DEBUG_TRACE
7 void sched_trace_log_message(const char* fmt, ...);
8 void dump_trace_buffer(int max);
9 +
10 #else
11
12 #define sched_trace_log_message(fmt, ...)
13 +#define dump_trace_buffer(max)
14 +
15 +#endif
16
17 +#ifdef CONFIG_SCHED_DEBUG_DUMP_ON_OOPS
18 +#define litmus_dump_trace() dump_trace_buffer(0)
19 +#else
20 +#define litmus_dump_trace()
21 #endif
22
23 extern atomic_t __log_seq_no;
24 @@ -28,8 +36,11 @@ extern atomic_t __log_seq_no;
25 TRACE_ARGS, ## args)
26
27 #define TRACE_TASK(t, fmt, args...) \
28 - TRACE("(%s/%d:%d) " fmt, (t)->comm, (t)->pid, \
29 - (t)->rt_param.job_params.job_no, ##args)
30 + TRACE("(%s/%d:%d) " fmt, \
31 + t ? (t)->comm : "null", \
32 + t ? (t)->pid : 0, \
33 + t ? (t)->rt_param.job_params.job_no : 0, \
34 + ##args)
35
36 #define TRACE_CUR(fmt, args...) \
37 TRACE_TASK(current, fmt, ## args)
38 diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h
39 index f2115b8..181da1c 100644
40 --- a/include/litmus/fdso.h
41 +++ b/include/litmus/fdso.h
42 @@ -1,6 +1,6 @@
43 /* fdso.h - file descriptor attached shared objects
44 *
45 - * (c) 2007 B. Brandenburg, LITMUS^RT project
46 + * (c) 2007-2013 B. Brandenburg, LITMUS^RT project
47 */
48
49 #ifndef _LINUX_FDSO_H_
50 @@ -26,7 +26,10 @@ typedef enum {
51
52 PCP_SEM = 5,
53
54 - MAX_OBJ_TYPE = 5
55 + OMLP_SEM = 6,
56 + OMIP_SEM = 7,
57 +
58 + MAX_OBJ_TYPE = 7
59 } obj_type_t;
60
61 struct inode_obj_id {
62 diff --git a/include/litmus/rt_domain.h b/include/litmus/rt_domain.h
63 index ac24929..c341258 100644
64 --- a/include/litmus/rt_domain.h
65 +++ b/include/litmus/rt_domain.h
66 @@ -60,8 +60,14 @@ struct release_heap {
67 /* used to delegate releases */
68 struct hrtimer_start_on_info info;
69 #endif
70 - /* required for the timer callback */
71 - rt_domain_t* dom;
72 +
73 + union {
74 + /* required for the timer callback */
75 + rt_domain_t* dom;
76 +
77 + /* To simplify per-task timers. */
78 + struct task_struct* task;
79 + };
80 };
81
82
83 diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
84 index 4cd06dd..4ddc790 100644
85 --- a/include/litmus/rt_param.h
86 +++ b/include/litmus/rt_param.h
87 @@ -119,6 +119,8 @@ struct _rt_domain;
88 struct bheap_node;
89 struct release_heap;
90
91 +struct migratory_prio_inh;
92 +
93 struct rt_job {
94 /* Time instant the the job was or will be released. */
95 lt_t release;
96 @@ -182,7 +184,14 @@ struct rt_param {
97 * could point to self if PI does not result in
98 * an increased task priority.
99 */
100 - struct task_struct* inh_task;
101 + struct task_struct* inh_task;
102 +
103 + int priodon_state;
104 +
105 + struct migratory_prio_inh* mpi;
106 +
107 + /* priority donation support */
108 + int donor_cpu;
109
110 #ifdef CONFIG_NP_SECTION
111 /* For the FMLP under PSN-EDF, it is required to make the task
112 diff --git a/kernel/panic.c b/kernel/panic.c
113 index 6923167..d87528c 100644
114 --- a/kernel/panic.c
115 +++ b/kernel/panic.c
116 @@ -24,6 +24,8 @@
117 #include <linux/nmi.h>
118 #include <linux/dmi.h>
119
120 +#include <litmus/debug_trace.h>
121 +
122 #define PANIC_TIMER_STEP 100
123 #define PANIC_BLINK_SPD 18
124
125 @@ -77,6 +79,7 @@ NORET_TYPE void panic(const char * fmt, ...)
126 vsnprintf(buf, sizeof(buf), fmt, args);
127 va_end(args);
128 printk(KERN_EMERG "Kernel panic - not syncing: %s\n",buf);
129 + litmus_dump_trace();
130 #ifdef CONFIG_DEBUG_BUGVERBOSE
131 dump_stack();
132 #endif
133 @@ -354,6 +357,7 @@ void oops_exit(void)
134 {
135 do_oops_enter_exit();
136 print_oops_end_marker();
137 + litmus_dump_trace();
138 kmsg_dump(KMSG_DUMP_OOPS);
139 }
140
141 diff --git a/kernel/sched.c b/kernel/sched.c
142 index c4b6bd5..0d013b0 100644
143 --- a/kernel/sched.c
144 +++ b/kernel/sched.c
145 @@ -5317,6 +5317,7 @@ recheck:
146
147 p->sched_reset_on_fork = reset_on_fork;
148
149 + preempt_disable();
150 if (p->policy == SCHED_LITMUS)
151 litmus_exit_task(p);
152
153 @@ -5330,6 +5331,8 @@ recheck:
154 litmus->task_new(p, on_rq, running);
155 }
156
157 + preempt_enable();
158 +
159 if (running)
160 p->sched_class->set_curr_task(rq);
161 if (on_rq)
162 diff --git a/lib/bug.c b/lib/bug.c
163 index 1955209..eac01fe 100644
164 --- a/lib/bug.c
165 +++ b/lib/bug.c
166 @@ -43,6 +43,7 @@
167 #include <linux/bug.h>
168 #include <linux/sched.h>
169
170 +#include <litmus/debug_trace.h>
171 extern const struct bug_entry __start___bug_table[], __stop___bug_table[];
172
173 static inline unsigned long bug_addr(const struct bug_entry *bug)
174 @@ -179,5 +180,7 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
175 "[verbose debug info unavailable]\n",
176 (void *)bugaddr);
177
178 + litmus_dump_trace();
179 +
180 return BUG_TRAP_TYPE_BUG;
181 }
182 diff --git a/litmus/Kconfig b/litmus/Kconfig
183 index bd6635c..1e6cbfe 100644
184 --- a/litmus/Kconfig
185 +++ b/litmus/Kconfig
186 @@ -263,6 +263,17 @@ config SCHED_DEBUG_TRACE_CALLER
187
188 If unsure, say No.
189
190 +config SCHED_DEBUG_DUMP_ON_OOPS
191 + bool "Dump TRACE() log in case of BUG/OOPS"
192 + depends on SCHED_DEBUG_TRACE
193 + default n
194 + help
195 + With this option enabled, panic() and friends try to dump the remaining
196 + contents of the TRACE() buffer to the console. This creates a fair bit
197 + of clutter, but can be helpful in getting the last messages out.
198 +
199 + If unsure, say No.
200 +
201 config PREEMPT_STATE_TRACE
202 bool "Trace preemption state machine transitions"
203 depends on SCHED_DEBUG_TRACE && DEBUG_KERNEL
204 diff --git a/litmus/fdso.c b/litmus/fdso.c
205 index 250377d..2a88197 100644
206 --- a/litmus/fdso.c
207 +++ b/litmus/fdso.c
208 @@ -1,6 +1,6 @@
209 /* fdso.c - file descriptor attached shared objects
210 *
211 - * (c) 2007 B. Brandenburg, LITMUS^RT project
212 + * (c) 2007-2013 B. Brandenburg, LITMUS^RT project
213 *
214 * Notes:
215 * - objects descriptor (OD) tables are not cloned during a fork.
216 @@ -27,10 +27,14 @@ static const struct fdso_ops* fdso_ops[] = {
217 &generic_lock_ops, /* MPCP_VS_SEM */
218 &generic_lock_ops, /* DPCP_SEM */
219 &generic_lock_ops, /* PCP_SEM */
220 + &generic_lock_ops, /* OMLP_SEM */
221 + &generic_lock_ops, /* OMIP_SEM */
222 };
223
224 static int fdso_create(void** obj_ref, obj_type_t type, void* __user config)
225 {
226 + BUILD_BUG_ON(ARRAY_SIZE(fdso_ops) != MAX_OBJ_TYPE + 1);
227 +
228 if (fdso_ops[type]->create)
229 return fdso_ops[type]->create(obj_ref, type, config);
230 else
231 diff --git a/litmus/litmus.c b/litmus/litmus.c
232 index 9c6b738..dc94be7 100644
233 --- a/litmus/litmus.c
234 +++ b/litmus/litmus.c
235 @@ -10,6 +10,7 @@
236 #include <linux/module.h>
237 #include <linux/slab.h>
238 #include <linux/reboot.h>
239 +#include <linux/stop_machine.h>
240
241 #include <litmus/litmus.h>
242 #include <litmus/bheap.h>
243 @@ -24,9 +25,6 @@
244
245 /* Number of RT tasks that exist in the system */
246 atomic_t rt_task_count = ATOMIC_INIT(0);
247 -static DEFINE_RAW_SPINLOCK(task_transition_lock);
248 -/* synchronize plugin switching */
249 -atomic_t cannot_use_plugin = ATOMIC_INIT(0);
250
251 /* Give log messages sequential IDs. */
252 atomic_t __log_seq_no = ATOMIC_INIT(0);
253 @@ -322,10 +320,12 @@ static void reinit_litmus_state(struct task_struct* p, int restore)
254 long litmus_admit_task(struct task_struct* tsk)
255 {
256 long retval = 0;
257 - unsigned long flags;
258
259 BUG_ON(is_realtime(tsk));
260
261 + tsk_rt(tsk)->heap_node = NULL;
262 + tsk_rt(tsk)->rel_heap = NULL;
263 +
264 if (get_rt_relative_deadline(tsk) == 0 ||
265 get_exec_cost(tsk) >
266 min(get_rt_relative_deadline(tsk), get_rt_period(tsk)) ) {
267 @@ -347,9 +347,6 @@ long litmus_admit_task(struct task_struct* tsk)
268
269 INIT_LIST_HEAD(&tsk_rt(tsk)->list);
270
271 - /* avoid scheduler plugin changing underneath us */
272 - raw_spin_lock_irqsave(&task_transition_lock, flags);
273 -
274 /* allocate heap node for this task */
275 tsk_rt(tsk)->heap_node = bheap_node_alloc(GFP_ATOMIC);
276 tsk_rt(tsk)->rel_heap = release_heap_alloc(GFP_ATOMIC);
277 @@ -357,15 +354,14 @@ long litmus_admit_task(struct task_struct* tsk)
278 if (!tsk_rt(tsk)->heap_node || !tsk_rt(tsk)->rel_heap) {
279 printk(KERN_WARNING "litmus: no more heap node memory!?\n");
280
281 - bheap_node_free(tsk_rt(tsk)->heap_node);
282 - release_heap_free(tsk_rt(tsk)->rel_heap);
283 -
284 retval = -ENOMEM;
285 - goto out_unlock;
286 + goto out;
287 } else {
288 bheap_node_init(&tsk_rt(tsk)->heap_node, tsk);
289 }
290
291 + preempt_disable();
292 +
293 retval = litmus->admit_task(tsk);
294
295 if (!retval) {
296 @@ -374,9 +370,13 @@ long litmus_admit_task(struct task_struct* tsk)
297 atomic_inc(&rt_task_count);
298 }
299
300 -out_unlock:
301 - raw_spin_unlock_irqrestore(&task_transition_lock, flags);
302 + preempt_enable();
303 +
304 out:
305 + if (retval) {
306 + bheap_node_free(tsk_rt(tsk)->heap_node);
307 + release_heap_free(tsk_rt(tsk)->rel_heap);
308 + }
309 return retval;
310 }
311
312 @@ -396,37 +396,10 @@ void litmus_exit_task(struct task_struct* tsk)
313 }
314 }
315
316 -/* IPI callback to synchronize plugin switching */
317 -static void synch_on_plugin_switch(void* info)
318 -{
319 - atomic_inc(&cannot_use_plugin);
320 - while (atomic_read(&cannot_use_plugin) > 0)
321 - cpu_relax();
322 -}
323 -
324 -/* Switching a plugin in use is tricky.
325 - * We must watch out that no real-time tasks exists
326 - * (and that none is created in parallel) and that the plugin is not
327 - * currently in use on any processor (in theory).
328 - */
329 -int switch_sched_plugin(struct sched_plugin* plugin)
330 +static int do_plugin_switch(void *_plugin)
331 {
332 - unsigned long flags;
333 - int ret = 0;
334 -
335 - BUG_ON(!plugin);
336 -
337 - /* forbid other cpus to use the plugin */
338 - atomic_set(&cannot_use_plugin, 1);
339 - /* send IPI to force other CPUs to synch with us */
340 - smp_call_function(synch_on_plugin_switch, NULL, 0);
341 -
342 - /* wait until all other CPUs have started synch */
343 - while (atomic_read(&cannot_use_plugin) < num_online_cpus())
344 - cpu_relax();
345 -
346 - /* stop task transitions */
347 - raw_spin_lock_irqsave(&task_transition_lock, flags);
348 + int ret;
349 + struct sched_plugin* plugin = _plugin;
350
351 /* don't switch if there are active real-time tasks */
352 if (atomic_read(&rt_task_count) == 0) {
353 @@ -444,11 +417,24 @@ int switch_sched_plugin(struct sched_plugin* plugin)
354 } else
355 ret = -EBUSY;
356 out:
357 - raw_spin_unlock_irqrestore(&task_transition_lock, flags);
358 - atomic_set(&cannot_use_plugin, 0);
359 return ret;
360 }
361
362 +/* Switching a plugin in use is tricky.
363 + * We must watch out that no real-time tasks exist
364 + * (and that none is created in parallel) and that the plugin is not
365 + * currently in use on any processor (in theory).
366 + */
367 +int switch_sched_plugin(struct sched_plugin* plugin)
368 +{
369 + BUG_ON(!plugin);
370 +
371 + if (atomic_read(&rt_task_count) == 0)
372 + return stop_machine(do_plugin_switch, plugin, NULL);
373 + else
374 + return -EBUSY;
375 +}
376 +
377 /* Called upon fork.
378 * p is the newly forked task.
379 */
380 diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
381 index b45b46f..6762d48 100644
382 --- a/litmus/sched_cedf.c
383 +++ b/litmus/sched_cedf.c
384 @@ -1,7 +1,7 @@
385 /*
386 * litmus/sched_cedf.c
387 *
388 - * Implementation of the C-EDF scheduling algorithm.
389 + * Implementation of the C-EDF scheduling algorithm with priority donation.
390 *
391 * This implementation is based on G-EDF:
392 * - CPUs are clustered around L2 or L3 caches.
393 @@ -15,17 +15,15 @@
394 * supported clusters are: L1 (private cache: pedf), L2, L3, ALL (all
395 * online_cpus are placed in a single cluster).
396 *
397 - * For details on functions, take a look at sched_gsn_edf.c
398 - *
399 * Currently, we do not support changes in the number of online cpus.
400 * If the num_online_cpus() dynamically changes, the plugin is broken.
401 *
402 * This version uses the simple approach and serializes all scheduling
403 - * decisions by the use of a queue lock. This is probably not the
404 - * best way to do it, but it should suffice for now.
405 + * decisions by the use of a queue lock.
406 */
407
408 #include <linux/spinlock.h>
409 +#include <linux/cpumask.h>
410 #include <linux/percpu.h>
411 #include <linux/sched.h>
412 #include <linux/slab.h>
413 @@ -39,6 +37,9 @@
414 #include <litmus/sched_plugin.h>
415 #include <litmus/edf_common.h>
416 #include <litmus/sched_trace.h>
417 +#include <litmus/trace.h>
418 +#include <litmus/fdso.h>
419 +#include <litmus/wait.h>
420
421 #include <litmus/clustered.h>
422
423 @@ -70,52 +71,228 @@ typedef struct {
424 struct clusterdomain* cluster; /* owning cluster */
425 struct task_struct* linked; /* only RT tasks */
426 struct task_struct* scheduled; /* only RT tasks */
427 - atomic_t will_schedule; /* prevent unneeded IPIs */
428 struct bheap_node* hn;
429 +
430 + struct task_struct* pd_task; /* priority donation */
431 } cpu_entry_t;
432
433 /* one cpu_entry_t per CPU */
434 DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries);
435
436 -#define set_will_schedule() \
437 - (atomic_set(&__get_cpu_var(cedf_cpu_entries).will_schedule, 1))
438 -#define clear_will_schedule() \
439 - (atomic_set(&__get_cpu_var(cedf_cpu_entries).will_schedule, 0))
440 -#define test_will_schedule(cpu) \
441 - (atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule))
442 -
443 /*
444 * In C-EDF there is a cedf domain _per_ cluster
445 * The number of clusters is dynamically determined accordingly to the
446 * total cpu number and the cluster size
447 */
448 typedef struct clusterdomain {
449 - /* rt_domain for this cluster */
450 - rt_domain_t domain;
451 + /* lock for this cluster */
452 + raw_spinlock_t cluster_lock;
453 + /* ready queue for this cluster */
454 + struct list_head job_queue;
455 /* cpus in this cluster */
456 - cpu_entry_t* *cpus;
457 + cpu_entry_t* *cpus;
458 + /* which cluster is this? */
459 + int cluster;
460 + /* how many CPUs are in this cluster? */
461 + int num_cpus;
462 /* map of this cluster cpus */
463 - cpumask_var_t cpu_map;
464 + cpumask_var_t cpu_map;
465 /* the cpus queue themselves according to priority in here */
466 - struct bheap_node *heap_node;
467 - struct bheap cpu_heap;
468 - /* lock for this cluster */
469 -#define cluster_lock domain.ready_lock
470 + struct bheap_node *heap_node;
471 + struct bheap cpu_heap;
472 } cedf_domain_t;
473
474 /* a cedf_domain per cluster; allocation is done at init/activation time */
475 cedf_domain_t *cedf;
476
477 +#define remote_cpu(cpu) (&per_cpu(cedf_cpu_entries, cpu))
478 #define remote_cluster(cpu) ((cedf_domain_t *) per_cpu(cedf_cpu_entries, cpu).cluster)
479 #define task_cpu_cluster(task) remote_cluster(get_partition(task))
480
481 /* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling
482 * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose
483 * information during the initialization of the plugin (e.g., topology)
484 -#define WANT_ALL_SCHED_EVENTS
485 +
486 */
487 +#define WANT_ALL_SCHED_EVENTS
488 #define VERBOSE_INIT
489
490 +#define NOT_WAITING 0
491 +#define WAITING_TO_BE_LINKED 1
492 +#define WAITING_FOR_DONATION_END 2
493 +
494 +
495 +static int is_waiting_to_be_linked(struct task_struct *t)
496 +{
497 + return t && t->rt_param.priodon_state == WAITING_TO_BE_LINKED;
498 +}
499 +
500 +static int is_waiting_for_donation_end(struct task_struct *t)
501 +{
502 + return t && t->rt_param.priodon_state == WAITING_FOR_DONATION_END;
503 +}
504 +
505 +
506 +/* When the task is currently not subject to cross-cluster
507 + * migration. */
508 +#define NOT_XCLUSTER_MIGRATORY (-3)
509 +#define ON_HOME_CLUSTER (-2)
510 +
511 +struct migratory_prio_inh {
512 + raw_spinlock_t lock;
513 +
514 + struct task_struct *owner;
515 + struct cpumask scheduling_candidates;
516 +
517 + int scheduled_on;
518 +};
519 +
520 +#define MPI_EXIT_CS ((struct migratory_prio_inh *) 0x123)
521 +
522 +void mpi_init(struct migratory_prio_inh *mpi)
523 +{
524 + raw_spin_lock_init(&mpi->lock);
525 + cpumask_clear(&mpi->scheduling_candidates);
526 + mpi->scheduled_on = NO_CPU;
527 + mpi->owner = NULL;
528 +}
529 +
530 +/* assumes IRQ off */
531 +struct task_struct* mpi_try_to_schedule(struct migratory_prio_inh *mpi)
532 +{
533 + struct task_struct *t = NULL;
534 + int need_notify = 1;
535 +
536 + /* Check whether we raced with the end of a critical section that
537 + * was migrated to a remote core. The remote core will send an IPI
538 + * when it is safe to schedule this task. */
539 + if (mpi == MPI_EXIT_CS)
540 + return NULL;
541 +
542 + raw_spin_lock(&mpi->lock);
543 +
544 + BUG_ON(!mpi->owner);
545 +
546 + TRACE("trying to schedule mpi-task mpi:%p owner:%s/%d scheduled_on=%d\n",
547 + mpi, mpi->owner->comm, mpi->owner->pid,
548 + mpi->scheduled_on);
549 +
550 + if (mpi->scheduled_on == NO_CPU &&
551 + is_present(mpi->owner) &&
552 + is_running(mpi->owner)) {
553 + /* it's not scheduled, so grab it */
554 + mpi->scheduled_on = smp_processor_id();
555 + t = mpi->owner;
556 + cpumask_clear_cpu(smp_processor_id(),
557 + &mpi->scheduling_candidates);
558 + need_notify = 0;
559 +
560 + } else if (mpi->scheduled_on == smp_processor_id()) {
561 + /* we already got it, nothing to change */
562 + if (is_present(mpi->owner) && is_running(mpi->owner)) {
563 + need_notify = 0;
564 + t = mpi->owner;
565 + } else {
566 + /* we can't actually schedule it */
567 + mpi->scheduled_on = NO_CPU;
568 + }
569 + }
570 +
571 + if (need_notify) {
572 + /* can't get to it, let's register our interest */
573 + cpumask_set_cpu(smp_processor_id(),
574 + &mpi->scheduling_candidates);
575 + }
576 +
577 + raw_spin_unlock(&mpi->lock);
578 +
579 + return t;
580 +}
581 +
582 +/* preempt - force a CPU to reschedule
583 + */
584 +static void preempt(cpu_entry_t *entry)
585 +{
586 + preempt_if_preemptable(entry->scheduled, entry->cpu);
587 +}
588 +
589 +void __mpi_notify_cpus(struct migratory_prio_inh *mpi)
590 +{
591 + int cpu;
592 +
593 + for_each_cpu(cpu, &mpi->scheduling_candidates) {
594 + TRACE("notifying P%d\n", cpu);
595 + preempt(remote_cpu(cpu)); /* XXX use proper smp_call */
596 + }
597 +
598 + cpumask_clear(&mpi->scheduling_candidates);
599 +}
600 +
601 +void mpi_notify_cpus(struct migratory_prio_inh *mpi)
602 +{
603 + unsigned long flags;
604 +
605 + raw_spin_lock_irqsave(&mpi->lock, flags);
606 + __mpi_notify_cpus(mpi);
607 + raw_spin_unlock_irqrestore(&mpi->lock, flags);
608 +}
609 +
610 +
611 +void mpi_deschedule(struct migratory_prio_inh *mpi,
612 + struct task_struct *preempted)
613 +{
614 + unsigned long flags;
615 +
616 + raw_spin_lock_irqsave(&mpi->lock, flags);
617 +
618 + if (preempted == mpi->owner) {
619 + mpi->scheduled_on = NO_CPU;
620 + /* XXX copy out mask and drop lock first? */
621 + __mpi_notify_cpus(mpi);
622 + }
623 +
624 + raw_spin_unlock_irqrestore(&mpi->lock, flags);
625 +}
626 +
627 +/* assumes IRQ off */
628 +void mpi_update_owner(struct migratory_prio_inh *mpi,
629 + struct task_struct *new_owner,
630 + int running_on)
631 +{
632 + raw_spin_lock(&mpi->lock);
633 +
634 + mpi->owner = new_owner;
635 + mpi->scheduled_on = running_on;
636 + if (running_on == NO_CPU)
637 + __mpi_notify_cpus(mpi);
638 +
639 + raw_spin_unlock(&mpi->lock);
640 +}
641 +
642 +/* assumes preempt off */
643 +static void mpi_migrate_back(void)
644 +{
645 + struct task_struct *t = current;
646 + int cpu;
647 +
648 + cpu = smp_processor_id();
649 +
650 + if (task_cpu_cluster(t) != remote_cluster(cpu) ||
651 + remote_cpu(cpu)->linked != t) {
652 +
653 + TRACE_CUR("not on the linked CPU (%d), migrating back (part:%d)\n",
654 + tsk_rt(t)->linked_on,
655 + get_partition(t));
656 +
657 + preempt_enable_no_resched();
658 +
659 + schedule();
660 +
661 + preempt_disable();
662 + } else
663 + tsk_rt(t)->mpi = NULL;
664 +}
665 +
666 static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
667 {
668 cpu_entry_t *a, *b;
669 @@ -150,6 +327,49 @@ static cpu_entry_t* lowest_prio_cpu(cedf_domain_t *cluster)
670 return hn->value;
671 }
672
673 +static struct task_struct* select_job(struct task_struct *candidate)
674 +{
675 + /* Special case: delegate can be used to mark a task as
676 + * pseudo-suspended. It should not be scheduled in this case. */
677 + if (is_waiting_to_be_linked(candidate) ||
678 + is_waiting_for_donation_end(candidate))
679 + candidate = NULL;
680 +
681 + if (candidate && !(is_present(candidate) && is_running(candidate))) {
682 + TRACE_TASK(candidate, "should be scheduled, but is not running\n");
683 + candidate = NULL;
684 + }
685 +
686 + return candidate;
687 +}
688 +
689 +static int cedf_is_linked(struct task_struct* task)
690 +{
691 + return task->rt_param.linked_on != NO_CPU;
692 +}
693 +
694 +static cpu_entry_t *cedf_linked_cpu(struct task_struct *task)
695 +{
696 + if (cedf_is_linked(task))
697 + return &per_cpu(cedf_cpu_entries, task->rt_param.linked_on);
698 + else
699 + return NULL;
700 +}
701 +
702 +static int cedf_preempt_linked_cpu(struct task_struct* task)
703 +{
704 + cpu_entry_t *cpu = cedf_linked_cpu(task);
705 + struct task_struct *should_schedule = select_job(task);
706 +
707 + if (cpu && should_schedule &&
708 + cpu->scheduled != should_schedule) {
709 + preempt(cpu);
710 + return 1;
711 + } else
712 + return 0;
713 +}
714 +
715 +static void job_completion(struct task_struct *t, int forced);
716
717 /* link_task_to_cpu - Update the link of a CPU.
718 * Handles the case where the to-be-linked task is already
719 @@ -159,22 +379,33 @@ static noinline void link_task_to_cpu(struct task_struct* linked,
720 cpu_entry_t *entry)
721 {
722 cpu_entry_t *sched;
723 - struct task_struct* tmp;
724 + struct task_struct *tmp, *unlinked;
725 int on_cpu;
726
727 BUG_ON(linked && !is_realtime(linked));
728
729 - /* Currently linked task is set to be unlinked. */
730 - if (entry->linked) {
731 - entry->linked->rt_param.linked_on = NO_CPU;
732 + unlinked = entry->linked;
733 +
734 + /* Currently linked task will be unlinked. */
735 + if (unlinked) {
736 + unlinked->rt_param.linked_on = NO_CPU;
737 }
738
739 /* Link new task to CPU. */
740 if (linked) {
741 - tsk_rt(linked)->completed = 0;
742 - /* handle task is already scheduled somewhere! */
743 +
744 + /* Handle the case where the task is already scheduled somewhere!
745 + * Special case with m-PI: it might be scheduled outside
746 + * of our cluster.
747 + */
748 on_cpu = linked->rt_param.scheduled_on;
749 - if (on_cpu != NO_CPU) {
750 + if (on_cpu != NO_CPU &&
751 + remote_cluster(on_cpu) != task_cpu_cluster(linked)) {
752 + TRACE_TASK(linked, "linked, but already scheduled "
753 + "outside of cluster on CPU=%d\n", on_cpu);
754 + }
755 + if (on_cpu != NO_CPU &&
756 + remote_cluster(on_cpu) == task_cpu_cluster(linked)) {
757 sched = &per_cpu(cedf_cpu_entries, on_cpu);
758 /* this should only happen if not linked already */
759 BUG_ON(sched->linked == linked);
760 @@ -188,17 +419,46 @@ static noinline void link_task_to_cpu(struct task_struct* linked,
761 TRACE_TASK(linked,
762 "already scheduled on %d, updating link.\n",
763 sched->cpu);
764 +
765 tmp = sched->linked;
766 +
767 + /* need to get priority donation right */
768 + if (tmp && sched->pd_task == sched->linked) {
769 + /* currently in a CS */
770 + BUG_ON(linked->rt_param.donor_cpu != NO_CPU);
771 + /* Entry just had a completion, there cannot
772 + * be a CS in progress. */
773 + BUG_ON(entry->pd_task != NULL);
774 +
775 + sched->pd_task = NULL;
776 + entry->pd_task = tmp;
777 + tmp->rt_param.donor_cpu = sched->cpu;
778 + } else if (is_waiting_for_donation_end(tmp)) {
779 + preempt(entry);
780 + }
781 +
782 linked->rt_param.linked_on = sched->cpu;
783 sched->linked = linked;
784 update_cpu_position(sched);
785 linked = tmp;
786 }
787 + } else if (linked->rt_param.donor_cpu != NO_CPU &&
788 + entry->cpu != linked->rt_param.donor_cpu) {
789 + /* It's not linked, it's not scheduled, but it's got a
790 + * donor. We need to patch up the priority donation
791 + * info. */
792 + entry->pd_task = linked;
793 + sched = remote_cpu(linked->rt_param.donor_cpu);
794 + linked->rt_param.donor_cpu = entry->cpu;
795 + sched->pd_task = NULL;
796 + if (is_waiting_for_donation_end(sched->linked))
797 + preempt(sched);
798 }
799 if (linked) /* might be NULL due to swap */
800 linked->rt_param.linked_on = entry->cpu;
801 }
802 entry->linked = linked;
803 +
804 #ifdef WANT_ALL_SCHED_EVENTS
805 if (linked)
806 TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
807 @@ -206,6 +466,14 @@ static noinline void link_task_to_cpu(struct task_struct* linked,
808 TRACE("NULL linked to %d.\n", entry->cpu);
809 #endif
810 update_cpu_position(entry);
811 +
812 +
813 + if (is_waiting_for_donation_end(unlinked) &&
814 + is_completed(unlinked)) {
815 + /* delayed job completion */
816 + TRACE_TASK(unlinked, "delayed job completion\n");
817 + job_completion(unlinked, 0);
818 + }
819 }
820
821 /* unlink - Make sure a task is not linked any longer to an entry
822 @@ -215,50 +483,178 @@ static noinline void unlink(struct task_struct* t)
823 {
824 cpu_entry_t *entry;
825
826 + TRACE_TASK(t, "unlink (linked_on=%d)\n", t->rt_param.linked_on, in_list(&t->rt_param.list));
827 +
828 + BUG_ON(t->rt_param.linked_on != NO_CPU && in_list(&t->rt_param.list));
829 +
830 if (t->rt_param.linked_on != NO_CPU) {
831 /* unlink */
832 entry = &per_cpu(cedf_cpu_entries, t->rt_param.linked_on);
833 t->rt_param.linked_on = NO_CPU;
834 link_task_to_cpu(NULL, entry);
835 - } else if (is_queued(t)) {
836 - /* This is an interesting situation: t is scheduled,
837 - * but was just recently unlinked. It cannot be
838 - * linked anywhere else (because then it would have
839 - * been relinked to this CPU), thus it must be in some
840 - * queue. We must remove it from the list in this
841 - * case.
842 - *
843 - * in C-EDF case is should be somewhere in the queue for
844 - * its domain, therefore and we can get the domain using
845 - * task_cpu_cluster
846 - */
847 - remove(&(task_cpu_cluster(t))->domain, t);
848 + } else if (in_list(&t->rt_param.list)) {
849 + /* Remove from queue of pending jobs. */
850 + list_del(&t->rt_param.list);
851 }
852 }
853
854
855 -/* preempt - force a CPU to reschedule
856 - */
857 -static void preempt(cpu_entry_t *entry)
858 +static int edf_queue_job(struct list_head *queue, struct task_struct *t)
859 {
860 - preempt_if_preemptable(entry->scheduled, entry->cpu);
861 + unsigned int passed = 0;
862 + struct list_head *pos;
863 + struct task_struct *other;
864 +
865 + BUG_ON(in_list(&t->rt_param.list));
866 +
867 + /* sorted enqueue */
868 + list_for_each(pos, queue)
869 + {
870 + other = list_entry(pos, struct task_struct, rt_param.list);
871 + if (edf_higher_prio(t, other)) {
872 + /* found correct position: insert after prev and return */
873 + list_add(&t->rt_param.list, pos->prev);
874 + return passed;
875 + } else
876 + passed++;
877 + }
878 +
879 + /* Fell through: hit end of list. */
880 +
881 + list_add_tail(&t->rt_param.list, queue);
882 +
883 + return passed;
884 }
885
886 -/* requeue - Put an unlinked task into gsn-edf domain.
887 +static struct task_struct* job_queue_head(struct list_head *queue)
888 +{
889 + if (list_empty(queue))
890 + return NULL;
891 + else
892 + return list_entry(queue->next, struct task_struct, rt_param.list);
893 +}
894 +
895 +static struct task_struct* job_queue_first_runnable(struct list_head *queue)
896 +{
897 + struct list_head *pos;
898 + struct task_struct *task;
899 +
900 + /* sorted enqueue */
901 + list_for_each(pos, queue)
902 + {
903 + task = list_entry(pos, struct task_struct, rt_param.list);
904 + task = select_job(task);
905 + if (task && tsk_rt(task)->mpi)
906 + task = mpi_try_to_schedule(tsk_rt(task)->mpi);
907 + if (task
908 + && tsk_rt(task)->scheduled_on == NO_CPU
909 + && tsk_rt(task)->donor_cpu == NO_CPU)
910 + return task;
911 + }
912 +
913 + return NULL;
914 +}
915 +
916 +
917 +static struct task_struct* dequeue_head(struct list_head *queue)
918 +{
919 + struct task_struct* head = job_queue_head(queue);
920 +
921 + if (head)
922 + list_del(&head->rt_param.list);
923 +
924 + return head;
925 +}
926 +
927 +static int edf_need_to_preempt(struct list_head *queue, struct task_struct *scheduled)
928 +{
929 + if (list_empty(queue))
930 + return 0;
931 + if (!scheduled || !is_realtime(scheduled))
932 + return 1;
933 +
934 + return edf_higher_prio(job_queue_head(queue), scheduled);
935 +}
936 +
937 +static void check_for_preemptions(cedf_domain_t *cluster);
938 +
939 +enum hrtimer_restart on_cedf_release_timer(struct hrtimer *timer)
940 +{
941 + /* We know that the calling thread/context is not holding the ready
942 + * queue lock because the hrtimer subsystem pushes the arming of
943 + * already expired timers into a softirq (see
944 + * hrtimer_enqueue_reprogram). Thus, even when arming an already
945 + * expired timer, the call to the callback function does not happen
946 + * until after the ready queue lock has been dropped.
947 + */
948 +
949 + struct task_struct *task;
950 + cedf_domain_t* cluster;
951 + unsigned long flags;
952 +
953 + task = container_of(timer, struct release_heap, timer)->task;
954 + cluster = task_cpu_cluster(task);
955 +
956 + raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
957 +
958 + TRACE_TASK(task, "released! complete:%d\n", task->rt_param.completed);
959 + tsk_rt(task)->completed = 0;
960 +
961 + edf_queue_job(&cluster->job_queue, task);
962 + check_for_preemptions(cluster);
963 +
964 + raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
965 +
966 + return HRTIMER_NORESTART;
967 +}
968 +
969 +void arm_timer_for_job_release(struct task_struct *t)
970 +{
971 + struct hrtimer *timer = &tsk_rt(t)->rel_heap->timer;
972 +
973 + if (hrtimer_active(timer)) {
974 + TRACE_TASK(t, "BUG BUG BUG timer still active!\n");
975 + TRACE_TASK(t, "giving up\n");
976 + printk(KERN_CRIT "BUG BUG BUG\n");
977 + return;
978 + }
979 + BUG_ON(hrtimer_active(timer));
980 +
981 + timer->function = on_cedf_release_timer;
982 + tsk_rt(t)->rel_heap->task = t;
983 +
984 + TRACE_TASK(t, "arming job releas timer\n");
985 +
986 + /* todo: release master support is missing */
987 + __hrtimer_start_range_ns(timer,
988 + ns_to_ktime(get_release(t)),
989 + 0, HRTIMER_MODE_ABS_PINNED, 0);
990 +
991 + sched_trace_task_release(t);
992 +}
993 +
994 +/* requeue - Put an unlinked task into appropriate queue (ready or release).
995 * Caller must hold cedf_lock.
996 */
997 static noinline void requeue(struct task_struct* task)
998 {
999 cedf_domain_t *cluster = task_cpu_cluster(task);
1000 - BUG_ON(!task);
1001 - /* sanity check before insertion */
1002 - BUG_ON(is_queued(task));
1003 + lt_t now = litmus_clock();
1004
1005 - if (is_released(task, litmus_clock()))
1006 - __add_ready(&cluster->domain, task);
1007 + TRACE_TASK(task, "requeue: rel=%d queued=%d linked=%d\n",
1008 + is_released(task, now),
1009 + in_list(&task->rt_param.list),
1010 + cedf_is_linked(task));
1011 +
1012 + BUG_ON(task->rt_param.linked_on != NO_CPU && !is_released(task, litmus_clock()));
1013 +
1014 + if (is_released(task, now)
1015 + && !in_list(&task->rt_param.list)
1016 + && !cedf_is_linked(task))
1017 + edf_queue_job(&cluster->job_queue, task);
1018 else {
1019 - /* it has got to wait */
1020 - add_release(&cluster->domain, task);
1021 + if (!is_released(task, now))
1022 + TRACE_TASK(task, "not requeueing not-yet-released job\n");
1023 }
1024 }
1025
1026 @@ -288,16 +684,18 @@ static cpu_entry_t* cedf_get_nearest_available_cpu(
1027 /* check for any necessary preemptions */
1028 static void check_for_preemptions(cedf_domain_t *cluster)
1029 {
1030 - struct task_struct *task;
1031 + struct task_struct *task, *unlinked;
1032 cpu_entry_t *last;
1033
1034 for(last = lowest_prio_cpu(cluster);
1035 - edf_preemption_needed(&cluster->domain, last->linked);
1036 + edf_need_to_preempt(&cluster->job_queue, last->linked);
1037 last = lowest_prio_cpu(cluster)) {
1038 /* preemption necessary */
1039 - task = __take_ready(&cluster->domain);
1040 + task = dequeue_head(&cluster->job_queue);
1041 TRACE("check_for_preemptions: attempting to link task %d to %d\n",
1042 task->pid, last->cpu);
1043 + unlinked = last->linked;
1044 + link_task_to_cpu(task, last);
1045 #ifdef CONFIG_SCHED_CPU_AFFINITY
1046 {
1047 cpu_entry_t *affinity =
1048 @@ -309,10 +707,9 @@ static void check_for_preemptions(cedf_domain_t *cluster)
1049 requeue(last->linked);
1050 }
1051 #else
1052 - if (requeue_preempted_job(last->linked))
1053 - requeue(last->linked);
1054 + if (requeue_preempted_job(unlinked))
1055 + requeue(unlinked);
1056 #endif
1057 - link_task_to_cpu(task, last);
1058 preempt(last);
1059 }
1060 }
1061 @@ -325,42 +722,38 @@ static noinline void cedf_job_arrival(struct task_struct* task)
1062
1063 requeue(task);
1064 check_for_preemptions(cluster);
1065 -}
1066 -
1067 -static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
1068 -{
1069 - cedf_domain_t* cluster = container_of(rt, cedf_domain_t, domain);
1070 - unsigned long flags;
1071 -
1072 - raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
1073 -
1074 - __merge_ready(&cluster->domain, tasks);
1075 - check_for_preemptions(cluster);
1076 -
1077 - raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
1078 + cedf_preempt_linked_cpu(task);
1079 }
1080
1081 /* caller holds cedf_lock */
1082 -static noinline void job_completion(struct task_struct *t, int forced)
1083 +static void job_completion(struct task_struct *t, int forced)
1084 {
1085 + lt_t now = litmus_clock();
1086 +
1087 BUG_ON(!t);
1088
1089 sched_trace_task_completion(t, forced);
1090
1091 - TRACE_TASK(t, "job_completion().\n");
1092 + TRACE_TASK(t, "job_completion(). lateness=%ld\n",
1093 + (long) now - (long) get_deadline(t));
1094
1095 /* set flags */
1096 tsk_rt(t)->completed = 1;
1097 /* prepare for next period */
1098 prepare_for_next_period(t);
1099 - if (is_released(t, litmus_clock()))
1100 + if (is_released(t, now))
1101 sched_trace_task_release(t);
1102 /* unlink */
1103 unlink(t);
1104 /* requeue
1105 * But don't requeue a blocking task. */
1106 - if (is_running(t))
1107 - cedf_job_arrival(t);
1108 + tsk_rt(t)->completed = 0;
1109 + if (is_present(t) && is_running(t)) {
1110 + if (!is_released(t, now))
1111 + arm_timer_for_job_release(t);
1112 + else
1113 + cedf_job_arrival(t);
1114 + }
1115 }
1116
1117 /* cedf_tick - this function is called for every local timer
1118 @@ -377,7 +770,6 @@ static void cedf_tick(struct task_struct* t)
1119 * preemptable again
1120 */
1121 litmus_reschedule_local();
1122 - set_will_schedule();
1123 TRACE("cedf_scheduler_tick: "
1124 "%d is preemptable "
1125 " => FORCE_RESCHED\n", t->pid);
1126 @@ -415,8 +807,9 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
1127 {
1128 cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries);
1129 cedf_domain_t *cluster = entry->cluster;
1130 - int out_of_time, sleep, preempt, np, exists, blocks;
1131 + int out_of_time, sleep, preempt, np, exists, blocks, dead, exiting;
1132 struct task_struct* next = NULL;
1133 + struct task_struct* should_schedule = NULL;
1134
1135 #ifdef CONFIG_RELEASE_MASTER
1136 /* Bail out early if we are the release master.
1137 @@ -429,10 +822,17 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
1138 #endif
1139
1140 raw_spin_lock(&cluster->cluster_lock);
1141 - clear_will_schedule();
1142
1143 /* sanity checking */
1144 BUG_ON(entry->scheduled && entry->scheduled != prev);
1145 + if (entry->scheduled && !is_realtime(prev)) {
1146 + TRACE_TASK(prev, "BUG BUG BUG :: not real-time\n");
1147 + TRACE_TASK(entry->scheduled, "scheduled instead\n");
1148 + should_schedule = NULL;
1149 + next = NULL;
1150 + exists = blocks = np = 0;
1151 + goto bailout;
1152 + }
1153 BUG_ON(entry->scheduled && !is_realtime(prev));
1154 BUG_ON(is_realtime(prev) && !entry->scheduled);
1155
1156 @@ -444,7 +844,51 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
1157 budget_exhausted(entry->scheduled);
1158 np = exists && is_np(entry->scheduled);
1159 sleep = exists && is_completed(entry->scheduled);
1160 - preempt = entry->scheduled != entry->linked;
1161 + dead = exists && entry->scheduled->state == TASK_DEAD;
1162 + exiting = exists && entry->scheduled->flags & PF_EXITING;
1163 +
1164 + /* check whether linked is available */
1165 + should_schedule = select_job(entry->linked);
1166 +
1167 + BUG_ON(entry->pd_task && !entry->linked);
1168 +
1169 + /* Check for priority donation */
1170 + if (!should_schedule && entry->linked) {
1171 +
1172 + if (is_waiting_for_donation_end(entry->linked) &&
1173 + !entry->pd_task) {
1174 + TRACE_TASK(entry->linked, "no longer waiting for donation end\n");
1175 + entry->linked->rt_param.priodon_state = NOT_WAITING;
1176 +
1177 + if (is_completed(entry->linked)) {
1178 + TRACE_TASK(entry->linked, "delayed job completion processed\n");
1179 + job_completion(entry->linked, 0);
1180 + sleep = exists && is_completed(entry->scheduled);
1181 + }
1182 +
1183 + should_schedule = select_job(entry->linked);
1184 +
1185 + } else if (is_waiting_to_be_linked(entry->linked)) {
1186 + TRACE_TASK(entry->linked, "no longer waiting to be linked\n");
1187 + entry->linked->rt_param.priodon_state = NOT_WAITING;
1188 + should_schedule = select_job(entry->linked);
1189 +
1190 + /* A task is only blocked once by priority donation */
1191 + BUG_ON(entry->pd_task);
1192 + }
1193 + }
1194 +
1195 + if (entry->pd_task) {
1196 + should_schedule = select_job(entry->pd_task);
1197 +
1198 + /* special case: donor completion needs to be delayed */
1199 + if (is_completed(entry->linked)) {
1200 + TRACE_TASK(entry->linked, "job completion delayed\n");
1201 + entry->linked->rt_param.priodon_state = WAITING_FOR_DONATION_END;
1202 + }
1203 + }
1204 +
1205 + preempt = entry->scheduled != should_schedule;
1206
1207 #ifdef WANT_ALL_SCHED_EVENTS
1208 TRACE_TASK(prev, "invoked cedf_schedule.\n");
1209 @@ -453,17 +897,22 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
1210 if (exists)
1211 TRACE_TASK(prev,
1212 "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
1213 - "state:%d sig:%d\n",
1214 + "state:%d sig:%d dead:%d exit_state:%d exiting:%d\n",
1215 blocks, out_of_time, np, sleep, preempt,
1216 - prev->state, signal_pending(prev));
1217 - if (entry->linked && preempt)
1218 + prev->state, signal_pending(prev), dead, prev->exit_state, exiting);
1219 + if (should_schedule && preempt)
1220 TRACE_TASK(prev, "will be preempted by %s/%d\n",
1221 - entry->linked->comm, entry->linked->pid);
1222 + should_schedule->comm, should_schedule->pid);
1223
1224
1225 /* If a task blocks we have no choice but to reschedule.
1226 */
1227 - if (blocks)
1228 +// if (blocks)
1229 +// unlink(entry->scheduled);
1230 +
1231 +
1232 + /* Exiting tasks do not count as having pending jobs. */
1233 + if (unlikely(blocks && exiting))
1234 unlink(entry->scheduled);
1235
1236 /* Request a sys_exit_np() call if we would like to preempt but cannot.
1237 @@ -472,7 +921,7 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
1238 * hurt.
1239 */
1240 if (np && (out_of_time || preempt || sleep)) {
1241 - unlink(entry->scheduled);
1242 +// unlink(entry->scheduled);
1243 request_exit_np(entry->scheduled);
1244 }
1245
1246 @@ -480,24 +929,65 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
1247 * budget or wants to sleep completes. We may have to reschedule after
1248 * this. Don't do a job completion if we block (can't have timers running
1249 * for blocked jobs).
1250 + *
1251 + * Priority donation: priority donors may not complete.
1252 */
1253 - if (!np && (out_of_time || sleep) && !blocks)
1254 + if (!np && (out_of_time || sleep) && !blocks
1255 + && !(entry->linked == entry->scheduled
1256 + && is_waiting_for_donation_end(entry->linked)))
1257 job_completion(entry->scheduled, !sleep);
1258
1259 /* Link pending task if we became unlinked.
1260 */
1261 - if (!entry->linked)
1262 - link_task_to_cpu(__take_ready(&cluster->domain), entry);
1263 + if (!entry->linked) {
1264 + link_task_to_cpu(dequeue_head(&cluster->job_queue), entry);
1265 + if (is_waiting_to_be_linked(entry->linked)) {
1266 + TRACE_TASK(entry->linked, "no longer waiting to be linked\n");
1267 + entry->linked->rt_param.priodon_state = NOT_WAITING;
1268 +
1269 + /* A task is only blocked once by priority donation */
1270 + BUG_ON(entry->pd_task);
1271 + }
1272 + }
1273 +
1274 + TRACE_TASK(should_schedule, "PRE PD\n");
1275 +
1276 + should_schedule = select_job(entry->pd_task);
1277
1278 + TRACE_TASK(should_schedule, "POST PD\n");
1279 + if (!should_schedule) {
1280 + /* check for migratory priority inheritance */
1281 + if (entry->linked && entry->linked->rt_param.mpi)
1282 + should_schedule = mpi_try_to_schedule(entry->linked->rt_param.mpi);
1283 + else
1284 + should_schedule = select_job(entry->linked);
1285 + }
1286 +
1287 + TRACE_TASK(should_schedule, "should_schedule\n");
1288 +
1289 + if (entry->linked && entry->linked != should_schedule
1290 + && should_schedule)
1291 + TRACE_TASK(entry->linked, "linked, but delegates to %s/%d\n",
1292 + should_schedule->comm, should_schedule->pid);
1293 +
1294 + if (!should_schedule) {
1295 + /* Neither linked nor delegate are available. */
1296 + /* Pick someone else. */
1297 + should_schedule = job_queue_first_runnable(&cluster->job_queue);
1298 + }
1299 +
1300 +
1301 +bailout:
1302 /* The final scheduling decision. Do we need to switch for some reason?
1303 * If linked is different from scheduled, then select linked as next.
1304 */
1305 if ((!np || blocks) &&
1306 - entry->linked != entry->scheduled) {
1307 + should_schedule != entry->scheduled) {
1308 /* Schedule a linked job? */
1309 - if (entry->linked) {
1310 - entry->linked->rt_param.scheduled_on = entry->cpu;
1311 - next = entry->linked;
1312 + if (should_schedule) {
1313 + should_schedule->rt_param.scheduled_on = entry->cpu;
1314 + next = should_schedule;
1315 + TRACE_TASK(should_schedule, "should_schedule!\n");
1316 }
1317 if (entry->scheduled) {
1318 /* not gonna be scheduled soon */
1319 @@ -528,13 +1018,39 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
1320 }
1321
1322
1323 +// XXX When a task wakes up, we also need to check whether it
1324 +// has any notifiers attached.
1325 +
1326 +// XXX What if prev is already scheduled elsewhere?
1327 +// XXX Need to handle completion while migrated away!
1328 +
1329 /* _finish_switch - we just finished the switch away from prev
1330 */
1331 static void cedf_finish_switch(struct task_struct *prev)
1332 {
1333 cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries);
1334 + cedf_domain_t *cluster = task_cpu_cluster(prev);
1335 + unsigned long flags;
1336 +
1337 + /* check for preemption notifiers */
1338
1339 entry->scheduled = is_realtime(current) ? current : NULL;
1340 + if (is_realtime(prev) && prev->rt_param.mpi) {
1341 + if (prev->rt_param.mpi == MPI_EXIT_CS) {
1342 + /* Signal to remote cores that it is now safe
1343 + * to consider this task again. */
1344 + prev->rt_param.mpi = NULL;
1345 + } else
1346 + mpi_deschedule(prev->rt_param.mpi, prev);
1347 + }
1348 +
1349 + if (cluster != entry->cluster) {
1350 + /* needs to migrate back to its own cluster */
1351 + raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
1352 + cedf_preempt_linked_cpu(prev);
1353 + raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
1354 + }
1355 +
1356 #ifdef WANT_ALL_SCHED_EVENTS
1357 TRACE_TASK(prev, "switched away from\n");
1358 #endif
1359 @@ -549,13 +1065,15 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running)
1360 cpu_entry_t* entry;
1361 cedf_domain_t* cluster;
1362
1363 - TRACE("gsn edf: task new %d\n", t->pid);
1364 + TRACE("cedf: task new %d\n", t->pid);
1365
1366 /* the cluster doesn't change even if t is running */
1367 cluster = task_cpu_cluster(t);
1368
1369 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
1370
1371 + t->rt_param.donor_cpu = NO_CPU;
1372 +
1373 /* setup job params */
1374 release_at(t, litmus_clock());
1375
1376 @@ -595,36 +1113,44 @@ static void cedf_task_wake_up(struct task_struct *task)
1377 cluster = task_cpu_cluster(task);
1378
1379 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
1380 - now = litmus_clock();
1381 - if (is_tardy(task, now)) {
1382 - /* new sporadic release */
1383 - release_at(task, now);
1384 - sched_trace_task_release(task);
1385 - }
1386 - else {
1387 - if (task->rt.time_slice) {
1388 - /* came back in time before deadline
1389 - */
1390 - tsk_rt(task)->completed = 0;
1391 +
1392 + if (task->rt_param.donor_cpu != NO_CPU) {
1393 + /* preempt the CPU where the donor is located at */
1394 + preempt(&per_cpu(cedf_cpu_entries, task->rt_param.donor_cpu));
1395 + } else if (task->rt_param.mpi) {
1396 + mpi_notify_cpus(task->rt_param.mpi);
1397 + } else {
1398 + now = litmus_clock();
1399 + if (is_tardy(task, now)) {
1400 + /* new sporadic release */
1401 + release_at(task, now);
1402 + sched_trace_task_release(task);
1403 }
1404 + else {
1405 + if (task->rt.time_slice) {
1406 + /* came back in time before deadline
1407 + */
1408 + tsk_rt(task)->completed = 0;
1409 + }
1410 + }
1411 + cedf_job_arrival(task);
1412 }
1413 - cedf_job_arrival(task);
1414 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
1415 }
1416
1417 static void cedf_task_block(struct task_struct *t)
1418 {
1419 - unsigned long flags;
1420 +// unsigned long flags;
1421 cedf_domain_t *cluster;
1422
1423 TRACE_TASK(t, "block at %llu\n", litmus_clock());
1424
1425 cluster = task_cpu_cluster(t);
1426
1427 - /* unlink if necessary */
1428 - raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
1429 - unlink(t);
1430 - raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
1431 +// /* unlink if necessary */
1432 +// raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
1433 +// unlink(t);
1434 +// raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
1435
1436 BUG_ON(!is_realtime(t));
1437 }
1438 @@ -634,16 +1160,25 @@ static void cedf_task_exit(struct task_struct * t)
1439 {
1440 unsigned long flags;
1441 cedf_domain_t *cluster = task_cpu_cluster(t);
1442 + cpu_entry_t *cpu;
1443
1444 /* unlink if necessary */
1445 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
1446 +
1447 + cpu = &__get_cpu_var(cedf_cpu_entries);
1448 + TRACE_TASK(t, "exiting linked_on=%d scheduled_on=%d\n",
1449 + tsk_rt(t)->linked_on,
1450 + tsk_rt(t)->scheduled_on);
1451 + TRACE_TASK(cpu->linked, "locally linked\n");
1452 + TRACE_TASK(cpu->scheduled, "locally scheduled\n");
1453 +
1454 unlink(t);
1455 if (tsk_rt(t)->scheduled_on != NO_CPU) {
1456 - cpu_entry_t *cpu;
1457 cpu = &per_cpu(cedf_cpu_entries, tsk_rt(t)->scheduled_on);
1458 cpu->scheduled = NULL;
1459 tsk_rt(t)->scheduled_on = NO_CPU;
1460 }
1461 + check_for_preemptions(cluster);
1462 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
1463
1464 BUG_ON(!is_realtime(t));
1465 @@ -652,6 +1187,10 @@ static void cedf_task_exit(struct task_struct * t)
1466
1467 static long cedf_admit_task(struct task_struct* tsk)
1468 {
1469 + struct hrtimer *timer = &tsk_rt(tsk)->rel_heap->timer;
1470 +
1471 + hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1472 +
1473 return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
1474 }
1475
1476 @@ -739,17 +1278,19 @@ static long cedf_activate_plugin(void)
1477 /* initialize clusters */
1478 cedf = kmalloc(num_clusters * sizeof(cedf_domain_t), GFP_ATOMIC);
1479 for (i = 0; i < num_clusters; i++) {
1480 -
1481 + cedf[i].cluster = i;
1482 cedf[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t),
1483 GFP_ATOMIC);
1484 cedf[i].heap_node = kmalloc(
1485 cluster_size * sizeof(struct bheap_node),
1486 GFP_ATOMIC);
1487 bheap_init(&(cedf[i].cpu_heap));
1488 - edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs);
1489 + INIT_LIST_HEAD(&cedf[i].job_queue);
1490 + raw_spin_lock_init(&cedf[i].cluster_lock);
1491
1492 if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC))
1493 return -ENOMEM;
1494 + cedf[i].num_cpus = 0;
1495 #ifdef CONFIG_RELEASE_MASTER
1496 cedf[i].domain.release_master = atomic_read(&release_master_cpu);
1497 #endif
1498 @@ -785,7 +1326,6 @@ static long cedf_activate_plugin(void)
1499
1500 entry = &per_cpu(cedf_cpu_entries, ccpu);
1501 cedf[i].cpus[cpu_count] = entry;
1502 - atomic_set(&entry->will_schedule, 0);
1503 entry->cpu = ccpu;
1504 entry->cluster = &cedf[i];
1505 entry->hn = &(cedf[i].heap_node[cpu_count]);
1506 @@ -795,12 +1335,14 @@ static long cedf_activate_plugin(void)
1507
1508 entry->linked = NULL;
1509 entry->scheduled = NULL;
1510 + entry->pd_task = NULL;
1511 #ifdef CONFIG_RELEASE_MASTER
1512 /* only add CPUs that should schedule jobs */
1513 if (entry->cpu != entry->cluster->domain.release_master)
1514 #endif
1515 update_cpu_position(entry);
1516 }
1517 + cedf[i].num_cpus = cpu_count;
1518 /* done with this cluster */
1519 break;
1520 }
1521 @@ -811,6 +1353,494 @@ static long cedf_activate_plugin(void)
1522 return 0;
1523 }
1524
1525 +
1526 +#ifdef CONFIG_LITMUS_LOCKING
1527 +
1528 +static void priodon_become_eligible(void)
1529 +{
1530 + struct task_struct* t = current;
1531 + cpu_entry_t* cpu;
1532 + int started = 0;
1533 +
1534 + while (!started) {
1535 +
1536 + preempt_disable();
1537 +
1538 + cpu = &__get_cpu_var(cedf_cpu_entries);
1539 +
1540 + BUG_ON(cpu->pd_task == t);
1541 +
1542 + if (cpu->linked == t) {
1543 + /* We observed that we are linked.
1544 + * => We are among the $m$ highest-priority pending jobs.
1545 + * => Start the critical section. preempt_disable() ensures
1546 + * that this does not race with other critical
1547 + * sections starting on the same CPU.
1548 + */
1549 +
1550 +
1551 + if (cpu->pd_task) {
1552 + /* Uh oh, we are currently a priority donor,
1553 + * so we are not allowed to start our own
1554 + * critical section yet.
1555 + */
1556 + TRACE_TASK(t, "must wait for donation to end\n");
1557 +
1558 + t->rt_param.priodon_state = WAITING_FOR_DONATION_END;
1559 + preempt_enable_no_resched();
1560 +
1561 + schedule();
1562 + } else {
1563 + started = 1;
1564 + cpu->pd_task = t;
1565 + t->rt_param.donor_cpu = cpu->cpu;
1566 + preempt_enable();
1567 + }
1568 + } else {
1569 + /* We are currently not linked. Must wait. */
1570 + t->rt_param.priodon_state = WAITING_TO_BE_LINKED;
1571 + TRACE_TASK(t, "must wait to be linked\n");
1572 + preempt_enable_no_resched();
1573 + schedule();
1574 + }
1575 + }
1576 +}
1577 +
1578 +static void priodon_complete_request(void)
1579 +{
1580 + struct task_struct* t = current;
1581 + cpu_entry_t* cpu;
1582 +
1583 + preempt_disable();
1584 +
1585 + cpu = &__get_cpu_var(cedf_cpu_entries);
1586 +
1587 + BUG_ON(cpu->pd_task != t);
1588 +
1589 + cpu->pd_task = NULL;
1590 + t->rt_param.donor_cpu = NO_CPU;
1591 +
1592 + /* Reschedule if we became unlinked during
1593 + * the critical section.
1594 + */
1595 + if (cpu->linked != t)
1596 + preempt(cpu);
1597 +
1598 + preempt_enable();
1599 +}
1600 +
1601 +/* struct for semaphore with priority inheritance */
1602 +struct omlp_semaphore {
1603 + struct litmus_lock litmus_lock;
1604 +
1605 + /* current resource holder */
1606 + struct task_struct *owner;
1607 +
1608 + /* FIFO queue of waiting tasks */
1609 + wait_queue_head_t fifo_wait;
1610 +};
1611 +
1612 +static inline struct omlp_semaphore* omlp_from_lock(struct litmus_lock* lock)
1613 +{
1614 + return container_of(lock, struct omlp_semaphore, litmus_lock);
1615 +}
1616 +
1617 +static int cedf_omlp_lock(struct litmus_lock* l)
1618 +{
1619 + struct task_struct* t = current;
1620 + struct omlp_semaphore *sem = omlp_from_lock(l);
1621 + wait_queue_t wait;
1622 + unsigned long flags;
1623 +
1624 + if (!is_realtime(t))
1625 + return -EPERM;
1626 +
1627 + priodon_become_eligible();
1628 +
1629 + spin_lock_irqsave(&sem->fifo_wait.lock, flags);
1630 +
1631 + if (sem->owner) {
1632 + /* resource is not free => must suspend and wait */
1633 +
1634 + init_waitqueue_entry(&wait, t);
1635 +
1636 + set_task_state(t, TASK_UNINTERRUPTIBLE);
1637 +
1638 + __add_wait_queue_tail_exclusive(&sem->fifo_wait, &wait);
1639 +
1640 + TS_LOCK_SUSPEND;
1641 +
1642 + spin_unlock_irqrestore(&sem->fifo_wait.lock, flags);
1643 +
1644 + schedule();
1645 +
1646 + TS_LOCK_RESUME;
1647 +
1648 + BUG_ON(sem->owner != t);
1649 + } else {
1650 + /* it's ours now */
1651 + sem->owner = t;
1652 +
1653 + spin_unlock_irqrestore(&sem->fifo_wait.lock, flags);
1654 + }
1655 +
1656 + return 0;
1657 +}
1658 +
1659 +static int cedf_omlp_unlock(struct litmus_lock* l)
1660 +{
1661 + struct task_struct *t = current, *next;
1662 + struct omlp_semaphore *sem = omlp_from_lock(l);
1663 + unsigned long flags;
1664 + int err = 0;
1665 +
1666 + spin_lock_irqsave(&sem->fifo_wait.lock, flags);
1667 +
1668 + if (sem->owner != t) {
1669 + err = -EINVAL;
1670 + spin_unlock_irqrestore(&sem->fifo_wait.lock, flags);
1671 + goto out;
1672 + }
1673 +
1674 + /* check if there are jobs waiting for this resource */
1675 + next = __waitqueue_remove_first(&sem->fifo_wait);
1676 + if (next) {
1677 + /* next becomes the resouce holder */
1678 + sem->owner = next;
1679 + TRACE_CUR("lock ownership passed to %s/%d\n", next->comm, next->pid);
1680 +
1681 + /* wake up next */
1682 + wake_up_process(next);
1683 + } else
1684 + /* becomes available */
1685 + sem->owner = NULL;
1686 +
1687 + spin_unlock_irqrestore(&sem->fifo_wait.lock, flags);
1688 +
1689 + priodon_complete_request();
1690 +
1691 +out:
1692 + return err;
1693 +}
1694 +
1695 +static int cedf_omlp_close(struct litmus_lock* l)
1696 +{
1697 + struct task_struct *t = current;
1698 + struct omlp_semaphore *sem = omlp_from_lock(l);
1699 + unsigned long flags;
1700 +
1701 + int owner;
1702 +
1703 + spin_lock_irqsave(&sem->fifo_wait.lock, flags);
1704 +
1705 + owner = sem->owner == t;
1706 +
1707 + spin_unlock_irqrestore(&sem->fifo_wait.lock, flags);
1708 +
1709 + if (owner)
1710 + cedf_omlp_unlock(l);
1711 +
1712 + return 0;
1713 +}
1714 +
1715 +static void cedf_omlp_free(struct litmus_lock* lock)
1716 +{
1717 + kfree(omlp_from_lock(lock));
1718 +}
1719 +
1720 +static struct litmus_lock_ops cedf_omlp_lock_ops = {
1721 + .close = cedf_omlp_close,
1722 + .lock = cedf_omlp_lock,
1723 + .unlock = cedf_omlp_unlock,
1724 + .deallocate = cedf_omlp_free,
1725 +};
1726 +
1727 +static struct litmus_lock* cedf_new_omlp(void)
1728 +{
1729 + struct omlp_semaphore* sem;
1730 +
1731 + sem = kmalloc(sizeof(*sem), GFP_KERNEL);
1732 + if (!sem)
1733 + return NULL;
1734 +
1735 + sem->owner = NULL;
1736 + init_waitqueue_head(&sem->fifo_wait);
1737 + sem->litmus_lock.ops = &cedf_omlp_lock_ops;
1738 +
1739 + return &sem->litmus_lock;
1740 +}
1741 +
1742 +// xxxx
1743 +
1744 +struct omip_per_cluster_info {
1745 + /* per-cluster FIFO queue of waiting tasks */
1746 + wait_queue_head_t fifo_wait;
1747 + unsigned int queued;
1748 +
1749 + /* Priority queue of waiting tasks, on each cluster. */
1750 + wait_queue_head_t prio_wait;
1751 +};
1752 +
1753 +static void omip_per_cluster_init(struct omip_per_cluster_info *cluster)
1754 +{
1755 + init_waitqueue_head(&cluster->fifo_wait);
1756 + init_waitqueue_head(&cluster->prio_wait);
1757 + cluster->queued = 0;
1758 +}
1759 +
1760 +
1761 +/* struct for semaphore with priority inheritance */
1762 +struct omip_semaphore {
1763 + struct litmus_lock litmus_lock;
1764 +
1765 + /* information for migratory priority inheritance */
1766 + struct migratory_prio_inh mpi;
1767 +
1768 + struct task_struct *owner;
1769 +
1770 + /* FIFO queue of waiting tasks */
1771 + wait_queue_head_t fifo_wait;
1772 +
1773 + /* per-cluster queues */
1774 + struct omip_per_cluster_info cluster[NR_CPUS];
1775 +};
1776 +
1777 +static inline struct omip_semaphore* omip_from_lock(struct litmus_lock* lock)
1778 +{
1779 + return container_of(lock, struct omip_semaphore, litmus_lock);
1780 +}
1781 +
1782 +static void omip_enqueue(prio_wait_queue_t *wait,
1783 + struct omip_semaphore *sem,
1784 + struct task_struct *t)
1785 +{
1786 + int c = get_partition(t);
1787 + cedf_domain_t *cluster = remote_cluster(c);
1788 +
1789 + if (sem->cluster[c].queued >= cluster->num_cpus) {
1790 + /* must go into priority queue */
1791 + TRACE_TASK(t, "sem=%p added to prio queue in cluster C%\n", sem, c);
1792 + __add_wait_queue_prio_exclusive(&sem->cluster[c].prio_wait, wait);
1793 + } else if (sem->cluster[c].queued) {
1794 + /* must go into FIFO queue */
1795 + TRACE_TASK(t, "sem=%p added to FIFO queue in cluster C%\n", sem, c);
1796 + __add_wait_queue_tail_exclusive(&sem->cluster[c].fifo_wait,
1797 + &wait->wq);
1798 + } else {
1799 + /* empty, t immediately progresses to the global queue */
1800 + TRACE_TASK(t, "sem=%p added to global queue in cluster C%\n", sem, c);
1801 + __add_wait_queue_tail_exclusive(&sem->fifo_wait,
1802 + &wait->wq);
1803 + }
1804 +
1805 + sem->cluster[c].queued++;
1806 +}
1807 +
1808 +static void omip_dequeue(struct omip_semaphore *sem, struct task_struct *t)
1809 +{
1810 + int c = get_partition(t);
1811 + struct list_head* first;
1812 +
1813 + BUG_ON(!sem->cluster[c].queued);
1814 +
1815 + sem->cluster[c].queued--;
1816 +
1817 + /* move from prio to FIFO queue within cluster */
1818 + if (waitqueue_active(&sem->cluster[c].prio_wait)) {
1819 + TRACE_TASK(t, "sem=%p moves head of PRIO queue to FIFO queue C%d\n", sem, c);
1820 + first = sem->cluster[c].prio_wait.task_list.next;
1821 + list_move_tail(first, &sem->cluster[c].fifo_wait.task_list);
1822 + }
1823 +
1824 + /* move new head from local to global queue */
1825 + if (waitqueue_active(&sem->cluster[c].fifo_wait)) {
1826 + TRACE_TASK(t, "sem=%p moves head of FIFO queue to global queue C%d\n", sem, c);
1827 + first = sem->cluster[c].fifo_wait.task_list.next;
1828 + list_move_tail(first, &sem->fifo_wait.task_list);
1829 + }
1830 +}
1831 +
1832 +static int cedf_omip_lock(struct litmus_lock* l)
1833 +{
1834 + struct task_struct* t = current;
1835 + struct omip_semaphore *sem = omip_from_lock(l);
1836 + prio_wait_queue_t wait;
1837 + unsigned long flags;
1838 +
1839 + if (!is_realtime(t))
1840 + return -EPERM;
1841 +
1842 + spin_lock_irqsave(&sem->fifo_wait.lock, flags);
1843 +
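+ /* Associate t with this semaphore's migratory priority inheritance
+ * state; the MPI machinery elsewhere in this patch reaches it via
+ * tsk_rt(t)->mpi. */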
1844 + t->rt_param.mpi = &sem->mpi;
1845 +
1846 + if (sem->owner) {
1847 + /* resource is not free => must suspend and wait */
1848 +
1849 + init_prio_waitqueue_entry(&wait, t, get_deadline(t));
1850 +
1851 + set_task_state(t, TASK_UNINTERRUPTIBLE);
1852 +
1853 + omip_enqueue(&wait, sem, t);
1854 +
1855 + TS_LOCK_SUSPEND;
1856 +
1857 + spin_unlock_irqrestore(&sem->fifo_wait.lock, flags);
1858 +
1859 + schedule();
1860 +
1861 + TS_LOCK_RESUME;
1862 +
1863 + BUG_ON(sem->owner != t);
1864 + } else {
1865 + /* it's ours now */
1866 + sem->owner = t;
1867 + sem->cluster[get_partition(t)].queued++;
1868 +
1869 + mpi_update_owner(&sem->mpi, t, smp_processor_id());
1870 +
1871 + spin_unlock_irqrestore(&sem->fifo_wait.lock, flags);
1872 + }
1873 +
1874 + return 0;
1875 +}
1876 +
1877 +static int cedf_omip_unlock(struct litmus_lock* l)
1878 +{
1879 + struct task_struct *t = current, *next;
1880 + struct omip_semaphore *sem = omip_from_lock(l);
1881 + unsigned long flags;
1882 + int err = 0;
1883 +
1884 + preempt_disable();
1885 +
1886 + spin_lock_irqsave(&sem->fifo_wait.lock, flags);
1887 +
1888 + if (sem->owner != t) {
1889 + err = -EINVAL;
1890 + spin_unlock_irqrestore(&sem->fifo_wait.lock, flags);
1891 + goto out;
1892 + }
1893 +
1894 + omip_dequeue(sem, t);
1895 +
1896 + /* check if there are jobs waiting for this resource */
1897 + next = __waitqueue_remove_first(&sem->fifo_wait);
1898 + if (next) {
1899 + /* next becomes the resource holder */
1900 + sem->owner = next;
1901 + TRACE_CUR("lock ownership passed to %s/%d\n", next->comm, next->pid);
1902 +
1903 + /* wake up next */
1904 + wake_up_process(next);
1905 + } else
1906 + /* becomes available */
1907 + sem->owner = NULL;
1908 +
1909 + /* Mark that we no longer hold a resource. This is required
1910 + * to prevent scheduler races when we have to migrate back to
1911 + * our core at the end of a CS. */
1912 + tsk_rt(t)->mpi = MPI_EXIT_CS;
1913 + /* Tell everyone else who the new owner is and notify
1914 + * other CPUs if required. */
1915 + mpi_update_owner(&sem->mpi, sem->owner, NO_CPU);
1916 +
1917 + spin_unlock_irqrestore(&sem->fifo_wait.lock, flags);
1918 +
1919 + mpi_migrate_back();
1920 +
1921 +out:
1922 + preempt_enable();
1923 + return err;
1924 +}
1925 +
1926 +static int cedf_omip_close(struct litmus_lock* l)
1927 +{
1928 + struct task_struct *t = current;
1929 + struct omip_semaphore *sem = omip_from_lock(l);
1930 + unsigned long flags;
1931 +
1932 + int owner;
1933 +
1934 + spin_lock_irqsave(&sem->fifo_wait.lock, flags);
1935 +
1936 + owner = sem->owner == t;
1937 +
1938 + spin_unlock_irqrestore(&sem->fifo_wait.lock, flags);
1939 +
1940 + if (owner)
1941 + cedf_omip_unlock(l);
1942 +
1943 + return 0;
1944 +}
1945 +
1946 +static void cedf_omip_free(struct litmus_lock* lock)
1947 +{
1948 + kfree(omip_from_lock(lock));
1949 +}
1950 +
1951 +static struct litmus_lock_ops cedf_omip_lock_ops = {
1952 + .close = cedf_omip_close,
1953 + .lock = cedf_omip_lock,
1954 + .unlock = cedf_omip_unlock,
1955 + .deallocate = cedf_omip_free,
1956 +};
1957 +
1958 +static struct litmus_lock* cedf_new_omip(void)
1959 +{
1960 + struct omip_semaphore* sem;
1961 + int i;
1962 +
1963 + sem = kmalloc(sizeof(*sem), GFP_KERNEL);
1964 + if (!sem)
1965 + return NULL;
1966 +
1967 + sem->owner = NULL;
1968 + init_waitqueue_head(&sem->fifo_wait);
1969 + mpi_init(&sem->mpi);
1970 + sem->litmus_lock.ops = &cedf_omip_lock_ops;
1971 + for (i = 0; i < num_clusters; i++)
1972 + omip_per_cluster_init(sem->cluster + i);
1973 +
1974 + return &sem->litmus_lock;
1975 +}
1976 +
1977 +
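+/* Plugin callback for the generic LITMUS^RT locking layer: instantiate a
+ * lock object of the requested type (hooked up as .allocate_lock below). */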
1978 +static long cedf_allocate_lock(struct litmus_lock **lock, int type,
1979 + void* __user unused)
1980 +{
1981 + int err = -ENXIO;
1982 +
1983 + switch (type) {
1984 +
1985 + case OMLP_SEM:
1986 + /* O(m) Multiprocessor Locking Protocol */
1987 + *lock = cedf_new_omlp();
1988 + if (*lock)
1989 + err = 0;
1990 + else
1991 + err = -ENOMEM;
1992 + break;
1993 +
1994 + case OMIP_SEM:
1995 + /* O(m) Multiprocessor Independence-Preserving Locking Protocol */
1996 + *lock = cedf_new_omip();
1997 + if (*lock)
1998 + err = 0;
1999 + else
2000 + err = -ENOMEM;
2001 + break;
2002 +
2003 + };
2004 +
2005 +
2006 + return err;
2007 +}
2008 +
2009 +
2010 +#endif
2011 +
2012 +
2013 /* Plugin object */
2014 static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = {
2015 .plugin_name = "C-EDF",
2016 @@ -824,6 +1854,9 @@ static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = {
2017 .task_block = cedf_task_block,
2018 .admit_task = cedf_admit_task,
2019 .activate_plugin = cedf_activate_plugin,
2020 +#ifdef CONFIG_LITMUS_LOCKING
2021 + .allocate_lock = cedf_allocate_lock,
2022 +#endif
2023 };
2024
2025 static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL;
2026 diff --git a/litmus/sched_pfp.c b/litmus/sched_pfp.c
2027 index 91e5239..0e875a3 100644
2028 --- a/litmus/sched_pfp.c
2029 +++ b/litmus/sched_pfp.c
2030 @@ -182,7 +182,7 @@ static struct task_struct* pfp_schedule(struct task_struct * prev)
2031 np = exists && is_np(pfp->scheduled);
2032 sleep = exists && is_completed(pfp->scheduled);
2033 migrate = exists && get_partition(pfp->scheduled) != pfp->cpu;
2034 - preempt = migrate || fp_preemption_needed(&pfp->ready_queue, prev);
2035 + preempt = !blocks && (migrate || fp_preemption_needed(&pfp->ready_queue, prev));
2036
2037 /* If we need to preempt do so.
2038 * The following checks set resched to 1 in case of special
2039 @@ -1089,8 +1089,10 @@ static void pcp_priority_inheritance(void)
2040 fp_set_prio_inh(pfp, blocker, blocked);
2041 }
2042
2043 - /* check if anything changed */
2044 - if (fp_higher_prio(fp_prio_peek(&pfp->ready_queue), pfp->scheduled))
2045 + /* Check if anything changed. If the blocked job is current, then it is
2046 + * just blocking and hence is going to call the scheduler anyway. */
2047 + if (blocked != current &&
2048 + fp_higher_prio(fp_prio_peek(&pfp->ready_queue), pfp->scheduled))
2049 preempt(pfp);
2050
2051 raw_spin_unlock_irqrestore(&pfp->slock, flags);
2052 @@ -1201,10 +1203,10 @@ static void pcp_lower_ceiling(struct pcp_semaphore* sem)
2053
2054 TRACE_CUR("PCP released sem %p\n", sem);
2055
2056 + pcp_priority_inheritance();
2057 +
2058 /* Wake up all ceiling-blocked jobs that now pass the ceiling. */
2059 pcp_resume_unblocked();
2060 -
2061 - pcp_priority_inheritance();
2062 }
2063
2064 static void pcp_update_prio_ceiling(struct pcp_semaphore* sem,
2065 diff --git a/litmus/sched_trace.c b/litmus/sched_trace.c
2066 index f4171fd..536bc67 100644
2067 --- a/litmus/sched_trace.c
2068 +++ b/litmus/sched_trace.c
2069 @@ -75,7 +75,6 @@ void sched_trace_log_message(const char* fmt, ...)
2070 va_end(args);
2071 }
2072
2073 -
2074 /*
2075 * log_read - Read the trace buffer
2076 *
2077 @@ -199,23 +198,27 @@ static struct miscdevice litmus_log_dev = {
2078 .fops = &log_fops,
2079 };
2080
2081 -#ifdef CONFIG_MAGIC_SYSRQ
2082 +
2083 void dump_trace_buffer(int max)
2084 {
2085 - char line[80];
2086 + char line[255];
2087 int len;
2088 int count = 0;
2089
2090 /* potential, but very unlikely, race... */
2091 trace_recurse = 1;
2092 + printk(KERN_CRIT "<<< LITMUS^RT TRACE() dump start >>>");
2093 while ((max == 0 || count++ < max) &&
2094 - (len = kfifo_out(&debug_buffer, line, sizeof(line - 1))) > 0) {
2095 + (len = kfifo_out(&debug_buffer, line, sizeof(line) -1)) > 0) {
2096 line[len] = '\0';
2097 - printk("%s", line);
2098 + printk(KERN_CRIT "%s", line);
2099 }
2100 + printk(KERN_CRIT "<<< LITMUS^RT TRACE() dump end >>>");
2101 trace_recurse = 0;
2102 }
2103
2104 +#ifdef CONFIG_MAGIC_SYSRQ
2105 +
2106 static void sysrq_dump_trace_buffer(int key)
2107 {
2108 dump_trace_buffer(100);
2109 diff --git a/litmus/sync.c b/litmus/sync.c
2110 index 873b3ff..3e79e0a 100644
2111 --- a/litmus/sync.c
2112 +++ b/litmus/sync.c
2113 @@ -89,7 +89,7 @@ static long do_release_ts(lt_t start)
2114 {
2115 long task_count = 0;
2116
2117 - struct list_head *pos;
2118 + struct list_head *pos, *safe;
2119 struct ts_release_wait *wait;
2120
2121 if (mutex_lock_interruptible(&task_release_lock)) {
2122 @@ -101,7 +101,7 @@ static long do_release_ts(lt_t start)
2123 sched_trace_sys_release(&start);
2124
2125 task_count = 0;
2126 - list_for_each(pos, &task_release_list) {
2127 + list_for_each_safe(pos, safe, &task_release_list) {
2128 wait = (struct ts_release_wait*)
2129 list_entry(pos, struct ts_release_wait, list);
2130