Attachment 'gpu-klmirqd-litmus-rt-ecrts12.patch'
1 diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
2 index 91fd0c7..433cd15 100644
3 --- a/arch/x86/kernel/irq.c
4 +++ b/arch/x86/kernel/irq.c
5 @@ -8,6 +8,10 @@
6 #include <linux/smp.h>
7 #include <linux/ftrace.h>
8
9 +#ifdef CONFIG_LITMUS_NVIDIA
10 +#include <litmus/sched_trace.h>
11 +#endif
12 +
13 #include <asm/apic.h>
14 #include <asm/io_apic.h>
15 #include <asm/irq.h>
16 diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
17 index 3770290..b5ddae4 100644
18 --- a/arch/x86/kernel/syscall_table_32.S
19 +++ b/arch/x86/kernel/syscall_table_32.S
20 @@ -352,3 +352,4 @@ ENTRY(sys_call_table)
21 .long sys_wait_for_ts_release
22 .long sys_release_ts
23 .long sys_null_call
24 + .long sys_register_nv_device
25 diff --git a/include/linux/completion.h b/include/linux/completion.h
26 index c63950e..3ce20dd 100644
27 --- a/include/linux/completion.h
28 +++ b/include/linux/completion.h
29 @@ -76,6 +76,7 @@ static inline void init_completion(struct completion *x)
30 init_waitqueue_head(&x->wait);
31 }
32
33 +extern void __wait_for_completion_locked(struct completion *);
34 extern void wait_for_completion(struct completion *);
35 extern int wait_for_completion_interruptible(struct completion *x);
36 extern int wait_for_completion_killable(struct completion *x);
37 diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
38 index a0384a4..a2f2880 100644
39 --- a/include/linux/interrupt.h
40 +++ b/include/linux/interrupt.h
41 @@ -459,6 +459,10 @@ struct tasklet_struct
42 atomic_t count;
43 void (*func)(unsigned long);
44 unsigned long data;
45 +
46 +#if defined(CONFIG_LITMUS_SOFTIRQD) || defined(CONFIG_LITMUS_PAI_SOFTIRQD)
47 + struct task_struct *owner;
48 +#endif
49 };
50
51 #define DECLARE_TASKLET(name, func, data) \
52 @@ -496,6 +500,7 @@ static inline void tasklet_unlock_wait(struct tasklet_struct *t)
53 #define tasklet_unlock(t) do { } while (0)
54 #endif
55
56 +extern void ___tasklet_schedule(struct tasklet_struct *t);
57 extern void __tasklet_schedule(struct tasklet_struct *t);
58
59 static inline void tasklet_schedule(struct tasklet_struct *t)
60 @@ -504,6 +509,7 @@ static inline void tasklet_schedule(struct tasklet_struct *t)
61 __tasklet_schedule(t);
62 }
63
64 +extern void ___tasklet_hi_schedule(struct tasklet_struct *t);
65 extern void __tasklet_hi_schedule(struct tasklet_struct *t);
66
67 static inline void tasklet_hi_schedule(struct tasklet_struct *t)
68 @@ -512,6 +518,7 @@ static inline void tasklet_hi_schedule(struct tasklet_struct *t)
69 __tasklet_hi_schedule(t);
70 }
71
72 +extern void ___tasklet_hi_schedule_first(struct tasklet_struct *t);
73 extern void __tasklet_hi_schedule_first(struct tasklet_struct *t);
74
75 /*
76 @@ -541,7 +548,7 @@ static inline void tasklet_disable(struct tasklet_struct *t)
77 }
78
79 static inline void tasklet_enable(struct tasklet_struct *t)
80 -{
81 +{
82 smp_mb__before_atomic_dec();
83 atomic_dec(&t->count);
84 }
85 diff --git a/include/linux/mutex.h b/include/linux/mutex.h
86 index f363bc8..9f31995 100644
87 --- a/include/linux/mutex.h
88 +++ b/include/linux/mutex.h
89 @@ -126,6 +126,15 @@ static inline int mutex_is_locked(struct mutex *lock)
90 return atomic_read(&lock->count) != 1;
91 }
92
93 +/* return non-zero to abort. only pre-side-effects may abort */
94 +typedef int (*side_effect_t)(unsigned long);
95 +extern void mutex_lock_sfx(struct mutex *lock,
96 + side_effect_t pre, unsigned long pre_arg,
97 + side_effect_t post, unsigned long post_arg);
98 +extern void mutex_unlock_sfx(struct mutex *lock,
99 + side_effect_t pre, unsigned long pre_arg,
100 + side_effect_t post, unsigned long post_arg);
101 +
102 /*
103 * See kernel/mutex.c for detailed documentation of these APIs.
104 * Also see Documentation/mutex-design.txt.
105 @@ -145,6 +154,7 @@ extern void mutex_lock(struct mutex *lock);
106 extern int __must_check mutex_lock_interruptible(struct mutex *lock);
107 extern int __must_check mutex_lock_killable(struct mutex *lock);
108
109 +
110 # define mutex_lock_nested(lock, subclass) mutex_lock(lock)
111 # define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock)
112 # define mutex_lock_killable_nested(lock, subclass) mutex_lock_killable(lock)
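As an illustration of the side-effect hooks declared above, here is a minimal sketch (not taken from the patch; the mutex, flag, and counter names are hypothetical). Both callbacks run while the mutex's internal wait_lock spinlock is held, so they must not sleep:

static atomic_t nr_acquired = ATOMIC_INIT(0);
static int shutting_down;                      /* hypothetical abort flag */

static int abort_if_shutting_down(unsigned long arg)
{
        return *(int *)arg;                    /* non-zero aborts the lock attempt */
}

static int count_acquire(unsigned long arg)
{
        atomic_inc((atomic_t *)arg);
        return 0;                              /* post side-effects cannot abort */
}

/* inside some function: */
mutex_lock_sfx(&some_mutex,
               abort_if_shutting_down, (unsigned long)&shutting_down,
               count_acquire, (unsigned long)&nr_acquired);
/* ... critical section ... */
mutex_unlock_sfx(&some_mutex, NULL, 0, NULL, 0);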
113 diff --git a/include/linux/semaphore.h b/include/linux/semaphore.h
114 index 5310d27..69e3f57 100644
115 --- a/include/linux/semaphore.h
116 +++ b/include/linux/semaphore.h
117 @@ -49,4 +49,13 @@ extern int __must_check down_trylock(struct semaphore *sem);
118 extern int __must_check down_timeout(struct semaphore *sem, long jiffies);
119 extern void up(struct semaphore *sem);
120
121 +extern void __down(struct semaphore *sem);
122 +extern void __up(struct semaphore *sem);
123 +
124 +struct semaphore_waiter {
125 + struct list_head list;
126 + struct task_struct *task;
127 + int up;
128 +};
129 +
130 #endif /* __LINUX_SEMAPHORE_H */
131 diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
132 index 25e02c9..5fecfb3 100644
133 --- a/include/linux/workqueue.h
134 +++ b/include/linux/workqueue.h
135 @@ -83,6 +83,9 @@ struct work_struct {
136 #ifdef CONFIG_LOCKDEP
137 struct lockdep_map lockdep_map;
138 #endif
139 +#ifdef CONFIG_LITMUS_SOFTIRQD
140 + struct task_struct *owner;
141 +#endif
142 };
143
144 #define WORK_DATA_INIT() ATOMIC_LONG_INIT(WORK_STRUCT_NO_CPU)
145 @@ -115,11 +118,25 @@ struct execute_work {
146 #define __WORK_INIT_LOCKDEP_MAP(n, k)
147 #endif
148
149 +#ifdef CONFIG_LITMUS_SOFTIRQD
150 +#define __WORK_INIT_OWNER() \
151 + .owner = NULL,
152 +
153 +#define PREPARE_OWNER(_work, _owner) \
154 + do { \
155 + (_work)->owner = (_owner); \
156 + } while(0)
157 +#else
158 +#define __WORK_INIT_OWNER()
159 +#define PREPARE_OWNER(_work, _owner)
160 +#endif
161 +
162 #define __WORK_INITIALIZER(n, f) { \
163 .data = WORK_DATA_STATIC_INIT(), \
164 .entry = { &(n).entry, &(n).entry }, \
165 .func = (f), \
166 __WORK_INIT_LOCKDEP_MAP(#n, &(n)) \
167 + __WORK_INIT_OWNER() \
168 }
169
170 #define __DELAYED_WORK_INITIALIZER(n, f) { \
171 @@ -327,6 +344,7 @@ extern void flush_workqueue(struct workqueue_struct *wq);
172 extern void flush_scheduled_work(void);
173 extern void flush_delayed_work(struct delayed_work *work);
174
175 +extern int __schedule_work(struct work_struct *work);
176 extern int schedule_work(struct work_struct *work);
177 extern int schedule_work_on(int cpu, struct work_struct *work);
178 extern int schedule_delayed_work(struct delayed_work *work, unsigned long delay);
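A sketch of how a work item might be tagged with its real-time owner before being handed to a klitirqd daemon (not from the patch; the handler, owner task, and daemon index are hypothetical, and litmus_schedule_work() is declared in litmus/litmus_softirq.h):

static void nv_bottom_half(struct work_struct *w);   /* hypothetical handler */
static struct work_struct nv_work;

/* inside some function: */
INIT_WORK(&nv_work, nv_bottom_half);
PREPARE_OWNER(&nv_work, device_owner);   /* RT task this work is charged to */
litmus_schedule_work(&nv_work, 0);       /* queue on klitirqd daemon #0 */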
179 diff --git a/include/litmus/affinity.h b/include/litmus/affinity.h
180 new file mode 100644
181 index 0000000..877b409
182 --- /dev/null
183 +++ b/include/litmus/affinity.h
184 @@ -0,0 +1,78 @@
185 +#ifndef __LITMUS_AFFINITY_H
186 +#define __LITMUS_AFFINITY_H
187 +
188 +#include <linux/cpumask.h>
189 +
190 +/*
191 + L1 (instr) = depth 0
192 + L1 (data) = depth 1
193 + L2 = depth 2
194 + L3 = depth 3
195 + */
196 +#define NUM_CACHE_LEVELS 4
197 +
198 +struct neighborhood
199 +{
200 + unsigned int size[NUM_CACHE_LEVELS];
201 + cpumask_var_t neighbors[NUM_CACHE_LEVELS];
202 +};
203 +
204 +/* topology info is stored redundantly in a big array for fast lookups */
205 +extern struct neighborhood neigh_info[NR_CPUS];
206 +
207 +void init_topology(void); /* called by Litmus module's _init_litmus() */
208 +
209 +/* Works like:
210 +void get_nearest_available_cpu(cpu_entry_t* nearest, cpu_entry_t* start, cpu_entry_t* entries, int release_master)
211 +
212 +Set release_master = -1 if there is no release master.
213 + */
214 +#define get_nearest_available_cpu(nearest, start, entries, release_master) \
215 +{ \
216 + (nearest) = NULL; \
217 + if(!(start)->linked) \
218 + { \
219 + (nearest) = (start); \
220 + } \
221 + else \
222 + { \
223 + int __level; \
224 + int __cpu; \
225 + struct neighborhood* __neighbors = &neigh_info[(start)->cpu]; \
226 + \
227 + for(__level = 0; (__level < NUM_CACHE_LEVELS) && !(nearest); ++__level) \
228 + { \
229 + if(__neighbors->size[__level] > 1) \
230 + { \
231 + for_each_cpu(__cpu, __neighbors->neighbors[__level]) \
232 + { \
233 + if(__cpu != (release_master)) \
234 + { \
235 + cpu_entry_t* __entry = &per_cpu((entries), __cpu); \
236 + if(!__entry->linked) \
237 + { \
238 + (nearest) = __entry; \
239 + break; \
240 + } \
241 + } \
242 + } \
243 + } \
244 + else if(__neighbors->size[__level] == 0) \
245 + { \
246 + break; \
247 + } \
248 + } \
249 + } \
250 + \
251 + if((nearest)) \
252 + { \
253 + TRACE("P%d is closest available CPU to P%d\n", (nearest)->cpu, (start)->cpu); \
254 + } \
255 + else \
256 + { \
257 + TRACE("Could not find an available CPU close to P%d\n", \
258 + (start)->cpu); \
259 + } \
260 +}
261 +
262 +#endif
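A sketch of how a global plugin might use get_nearest_available_cpu() when linking a resuming job (not from the patch; cpu_entry_t, gsnedf_cpu_entries, and link_task_to_cpu() are assumed to follow the GSN-EDF plugin's conventions):

cpu_entry_t *nearest = NULL;
cpu_entry_t *last = &per_cpu(gsnedf_cpu_entries, task_cpu(task));

/* prefer an idle CPU that shares cache with the task's previous CPU;
   pass -1 when no release-master CPU is reserved */
get_nearest_available_cpu(nearest, last, gsnedf_cpu_entries, -1);
if (nearest)
        link_task_to_cpu(task, nearest);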
263 diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h
264 index caf2a1e..c740e8f 100644
265 --- a/include/litmus/fdso.h
266 +++ b/include/litmus/fdso.h
267 @@ -18,9 +18,10 @@ typedef enum {
268 MIN_OBJ_TYPE = 0,
269
270 FMLP_SEM = 0,
271 - SRP_SEM = 1,
272 + KFMLP_SEM = 1,
273 + SRP_SEM = 2,
274
275 - MAX_OBJ_TYPE = 1
276 + MAX_OBJ_TYPE = SRP_SEM
277 } obj_type_t;
278
279 struct inode_obj_id {
280 @@ -64,6 +65,7 @@ static inline void* od_lookup(int od, obj_type_t type)
281 }
282
283 #define lookup_fmlp_sem(od)((struct pi_semaphore*) od_lookup(od, FMLP_SEM))
284 +#define lookup_kfmlp_sem(od)((struct pi_semaphore*) od_lookup(od, KFMLP_SEM))
285 #define lookup_srp_sem(od) ((struct srp_semaphore*) od_lookup(od, SRP_SEM))
286 #define lookup_ics(od) ((struct ics*) od_lookup(od, ICS_ID))
287
288 diff --git a/include/litmus/fifo_common.h b/include/litmus/fifo_common.h
289 new file mode 100644
290 index 0000000..12cfbfe
291 --- /dev/null
292 +++ b/include/litmus/fifo_common.h
293 @@ -0,0 +1,25 @@
294 +/*
295 + * EDF common data structures and utility functions shared by all EDF
296 + * based scheduler plugins
297 + */
298 +
299 +/* CLEANUP: Add comments and make it less messy.
300 + *
301 + */
302 +
303 +#ifndef __UNC_FIFO_COMMON_H__
304 +#define __UNC_FIFO_COMMON_H__
305 +
306 +#include <litmus/rt_domain.h>
307 +
308 +void fifo_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
309 + release_jobs_t release);
310 +
311 +int fifo_higher_prio(struct task_struct* first,
312 + struct task_struct* second);
313 +
314 +int fifo_ready_order(struct bheap_node* a, struct bheap_node* b);
315 +
316 +int fifo_preemption_needed(rt_domain_t* rt, struct task_struct *t);
317 +
318 +#endif
319 diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h
320 index e7769ca..829c1c5 100644
321 --- a/include/litmus/litmus.h
322 +++ b/include/litmus/litmus.h
323 @@ -26,6 +26,7 @@ static inline int in_list(struct list_head* list)
324 );
325 }
326
327 +
328 struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq);
329
330 #define NO_CPU 0xffffffff
331 @@ -117,7 +118,9 @@ static inline lt_t litmus_clock(void)
332 #define earlier_release(a, b) (lt_before(\
333 (a)->rt_param.job_params.release,\
334 (b)->rt_param.job_params.release))
335 -
336 +#define shorter_period(a, b) (lt_before(\
337 + (a)->rt_param.task_params.period,\
338 + (b)->rt_param.task_params.period))
339 void preempt_if_preemptable(struct task_struct* t, int on_cpu);
340
341 #ifdef CONFIG_LITMUS_LOCKING
342 diff --git a/include/litmus/litmus_softirq.h b/include/litmus/litmus_softirq.h
343 new file mode 100644
344 index 0000000..34287f3
345 --- /dev/null
346 +++ b/include/litmus/litmus_softirq.h
347 @@ -0,0 +1,199 @@
348 +#ifndef __LITMUS_SOFTIRQ_H
349 +#define __LITMUS_SOFTIRQ_H
350 +
351 +#include <linux/interrupt.h>
352 +#include <linux/workqueue.h>
353 +
354 +/*
355 + Threaded tasklet handling for Litmus. Tasklets
356 + are scheduled with the priority of the tasklet's
357 + owner---that is, the RT task on whose behalf the
358 + tasklet runs.
359 +
360 + Tasklets are currently scheduled in FIFO order with
361 + NO priority inheritance for "blocked" tasklets.
362 +
363 + klitirqd assumes the priority of the owner of the
364 + tasklet when the tasklet is next to execute.
365 +
366 + Currently, hi-tasklets are scheduled before
367 + low-tasklets, regardless of priority of low-tasklets.
368 + And likewise, low-tasklets are scheduled before work
369 + queue objects. This priority inversion probably needs
370 + to be fixed, though it is not an issue in our work with
371 + GPUs, since GPUs (and their associated klitirqds) are
372 + owned for exclusive time periods, so no inversions can
373 + occur.
374 + */
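In short, the intended hand-off looks roughly like this sketch (not from the patch; the tasklet, its owner, and the daemon index are hypothetical — compare the NVIDIA path added to __tasklet_schedule() in kernel/softirq.c below):

t->owner = rt_owner;              /* klitirqd inherits this task's priority */
litmus_tasklet_schedule(t, 0);    /* queue tasklet on klitirqd daemon #0 */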
375 +
376 +
377 +
378 +#define NR_LITMUS_SOFTIRQD CONFIG_NR_LITMUS_SOFTIRQD
379 +
380 +/* Spawns NR_LITMUS_SOFTIRQD klitirqd daemons.
381 + Actual launch of threads is deferred to kworker's
382 + workqueue, so daemons will likely not be immediately
383 + running when this function returns, though the required
384 + data will be initialized.
385 +
386 + @affinity_set: an array expressing the processor affinity
387 + for each of the NR_LITMUS_SOFTIRQD daemons. May be set
388 + to NULL for global scheduling.
389 +
390 + - Examples -
391 + 8-CPU system with two CPU clusters:
392 + affinity[] = {0, 0, 0, 0, 3, 3, 3, 3}
393 + NOTE: Daemons are not actually bound to the specified CPU, but
394 + rather to the cluster in which the CPU resides.
395 +
396 + 8-CPU system, partitioned:
397 + affinity[] = {0, 1, 2, 3, 4, 5, 6, 7}
398 +
399 + FIXME: change array to a CPU topology or array of cpumasks
400 +
401 + */
402 +void spawn_klitirqd(int* affinity);
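For the clustered example above, the call would look roughly like this (sketch only, assuming CONFIG_NR_LITMUS_SOFTIRQD == 8):

int affinity[NR_LITMUS_SOFTIRQD] = {0, 0, 0, 0, 3, 3, 3, 3};
spawn_klitirqd(affinity);         /* or spawn_klitirqd(NULL) for global scheduling */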
403 +
404 +
405 +/* Raises a flag to tell klitirqds to terminate.
406 + Termination is asynchronous, so some threads may still be
407 + running after this function returns. */
408 +void kill_klitirqd(void);
409 +
410 +
411 +/* Returns 1 if all NR_LITMUS_SOFTIRQD klitirqds are ready
412 + to handle tasklets. 0, otherwise.*/
413 +int klitirqd_is_ready(void);
414 +
415 +/* Returns 1 if no NR_LITMUS_SOFTIRQD klitirqds are ready
416 + to handle tasklets. 0, otherwise.*/
417 +int klitirqd_is_dead(void);
418 +
419 +/* Flushes all pending work out to the OS for regular
420 + * tasklet/work processing of the specified 'owner'
421 + *
422 + * PRECOND: klitirqd_thread must have a clear entry
423 + * in the GPU registry, otherwise this call will become
424 + * a no-op as work will loop back to the klitirqd_thread.
425 + *
426 + * Pass NULL for owner to flush ALL pending items.
427 + */
428 +void flush_pending(struct task_struct* klitirqd_thread,
429 + struct task_struct* owner);
430 +
431 +struct task_struct* get_klitirqd(unsigned int k_id);
432 +
433 +
434 +extern int __litmus_tasklet_schedule(
435 + struct tasklet_struct *t,
436 + unsigned int k_id);
437 +
438 +/* schedule a tasklet on klitirqd #k_id */
439 +static inline int litmus_tasklet_schedule(
440 + struct tasklet_struct *t,
441 + unsigned int k_id)
442 +{
443 + int ret = 0;
444 + if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
445 + ret = __litmus_tasklet_schedule(t, k_id);
446 + return(ret);
447 +}
448 +
449 +/* for use by __tasklet_schedule() */
450 +static inline int _litmus_tasklet_schedule(
451 + struct tasklet_struct *t,
452 + unsigned int k_id)
453 +{
454 + return(__litmus_tasklet_schedule(t, k_id));
455 +}
456 +
457 +
458 +
459 +
460 +extern int __litmus_tasklet_hi_schedule(struct tasklet_struct *t,
461 + unsigned int k_id);
462 +
463 +/* schedule a hi tasklet on klitirqd #k_id */
464 +static inline int litmus_tasklet_hi_schedule(struct tasklet_struct *t,
465 + unsigned int k_id)
466 +{
467 + int ret = 0;
468 + if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
469 + ret = __litmus_tasklet_hi_schedule(t, k_id);
470 + return(ret);
471 +}
472 +
473 +/* for use by __tasklet_hi_schedule() */
474 +static inline int _litmus_tasklet_hi_schedule(struct tasklet_struct *t,
475 + unsigned int k_id)
476 +{
477 + return(__litmus_tasklet_hi_schedule(t, k_id));
478 +}
479 +
480 +
481 +
482 +
483 +
484 +extern int __litmus_tasklet_hi_schedule_first(
485 + struct tasklet_struct *t,
486 + unsigned int k_id);
487 +
488 +/* schedule a hi tasklet on klitirqd #k_id on next go-around */
489 +/* PRECONDITION: Interrupts must be disabled. */
490 +static inline int litmus_tasklet_hi_schedule_first(
491 + struct tasklet_struct *t,
492 + unsigned int k_id)
493 +{
494 + int ret = 0;
495 + if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
496 + ret = __litmus_tasklet_hi_schedule_first(t, k_id);
497 + return(ret);
498 +}
499 +
500 +/* for use by __tasklet_hi_schedule_first() */
501 +static inline int _litmus_tasklet_hi_schedule_first(
502 + struct tasklet_struct *t,
503 + unsigned int k_id)
504 +{
505 + return(__litmus_tasklet_hi_schedule_first(t, k_id));
506 +}
507 +
508 +
509 +
510 +//////////////
511 +
512 +extern int __litmus_schedule_work(
513 + struct work_struct* w,
514 + unsigned int k_id);
515 +
516 +static inline int litmus_schedule_work(
517 + struct work_struct* w,
518 + unsigned int k_id)
519 +{
520 + return(__litmus_schedule_work(w, k_id));
521 +}
522 +
523 +
524 +
525 +///////////// mutex operations for client threads.
526 +
527 +void down_and_set_stat(struct task_struct* t,
528 + enum klitirqd_sem_status to_set,
529 + struct mutex* sem);
530 +
531 +void __down_and_reset_and_set_stat(struct task_struct* t,
532 + enum klitirqd_sem_status to_reset,
533 + enum klitirqd_sem_status to_set,
534 + struct mutex* sem);
535 +
536 +void up_and_set_stat(struct task_struct* t,
537 + enum klitirqd_sem_status to_set,
538 + struct mutex* sem);
539 +
540 +
541 +
542 +void release_klitirqd_lock(struct task_struct* t);
543 +
544 +int reacquire_klitirqd_lock(struct task_struct* t);
545 +
546 +#endif
547 diff --git a/include/litmus/nvidia_info.h b/include/litmus/nvidia_info.h
548 new file mode 100644
549 index 0000000..9e07a27
550 --- /dev/null
551 +++ b/include/litmus/nvidia_info.h
552 @@ -0,0 +1,38 @@
553 +#ifndef __LITMUS_NVIDIA_H
554 +#define __LITMUS_NVIDIA_H
555 +
556 +#include <linux/interrupt.h>
557 +
558 +
559 +#include <litmus/litmus_softirq.h>
560 +
561 +
562 +//#define NV_DEVICE_NUM NR_LITMUS_SOFTIRQD
563 +#define NV_DEVICE_NUM CONFIG_NV_DEVICE_NUM
564 +
565 +int init_nvidia_info(void);
566 +
567 +int is_nvidia_func(void* func_addr);
568 +
569 +void dump_nvidia_info(const struct tasklet_struct *t);
570 +
571 +
572 +// Returns the NVIDIA device number associated with the provided tasklet or work_struct.
573 +u32 get_tasklet_nv_device_num(const struct tasklet_struct *t);
574 +u32 get_work_nv_device_num(const struct work_struct *t);
575 +
576 +
577 +int init_nv_device_reg(void);
578 +//int get_nv_device_id(struct task_struct* owner);
579 +
580 +
581 +int reg_nv_device(int reg_device_id, int register_device);
582 +
583 +struct task_struct* get_nv_device_owner(u32 target_device_id);
584 +
585 +void lock_nv_registry(u32 reg_device_id, unsigned long* flags);
586 +void unlock_nv_registry(u32 reg_device_id, unsigned long* flags);
587 +
588 +void increment_nv_int_count(u32 device);
589 +
590 +#endif
591 diff --git a/include/litmus/preempt.h b/include/litmus/preempt.h
592 index 260c6fe..244924f 100644
593 --- a/include/litmus/preempt.h
594 +++ b/include/litmus/preempt.h
595 @@ -26,6 +26,7 @@ const char* sched_state_name(int s);
596 (x), #x, __FUNCTION__); \
597 } while (0);
598
599 +//#define TRACE_SCHED_STATE_CHANGE(x, y, cpu) /* ignore */
600 #define TRACE_SCHED_STATE_CHANGE(x, y, cpu) \
601 TRACE_STATE("[P%d] 0x%x (%s) -> 0x%x (%s)\n", \
602 cpu, (x), sched_state_name(x), \
603 diff --git a/include/litmus/rm_common.h b/include/litmus/rm_common.h
604 new file mode 100644
605 index 0000000..5991b0b
606 --- /dev/null
607 +++ b/include/litmus/rm_common.h
608 @@ -0,0 +1,25 @@
609 +/*
610 + * EDF common data structures and utility functions shared by all EDF
611 + * based scheduler plugins
612 + */
613 +
614 +/* CLEANUP: Add comments and make it less messy.
615 + *
616 + */
617 +
618 +#ifndef __UNC_RM_COMMON_H__
619 +#define __UNC_RM_COMMON_H__
620 +
621 +#include <litmus/rt_domain.h>
622 +
623 +void rm_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
624 + release_jobs_t release);
625 +
626 +int rm_higher_prio(struct task_struct* first,
627 + struct task_struct* second);
628 +
629 +int rm_ready_order(struct bheap_node* a, struct bheap_node* b);
630 +
631 +int rm_preemption_needed(rt_domain_t* rt, struct task_struct *t);
632 +
633 +#endif
634 diff --git a/include/litmus/rm_srt_common.h b/include/litmus/rm_srt_common.h
635 new file mode 100644
636 index 0000000..78aa287
637 --- /dev/null
638 +++ b/include/litmus/rm_srt_common.h
639 @@ -0,0 +1,25 @@
640 +/*
641 + * EDF common data structures and utility functions shared by all EDF
642 + * based scheduler plugins
643 + */
644 +
645 +/* CLEANUP: Add comments and make it less messy.
646 + *
647 + */
648 +
649 +#ifndef __UNC_RM_SRT_COMMON_H__
650 +#define __UNC_RM_SRT_COMMON_H__
651 +
652 +#include <litmus/rt_domain.h>
653 +
654 +void rm_srt_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
655 + release_jobs_t release);
656 +
657 +int rm_srt_higher_prio(struct task_struct* first,
658 + struct task_struct* second);
659 +
660 +int rm_srt_ready_order(struct bheap_node* a, struct bheap_node* b);
661 +
662 +int rm_srt_preemption_needed(rt_domain_t* rt, struct task_struct *t);
663 +
664 +#endif
665 diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
666 index 5de422c..53af3ce 100644
667 --- a/include/litmus/rt_param.h
668 +++ b/include/litmus/rt_param.h
669 @@ -69,6 +69,8 @@ struct control_page {
670 /* don't export internal data structures to user space (liblitmus) */
671 #ifdef __KERNEL__
672
673 +#include <linux/semaphore.h>
674 +
675 struct _rt_domain;
676 struct bheap_node;
677 struct release_heap;
678 @@ -94,6 +96,14 @@ struct rt_job {
679
680 struct pfair_param;
681
682 +enum klitirqd_sem_status
683 +{
684 + NEED_TO_REACQUIRE,
685 + REACQUIRING,
686 + NOT_HELD,
687 + HELD
688 +};
689 +
690 /* RT task parameters for scheduling extensions
691 * These parameters are inherited during clone and therefore must
692 * be explicitly set up before the task set is launched.
693 @@ -108,6 +118,38 @@ struct rt_param {
694 /* is the task present? (true if it can be scheduled) */
695 unsigned int present:1;
696
697 +#ifdef CONFIG_LITMUS_SOFTIRQD
698 + /* proxy threads have minimum priority by default */
699 + unsigned int is_proxy_thread:1;
700 +
701 + /* pointer to klitirqd currently working on this
702 + task_struct's behalf. only set by the task pointed
703 + to by klitirqd.
704 +
705 + ptr only valid if is_proxy_thread == 0
706 + */
707 + struct task_struct* cur_klitirqd;
708 +
709 + /* Used to implement mutual exclusion between
710 + * job and klitirqd execution. A job must always hold
711 + * its klitirqd_sem to execute. A klitirqd instance
712 + * must hold the semaphore before executing on behalf
713 + * of a job.
714 + */
715 + //struct semaphore klitirqd_sem;
716 + struct mutex klitirqd_sem;
717 +
718 + /* status of held klitirqd_sem, even if the held klitirqd_sem is from
719 + another task (only proxy threads do this though).
720 + */
721 + atomic_t klitirqd_sem_stat;
722 +#endif
723 +
724 +#ifdef CONFIG_LITMUS_NVIDIA
725 + /* number of top-half interrupts handled on behalf of current job */
726 + atomic_t nv_int_count;
727 +#endif
728 +
729 #ifdef CONFIG_LITMUS_LOCKING
730 /* Is the task being priority-boosted by a locking protocol? */
731 unsigned int priority_boosted:1;
732 @@ -128,7 +170,7 @@ struct rt_param {
733 * an increased task priority.
734 */
735 struct task_struct* inh_task;
736 -
737 +
738 #ifdef CONFIG_NP_SECTION
739 /* For the FMLP under PSN-EDF, it is required to make the task
740 * non-preemptive from kernel space. In order not to interfere with
741 diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h
742 index 6e7cabd..8fdf05d 100644
743 --- a/include/litmus/sched_plugin.h
744 +++ b/include/litmus/sched_plugin.h
745 @@ -11,6 +11,8 @@
746 #include <litmus/locking.h>
747 #endif
748
749 +#include <linux/interrupt.h>
750 +
751 /************************ setup/tear down ********************/
752
753 typedef long (*activate_plugin_t) (void);
754 @@ -29,7 +31,6 @@ typedef struct task_struct* (*schedule_t)(struct task_struct * prev);
755 */
756 typedef void (*finish_switch_t)(struct task_struct *prev);
757
758 -
759 /********************* task state changes ********************/
760
761 /* Called to setup a new real-time task.
762 @@ -58,6 +59,21 @@ typedef void (*task_exit_t) (struct task_struct *);
763 typedef long (*allocate_lock_t) (struct litmus_lock **lock, int type,
764 void* __user config);
765
766 +/* Called to change inheritance levels of given task */
767 +typedef void (*set_prio_inh_t)(struct task_struct* t,
768 + struct task_struct* prio_inh);
769 +typedef void (*clear_prio_inh_t)(struct task_struct* t);
770 +
771 +
772 +typedef void (*set_prio_inh_klitirq_t)(struct task_struct* klitirqd,
773 + struct task_struct* old_owner,
774 + struct task_struct* new_owner);
775 +typedef void (*clear_prio_inh_klitirqd_t)(struct task_struct* klitirqd,
776 + struct task_struct* old_owner);
777 +
778 +
779 +typedef int (*enqueue_pai_tasklet_t)(struct tasklet_struct* tasklet);
780 +typedef void (*run_tasklets_t)(struct task_struct* next);
781
782 /********************* sys call backends ********************/
783 /* This function causes the caller to sleep until the next release */
784 @@ -88,7 +104,7 @@ struct sched_plugin {
785 /* task state changes */
786 admit_task_t admit_task;
787
788 - task_new_t task_new;
789 + task_new_t task_new;
790 task_wake_up_t task_wake_up;
791 task_block_t task_block;
792 task_exit_t task_exit;
793 @@ -96,6 +112,19 @@ struct sched_plugin {
794 #ifdef CONFIG_LITMUS_LOCKING
795 /* locking protocols */
796 allocate_lock_t allocate_lock;
797 +
798 + set_prio_inh_t set_prio_inh;
799 + clear_prio_inh_t clear_prio_inh;
800 +#endif
801 +
802 +#ifdef CONFIG_LITMUS_SOFTIRQD
803 + set_prio_inh_klitirq_t set_prio_inh_klitirqd;
804 + clear_prio_inh_klitirqd_t clear_prio_inh_klitirqd;
805 +#endif
806 +
807 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
808 + enqueue_pai_tasklet_t enqueue_pai_tasklet;
809 + run_tasklets_t run_tasklets;
810 #endif
811 } __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
812
813 diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h
814 index 7ca34cb..232c758 100644
815 --- a/include/litmus/sched_trace.h
816 +++ b/include/litmus/sched_trace.h
817 @@ -11,12 +11,12 @@ struct st_trace_header {
818 u8 cpu; /* On which CPU was it recorded? */
819 u16 pid; /* PID of the task. */
820 u32 job; /* The job sequence number. */
821 -};
822 +} __attribute__((packed));
823
824 #define ST_NAME_LEN 16
825 struct st_name_data {
826 char cmd[ST_NAME_LEN];/* The name of the executable of this process. */
827 -};
828 +} __attribute__((packed));
829
830 struct st_param_data { /* regular params */
831 u32 wcet;
832 @@ -25,30 +25,29 @@ struct st_param_data { /* regular params */
833 u8 partition;
834 u8 class;
835 u8 __unused[2];
836 -};
837 +} __attribute__((packed));
838
839 struct st_release_data { /* A job is was/is going to be released. */
840 u64 release; /* What's the release time? */
841 u64 deadline; /* By when must it finish? */
842 -};
843 +} __attribute__((packed));
844
845 struct st_assigned_data { /* A job was asigned to a CPU. */
846 u64 when;
847 u8 target; /* Where should it execute? */
848 u8 __unused[7];
849 -};
850 +} __attribute__((packed));
851
852 struct st_switch_to_data { /* A process was switched to on a given CPU. */
853 u64 when; /* When did this occur? */
854 u32 exec_time; /* Time the current job has executed. */
855 u8 __unused[4];
856 -
857 -};
858 +} __attribute__((packed));
859
860 struct st_switch_away_data { /* A process was switched away from on a given CPU. */
861 u64 when;
862 u64 exec_time;
863 -};
864 +} __attribute__((packed));
865
866 struct st_completion_data { /* A job completed. */
867 u64 when;
868 @@ -56,35 +55,92 @@ struct st_completion_data { /* A job completed. */
869 * next task automatically; set to 0 otherwise.
870 */
871 u8 __uflags:7;
872 - u8 __unused[7];
873 -};
874 + u16 nv_int_count;
875 + u8 __unused[5];
876 +} __attribute__((packed));
877
878 struct st_block_data { /* A task blocks. */
879 u64 when;
880 u64 __unused;
881 -};
882 +} __attribute__((packed));
883
884 struct st_resume_data { /* A task resumes. */
885 u64 when;
886 u64 __unused;
887 -};
888 +} __attribute__((packed));
889
890 struct st_action_data {
891 u64 when;
892 u8 action;
893 u8 __unused[7];
894 -};
895 +} __attribute__((packed));
896
897 struct st_sys_release_data {
898 u64 when;
899 u64 release;
900 -};
901 +} __attribute__((packed));
902 +
903 +
904 +struct st_tasklet_release_data {
905 + u64 when;
906 + u64 __unused;
907 +} __attribute__((packed));
908 +
909 +struct st_tasklet_begin_data {
910 + u64 when;
911 + u16 exe_pid;
912 + u8 __unused[6];
913 +} __attribute__((packed));
914 +
915 +struct st_tasklet_end_data {
916 + u64 when;
917 + u16 exe_pid;
918 + u8 flushed;
919 + u8 __unused[5];
920 +} __attribute__((packed));
921 +
922 +
923 +struct st_work_release_data {
924 + u64 when;
925 + u64 __unused;
926 +} __attribute__((packed));
927 +
928 +struct st_work_begin_data {
929 + u64 when;
930 + u16 exe_pid;
931 + u8 __unused[6];
932 +} __attribute__((packed));
933 +
934 +struct st_work_end_data {
935 + u64 when;
936 + u16 exe_pid;
937 + u8 flushed;
938 + u8 __unused[5];
939 +} __attribute__((packed));
940 +
941 +struct st_effective_priority_change_data {
942 + u64 when;
943 + u16 inh_pid;
944 + u8 __unused[6];
945 +} __attribute__((packed));
946 +
947 +struct st_nv_interrupt_begin_data {
948 + u64 when;
949 + u32 device;
950 + u32 serialNumber;
951 +} __attribute__((packed));
952 +
953 +struct st_nv_interrupt_end_data {
954 + u64 when;
955 + u32 device;
956 + u32 serialNumber;
957 +} __attribute__((packed));
958
959 #define DATA(x) struct st_ ## x ## _data x;
960
961 typedef enum {
962 - ST_NAME = 1, /* Start at one, so that we can spot
963 - * uninitialized records. */
964 + ST_NAME = 1, /* Start at one, so that we can spot
965 + * uninitialized records. */
966 ST_PARAM,
967 ST_RELEASE,
968 ST_ASSIGNED,
969 @@ -94,7 +150,16 @@ typedef enum {
970 ST_BLOCK,
971 ST_RESUME,
972 ST_ACTION,
973 - ST_SYS_RELEASE
974 + ST_SYS_RELEASE,
975 + ST_TASKLET_RELEASE,
976 + ST_TASKLET_BEGIN,
977 + ST_TASKLET_END,
978 + ST_WORK_RELEASE,
979 + ST_WORK_BEGIN,
980 + ST_WORK_END,
981 + ST_EFF_PRIO_CHANGE,
982 + ST_NV_INTERRUPT_BEGIN,
983 + ST_NV_INTERRUPT_END,
984 } st_event_record_type_t;
985
986 struct st_event_record {
987 @@ -113,8 +178,17 @@ struct st_event_record {
988 DATA(resume);
989 DATA(action);
990 DATA(sys_release);
991 + DATA(tasklet_release);
992 + DATA(tasklet_begin);
993 + DATA(tasklet_end);
994 + DATA(work_release);
995 + DATA(work_begin);
996 + DATA(work_end);
997 + DATA(effective_priority_change);
998 + DATA(nv_interrupt_begin);
999 + DATA(nv_interrupt_end);
1000 } data;
1001 -};
1002 +} __attribute__((packed));
1003
1004 #undef DATA
1005
1006 @@ -129,6 +203,8 @@ struct st_event_record {
1007 ft_event1(id, callback, task)
1008 #define SCHED_TRACE2(id, callback, task, xtra) \
1009 ft_event2(id, callback, task, xtra)
1010 +#define SCHED_TRACE3(id, callback, task, xtra1, xtra2) \
1011 + ft_event3(id, callback, task, xtra1, xtra2)
1012
1013 /* provide prototypes; needed on sparc64 */
1014 #ifndef NO_TASK_TRACE_DECLS
1015 @@ -155,12 +231,45 @@ feather_callback void do_sched_trace_action(unsigned long id,
1016 feather_callback void do_sched_trace_sys_release(unsigned long id,
1017 lt_t* start);
1018
1019 +
1020 +feather_callback void do_sched_trace_tasklet_release(unsigned long id,
1021 + struct task_struct* owner);
1022 +feather_callback void do_sched_trace_tasklet_begin(unsigned long id,
1023 + struct task_struct* owner);
1024 +feather_callback void do_sched_trace_tasklet_end(unsigned long id,
1025 + struct task_struct* owner,
1026 + unsigned long flushed);
1027 +
1028 +feather_callback void do_sched_trace_work_release(unsigned long id,
1029 + struct task_struct* owner);
1030 +feather_callback void do_sched_trace_work_begin(unsigned long id,
1031 + struct task_struct* owner,
1032 + struct task_struct* exe);
1033 +feather_callback void do_sched_trace_work_end(unsigned long id,
1034 + struct task_struct* owner,
1035 + struct task_struct* exe,
1036 + unsigned long flushed);
1037 +
1038 +feather_callback void do_sched_trace_eff_prio_change(unsigned long id,
1039 + struct task_struct* task,
1040 + struct task_struct* inh);
1041 +
1042 +feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id,
1043 + u32 device);
1044 +feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id,
1045 + unsigned long unused);
1046 +
1047 +
1048 +/* returns true if we're tracing an interrupt on current CPU */
1049 +/* int is_interrupt_tracing_active(void); */
1050 +
1051 #endif
1052
1053 #else
1054
1055 #define SCHED_TRACE(id, callback, task) /* no tracing */
1056 #define SCHED_TRACE2(id, callback, task, xtra) /* no tracing */
1057 +#define SCHED_TRACE3(id, callback, task, xtra1, xtra2)
1058
1059 #endif
1060
1061 @@ -193,6 +302,35 @@ feather_callback void do_sched_trace_sys_release(unsigned long id,
1062 SCHED_TRACE(SCHED_TRACE_BASE_ID + 10, do_sched_trace_sys_release, when)
1063
1064
1065 +#define sched_trace_tasklet_release(t) \
1066 + SCHED_TRACE(SCHED_TRACE_BASE_ID + 11, do_sched_trace_tasklet_release, t)
1067 +
1068 +#define sched_trace_tasklet_begin(t) \
1069 + SCHED_TRACE(SCHED_TRACE_BASE_ID + 12, do_sched_trace_tasklet_begin, t)
1070 +
1071 +#define sched_trace_tasklet_end(t, flushed) \
1072 + SCHED_TRACE2(SCHED_TRACE_BASE_ID + 13, do_sched_trace_tasklet_end, t, flushed)
1073 +
1074 +
1075 +#define sched_trace_work_release(t) \
1076 + SCHED_TRACE(SCHED_TRACE_BASE_ID + 14, do_sched_trace_work_release, t)
1077 +
1078 +#define sched_trace_work_begin(t, e) \
1079 + SCHED_TRACE2(SCHED_TRACE_BASE_ID + 15, do_sched_trace_work_begin, t, e)
1080 +
1081 +#define sched_trace_work_end(t, e, flushed) \
1082 + SCHED_TRACE3(SCHED_TRACE_BASE_ID + 16, do_sched_trace_work_end, t, e, flushed)
1083 +
1084 +
1085 +#define sched_trace_eff_prio_change(t, inh) \
1086 + SCHED_TRACE2(SCHED_TRACE_BASE_ID + 17, do_sched_trace_eff_prio_change, t, inh)
1087 +
1088 +
1089 +#define sched_trace_nv_interrupt_begin(d) \
1090 + SCHED_TRACE(SCHED_TRACE_BASE_ID + 18, do_sched_trace_nv_interrupt_begin, d)
1091 +#define sched_trace_nv_interrupt_end(d) \
1092 + SCHED_TRACE(SCHED_TRACE_BASE_ID + 19, do_sched_trace_nv_interrupt_end, d)
1093 +
1094 #define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */
1095
1096 #endif /* __KERNEL__ */
1097 diff --git a/include/litmus/sched_trace_external.h b/include/litmus/sched_trace_external.h
1098 new file mode 100644
1099 index 0000000..e70e45e
1100 --- /dev/null
1101 +++ b/include/litmus/sched_trace_external.h
1102 @@ -0,0 +1,78 @@
1103 +/*
1104 + * sched_trace_external.h -- record scheduler events to a byte stream for offline analysis.
1105 + */
1106 +#ifndef _LINUX_SCHED_TRACE_EXTERNAL_H_
1107 +#define _LINUX_SCHED_TRACE_EXTERNAL_H_
1108 +
1109 +
1110 +#ifdef CONFIG_SCHED_TASK_TRACE
1111 +extern void __sched_trace_tasklet_begin_external(struct task_struct* t);
1112 +static inline void sched_trace_tasklet_begin_external(struct task_struct* t)
1113 +{
1114 + __sched_trace_tasklet_begin_external(t);
1115 +}
1116 +
1117 +extern void __sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed);
1118 +static inline void sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed)
1119 +{
1120 + __sched_trace_tasklet_end_external(t, flushed);
1121 +}
1122 +
1123 +extern void __sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e);
1124 +static inline void sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e)
1125 +{
1126 + __sched_trace_work_begin_external(t, e);
1127 +}
1128 +
1129 +extern void __sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f);
1130 +static inline void sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f)
1131 +{
1132 + __sched_trace_work_end_external(t, e, f);
1133 +}
1134 +
1135 +#ifdef CONFIG_LITMUS_NVIDIA
1136 +extern void __sched_trace_nv_interrupt_begin_external(u32 device);
1137 +static inline void sched_trace_nv_interrupt_begin_external(u32 device)
1138 +{
1139 + __sched_trace_nv_interrupt_begin_external(device);
1140 +}
1141 +
1142 +extern void __sched_trace_nv_interrupt_end_external(u32 device);
1143 +static inline void sched_trace_nv_interrupt_end_external(u32 device)
1144 +{
1145 + __sched_trace_nv_interrupt_end_external(device);
1146 +}
1147 +#endif
1148 +
1149 +#else
1150 +
1151 +// no tracing.
1152 +static inline void sched_trace_tasklet_begin_external(struct task_struct* t){}
1153 +static inline void sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed){}
1154 +static inline void sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e){}
1155 +static inline void sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f){}
1156 +
1157 +#ifdef CONFIG_LITMUS_NVIDIA
1158 +static inline void sched_trace_nv_interrupt_begin_external(u32 device){}
1159 +static inline void sched_trace_nv_interrupt_end_external(u32 device){}
1160 +#endif
1161 +
1162 +#endif
1163 +
1164 +
1165 +#ifdef CONFIG_LITMUS_NVIDIA
1166 +
1167 +#define EX_TS(evt) \
1168 +extern void __##evt(void); \
1169 +static inline void EX_##evt(void) { __##evt(); }
1170 +
1171 +EX_TS(TS_NV_TOPISR_START)
1172 +EX_TS(TS_NV_TOPISR_END)
1173 +EX_TS(TS_NV_BOTISR_START)
1174 +EX_TS(TS_NV_BOTISR_END)
1175 +EX_TS(TS_NV_RELEASE_BOTISR_START)
1176 +EX_TS(TS_NV_RELEASE_BOTISR_END)
1177 +
1178 +#endif
1179 +
1180 +#endif
1181 diff --git a/include/litmus/trace.h b/include/litmus/trace.h
1182 index 05f4872..09d409b 100644
1183 --- a/include/litmus/trace.h
1184 +++ b/include/litmus/trace.h
1185 @@ -100,4 +100,24 @@ feather_callback void save_timestamp_cpu(unsigned long event, unsigned long cpu)
1186 #define TS_SEND_RESCHED_END DTIMESTAMP(191, TSK_UNKNOWN)
1187
1188
1189 +
1190 +#ifdef CONFIG_LITMUS_NVIDIA
1191 +
1192 +#define TS_NV_TOPISR_START TIMESTAMP(200)
1193 +#define TS_NV_TOPISR_END TIMESTAMP(201)
1194 +
1195 +#define TS_NV_BOTISR_START TIMESTAMP(202)
1196 +#define TS_NV_BOTISR_END TIMESTAMP(203)
1197 +
1198 +#define TS_NV_RELEASE_BOTISR_START TIMESTAMP(204)
1199 +#define TS_NV_RELEASE_BOTISR_END TIMESTAMP(205)
1200 +
1201 +#endif
1202 +
1203 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1204 +#define TS_NV_SCHED_BOTISR_START TIMESTAMP(206)
1205 +#define TS_NV_SCHED_BOTISR_END TIMESTAMP(207)
1206 +#endif
1207 +
1208 +
1209 #endif /* !_SYS_TRACE_H_ */
1210 diff --git a/include/litmus/unistd_32.h b/include/litmus/unistd_32.h
1211 index 94264c2..c6efc4c 100644
1212 --- a/include/litmus/unistd_32.h
1213 +++ b/include/litmus/unistd_32.h
1214 @@ -17,5 +17,6 @@
1215 #define __NR_wait_for_ts_release __LSC(9)
1216 #define __NR_release_ts __LSC(10)
1217 #define __NR_null_call __LSC(11)
1218 +#define __NR_register_nv_device __LSC(12)
1219
1220 -#define NR_litmus_syscalls 12
1221 +#define NR_litmus_syscalls 13
1222 diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h
1223 index d5ced0d..b44a7c3 100644
1224 --- a/include/litmus/unistd_64.h
1225 +++ b/include/litmus/unistd_64.h
1226 @@ -29,5 +29,8 @@ __SYSCALL(__NR_wait_for_ts_release, sys_wait_for_ts_release)
1227 __SYSCALL(__NR_release_ts, sys_release_ts)
1228 #define __NR_null_call __LSC(11)
1229 __SYSCALL(__NR_null_call, sys_null_call)
1230 +#define __NR_register_nv_device __LSC(12)
1231 +__SYSCALL(__NR_register_nv_device, sys_register_nv_device)
1232
1233 -#define NR_litmus_syscalls 12
1234 +
1235 +#define NR_litmus_syscalls 13
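A user-space sketch of claiming a GPU through the new syscall (not from the patch; the register/unregister flag convention follows reg_nv_device(int reg_device_id, int register_device) in litmus/nvidia_info.h but is an assumption here):

#include <unistd.h>
#include <sys/syscall.h>

/* claim GPU 0 before issuing work to it */
syscall(__NR_register_nv_device, 0, 1);
/* ... GPU work; driver tasklets/work are now charged to this task ... */
syscall(__NR_register_nv_device, 0, 0);   /* release GPU 0 */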
1236 diff --git a/kernel/lockdep.c b/kernel/lockdep.c
1237 index f2852a5..ebff2cf 100644
1238 --- a/kernel/lockdep.c
1239 +++ b/kernel/lockdep.c
1240 @@ -530,7 +530,7 @@ static void print_lock(struct held_lock *hlock)
1241 print_ip_sym(hlock->acquire_ip);
1242 }
1243
1244 -static void lockdep_print_held_locks(struct task_struct *curr)
1245 +void lockdep_print_held_locks(struct task_struct *curr)
1246 {
1247 int i, depth = curr->lockdep_depth;
1248
1249 @@ -546,6 +546,7 @@ static void lockdep_print_held_locks(struct task_struct *curr)
1250 print_lock(curr->held_locks + i);
1251 }
1252 }
1253 +EXPORT_SYMBOL(lockdep_print_held_locks);
1254
1255 static void print_kernel_version(void)
1256 {
1257 diff --git a/kernel/mutex.c b/kernel/mutex.c
1258 index 200407c..435685e 100644
1259 --- a/kernel/mutex.c
1260 +++ b/kernel/mutex.c
1261 @@ -496,3 +496,144 @@ int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock)
1262 return 1;
1263 }
1264 EXPORT_SYMBOL(atomic_dec_and_mutex_lock);
1265 +
1266 +
1267 +
1268 +
1269 +
1270 +
1271 +
1272 +
1273 +//__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, _RET_IP_);
1274 +
1275 +void mutex_lock_sfx(struct mutex *lock,
1276 + side_effect_t pre, unsigned long pre_arg,
1277 + side_effect_t post, unsigned long post_arg)
1278 +{
1279 + long state = TASK_UNINTERRUPTIBLE;
1280 + unsigned int subclass = 0;
1281 + unsigned long ip = _RET_IP_;
1282 +
1283 +
1284 + struct task_struct *task = current;
1285 + struct mutex_waiter waiter;
1286 + unsigned long flags;
1287 +
1288 + preempt_disable();
1289 + mutex_acquire(&lock->dep_map, subclass, 0, ip);
1290 +
1291 + spin_lock_mutex(&lock->wait_lock, flags);
1292 +
1293 + if(pre)
1294 + {
1295 + if(unlikely(pre(pre_arg)))
1296 + {
1297 + // this will confuse lockdep's CONFIG_PROVE_LOCKING...
1298 + spin_unlock_mutex(&lock->wait_lock, flags);
1299 + preempt_enable();
1300 + return;
1301 + }
1302 + }
1303 +
1304 + debug_mutex_lock_common(lock, &waiter);
1305 + debug_mutex_add_waiter(lock, &waiter, task_thread_info(task));
1306 +
1307 + /* add waiting tasks to the end of the waitqueue (FIFO): */
1308 + list_add_tail(&waiter.list, &lock->wait_list);
1309 + waiter.task = task;
1310 +
1311 + if (atomic_xchg(&lock->count, -1) == 1)
1312 + goto done;
1313 +
1314 + lock_contended(&lock->dep_map, ip);
1315 +
1316 + for (;;) {
1317 + /*
1318 + * Lets try to take the lock again - this is needed even if
1319 + * we get here for the first time (shortly after failing to
1320 + * acquire the lock), to make sure that we get a wakeup once
1321 + * it's unlocked. Later on, if we sleep, this is the
1322 + * operation that gives us the lock. We xchg it to -1, so
1323 + * that when we release the lock, we properly wake up the
1324 + * other waiters:
1325 + */
1326 + if (atomic_xchg(&lock->count, -1) == 1)
1327 + break;
1328 +
1329 + __set_task_state(task, state);
1330 +
1331 + /* didn't get the lock, go to sleep: */
1332 + spin_unlock_mutex(&lock->wait_lock, flags);
1333 + preempt_enable_no_resched();
1334 + schedule();
1335 + preempt_disable();
1336 + spin_lock_mutex(&lock->wait_lock, flags);
1337 + }
1338 +
1339 +done:
1340 + lock_acquired(&lock->dep_map, ip);
1341 + /* got the lock - rejoice! */
1342 + mutex_remove_waiter(lock, &waiter, current_thread_info());
1343 + mutex_set_owner(lock);
1344 +
1345 + /* set it to 0 if there are no waiters left: */
1346 + if (likely(list_empty(&lock->wait_list)))
1347 + atomic_set(&lock->count, 0);
1348 +
1349 + if(post)
1350 + post(post_arg);
1351 +
1352 + spin_unlock_mutex(&lock->wait_lock, flags);
1353 +
1354 + debug_mutex_free_waiter(&waiter);
1355 + preempt_enable();
1356 +
1357 + //return 0;
1358 +}
1359 +EXPORT_SYMBOL(mutex_lock_sfx);
1360 +
1361 +
1362 +
1363 +//__mutex_unlock_common_slowpath(lock_count, 1);
1364 +
1365 +void mutex_unlock_sfx(struct mutex *lock,
1366 + side_effect_t pre, unsigned long pre_arg,
1367 + side_effect_t post, unsigned long post_arg)
1368 +{
1369 + //struct mutex *lock = container_of(lock_count, struct mutex, count);
1370 + unsigned long flags;
1371 +
1372 + spin_lock_mutex(&lock->wait_lock, flags);
1373 +
1374 + if(pre)
1375 + pre(pre_arg);
1376 +
1377 + //mutex_release(&lock->dep_map, nested, _RET_IP_);
1378 + mutex_release(&lock->dep_map, 1, _RET_IP_);
1379 + debug_mutex_unlock(lock);
1380 +
1381 + /*
1382 + * some architectures leave the lock unlocked in the fastpath failure
1383 + * case, others need to leave it locked. In the latter case we have to
1384 + * unlock it here
1385 + */
1386 + if (__mutex_slowpath_needs_to_unlock())
1387 + atomic_set(&lock->count, 1);
1388 +
1389 + if (!list_empty(&lock->wait_list)) {
1390 + /* get the first entry from the wait-list: */
1391 + struct mutex_waiter *waiter =
1392 + list_entry(lock->wait_list.next,
1393 + struct mutex_waiter, list);
1394 +
1395 + debug_mutex_wake_waiter(lock, waiter);
1396 +
1397 + wake_up_process(waiter->task);
1398 + }
1399 +
1400 + if(post)
1401 + post(post_arg);
1402 +
1403 + spin_unlock_mutex(&lock->wait_lock, flags);
1404 +}
1405 +EXPORT_SYMBOL(mutex_unlock_sfx);
1406 diff --git a/kernel/sched.c b/kernel/sched.c
1407 index c5d7750..08b725c 100644
1408 --- a/kernel/sched.c
1409 +++ b/kernel/sched.c
1410 @@ -82,6 +82,10 @@
1411 #include <litmus/sched_trace.h>
1412 #include <litmus/trace.h>
1413
1414 +#ifdef CONFIG_LITMUS_SOFTIRQD
1415 +#include <litmus/litmus_softirq.h>
1416 +#endif
1417 +
1418 static void litmus_tick(struct rq*, struct task_struct*);
1419
1420 #define CREATE_TRACE_POINTS
1421 @@ -2879,6 +2883,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
1422 struct mm_struct *mm, *oldmm;
1423
1424 prepare_task_switch(rq, prev, next);
1425 +
1426 trace_sched_switch(prev, next);
1427 mm = next->mm;
1428 oldmm = prev->active_mm;
1429 @@ -3789,6 +3794,7 @@ pick_next_task(struct rq *rq)
1430 }
1431 }
1432
1433 +
1434 /*
1435 * schedule() is the main scheduler function.
1436 */
1437 @@ -3807,6 +3813,10 @@ need_resched:
1438 rcu_note_context_switch(cpu);
1439 prev = rq->curr;
1440
1441 +#ifdef CONFIG_LITMUS_SOFTIRQD
1442 + release_klitirqd_lock(prev);
1443 +#endif
1444 +
1445 release_kernel_lock(prev);
1446 need_resched_nonpreemptible:
1447 TS_SCHED_START;
1448 @@ -3882,15 +3892,26 @@ need_resched_nonpreemptible:
1449
1450 if (sched_state_validate_switch() || unlikely(reacquire_kernel_lock(prev)))
1451 goto need_resched_nonpreemptible;
1452 -
1453 +
1454 preempt_enable_no_resched();
1455 +
1456 if (need_resched())
1457 goto need_resched;
1458
1459 +#ifdef CONFIG_LITMUS_SOFTIRQD
1460 + reacquire_klitirqd_lock(prev);
1461 +#endif
1462 +
1463 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1464 + litmus->run_tasklets(prev);
1465 +#endif
1466 +
1467 srp_ceiling_block();
1468 }
1469 EXPORT_SYMBOL(schedule);
1470
1471 +
1472 +
1473 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
1474 /*
1475 * Look out! "owner" is an entirely speculative pointer
1476 @@ -4051,6 +4072,7 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
1477 }
1478 }
1479
1480 +
1481 /**
1482 * __wake_up - wake up threads blocked on a waitqueue.
1483 * @q: the waitqueue
1484 @@ -4236,6 +4258,12 @@ void __sched wait_for_completion(struct completion *x)
1485 }
1486 EXPORT_SYMBOL(wait_for_completion);
1487
1488 +void __sched __wait_for_completion_locked(struct completion *x)
1489 +{
1490 + do_wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
1491 +}
1492 +EXPORT_SYMBOL(__wait_for_completion_locked);
1493 +
1494 /**
1495 * wait_for_completion_timeout: - waits for completion of a task (w/timeout)
1496 * @x: holds the state of this particular completion
1497 diff --git a/kernel/semaphore.c b/kernel/semaphore.c
1498 index 94a62c0..c947a04 100644
1499 --- a/kernel/semaphore.c
1500 +++ b/kernel/semaphore.c
1501 @@ -33,11 +33,11 @@
1502 #include <linux/spinlock.h>
1503 #include <linux/ftrace.h>
1504
1505 -static noinline void __down(struct semaphore *sem);
1506 +noinline void __down(struct semaphore *sem);
1507 static noinline int __down_interruptible(struct semaphore *sem);
1508 static noinline int __down_killable(struct semaphore *sem);
1509 static noinline int __down_timeout(struct semaphore *sem, long jiffies);
1510 -static noinline void __up(struct semaphore *sem);
1511 +noinline void __up(struct semaphore *sem);
1512
1513 /**
1514 * down - acquire the semaphore
1515 @@ -190,11 +190,13 @@ EXPORT_SYMBOL(up);
1516
1517 /* Functions for the contended case */
1518
1519 +/*
1520 struct semaphore_waiter {
1521 struct list_head list;
1522 struct task_struct *task;
1523 int up;
1524 };
1525 + */
1526
1527 /*
1528 * Because this function is inlined, the 'state' parameter will be
1529 @@ -233,10 +235,12 @@ static inline int __sched __down_common(struct semaphore *sem, long state,
1530 return -EINTR;
1531 }
1532
1533 -static noinline void __sched __down(struct semaphore *sem)
1534 +noinline void __sched __down(struct semaphore *sem)
1535 {
1536 __down_common(sem, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
1537 }
1538 +EXPORT_SYMBOL(__down);
1539 +
1540
1541 static noinline int __sched __down_interruptible(struct semaphore *sem)
1542 {
1543 @@ -253,7 +257,7 @@ static noinline int __sched __down_timeout(struct semaphore *sem, long jiffies)
1544 return __down_common(sem, TASK_UNINTERRUPTIBLE, jiffies);
1545 }
1546
1547 -static noinline void __sched __up(struct semaphore *sem)
1548 +noinline void __sched __up(struct semaphore *sem)
1549 {
1550 struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list,
1551 struct semaphore_waiter, list);
1552 @@ -261,3 +265,4 @@ static noinline void __sched __up(struct semaphore *sem)
1553 waiter->up = 1;
1554 wake_up_process(waiter->task);
1555 }
1556 +EXPORT_SYMBOL(__up);
1557 \ No newline at end of file
1558 diff --git a/kernel/softirq.c b/kernel/softirq.c
1559 index 07b4f1b..7a6f500 100644
1560 --- a/kernel/softirq.c
1561 +++ b/kernel/softirq.c
1562 @@ -29,6 +29,15 @@
1563 #include <trace/events/irq.h>
1564
1565 #include <asm/irq.h>
1566 +
1567 +#include <litmus/litmus.h>
1568 +#include <litmus/sched_trace.h>
1569 +
1570 +#ifdef CONFIG_LITMUS_NVIDIA
1571 +#include <litmus/nvidia_info.h>
1572 +#include <litmus/trace.h>
1573 +#endif
1574 +
1575 /*
1576 - No shared variables, all the data are CPU local.
1577 - If a softirq needs serialization, let it serialize itself
1578 @@ -54,7 +63,7 @@ EXPORT_SYMBOL(irq_stat);
1579
1580 static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
1581
1582 -static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
1583 +static DEFINE_PER_CPU(struct task_struct *, ksoftirqd) = NULL;
1584
1585 char *softirq_to_name[NR_SOFTIRQS] = {
1586 "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
1587 @@ -177,6 +186,7 @@ void local_bh_enable_ip(unsigned long ip)
1588 }
1589 EXPORT_SYMBOL(local_bh_enable_ip);
1590
1591 +
1592 /*
1593 * We restart softirq processing MAX_SOFTIRQ_RESTART times,
1594 * and we fall back to softirqd after that.
1595 @@ -187,34 +197,30 @@ EXPORT_SYMBOL(local_bh_enable_ip);
1596 * should not be able to lock up the box.
1597 */
1598 #define MAX_SOFTIRQ_RESTART 10
1599 -
1600 -asmlinkage void __do_softirq(void)
1601 +static void ____do_softirq(void)
1602 {
1603 - struct softirq_action *h;
1604 __u32 pending;
1605 - int max_restart = MAX_SOFTIRQ_RESTART;
1606 +
1607 + struct softirq_action *h;
1608 int cpu;
1609 -
1610 +
1611 pending = local_softirq_pending();
1612 +
1613 account_system_vtime(current);
1614 -
1615 - __local_bh_disable((unsigned long)__builtin_return_address(0));
1616 - lockdep_softirq_enter();
1617 -
1618 +
1619 cpu = smp_processor_id();
1620 -restart:
1621 - /* Reset the pending bitmask before enabling irqs */
1622 - set_softirq_pending(0);
1623
1624 + set_softirq_pending(0);
1625 +
1626 local_irq_enable();
1627 -
1628 +
1629 h = softirq_vec;
1630 -
1631 +
1632 do {
1633 if (pending & 1) {
1634 int prev_count = preempt_count();
1635 kstat_incr_softirqs_this_cpu(h - softirq_vec);
1636 -
1637 +
1638 trace_softirq_entry(h, softirq_vec);
1639 h->action(h);
1640 trace_softirq_exit(h, softirq_vec);
1641 @@ -226,26 +232,70 @@ restart:
1642 h->action, prev_count, preempt_count());
1643 preempt_count() = prev_count;
1644 }
1645 -
1646 +
1647 rcu_bh_qs(cpu);
1648 }
1649 h++;
1650 pending >>= 1;
1651 } while (pending);
1652 -
1653 +
1654 local_irq_disable();
1655 +}
1656 +
1657 +static void ___do_softirq(void)
1658 +{
1659 + __u32 pending;
1660 +
1661 + //struct softirq_action *h;
1662 + int max_restart = MAX_SOFTIRQ_RESTART;
1663 + //int cpu;
1664 +
1665 + pending = local_softirq_pending();
1666 +
1667 +restart:
1668 + ____do_softirq();
1669
1670 pending = local_softirq_pending();
1671 if (pending && --max_restart)
1672 goto restart;
1673
1674 if (pending)
1675 + {
1676 wakeup_softirqd();
1677 + }
1678 +}
1679
1680 +asmlinkage void __do_softirq(void)
1681 +{
1682 +#ifdef LITMUS_THREAD_ALL_SOFTIRQ
1683 + /* Skip straight to wakeup_softirqd() if we're using
1684 + LITMUS_THREAD_ALL_SOFTIRQ (unless there is really high-priority work waiting). */
1685 + struct task_struct *tsk = __get_cpu_var(ksoftirqd);
1686 +
1687 + if(tsk)
1688 + {
1689 + __u32 pending = local_softirq_pending();
1690 + const __u32 high_prio_softirq = (1<<HI_SOFTIRQ) | (1<<TIMER_SOFTIRQ) | (1<<HRTIMER_SOFTIRQ);
1691 + if(pending && !(pending & high_prio_softirq))
1692 + {
1693 + wakeup_softirqd();
1694 + return;
1695 + }
1696 + }
1697 +#endif
1698 +
1699 + /*
1700 + * 'immediate' softirq execution:
1701 + */
1702 + __local_bh_disable((unsigned long)__builtin_return_address(0));
1703 + lockdep_softirq_enter();
1704 +
1705 + ___do_softirq();
1706 +
1707 lockdep_softirq_exit();
1708 -
1709 +
1710 account_system_vtime(current);
1711 - _local_bh_enable();
1712 + _local_bh_enable();
1713 }
1714
1715 #ifndef __ARCH_HAS_DO_SOFTIRQ
1716 @@ -357,8 +407,65 @@ struct tasklet_head
1717 static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
1718 static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
1719
1720 +
1721 void __tasklet_schedule(struct tasklet_struct *t)
1722 {
1723 +#ifdef CONFIG_LITMUS_NVIDIA
1724 + if(is_nvidia_func(t->func))
1725 + {
1726 + u32 nvidia_device = get_tasklet_nv_device_num(t);
1727 + // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n",
1728 + // __FUNCTION__, nvidia_device,litmus_clock());
1729 +
1730 + unsigned long flags;
1731 + struct task_struct* device_owner;
1732 +
1733 + lock_nv_registry(nvidia_device, &flags);
1734 +
1735 + device_owner = get_nv_device_owner(nvidia_device);
1736 +
1737 + if(device_owner==NULL)
1738 + {
1739 + t->owner = NULL;
1740 + }
1741 + else
1742 + {
1743 + if(is_realtime(device_owner))
1744 + {
1745 + TRACE("%s: Handling NVIDIA tasklet for device %u at %llu\n",
1746 + __FUNCTION__, nvidia_device,litmus_clock());
1747 + TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n",
1748 + __FUNCTION__,device_owner->pid,nvidia_device);
1749 +
1750 + t->owner = device_owner;
1751 + sched_trace_tasklet_release(t->owner);
1752 +
1753 + if(likely(_litmus_tasklet_schedule(t,nvidia_device)))
1754 + {
1755 + unlock_nv_registry(nvidia_device, &flags);
1756 + return;
1757 + }
1758 + else
1759 + {
1760 + t->owner = NULL; /* fall through to normal scheduling */
1761 + }
1762 + }
1763 + else
1764 + {
1765 + t->owner = NULL;
1766 + }
1767 + }
1768 + unlock_nv_registry(nvidia_device, &flags);
1769 + }
1770 +#endif
1771 +
1772 + ___tasklet_schedule(t);
1773 +}
1774 +EXPORT_SYMBOL(__tasklet_schedule);
1775 +
1776 +
1777 +void ___tasklet_schedule(struct tasklet_struct *t)
1778 +{
1779 unsigned long flags;
1780
1781 local_irq_save(flags);
1782 @@ -368,11 +475,65 @@ void __tasklet_schedule(struct tasklet_struct *t)
1783 raise_softirq_irqoff(TASKLET_SOFTIRQ);
1784 local_irq_restore(flags);
1785 }
1786 +EXPORT_SYMBOL(___tasklet_schedule);
1787
1788 -EXPORT_SYMBOL(__tasklet_schedule);
1789
1790 void __tasklet_hi_schedule(struct tasklet_struct *t)
1791 {
1792 +#ifdef CONFIG_LITMUS_NVIDIA
1793 + if(is_nvidia_func(t->func))
1794 + {
1795 + u32 nvidia_device = get_tasklet_nv_device_num(t);
1796 + // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n",
1797 + // __FUNCTION__, nvidia_device,litmus_clock());
1798 +
1799 + unsigned long flags;
1800 + struct task_struct* device_owner;
1801 +
1802 + lock_nv_registry(nvidia_device, &flags);
1803 +
1804 + device_owner = get_nv_device_owner(nvidia_device);
1805 +
1806 + if(device_owner==NULL)
1807 + {
1808 + t->owner = NULL;
1809 + }
1810 + else
1811 + {
1812 + if( is_realtime(device_owner))
1813 + {
1814 + TRACE("%s: Handling NVIDIA tasklet for device %u\tat %llu\n",
1815 + __FUNCTION__, nvidia_device,litmus_clock());
1816 + TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n",
1817 + __FUNCTION__,device_owner->pid,nvidia_device);
1818 +
1819 + t->owner = device_owner;
1820 + sched_trace_tasklet_release(t->owner);
1821 + if(likely(_litmus_tasklet_hi_schedule(t,nvidia_device)))
1822 + {
1823 + unlock_nv_registry(nvidia_device, &flags);
1824 + return;
1825 + }
1826 + else
1827 + {
1828 + t->owner = NULL; /* fall through to normal scheduling */
1829 + }
1830 + }
1831 + else
1832 + {
1833 + t->owner = NULL;
1834 + }
1835 + }
1836 + unlock_nv_registry(nvidia_device, &flags);
1837 + }
1838 +#endif
1839 +
1840 + ___tasklet_hi_schedule(t);
1841 +}
1842 +EXPORT_SYMBOL(__tasklet_hi_schedule);
1843 +
1844 +void ___tasklet_hi_schedule(struct tasklet_struct* t)
1845 +{
1846 unsigned long flags;
1847
1848 local_irq_save(flags);
1849 @@ -382,19 +543,72 @@ void __tasklet_hi_schedule(struct tasklet_struct *t)
1850 raise_softirq_irqoff(HI_SOFTIRQ);
1851 local_irq_restore(flags);
1852 }
1853 -
1854 -EXPORT_SYMBOL(__tasklet_hi_schedule);
1855 +EXPORT_SYMBOL(___tasklet_hi_schedule);
1856
1857 void __tasklet_hi_schedule_first(struct tasklet_struct *t)
1858 {
1859 BUG_ON(!irqs_disabled());
1860 +#ifdef CONFIG_LITMUS_NVIDIA
1861 + if(is_nvidia_func(t->func))
1862 + {
1863 + u32 nvidia_device = get_tasklet_nv_device_num(t);
1864 + // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n",
1865 + // __FUNCTION__, nvidia_device,litmus_clock());
1866 + unsigned long flags;
1867 + struct task_struct* device_owner;
1868 +
1869 + lock_nv_registry(nvidia_device, &flags);
1870 +
1871 + device_owner = get_nv_device_owner(nvidia_device);
1872 +
1873 + if(device_owner==NULL)
1874 + {
1875 + t->owner = NULL;
1876 + }
1877 + else
1878 + {
1879 + if(is_realtime(device_owner))
1880 + {
1881 + TRACE("%s: Handling NVIDIA tasklet for device %u at %llu\n",
1882 + __FUNCTION__, nvidia_device,litmus_clock());
1883 +
1884 + TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n",
1885 + __FUNCTION__,device_owner->pid,nvidia_device);
1886 +
1887 + t->owner = device_owner;
1888 + sched_trace_tasklet_release(t->owner);
1889 + if(likely(_litmus_tasklet_hi_schedule_first(t,nvidia_device)))
1890 + {
1891 + unlock_nv_registry(nvidia_device, &flags);
1892 + return;
1893 + }
1894 + else
1895 + {
1896 + t->owner = NULL; /* fall through to normal scheduling */
1897 + }
1898 + }
1899 + else
1900 + {
1901 + t->owner = NULL;
1902 + }
1903 + }
1904 + unlock_nv_registry(nvidia_device, &flags);
1905 + }
1906 +#endif
1907 +
1908 + ___tasklet_hi_schedule_first(t);
1909 +}
1910 +EXPORT_SYMBOL(__tasklet_hi_schedule_first);
1911 +
1912 +void ___tasklet_hi_schedule_first(struct tasklet_struct* t)
1913 +{
1914 + BUG_ON(!irqs_disabled());
1915
1916 t->next = __get_cpu_var(tasklet_hi_vec).head;
1917 __get_cpu_var(tasklet_hi_vec).head = t;
1918 __raise_softirq_irqoff(HI_SOFTIRQ);
1919 }
1920 -
1921 -EXPORT_SYMBOL(__tasklet_hi_schedule_first);
1922 +EXPORT_SYMBOL(___tasklet_hi_schedule_first);
1923
1924 static void tasklet_action(struct softirq_action *a)
1925 {
1926 @@ -450,6 +664,7 @@ static void tasklet_hi_action(struct softirq_action *a)
1927 if (!atomic_read(&t->count)) {
1928 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
1929 BUG();
1930 +
1931 t->func(t->data);
1932 tasklet_unlock(t);
1933 continue;
1934 @@ -473,8 +688,13 @@ void tasklet_init(struct tasklet_struct *t,
1935 t->next = NULL;
1936 t->state = 0;
1937 atomic_set(&t->count, 0);
1938 +
1939 t->func = func;
1940 t->data = data;
1941 +
1942 +#ifdef CONFIG_LITMUS_SOFTIRQD
1943 + t->owner = NULL;
1944 +#endif
1945 }
1946
1947 EXPORT_SYMBOL(tasklet_init);
1948 @@ -489,6 +709,7 @@ void tasklet_kill(struct tasklet_struct *t)
1949 yield();
1950 } while (test_bit(TASKLET_STATE_SCHED, &t->state));
1951 }
1952 +
1953 tasklet_unlock_wait(t);
1954 clear_bit(TASKLET_STATE_SCHED, &t->state);
1955 }
1956 @@ -694,6 +915,8 @@ void __init softirq_init(void)
1957
1958 static int run_ksoftirqd(void * __bind_cpu)
1959 {
1960 + unsigned long flags;
1961 +
1962 set_current_state(TASK_INTERRUPTIBLE);
1963
1964 while (!kthread_should_stop()) {
1965 @@ -712,7 +935,11 @@ static int run_ksoftirqd(void * __bind_cpu)
1966 don't process */
1967 if (cpu_is_offline((long)__bind_cpu))
1968 goto wait_to_die;
1969 - do_softirq();
1970 +
1971 + local_irq_save(flags);
1972 + ____do_softirq();
1973 + local_irq_restore(flags);
1974 +
1975 preempt_enable_no_resched();
1976 cond_resched();
1977 preempt_disable();
1978 @@ -760,6 +987,7 @@ void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
1979 for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
1980 if (*i == t) {
1981 *i = t->next;
1982 +
1983 /* If this was the tail element, move the tail ptr */
1984 if (*i == NULL)
1985 per_cpu(tasklet_vec, cpu).tail = i;
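Note: the three tasklet-scheduling hooks above follow one interception pattern. A condensed restatement of the low-priority case follows (all names are taken from the hunks above; this sketch is an editorial summary, not an additional change to the patch):

    void __tasklet_schedule(struct tasklet_struct *t)
    {
    #ifdef CONFIG_LITMUS_NVIDIA
    	if (is_nvidia_func(t->func)) {
    		unsigned long flags;
    		u32 dev = get_tasklet_nv_device_num(t);
    		struct task_struct *owner;

    		lock_nv_registry(dev, &flags);
    		owner = get_nv_device_owner(dev);
    		if (owner && is_realtime(owner)) {
    			t->owner = owner;
    			sched_trace_tasklet_release(owner);
    			if (_litmus_tasklet_schedule(t, dev)) {
    				/* accepted by the LITMUS handler; done */
    				unlock_nv_registry(dev, &flags);
    				return;
    			}
    		}
    		t->owner = NULL;                 /* fall back to Linux */
    		unlock_nv_registry(dev, &flags);
    	}
    #endif
    	___tasklet_schedule(t);                  /* stock Linux path */
    }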
1986 diff --git a/kernel/workqueue.c b/kernel/workqueue.c
1987 index f77afd9..2293aad 100644
1988 --- a/kernel/workqueue.c
1989 +++ b/kernel/workqueue.c
1990 @@ -47,6 +47,13 @@
1991
1992 #include "workqueue_sched.h"
1993
1994 +#ifdef CONFIG_LITMUS_NVIDIA
1995 +#include <litmus/litmus.h>
1996 +#include <litmus/sched_trace.h>
1997 +#include <litmus/nvidia_info.h>
1998 +#endif
1999 +
2000 +
2001 enum {
2002 /* global_cwq flags */
2003 GCWQ_MANAGE_WORKERS = 1 << 0, /* need to manage workers */
2004 @@ -1010,9 +1017,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
2005 work_flags |= WORK_STRUCT_DELAYED;
2006 worklist = &cwq->delayed_works;
2007 }
2008 -
2009 insert_work(cwq, work, worklist, work_flags);
2010 -
2011 spin_unlock_irqrestore(&gcwq->lock, flags);
2012 }
2013
2014 @@ -2526,10 +2531,70 @@ EXPORT_SYMBOL(cancel_delayed_work_sync);
2015 */
2016 int schedule_work(struct work_struct *work)
2017 {
2018 - return queue_work(system_wq, work);
2019 +#if 0
2020 +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD)
2021 + if(is_nvidia_func(work->func))
2022 + {
2023 + u32 nvidiaDevice = get_work_nv_device_num(work);
2024 +
2025 + //1) Ask Litmus which task owns GPU <nvidiaDevice>. (API to be defined.)
2026 + unsigned long flags;
2027 + struct task_struct* device_owner;
2028 +
2029 + lock_nv_registry(nvidiaDevice, &flags);
2030 +
2031 + device_owner = get_nv_device_owner(nvidiaDevice);
2032 +
2033 + //2) If there is an owner, set work->owner to the owner's task struct.
2034 + if(device_owner==NULL)
2035 + {
2036 + work->owner = NULL;
2037 + //TRACE("%s: the owner task of NVIDIA Device %u is NULL\n",__FUNCTION__,nvidiaDevice);
2038 + }
2039 + else
2040 + {
2041 + if( is_realtime(device_owner))
2042 + {
2043 + TRACE("%s: Handling NVIDIA work for device\t%u\tat\t%llu\n",
2044 + __FUNCTION__, nvidiaDevice,litmus_clock());
2045 + TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n",
2046 + __FUNCTION__,
2047 + device_owner->pid,
2048 + nvidiaDevice);
2049 +
2050 + //3) Call litmus_schedule_work() and return (don't execute the rest
2051 + // of schedule_work()).
2052 + work->owner = device_owner;
2053 + sched_trace_work_release(work->owner);
2054 + if(likely(litmus_schedule_work(work, nvidiaDevice)))
2055 + {
2056 + unlock_nv_registry(nvidiaDevice, &flags);
2057 + return 1;
2058 + }
2059 + else
2060 + {
2061 + work->owner = NULL; /* fall through to normal work scheduling */
2062 + }
2063 + }
2064 + else
2065 + {
2066 + work->owner = NULL;
2067 + }
2068 + }
2069 + unlock_nv_registry(nvidiaDevice, &flags);
2070 + }
2071 +#endif
2072 +#endif
2073 + return(__schedule_work(work));
2074 }
2075 EXPORT_SYMBOL(schedule_work);
2076
2077 +int __schedule_work(struct work_struct* work)
2078 +{
2079 + return queue_work(system_wq, work);
2080 +}
2081 +EXPORT_SYMBOL(__schedule_work);
2082 +
2083 /*
2084 * schedule_work_on - put work task on a specific cpu
2085 * @cpu: cpu to put the work task on
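Note: existing callers of schedule_work() are unaffected by the split above; the public entry point simply forwards to __schedule_work(), and the NVIDIA redirection is currently compiled out with '#if 0'. Minimal illustration (my_work and my_deferred_fn are hypothetical names):

    static void my_deferred_fn(struct work_struct *w)
    {
    	/* runs in a kworker, exactly as before the patch */
    }
    static DECLARE_WORK(my_work, my_deferred_fn);

    	/* from IRQ or process context: */
    	schedule_work(&my_work);	/* now forwards to __schedule_work() */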
2086 diff --git a/litmus/Kconfig b/litmus/Kconfig
2087 index ad8dc83..5109cf7 100644
2088 --- a/litmus/Kconfig
2089 +++ b/litmus/Kconfig
2090 @@ -62,6 +62,25 @@ config LITMUS_LOCKING
2091
2092 endmenu
2093
2094 +menu "Performance Enhancements"
2095 +
2096 +config SCHED_CPU_AFFINITY
2097 + bool "Local Migration Affinity"
2098 + default y
2099 + help
2100 + Rescheduled tasks prefer CPUs near their previously used CPU. This
2101 + may improve performance by preserving cache affinity.
2102 +
2103 + Warning: May make bugs harder to find since tasks may migrate less often.
2104 +
2105 + NOTES:
2106 + * Pfair/PD^2 does not support this option.
2107 + * Only x86 currently supported.
2108 +
2109 + Say Yes if unsure.
2110 +
2111 +endmenu
2112 +
2113 menu "Tracing"
2114
2115 config FEATHER_TRACE
2116 @@ -182,4 +201,106 @@ config SCHED_DEBUG_TRACE_CALLER
2117
2118 endmenu
2119
2120 +menu "Interrupt Handling"
2121 +
2122 +config LITMUS_THREAD_ALL_SOFTIRQ
2123 + bool "Process all softirqs in ksoftirqd threads."
2124 + default n
2125 + help
2126 + (Experimental) Thread all softirqs to ksoftirqd
2127 + daemon threads, similar to PREEMPT_RT. I/O
2128 + throughput will drop with this enabled, but
2129 + latencies due to interrupts will be reduced.
2130 +
2131 + WARNING: Timer responsiveness will likely be
2132 + decreased as timer callbacks are also threaded.
2133 + This is unlike PREEMPT_RT's hardirqs.
2134 +
2135 + If unsure, say No.
2136 +
2137 +
2138 +choice
2139 + prompt "Scheduling of interrupt bottom-halves in Litmus."
2140 + default LITMUS_SOFTIRQD_NONE
2141 + depends on LITMUS_LOCKING && !LITMUS_THREAD_ALL_SOFTIRQ
2142 + help
2143 + Schedule tasklets with known priorities in Litmus.
2144 +
2145 +config LITMUS_SOFTIRQD_NONE
2146 + bool "No tasklet scheduling in Litmus."
2147 + help
2148 + Don't schedule tasklets in Litmus. Default.
2149 +
2150 +config LITMUS_SOFTIRQD
2151 + bool "Spawn klitirqd interrupt handling threads."
2152 + help
2153 + Create klitirqd interrupt handling threads. Work must be
2154 + specifically dispatched to these workers. (Softirqs for
2155 + Litmus tasks are not magically redirected to klitirqd.)
2156 +
2157 + G-EDF/RM, C-EDF/RM ONLY for now!
2158 +
2159 +
2160 +config LITMUS_PAI_SOFTIRQD
2161 + bool "Defer tasklets to context switch points."
2162 + help
2163 + Only execute scheduled tasklet bottom halves at
2164 + scheduling points. Avoids context-switch overhead
2165 + at the cost of non-preemptive durations of bottom-half
2166 + processing.
2167 +
2168 + G-EDF/RM, C-EDF/RM ONLY for now!
2169 +
2170 +endchoice
2171 +
2172 +
2173 +config NR_LITMUS_SOFTIRQD
2174 + int "Number of klitirqd."
2175 + depends on LITMUS_SOFTIRQD
2176 + range 1 4096
2177 + default "1"
2178 + help
2179 + Should be <= the number of CPUs in your system.
2180 +
2181 +config LITMUS_NVIDIA
2182 + bool "Litmus handling of NVIDIA interrupts."
2183 + depends on LITMUS_SOFTIRQD || LITMUS_PAI_SOFTIRQD
2184 + default n
2185 + help
2186 + Direct tasklets from NVIDIA devices to Litmus's klitirqd.
2187 +
2188 + If unsure, say No.
2189 +
2190 +config NV_DEVICE_NUM
2191 + int "Number of NVIDIA GPUs."
2192 + depends on LITMUS_SOFTIRQD || LITMUS_PAI_SOFTIRQD
2193 + range 1 4096
2194 + default "1"
2195 + help
2196 + Should be <= the number of CPUs and
2197 + <= the number of GPUs in your system.
2198 +
2199 +choice
2200 + prompt "CUDA/Driver Version Support"
2201 + default CUDA_4_0
2202 + depends on LITMUS_NVIDIA
2203 + help
2204 + Select the version of CUDA/driver to support.
2205 +
2206 +config CUDA_4_0
2207 + bool "CUDA 4.0"
2208 + depends on LITMUS_NVIDIA
2209 + help
2210 + Support CUDA 4.0 RC2 (dev. driver version: x86_64-270.40)
2211 +
2212 +config CUDA_3_2
2213 + bool "CUDA 3.2"
2214 + depends on LITMUS_NVIDIA
2215 + help
2216 + Support CUDA 3.2 (dev. driver version: x86_64-260.24)
2217 +
2218 +endchoice
2219 +
2220 +endmenu
2221 +
2222 endmenu
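Note: the options above surface in the sources as the usual Kconfig-generated macros. A minimal sketch of how they are typically consumed (only klitirqds[] appears verbatim in litmus/litmus_softirq.c below; the NV_DEVICE_REG declaration and its element type are placeholders):

    #ifdef CONFIG_LITMUS_SOFTIRQD
    /* one descriptor per configured klitirqd daemon (NR_LITMUS_SOFTIRQD) */
    static struct klitirqd_info klitirqds[NR_LITMUS_SOFTIRQD];
    #endif

    #ifdef CONFIG_LITMUS_NVIDIA
    /* one registry slot per configured GPU (CONFIG_NV_DEVICE_NUM) */
    static struct nv_device_registry NV_DEVICE_REG[CONFIG_NV_DEVICE_NUM];
    #endif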
2223 diff --git a/litmus/Makefile b/litmus/Makefile
2224 index ad9936e..869939e 100644
2225 --- a/litmus/Makefile
2226 +++ b/litmus/Makefile
2227 @@ -19,10 +19,15 @@ obj-y = sched_plugin.o litmus.o \
2228 sched_gsn_edf.o \
2229 sched_psn_edf.o
2230
2231 -obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
2232 +obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o sched_cfifo.o fifo_common.o sched_crm.o rm_common.o sched_crm_srt.o rm_srt_common.o
2233 obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o
2234 +obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o
2235
2236 obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
2237 obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o
2238 obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o
2239 obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o
2240 +
2241 +obj-$(CONFIG_LITMUS_SOFTIRQD) += litmus_softirq.o
2242 +obj-$(CONFIG_LITMUS_PAI_SOFTIRQD) += litmus_pai_softirq.o
2243 +obj-$(CONFIG_LITMUS_NVIDIA) += nvidia_info.o sched_trace_external.o
2244 diff --git a/litmus/affinity.c b/litmus/affinity.c
2245 new file mode 100644
2246 index 0000000..3b430d1
2247 --- /dev/null
2248 +++ b/litmus/affinity.c
2249 @@ -0,0 +1,49 @@
2250 +#include <linux/cpu.h>
2251 +
2252 +#include <litmus/affinity.h>
2253 +
2254 +struct neighborhood neigh_info[NR_CPUS];
2255 +
2256 +/* called by _init_litmus() */
2257 +void init_topology(void)
2258 +{
2259 + int cpu;
2260 + int i;
2261 + int chk;
2262 + int depth = num_cache_leaves;
2263 +
2264 + if(depth > NUM_CACHE_LEVELS)
2265 + depth = NUM_CACHE_LEVELS;
2266 +
2267 + for_each_online_cpu(cpu)
2268 + {
2269 + for(i = 0; i < depth; ++i)
2270 + {
2271 + long unsigned int firstbits;
2272 +
2273 + chk = get_shared_cpu_map((struct cpumask *)&neigh_info[cpu].neighbors[i], cpu, i);
2274 + if(chk) /* failed */
2275 + {
2276 + neigh_info[cpu].size[i] = 0;
2277 + }
2278 + else
2279 + {
2280 + /* size = num bits in mask */
2281 + neigh_info[cpu].size[i] = cpumask_weight((struct cpumask *)&neigh_info[cpu].neighbors[i]);
2282 + }
2283 + firstbits = *neigh_info[cpu].neighbors[i]->bits;
2284 + printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n",
2285 + cpu, neigh_info[cpu].size[i], i, firstbits);
2286 + }
2287 +
2288 + /* set data for non-existent levels */
2289 + for(; i < NUM_CACHE_LEVELS; ++i)
2290 + {
2291 + neigh_info[cpu].size[i] = 0;
2292 +
2293 + printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n",
2294 + cpu, neigh_info[cpu].size[i], i, 0lu);
2295 + }
2296 + }
2297 +}
2298 +
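Note: init_topology() above only records shared-cache neighbor masks; a plugin built with SCHED_CPU_AFFINITY would consult them when placing a resuming task. Hedged sketch of such a lookup (find_nearby_cpu() and the cpu_is_preferred callback are assumptions, not part of the patch):

    /* Walk cache levels from closest to farthest and return the first
     * acceptable neighbor of 'last_cpu'; fall back to 'last_cpu' itself. */
    static int find_nearby_cpu(int last_cpu, int (*cpu_is_preferred)(int cpu))
    {
    	int level, cpu;

    	for (level = 0; level < NUM_CACHE_LEVELS; ++level) {
    		if (!neigh_info[last_cpu].size[level])
    			continue;	/* nothing recorded at this level */
    		for_each_cpu(cpu, (struct cpumask *)
    			     &neigh_info[last_cpu].neighbors[level])
    			if (cpu != last_cpu && cpu_is_preferred(cpu))
    				return cpu;
    	}
    	return last_cpu;
    }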
2299 diff --git a/litmus/edf_common.c b/litmus/edf_common.c
2300 index 9b44dc2..0a06d7a 100644
2301 --- a/litmus/edf_common.c
2302 +++ b/litmus/edf_common.c
2303 @@ -63,8 +63,52 @@ int edf_higher_prio(struct task_struct* first,
2304
2305 #endif
2306
2307 + if (!is_realtime(second_task))
2308 + return true;
2309 +
2310 + if (earlier_deadline(first_task, second_task))
2311 + return true;
2312 +
2313 + if (get_deadline(first_task) == get_deadline(second_task))
2314 + {
2315 + if (shorter_period(first_task, second_task))
2316 + {
2317 + return true;
2318 + }
2319 + if (get_rt_period(first_task) == get_rt_period(second_task))
2320 + {
2321 +#ifdef CONFIG_LITMUS_SOFTIRQD
2322 + if (first_task->rt_param.is_proxy_thread < second_task->rt_param.is_proxy_thread)
2323 + {
2324 + return true;
2325 + }
2326 + if (first_task->rt_param.is_proxy_thread == second_task->rt_param.is_proxy_thread)
2327 + {
2328 +#endif
2329 + if (first_task->pid < second_task->pid)
2330 + {
2331 + return true;
2332 + }
2333 + if (first_task->pid == second_task->pid)
2334 + {
2335 + return !second->rt_param.inh_task;
2336 + }
2337 +#ifdef CONFIG_LITMUS_SOFTIRQD
2338 + }
2339 +#endif
2340 + }
2341 + }
2342 +
2343 + return false;
2344
2345 +#if 0
2346 return !is_realtime(second_task) ||
2347 +
2348 +#ifdef CONFIG_LITMUS_SOFTIRQD
2349 + /* proxy threads always lose w/o inheritance. */
2350 + (first_task->rt_param.is_proxy_thread <
2351 + second_task->rt_param.is_proxy_thread) ||
2352 +#endif
2353
2354 /* is the deadline of the first task earlier?
2355 * Then it has higher priority.
2356 @@ -82,6 +126,7 @@ int edf_higher_prio(struct task_struct* first,
2357 */
2358 (first_task->pid == second_task->pid &&
2359 !second->rt_param.inh_task)));
2360 +#endif
2361 }
2362
2363 int edf_ready_order(struct bheap_node* a, struct bheap_node* b)
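Note: the rewritten edf_higher_prio() above unrolls the original boolean expression into an explicit decision ladder. Summarized (same ordering as the code, applied after inheritance substitution):

    /* 'first' beats 'second' when, in order:
     *  1. second is not a real-time task;
     *  2. first has the earlier deadline;
     *  3. deadlines tie and first has the shorter period;
     *  4. periods tie and first is not a proxy thread while second is
     *     (CONFIG_LITMUS_SOFTIRQD only);
     *  5. proxy status ties and first has the lower PID;
     *  6. PIDs tie and second has no inherited task.
     * Otherwise 'second' has equal or higher priority.
     */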
2364 diff --git a/litmus/fdso.c b/litmus/fdso.c
2365 index aa7b384..2b7f9ba 100644
2366 --- a/litmus/fdso.c
2367 +++ b/litmus/fdso.c
2368 @@ -22,6 +22,7 @@ extern struct fdso_ops generic_lock_ops;
2369
2370 static const struct fdso_ops* fdso_ops[] = {
2371 &generic_lock_ops, /* FMLP_SEM */
2372 + &generic_lock_ops, /* KFMLP_SEM */
2373 &generic_lock_ops, /* SRP_SEM */
2374 };
2375
2376 diff --git a/litmus/fifo_common.c b/litmus/fifo_common.c
2377 new file mode 100644
2378 index 0000000..c94510a
2379 --- /dev/null
2380 +++ b/litmus/fifo_common.c
2381 @@ -0,0 +1,124 @@
2382 +/*
2383 + * litmus/fifo_common.c
2384 + *
2385 + * Common functions for FIFO based schedulers.
2386 + */
2387 +
2388 +#include <linux/percpu.h>
2389 +#include <linux/sched.h>
2390 +#include <linux/list.h>
2391 +
2392 +#include <litmus/litmus.h>
2393 +#include <litmus/sched_plugin.h>
2394 +#include <litmus/sched_trace.h>
2395 +
2396 +#include <litmus/fifo_common.h>
2397 +
2398 +/* fifo_higher_prio - returns true if first has a higher FIFO priority
2399 + * (earlier release) than second. Release-time ties are broken by PID.
2400 + *
2401 + * both first and second may be NULL
2402 + */
2403 +int fifo_higher_prio(struct task_struct* first,
2404 + struct task_struct* second)
2405 +{
2406 + struct task_struct *first_task = first;
2407 + struct task_struct *second_task = second;
2408 +
2409 + /* There is no point in comparing a task to itself. */
2410 + if (first && first == second) {
2411 + TRACE_TASK(first,
2412 + "WARNING: pointless edf priority comparison.\n");
2413 + return 0;
2414 + }
2415 +
2416 +
2417 + /* check for NULL tasks */
2418 + if (!first || !second)
2419 + return first && !second;
2420 +
2421 +#ifdef CONFIG_LITMUS_LOCKING
2422 +
2423 + /* Check for inherited priorities. Change task
2424 + * used for comparison in such a case.
2425 + */
2426 + if (unlikely(first->rt_param.inh_task))
2427 + first_task = first->rt_param.inh_task;
2428 + if (unlikely(second->rt_param.inh_task))
2429 + second_task = second->rt_param.inh_task;
2430 +
2431 + /* Check for priority boosting. Tie-break by start of boosting.
2432 + */
2433 + if (unlikely(is_priority_boosted(first_task))) {
2434 + /* first_task is boosted, how about second_task? */
2435 + if (!is_priority_boosted(second_task) ||
2436 + lt_before(get_boost_start(first_task),
2437 + get_boost_start(second_task)))
2438 + return 1;
2439 + else
2440 + return 0;
2441 + } else if (unlikely(is_priority_boosted(second_task)))
2442 + /* second_task is boosted, first is not*/
2443 + return 0;
2444 +
2445 +#endif
2446 +
2447 +
2448 + return !is_realtime(second_task) ||
2449 +
2450 +#ifdef CONFIG_LITMUS_SOFTIRQD
2451 + /* proxy threads always lose w/o inheritance. */
2452 + (first_task->rt_param.is_proxy_thread <
2453 + second_task->rt_param.is_proxy_thread) ||
2454 +#endif
2455 +
2456 + /* is the release of the first task earlier?
2457 + * Then it has higher priority.
2458 + */
2459 + earlier_release(first_task, second_task) ||
2460 +
2461 + /* Do we have a release-time tie?
2462 + * Then break by PID.
2463 + */
2464 + (get_release(first_task) == get_release(second_task) &&
2465 + (first_task->pid < second_task->pid ||
2466 +
2467 + /* If the PIDs are the same then the task with the inherited
2468 + * priority wins.
2469 + */
2470 + (first_task->pid == second_task->pid &&
2471 + !second->rt_param.inh_task)));
2472 +}
2473 +
2474 +int fifo_ready_order(struct bheap_node* a, struct bheap_node* b)
2475 +{
2476 + return fifo_higher_prio(bheap2task(a), bheap2task(b));
2477 +}
2478 +
2479 +void fifo_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
2480 + release_jobs_t release)
2481 +{
2482 + rt_domain_init(rt, fifo_ready_order, resched, release);
2483 +}
2484 +
2485 +/* need_to_preempt - check whether the task t needs to be preempted
2486 + * call only with irqs disabled and with ready_lock acquired
2487 + * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT!
2488 + */
2489 +int fifo_preemption_needed(rt_domain_t* rt, struct task_struct *t)
2490 +{
2491 + /* we need the read lock for fifo_ready_queue */
2492 + /* no need to preempt if there is nothing pending */
2493 + if (!__jobs_pending(rt))
2494 + return 0;
2495 + /* we need to reschedule if t doesn't exist */
2496 + if (!t)
2497 + return 1;
2498 +
2499 + /* NOTE: We cannot check for non-preemptibility since we
2500 + * don't know what address space we're currently in.
2501 + */
2502 +
2503 + /* make sure to get non-rt stuff out of the way */
2504 + return !is_realtime(t) || fifo_higher_prio(__next_ready(rt), t);
2505 +}
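Note: fifo_preemption_needed() is meant to be called at scheduling points with the domain's ready lock held and interrupts disabled. Hedged call-site sketch (the surrounding plugin code is an assumption):

    static void check_for_fifo_preemption(rt_domain_t *dom,
    				      struct task_struct *scheduled)
    {
    	/* caller holds dom's ready lock with IRQs off */
    	if (fifo_preemption_needed(dom, scheduled)) {
    		/* request a reschedule here (plugin-specific) */
    	}
    }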
2506 diff --git a/litmus/litmus.c b/litmus/litmus.c
2507 index 26938ac..29363c6 100644
2508 --- a/litmus/litmus.c
2509 +++ b/litmus/litmus.c
2510 @@ -17,6 +17,14 @@
2511 #include <litmus/litmus_proc.h>
2512 #include <litmus/sched_trace.h>
2513
2514 +#ifdef CONFIG_SCHED_CPU_AFFINITY
2515 +#include <litmus/affinity.h>
2516 +#endif
2517 +
2518 +#ifdef CONFIG_LITMUS_NVIDIA
2519 +#include <litmus/nvidia_info.h>
2520 +#endif
2521 +
2522 /* Number of RT tasks that exist in the system */
2523 atomic_t rt_task_count = ATOMIC_INIT(0);
2524 static DEFINE_RAW_SPINLOCK(task_transition_lock);
2525 @@ -47,6 +55,28 @@ void bheap_node_free(struct bheap_node* hn)
2526 struct release_heap* release_heap_alloc(int gfp_flags);
2527 void release_heap_free(struct release_heap* rh);
2528
2529 +#ifdef CONFIG_LITMUS_NVIDIA
2530 +/*
2531 + * sys_register_nv_device
2532 + * @nv_device_id: the NVIDIA device id that the task wants to register
2533 + * @reg_action: set to 1 to register the specified device, zero otherwise
2534 + * Syscall to register the task's designated NVIDIA device in the NV_DEVICE_REG array.
2535 + * Returns EFAULT if nv_device_id is out of range,
2536 + * 0 on success.
2537 + */
2538 +asmlinkage long sys_register_nv_device(int nv_device_id, int reg_action)
2539 +{
2540 + /* register the device to caller (aka 'current') */
2541 + return(reg_nv_device(nv_device_id, reg_action));
2542 +}
2543 +#else
2544 +asmlinkage long sys_register_nv_device(int nv_device_id, int reg_action)
2545 +{
2546 + return(-EINVAL);
2547 +}
2548 +#endif
2549 +
2550 +
2551 /*
2552 * sys_set_task_rt_param
2553 * @pid: Pid of the task which scheduling parameters must be changed
2554 @@ -115,7 +145,7 @@ asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param)
2555 tp.cls != RT_CLASS_BEST_EFFORT)
2556 {
2557 printk(KERN_INFO "litmus: real-time task %d rejected "
2558 - "because its class is invalid\n");
2559 + "because its class is invalid\n", pid);
2560 goto out_unlock;
2561 }
2562 if (tp.budget_policy != NO_ENFORCEMENT &&
2563 @@ -131,6 +161,22 @@ asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param)
2564
2565 target->rt_param.task_params = tp;
2566
2567 +#ifdef CONFIG_LITMUS_SOFTIRQD
2568 + /* proxy thread off by default */
2569 + target->rt_param.is_proxy_thread = 0;
2570 + target->rt_param.cur_klitirqd = NULL;
2571 + //init_MUTEX(&target->rt_param.klitirqd_sem);
2572 + mutex_init(&target->rt_param.klitirqd_sem);
2573 + //init_completion(&target->rt_param.klitirqd_sem);
2574 + //target->rt_param.klitirqd_sem_stat = NOT_HELD;
2575 + atomic_set(&target->rt_param.klitirqd_sem_stat, NOT_HELD);
2576 +#endif
2577 +
2578 +#ifdef CONFIG_LITMUS_NVIDIA
2579 + atomic_set(&target->rt_param.nv_int_count, 0);
2580 +#endif
2581 +
2582 +
2583 retval = 0;
2584 out_unlock:
2585 read_unlock_irq(&tasklist_lock);
2586 @@ -265,6 +311,7 @@ asmlinkage long sys_query_job_no(unsigned int __user *job)
2587 return retval;
2588 }
2589
2590 +
2591 /* sys_null_call() is only used for determining raw system call
2592 * overheads (kernel entry, kernel exit). It has no useful side effects.
2593 * If ts is non-NULL, then the current Feather-Trace time is recorded.
2594 @@ -278,7 +325,7 @@ asmlinkage long sys_null_call(cycles_t __user *ts)
2595 now = get_cycles();
2596 ret = put_user(now, ts);
2597 }
2598 -
2599 +
2600 return ret;
2601 }
2602
2603 @@ -299,6 +346,20 @@ static void reinit_litmus_state(struct task_struct* p, int restore)
2604 * at this point in time.
2605 */
2606 WARN_ON(p->rt_param.inh_task);
2607 +
2608 +#ifdef CONFIG_LITMUS_SOFTIRQD
2609 + /* We probably should not have any tasklets executing for
2610 + * us at this time.
2611 + */
2612 + WARN_ON(p->rt_param.cur_klitirqd);
2613 + WARN_ON(atomic_read(&p->rt_param.klitirqd_sem_stat) == HELD);
2614 +
2615 + if(p->rt_param.cur_klitirqd)
2616 + flush_pending(p->rt_param.cur_klitirqd, p);
2617 +
2618 + if(atomic_read(&p->rt_param.klitirqd_sem_stat) == HELD)
2619 + up_and_set_stat(p, NOT_HELD, &p->rt_param.klitirqd_sem);
2620 +#endif
2621
2622 /* Cleanup everything else. */
2623 memset(&p->rt_param, 0, sizeof(p->rt_param));
2624 @@ -399,7 +460,7 @@ static void synch_on_plugin_switch(void* info)
2625 */
2626 int switch_sched_plugin(struct sched_plugin* plugin)
2627 {
2628 - unsigned long flags;
2629 + //unsigned long flags;
2630 int ret = 0;
2631
2632 BUG_ON(!plugin);
2633 @@ -413,8 +474,15 @@ int switch_sched_plugin(struct sched_plugin* plugin)
2634 while (atomic_read(&cannot_use_plugin) < num_online_cpus())
2635 cpu_relax();
2636
2637 +#ifdef CONFIG_LITMUS_SOFTIRQD
2638 + if(!klitirqd_is_dead())
2639 + {
2640 + kill_klitirqd();
2641 + }
2642 +#endif
2643 +
2644 /* stop task transitions */
2645 - raw_spin_lock_irqsave(&task_transition_lock, flags);
2646 + //raw_spin_lock_irqsave(&task_transition_lock, flags);
2647
2648 /* don't switch if there are active real-time tasks */
2649 if (atomic_read(&rt_task_count) == 0) {
2650 @@ -432,7 +500,7 @@ int switch_sched_plugin(struct sched_plugin* plugin)
2651 } else
2652 ret = -EBUSY;
2653 out:
2654 - raw_spin_unlock_irqrestore(&task_transition_lock, flags);
2655 + //raw_spin_unlock_irqrestore(&task_transition_lock, flags);
2656 atomic_set(&cannot_use_plugin, 0);
2657 return ret;
2658 }
2659 @@ -540,6 +608,10 @@ static int __init _init_litmus(void)
2660
2661 init_litmus_proc();
2662
2663 +#ifdef CONFIG_SCHED_CPU_AFFINITY
2664 + init_topology();
2665 +#endif
2666 +
2667 return 0;
2668 }
2669
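Note: from user space the new syscall is reached like any other LITMUS-RT syscall. Sketch assuming a liblitmus-style wrapper named register_nv_device() (the wrapper name, the syscall number, and the reading of reg_action == 0 as "unregister" are assumptions):

    int ret = register_nv_device(0 /* nv_device_id */, 1 /* reg_action: register */);
    if (ret != 0) {
    	/* -EINVAL if the kernel lacks CONFIG_LITMUS_NVIDIA,
    	 * EFAULT if nv_device_id is out of range */
    }
    /* ... issue GPU work as the registered owner of device 0 ... */
    register_nv_device(0, 0);	/* assumed: drop the registration */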
2670 diff --git a/litmus/litmus_pai_softirq.c b/litmus/litmus_pai_softirq.c
2671 new file mode 100644
2672 index 0000000..b31eeb8
2673 --- /dev/null
2674 +++ b/litmus/litmus_pai_softirq.c
2675 @@ -0,0 +1,64 @@
2676 +#include <linux/interrupt.h>
2677 +#include <linux/percpu.h>
2678 +#include <linux/cpu.h>
2679 +#include <linux/kthread.h>
2680 +#include <linux/ftrace.h>
2681 +#include <linux/smp.h>
2682 +#include <linux/slab.h>
2683 +#include <linux/mutex.h>
2684 +
2685 +#include <linux/sched.h>
2686 +#include <linux/cpuset.h>
2687 +
2688 +#include <litmus/litmus.h>
2689 +#include <litmus/sched_trace.h>
2690 +#include <litmus/jobs.h>
2691 +#include <litmus/sched_plugin.h>
2692 +#include <litmus/litmus_softirq.h>
2693 +
2694 +
2695 +
2696 +int __litmus_tasklet_schedule(struct tasklet_struct *t, unsigned int k_id)
2697 +{
2698 + int ret = 0; /* assume failure */
2699 + if(unlikely((t->owner == NULL) || !is_realtime(t->owner)))
2700 + {
2701 + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
2702 + BUG();
2703 + }
2704 +
2705 + ret = litmus->enqueue_pai_tasklet(t);
2706 +
2707 + return(ret);
2708 +}
2709 +
2710 +EXPORT_SYMBOL(__litmus_tasklet_schedule);
2711 +
2712 +
2713 +
2714 +// failure causes default Linux handling.
2715 +int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, unsigned int k_id)
2716 +{
2717 + int ret = 0; /* assume failure */
2718 + return(ret);
2719 +}
2720 +EXPORT_SYMBOL(__litmus_tasklet_hi_schedule);
2721 +
2722 +
2723 +// failure causes default Linux handling.
2724 +int __litmus_tasklet_hi_schedule_first(struct tasklet_struct *t, unsigned int k_id)
2725 +{
2726 + int ret = 0; /* assume failure */
2727 + return(ret);
2728 +}
2729 +EXPORT_SYMBOL(__litmus_tasklet_hi_schedule_first);
2730 +
2731 +
2732 +// failure causes default Linux handling.
2733 +int __litmus_schedule_work(struct work_struct *w, unsigned int k_id)
2734 +{
2735 + int ret = 0; /* assume failure */
2736 + return(ret);
2737 +}
2738 +EXPORT_SYMBOL(__litmus_schedule_work);
2739 +
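Note: apart from the low-priority tasklet path, the PAI hooks above are stubs that return 0, which callers interpret as "not handled, use the stock Linux path". The contract, restated from a caller's perspective (illustrative only; the real call sites are the _litmus_* dispatch points in kernel/softirq.c, and device_id stands for the GPU number):

    	if (!__litmus_tasklet_hi_schedule(t, device_id))
    		___tasklet_hi_schedule(t);	/* 0 => ordinary HI softirq path */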
2740 diff --git a/litmus/litmus_proc.c b/litmus/litmus_proc.c
2741 index 4bf725a..3815133 100644
2742 --- a/litmus/litmus_proc.c
2743 +++ b/litmus/litmus_proc.c
2744 @@ -20,11 +20,18 @@ static struct proc_dir_entry *litmus_dir = NULL,
2745 #ifdef CONFIG_RELEASE_MASTER
2746 *release_master_file = NULL,
2747 #endif
2748 +#ifdef CONFIG_LITMUS_SOFTIRQD
2749 + *klitirqd_file = NULL,
2750 +#endif
2751 *plugs_file = NULL;
2752
2753 /* in litmus/sync.c */
2754 int count_tasks_waiting_for_release(void);
2755
2756 +extern int proc_read_klitirqd_stats(char *page, char **start,
2757 + off_t off, int count,
2758 + int *eof, void *data);
2759 +
2760 static int proc_read_stats(char *page, char **start,
2761 off_t off, int count,
2762 int *eof, void *data)
2763 @@ -161,6 +168,12 @@ int __init init_litmus_proc(void)
2764 release_master_file->write_proc = proc_write_release_master;
2765 #endif
2766
2767 +#ifdef CONFIG_LITMUS_SOFTIRQD
2768 + klitirqd_file =
2769 + create_proc_read_entry("klitirqd_stats", 0444, litmus_dir,
2770 + proc_read_klitirqd_stats, NULL);
2771 +#endif
2772 +
2773 stat_file = create_proc_read_entry("stats", 0444, litmus_dir,
2774 proc_read_stats, NULL);
2775
2776 @@ -187,6 +200,10 @@ void exit_litmus_proc(void)
2777 remove_proc_entry("stats", litmus_dir);
2778 if (curr_file)
2779 remove_proc_entry("active_plugin", litmus_dir);
2780 +#ifdef CONFIG_LITMUS_SOFTIRQD
2781 + if (klitirqd_file)
2782 + remove_proc_entry("klitirqd_stats", litmus_dir);
2783 +#endif
2784 #ifdef CONFIG_RELEASE_MASTER
2785 if (release_master_file)
2786 remove_proc_entry("release_master", litmus_dir);
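Note: the new proc entry exposes per-daemon counters and can be read with ordinary file I/O. Quick user-space check, assuming the LITMUS proc directory is /proc/litmus (the prefix is not spelled out in this hunk):

    #include <stdio.h>

    int main(void)
    {
    	char line[256];
    	FILE *f = fopen("/proc/litmus/klitirqd_stats", "r"); /* assumed path */
    	if (!f)
    		return 1;
    	while (fgets(line, sizeof(line), f))
    		fputs(line, stdout);	/* ready count plus per-thread stats */
    	fclose(f);
    	return 0;
    }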
2787 diff --git a/litmus/litmus_softirq.c b/litmus/litmus_softirq.c
2788 new file mode 100644
2789 index 0000000..c49676c
2790 --- /dev/null
2791 +++ b/litmus/litmus_softirq.c
2792 @@ -0,0 +1,1584 @@
2793 +#include <linux/interrupt.h>
2794 +#include <linux/percpu.h>
2795 +#include <linux/cpu.h>
2796 +#include <linux/kthread.h>
2797 +#include <linux/ftrace.h>
2798 +#include <linux/smp.h>
2799 +#include <linux/slab.h>
2800 +#include <linux/mutex.h>
2801 +
2802 +#include <linux/sched.h>
2803 +#include <linux/cpuset.h>
2804 +
2805 +#include <litmus/litmus.h>
2806 +#include <litmus/sched_trace.h>
2807 +#include <litmus/jobs.h>
2808 +#include <litmus/sched_plugin.h>
2809 +#include <litmus/litmus_softirq.h>
2810 +
2811 +/* TODO: Remove unneeded mb() and other barriers. */
2812 +
2813 +
2814 +/* counts number of daemons ready to handle litmus irqs. */
2815 +static atomic_t num_ready_klitirqds = ATOMIC_INIT(0);
2816 +
2817 +enum pending_flags
2818 +{
2819 + LIT_TASKLET_LOW = 0x1,
2820 + LIT_TASKLET_HI = LIT_TASKLET_LOW<<1,
2821 + LIT_WORK = LIT_TASKLET_HI<<1
2822 +};
2823 +
2824 +/* only support tasklet processing for now. */
2825 +struct tasklet_head
2826 +{
2827 + struct tasklet_struct *head;
2828 + struct tasklet_struct **tail;
2829 +};
2830 +
2831 +struct klitirqd_info
2832 +{
2833 + struct task_struct* klitirqd;
2834 + struct task_struct* current_owner;
2835 + int terminating;
2836 +
2837 +
2838 + raw_spinlock_t lock;
2839 +
2840 + u32 pending;
2841 + atomic_t num_hi_pending;
2842 + atomic_t num_low_pending;
2843 + atomic_t num_work_pending;
2844 +
2845 + /* in order of priority */
2846 + struct tasklet_head pending_tasklets_hi;
2847 + struct tasklet_head pending_tasklets;
2848 + struct list_head worklist;
2849 +};
2850 +
2851 +/* one list for each klitirqd */
2852 +static struct klitirqd_info klitirqds[NR_LITMUS_SOFTIRQD];
2853 +
2854 +
2855 +
2856 +
2857 +
2858 +int proc_read_klitirqd_stats(char *page, char **start,
2859 + off_t off, int count,
2860 + int *eof, void *data)
2861 +{
2862 + int len = snprintf(page, PAGE_SIZE,
2863 + "num ready klitirqds: %d\n\n",
2864 + atomic_read(&num_ready_klitirqds));
2865 +
2866 + if(klitirqd_is_ready())
2867 + {
2868 + int i;
2869 + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
2870 + {
2871 + len +=
2872 + snprintf(page + len - 1, PAGE_SIZE, /* -1 to strip off \0 */
2873 + "klitirqd_th%d: %s/%d\n"
2874 + "\tcurrent_owner: %s/%d\n"
2875 + "\tpending: %x\n"
2876 + "\tnum hi: %d\n"
2877 + "\tnum low: %d\n"
2878 + "\tnum work: %d\n\n",
2879 + i,
2880 + klitirqds[i].klitirqd->comm, klitirqds[i].klitirqd->pid,
2881 + (klitirqds[i].current_owner != NULL) ?
2882 + klitirqds[i].current_owner->comm : "(null)",
2883 + (klitirqds[i].current_owner != NULL) ?
2884 + klitirqds[i].current_owner->pid : 0,
2885 + klitirqds[i].pending,
2886 + atomic_read(&klitirqds[i].num_hi_pending),
2887 + atomic_read(&klitirqds[i].num_low_pending),
2888 + atomic_read(&klitirqds[i].num_work_pending));
2889 + }
2890 + }
2891 +
2892 + return(len);
2893 +}
2894 +
2895 +
2896 +
2897 +
2898 +
2899 +#if 0
2900 +static atomic_t dump_id = ATOMIC_INIT(0);
2901 +
2902 +static void __dump_state(struct klitirqd_info* which, const char* caller)
2903 +{
2904 + struct tasklet_struct* list;
2905 +
2906 + int id = atomic_inc_return(&dump_id);
2907 +
2908 + //if(in_interrupt())
2909 + {
2910 + if(which->current_owner)
2911 + {
2912 + TRACE("(id: %d caller: %s)\n"
2913 + "klitirqd: %s/%d\n"
2914 + "current owner: %s/%d\n"
2915 + "pending: %x\n",
2916 + id, caller,
2917 + which->klitirqd->comm, which->klitirqd->pid,
2918 + which->current_owner->comm, which->current_owner->pid,
2919 + which->pending);
2920 + }
2921 + else
2922 + {
2923 + TRACE("(id: %d caller: %s)\n"
2924 + "klitirqd: %s/%d\n"
2925 + "current owner: %p\n"
2926 + "pending: %x\n",
2927 + id, caller,
2928 + which->klitirqd->comm, which->klitirqd->pid,
2929 + NULL,
2930 + which->pending);
2931 + }
2932 +
2933 + list = which->pending_tasklets.head;
2934 + while(list)
2935 + {
2936 + struct tasklet_struct *t = list;
2937 + list = list->next; /* advance */
2938 + if(t->owner)
2939 + TRACE("(id: %d caller: %s) Tasklet: %x, Owner = %s/%d\n", id, caller, t, t->owner->comm, t->owner->pid);
2940 + else
2941 + TRACE("(id: %d caller: %s) Tasklet: %x, Owner = %p\n", id, caller, t, NULL);
2942 + }
2943 + }
2944 +}
2945 +
2946 +static void dump_state(struct klitirqd_info* which, const char* caller)
2947 +{
2948 + unsigned long flags;
2949 +
2950 + raw_spin_lock_irqsave(&which->lock, flags);
2951 + __dump_state(which, caller);
2952 + raw_spin_unlock_irqrestore(&which->lock, flags);
2953 +}
2954 +#endif
2955 +
2956 +
2957 +/* forward declarations */
2958 +static void ___litmus_tasklet_schedule(struct tasklet_struct *t,
2959 + struct klitirqd_info *which,
2960 + int wakeup);
2961 +static void ___litmus_tasklet_hi_schedule(struct tasklet_struct *t,
2962 + struct klitirqd_info *which,
2963 + int wakeup);
2964 +static void ___litmus_schedule_work(struct work_struct *w,
2965 + struct klitirqd_info *which,
2966 + int wakeup);
2967 +
2968 +
2969 +
2970 +inline unsigned int klitirqd_id(struct task_struct* tsk)
2971 +{
2972 + int i;
2973 + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
2974 + {
2975 + if(klitirqds[i].klitirqd == tsk)
2976 + {
2977 + return i;
2978 + }
2979 + }
2980 +
2981 + BUG();
2982 +
2983 + return 0;
2984 +}
2985 +
2986 +
2987 +inline static u32 litirq_pending_hi_irqoff(struct klitirqd_info* which)
2988 +{
2989 + return (which->pending & LIT_TASKLET_HI);
2990 +}
2991 +
2992 +inline static u32 litirq_pending_low_irqoff(struct klitirqd_info* which)
2993 +{
2994 + return (which->pending & LIT_TASKLET_LOW);
2995 +}
2996 +
2997 +inline static u32 litirq_pending_work_irqoff(struct klitirqd_info* which)
2998 +{
2999 + return (which->pending & LIT_WORK);
3000 +}
3001 +
3002 +inline static u32 litirq_pending_irqoff(struct klitirqd_info* which)
3003 +{
3004 + return(which->pending);
3005 +}
3006 +
3007 +
3008 +inline static u32 litirq_pending(struct klitirqd_info* which)
3009 +{
3010 + unsigned long flags;
3011 + u32 pending;
3012 +
3013 + raw_spin_lock_irqsave(&which->lock, flags);
3014 + pending = litirq_pending_irqoff(which);
3015 + raw_spin_unlock_irqrestore(&which->lock, flags);
3016 +
3017 + return pending;
3018 +};
3019 +
3020 +inline static u32 litirq_pending_with_owner(struct klitirqd_info* which, struct task_struct* owner)
3021 +{
3022 + unsigned long flags;
3023 + u32 pending;
3024 +
3025 + raw_spin_lock_irqsave(&which->lock, flags);
3026 + pending = litirq_pending_irqoff(which);
3027 + if(pending)
3028 + {
3029 + if(which->current_owner != owner)
3030 + {
3031 + pending = 0; // owner switch!
3032 + }
3033 + }
3034 + raw_spin_unlock_irqrestore(&which->lock, flags);
3035 +
3036 + return pending;
3037 +}
3038 +
3039 +
3040 +inline static u32 litirq_pending_and_sem_and_owner(struct klitirqd_info* which,
3041 + struct mutex** sem,
3042 + struct task_struct** t)
3043 +{
3044 + unsigned long flags;
3045 + u32 pending;
3046 +
3047 + /* init values */
3048 + *sem = NULL;
3049 + *t = NULL;
3050 +
3051 + raw_spin_lock_irqsave(&which->lock, flags);
3052 +
3053 + pending = litirq_pending_irqoff(which);
3054 + if(pending)
3055 + {
3056 + if(which->current_owner != NULL)
3057 + {
3058 + *t = which->current_owner;
3059 + *sem = &tsk_rt(which->current_owner)->klitirqd_sem;
3060 + }
3061 + else
3062 + {
3063 + BUG();
3064 + }
3065 + }
3066 + raw_spin_unlock_irqrestore(&which->lock, flags);
3067 +
3068 + if(likely(*sem))
3069 + {
3070 + return pending;
3071 + }
3072 + else
3073 + {
3074 + return 0;
3075 + }
3076 +}
3077 +
3078 +/* returns true if the next piece of work to do is from a different owner.
3079 + */
3080 +static int tasklet_ownership_change(
3081 + struct klitirqd_info* which,
3082 + enum pending_flags taskletQ)
3083 +{
3084 + /* this function doesn't have to look at work objects since they have
3085 + priority below tasklets. */
3086 +
3087 + unsigned long flags;
3088 + int ret = 0;
3089 +
3090 + raw_spin_lock_irqsave(&which->lock, flags);
3091 +
3092 + switch(taskletQ)
3093 + {
3094 + case LIT_TASKLET_HI:
3095 + if(litirq_pending_hi_irqoff(which))
3096 + {
3097 + ret = (which->pending_tasklets_hi.head->owner !=
3098 + which->current_owner);
3099 + }
3100 + break;
3101 + case LIT_TASKLET_LOW:
3102 + if(litirq_pending_low_irqoff(which))
3103 + {
3104 + ret = (which->pending_tasklets.head->owner !=
3105 + which->current_owner);
3106 + }
3107 + break;
3108 + default:
3109 + break;
3110 + }
3111 +
3112 + raw_spin_unlock_irqrestore(&which->lock, flags);
3113 +
3114 + TRACE_TASK(which->klitirqd, "ownership change needed: %d\n", ret);
3115 +
3116 + return ret;
3117 +}
3118 +
3119 +
3120 +static void __reeval_prio(struct klitirqd_info* which)
3121 +{
3122 + struct task_struct* next_owner = NULL;
3123 + struct task_struct* klitirqd = which->klitirqd;
3124 +
3125 + /* Check in prio-order */
3126 + u32 pending = litirq_pending_irqoff(which);
3127 +
3128 + //__dump_state(which, "__reeval_prio: before");
3129 +
3130 + if(pending)
3131 + {
3132 + if(pending & LIT_TASKLET_HI)
3133 + {
3134 + next_owner = which->pending_tasklets_hi.head->owner;
3135 + }
3136 + else if(pending & LIT_TASKLET_LOW)
3137 + {
3138 + next_owner = which->pending_tasklets.head->owner;
3139 + }
3140 + else if(pending & LIT_WORK)
3141 + {
3142 + struct work_struct* work =
3143 + list_first_entry(&which->worklist, struct work_struct, entry);
3144 + next_owner = work->owner;
3145 + }
3146 + }
3147 +
3148 + if(next_owner != which->current_owner)
3149 + {
3150 + struct task_struct* old_owner = which->current_owner;
3151 +
3152 + /* bind the next owner. */
3153 + which->current_owner = next_owner;
3154 + mb();
3155 +
3156 + if(next_owner != NULL)
3157 + {
3158 + if(!in_interrupt())
3159 + {
3160 + TRACE_CUR("%s: Ownership change: %s/%d to %s/%d\n", __FUNCTION__,
3161 + ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->comm,
3162 + ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->pid,
3163 + next_owner->comm, next_owner->pid);
3164 + }
3165 + else
3166 + {
3167 + TRACE("%s: Ownership change: %s/%d to %s/%d\n", __FUNCTION__,
3168 + ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->comm,
3169 + ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->pid,
3170 + next_owner->comm, next_owner->pid);
3171 + }
3172 +
3173 + litmus->set_prio_inh_klitirqd(klitirqd, old_owner, next_owner);
3174 + }
3175 + else
3176 + {
3177 + if(likely(!in_interrupt()))
3178 + {
3179 + TRACE_CUR("%s: Ownership change: %s/%d to NULL (reverting)\n",
3180 + __FUNCTION__, klitirqd->comm, klitirqd->pid);
3181 + }
3182 + else
3183 + {
3184 + // is this a bug?
3185 + TRACE("%s: Ownership change: %s/%d to NULL (reverting)\n",
3186 + __FUNCTION__, klitirqd->comm, klitirqd->pid);
3187 + }
3188 +
3189 + BUG_ON(pending != 0);
3190 + litmus->clear_prio_inh_klitirqd(klitirqd, old_owner);
3191 + }
3192 + }
3193 +
3194 + //__dump_state(which, "__reeval_prio: after");
3195 +}
3196 +
3197 +static void reeval_prio(struct klitirqd_info* which)
3198 +{
3199 + unsigned long flags;
3200 +
3201 + raw_spin_lock_irqsave(&which->lock, flags);
3202 + __reeval_prio(which);
3203 + raw_spin_unlock_irqrestore(&which->lock, flags);
3204 +}
3205 +
3206 +
3207 +static void wakeup_litirqd_locked(struct klitirqd_info* which)
3208 +{
3209 + /* Interrupts are disabled: no need to stop preemption */
3210 + if (which && which->klitirqd)
3211 + {
3212 + __reeval_prio(which); /* configure the proper priority */
3213 +
3214 + if(which->klitirqd->state != TASK_RUNNING)
3215 + {
3216 + TRACE("%s: Waking up klitirqd: %s/%d\n", __FUNCTION__,
3217 + which->klitirqd->comm, which->klitirqd->pid);
3218 +
3219 + wake_up_process(which->klitirqd);
3220 + }
3221 + }
3222 +}
3223 +
3224 +
3225 +static void do_lit_tasklet(struct klitirqd_info* which,
3226 + struct tasklet_head* pending_tasklets)
3227 +{
3228 + unsigned long flags;
3229 + struct tasklet_struct *list;
3230 + atomic_t* count;
3231 +
3232 + raw_spin_lock_irqsave(&which->lock, flags);
3233 +
3234 + //__dump_state(which, "do_lit_tasklet: before steal");
3235 +
3236 + /* copy out the tasklets for our private use. */
3237 + list = pending_tasklets->head;
3238 + pending_tasklets->head = NULL;
3239 + pending_tasklets->tail = &pending_tasklets->head;
3240 +
3241 + /* remove pending flag */
3242 + which->pending &= (pending_tasklets == &which->pending_tasklets) ?
3243 + ~LIT_TASKLET_LOW :
3244 + ~LIT_TASKLET_HI;
3245 +
3246 + count = (pending_tasklets == &which->pending_tasklets) ?
3247 + &which->num_low_pending:
3248 + &which->num_hi_pending;
3249 +
3250 + //__dump_state(which, "do_lit_tasklet: after steal");
3251 +
3252 + raw_spin_unlock_irqrestore(&which->lock, flags);
3253 +
3254 +
3255 + while(list)
3256 + {
3257 + struct tasklet_struct *t = list;
3258 +
3259 + /* advance, lest we forget */
3260 + list = list->next;
3261 +
3262 + /* execute tasklet if it has my priority and is free */
3263 + if ((t->owner == which->current_owner) && tasklet_trylock(t)) {
3264 + if (!atomic_read(&t->count)) {
3265 +
3266 + sched_trace_tasklet_begin(t->owner);
3267 +
3268 + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
3269 + {
3270 + BUG();
3271 + }
3272 + TRACE_CUR("%s: Invoking tasklet.\n", __FUNCTION__);
3273 + t->func(t->data);
3274 + tasklet_unlock(t);
3275 +
3276 + atomic_dec(count);
3277 +
3278 + sched_trace_tasklet_end(t->owner, 0ul);
3279 +
3280 + continue; /* process more tasklets */
3281 + }
3282 + tasklet_unlock(t);
3283 + }
3284 +
3285 + TRACE_CUR("%s: Could not invoke tasklet. Requeuing.\n", __FUNCTION__);
3286 +
3287 + /* couldn't process tasklet. put it back at the end of the queue. */
3288 + if(pending_tasklets == &which->pending_tasklets)
3289 + ___litmus_tasklet_schedule(t, which, 0);
3290 + else
3291 + ___litmus_tasklet_hi_schedule(t, which, 0);
3292 + }
3293 +}
3294 +
3295 +
3296 +// returns 1 if priorities need to be changed to continue processing
3297 +// pending tasklets.
3298 +static int do_litirq(struct klitirqd_info* which)
3299 +{
3300 + u32 pending;
3301 + int resched = 0;
3302 +
3303 + if(in_interrupt())
3304 + {
3305 + TRACE("%s: exiting early: in interrupt context!\n", __FUNCTION__);
3306 + return(0);
3307 + }
3308 +
3309 + if(which->klitirqd != current)
3310 + {
3311 + TRACE_CUR("%s: exiting early: thread/info mismatch! Running %s/%d but given %s/%d.\n",
3312 + __FUNCTION__, current->comm, current->pid,
3313 + which->klitirqd->comm, which->klitirqd->pid);
3314 + return(0);
3315 + }
3316 +
3317 + if(!is_realtime(current))
3318 + {
3319 + TRACE_CUR("%s: exiting early: klitirqd is not real-time. Sched Policy = %d\n",
3320 + __FUNCTION__, current->policy);
3321 + return(0);
3322 + }
3323 +
3324 +
3325 + /* We only handle tasklets & work objects, no need for RCU triggers? */
3326 +
3327 + pending = litirq_pending(which);
3328 + if(pending)
3329 + {
3330 + /* extract the work to do and do it! */
3331 + if(pending & LIT_TASKLET_HI)
3332 + {
3333 + TRACE_CUR("%s: Invoking HI tasklets.\n", __FUNCTION__);
3334 + do_lit_tasklet(which, &which->pending_tasklets_hi);
3335 + resched = tasklet_ownership_change(which, LIT_TASKLET_HI);
3336 +
3337 + if(resched)
3338 + {
3339 + TRACE_CUR("%s: HI tasklets of another owner remain. "
3340 + "Skipping any LOW tasklets.\n", __FUNCTION__);
3341 + }
3342 + }
3343 +
3344 + if(!resched && (pending & LIT_TASKLET_LOW))
3345 + {
3346 + TRACE_CUR("%s: Invoking LOW tasklets.\n", __FUNCTION__);
3347 + do_lit_tasklet(which, &which->pending_tasklets);
3348 + resched = tasklet_ownership_change(which, LIT_TASKLET_LOW);
3349 +
3350 + if(resched)
3351 + {
3352 + TRACE_CUR("%s: LOW tasklets of another owner remain. "
3353 + "Skipping any work objects.\n", __FUNCTION__);
3354 + }
3355 + }
3356 + }
3357 +
3358 + return(resched);
3359 +}
3360 +
3361 +
3362 +static void do_work(struct klitirqd_info* which)
3363 +{
3364 + unsigned long flags;
3365 + work_func_t f;
3366 + struct work_struct* work;
3367 +
3368 + // only execute one work-queue item to yield to tasklets.
3369 + // ...is this a good idea, or should we just batch them?
3370 + raw_spin_lock_irqsave(&which->lock, flags);
3371 +
3372 + if(!litirq_pending_work_irqoff(which))
3373 + {
3374 + raw_spin_unlock_irqrestore(&which->lock, flags);
3375 + goto no_work;
3376 + }
3377 +
3378 + work = list_first_entry(&which->worklist, struct work_struct, entry);
3379 + list_del_init(&work->entry);
3380 +
3381 + if(list_empty(&which->worklist))
3382 + {
3383 + which->pending &= ~LIT_WORK;
3384 + }
3385 +
3386 + raw_spin_unlock_irqrestore(&which->lock, flags);
3387 +
3388 +
3389 +
3390 + /* safe to read current_owner outside of lock since only this thread
3391 + may write to the pointer. */
3392 + if(work->owner == which->current_owner)
3393 + {
3394 + TRACE_CUR("%s: Invoking work object.\n", __FUNCTION__);
3395 + // do the work!
3396 + work_clear_pending(work);
3397 + f = work->func;
3398 + f(work); /* can't touch 'work' after this point,
3399 + the user may have freed it. */
3400 +
3401 + atomic_dec(&which->num_work_pending);
3402 + }
3403 + else
3404 + {
3405 + TRACE_CUR("%s: Could not invoke work object. Requeuing.\n",
3406 + __FUNCTION__);
3407 + ___litmus_schedule_work(work, which, 0);
3408 + }
3409 +
3410 +no_work:
3411 + return;
3412 +}
3413 +
3414 +
3415 +static int set_litmus_daemon_sched(void)
3416 +{
3417 + /* set up a daemon job that will never complete.
3418 + it should only ever run on behalf of another
3419 + real-time task.
3420 +
3421 + TODO: Transition to a new job whenever a
3422 + new tasklet is handled */
3423 +
3424 + int ret = 0;
3425 +
3426 + struct rt_task tp = {
3427 + .exec_cost = 0,
3428 + .period = 1000000000, /* dummy 1 second period */
3429 + .phase = 0,
3430 + .cpu = task_cpu(current),
3431 + .budget_policy = NO_ENFORCEMENT,
3432 + .cls = RT_CLASS_BEST_EFFORT
3433 + };
3434 +
3435 + struct sched_param param = { .sched_priority = 0};
3436 +
3437 +
3438 + /* set task params, mark as proxy thread, and init other data */
3439 + tsk_rt(current)->task_params = tp;
3440 + tsk_rt(current)->is_proxy_thread = 1;
3441 + tsk_rt(current)->cur_klitirqd = NULL;
3442 + //init_MUTEX(&tsk_rt(current)->klitirqd_sem);
3443 + mutex_init(&tsk_rt(current)->klitirqd_sem);
3444 + //init_completion(&tsk_rt(current)->klitirqd_sem);
3445 + atomic_set(&tsk_rt(current)->klitirqd_sem_stat, NOT_HELD);
3446 +
3447 + /* inform the OS we're SCHED_LITMUS --
3448 + sched_setscheduler_nocheck() calls litmus_admit_task(). */
3449 + sched_setscheduler_nocheck(current, SCHED_LITMUS, ¶m);
3450 +
3451 + return ret;
3452 +}
3453 +
3454 +static void enter_execution_phase(struct klitirqd_info* which,
3455 + struct mutex* sem,
3456 + struct task_struct* t)
3457 +{
3458 + TRACE_CUR("%s: Trying to enter execution phase. "
3459 + "Acquiring semaphore of %s/%d\n", __FUNCTION__,
3460 + t->comm, t->pid);
3461 + down_and_set_stat(current, HELD, sem);
3462 + TRACE_CUR("%s: Execution phase entered! "
3463 + "Acquired semaphore of %s/%d\n", __FUNCTION__,
3464 + t->comm, t->pid);
3465 +}
3466 +
3467 +static void exit_execution_phase(struct klitirqd_info* which,
3468 + struct mutex* sem,
3469 + struct task_struct* t)
3470 +{
3471 + TRACE_CUR("%s: Exiting execution phase. "
3472 + "Releasing semaphore of %s/%d\n", __FUNCTION__,
3473 + t->comm, t->pid);
3474 + if(atomic_read(&tsk_rt(current)->klitirqd_sem_stat) == HELD)
3475 + {
3476 + up_and_set_stat(current, NOT_HELD, sem);
3477 + TRACE_CUR("%s: Execution phase exited! "
3478 + "Released semaphore of %s/%d\n", __FUNCTION__,
3479 + t->comm, t->pid);
3480 + }
3481 + else
3482 + {
3483 + TRACE_CUR("%s: COULDN'T RELEASE SEMAPHORE BECAUSE ONE IS NOT HELD!\n", __FUNCTION__);
3484 + }
3485 +}
3486 +
3487 +/* main loop for klitsoftirqd */
3488 +static int run_klitirqd(void* unused)
3489 +{
3490 + struct klitirqd_info* which = &klitirqds[klitirqd_id(current)];
3491 + struct mutex* sem;
3492 + struct task_struct* owner;
3493 +
3494 + int rt_status = set_litmus_daemon_sched();
3495 +
3496 + if(rt_status != 0)
3497 + {
3498 + TRACE_CUR("%s: Failed to transition to rt-task.\n", __FUNCTION__);
3499 + goto rt_failed;
3500 + }
3501 +
3502 + atomic_inc(&num_ready_klitirqds);
3503 +
3504 + set_current_state(TASK_INTERRUPTIBLE);
3505 +
3506 + while (!kthread_should_stop())
3507 + {
3508 + preempt_disable();
3509 + if (!litirq_pending(which))
3510 + {
3511 + /* sleep for work */
3512 + TRACE_CUR("%s: No more tasklets or work objects. Going to sleep.\n",
3513 + __FUNCTION__);
3514 + preempt_enable_no_resched();
3515 + schedule();
3516 +
3517 + if(kthread_should_stop()) /* bail out */
3518 + {
3519 + TRACE_CUR("%s:%d: Signaled to terminate.\n", __FUNCTION__, __LINE__);
3520 + continue;
3521 + }
3522 +
3523 + preempt_disable();
3524 + }
3525 +
3526 + __set_current_state(TASK_RUNNING);
3527 +
3528 + while (litirq_pending_and_sem_and_owner(which, &sem, &owner))
3529 + {
3530 + int needs_resched = 0;
3531 +
3532 + preempt_enable_no_resched();
3533 +
3534 + BUG_ON(sem == NULL);
3535 +
3536 + // wait to enter execution phase; wait for 'current_owner' to block.
3537 + enter_execution_phase(which, sem, owner);
3538 +
3539 + if(kthread_should_stop())
3540 + {
3541 + TRACE_CUR("%s:%d: Signaled to terminate.\n", __FUNCTION__, __LINE__);
3542 + break;
3543 + }
3544 +
3545 + preempt_disable();
3546 +
3547 + /* Double check that there's still pending work and the owner hasn't
3548 + * changed. Pending items may have been flushed while we were sleeping.
3549 + */
3550 + if(litirq_pending_with_owner(which, owner))
3551 + {
3552 + TRACE_CUR("%s: Executing tasklets and/or work objects.\n",
3553 + __FUNCTION__);
3554 +
3555 + needs_resched = do_litirq(which);
3556 +
3557 + preempt_enable_no_resched();
3558 +
3559 + // work objects are preemptible.
3560 + if(!needs_resched)
3561 + {
3562 + do_work(which);
3563 + }
3564 +
3565 + // exit execution phase.
3566 + exit_execution_phase(which, sem, owner);
3567 +
3568 + TRACE_CUR("%s: Setting up next priority.\n", __FUNCTION__);
3569 + reeval_prio(which); /* check if we need to change priority here */
3570 + }
3571 + else
3572 + {
3573 + TRACE_CUR("%s: Pending work was flushed! Prev owner was %s/%d\n",
3574 + __FUNCTION__,
3575 + owner->comm, owner->pid);
3576 + preempt_enable_no_resched();
3577 +
3578 + // exit execution phase.
3579 + exit_execution_phase(which, sem, owner);
3580 + }
3581 +
3582 + cond_resched();
3583 + preempt_disable();
3584 + }
3585 + preempt_enable();
3586 + set_current_state(TASK_INTERRUPTIBLE);
3587 + }
3588 + __set_current_state(TASK_RUNNING);
3589 +
3590 + atomic_dec(&num_ready_klitirqds);
3591 +
3592 +rt_failed:
3593 + litmus_exit_task(current);
3594 +
3595 + return rt_status;
3596 +}
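/* Editorial summary (not part of the patch) of one iteration of the main
 * loop in run_klitirqd() above:
 *
 *	while (litirq_pending_and_sem_and_owner(which, &sem, &owner)) {
 *		enter_execution_phase(which, sem, owner);  // block on owner's klitirqd_sem
 *		if (litirq_pending_with_owner(which, owner)) {
 *			needs_resched = do_litirq(which);  // HI, then LOW tasklets
 *			if (!needs_resched)
 *				do_work(which);            // at most one work item
 *			exit_execution_phase(which, sem, owner);
 *			reeval_prio(which);                // inherit next owner's priority
 *		} else {
 *			exit_execution_phase(which, sem, owner);  // work was flushed meanwhile
 *		}
 *	}
 */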
3597 +
3598 +
3599 +struct klitirqd_launch_data
3600 +{
3601 + int* cpu_affinity;
3602 + struct work_struct work;
3603 +};
3604 +
3605 +/* executed by a kworker from workqueues */
3606 +static void launch_klitirqd(struct work_struct *work)
3607 +{
3608 + int i;
3609 +
3610 + struct klitirqd_launch_data* launch_data =
3611 + container_of(work, struct klitirqd_launch_data, work);
3612 +
3613 + TRACE("%s: Creating %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD);
3614 +
3615 + /* create the daemon threads */
3616 + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
3617 + {
3618 + if(launch_data->cpu_affinity)
3619 + {
3620 + klitirqds[i].klitirqd =
3621 + kthread_create(
3622 + run_klitirqd,
3623 + /* treat the affinity as a pointer, we'll cast it back later */
3624 + (void*)(long long)launch_data->cpu_affinity[i],
3625 + "klitirqd_th%d/%d",
3626 + i,
3627 + launch_data->cpu_affinity[i]);
3628 +
3629 + /* litmus will put is in the right cluster. */
3630 + kthread_bind(klitirqds[i].klitirqd, launch_data->cpu_affinity[i]);
3631 + }
3632 + else
3633 + {
3634 + klitirqds[i].klitirqd =
3635 + kthread_create(
3636 + run_klitirqd,
3637 + /* treat the affinity as a pointer, we'll cast it back later */
3638 + (void*)(long long)(-1),
3639 + "klitirqd_th%d",
3640 + i);
3641 + }
3642 + }
3643 +
3644 + TRACE("%s: Launching %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD);
3645 +
3646 + /* unleash the daemons */
3647 + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
3648 + {
3649 + wake_up_process(klitirqds[i].klitirqd);
3650 + }
3651 +
3652 + if(launch_data->cpu_affinity)
3653 + kfree(launch_data->cpu_affinity);
3654 + kfree(launch_data);
3655 +}
3656 +
3657 +
3658 +void spawn_klitirqd(int* affinity)
3659 +{
3660 + int i;
3661 + struct klitirqd_launch_data* delayed_launch;
3662 +
3663 + if(atomic_read(&num_ready_klitirqds) != 0)
3664 + {
3665 + TRACE("%s: At least one klitirqd is already running! Need to call kill_klitirqd()?\n");
3666 + return;
3667 + }
3668 +
3669 + /* init the tasklet & work queues */
3670 + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
3671 + {
3672 + klitirqds[i].terminating = 0;
3673 + klitirqds[i].pending = 0;
3674 +
3675 + klitirqds[i].num_hi_pending.counter = 0;
3676 + klitirqds[i].num_low_pending.counter = 0;
3677 + klitirqds[i].num_work_pending.counter = 0;
3678 +
3679 + klitirqds[i].pending_tasklets_hi.head = NULL;
3680 + klitirqds[i].pending_tasklets_hi.tail = &klitirqds[i].pending_tasklets_hi.head;
3681 +
3682 + klitirqds[i].pending_tasklets.head = NULL;
3683 + klitirqds[i].pending_tasklets.tail = &klitirqds[i].pending_tasklets.head;
3684 +
3685 + INIT_LIST_HEAD(&klitirqds[i].worklist);
3686 +
3687 + raw_spin_lock_init(&klitirqds[i].lock);
3688 + }
3689 +
3690 + /* wait to flush the initializations to memory since other threads
3691 + will access it. */
3692 + mb();
3693 +
3694 + /* tell a work queue to launch the threads. we can't make scheduling
3695 + calls since we're in an atomic state. */
3696 + TRACE("%s: Setting callback up to launch klitirqds\n", __FUNCTION__);
3697 + delayed_launch = kmalloc(sizeof(struct klitirqd_launch_data), GFP_ATOMIC);
3698 + if(affinity)
3699 + {
3700 + delayed_launch->cpu_affinity =
3701 + kmalloc(sizeof(int)*NR_LITMUS_SOFTIRQD, GFP_ATOMIC);
3702 +
3703 + memcpy(delayed_launch->cpu_affinity, affinity,
3704 + sizeof(int)*NR_LITMUS_SOFTIRQD);
3705 + }
3706 + else
3707 + {
3708 + delayed_launch->cpu_affinity = NULL;
3709 + }
3710 + INIT_WORK(&delayed_launch->work, launch_klitirqd);
3711 + schedule_work(&delayed_launch->work);
3712 +}
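/* Editorial note (not part of the patch): a plugin would typically call
 * spawn_klitirqd() from its activation path, optionally pinning one daemon
 * per CPU/cluster, and rely on kill_klitirqd() below for teardown; the
 * latter is also invoked from switch_sched_plugin() in litmus/litmus.c.
 * Hedged usage sketch:
 *
 *	int affinity[NR_LITMUS_SOFTIRQD] = { 0 };   // desired CPUs
 *	spawn_klitirqd(affinity);                   // or spawn_klitirqd(NULL)
 *	...
 *	kill_klitirqd();
 */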
3713 +
3714 +
3715 +void kill_klitirqd(void)
3716 +{
3717 + if(!klitirqd_is_dead())
3718 + {
3719 + int i;
3720 +
3721 + TRACE("%s: Killing %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD);
3722 +
3723 + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
3724 + {
3725 + if(klitirqds[i].terminating != 1)
3726 + {
3727 + klitirqds[i].terminating = 1;
3728 + mb(); /* just to be sure? */
3729 + flush_pending(klitirqds[i].klitirqd, NULL);
3730 +
3731 + /* signal termination */
3732 + kthread_stop(klitirqds[i].klitirqd);
3733 + }
3734 + }
3735 + }
3736 +}
3737 +
3738 +
3739 +int klitirqd_is_ready(void)
3740 +{
3741 + return(atomic_read(&num_ready_klitirqds) == NR_LITMUS_SOFTIRQD);
3742 +}
3743 +
3744 +int klitirqd_is_dead(void)
3745 +{
3746 + return(atomic_read(&num_ready_klitirqds) == 0);
3747 +}
3748 +
3749 +
3750 +struct task_struct* get_klitirqd(unsigned int k_id)
3751 +{
3752 + return(klitirqds[k_id].klitirqd);
3753 +}
3754 +
3755 +
3756 +void flush_pending(struct task_struct* klitirqd_thread,
3757 + struct task_struct* owner)
3758 +{
3759 + unsigned int k_id = klitirqd_id(klitirqd_thread);
3760 + struct klitirqd_info *which = &klitirqds[k_id];
3761 +
3762 + unsigned long flags;
3763 + struct tasklet_struct *list;
3764 +
3765 + u32 work_flushed = 0;
3766 +
3767 + raw_spin_lock_irqsave(&which->lock, flags);
3768 +
3769 + //__dump_state(which, "flush_pending: before");
3770 +
3771 + // flush hi tasklets.
3772 + if(litirq_pending_hi_irqoff(which))
3773 + {
3774 + which->pending &= ~LIT_TASKLET_HI;
3775 +
3776 + list = which->pending_tasklets_hi.head;
3777 + which->pending_tasklets_hi.head = NULL;
3778 + which->pending_tasklets_hi.tail = &which->pending_tasklets_hi.head;
3779 +
3780 + TRACE("%s: Handing HI tasklets back to Linux.\n", __FUNCTION__);
3781 +
3782 + while(list)
3783 + {
3784 + struct tasklet_struct *t = list;
3785 + list = list->next;
3786 +
3787 + if(likely((t->owner == owner) || (owner == NULL)))
3788 + {
3789 + if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)))
3790 + {
3791 + BUG();
3792 + }
3793 +
3794 + work_flushed |= LIT_TASKLET_HI;
3795 +
3796 + t->owner = NULL;
3797 +
3798 + // re-mark as scheduled before handing the tasklet back to Linux.
3799 + if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
3800 + {
3801 + atomic_dec(&which->num_hi_pending);
3802 + ___tasklet_hi_schedule(t);
3803 + }
3804 + else
3805 + {
3806 + TRACE("%s: dropped hi tasklet??\n", __FUNCTION__);
3807 + BUG();
3808 + }
3809 + }
3810 + else
3811 + {
3812 + TRACE("%s: Could not flush a HI tasklet.\n", __FUNCTION__);
3813 + // put back on queue.
3814 + ___litmus_tasklet_hi_schedule(t, which, 0);
3815 + }
3816 + }
3817 + }
3818 +
3819 + // flush low tasklets.
3820 + if(litirq_pending_low_irqoff(which))
3821 + {
3822 + which->pending &= ~LIT_TASKLET_LOW;
3823 +
3824 + list = which->pending_tasklets.head;
3825 + which->pending_tasklets.head = NULL;
3826 + which->pending_tasklets.tail = &which->pending_tasklets.head;
3827 +
3828 + TRACE("%s: Handing LOW tasklets back to Linux.\n", __FUNCTION__);
3829 +
3830 + while(list)
3831 + {
3832 + struct tasklet_struct *t = list;
3833 + list = list->next;
3834 +
3835 + if(likely((t->owner == owner) || (owner == NULL)))
3836 + {
3837 + if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)))
3838 + {
3839 + BUG();
3840 + }
3841 +
3842 + work_flushed |= LIT_TASKLET_LOW;
3843 +
3844 + t->owner = NULL;
3845 + sched_trace_tasklet_end(owner, 1ul);
3846 +
3847 + if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
3848 + {
3849 + atomic_dec(&which->num_low_pending);
3850 + ___tasklet_schedule(t);
3851 + }
3852 + else
3853 + {
3854 + TRACE("%s: dropped tasklet??\n", __FUNCTION__);
3855 + BUG();
3856 + }
3857 + }
3858 + else
3859 + {
3860 + TRACE("%s: Could not flush a LOW tasklet.\n", __FUNCTION__);
3861 + // put back on queue
3862 + ___litmus_tasklet_schedule(t, which, 0);
3863 + }
3864 + }
3865 + }
3866 +
3867 + // flush work objects
3868 + if(litirq_pending_work_irqoff(which))
3869 + {
3870 + which->pending &= ~LIT_WORK;
3871 +
3872 + TRACE("%s: Handing work objects back to Linux.\n", __FUNCTION__);
3873 +
3874 + while(!list_empty(&which->worklist))
3875 + {
3876 + struct work_struct* work =
3877 + list_first_entry(&which->worklist, struct work_struct, entry);
3878 + list_del_init(&work->entry);
3879 +
3880 + if(likely((work->owner == owner) || (owner == NULL)))
3881 + {
3882 + work_flushed |= LIT_WORK;
3883 + atomic_dec(&which->num_work_pending);
3884 +
3885 + work->owner = NULL;
3886 + sched_trace_work_end(owner, current, 1ul);
3887 + __schedule_work(work);
3888 + }
3889 + else
3890 + {
3891 + TRACE("%s: Could not flush a work object.\n", __FUNCTION__);
3892 + // put back on queue
3893 + ___litmus_schedule_work(work, which, 0);
3894 + }
3895 + }
3896 + }
3897 +
3898 + //__dump_state(which, "flush_pending: after (before reeval prio)");
3899 +
3900 +
3901 + mb(); /* commit changes to pending flags */
3902 +
3903 + /* reset the scheduling priority */
3904 + if(work_flushed)
3905 + {
3906 + __reeval_prio(which);
3907 +
3908 + /* Try to offload flushed tasklets to Linux's ksoftirqd. */
3909 + if(work_flushed & (LIT_TASKLET_LOW | LIT_TASKLET_HI))
3910 + {
3911 + wakeup_softirqd();
3912 + }
3913 + }
3914 + else
3915 + {
3916 + TRACE_CUR("%s: no work flushed, so __reeval_prio() skipped\n", __FUNCTION__);
3917 + }
3918 +
3919 + raw_spin_unlock_irqrestore(&which->lock, flags);
3920 +}
3921 +
3922 +
3923 +
3924 +
3925 +static void ___litmus_tasklet_schedule(struct tasklet_struct *t,
3926 + struct klitirqd_info *which,
3927 + int wakeup)
3928 +{
3929 + unsigned long flags;
3930 + u32 old_pending;
3931 +
3932 + t->next = NULL;
3933 +
3934 + raw_spin_lock_irqsave(&which->lock, flags);
3935 +
3936 + //__dump_state(which, "___litmus_tasklet_schedule: before queuing");
3937 +
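+ /* append the tasklet to the singly-linked pending list via the tail pointer */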
3938 + *(which->pending_tasklets.tail) = t;
3939 + which->pending_tasklets.tail = &t->next;
3940 +
3941 + old_pending = which->pending;
3942 + which->pending |= LIT_TASKLET_LOW;
3943 +
3944 + atomic_inc(&which->num_low_pending);
3945 +
3946 + mb();
3947 +
3948 + if(!old_pending && wakeup)
3949 + {
3950 + wakeup_litirqd_locked(which); /* wake up the klitirqd */
3951 + }
3952 +
3953 + //__dump_state(which, "___litmus_tasklet_schedule: after queuing");
3954 +
3955 + raw_spin_unlock_irqrestore(&which->lock, flags);
3956 +}
3957 +
3958 +int __litmus_tasklet_schedule(struct tasklet_struct *t, unsigned int k_id)
3959 +{
3960 + int ret = 0; /* assume failure */
3961 + if(unlikely((t->owner == NULL) || !is_realtime(t->owner)))
3962 + {
3963 + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
3964 + BUG();
3965 + }
3966 +
3967 + if(unlikely(k_id >= NR_LITMUS_SOFTIRQD))
3968 + {
3969 + TRACE("%s: No klitirqd_th%d!\n", __FUNCTION__, k_id);
3970 + BUG();
3971 + }
3972 +
3973 + if(likely(!klitirqds[k_id].terminating))
3974 + {
3975 + /* Can't accept tasklets while we're processing a workqueue
3976 + because they're handled by the same thread. This case is
3977 + very RARE.
3978 +
3979 + TODO: Use a separate thread for work objects!!!!!!
3980 + */
3981 + if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0))
3982 + {
3983 + ret = 1;
3984 + ___litmus_tasklet_schedule(t, &klitirqds[k_id], 1);
3985 + }
3986 + else
3987 + {
3988 + TRACE("%s: rejected tasklet because of pending work.\n",
3989 + __FUNCTION__);
3990 + }
3991 + }
3992 + return(ret);
3993 +}
3994 +
3995 +EXPORT_SYMBOL(__litmus_tasklet_schedule);
3996 +
3997 +
3998 +static void ___litmus_tasklet_hi_schedule(struct tasklet_struct *t,
3999 + struct klitirqd_info *which,
4000 + int wakeup)
4001 +{
4002 + unsigned long flags;
4003 + u32 old_pending;
4004 +
4005 + t->next = NULL;
4006 +
4007 + raw_spin_lock_irqsave(&which->lock, flags);
4008 +
4009 + *(which->pending_tasklets_hi.tail) = t;
4010 + which->pending_tasklets_hi.tail = &t->next;
4011 +
4012 + old_pending = which->pending;
4013 + which->pending |= LIT_TASKLET_HI;
4014 +
4015 + atomic_inc(&which->num_hi_pending);
4016 +
4017 + mb();
4018 +
4019 + if(!old_pending && wakeup)
4020 + {
4021 + wakeup_litirqd_locked(which); /* wake up the klitirqd */
4022 + }
4023 +
4024 + raw_spin_unlock_irqrestore(&which->lock, flags);
4025 +}
4026 +
4027 +int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, unsigned int k_id)
4028 +{
4029 + int ret = 0; /* assume failure */
4030 + if(unlikely((t->owner == NULL) || !is_realtime(t->owner)))
4031 + {
4032 + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
4033 + BUG();
4034 + }
4035 +
4036 + if(unlikely(k_id >= NR_LITMUS_SOFTIRQD))
4037 + {
4038 + TRACE("%s: No klitirqd_th%d!\n", __FUNCTION__, k_id);
4039 + BUG();
4040 + }
4041 +
4042 + if(unlikely(!klitirqd_is_ready()))
4043 + {
4044 + TRACE("%s: klitirqd is not ready!\n", __FUNCTION__, k_id);
4045 + BUG();
4046 + }
4047 +
4048 + if(likely(!klitirqds[k_id].terminating))
4049 + {
4050 + if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0))
4051 + {
4052 + ret = 1;
4053 + ___litmus_tasklet_hi_schedule(t, &klitirqds[k_id], 1);
4054 + }
4055 + else
4056 + {
4057 + TRACE("%s: rejected tasklet because of pending work.\n",
4058 + __FUNCTION__);
4059 + }
4060 + }
4061 + return(ret);
4062 +}
4063 +
4064 +EXPORT_SYMBOL(__litmus_tasklet_hi_schedule);
4065 +
4066 +
4067 +int __litmus_tasklet_hi_schedule_first(struct tasklet_struct *t, unsigned int k_id)
4068 +{
4069 + int ret = 0; /* assume failure */
4070 + u32 old_pending;
4071 +
4072 + BUG_ON(!irqs_disabled());
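+ /* the caller must have interrupts disabled: the klitirqd lock is
+ taken below without irqsave. */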
4073 +
4074 + if(unlikely((t->owner == NULL) || !is_realtime(t->owner)))
4075 + {
4076 + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
4077 + BUG();
4078 + }
4079 +
4080 + if(unlikely(k_id >= NR_LITMUS_SOFTIRQD))
4081 + {
4082 + TRACE("%s: No klitirqd_th%u!\n", __FUNCTION__, k_id);
4083 + BUG();
4084 + }
4085 +
4086 + if(unlikely(!klitirqd_is_ready()))
4087 + {
4088 + TRACE("%s: klitirqd is not ready!\n", __FUNCTION__, k_id);
4089 + BUG();
4090 + }
4091 +
4092 + if(likely(!klitirqds[k_id].terminating))
4093 + {
4094 + raw_spin_lock(&klitirqds[k_id].lock);
4095 +
4096 + if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0))
4097 + {
4098 + ret = 1; // success!
4099 +
4100 + t->next = klitirqds[k_id].pending_tasklets_hi.head;
4101 + klitirqds[k_id].pending_tasklets_hi.head = t;
4102 +
4103 + old_pending = klitirqds[k_id].pending;
4104 + klitirqds[k_id].pending |= LIT_TASKLET_HI;
4105 +
4106 + atomic_inc(&klitirqds[k_id].num_hi_pending);
4107 +
4108 + mb();
4109 +
4110 + if(!old_pending)
4111 + wakeup_litirqd_locked(&klitirqds[k_id]); /* wake up the klitirqd */
4112 + }
4113 + else
4114 + {
4115 + TRACE("%s: rejected tasklet because of pending work.\n",
4116 + __FUNCTION__);
4117 + }
4118 +
4119 + raw_spin_unlock(&klitirqds[k_id].lock);
4120 + }
4121 + return(ret);
4122 +}
4123 +
4124 +EXPORT_SYMBOL(__litmus_tasklet_hi_schedule_first);
4125 +
4126 +
4127 +
4128 +static void ___litmus_schedule_work(struct work_struct *w,
4129 + struct klitirqd_info *which,
4130 + int wakeup)
4131 +{
4132 + unsigned long flags;
4133 + u32 old_pending;
4134 +
4135 + raw_spin_lock_irqsave(&which->lock, flags);
4136 +
4137 + work_pending(w);
4138 + list_add_tail(&w->entry, &which->worklist);
4139 +
4140 + old_pending = which->pending;
4141 + which->pending |= LIT_WORK;
4142 +
4143 + atomic_inc(&which->num_work_pending);
4144 +
4145 + mb();
4146 +
4147 + if(!old_pending && wakeup)
4148 + {
4149 + wakeup_litirqd_locked(which); /* wakeup the klitirqd */
4150 + }
4151 +
4152 + raw_spin_unlock_irqrestore(&which->lock, flags);
4153 +}
4154 +
4155 +int __litmus_schedule_work(struct work_struct *w, unsigned int k_id)
4156 +{
4157 + int ret = 1; /* assume success */
4158 + if(unlikely(w->owner == NULL) || !is_realtime(w->owner))
4159 + {
4160 + TRACE("%s: No owner associated with this work object!\n", __FUNCTION__);
4161 + BUG();
4162 + }
4163 +
4164 + if(unlikely(k_id >= NR_LITMUS_SOFTIRQD))
4165 + {
4166 + TRACE("%s: No klitirqd_th%u!\n", k_id);
4167 + BUG();
4168 + }
4169 +
4170 + if(unlikely(!klitirqd_is_ready()))
4171 + {
4172 + TRACE("%s: klitirqd is not ready!\n", __FUNCTION__, k_id);
4173 + BUG();
4174 + }
4175 +
4176 + if(likely(!klitirqds[k_id].terminating))
4177 + ___litmus_schedule_work(w, &klitirqds[k_id], 1);
4178 + else
4179 + ret = 0;
4180 + return(ret);
4181 +}
4182 +EXPORT_SYMBOL(__litmus_schedule_work);
4183 +
4184 +
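+/* Helpers for tracking ownership of a task's klitirqd_sem. The status
+ moves between HELD, NOT_HELD, NEED_TO_REACQUIRE, and REACQUIRING as a
+ task releases the semaphore when it suspends and reacquires it when it
+ resumes (see release_klitirqd_lock() and reacquire_klitirqd_lock()). */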
4185 +static int set_klitirqd_sem_status(unsigned long stat)
4186 +{
4187 + TRACE_CUR("SETTING STATUS FROM %d TO %d\n",
4188 + atomic_read(&tsk_rt(current)->klitirqd_sem_stat),
4189 + stat);
4190 + atomic_set(&tsk_rt(current)->klitirqd_sem_stat, stat);
4191 + //mb();
4192 +
4193 + return(0);
4194 +}
4195 +
4196 +static int set_klitirqd_sem_status_if_not_held(unsigned long stat)
4197 +{
4198 + if(atomic_read(&tsk_rt(current)->klitirqd_sem_stat) != HELD)
4199 + {
4200 + return(set_klitirqd_sem_status(stat));
4201 + }
4202 + return(-1);
4203 +}
4204 +
4205 +
4206 +void __down_and_reset_and_set_stat(struct task_struct* t,
4207 + enum klitirqd_sem_status to_reset,
4208 + enum klitirqd_sem_status to_set,
4209 + struct mutex* sem)
4210 +{
4211 +#if 0
4212 + struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem);
4213 + struct task_struct* task = container_of(param, struct task_struct, rt_param);
4214 +
4215 + TRACE_CUR("%s: entered. Locking semaphore of %s/%d\n",
4216 + __FUNCTION__, task->comm, task->pid);
4217 +#endif
4218 +
4219 + mutex_lock_sfx(sem,
4220 + set_klitirqd_sem_status_if_not_held, to_reset,
4221 + set_klitirqd_sem_status, to_set);
4222 +#if 0
4223 + TRACE_CUR("%s: exiting. Have semaphore of %s/%d\n",
4224 + __FUNCTION__, task->comm, task->pid);
4225 +#endif
4226 +}
4227 +
4228 +void down_and_set_stat(struct task_struct* t,
4229 + enum klitirqd_sem_status to_set,
4230 + struct mutex* sem)
4231 +{
4232 +#if 0
4233 + struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem);
4234 + struct task_struct* task = container_of(param, struct task_struct, rt_param);
4235 +
4236 + TRACE_CUR("%s: entered. Locking semaphore of %s/%d\n",
4237 + __FUNCTION__, task->comm, task->pid);
4238 +#endif
4239 +
4240 + mutex_lock_sfx(sem,
4241 + NULL, 0,
4242 + set_klitirqd_sem_status, to_set);
4243 +
4244 +#if 0
4245 + TRACE_CUR("%s: exiting. Have semaphore of %s/%d\n",
4246 + __FUNCTION__, task->comm, task->pid);
4247 +#endif
4248 +}
4249 +
4250 +
4251 +void up_and_set_stat(struct task_struct* t,
4252 + enum klitirqd_sem_status to_set,
4253 + struct mutex* sem)
4254 +{
4255 +#if 0
4256 + struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem);
4257 + struct task_struct* task = container_of(param, struct task_struct, rt_param);
4258 +
4259 + TRACE_CUR("%s: entered. Unlocking semaphore of %s/%d\n",
4260 + __FUNCTION__,
4261 + task->comm, task->pid);
4262 +#endif
4263 +
4264 + mutex_unlock_sfx(sem, NULL, 0,
4265 + set_klitirqd_sem_status, to_set);
4266 +
4267 +#if 0
4268 + TRACE_CUR("%s: exiting. Unlocked semaphore of %s/%d\n",
4269 + __FUNCTION__,
4270 + task->comm, task->pid);
4271 +#endif
4272 +}
4273 +
4274 +
4275 +
4276 +void release_klitirqd_lock(struct task_struct* t)
4277 +{
4278 + if(is_realtime(t) && (atomic_read(&tsk_rt(t)->klitirqd_sem_stat) == HELD))
4279 + {
4280 + struct mutex* sem;
4281 + struct task_struct* owner = t;
4282 +
4283 + if(t->state == TASK_RUNNING)
4284 + {
4285 + TRACE_TASK(t, "NOT giving up klitirqd_sem because we're not blocked!\n");
4286 + return;
4287 + }
4288 +
4289 + if(likely(!tsk_rt(t)->is_proxy_thread))
4290 + {
4291 + sem = &tsk_rt(t)->klitirqd_sem;
4292 + }
4293 + else
4294 + {
4295 + unsigned int k_id = klitirqd_id(t);
4296 + owner = klitirqds[k_id].current_owner;
4297 +
4298 + BUG_ON(t != klitirqds[k_id].klitirqd);
4299 +
4300 + if(likely(owner))
4301 + {
4302 + sem = &tsk_rt(owner)->klitirqd_sem;
4303 + }
4304 + else
4305 + {
4306 + BUG();
4307 +
4308 + // We had the rug pulled out from under us. Abort attempt
4309 + // to reacquire the lock since our client no longer needs us.
4310 + TRACE_CUR("HUH?! How did this happen?\n");
4311 + atomic_set(&tsk_rt(t)->klitirqd_sem_stat, NOT_HELD);
4312 + return;
4313 + }
4314 + }
4315 +
4316 + //TRACE_CUR("Releasing semaphore of %s/%d...\n", owner->comm, owner->pid);
4317 + up_and_set_stat(t, NEED_TO_REACQUIRE, sem);
4318 + //TRACE_CUR("Semaphore of %s/%d released!\n", owner->comm, owner->pid);
4319 + }
4320 + /*
4321 + else if(is_realtime(t))
4322 + {
4323 + TRACE_CUR("%s: Nothing to do. Stat = %d\n", __FUNCTION__, tsk_rt(t)->klitirqd_sem_stat);
4324 + }
4325 + */
4326 +}
4327 +
4328 +int reacquire_klitirqd_lock(struct task_struct* t)
4329 +{
4330 + int ret = 0;
4331 +
4332 + if(is_realtime(t) && (atomic_read(&tsk_rt(t)->klitirqd_sem_stat) == NEED_TO_REACQUIRE))
4333 + {
4334 + struct mutex* sem;
4335 + struct task_struct* owner = t;
4336 +
4337 + if(likely(!tsk_rt(t)->is_proxy_thread))
4338 + {
4339 + sem = &tsk_rt(t)->klitirqd_sem;
4340 + }
4341 + else
4342 + {
4343 + unsigned int k_id = klitirqd_id(t);
4344 + //struct task_struct* owner = klitirqds[k_id].current_owner;
4345 + owner = klitirqds[k_id].current_owner;
4346 +
4347 + BUG_ON(t != klitirqds[k_id].klitirqd);
4348 +
4349 + if(likely(owner))
4350 + {
4351 + sem = &tsk_rt(owner)->klitirqd_sem;
4352 + }
4353 + else
4354 + {
4355 + // We had the rug pulled out from under us. Abort attempt
4356 + // to reacquire the lock since our client no longer needs us.
4357 + TRACE_CUR("No longer needs to reacquire klitirqd_sem!\n");
4358 + atomic_set(&tsk_rt(t)->klitirqd_sem_stat, NOT_HELD);
4359 + return(0);
4360 + }
4361 + }
4362 +
4363 + //TRACE_CUR("Trying to reacquire semaphore of %s/%d\n", owner->comm, owner->pid);
4364 + __down_and_reset_and_set_stat(t, REACQUIRING, HELD, sem);
4365 + //TRACE_CUR("Reacquired semaphore %s/%d\n", owner->comm, owner->pid);
4366 + }
4367 + /*
4368 + else if(is_realtime(t))
4369 + {
4370 + TRACE_CUR("%s: Nothing to do. Stat = %d\n", __FUNCTION__, tsk_rt(t)->klitirqd_sem_stat);
4371 + }
4372 + */
4373 +
4374 + return(ret);
4375 +}
4376 +
4377 diff --git a/litmus/locking.c b/litmus/locking.c
4378 index 2693f1a..cfce98e 100644
4379 --- a/litmus/locking.c
4380 +++ b/litmus/locking.c
4381 @@ -121,7 +121,6 @@ struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq)
4382 return(t);
4383 }
4384
4385 -
4386 #else
4387
4388 struct fdso_ops generic_lock_ops = {};
4389 diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c
4390 new file mode 100644
4391 index 0000000..d171521
4392 --- /dev/null
4393 +++ b/litmus/nvidia_info.c
4394 @@ -0,0 +1,536 @@
4395 +#include <linux/module.h>
4396 +#include <linux/semaphore.h>
4397 +#include <linux/pci.h>
4398 +
4399 +#include <litmus/sched_trace.h>
4400 +#include <litmus/nvidia_info.h>
4401 +#include <litmus/litmus.h>
4402 +
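+/* The typedefs below mirror the layout of the proprietary NVIDIA driver's
+ internal state structures so that LITMUS can locate fields such as
+ device_num from a tasklet's data pointer. */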
4403 +typedef unsigned char NvV8; /* "void": enumerated or multiple fields */
4404 +typedef unsigned short NvV16; /* "void": enumerated or multiple fields */
4405 +typedef unsigned char NvU8; /* 0 to 255 */
4406 +typedef unsigned short NvU16; /* 0 to 65535 */
4407 +typedef signed char NvS8; /* -128 to 127 */
4408 +typedef signed short NvS16; /* -32768 to 32767 */
4409 +typedef float NvF32; /* IEEE Single Precision (S1E8M23) */
4410 +typedef double NvF64; /* IEEE Double Precision (S1E11M52) */
4411 +typedef unsigned int NvV32; /* "void": enumerated or multiple fields */
4412 +typedef unsigned int NvU32; /* 0 to 4294967295 */
4413 +typedef unsigned long long NvU64; /* 0 to 18446744073709551615 */
4414 +typedef union
4415 +{
4416 + volatile NvV8 Reg008[1];
4417 + volatile NvV16 Reg016[1];
4418 + volatile NvV32 Reg032[1];
4419 +} litmus_nv_hwreg_t, * litmus_nv_phwreg_t;
4420 +
4421 +typedef struct
4422 +{
4423 + NvU64 address;
4424 + NvU64 size;
4425 + NvU32 offset;
4426 + NvU32 *map;
4427 + litmus_nv_phwreg_t map_u;
4428 +} litmus_nv_aperture_t;
4429 +
4430 +typedef struct
4431 +{
4432 + void *priv; /* private data */
4433 + void *os_state; /* os-specific device state */
4434 +
4435 + int rmInitialized;
4436 + int flags;
4437 +
4438 + /* PCI config info */
4439 + NvU32 domain;
4440 + NvU16 bus;
4441 + NvU16 slot;
4442 + NvU16 vendor_id;
4443 + NvU16 device_id;
4444 + NvU16 subsystem_id;
4445 + NvU32 gpu_id;
4446 + void *handle;
4447 +
4448 + NvU32 pci_cfg_space[16];
4449 +
4450 + /* physical characteristics */
4451 + litmus_nv_aperture_t bars[3];
4452 + litmus_nv_aperture_t *regs;
4453 + litmus_nv_aperture_t *fb, ud;
4454 + litmus_nv_aperture_t agp;
4455 +
4456 + NvU32 interrupt_line;
4457 +
4458 + NvU32 agp_config;
4459 + NvU32 agp_status;
4460 +
4461 + NvU32 primary_vga;
4462 +
4463 + NvU32 sim_env;
4464 +
4465 + NvU32 rc_timer_enabled;
4466 +
4467 + /* list of events allocated for this device */
4468 + void *event_list;
4469 +
4470 + void *kern_mappings;
4471 +
4472 +} litmus_nv_state_t;
4473 +
4474 +typedef struct work_struct litmus_nv_task_t;
4475 +
4476 +typedef struct litmus_nv_work_s {
4477 + litmus_nv_task_t task;
4478 + void *data;
4479 +} litmus_nv_work_t;
4480 +
4481 +typedef struct litmus_nv_linux_state_s {
4482 + litmus_nv_state_t nv_state;
4483 + atomic_t usage_count;
4484 +
4485 + struct pci_dev *dev;
4486 + void *agp_bridge;
4487 + void *alloc_queue;
4488 +
4489 + void *timer_sp;
4490 + void *isr_sp;
4491 + void *pci_cfgchk_sp;
4492 + void *isr_bh_sp;
4493 +
4494 +#ifdef CONFIG_CUDA_4_0
4495 + char registry_keys[512];
4496 +#endif
4497 +
4498 + /* keep track of any pending bottom halves */
4499 + struct tasklet_struct tasklet;
4500 + litmus_nv_work_t work;
4501 +
4502 + /* get a timer callback every second */
4503 + struct timer_list rc_timer;
4504 +
4505 + /* lock for linux-specific data, not used by core rm */
4506 + struct semaphore ldata_lock;
4507 +
4508 + /* lock for linux-specific alloc queue */
4509 + struct semaphore at_lock;
4510 +
4511 +#if 0
4512 +#if defined(NV_USER_MAP)
4513 + /* list of user mappings */
4514 + struct nv_usermap_s *usermap_list;
4515 +
4516 + /* lock for VMware-specific mapping list */
4517 + struct semaphore mt_lock;
4518 +#endif /* defined(NV_USER_MAP) */
4519 +#if defined(NV_PM_SUPPORT_OLD_STYLE_APM)
4520 + void *apm_nv_dev;
4521 +#endif
4522 +#endif
4523 +
4524 + NvU32 device_num;
4525 + struct litmus_nv_linux_state_s *next;
4526 +} litmus_nv_linux_state_t;
4527 +
4528 +void dump_nvidia_info(const struct tasklet_struct *t)
4529 +{
4530 + litmus_nv_state_t* nvstate = NULL;
4531 + litmus_nv_linux_state_t* linuxstate = NULL;
4532 + struct pci_dev* pci = NULL;
4533 +
4534 + nvstate = (litmus_nv_state_t*)(t->data);
4535 +
4536 + if(nvstate)
4537 + {
4538 + TRACE("NV State:\n"
4539 + "\ttasklet ptr = %p\n"
4540 + "\tstate ptr = %p\n"
4541 + "\tprivate data ptr = %p\n"
4542 + "\tos state ptr = %p\n"
4543 + "\tdomain = %u\n"
4544 + "\tbus = %u\n"
4545 + "\tslot = %u\n"
4546 + "\tvender_id = %u\n"
4547 + "\tdevice_id = %u\n"
4548 + "\tsubsystem_id = %u\n"
4549 + "\tgpu_id = %u\n"
4550 + "\tinterrupt_line = %u\n",
4551 + t,
4552 + nvstate,
4553 + nvstate->priv,
4554 + nvstate->os_state,
4555 + nvstate->domain,
4556 + nvstate->bus,
4557 + nvstate->slot,
4558 + nvstate->vendor_id,
4559 + nvstate->device_id,
4560 + nvstate->subsystem_id,
4561 + nvstate->gpu_id,
4562 + nvstate->interrupt_line);
4563 +
4564 + linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state);
4565 + }
4566 + else
4567 + {
4568 + TRACE("INVALID NVSTATE????\n");
4569 + }
4570 +
4571 + if(linuxstate)
4572 + {
4573 + int ls_offset = (void*)(&(linuxstate->device_num)) - (void*)(linuxstate);
4574 + int ns_offset_raw = (void*)(&(linuxstate->device_num)) - (void*)(&(linuxstate->nv_state));
4575 + int ns_offset_desired = (void*)(&(linuxstate->device_num)) - (void*)(nvstate);
4576 +
4577 +
4578 + TRACE("LINUX NV State:\n"
4579 + "\tlinux nv state ptr: %p\n"
4580 + "\taddress of tasklet: %p\n"
4581 + "\taddress of work: %p\n"
4582 + "\tusage_count: %d\n"
4583 + "\tdevice_num: %u\n"
4584 + "\ttasklet addr == this tasklet: %d\n"
4585 + "\tpci: %p\n",
4586 + linuxstate,
4587 + &(linuxstate->tasklet),
4588 + &(linuxstate->work),
4589 + atomic_read(&(linuxstate->usage_count)),
4590 + linuxstate->device_num,
4591 + (t == &(linuxstate->tasklet)),
4592 + linuxstate->dev);
4593 +
4594 + pci = linuxstate->dev;
4595 +
4596 + TRACE("Offsets:\n"
4597 + "\tOffset from LinuxState: %d, %x\n"
4598 + "\tOffset from NVState: %d, %x\n"
4599 + "\tOffset from parameter: %d, %x\n"
4600 + "\tdevice_num: %u\n",
4601 + ls_offset, ls_offset,
4602 + ns_offset_raw, ns_offset_raw,
4603 + ns_offset_desired, ns_offset_desired,
4604 + *((u32*)((void*)nvstate + ns_offset_desired)));
4605 + }
4606 + else
4607 + {
4608 + TRACE("INVALID LINUXNVSTATE?????\n");
4609 + }
4610 +
4611 +#if 0
4612 + if(pci)
4613 + {
4614 + TRACE("PCI DEV Info:\n"
4615 + "pci device ptr: %p\n"
4616 + "\tdevfn = %d\n"
4617 + "\tvendor = %d\n"
4618 + "\tdevice = %d\n"
4619 + "\tsubsystem_vendor = %d\n"
4620 + "\tsubsystem_device = %d\n"
4621 + "\tslot # = %d\n",
4622 + pci,
4623 + pci->devfn,
4624 + pci->vendor,
4625 + pci->device,
4626 + pci->subsystem_vendor,
4627 + pci->subsystem_device,
4628 + pci->slot->number);
4629 + }
4630 + else
4631 + {
4632 + TRACE("INVALID PCIDEV PTR?????\n");
4633 + }
4634 +#endif
4635 +}
4636 +
4637 +static struct module* nvidia_mod = NULL;
4638 +int init_nvidia_info(void)
4639 +{
4640 + mutex_lock(&module_mutex);
4641 + nvidia_mod = find_module("nvidia");
4642 + mutex_unlock(&module_mutex);
4643 + if(nvidia_mod != NULL)
4644 + {
4645 + TRACE("%s : Found NVIDIA module. Core Code: %p to %p\n", __FUNCTION__,
4646 + (void*)(nvidia_mod->module_core),
4647 + (void*)(nvidia_mod->module_core) + nvidia_mod->core_size);
4648 + init_nv_device_reg();
4649 + return(0);
4650 + }
4651 + else
4652 + {
4653 + TRACE("%s : Could not find NVIDIA module! Loaded?\n", __FUNCTION__);
4654 + return(-1);
4655 + }
4656 +}
4657 +
4658 +
4659 +/* works with pointers to static data inside the module too. */
4660 +int is_nvidia_func(void* func_addr)
4661 +{
4662 + int ret = 0;
4663 + if(nvidia_mod)
4664 + {
4665 + ret = within_module_core((long unsigned int)func_addr, nvidia_mod);
4666 + /*
4667 + if(ret)
4668 + {
4669 + TRACE("%s : %p is in NVIDIA module: %d\n",
4670 + __FUNCTION__, func_addr, ret);
4671 + }*/
4672 + }
4673 +
4674 + return(ret);
4675 +}
4676 +
4677 +u32 get_tasklet_nv_device_num(const struct tasklet_struct *t)
4678 +{
4679 + // life is too short to use hard-coded offsets. update this later.
4680 + litmus_nv_state_t* nvstate = (litmus_nv_state_t*)(t->data);
4681 + litmus_nv_linux_state_t* linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state);
4682 +
4683 + BUG_ON(linuxstate->device_num >= NV_DEVICE_NUM);
4684 +
4685 + return(linuxstate->device_num);
4686 +
4687 + //int DEVICE_NUM_OFFSET = (void*)(&(linuxstate->device_num)) - (void*)(nvstate);
4688 +
4689 +#if 0
4690 + // offset determined through observed behavior of the NV driver.
4691 + //const int DEVICE_NUM_OFFSET = 0x480; // CUDA 4.0 RC1
4692 + //const int DEVICE_NUM_OFFSET = 0x510; // CUDA 4.0 RC2
4693 +
4694 + void* state = (void*)(t->data);
4695 + void* device_num_ptr = state + DEVICE_NUM_OFFSET;
4696 +
4697 + //dump_nvidia_info(t);
4698 + return(*((u32*)device_num_ptr));
4699 +#endif
4700 +}
4701 +
4702 +u32 get_work_nv_device_num(const struct work_struct *t)
4703 +{
4704 + // offset determined through observed behavior of the NV driver.
4705 + const int DEVICE_NUM_OFFSET = sizeof(struct work_struct);
4706 + void* state = (void*)(t);
4707 + void** device_num_ptr = state + DEVICE_NUM_OFFSET;
4708 + return(*((u32*)(*device_num_ptr)));
4709 +}
4710 +
4711 +
4712 +
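+/* Per-GPU registry that maps each NVIDIA device to the real-time task
+ that currently owns it. */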
4713 +typedef struct {
4714 + raw_spinlock_t lock;
4715 + struct task_struct *device_owner;
4716 +}nv_device_registry_t;
4717 +
4718 +static nv_device_registry_t NV_DEVICE_REG[NV_DEVICE_NUM];
4719 +
4720 +int init_nv_device_reg(void)
4721 +{
4722 + int i;
4723 +
4724 + //memset(NV_DEVICE_REG, 0, sizeof(NV_DEVICE_REG));
4725 +
4726 + for(i = 0; i < NV_DEVICE_NUM; ++i)
4727 + {
4728 + raw_spin_lock_init(&NV_DEVICE_REG[i].lock);
4729 + NV_DEVICE_REG[i].device_owner = NULL;
4730 + }
4731 +
4732 + return(1);
4733 +}
4734 +
4735 +/* Use to get the nv_device_id for a given owner.
4736 + (returns -1 if the associated device id cannot be found) */
4737 +/*
4738 +int get_nv_device_id(struct task_struct* owner)
4739 +{
4740 + int i;
4741 + if(!owner)
4742 + {
4743 + return(-1);
4744 + }
4745 + for(i = 0; i < NV_DEVICE_NUM; ++i)
4746 + {
4747 + if(NV_DEVICE_REG[i].device_owner == owner)
4748 + return(i);
4749 + }
4750 + return(-1);
4751 +}
4752 +*/
4753 +
4754 +
4755 +
4756 +static int __reg_nv_device(int reg_device_id)
4757 +{
4758 + int ret = 0;
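+ /* atomically claim the device; succeeds only if no task currently owns it */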
4759 + struct task_struct* old =
4760 + cmpxchg(&NV_DEVICE_REG[reg_device_id].device_owner,
4761 + NULL,
4762 + current);
4763 +
4764 + mb();
4765 +
4766 + if(likely(old == NULL))
4767 + {
4768 +#ifdef CONFIG_LITMUS_SOFTIRQD
4769 + down_and_set_stat(current, HELD, &tsk_rt(current)->klitirqd_sem);
4770 +#endif
4771 + TRACE_CUR("%s: device %d registered.\n", __FUNCTION__, reg_device_id);
4772 + }
4773 + else
4774 + {
4775 + TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id);
4776 + ret = -EBUSY;
4777 + }
4778 +
4779 + return(ret);
4780 +
4781 +
4782 +
4783 +#if 0
4784 + //unsigned long flags;
4785 + //raw_spin_lock_irqsave(&NV_DEVICE_REG[reg_device_id].lock, flags);
4786 + //lock_nv_registry(reg_device_id, &flags);
4787 +
4788 + if(likely(NV_DEVICE_REG[reg_device_id].device_owner == NULL))
4789 + {
4790 + NV_DEVICE_REG[reg_device_id].device_owner = current;
4791 + mb(); // needed?
4792 +
4793 + // release spin lock before chance of going to sleep.
4794 + //raw_spin_unlock_irqrestore(&NV_DEVICE_REG[reg_device_id].lock, flags);
4795 + //unlock_nv_registry(reg_device_id, &flags);
4796 +
4797 + down_and_set_stat(current, HELD, &tsk_rt(current)->klitirqd_sem);
4798 + TRACE_CUR("%s: device %d registered.\n", __FUNCTION__, reg_device_id);
4799 + return(0);
4800 + }
4801 + else
4802 + {
4803 + //raw_spin_unlock_irqrestore(&NV_DEVICE_REG[reg_device_id].lock, flags);
4804 + //unlock_nv_registry(reg_device_id, &flags);
4805 +
4806 + TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id);
4807 + return(-EBUSY);
4808 + }
4809 +#endif
4810 +}
4811 +
4812 +static int __clear_reg_nv_device(int de_reg_device_id)
4813 +{
4814 + int ret = 0;
4815 + struct task_struct* old;
4816 +
4817 +#ifdef CONFIG_LITMUS_SOFTIRQD
4818 + unsigned long flags;
4819 + struct task_struct* klitirqd_th = get_klitirqd(de_reg_device_id);
4820 + lock_nv_registry(de_reg_device_id, &flags);
4821 +#endif
4822 +
4823 + old = cmpxchg(&NV_DEVICE_REG[de_reg_device_id].device_owner,
4824 + current,
4825 + NULL);
4826 +
4827 + mb();
4828 +
4829 +#ifdef CONFIG_LITMUS_SOFTIRQD
4830 + if(likely(old == current))
4831 + {
4832 + flush_pending(klitirqd_th, current);
4833 + //unlock_nv_registry(de_reg_device_id, &flags);
4834 +
4835 + up_and_set_stat(current, NOT_HELD, &tsk_rt(current)->klitirqd_sem);
4836 +
4837 + unlock_nv_registry(de_reg_device_id, &flags);
4838 + ret = 0;
4839 +
4840 + TRACE_CUR("%s: semaphore released.\n",__FUNCTION__);
4841 + }
4842 + else
4843 + {
4844 + unlock_nv_registry(de_reg_device_id, &flags);
4845 + ret = -EINVAL;
4846 +
4847 + if(old)
4848 + TRACE_CUR("%s: device %d is not registered for this process's use! %s/%d is!\n",
4849 + __FUNCTION__, de_reg_device_id, old->comm, old->pid);
4850 + else
4851 + TRACE_CUR("%s: device %d is not registered for this process's use! No one is!\n",
4852 + __FUNCTION__, de_reg_device_id);
4853 + }
4854 +#endif
4855 +
4856 + return(ret);
4857 +}
4858 +
4859 +
4860 +int reg_nv_device(int reg_device_id, int reg_action)
4861 +{
4862 + int ret;
4863 +
4864 + if((reg_device_id < NV_DEVICE_NUM) && (reg_device_id >= 0))
4865 + {
4866 + if(reg_action)
4867 + ret = __reg_nv_device(reg_device_id);
4868 + else
4869 + ret = __clear_reg_nv_device(reg_device_id);
4870 + }
4871 + else
4872 + {
4873 + ret = -ENODEV;
4874 + }
4875 +
4876 + return(ret);
4877 +}
4878 +
4879 +/* use to get the owner of nv_device_id. */
4880 +struct task_struct* get_nv_device_owner(u32 target_device_id)
4881 +{
4882 + struct task_struct* owner;
4883 + BUG_ON(target_device_id >= NV_DEVICE_NUM);
4884 + owner = NV_DEVICE_REG[target_device_id].device_owner;
4885 + return(owner);
4886 +}
4887 +
4888 +void lock_nv_registry(u32 target_device_id, unsigned long* flags)
4889 +{
4890 + BUG_ON(target_device_id >= NV_DEVICE_NUM);
4891 +
4892 + if(in_interrupt())
4893 + TRACE("Locking registry for %d.\n", target_device_id);
4894 + else
4895 + TRACE_CUR("Locking registry for %d.\n", target_device_id);
4896 +
4897 + raw_spin_lock_irqsave(&NV_DEVICE_REG[target_device_id].lock, *flags);
4898 +}
4899 +
4900 +void unlock_nv_registry(u32 target_device_id, unsigned long* flags)
4901 +{
4902 + BUG_ON(target_device_id >= NV_DEVICE_NUM);
4903 +
4904 + if(in_interrupt())
4905 + TRACE("Unlocking registry for %d.\n", target_device_id);
4906 + else
4907 + TRACE_CUR("Unlocking registry for %d.\n", target_device_id);
4908 +
4909 + raw_spin_unlock_irqrestore(&NV_DEVICE_REG[target_device_id].lock, *flags);
4910 +}
4911 +
4912 +
4913 +void increment_nv_int_count(u32 device)
4914 +{
4915 + unsigned long flags;
4916 + struct task_struct* owner;
4917 +
4918 + lock_nv_registry(device, &flags);
4919 +
4920 + owner = NV_DEVICE_REG[device].device_owner;
4921 + if(owner)
4922 + {
4923 + atomic_inc(&tsk_rt(owner)->nv_int_count);
4924 + }
4925 +
4926 + unlock_nv_registry(device, &flags);
4927 +}
4928 +EXPORT_SYMBOL(increment_nv_int_count);
4929 +
4930 +
4931 diff --git a/litmus/preempt.c b/litmus/preempt.c
4932 index ebe2e34..08b98c3 100644
4933 --- a/litmus/preempt.c
4934 +++ b/litmus/preempt.c
4935 @@ -30,8 +30,11 @@ void sched_state_will_schedule(struct task_struct* tsk)
4936 /* Litmus tasks should never be subject to a remote
4937 * set_tsk_need_resched(). */
4938 BUG_ON(is_realtime(tsk));
4939 +
4940 +/*
4941 TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n",
4942 __builtin_return_address(0));
4943 +*/
4944 }
4945
4946 /* Called by the IPI handler after another CPU called smp_send_resched(). */
4947 @@ -43,13 +46,17 @@ void sched_state_ipi(void)
4948 /* Cause scheduler to be invoked.
4949 * This will cause a transition to WILL_SCHEDULE. */
4950 set_tsk_need_resched(current);
4951 + /*
4952 TRACE_STATE("IPI -> set_tsk_need_resched(%s/%d)\n",
4953 current->comm, current->pid);
4954 + */
4955 } else {
4956 /* ignore */
4957 + /*
4958 TRACE_STATE("ignoring IPI in state %x (%s)\n",
4959 get_sched_state(),
4960 sched_state_name(get_sched_state()));
4961 + */
4962 }
4963 }
4964
4965 diff --git a/litmus/rm_common.c b/litmus/rm_common.c
4966 new file mode 100644
4967 index 0000000..88f83bc
4968 --- /dev/null
4969 +++ b/litmus/rm_common.c
4970 @@ -0,0 +1,160 @@
4971 +/*
4972 + * litmus/rm_common.c
4973 + *
4974 + * Common functions for RM based schedulers.
4975 + */
4976 +
4977 +#include <linux/percpu.h>
4978 +#include <linux/sched.h>
4979 +#include <linux/list.h>
4980 +
4981 +#include <litmus/litmus.h>
4982 +#include <litmus/sched_plugin.h>
4983 +#include <litmus/sched_trace.h>
4984 +
4985 +#include <litmus/rm_common.h>
4986 +
4987 +/* rm_higher_prio - returns true if first has a higher RM priority
4988 + * than second. Period ties are broken by PID.
4989 + *
4990 + * both first and second may be NULL
4991 + */
4992 +int rm_higher_prio(struct task_struct* first,
4993 + struct task_struct* second)
4994 +{
4995 + struct task_struct *first_task = first;
4996 + struct task_struct *second_task = second;
4997 +
4998 + /* There is no point in comparing a task to itself. */
4999 + if (first && first == second) {
5000 + TRACE_TASK(first,
5001 + "WARNING: pointless edf priority comparison.\n");
5002 + return 0;
5003 + }
5004 +
5005 +
5006 + /* check for NULL tasks */
5007 + if (!first || !second)
5008 + return first && !second;
5009 +
5010 +#ifdef CONFIG_LITMUS_LOCKING
5011 +
5012 + /* Check for inherited priorities. Change task
5013 + * used for comparison in such a case.
5014 + */
5015 + if (unlikely(first->rt_param.inh_task))
5016 + first_task = first->rt_param.inh_task;
5017 + if (unlikely(second->rt_param.inh_task))
5018 + second_task = second->rt_param.inh_task;
5019 +
5020 + /* Check for priority boosting. Tie-break by start of boosting.
5021 + */
5022 + if (unlikely(is_priority_boosted(first_task))) {
5023 + /* first_task is boosted, how about second_task? */
5024 + if (!is_priority_boosted(second_task) ||
5025 + lt_before(get_boost_start(first_task),
5026 + get_boost_start(second_task)))
5027 + return 1;
5028 + else
5029 + return 0;
5030 + } else if (unlikely(is_priority_boosted(second_task)))
5031 + /* second_task is boosted, first is not*/
5032 + return 0;
5033 +
5034 +#endif
5035 +
5036 + if (!is_realtime(second_task))
5037 + return true;
5038 +
5039 + if (shorter_period(first_task, second_task))
5040 + return true;
5041 +
5042 + if (get_rt_period(first_task) == get_rt_period(second_task))
5043 + {
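+ /* period tie: non-proxy threads beat proxy (klitirqd) threads;
+ remaining ties are broken by PID, then by priority inheritance. */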
5044 +#ifdef CONFIG_LITMUS_SOFTIRQD
5045 + if (first_task->rt_param.is_proxy_thread < second_task->rt_param.is_proxy_thread)
5046 + {
5047 + return true;
5048 + }
5049 + if (first_task->rt_param.is_proxy_thread == second_task->rt_param.is_proxy_thread)
5050 + {
5051 +#endif
5052 + if (first_task->pid < second_task->pid)
5053 + {
5054 + return true;
5055 + }
5056 + if (first_task->pid == second_task->pid)
5057 + {
5058 + return !second->rt_param.inh_task;
5059 + }
5060 +#ifdef CONFIG_LITMUS_SOFTIRQD
5061 + }
5062 +#endif
5063 + }
5064 +
5065 + return false;
5066 +
5067 +#if 0
5068 + return !is_realtime(second_task) ||
5069 + shorter_period(first_task, second_task) ||
5070 + ((get_rt_period(first_task) == get_rt_period(second_task)) && earlier_deadline(first_task, second_task))
5071 +
5072 +#ifdef CONFIG_LITMUS_SOFTIRQD
5073 + /* proxy threads always lose w/o inheritance. */
5074 + (first_task->rt_param.is_proxy_thread <
5075 + second_task->rt_param.is_proxy_thread) ||
5076 +#endif
5077 +
5078 + /* is the period of the first task shorter?
5079 + * Then it has higher priority.
5080 + */
5081 + shorter_period(first_task, second_task) ||
5082 +
5083 + (earlier_deadline(first_task, second_task) ||
5084 +
5085 + /* Do we have a deadline tie?
5086 + * Then break by PID.
5087 + */
5088 + (get_rt_period(first_task) == get_rt_period(second_task) &&
5089 + (first_task->pid < second_task->pid ||
5090 +
5091 + /* If the PIDs are the same then the task with the inherited
5092 + * priority wins.
5093 + */
5094 + (first_task->pid == second_task->pid &&
5095 + !second->rt_param.inh_task)));
5096 +#endif
5097 +}
5098 +
5099 +int rm_ready_order(struct bheap_node* a, struct bheap_node* b)
5100 +{
5101 + return rm_higher_prio(bheap2task(a), bheap2task(b));
5102 +}
5103 +
5104 +void rm_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
5105 + release_jobs_t release)
5106 +{
5107 + rt_domain_init(rt, rm_ready_order, resched, release);
5108 +}
5109 +
5110 +/* need_to_preempt - check whether the task t needs to be preempted
5111 + * call only with irqs disabled and with ready_lock acquired
5112 + * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT!
5113 + */
5114 +int rm_preemption_needed(rt_domain_t* rt, struct task_struct *t)
5115 +{
5116 + /* we need the read lock for rm_ready_queue */
5117 + /* no need to preempt if there is nothing pending */
5118 + if (!__jobs_pending(rt))
5119 + return 0;
5120 + /* we need to reschedule if t doesn't exist */
5121 + if (!t)
5122 + return 1;
5123 +
5124 + /* NOTE: We cannot check for non-preemptibility since we
5125 + * don't know what address space we're currently in.
5126 + */
5127 +
5128 + /* make sure to get non-rt stuff out of the way */
5129 + return !is_realtime(t) || rm_higher_prio(__next_ready(rt), t);
5130 +}
5131 diff --git a/litmus/rm_srt_common.c b/litmus/rm_srt_common.c
5132 new file mode 100644
5133 index 0000000..f58a800
5134 --- /dev/null
5135 +++ b/litmus/rm_srt_common.c
5136 @@ -0,0 +1,167 @@
5137 +/*
5138 + * litmus/rm_srt_common.c
5139 + *
5140 + * Common functions for RM-SRT based schedulers.
5141 + */
5142 +
5143 +#include <linux/percpu.h>
5144 +#include <linux/sched.h>
5145 +#include <linux/list.h>
5146 +
5147 +#include <litmus/litmus.h>
5148 +#include <litmus/sched_plugin.h>
5149 +#include <litmus/sched_trace.h>
5150 +
5151 +#include <litmus/rm_common.h>
5152 +
5153 +/* rm_srt_higher_prio - returns true if first has a higher RM-SRT priority
5154 + * than second. Period ties are broken by deadline, then by PID.
5155 + *
5156 + * both first and second may be NULL
5157 + */
5158 +int rm_srt_higher_prio(struct task_struct* first,
5159 + struct task_struct* second)
5160 +{
5161 + struct task_struct *first_task = first;
5162 + struct task_struct *second_task = second;
5163 +
5164 + /* There is no point in comparing a task to itself. */
5165 + if (first && first == second) {
5166 + TRACE_TASK(first,
5167 + "WARNING: pointless edf priority comparison.\n");
5168 + return 0;
5169 + }
5170 +
5171 +
5172 + /* check for NULL tasks */
5173 + if (!first || !second)
5174 + return first && !second;
5175 +
5176 +#ifdef CONFIG_LITMUS_LOCKING
5177 +
5178 + /* Check for inherited priorities. Change task
5179 + * used for comparison in such a case.
5180 + */
5181 + if (unlikely(first->rt_param.inh_task))
5182 + first_task = first->rt_param.inh_task;
5183 + if (unlikely(second->rt_param.inh_task))
5184 + second_task = second->rt_param.inh_task;
5185 +
5186 + /* Check for priority boosting. Tie-break by start of boosting.
5187 + */
5188 + if (unlikely(is_priority_boosted(first_task))) {
5189 + /* first_task is boosted, how about second_task? */
5190 + if (!is_priority_boosted(second_task) ||
5191 + lt_before(get_boost_start(first_task),
5192 + get_boost_start(second_task)))
5193 + return 1;
5194 + else
5195 + return 0;
5196 + } else if (unlikely(is_priority_boosted(second_task)))
5197 + /* second_task is boosted, first is not*/
5198 + return 0;
5199 +
5200 +#endif
5201 +
5202 + if (!is_realtime(second_task))
5203 + return true;
5204 +
5205 + if (shorter_period(first_task, second_task))
5206 + return true;
5207 +
5208 + if (get_rt_period(first_task) == get_rt_period(second_task))
5209 + {
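+ /* period tie: break by earlier deadline, then proxy status, then PID. */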
5210 + if (earlier_deadline(first_task, second_task))
5211 + {
5212 + return true;
5213 + }
5214 + if(get_deadline(first_task) == get_deadline(second_task))
5215 + {
5216 +#ifdef CONFIG_LITMUS_SOFTIRQD
5217 + if (first_task->rt_param.is_proxy_thread < second_task->rt_param.is_proxy_thread)
5218 + {
5219 + return true;
5220 + }
5221 + if (first_task->rt_param.is_proxy_thread == second_task->rt_param.is_proxy_thread)
5222 + {
5223 +#endif
5224 + if (first_task->pid < second_task->pid)
5225 + {
5226 + return true;
5227 + }
5228 + if (first_task->pid == second_task->pid)
5229 + {
5230 + return !second->rt_param.inh_task;
5231 + }
5232 +#ifdef CONFIG_LITMUS_SOFTIRQD
5233 + }
5234 +#endif
5235 + }
5236 + }
5237 +
5238 + return false;
5239 +
5240 +#if 0
5241 + return !is_realtime(second_task) ||
5242 + shorter_period(first_task, second_task) ||
5243 + ((get_rt_period(first_task) == get_rt_period(second_task)) && earlier_deadline(first_task, second_task))
5244 +
5245 +#ifdef CONFIG_LITMUS_SOFTIRQD
5246 + /* proxy threads always lose w/o inheritance. */
5247 + (first_task->rt_param.is_proxy_thread <
5248 + second_task->rt_param.is_proxy_thread) ||
5249 +#endif
5250 +
5251 + /* is the period of the first task shorter?
5252 + * Then it has higher priority.
5253 + */
5254 + shorter_period(first_task, second_task) ||
5255 +
5256 + (earlier_deadline(first_task, second_task) ||
5257 +
5258 + /* Do we have a deadline tie?
5259 + * Then break by PID.
5260 + */
5261 + (get_rt_period(first_task) == get_rt_period(second_task) &&
5262 + (first_task->pid < second_task->pid ||
5263 +
5264 + /* If the PIDs are the same then the task with the inherited
5265 + * priority wins.
5266 + */
5267 + (first_task->pid == second_task->pid &&
5268 + !second->rt_param.inh_task)));
5269 +#endif
5270 +}
5271 +
5272 +int rm_srt_ready_order(struct bheap_node* a, struct bheap_node* b)
5273 +{
5274 + return rm_srt_higher_prio(bheap2task(a), bheap2task(b));
5275 +}
5276 +
5277 +void rm_srt_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
5278 + release_jobs_t release)
5279 +{
5280 + rt_domain_init(rt, rm_srt_ready_order, resched, release);
5281 +}
5282 +
5283 +/* need_to_preempt - check whether the task t needs to be preempted
5284 + * call only with irqs disabled and with ready_lock acquired
5285 + * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT!
5286 + */
5287 +int rm_srt_preemption_needed(rt_domain_t* rt, struct task_struct *t)
5288 +{
5289 + /* we need the read lock for rm_ready_queue */
5290 + /* no need to preempt if there is nothing pending */
5291 + if (!__jobs_pending(rt))
5292 + return 0;
5293 + /* we need to reschedule if t doesn't exist */
5294 + if (!t)
5295 + return 1;
5296 +
5297 + /* NOTE: We cannot check for non-preemptibility since we
5298 + * don't know what address space we're currently in.
5299 + */
5300 +
5301 + /* make sure to get non-rt stuff out of the way */
5302 + return !is_realtime(t) || rm_srt_higher_prio(__next_ready(rt), t);
5303 +}
5304 diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
5305 index 73fe1c4..9aa5822 100644
5306 --- a/litmus/sched_cedf.c
5307 +++ b/litmus/sched_cedf.c
5308 @@ -29,6 +29,7 @@
5309 #include <linux/percpu.h>
5310 #include <linux/sched.h>
5311 #include <linux/slab.h>
5312 +#include <linux/uaccess.h>
5313
5314 #include <linux/module.h>
5315
5316 @@ -45,7 +46,23 @@
5317
5318 /* to configure the cluster size */
5319 #include <litmus/litmus_proc.h>
5320 -#include <linux/uaccess.h>
5321 +
5322 +#ifdef CONFIG_SCHED_CPU_AFFINITY
5323 +#include <litmus/affinity.h>
5324 +#endif
5325 +
5326 +#ifdef CONFIG_LITMUS_SOFTIRQD
5327 +#include <litmus/litmus_softirq.h>
5328 +#endif
5329 +
5330 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
5331 +#include <linux/interrupt.h>
5332 +#include <litmus/trace.h>
5333 +#endif
5334 +
5335 +#ifdef CONFIG_LITMUS_NVIDIA
5336 +#include <litmus/nvidia_info.h>
5337 +#endif
5338
5339 /* Reference configuration variable. Determines which cache level is used to
5340 * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that
5341 @@ -79,6 +96,15 @@ DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries);
5342 #define test_will_schedule(cpu) \
5343 (atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule))
5344
5345 +
5346 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
5347 +struct tasklet_head
5348 +{
5349 + struct tasklet_struct *head;
5350 + struct tasklet_struct **tail;
5351 +};
5352 +#endif
5353 +
5354 /*
5355 * In C-EDF there is a cedf domain _per_ cluster
5356 * The number of clusters is dynamically determined accordingly to the
5357 @@ -95,7 +121,13 @@ typedef struct clusterdomain {
5358 struct bheap_node *heap_node;
5359 struct bheap cpu_heap;
5360 /* lock for this cluster */
5361 -#define lock domain.ready_lock
5362 +#define cedf_lock domain.ready_lock
5363 +
5364 +
5365 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
5366 + struct tasklet_head pending_tasklets;
5367 +#endif
5368 +
5369 } cedf_domain_t;
5370
5371 /* a cedf_domain per cluster; allocation is done at init/activation time */
5372 @@ -257,21 +289,50 @@ static noinline void requeue(struct task_struct* task)
5373 }
5374 }
5375
5376 +#ifdef CONFIG_SCHED_CPU_AFFINITY
5377 +static cpu_entry_t* cedf_get_nearest_available_cpu(
5378 + cedf_domain_t *cluster, cpu_entry_t* start)
5379 +{
5380 + cpu_entry_t* affinity;
5381 +
5382 + get_nearest_available_cpu(affinity, start, cedf_cpu_entries, -1);
5383 +
5384 + /* make sure CPU is in our cluster */
5385 + if(affinity && cpu_isset(affinity->cpu, *cluster->cpu_map))
5386 + return(affinity);
5387 + else
5388 + return(NULL);
5389 +}
5390 +#endif
5391 +
5392 +
5393 /* check for any necessary preemptions */
5394 static void check_for_preemptions(cedf_domain_t *cluster)
5395 {
5396 struct task_struct *task;
5397 - cpu_entry_t* last;
5398 + cpu_entry_t *last;
5399
5400 for(last = lowest_prio_cpu(cluster);
5401 edf_preemption_needed(&cluster->domain, last->linked);
5402 last = lowest_prio_cpu(cluster)) {
5403 /* preemption necessary */
5404 task = __take_ready(&cluster->domain);
5405 - TRACE("check_for_preemptions: attempting to link task %d to %d\n",
5406 - task->pid, last->cpu);
5407 +#ifdef CONFIG_SCHED_CPU_AFFINITY
5408 + {
5409 + cpu_entry_t* affinity =
5410 + cedf_get_nearest_available_cpu(cluster,
5411 + &per_cpu(cedf_cpu_entries, task_cpu(task)));
5412 + if(affinity)
5413 + last = affinity;
5414 + else if(last->linked)
5415 + requeue(last->linked);
5416 + }
5417 +#else
5418 if (last->linked)
5419 requeue(last->linked);
5420 +#endif
5421 + TRACE("check_for_preemptions: attempting to link task %d to %d\n",
5422 + task->pid, last->cpu);
5423 link_task_to_cpu(task, last);
5424 preempt(last);
5425 }
5426 @@ -292,12 +353,12 @@ static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
5427 cedf_domain_t* cluster = container_of(rt, cedf_domain_t, domain);
5428 unsigned long flags;
5429
5430 - raw_spin_lock_irqsave(&cluster->lock, flags);
5431 + raw_spin_lock_irqsave(&cluster->cedf_lock, flags);
5432
5433 __merge_ready(&cluster->domain, tasks);
5434 check_for_preemptions(cluster);
5435
5436 - raw_spin_unlock_irqrestore(&cluster->lock, flags);
5437 + raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags);
5438 }
5439
5440 /* caller holds cedf_lock */
5441 @@ -307,6 +368,10 @@ static noinline void job_completion(struct task_struct *t, int forced)
5442
5443 sched_trace_task_completion(t, forced);
5444
5445 +#ifdef CONFIG_LITMUS_NVIDIA
5446 + atomic_set(&tsk_rt(t)->nv_int_count, 0);
5447 +#endif
5448 +
5449 TRACE_TASK(t, "job_completion().\n");
5450
5451 /* set flags */
5452 @@ -350,6 +415,461 @@ static void cedf_tick(struct task_struct* t)
5453 }
5454 }
5455
5456 +
5457 +
5458 +
5459 +
5460 +
5461 +
5462 +
5463 +
5464 +
5465 +
5466 +
5467 +
5468 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
5469 +
5470 +
5471 +static void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed)
5472 +{
5473 + if (!atomic_read(&tasklet->count)) {
5474 + if(tasklet->owner) {
5475 + sched_trace_tasklet_begin(tasklet->owner);
5476 + }
5477 +
5478 + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state))
5479 + {
5480 + BUG();
5481 + }
5482 + TRACE("%s: Invoking tasklet with owner pid = %d (flushed = %d).\n",
5483 + __FUNCTION__,
5484 + (tasklet->owner) ? tasklet->owner->pid : -1,
5485 + (tasklet->owner) ? 0 : 1);
5486 + tasklet->func(tasklet->data);
5487 + tasklet_unlock(tasklet);
5488 +
5489 + if(tasklet->owner) {
5490 + sched_trace_tasklet_end(tasklet->owner, flushed);
5491 + }
5492 + }
5493 + else {
5494 + BUG();
5495 + }
5496 +}
5497 +
5498 +
5499 +static void __extract_tasklets(cedf_domain_t* cluster, struct task_struct* task, struct tasklet_head* task_tasklets)
5500 +{
5501 + struct tasklet_struct* step;
5502 + struct tasklet_struct* tasklet;
5503 + struct tasklet_struct* prev;
5504 +
5505 + task_tasklets->head = NULL;
5506 + task_tasklets->tail = &(task_tasklets->head);
5507 +
5508 + prev = NULL;
5509 + for(step = cluster->pending_tasklets.head; step != NULL; step = step->next)
5510 + {
5511 + if(step->owner == task)
5512 + {
5513 + TRACE("%s: Found tasklet to flush: %d\n", __FUNCTION__, step->owner->pid);
5514 +
5515 + tasklet = step;
5516 +
5517 + if(prev) {
5518 + prev->next = tasklet->next;
5519 + }
5520 + else if(cluster->pending_tasklets.head == tasklet) {
5521 + // we're at the head.
5522 + cluster->pending_tasklets.head = tasklet->next;
5523 + }
5524 +
5525 + if(cluster->pending_tasklets.tail == &tasklet) {
5526 + // we're at the tail
5527 + if(prev) {
5528 + cluster->pending_tasklets.tail = &prev;
5529 + }
5530 + else {
5531 + cluster->pending_tasklets.tail = &(cluster->pending_tasklets.head);
5532 + }
5533 + }
5534 +
5535 + tasklet->next = NULL;
5536 + *(task_tasklets->tail) = tasklet;
5537 + task_tasklets->tail = &(tasklet->next);
5538 + }
5539 + else {
5540 + prev = step;
5541 + }
5542 + }
5543 +}
5544 +
5545 +static void flush_tasklets(cedf_domain_t* cluster, struct task_struct* task)
5546 +{
5547 +#if 0
5548 + unsigned long flags;
5549 + struct tasklet_head task_tasklets;
5550 + struct tasklet_struct* step;
5551 +
5552 + raw_spin_lock_irqsave(&cluster->cedf_lock, flags);
5553 + __extract_tasklets(cluster, task, &task_tasklets);
5554 + raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags);
5555 +
5556 + if(cluster->pending_tasklets.head != NULL) {
5557 + TRACE("%s: Flushing tasklets for %d...\n", __FUNCTION__, task->pid);
5558 + }
5559 +
5560 + // now execute any flushed tasklets.
5561 + for(step = cluster->pending_tasklets.head; step != NULL; /**/)
5562 + {
5563 + struct tasklet_struct* temp = step->next;
5564 +
5565 + step->next = NULL;
5566 + __do_lit_tasklet(step, 1ul);
5567 +
5568 + step = temp;
5569 + }
5570 +#endif
5571 +
5572 + // lazy flushing.
5573 + // just change ownership to NULL and let an idle processor
5574 + // take care of it. :P
5575 +
5576 + struct tasklet_struct* step;
5577 + unsigned long flags;
5578 +
5579 + raw_spin_lock_irqsave(&cluster->cedf_lock, flags);
5580 +
5581 + for(step = cluster->pending_tasklets.head; step != NULL; step = step->next)
5582 + {
5583 + if(step->owner == task)
5584 + {
5585 + TRACE("%s: Found tasklet to flush: %d\n", __FUNCTION__, step->owner->pid);
5586 + step->owner = NULL;
5587 + }
5588 + }
5589 +
5590 + raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags);
5591 +}
5592 +
5593 +
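+/* Pop and run pending tasklets for as long as the tasklet at the head of
+ the cluster queue has higher priority than the task scheduled on this CPU. */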
5594 +static void do_lit_tasklets(cedf_domain_t* cluster, struct task_struct* sched_task)
5595 +{
5596 + int work_to_do = 1;
5597 + struct tasklet_struct *tasklet = NULL;
5598 + //struct tasklet_struct *step;
5599 + unsigned long flags;
5600 +
5601 + while(work_to_do) {
5602 +
5603 + TS_NV_SCHED_BOTISR_START;
5604 +
5605 + // remove tasklet at head of list if it has higher priority.
5606 + raw_spin_lock_irqsave(&cluster->cedf_lock, flags);
5607 +
5608 +/*
5609 + step = cluster->pending_tasklets.head;
5610 + TRACE("%s: (BEFORE) dumping tasklet queue...\n", __FUNCTION__);
5611 + while(step != NULL){
5612 + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid);
5613 + step = step->next;
5614 + }
5615 + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1);
5616 + TRACE("%s: done.\n", __FUNCTION__);
5617 + */
5618 +
5619 + if(cluster->pending_tasklets.head != NULL) {
5620 + // remove tasklet at head.
5621 + tasklet = cluster->pending_tasklets.head;
5622 +
5623 + if(edf_higher_prio(tasklet->owner, sched_task)) {
5624 +
5625 + if(NULL == tasklet->next) {
5626 + // tasklet is at the head, list only has one element
5627 + TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, (tasklet->owner) ? tasklet->owner->pid : -1);
5628 + cluster->pending_tasklets.tail = &(cluster->pending_tasklets.head);
5629 + }
5630 +
5631 + // remove the tasklet from the queue
5632 + cluster->pending_tasklets.head = tasklet->next;
5633 +
5634 + TRACE("%s: Removed tasklet for %d from tasklet queue.\n", __FUNCTION__, (tasklet->owner) ? tasklet->owner->pid : -1);
5635 + }
5636 + else {
5637 + TRACE("%s: Pending tasklet (%d) does not have priority to run on this CPU (%d).\n", __FUNCTION__, (tasklet->owner) ? tasklet->owner->pid : -1, smp_processor_id());
5638 + tasklet = NULL;
5639 + }
5640 + }
5641 + else {
5642 + TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__);
5643 + }
5644 +
5645 +
5646 + /*
5647 + step = cluster->pending_tasklets.head;
5648 + TRACE("%s: (AFTER) dumping tasklet queue...\n", __FUNCTION__);
5649 + while(step != NULL){
5650 + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid);
5651 + step = step->next;
5652 + }
5653 + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1);
5654 + TRACE("%s: done.\n", __FUNCTION__);
5655 + */
5656 +
5657 + raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags);
5658 +
5659 +
5660 + TS_NV_SCHED_BOTISR_END;
5661 +
5662 + if(tasklet) {
5663 + __do_lit_tasklet(tasklet, 0ul);
5664 + tasklet = NULL;
5665 + }
5666 + else {
5667 + work_to_do = 0;
5668 + }
5669 + }
5670 +
5671 + //TRACE("%s: exited.\n", __FUNCTION__);
5672 +}
5673 +
5674 +
5675 +static void run_tasklets(struct task_struct* sched_task)
5676 +{
5677 + cedf_domain_t* cluster;
5678 +
5679 +#if 0
5680 + int task_is_rt = is_realtime(sched_task);
5681 + cedf_domain_t* cluster;
5682 +
5683 + if(is_realtime(sched_task)) {
5684 + cluster = task_cpu_cluster(sched_task);
5685 + }
5686 + else {
5687 + cluster = remote_cluster(get_cpu());
5688 + }
5689 +
5690 + if(cluster && cluster->pending_tasklets.head != NULL) {
5691 + TRACE("%s: There are tasklets to process.\n", __FUNCTION__);
5692 +
5693 + do_lit_tasklets(cluster, sched_task);
5694 + }
5695 +
5696 + if(!task_is_rt) {
5697 + put_cpu_no_resched();
5698 + }
5699 +#else
5700 +
5701 + preempt_disable();
5702 +
5703 + cluster = (is_realtime(sched_task)) ?
5704 + task_cpu_cluster(sched_task) :
5705 + remote_cluster(smp_processor_id());
5706 +
5707 + if(cluster && cluster->pending_tasklets.head != NULL) {
5708 + TRACE("%s: There are tasklets to process.\n", __FUNCTION__);
5709 + do_lit_tasklets(cluster, sched_task);
5710 + }
5711 +
5712 + preempt_enable_no_resched();
5713 +
5714 +#endif
5715 +}
5716 +
5717 +
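+/* Insert a tasklet into the cluster's pending list, which is intended to be
+ kept sorted by the owner's EDF priority (highest priority at the head). */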
5718 +static void __add_pai_tasklet(struct tasklet_struct* tasklet, cedf_domain_t* cluster)
5719 +{
5720 + struct tasklet_struct* step;
5721 +
5722 + /*
5723 + step = cluster->pending_tasklets.head;
5724 + TRACE("%s: (BEFORE) dumping tasklet queue...\n", __FUNCTION__);
5725 + while(step != NULL){
5726 + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid);
5727 + step = step->next;
5728 + }
5729 + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1);
5730 + TRACE("%s: done.\n", __FUNCTION__);
5731 + */
5732 +
5733 +
5734 + tasklet->next = NULL; // make sure there are no old values floating around
5735 +
5736 + step = cluster->pending_tasklets.head;
5737 + if(step == NULL) {
5738 + TRACE("%s: tasklet queue empty. inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid);
5739 + // insert at tail.
5740 + *(cluster->pending_tasklets.tail) = tasklet;
5741 + cluster->pending_tasklets.tail = &(tasklet->next);
5742 + }
5743 + else if((*(cluster->pending_tasklets.tail) != NULL) &&
5744 + edf_higher_prio((*(cluster->pending_tasklets.tail))->owner, tasklet->owner)) {
5745 + // insert at tail.
5746 + TRACE("%s: tasklet belongs at end. inserting tasklet for %d at tail.\n", __FUNCTION__, tasklet->owner->pid);
5747 +
5748 + *(cluster->pending_tasklets.tail) = tasklet;
5749 + cluster->pending_tasklets.tail = &(tasklet->next);
5750 + }
+	else {
+
+		//WARN_ON(1 == 1);
+
+		// insert the tasklet somewhere in the middle.
+
+		TRACE("%s: tasklet belongs somewhere in the middle.\n", __FUNCTION__);
+
+		if(edf_higher_prio(tasklet->owner, step->owner)) {
+			// the new tasklet outranks the current head.
+			TRACE("%s: %d is the new tasklet queue head.\n", __FUNCTION__, tasklet->owner->pid);
+
+			tasklet->next = step;
+			cluster->pending_tasklets.head = tasklet;
+		}
+		else {
+			while(step->next && edf_higher_prio(step->next->owner, tasklet->owner)) {
+				step = step->next;
+			}
+
+			// insert tasklet right before step->next.
+
+			TRACE("%s: inserting tasklet for %d between %d and %d.\n", __FUNCTION__,
+				tasklet->owner->pid,
+				(step->owner) ?
+					step->owner->pid :
+					-1,
+				(step->next) ?
+					((step->next->owner) ?
+						step->next->owner->pid :
+						-1) :
+					-1);
+
+			tasklet->next = step->next;
+			step->next = tasklet;
+
+			// if the tasklet went in at the very end, advance the tail pointer.
+			if(tasklet->next == NULL) {
+				cluster->pending_tasklets.tail = &(tasklet->next);
+			}
+		}
+	}
5786 +
5787 + /*
5788 + step = cluster->pending_tasklets.head;
5789 + TRACE("%s: (AFTER) dumping tasklet queue...\n", __FUNCTION__);
5790 + while(step != NULL){
5791 + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid);
5792 + step = step->next;
5793 + }
5794 + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1);
5795 + TRACE("%s: done.\n", __FUNCTION__);
5796 + */
5797 +
5798 +// NOTE: the insertion above keeps this list in priority order; a simple tail-append is kept below for reference.
5799 +// tasklet->next = NULL;
5800 +// *(cluster->pending_tasklets.tail) = tasklet;
5801 +// cluster->pending_tasklets.tail = &tasklet->next;
5802 +}
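+
+/* Worked example of the ordering maintained above (made-up PIDs): with
+ * owners already queued in decreasing EDF priority as A -> B -> C, a new
+ * tasklet whose owner falls between A and B is linked in as
+ * A -> new -> B -> C; one that outranks A becomes the new head; one that
+ * ranks below C is appended and ".tail" is advanced to &new->next.  The
+ * list thus stays sorted with the highest-priority owner at the head, so
+ * do_lit_tasklets() only ever needs to inspect the head.
+ */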
5803 +
5804 +static int enqueue_pai_tasklet(struct tasklet_struct* tasklet)
5805 +{
5806 + cedf_domain_t *cluster = NULL;
5807 + cpu_entry_t *targetCPU = NULL;
5808 + int thisCPU;
5809 + int runLocal = 0;
5810 + int runNow = 0;
5811 + unsigned long flags;
5812 +
5813 + if(unlikely((tasklet->owner == NULL) || !is_realtime(tasklet->owner)))
5814 + {
5815 + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
5816 + return 0;
5817 + }
5818 +
5819 + cluster = task_cpu_cluster(tasklet->owner);
5820 +
5821 + raw_spin_lock_irqsave(&cluster->cedf_lock, flags);
5822 +
5823 + thisCPU = smp_processor_id();
5824 +
5825 +#if 1
5826 +#ifdef CONFIG_SCHED_CPU_AFFINITY
5827 + {
5828 + cpu_entry_t* affinity = NULL;
5829 +
5830 + // use this CPU if it is in our cluster and isn't running any RT work.
5831 + if(cpu_isset(thisCPU, *cluster->cpu_map) && (__get_cpu_var(cedf_cpu_entries).linked == NULL)) {
5832 + affinity = &(__get_cpu_var(cedf_cpu_entries));
5833 + }
5834 + else {
5835 + // this CPU is busy or shouldn't run tasklet in this cluster.
5836 +				// look for an available nearby CPU.
5837 +				// NOTE: affinity is computed relative to the owner's CPU, not this CPU. Is this right?
5838 + affinity =
5839 + cedf_get_nearest_available_cpu(cluster,
5840 + &per_cpu(cedf_cpu_entries, task_cpu(tasklet->owner)));
5841 + }
5842 +
5843 + targetCPU = affinity;
5844 + }
5845 +#endif
5846 +#endif
5847 +
5848 + if (targetCPU == NULL) {
5849 + targetCPU = lowest_prio_cpu(cluster);
5850 + }
5851 +
5852 + if (edf_higher_prio(tasklet->owner, targetCPU->linked)) {
5853 + if (thisCPU == targetCPU->cpu) {
5854 + TRACE("%s: Run tasklet locally (and now).\n", __FUNCTION__);
5855 + runLocal = 1;
5856 + runNow = 1;
5857 + }
5858 + else {
5859 + TRACE("%s: Run tasklet remotely (and now).\n", __FUNCTION__);
5860 + runLocal = 0;
5861 + runNow = 1;
5862 + }
5863 + }
5864 + else {
5865 + runLocal = 0;
5866 + runNow = 0;
5867 + }
5868 +
5869 + if(!runLocal) {
5870 + // enqueue the tasklet
5871 + __add_pai_tasklet(tasklet, cluster);
5872 + }
5873 +
5874 + raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags);
5875 +
5876 +
5877 + if (runLocal /*&& runNow */) { // runNow == 1 is implied
5878 + TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__);
5879 + __do_lit_tasklet(tasklet, 0ul);
5880 + }
5881 + else if (runNow /*&& !runLocal */) { // runLocal == 0 is implied
5882 + TRACE("%s: Triggering CPU %d to run tasklet.\n", __FUNCTION__, targetCPU->cpu);
5883 + preempt(targetCPU); // need to be protected by cedf_lock?
5884 + }
5885 + else {
5886 + TRACE("%s: Scheduling of tasklet was deferred.\n", __FUNCTION__);
5887 + }
5888 +
5889 + return(1); // success
5890 +}
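+
+/* Summary of the dispatch decision above (descriptive only): when the
+ * tasklet's owner outranks the task linked to the chosen target CPU, the
+ * tasklet runs immediately -- inline if the target is this CPU, otherwise
+ * it is queued and the remote CPU is poked with preempt().  When the owner
+ * does not outrank the target, the tasklet is only queued and will be
+ * picked up later by run_tasklets()/do_lit_tasklets().
+ */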
5891 +
5892 +
5893 +#endif
5894 +
5895 +
5896 +
5897 +
5898 +
5899 +
5900 +
5901 +
5902 +
5903 +
5904 +
5905 +
5906 +
5907 +
5908 +
5909 +
5910 +
5911 /* Getting schedule() right is a bit tricky. schedule() may not make any
5912 * assumptions on the state of the current task since it may be called for a
5913 * number of reasons. The reasons include a scheduler_tick() determined that it
5914 @@ -378,7 +898,7 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
5915 int out_of_time, sleep, preempt, np, exists, blocks;
5916 struct task_struct* next = NULL;
5917
5918 - raw_spin_lock(&cluster->lock);
5919 + raw_spin_lock(&cluster->cedf_lock);
5920 clear_will_schedule();
5921
5922 /* sanity checking */
5923 @@ -462,8 +982,18 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
5924 next = prev;
5925
5926 sched_state_task_picked();
5927 - raw_spin_unlock(&cluster->lock);
5928 -
5929 +
5930 + raw_spin_unlock(&cluster->cedf_lock);
5931 +
5932 + /*
5933 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
5934 +	if(cluster->pending_tasklets.head != NULL) // peek at the data; normally protected by cluster->cedf_lock
5935 + {
5936 + do_lit_tasklets(cluster, next);
5937 + }
5938 +#endif
5939 +*/
5940 +
5941 #ifdef WANT_ALL_SCHED_EVENTS
5942 TRACE("cedf_lock released, next=0x%p\n", next);
5943
5944 @@ -473,7 +1003,6 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
5945 TRACE("becomes idle at %llu.\n", litmus_clock());
5946 #endif
5947
5948 -
5949 return next;
5950 }
5951
5952 @@ -504,7 +1033,7 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running)
5953 /* the cluster doesn't change even if t is running */
5954 cluster = task_cpu_cluster(t);
5955
5956 - raw_spin_lock_irqsave(&cluster->domain.ready_lock, flags);
5957 + raw_spin_lock_irqsave(&cluster->cedf_lock, flags);
5958
5959 /* setup job params */
5960 release_at(t, litmus_clock());
5961 @@ -521,20 +1050,22 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running)
5962 t->rt_param.linked_on = NO_CPU;
5963
5964 cedf_job_arrival(t);
5965 - raw_spin_unlock_irqrestore(&(cluster->domain.ready_lock), flags);
5966 + raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags);
5967 }
5968
5969 static void cedf_task_wake_up(struct task_struct *task)
5970 {
5971 unsigned long flags;
5972 - lt_t now;
5973 + //lt_t now;
5974 cedf_domain_t *cluster;
5975
5976 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
5977
5978 cluster = task_cpu_cluster(task);
5979
5980 - raw_spin_lock_irqsave(&cluster->lock, flags);
5981 + raw_spin_lock_irqsave(&cluster->cedf_lock, flags);
5982 +
5983 +#if 0 // sporadic task model
5984 /* We need to take suspensions because of semaphores into
5985 * account! If a job resumes after being suspended due to acquiring
5986 * a semaphore, it should never be treated as a new job release.
5987 @@ -556,8 +1087,17 @@ static void cedf_task_wake_up(struct task_struct *task)
5988 }
5989 }
5990 }
5991 - cedf_job_arrival(task);
5992 - raw_spin_unlock_irqrestore(&cluster->lock, flags);
5993 +#endif
5994 +
5995 + //BUG_ON(tsk_rt(task)->linked_on != NO_CPU);
5996 + set_rt_flags(task, RT_F_RUNNING); // periodic model
5997 +
5998 + if(tsk_rt(task)->linked_on == NO_CPU)
5999 + cedf_job_arrival(task);
6000 + else
6001 +		TRACE("wake_up: task is already linked; skipping job arrival.\n");
6002 +
6003 + raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags);
6004 }
6005
6006 static void cedf_task_block(struct task_struct *t)
6007 @@ -570,9 +1110,9 @@ static void cedf_task_block(struct task_struct *t)
6008 cluster = task_cpu_cluster(t);
6009
6010 /* unlink if necessary */
6011 - raw_spin_lock_irqsave(&cluster->lock, flags);
6012 + raw_spin_lock_irqsave(&cluster->cedf_lock, flags);
6013 unlink(t);
6014 - raw_spin_unlock_irqrestore(&cluster->lock, flags);
6015 + raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags);
6016
6017 BUG_ON(!is_realtime(t));
6018 }
6019 @@ -583,8 +1123,12 @@ static void cedf_task_exit(struct task_struct * t)
6020 unsigned long flags;
6021 cedf_domain_t *cluster = task_cpu_cluster(t);
6022
6023 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
6024 + flush_tasklets(cluster, t);
6025 +#endif
6026 +
6027 /* unlink if necessary */
6028 - raw_spin_lock_irqsave(&cluster->lock, flags);
6029 + raw_spin_lock_irqsave(&cluster->cedf_lock, flags);
6030 unlink(t);
6031 if (tsk_rt(t)->scheduled_on != NO_CPU) {
6032 cpu_entry_t *cpu;
6033 @@ -592,7 +1136,7 @@ static void cedf_task_exit(struct task_struct * t)
6034 cpu->scheduled = NULL;
6035 tsk_rt(t)->scheduled_on = NO_CPU;
6036 }
6037 - raw_spin_unlock_irqrestore(&cluster->lock, flags);
6038 + raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags);
6039
6040 BUG_ON(!is_realtime(t));
6041 TRACE_TASK(t, "RIP\n");
6042 @@ -603,6 +1147,721 @@ static long cedf_admit_task(struct task_struct* tsk)
6043 return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
6044 }
6045
6046 +
6047 +
6048 +
6049 +
6050 +
6051 +
6052 +
6053 +
6054 +
6055 +
6056 +
6057 +
6058 +#ifdef CONFIG_LITMUS_LOCKING
6059 +
6060 +#include <litmus/fdso.h>
6061 +
6062 +
6063 +static void __set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
6064 +{
6065 + int linked_on;
6066 + int check_preempt = 0;
6067 +
6068 + cedf_domain_t* cluster = task_cpu_cluster(t);
6069 +
6070 + if(prio_inh != NULL)
6071 + TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid);
6072 + else
6073 + TRACE_TASK(t, "inherits priority from %p\n", prio_inh);
6074 +
6075 + sched_trace_eff_prio_change(t, prio_inh);
6076 +
6077 + tsk_rt(t)->inh_task = prio_inh;
6078 +
6079 + linked_on = tsk_rt(t)->linked_on;
6080 +
6081 + /* If it is scheduled, then we need to reorder the CPU heap. */
6082 + if (linked_on != NO_CPU) {
6083 + TRACE_TASK(t, "%s: linked on %d\n",
6084 + __FUNCTION__, linked_on);
6085 + /* Holder is scheduled; need to re-order CPUs.
6086 + * We can't use heap_decrease() here since
6087 + * the cpu_heap is ordered in reverse direction, so
6088 + * it is actually an increase. */
6089 + bheap_delete(cpu_lower_prio, &cluster->cpu_heap,
6090 + per_cpu(cedf_cpu_entries, linked_on).hn);
6091 + bheap_insert(cpu_lower_prio, &cluster->cpu_heap,
6092 + per_cpu(cedf_cpu_entries, linked_on).hn);
6093 + } else {
6094 + /* holder may be queued: first stop queue changes */
6095 + raw_spin_lock(&cluster->domain.release_lock);
6096 + if (is_queued(t)) {
6097 + TRACE_TASK(t, "%s: is queued\n", __FUNCTION__);
6098 +
6099 + /* We need to update the position of holder in some
6100 +			 * heap. Note that this could be a release heap if
6101 + * budget enforcement is used and this job overran. */
6102 + check_preempt = !bheap_decrease(edf_ready_order, tsk_rt(t)->heap_node);
6103 +
6104 + } else {
6105 + /* Nothing to do: if it is not queued and not linked
6106 + * then it is either sleeping or currently being moved
6107 + * by other code (e.g., a timer interrupt handler) that
6108 + * will use the correct priority when enqueuing the
6109 + * task. */
6110 + TRACE_TASK(t, "%s: is NOT queued => Done.\n", __FUNCTION__);
6111 + }
6112 + raw_spin_unlock(&cluster->domain.release_lock);
6113 +
6114 + /* If holder was enqueued in a release heap, then the following
6115 + * preemption check is pointless, but we can't easily detect
6116 + * that case. If you want to fix this, then consider that
6117 + * simply adding a state flag requires O(n) time to update when
6118 + * releasing n tasks, which conflicts with the goal to have
6119 + * O(log n) merges. */
6120 + if (check_preempt) {
6121 + /* heap_decrease() hit the top level of the heap: make
6122 + * sure preemption checks get the right task, not the
6123 + * potentially stale cache. */
6124 + bheap_uncache_min(edf_ready_order, &cluster->domain.ready_queue);
6125 + check_for_preemptions(cluster);
6126 + }
6127 + }
6128 +}
6129 +
6130 +/* called with IRQs off */
6131 +static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
6132 +{
6133 + cedf_domain_t* cluster = task_cpu_cluster(t);
6134 +
6135 + raw_spin_lock(&cluster->cedf_lock);
6136 +
6137 + __set_priority_inheritance(t, prio_inh);
6138 +
6139 +#ifdef CONFIG_LITMUS_SOFTIRQD
6140 + if(tsk_rt(t)->cur_klitirqd != NULL)
6141 + {
6142 + TRACE_TASK(t, "%s/%d inherits a new priority!\n",
6143 + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
6144 +
6145 + __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh);
6146 + }
6147 +#endif
6148 +
6149 + raw_spin_unlock(&cluster->cedf_lock);
6150 +}
6151 +
6152 +
6153 +/* called with IRQs off */
6154 +static void __clear_priority_inheritance(struct task_struct* t)
6155 +{
6156 + TRACE_TASK(t, "priority restored\n");
6157 +
6158 + if(tsk_rt(t)->scheduled_on != NO_CPU)
6159 + {
6160 + sched_trace_eff_prio_change(t, NULL);
6161 +
6162 + tsk_rt(t)->inh_task = NULL;
6163 +
6164 + /* Check if rescheduling is necessary. We can't use heap_decrease()
6165 + * since the priority was effectively lowered. */
6166 + unlink(t);
6167 + cedf_job_arrival(t);
6168 + }
6169 + else
6170 + {
6171 + __set_priority_inheritance(t, NULL);
6172 + }
6173 +
6174 +#ifdef CONFIG_LITMUS_SOFTIRQD
6175 + if(tsk_rt(t)->cur_klitirqd != NULL)
6176 + {
6177 + TRACE_TASK(t, "%s/%d inheritance set back to owner.\n",
6178 + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
6179 +
6180 + if(tsk_rt(tsk_rt(t)->cur_klitirqd)->scheduled_on != NO_CPU)
6181 + {
6182 + sched_trace_eff_prio_change(tsk_rt(t)->cur_klitirqd, t);
6183 +
6184 + tsk_rt(tsk_rt(t)->cur_klitirqd)->inh_task = t;
6185 +
6186 + /* Check if rescheduling is necessary. We can't use heap_decrease()
6187 + * since the priority was effectively lowered. */
6188 + unlink(tsk_rt(t)->cur_klitirqd);
6189 + cedf_job_arrival(tsk_rt(t)->cur_klitirqd);
6190 + }
6191 + else
6192 + {
6193 + __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, t);
6194 + }
6195 + }
6196 +#endif
6197 +}
6198 +
6199 +/* called with IRQs off */
6200 +static void clear_priority_inheritance(struct task_struct* t)
6201 +{
6202 + cedf_domain_t* cluster = task_cpu_cluster(t);
6203 +
6204 + raw_spin_lock(&cluster->cedf_lock);
6205 + __clear_priority_inheritance(t);
6206 + raw_spin_unlock(&cluster->cedf_lock);
6207 +}
6208 +
6209 +
6210 +
6211 +#ifdef CONFIG_LITMUS_SOFTIRQD
6212 +/* called with IRQs off */
6213 +static void set_priority_inheritance_klitirqd(struct task_struct* klitirqd,
6214 + struct task_struct* old_owner,
6215 + struct task_struct* new_owner)
6216 +{
6217 + cedf_domain_t* cluster = task_cpu_cluster(klitirqd);
6218 +
6219 + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
6220 +
6221 + raw_spin_lock(&cluster->cedf_lock);
6222 +
6223 + if(old_owner != new_owner)
6224 + {
6225 + if(old_owner)
6226 + {
6227 + // unreachable?
6228 + tsk_rt(old_owner)->cur_klitirqd = NULL;
6229 + }
6230 +
6231 + TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n",
6232 + new_owner->comm, new_owner->pid);
6233 +
6234 + tsk_rt(new_owner)->cur_klitirqd = klitirqd;
6235 + }
6236 +
6237 + __set_priority_inheritance(klitirqd,
6238 + (tsk_rt(new_owner)->inh_task == NULL) ?
6239 + new_owner :
6240 + tsk_rt(new_owner)->inh_task);
6241 +
6242 + raw_spin_unlock(&cluster->cedf_lock);
6243 +}
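+
+/* Illustrative chain (made-up names): if the new owner T itself currently
+ * inherits from a higher-priority task H (i.e. tsk_rt(T)->inh_task == H),
+ * the daemon is handed H directly rather than T, so the klitirqd thread
+ * always runs at the owner's *effective* priority without chaining
+ * inheritance through T.
+ */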
6244 +
6245 +/* called with IRQs off */
6246 +static void clear_priority_inheritance_klitirqd(struct task_struct* klitirqd,
6247 + struct task_struct* old_owner)
6248 +{
6249 + cedf_domain_t* cluster = task_cpu_cluster(klitirqd);
6250 +
6251 + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
6252 +
6253 + raw_spin_lock(&cluster->cedf_lock);
6254 +
6255 + TRACE_TASK(klitirqd, "priority restored\n");
6256 +
6257 + if(tsk_rt(klitirqd)->scheduled_on != NO_CPU)
6258 + {
6259 + tsk_rt(klitirqd)->inh_task = NULL;
6260 +
6261 + /* Check if rescheduling is necessary. We can't use heap_decrease()
6262 + * since the priority was effectively lowered. */
6263 + unlink(klitirqd);
6264 + cedf_job_arrival(klitirqd);
6265 + }
6266 + else
6267 + {
6268 + __set_priority_inheritance(klitirqd, NULL);
6269 + }
6270 +
6271 + tsk_rt(old_owner)->cur_klitirqd = NULL;
6272 +
6273 + raw_spin_unlock(&cluster->cedf_lock);
6274 +}
6275 +#endif // CONFIG_LITMUS_SOFTIRQD
6276 +
6277 +
6278 +/* ******************** KFMLP support ********************** */
6279 +
6280 +/* struct for semaphore with priority inheritance */
6281 +struct kfmlp_queue
6282 +{
6283 + wait_queue_head_t wait;
6284 + struct task_struct* owner;
6285 + struct task_struct* hp_waiter;
6286 + int count; /* number of waiters + holder */
6287 +};
6288 +
6289 +struct kfmlp_semaphore
6290 +{
6291 + struct litmus_lock litmus_lock;
6292 +
6293 + spinlock_t lock;
6294 +
6295 + int num_resources; /* aka k */
6296 + struct kfmlp_queue *queues; /* array */
6297 + struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */
6298 +};
6299 +
6300 +static inline struct kfmlp_semaphore* kfmlp_from_lock(struct litmus_lock* lock)
6301 +{
6302 + return container_of(lock, struct kfmlp_semaphore, litmus_lock);
6303 +}
6304 +
6305 +static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem,
6306 + struct kfmlp_queue* queue)
6307 +{
6308 + return (queue - &sem->queues[0]);
6309 +}
6310 +
6311 +static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem,
6312 + struct task_struct* holder)
6313 +{
6314 + int i;
6315 + for(i = 0; i < sem->num_resources; ++i)
6316 + if(sem->queues[i].owner == holder)
6317 + return(&sem->queues[i]);
6318 + return(NULL);
6319 +}
6320 +
6321 +/* caller is responsible for locking */
6322 +static struct task_struct* kfmlp_find_hp_waiter(struct kfmlp_queue *kqueue,
6323 + struct task_struct *skip)
6324 +{
6325 + struct list_head *pos;
6326 + struct task_struct *queued, *found = NULL;
6327 +
6328 + list_for_each(pos, &kqueue->wait.task_list) {
6329 + queued = (struct task_struct*) list_entry(pos, wait_queue_t,
6330 + task_list)->private;
6331 +
6332 + /* Compare task prios, find high prio task. */
6333 + if (queued != skip && edf_higher_prio(queued, found))
6334 + found = queued;
6335 + }
6336 + return found;
6337 +}
6338 +
6339 +static inline struct kfmlp_queue* kfmlp_find_shortest(
6340 + struct kfmlp_semaphore* sem,
6341 + struct kfmlp_queue* search_start)
6342 +{
6343 + // we start our search at search_start instead of at the beginning of the
6344 + // queue list to load-balance across all resources.
6345 + struct kfmlp_queue* step = search_start;
6346 + struct kfmlp_queue* shortest = sem->shortest_queue;
6347 +
6348 + do
6349 + {
6350 + step = (step+1 != &sem->queues[sem->num_resources]) ?
6351 + step+1 : &sem->queues[0];
6352 + if(step->count < shortest->count)
6353 + {
6354 + shortest = step;
6355 + if(step->count == 0)
6356 + break; /* can't get any shorter */
6357 + }
6358 + }while(step != search_start);
6359 +
6360 + return(shortest);
6361 +}
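+
+/* Illustrative trace (made-up counts): suppose queues[1] was the cached
+ * shortest queue, a waiter was just added to it, and the replica lengths
+ * are now {2, 3, 0, 1}.  The scan starts just past queues[1], immediately
+ * finds queues[2] with a count of zero, and stops, since no queue can be
+ * shorter.  Starting just past the queue that was last modified spreads
+ * requests across the replicas instead of always favoring queues[0].
+ */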
6362 +
6363 +static struct task_struct* kfmlp_remove_hp_waiter(struct kfmlp_semaphore* sem)
6364 +{
6365 + /* must hold sem->lock */
6366 +
6367 + struct kfmlp_queue *my_queue = NULL;
6368 + struct task_struct *max_hp = NULL;
6369 +
6370 +
6371 + struct list_head *pos;
6372 + struct task_struct *queued;
6373 + int i;
6374 +
6375 + for(i = 0; i < sem->num_resources; ++i)
6376 + {
6377 + if( (sem->queues[i].count > 1) &&
6378 + ((my_queue == NULL) ||
6379 + (edf_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) )
6380 + {
6381 + my_queue = &sem->queues[i];
6382 + }
6383 + }
6384 +
6385 + if(my_queue)
6386 + {
6387 + cedf_domain_t* cluster;
6388 +
6389 + max_hp = my_queue->hp_waiter;
6390 + BUG_ON(!max_hp);
6391 +
6392 + TRACE_CUR("queue %d: stealing %s/%d from queue %d\n",
6393 + kfmlp_get_idx(sem, my_queue),
6394 + max_hp->comm, max_hp->pid,
6395 + kfmlp_get_idx(sem, my_queue));
6396 +
6397 + my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, max_hp);
6398 +
6399 + /*
6400 + if(my_queue->hp_waiter)
6401 + TRACE_CUR("queue %d: new hp_waiter is %s/%d\n",
6402 + kfmlp_get_idx(sem, my_queue),
6403 + my_queue->hp_waiter->comm,
6404 + my_queue->hp_waiter->pid);
6405 + else
6406 + TRACE_CUR("queue %d: new hp_waiter is %p\n",
6407 + kfmlp_get_idx(sem, my_queue), NULL);
6408 + */
6409 +
6410 + cluster = task_cpu_cluster(max_hp);
6411 +
6412 + raw_spin_lock(&cluster->cedf_lock);
6413 +
6414 + /*
6415 + if(my_queue->owner)
6416 + TRACE_CUR("queue %d: owner is %s/%d\n",
6417 + kfmlp_get_idx(sem, my_queue),
6418 + my_queue->owner->comm,
6419 + my_queue->owner->pid);
6420 + else
6421 + TRACE_CUR("queue %d: owner is %p\n",
6422 + kfmlp_get_idx(sem, my_queue),
6423 + NULL);
6424 + */
6425 +
6426 + if(tsk_rt(my_queue->owner)->inh_task == max_hp)
6427 + {
6428 + __clear_priority_inheritance(my_queue->owner);
6429 + if(my_queue->hp_waiter != NULL)
6430 + {
6431 + __set_priority_inheritance(my_queue->owner, my_queue->hp_waiter);
6432 + }
6433 + }
6434 + raw_spin_unlock(&cluster->cedf_lock);
6435 +
6436 + list_for_each(pos, &my_queue->wait.task_list)
6437 + {
6438 + queued = (struct task_struct*) list_entry(pos, wait_queue_t,
6439 + task_list)->private;
6440 + /* Compare task prios, find high prio task. */
6441 + if (queued == max_hp)
6442 + {
6443 + /*
6444 + TRACE_CUR("queue %d: found entry in wait queue. REMOVING!\n",
6445 + kfmlp_get_idx(sem, my_queue));
6446 + */
6447 + __remove_wait_queue(&my_queue->wait,
6448 + list_entry(pos, wait_queue_t, task_list));
6449 + break;
6450 + }
6451 + }
6452 + --(my_queue->count);
6453 + }
6454 +
6455 + return(max_hp);
6456 +}
6457 +
6458 +int cedf_kfmlp_lock(struct litmus_lock* l)
6459 +{
6460 + struct task_struct* t = current;
6461 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
6462 + struct kfmlp_queue* my_queue;
6463 + wait_queue_t wait;
6464 + unsigned long flags;
6465 +
6466 + if (!is_realtime(t))
6467 + return -EPERM;
6468 +
6469 + spin_lock_irqsave(&sem->lock, flags);
6470 +
6471 + my_queue = sem->shortest_queue;
6472 +
6473 + if (my_queue->owner) {
6474 + /* resource is not free => must suspend and wait */
6475 + TRACE_CUR("queue %d: Resource is not free => must suspend and wait.\n",
6476 + kfmlp_get_idx(sem, my_queue));
6477 +
6478 + init_waitqueue_entry(&wait, t);
6479 +
6480 + /* FIXME: interruptible would be nice some day */
6481 + set_task_state(t, TASK_UNINTERRUPTIBLE);
6482 +
6483 + __add_wait_queue_tail_exclusive(&my_queue->wait, &wait);
6484 +
6485 + /* check if we need to activate priority inheritance */
6486 + if (edf_higher_prio(t, my_queue->hp_waiter))
6487 + {
6488 + my_queue->hp_waiter = t;
6489 + if (edf_higher_prio(t, my_queue->owner))
6490 + {
6491 + set_priority_inheritance(my_queue->owner, my_queue->hp_waiter);
6492 + }
6493 + }
6494 +
6495 + ++(my_queue->count);
6496 + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
6497 +
6498 + /* release lock before sleeping */
6499 + spin_unlock_irqrestore(&sem->lock, flags);
6500 +
6501 + /* We depend on the FIFO order. Thus, we don't need to recheck
6502 + * when we wake up; we are guaranteed to have the lock since
6503 + * there is only one wake up per release (or steal).
6504 + */
6505 + schedule();
6506 +
6507 +
6508 + if(my_queue->owner == t)
6509 + {
6510 + TRACE_CUR("queue %d: acquired through waiting\n",
6511 + kfmlp_get_idx(sem, my_queue));
6512 + }
6513 + else
6514 + {
6515 + /* this case may happen if our wait entry was stolen
6516 +			   between queues; record where we went. */
6517 + my_queue = kfmlp_get_queue(sem, t);
6518 + BUG_ON(!my_queue);
6519 + TRACE_CUR("queue %d: acquired through stealing\n",
6520 + kfmlp_get_idx(sem, my_queue));
6521 + }
6522 + }
6523 + else
6524 + {
6525 + TRACE_CUR("queue %d: acquired immediately\n",
6526 + kfmlp_get_idx(sem, my_queue));
6527 +
6528 + my_queue->owner = t;
6529 +
6530 + ++(my_queue->count);
6531 + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
6532 +
6533 + spin_unlock_irqrestore(&sem->lock, flags);
6534 + }
6535 +
6536 + return kfmlp_get_idx(sem, my_queue);
6537 +}
6538 +
6539 +int cedf_kfmlp_unlock(struct litmus_lock* l)
6540 +{
6541 + struct task_struct *t = current, *next;
6542 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
6543 + struct kfmlp_queue *my_queue;
6544 + unsigned long flags;
6545 + int err = 0;
6546 +
6547 + spin_lock_irqsave(&sem->lock, flags);
6548 +
6549 + my_queue = kfmlp_get_queue(sem, t);
6550 +
6551 + if (!my_queue) {
6552 + err = -EINVAL;
6553 + goto out;
6554 + }
6555 +
6556 + /* check if there are jobs waiting for this resource */
6557 + next = __waitqueue_remove_first(&my_queue->wait);
6558 + if (next) {
6559 + /*
6560 + TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n",
6561 + kfmlp_get_idx(sem, my_queue),
6562 + next->comm, next->pid);
6563 + */
6564 +		/* next becomes the resource holder */
6565 + my_queue->owner = next;
6566 +
6567 + --(my_queue->count);
6568 + if(my_queue->count < sem->shortest_queue->count)
6569 + {
6570 + sem->shortest_queue = my_queue;
6571 + }
6572 +
6573 + TRACE_CUR("queue %d: lock ownership passed to %s/%d\n",
6574 + kfmlp_get_idx(sem, my_queue), next->comm, next->pid);
6575 +
6576 + /* determine new hp_waiter if necessary */
6577 + if (next == my_queue->hp_waiter) {
6578 + TRACE_TASK(next, "was highest-prio waiter\n");
6579 + /* next has the highest priority --- it doesn't need to
6580 + * inherit. However, we need to make sure that the
6581 + * next-highest priority in the queue is reflected in
6582 + * hp_waiter. */
6583 + my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next);
6584 + if (my_queue->hp_waiter)
6585 + TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue));
6586 + else
6587 + TRACE("queue %d: no further waiters\n", kfmlp_get_idx(sem, my_queue));
6588 + } else {
6589 + /* Well, if next is not the highest-priority waiter,
6590 + * then it ought to inherit the highest-priority
6591 + * waiter's priority. */
6592 + set_priority_inheritance(next, my_queue->hp_waiter);
6593 + }
6594 +
6595 + /* wake up next */
6596 + wake_up_process(next);
6597 + }
6598 + else
6599 + {
6600 + TRACE_CUR("queue %d: looking to steal someone...\n", kfmlp_get_idx(sem, my_queue));
6601 +
6602 + next = kfmlp_remove_hp_waiter(sem); /* returns NULL if nothing to steal */
6603 +
6604 + /*
6605 + if(next)
6606 + TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - steal\n",
6607 + kfmlp_get_idx(sem, my_queue),
6608 + next->comm, next->pid);
6609 + */
6610 +
6611 + my_queue->owner = next;
6612 +
6613 + if(next)
6614 + {
6615 + TRACE_CUR("queue %d: lock ownership passed to %s/%d (which was stolen)\n",
6616 + kfmlp_get_idx(sem, my_queue),
6617 + next->comm, next->pid);
6618 +
6619 + /* wake up next */
6620 + wake_up_process(next);
6621 + }
6622 + else
6623 + {
6624 + TRACE_CUR("queue %d: no one to steal.\n", kfmlp_get_idx(sem, my_queue));
6625 +
6626 + --(my_queue->count);
6627 + if(my_queue->count < sem->shortest_queue->count)
6628 + {
6629 + sem->shortest_queue = my_queue;
6630 + }
6631 + }
6632 + }
6633 +
6634 + /* we lose the benefit of priority inheritance (if any) */
6635 + if (tsk_rt(t)->inh_task)
6636 + clear_priority_inheritance(t);
6637 +
6638 +out:
6639 + spin_unlock_irqrestore(&sem->lock, flags);
6640 +
6641 + return err;
6642 +}
6643 +
6644 +int cedf_kfmlp_close(struct litmus_lock* l)
6645 +{
6646 + struct task_struct *t = current;
6647 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
6648 + struct kfmlp_queue *my_queue;
6649 + unsigned long flags;
6650 +
6651 + int owner;
6652 +
6653 + spin_lock_irqsave(&sem->lock, flags);
6654 +
6655 + my_queue = kfmlp_get_queue(sem, t);
6656 + owner = (my_queue) ? (my_queue->owner == t) : 0;
6657 +
6658 + spin_unlock_irqrestore(&sem->lock, flags);
6659 +
6660 + if (owner)
6661 + cedf_kfmlp_unlock(l);
6662 +
6663 + return 0;
6664 +}
6665 +
6666 +void cedf_kfmlp_free(struct litmus_lock* l)
6667 +{
6668 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
6669 + kfree(sem->queues);
6670 + kfree(sem);
6671 +}
6672 +
6673 +static struct litmus_lock_ops cedf_kfmlp_lock_ops = {
6674 + .close = cedf_kfmlp_close,
6675 + .lock = cedf_kfmlp_lock,
6676 + .unlock = cedf_kfmlp_unlock,
6677 + .deallocate = cedf_kfmlp_free,
6678 +};
6679 +
6680 +static struct litmus_lock* cedf_new_kfmlp(void* __user arg, int* ret_code)
6681 +{
6682 + struct kfmlp_semaphore* sem;
6683 + int num_resources = 0;
6684 + int i;
6685 +
6686 + if(!access_ok(VERIFY_READ, arg, sizeof(num_resources)))
6687 + {
6688 + *ret_code = -EINVAL;
6689 + return(NULL);
6690 + }
6691 + if(__copy_from_user(&num_resources, arg, sizeof(num_resources)))
6692 + {
6693 + *ret_code = -EINVAL;
6694 + return(NULL);
6695 + }
6696 + if(num_resources < 1)
6697 + {
6698 + *ret_code = -EINVAL;
6699 + return(NULL);
6700 + }
6701 +
6702 + sem = kmalloc(sizeof(*sem), GFP_KERNEL);
6703 + if(!sem)
6704 + {
6705 + *ret_code = -ENOMEM;
6706 + return NULL;
6707 + }
6708 +
6709 + sem->queues = kmalloc(sizeof(struct kfmlp_queue)*num_resources, GFP_KERNEL);
6710 + if(!sem->queues)
6711 + {
6712 + kfree(sem);
6713 + *ret_code = -ENOMEM;
6714 + return NULL;
6715 + }
6716 +
6717 + sem->litmus_lock.ops = &cedf_kfmlp_lock_ops;
6718 + spin_lock_init(&sem->lock);
6719 + sem->num_resources = num_resources;
6720 +
6721 + for(i = 0; i < num_resources; ++i)
6722 + {
6723 + sem->queues[i].owner = NULL;
6724 + sem->queues[i].hp_waiter = NULL;
6725 + init_waitqueue_head(&sem->queues[i].wait);
6726 + sem->queues[i].count = 0;
6727 + }
6728 +
6729 + sem->shortest_queue = &sem->queues[0];
6730 +
6731 + *ret_code = 0;
6732 + return &sem->litmus_lock;
6733 +}
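+
+/* Argument sketch (illustrative): the only value read from user space is a
+ * single int giving the number of replicas k; e.g. k = 3 creates three
+ * FIFO queues guarding three interchangeable resource instances, with
+ * shortest_queue initially pointing at queues[0].
+ */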
6734 +
6735 +
6736 +/* **** lock constructor **** */
6737 +
6738 +static long cedf_allocate_lock(struct litmus_lock **lock, int type,
6739 + void* __user arg)
6740 +{
6741 + int err = -ENXIO;
6742 +
6743 +	/* C-EDF currently only supports the KFMLP for global resources
6744 + WITHIN a given cluster. DO NOT USE CROSS-CLUSTER! */
6745 + switch (type) {
6746 + case KFMLP_SEM:
6747 + *lock = cedf_new_kfmlp(arg, &err);
6748 + break;
6749 + };
6750 +
6751 + return err;
6752 +}
6753 +
6754 +#endif // CONFIG_LITMUS_LOCKING
6755 +
6756 +
6757 +
6758 +
6759 +
6760 +
6761 /* total number of cluster */
6762 static int num_clusters;
6763 /* we do not support cluster of different sizes */
6764 @@ -696,6 +1955,13 @@ static long cedf_activate_plugin(void)
6765 bheap_init(&(cedf[i].cpu_heap));
6766 edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs);
6767
6768 +
6769 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
6770 + cedf[i].pending_tasklets.head = NULL;
6771 + cedf[i].pending_tasklets.tail = &(cedf[i].pending_tasklets.head);
6772 +#endif
6773 +
6774 +
6775 if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC))
6776 return -ENOMEM;
6777 }
6778 @@ -746,6 +2012,40 @@ static long cedf_activate_plugin(void)
6779 break;
6780 }
6781 }
6782 +
6783 +#ifdef CONFIG_LITMUS_SOFTIRQD
6784 + {
6785 + /* distribute the daemons evenly across the clusters. */
6786 + int* affinity = kmalloc(NR_LITMUS_SOFTIRQD * sizeof(int), GFP_ATOMIC);
6787 + int num_daemons_per_cluster = NR_LITMUS_SOFTIRQD / num_clusters;
6788 + int left_over = NR_LITMUS_SOFTIRQD % num_clusters;
6789 +
6790 + int daemon = 0;
6791 + for(i = 0; i < num_clusters; ++i)
6792 + {
6793 + int num_on_this_cluster = num_daemons_per_cluster;
6794 + if(left_over)
6795 + {
6796 + ++num_on_this_cluster;
6797 + --left_over;
6798 + }
6799 +
6800 + for(j = 0; j < num_on_this_cluster; ++j)
6801 + {
6802 + // first CPU of this cluster
6803 + affinity[daemon++] = i*cluster_size;
6804 + }
6805 + }
6806 +
6807 + spawn_klitirqd(affinity);
6808 +
6809 + kfree(affinity);
6810 + }
6811 +#endif
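+
+	/* Worked example of the distribution above (made-up numbers): with
+	 * NR_LITMUS_SOFTIRQD = 8, 3 clusters, and cluster_size = 4, each
+	 * cluster gets 8 / 3 = 2 daemons, the 8 % 3 = 2 left-over daemons go
+	 * to the first two clusters (a 3/3/2 split), and every daemon is
+	 * pinned to the first CPU of its cluster (CPUs 0, 4, and 8).
+	 */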
6812 +
6813 +#ifdef CONFIG_LITMUS_NVIDIA
6814 + init_nvidia_info();
6815 +#endif
6816
6817 free_cpumask_var(mask);
6818 clusters_allocated = 1;
6819 @@ -765,6 +2065,19 @@ static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = {
6820 .task_block = cedf_task_block,
6821 .admit_task = cedf_admit_task,
6822 .activate_plugin = cedf_activate_plugin,
6823 +#ifdef CONFIG_LITMUS_LOCKING
6824 + .allocate_lock = cedf_allocate_lock,
6825 + .set_prio_inh = set_priority_inheritance,
6826 + .clear_prio_inh = clear_priority_inheritance,
6827 +#endif
6828 +#ifdef CONFIG_LITMUS_SOFTIRQD
6829 + .set_prio_inh_klitirqd = set_priority_inheritance_klitirqd,
6830 + .clear_prio_inh_klitirqd = clear_priority_inheritance_klitirqd,
6831 +#endif
6832 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
6833 + .enqueue_pai_tasklet = enqueue_pai_tasklet,
6834 + .run_tasklets = run_tasklets,
6835 +#endif
6836 };
6837
6838 static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL;
6839 diff --git a/litmus/sched_cfifo.c b/litmus/sched_cfifo.c
6840 new file mode 100644
6841 index 0000000..7fbdec3
6842 --- /dev/null
6843 +++ b/litmus/sched_cfifo.c
6844 @@ -0,0 +1,2063 @@
6845 +/*
6846 + * litmus/sched_cfifo.c
6847 + *
6848 + * Implementation of the C-FIFO scheduling algorithm.
6849 + *
6850 + * This implementation is based on G-EDF:
6851 + * - CPUs are clustered around L2 or L3 caches.
6852 + * - Cluster topology is automatically detected (this is arch dependent
6853 + *   and currently works only on x86 --- and only with modern
6854 + *   CPUs that export cpuid4 information)
6855 + * - The plugin _does not_ attempt to put tasks in the right cluster, i.e.,
6856 + * the programmer needs to be aware of the topology to place tasks
6857 + * in the desired cluster
6858 + * - default clustering is around L2 cache (cache index = 2)
6859 + * supported clusters are: L1 (private cache: pedf), L2, L3, ALL (all
6860 + * online_cpus are placed in a single cluster).
6861 + *
6862 + * For details on functions, take a look at sched_gsn_edf.c
6863 + *
6864 + * Currently, we do not support changes in the number of online cpus.
6865 + * If the num_online_cpus() dynamically changes, the plugin is broken.
6866 + *
6867 + * This version uses the simple approach and serializes all scheduling
6868 + * decisions by the use of a queue lock. This is probably not the
6869 + * best way to do it, but it should suffice for now.
6870 + */
6871 +
6872 +#include <linux/spinlock.h>
6873 +#include <linux/percpu.h>
6874 +#include <linux/sched.h>
6875 +#include <linux/slab.h>
6876 +#include <linux/uaccess.h>
6877 +
6878 +#include <linux/module.h>
6879 +
6880 +#include <litmus/litmus.h>
6881 +#include <litmus/jobs.h>
6882 +#include <litmus/preempt.h>
6883 +#include <litmus/sched_plugin.h>
6884 +#include <litmus/fifo_common.h>
6885 +#include <litmus/sched_trace.h>
6886 +
6887 +#include <litmus/clustered.h>
6888 +
6889 +#include <litmus/bheap.h>
6890 +
6891 +/* to configure the cluster size */
6892 +#include <litmus/litmus_proc.h>
6893 +
6894 +#ifdef CONFIG_SCHED_CPU_AFFINITY
6895 +#include <litmus/affinity.h>
6896 +#endif
6897 +
6898 +#ifdef CONFIG_LITMUS_SOFTIRQD
6899 +#include <litmus/litmus_softirq.h>
6900 +#endif
6901 +
6902 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
6903 +#include <linux/interrupt.h>
6904 +#include <litmus/trace.h>
6905 +#endif
6906 +
6907 +#ifdef CONFIG_LITMUS_NVIDIA
6908 +#include <litmus/nvidia_info.h>
6909 +#endif
6910 +
6911 +/* Reference configuration variable. Determines which cache level is used to
6912 + * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that
6913 + * all CPUs form a single cluster (just like GSN-EDF).
6914 + */
6915 +static enum cache_level cluster_config = GLOBAL_CLUSTER;
6916 +
6917 +struct clusterdomain;
6918 +
6919 +/* cpu_entry_t - maintain the linked and scheduled state
6920 + *
6921 + * A cpu also contains a pointer to the cfifo_domain_t cluster
6922 + * that owns it (struct clusterdomain*)
6923 + */
6924 +typedef struct {
6925 + int cpu;
6926 + struct clusterdomain* cluster; /* owning cluster */
6927 + struct task_struct* linked; /* only RT tasks */
6928 + struct task_struct* scheduled; /* only RT tasks */
6929 + atomic_t will_schedule; /* prevent unneeded IPIs */
6930 + struct bheap_node* hn;
6931 +} cpu_entry_t;
6932 +
6933 +/* one cpu_entry_t per CPU */
6934 +DEFINE_PER_CPU(cpu_entry_t, cfifo_cpu_entries);
6935 +
6936 +#define set_will_schedule() \
6937 + (atomic_set(&__get_cpu_var(cfifo_cpu_entries).will_schedule, 1))
6938 +#define clear_will_schedule() \
6939 + (atomic_set(&__get_cpu_var(cfifo_cpu_entries).will_schedule, 0))
6940 +#define test_will_schedule(cpu) \
6941 + (atomic_read(&per_cpu(cfifo_cpu_entries, cpu).will_schedule))
6942 +
6943 +
6944 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
6945 +struct tasklet_head
6946 +{
6947 + struct tasklet_struct *head;
6948 + struct tasklet_struct **tail;
6949 +};
6950 +#endif
6951 +
6952 +/*
6953 + * In C-FIFO there is a cfifo domain _per_ cluster
6954 + * The number of clusters is determined dynamically according to the
6955 + * total number of CPUs and the cluster size
6956 + */
6957 +typedef struct clusterdomain {
6958 + /* rt_domain for this cluster */
6959 + rt_domain_t domain;
6960 + /* cpus in this cluster */
6961 + cpu_entry_t* *cpus;
6962 + /* map of this cluster cpus */
6963 + cpumask_var_t cpu_map;
6964 + /* the cpus queue themselves according to priority in here */
6965 + struct bheap_node *heap_node;
6966 + struct bheap cpu_heap;
6967 + /* lock for this cluster */
6968 +#define cfifo_lock domain.ready_lock
6969 +
6970 +
6971 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
6972 + struct tasklet_head pending_tasklets;
6973 +#endif
6974 +
6975 +} cfifo_domain_t;
6976 +
6977 +/* a cfifo_domain per cluster; allocation is done at init/activation time */
6978 +cfifo_domain_t *cfifo;
6979 +
6980 +#define remote_cluster(cpu) ((cfifo_domain_t *) per_cpu(cfifo_cpu_entries, cpu).cluster)
6981 +#define task_cpu_cluster(task) remote_cluster(get_partition(task))
6982 +
6983 +/* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling
6984 + * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose
6985 + * information during the initialization of the plugin (e.g., topology)
6986 +#define WANT_ALL_SCHED_EVENTS
6987 + */
6988 +#define VERBOSE_INIT
6989 +
6990 +static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
6991 +{
6992 + cpu_entry_t *a, *b;
6993 + a = _a->value;
6994 + b = _b->value;
6995 + /* Note that a and b are inverted: we want the lowest-priority CPU at
6996 + * the top of the heap.
6997 + */
6998 + return fifo_higher_prio(b->linked, a->linked);
6999 +}
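+
+/* Consequence (assuming fifo_higher_prio() treats a NULL task as lower
+ * priority than any real-time task, like the EDF comparator): bheap_peek()
+ * on this heap returns an idle CPU's entry if one exists, and otherwise
+ * the CPU running the lowest-priority linked task -- exactly the
+ * preemption candidate handed out by lowest_prio_cpu().
+ */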
7000 +
7001 +/* update_cpu_position - Move the cpu entry to the correct place to maintain
7002 + * order in the cpu queue. Caller must hold cfifo lock.
7003 + */
7004 +static void update_cpu_position(cpu_entry_t *entry)
7005 +{
7006 + cfifo_domain_t *cluster = entry->cluster;
7007 +
7008 + if (likely(bheap_node_in_heap(entry->hn)))
7009 + bheap_delete(cpu_lower_prio,
7010 + &cluster->cpu_heap,
7011 + entry->hn);
7012 +
7013 + bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn);
7014 +}
7015 +
7016 +/* caller must hold cfifo lock */
7017 +static cpu_entry_t* lowest_prio_cpu(cfifo_domain_t *cluster)
7018 +{
7019 + struct bheap_node* hn;
7020 + hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap);
7021 + return hn->value;
7022 +}
7023 +
7024 +
7025 +/* link_task_to_cpu - Update the link of a CPU.
7026 + * Handles the case where the to-be-linked task is already
7027 + * scheduled on a different CPU.
7028 + */
7029 +static noinline void link_task_to_cpu(struct task_struct* linked,
7030 + cpu_entry_t *entry)
7031 +{
7032 + cpu_entry_t *sched;
7033 + struct task_struct* tmp;
7034 + int on_cpu;
7035 +
7036 + BUG_ON(linked && !is_realtime(linked));
7037 +
7038 + /* Currently linked task is set to be unlinked. */
7039 + if (entry->linked) {
7040 + entry->linked->rt_param.linked_on = NO_CPU;
7041 + }
7042 +
7043 + /* Link new task to CPU. */
7044 + if (linked) {
7045 + set_rt_flags(linked, RT_F_RUNNING);
7046 + /* handle task is already scheduled somewhere! */
7047 + on_cpu = linked->rt_param.scheduled_on;
7048 + if (on_cpu != NO_CPU) {
7049 + sched = &per_cpu(cfifo_cpu_entries, on_cpu);
7050 + /* this should only happen if not linked already */
7051 + BUG_ON(sched->linked == linked);
7052 +
7053 + /* If we are already scheduled on the CPU to which we
7054 + * wanted to link, we don't need to do the swap --
7055 + * we just link ourselves to the CPU and depend on
7056 + * the caller to get things right.
7057 + */
7058 + if (entry != sched) {
7059 + TRACE_TASK(linked,
7060 + "already scheduled on %d, updating link.\n",
7061 + sched->cpu);
7062 + tmp = sched->linked;
7063 + linked->rt_param.linked_on = sched->cpu;
7064 + sched->linked = linked;
7065 + update_cpu_position(sched);
7066 + linked = tmp;
7067 + }
7068 + }
7069 + if (linked) /* might be NULL due to swap */
7070 + linked->rt_param.linked_on = entry->cpu;
7071 + }
7072 + entry->linked = linked;
7073 +#ifdef WANT_ALL_SCHED_EVENTS
7074 + if (linked)
7075 + TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
7076 + else
7077 + TRACE("NULL linked to %d.\n", entry->cpu);
7078 +#endif
7079 + update_cpu_position(entry);
7080 +}
7081 +
7082 +/* unlink - Make sure a task is not linked any longer to an entry
7083 + * where it was linked before. Must hold cfifo_lock.
7084 + */
7085 +static noinline void unlink(struct task_struct* t)
7086 +{
7087 + cpu_entry_t *entry;
7088 +
7089 + if (t->rt_param.linked_on != NO_CPU) {
7090 + /* unlink */
7091 + entry = &per_cpu(cfifo_cpu_entries, t->rt_param.linked_on);
7092 + t->rt_param.linked_on = NO_CPU;
7093 + link_task_to_cpu(NULL, entry);
7094 + } else if (is_queued(t)) {
7095 + /* This is an interesting situation: t is scheduled,
7096 + * but was just recently unlinked. It cannot be
7097 + * linked anywhere else (because then it would have
7098 + * been relinked to this CPU), thus it must be in some
7099 + * queue. We must remove it from the list in this
7100 + * case.
7101 + *
7102 +	 * in the C-FIFO case it should be somewhere in the queue for
7103 +	 * its domain; therefore we can get the domain using
7104 + * task_cpu_cluster
7105 + */
7106 + remove(&(task_cpu_cluster(t))->domain, t);
7107 + }
7108 +}
7109 +
7110 +
7111 +/* preempt - force a CPU to reschedule
7112 + */
7113 +static void preempt(cpu_entry_t *entry)
7114 +{
7115 + preempt_if_preemptable(entry->scheduled, entry->cpu);
7116 +}
7117 +
7118 +/* requeue - Put an unlinked task into c-fifo domain.
7119 + * Caller must hold cfifo_lock.
7120 + */
7121 +static noinline void requeue(struct task_struct* task)
7122 +{
7123 + cfifo_domain_t *cluster = task_cpu_cluster(task);
7124 + BUG_ON(!task);
7125 + /* sanity check before insertion */
7126 + BUG_ON(is_queued(task));
7127 +
7128 + if (is_released(task, litmus_clock()))
7129 + __add_ready(&cluster->domain, task);
7130 + else {
7131 + /* it has got to wait */
7132 + add_release(&cluster->domain, task);
7133 + }
7134 +}
7135 +
7136 +#ifdef CONFIG_SCHED_CPU_AFFINITY
7137 +static cpu_entry_t* cfifo_get_nearest_available_cpu(
7138 + cfifo_domain_t *cluster, cpu_entry_t* start)
7139 +{
7140 + cpu_entry_t* affinity;
7141 +
7142 + get_nearest_available_cpu(affinity, start, cfifo_cpu_entries, -1);
7143 +
7144 + /* make sure CPU is in our cluster */
7145 + if(affinity && cpu_isset(affinity->cpu, *cluster->cpu_map))
7146 + return(affinity);
7147 + else
7148 + return(NULL);
7149 +}
7150 +#endif
7151 +
7152 +
7153 +/* check for any necessary preemptions */
7154 +static void check_for_preemptions(cfifo_domain_t *cluster)
7155 +{
7156 + struct task_struct *task;
7157 + cpu_entry_t *last;
7158 +
7159 + for(last = lowest_prio_cpu(cluster);
7160 + fifo_preemption_needed(&cluster->domain, last->linked);
7161 + last = lowest_prio_cpu(cluster)) {
7162 + /* preemption necessary */
7163 + task = __take_ready(&cluster->domain);
7164 +#ifdef CONFIG_SCHED_CPU_AFFINITY
7165 + {
7166 + cpu_entry_t* affinity =
7167 + cfifo_get_nearest_available_cpu(cluster,
7168 + &per_cpu(cfifo_cpu_entries, task_cpu(task)));
7169 + if(affinity)
7170 + last = affinity;
7171 + else if(last->linked)
7172 + requeue(last->linked);
7173 + }
7174 +#else
7175 + if (last->linked)
7176 + requeue(last->linked);
7177 +#endif
7178 + TRACE("check_for_preemptions: attempting to link task %d to %d\n",
7179 + task->pid, last->cpu);
7180 + link_task_to_cpu(task, last);
7181 + preempt(last);
7182 + }
7183 +}
7184 +
7185 +/* cfifo_job_arrival: task is either resumed or released */
7186 +static noinline void cfifo_job_arrival(struct task_struct* task)
7187 +{
7188 + cfifo_domain_t *cluster = task_cpu_cluster(task);
7189 + BUG_ON(!task);
7190 +
7191 + requeue(task);
7192 + check_for_preemptions(cluster);
7193 +}
7194 +
7195 +static void cfifo_release_jobs(rt_domain_t* rt, struct bheap* tasks)
7196 +{
7197 + cfifo_domain_t* cluster = container_of(rt, cfifo_domain_t, domain);
7198 + unsigned long flags;
7199 +
7200 + raw_spin_lock_irqsave(&cluster->cfifo_lock, flags);
7201 +
7202 + __merge_ready(&cluster->domain, tasks);
7203 + check_for_preemptions(cluster);
7204 +
7205 + raw_spin_unlock_irqrestore(&cluster->cfifo_lock, flags);
7206 +}
7207 +
7208 +/* caller holds cfifo_lock */
7209 +static noinline void job_completion(struct task_struct *t, int forced)
7210 +{
7211 + BUG_ON(!t);
7212 +
7213 + sched_trace_task_completion(t, forced);
7214 +
7215 +#ifdef CONFIG_LITMUS_NVIDIA
7216 + atomic_set(&tsk_rt(t)->nv_int_count, 0);
7217 +#endif
7218 +
7219 + TRACE_TASK(t, "job_completion().\n");
7220 +
7221 + /* set flags */
7222 + set_rt_flags(t, RT_F_SLEEP);
7223 + /* prepare for next period */
7224 + prepare_for_next_period(t);
7225 + if (is_released(t, litmus_clock()))
7226 + sched_trace_task_release(t);
7227 + /* unlink */
7228 + unlink(t);
7229 + /* requeue
7230 + * But don't requeue a blocking task. */
7231 + if (is_running(t))
7232 + cfifo_job_arrival(t);
7233 +}
7234 +
7235 +/* cfifo_tick - this function is called for every local timer
7236 + * interrupt.
7237 + *
7238 + * checks whether the current task has expired and checks
7239 + * whether we need to preempt it if it has not expired
7240 + */
7241 +static void cfifo_tick(struct task_struct* t)
7242 +{
7243 + if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
7244 + if (!is_np(t)) {
7245 + /* np tasks will be preempted when they become
7246 + * preemptable again
7247 + */
7248 + litmus_reschedule_local();
7249 + set_will_schedule();
7250 + TRACE("cfifo_scheduler_tick: "
7251 + "%d is preemptable "
7252 + " => FORCE_RESCHED\n", t->pid);
7253 + } else if (is_user_np(t)) {
7254 + TRACE("cfifo_scheduler_tick: "
7255 + "%d is non-preemptable, "
7256 + "preemption delayed.\n", t->pid);
7257 + request_exit_np(t);
7258 + }
7259 + }
7260 +}
7261 +
7262 +
7263 +
7264 +
7265 +
7266 +
7267 +
7268 +
7269 +
7270 +
7271 +
7272 +
7273 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
7274 +
7275 +
7276 +static void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed)
7277 +{
7278 + if (!atomic_read(&tasklet->count)) {
7279 + sched_trace_tasklet_begin(tasklet->owner);
7280 +
7281 + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state))
7282 + {
7283 + BUG();
7284 + }
7285 +		TRACE("%s: Invoking tasklet with owner pid = %d (flushed = %lu).\n", __FUNCTION__, tasklet->owner->pid, flushed);
7286 + tasklet->func(tasklet->data);
7287 + tasklet_unlock(tasklet);
7288 +
7289 + sched_trace_tasklet_end(tasklet->owner, flushed);
7290 + }
7291 + else {
7292 + BUG();
7293 + }
7294 +}
7295 +
7296 +
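+/* List invariant relied upon below: ".tail" points at the ->next field of
+ * the last pending tasklet (or at ".head" when the list is empty), so
+ * removing the last element must re-point the tail at the predecessor's
+ * ->next field, or back at ".head" if nothing precedes it.
+ */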
7297 +static void __extract_tasklets(cfifo_domain_t* cluster, struct task_struct* task, struct tasklet_head* task_tasklets)
7298 +{
7299 + struct tasklet_struct* step;
7300 + struct tasklet_struct* tasklet;
7301 + struct tasklet_struct* prev;
7302 +
7303 + task_tasklets->head = NULL;
7304 + task_tasklets->tail = &(task_tasklets->head);
7305 +
7306 + prev = NULL;
7307 + for(step = cluster->pending_tasklets.head; step != NULL; step = step->next)
7308 + {
7309 + if(step->owner == task)
7310 + {
7311 + TRACE("%s: Found tasklet to flush: %d\n", __FUNCTION__, step->owner->pid);
7312 +
7313 + tasklet = step;
7314 +
7315 + if(prev) {
7316 + prev->next = tasklet->next;
7317 + }
7318 + else if(cluster->pending_tasklets.head == tasklet) {
7319 + // we're at the head.
7320 + cluster->pending_tasklets.head = tasklet->next;
7321 + }
7322 +
7323 +			if(cluster->pending_tasklets.tail == &(tasklet->next)) {
7324 + // we're at the tail
7325 + if(prev) {
7326 +					cluster->pending_tasklets.tail = &(prev->next);
7327 + }
7328 + else {
7329 + cluster->pending_tasklets.tail = &(cluster->pending_tasklets.head);
7330 + }
7331 + }
7332 +
7333 + tasklet->next = NULL;
7334 + *(task_tasklets->tail) = tasklet;
7335 + task_tasklets->tail = &(tasklet->next);
7336 + }
7337 + else {
7338 + prev = step;
7339 + }
7340 + }
7341 +}
7342 +
7343 +static void flush_tasklets(cfifo_domain_t* cluster, struct task_struct* task)
7344 +{
7345 + unsigned long flags;
7346 + struct tasklet_head task_tasklets;
7347 + struct tasklet_struct* step;
7348 +
7349 + raw_spin_lock_irqsave(&cluster->cfifo_lock, flags);
7350 + __extract_tasklets(cluster, task, &task_tasklets);
7351 + raw_spin_unlock_irqrestore(&cluster->cfifo_lock, flags);
7352 +
7353 +	if(task_tasklets.head != NULL) {
7354 + TRACE("%s: Flushing tasklets for %d...\n", __FUNCTION__, task->pid);
7355 + }
7356 +
7357 + // now execute any flushed tasklets.
7358 +	for(step = task_tasklets.head; step != NULL; /**/)
7359 + {
7360 + struct tasklet_struct* temp = step->next;
7361 +
7362 + step->next = NULL;
7363 + __do_lit_tasklet(step, 1ul);
7364 +
7365 + step = temp;
7366 + }
7367 +}
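+
+/* Note: each extracted tasklet is executed one last time with the
+ * "flushed" argument set to 1, so sched_trace_tasklet_end() can tell a
+ * flush on task exit apart from a normal (flushed = 0) invocation.
+ */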
7368 +
7369 +
7370 +static void do_lit_tasklets(cfifo_domain_t* cluster, struct task_struct* sched_task)
7371 +{
7372 + int work_to_do = 1;
7373 + struct tasklet_struct *tasklet = NULL;
7374 + //struct tasklet_struct *step;
7375 + unsigned long flags;
7376 +
7377 + while(work_to_do) {
7378 +
7379 + TS_NV_SCHED_BOTISR_START;
7380 +
7381 + // remove tasklet at head of list if it has higher priority.
7382 + raw_spin_lock_irqsave(&cluster->cfifo_lock, flags);
7383 +
7384 + /*
7385 + step = cluster->pending_tasklets.head;
7386 + TRACE("%s: (BEFORE) dumping tasklet queue...\n", __FUNCTION__);
7387 + while(step != NULL){
7388 + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid);
7389 + step = step->next;
7390 + }
7391 + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1);
7392 + TRACE("%s: done.\n", __FUNCTION__);
7393 + */
7394 +
7395 +
7396 + if(cluster->pending_tasklets.head != NULL) {
7397 + // remove tasklet at head.
7398 + tasklet = cluster->pending_tasklets.head;
7399 +
7400 + if(fifo_higher_prio(tasklet->owner, sched_task)) {
7401 +
7402 + if(NULL == tasklet->next) {
7403 + // tasklet is at the head, list only has one element
7404 + TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
7405 + cluster->pending_tasklets.tail = &(cluster->pending_tasklets.head);
7406 + }
7407 +
7408 + // remove the tasklet from the queue
7409 + cluster->pending_tasklets.head = tasklet->next;
7410 +
7411 + TRACE("%s: Removed tasklet for %d from tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
7412 + }
7413 + else {
7414 + TRACE("%s: Pending tasklet (%d) does not have priority to run on this CPU (%d).\n", __FUNCTION__, tasklet->owner->pid, smp_processor_id());
7415 + tasklet = NULL;
7416 + }
7417 + }
7418 + else {
7419 + TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__);
7420 + }
7421 +
7422 + /*
7423 + step = cluster->pending_tasklets.head;
7424 + TRACE("%s: (AFTER) dumping tasklet queue...\n", __FUNCTION__);
7425 + while(step != NULL){
7426 + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid);
7427 + step = step->next;
7428 + }
7429 + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1);
7430 + TRACE("%s: done.\n", __FUNCTION__);
7431 + */
7432 +
7433 + raw_spin_unlock_irqrestore(&cluster->cfifo_lock, flags);
7434 +
7435 + TS_NV_SCHED_BOTISR_END;
7436 +
7437 + if(tasklet) {
7438 + __do_lit_tasklet(tasklet, 0ul);
7439 + tasklet = NULL;
7440 + }
7441 + else {
7442 + work_to_do = 0;
7443 + }
7444 + }
7445 +
7446 + //TRACE("%s: exited.\n", __FUNCTION__);
7447 +}
7448 +
7449 +
7450 +static void run_tasklets(struct task_struct* sched_task)
7451 +{
7452 + cfifo_domain_t* cluster;
7453 +
7454 +#if 0
7455 + int task_is_rt = is_realtime(sched_task);
7456 + cfifo_domain_t* cluster;
7457 +
7458 + if(is_realtime(sched_task)) {
7459 + cluster = task_cpu_cluster(sched_task);
7460 + }
7461 + else {
7462 + cluster = remote_cluster(get_cpu());
7463 + }
7464 +
7465 + if(cluster && cluster->pending_tasklets.head != NULL) {
7466 + TRACE("%s: There are tasklets to process.\n", __FUNCTION__);
7467 +
7468 + do_lit_tasklets(cluster, sched_task);
7469 + }
7470 +
7471 + if(!task_is_rt) {
7472 + put_cpu_no_resched();
7473 + }
7474 +#else
7475 +
7476 + preempt_disable();
7477 +
7478 + cluster = (is_realtime(sched_task)) ?
7479 + task_cpu_cluster(sched_task) :
7480 + remote_cluster(smp_processor_id());
7481 +
7482 + if(cluster && cluster->pending_tasklets.head != NULL) {
7483 + TRACE("%s: There are tasklets to process.\n", __FUNCTION__);
7484 + do_lit_tasklets(cluster, sched_task);
7485 + }
7486 +
7487 + preempt_enable_no_resched();
7488 +
7489 +#endif
7490 +}
7491 +
7492 +
7493 +static void __add_pai_tasklet(struct tasklet_struct* tasklet, cfifo_domain_t* cluster)
7494 +{
7495 + struct tasklet_struct* step;
7496 +
7497 + /*
7498 + step = cluster->pending_tasklets.head;
7499 + TRACE("%s: (BEFORE) dumping tasklet queue...\n", __FUNCTION__);
7500 + while(step != NULL){
7501 + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid);
7502 + step = step->next;
7503 + }
7504 + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1);
7505 + TRACE("%s: done.\n", __FUNCTION__);
7506 + */
7507 +
7508 +
7509 + tasklet->next = NULL; // make sure there are no old values floating around
7510 +
7511 + step = cluster->pending_tasklets.head;
7512 + if(step == NULL) {
7513 + TRACE("%s: tasklet queue empty. inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid);
7514 + // insert at tail.
7515 + *(cluster->pending_tasklets.tail) = tasklet;
7516 + cluster->pending_tasklets.tail = &(tasklet->next);
7517 + }
7518 + else if((*(cluster->pending_tasklets.tail) != NULL) &&
7519 + fifo_higher_prio((*(cluster->pending_tasklets.tail))->owner, tasklet->owner)) {
7520 + // insert at tail.
7521 + TRACE("%s: tasklet belongs at end. inserting tasklet for %d at tail.\n", __FUNCTION__, tasklet->owner->pid);
7522 +
7523 + *(cluster->pending_tasklets.tail) = tasklet;
7524 + cluster->pending_tasklets.tail = &(tasklet->next);
7525 + }
+	else {
+
+		//WARN_ON(1 == 1);
+
+		// insert the tasklet somewhere in the middle.
+
+		TRACE("%s: tasklet belongs somewhere in the middle.\n", __FUNCTION__);
+
+		if(fifo_higher_prio(tasklet->owner, step->owner)) {
+			// the new tasklet outranks the current head.
+			TRACE("%s: %d is the new tasklet queue head.\n", __FUNCTION__, tasklet->owner->pid);
+
+			tasklet->next = step;
+			cluster->pending_tasklets.head = tasklet;
+		}
+		else {
+			while(step->next && fifo_higher_prio(step->next->owner, tasklet->owner)) {
+				step = step->next;
+			}
+
+			// insert tasklet right before step->next.
+
+			TRACE("%s: inserting tasklet for %d between %d and %d.\n", __FUNCTION__, tasklet->owner->pid, step->owner->pid, (step->next) ? step->next->owner->pid : -1);
+
+			tasklet->next = step->next;
+			step->next = tasklet;
+
+			// if the tasklet went in at the very end, advance the tail pointer.
+			if(tasklet->next == NULL) {
+				cluster->pending_tasklets.tail = &(tasklet->next);
+			}
+		}
+	}
7552 +
7553 + /*
7554 + step = cluster->pending_tasklets.head;
7555 + TRACE("%s: (AFTER) dumping tasklet queue...\n", __FUNCTION__);
7556 + while(step != NULL){
7557 + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid);
7558 + step = step->next;
7559 + }
7560 + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1);
7561 + TRACE("%s: done.\n", __FUNCTION__);
7562 + */
7563 +
7564 +	// NOTE: the insertion above keeps this list in priority order; a simple tail-append is kept below for reference.
7565 + // tasklet->next = NULL;
7566 + // *(cluster->pending_tasklets.tail) = tasklet;
7567 + // cluster->pending_tasklets.tail = &tasklet->next;
7568 +}
7569 +
7570 +static int enqueue_pai_tasklet(struct tasklet_struct* tasklet)
7571 +{
7572 + cfifo_domain_t *cluster = NULL;
7573 + cpu_entry_t *targetCPU = NULL;
7574 + int thisCPU;
7575 + int runLocal = 0;
7576 + int runNow = 0;
7577 + unsigned long flags;
7578 +
7579 + if(unlikely((tasklet->owner == NULL) || !is_realtime(tasklet->owner)))
7580 + {
7581 + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
7582 + return 0;
7583 + }
7584 +
7585 + cluster = task_cpu_cluster(tasklet->owner);
7586 +
7587 + raw_spin_lock_irqsave(&cluster->cfifo_lock, flags);
7588 +
7589 + thisCPU = smp_processor_id();
7590 +
7591 +#if 1
7592 +#ifdef CONFIG_SCHED_CPU_AFFINITY
7593 + {
7594 + cpu_entry_t* affinity = NULL;
7595 +
7596 + // use this CPU if it is in our cluster and isn't running any RT work.
7597 + if(cpu_isset(thisCPU, *cluster->cpu_map) && (__get_cpu_var(cfifo_cpu_entries).linked == NULL)) {
7598 + affinity = &(__get_cpu_var(cfifo_cpu_entries));
7599 + }
7600 + else {
7601 + // this CPU is busy or shouldn't run tasklets for this cluster.
7602 + // look for an available nearby CPU.
7603 + // NOTE: Affinity towards owner and not this CPU. Is this right?
7604 + affinity =
7605 + cfifo_get_nearest_available_cpu(cluster,
7606 + &per_cpu(cfifo_cpu_entries, task_cpu(tasklet->owner)));
7607 + }
7608 +
7609 + targetCPU = affinity;
7610 + }
7611 +#endif
7612 +#endif
7613 +
7614 + if (targetCPU == NULL) {
7615 + targetCPU = lowest_prio_cpu(cluster);
7616 + }
7617 +
7618 + if (fifo_higher_prio(tasklet->owner, targetCPU->linked)) {
7619 + if (thisCPU == targetCPU->cpu) {
7620 + TRACE("%s: Run tasklet locally (and now).\n", __FUNCTION__);
7621 + runLocal = 1;
7622 + runNow = 1;
7623 + }
7624 + else {
7625 + TRACE("%s: Run tasklet remotely (and now).\n", __FUNCTION__);
7626 + runLocal = 0;
7627 + runNow = 1;
7628 + }
7629 + }
7630 + else {
7631 + runLocal = 0;
7632 + runNow = 0;
7633 + }
7634 +
7635 + if(!runLocal) {
7636 + // enqueue the tasklet
7637 + __add_pai_tasklet(tasklet, cluster);
7638 + }
7639 +
7640 + raw_spin_unlock_irqrestore(&cluster->cfifo_lock, flags);
7641 +
7642 +
7643 + if (runLocal /*&& runNow */) { // runNow == 1 is implied
7644 + TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__);
7645 + __do_lit_tasklet(tasklet, 0ul);
7646 + }
7647 + else if (runNow /*&& !runLocal */) { // runLocal == 0 is implied
7648 + TRACE("%s: Triggering CPU %d to run tasklet.\n", __FUNCTION__, targetCPU->cpu);
7649 + preempt(targetCPU); // need to be protected by cfifo_lock?
7650 + }
7651 + else {
7652 + TRACE("%s: Scheduling of tasklet was deferred.\n", __FUNCTION__);
7653 + }
7654 +
7655 + return(1); // success
7656 +}
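+/*
+ * Summary of the decision made above (for reference):
+ *  - owner outranks the target CPU's linked task and the target is this CPU
+ *      -> run the tasklet inline, immediately;
+ *  - owner outranks the target CPU's linked task but the target is remote
+ *      -> queue the tasklet and trigger a reschedule via preempt(targetCPU);
+ *  - otherwise
+ *      -> queue the tasklet; it is picked up later by run_tasklets() when a
+ *         CPU schedules work of lower priority than the tasklet's owner.
+ */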
7657 +
7658 +
7659 +#endif
7660 +
7661 +
7662 +
7663 +
7664 +
7665 +
7666 +
7667 +
7668 +
7669 +
7670 +
7671 +
7672 +
7673 +
7674 +
7675 +
7676 +
7677 +
7678 +
7679 +
7680 +/* Getting schedule() right is a bit tricky. schedule() may not make any
7681 + * assumptions on the state of the current task since it may be called for a
7682 + * number of reasons. The reasons include a scheduler_tick() determined that it
7683 + * was necessary, because sys_exit_np() was called, because some Linux
7684 + * subsystem determined so, or even (in the worst case) because there is a bug
7685 + * hidden somewhere. Thus, we must take extreme care to determine what the
7686 + * current state is.
7687 + *
7688 + * The CPU could currently be scheduling a task (or not), be linked (or not).
7689 + *
7690 + * The following assertions for the scheduled task could hold:
7691 + *
7692 + * - !is_running(scheduled) // the job blocks
7693 + * - scheduled->timeslice == 0 // the job completed (forcefully)
7694 + * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall)
7695 + * - linked != scheduled // we need to reschedule (for any reason)
7696 + * - is_np(scheduled) // rescheduling must be delayed,
7697 + * sys_exit_np must be requested
7698 + *
7699 + * Any of these can occur together.
7700 + */
7701 +static struct task_struct* cfifo_schedule(struct task_struct * prev)
7702 +{
7703 + cpu_entry_t* entry = &__get_cpu_var(cfifo_cpu_entries);
7704 + cfifo_domain_t *cluster = entry->cluster;
7705 + int out_of_time, sleep, preempt, np, exists, blocks;
7706 + struct task_struct* next = NULL;
7707 +
7708 + raw_spin_lock(&cluster->cfifo_lock);
7709 + clear_will_schedule();
7710 +
7711 + /* sanity checking */
7712 + BUG_ON(entry->scheduled && entry->scheduled != prev);
7713 + BUG_ON(entry->scheduled && !is_realtime(prev));
7714 + BUG_ON(is_realtime(prev) && !entry->scheduled);
7715 +
7716 + /* (0) Determine state */
7717 + exists = entry->scheduled != NULL;
7718 + blocks = exists && !is_running(entry->scheduled);
7719 + out_of_time = exists &&
7720 + budget_enforced(entry->scheduled) &&
7721 + budget_exhausted(entry->scheduled);
7722 + np = exists && is_np(entry->scheduled);
7723 + sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
7724 + preempt = entry->scheduled != entry->linked;
7725 +
7726 +#ifdef WANT_ALL_SCHED_EVENTS
7727 + TRACE_TASK(prev, "invoked cfifo_schedule.\n");
7728 +#endif
7729 +
7730 + if (exists)
7731 + TRACE_TASK(prev,
7732 + "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
7733 + "state:%d sig:%d\n",
7734 + blocks, out_of_time, np, sleep, preempt,
7735 + prev->state, signal_pending(prev));
7736 + if (entry->linked && preempt)
7737 + TRACE_TASK(prev, "will be preempted by %s/%d\n",
7738 + entry->linked->comm, entry->linked->pid);
7739 +
7740 +
7741 + /* If a task blocks we have no choice but to reschedule.
7742 + */
7743 + if (blocks)
7744 + unlink(entry->scheduled);
7745 +
7746 + /* Request a sys_exit_np() call if we would like to preempt but cannot.
7747 + * We need to make sure to update the link structure anyway in case
7748 + * that we are still linked. Multiple calls to request_exit_np() don't
7749 + * hurt.
7750 + */
7751 + if (np && (out_of_time || preempt || sleep)) {
7752 + unlink(entry->scheduled);
7753 + request_exit_np(entry->scheduled);
7754 + }
7755 +
7756 + /* Any task that is preemptable and either exhausts its execution
7757 + * budget or wants to sleep completes. We may have to reschedule after
7758 + * this. Don't do a job completion if we block (can't have timers running
7759 + * for blocked jobs). Preemptions go first for the same reason.
7760 + */
7761 + if (!np && (out_of_time || sleep) && !blocks && !preempt)
7762 + job_completion(entry->scheduled, !sleep);
7763 +
7764 + /* Link pending task if we became unlinked.
7765 + */
7766 + if (!entry->linked)
7767 + link_task_to_cpu(__take_ready(&cluster->domain), entry);
7768 +
7769 + /* The final scheduling decision. Do we need to switch for some reason?
7770 + * If linked is different from scheduled, then select linked as next.
7771 + */
7772 + if ((!np || blocks) &&
7773 + entry->linked != entry->scheduled) {
7774 + /* Schedule a linked job? */
7775 + if (entry->linked) {
7776 + entry->linked->rt_param.scheduled_on = entry->cpu;
7777 + next = entry->linked;
7778 + }
7779 + if (entry->scheduled) {
7780 + /* not gonna be scheduled soon */
7781 + entry->scheduled->rt_param.scheduled_on = NO_CPU;
7782 + TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
7783 + }
7784 + } else
7785 + /* Only override Linux scheduler if we have a real-time task
7786 + * scheduled that needs to continue.
7787 + */
7788 + if (exists)
7789 + next = prev;
7790 +
7791 + sched_state_task_picked();
7792 + raw_spin_unlock(&cluster->cfifo_lock);
7793 +
7794 +#ifdef WANT_ALL_SCHED_EVENTS
7795 + TRACE("cfifo_lock released, next=0x%p\n", next);
7796 +
7797 + if (next)
7798 + TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
7799 + else if (exists && !next)
7800 + TRACE("becomes idle at %llu.\n", litmus_clock());
7801 +#endif
7802 +
7803 +
7804 + return next;
7805 +}
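+/*
+ * Worked example of the state flags above: a preemptable job that exhausts
+ * its budget while still runnable yields exists=1, blocks=0, out_of_time=1,
+ * np=0, sleep=0.  job_completion() is then called (a forced completion), the
+ * job is unlinked and requeued for its next period, and the highest-priority
+ * ready job (if any) is linked to this CPU and returned as 'next'.
+ */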
7806 +
7807 +
7808 +/* _finish_switch - we just finished the switch away from prev
7809 + */
7810 +static void cfifo_finish_switch(struct task_struct *prev)
7811 +{
7812 + cpu_entry_t* entry = &__get_cpu_var(cfifo_cpu_entries);
7813 +
7814 + entry->scheduled = is_realtime(current) ? current : NULL;
7815 +#ifdef WANT_ALL_SCHED_EVENTS
7816 + TRACE_TASK(prev, "switched away from\n");
7817 +#endif
7818 +}
7819 +
7820 +
7821 +/* Prepare a task for running in RT mode
7822 + */
7823 +static void cfifo_task_new(struct task_struct * t, int on_rq, int running)
7824 +{
7825 + unsigned long flags;
7826 + cpu_entry_t* entry;
7827 + cfifo_domain_t* cluster;
7828 +
7829 + TRACE("cfifo: task new %d\n", t->pid);
7830 +
7831 + /* the cluster doesn't change even if t is running */
7832 + cluster = task_cpu_cluster(t);
7833 +
7834 + raw_spin_lock_irqsave(&cluster->cfifo_lock, flags);
7835 +
7836 + /* setup job params */
7837 + release_at(t, litmus_clock());
7838 +
7839 + if (running) {
7840 + entry = &per_cpu(cfifo_cpu_entries, task_cpu(t));
7841 + BUG_ON(entry->scheduled);
7842 +
7843 + entry->scheduled = t;
7844 + tsk_rt(t)->scheduled_on = task_cpu(t);
7845 + } else {
7846 + t->rt_param.scheduled_on = NO_CPU;
7847 + }
7848 + t->rt_param.linked_on = NO_CPU;
7849 +
7850 + cfifo_job_arrival(t);
7851 + raw_spin_unlock_irqrestore(&cluster->cfifo_lock, flags);
7852 +}
7853 +
7854 +static void cfifo_task_wake_up(struct task_struct *task)
7855 +{
7856 + unsigned long flags;
7857 + //lt_t now;
7858 + cfifo_domain_t *cluster;
7859 +
7860 + TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
7861 +
7862 + cluster = task_cpu_cluster(task);
7863 +
7864 + raw_spin_lock_irqsave(&cluster->cfifo_lock, flags);
7865 +
7866 +#if 0 // sporadic task model
7867 + /* We need to take suspensions because of semaphores into
7868 + * account! If a job resumes after being suspended due to acquiring
7869 + * a semaphore, it should never be treated as a new job release.
7870 + */
7871 + if (get_rt_flags(task) == RT_F_EXIT_SEM) {
7872 + set_rt_flags(task, RT_F_RUNNING);
7873 + } else {
7874 + now = litmus_clock();
7875 + if (is_tardy(task, now)) {
7876 + /* new sporadic release */
7877 + release_at(task, now);
7878 + sched_trace_task_release(task);
7879 + }
7880 + else {
7881 + if (task->rt.time_slice) {
7882 + /* came back in time before deadline
7883 + */
7884 + set_rt_flags(task, RT_F_RUNNING);
7885 + }
7886 + }
7887 + }
7888 +#endif
7889 +
7890 + //BUG_ON(tsk_rt(task)->linked_on != NO_CPU);
7891 + set_rt_flags(task, RT_F_RUNNING); // periodic model
7892 +
7893 + if(tsk_rt(task)->linked_on == NO_CPU)
7894 + cfifo_job_arrival(task);
7895 + else
7896 + TRACE("WTF, mate?!\n");
7897 +
7898 + raw_spin_unlock_irqrestore(&cluster->cfifo_lock, flags);
7899 +}
7900 +
7901 +static void cfifo_task_block(struct task_struct *t)
7902 +{
7903 + unsigned long flags;
7904 + cfifo_domain_t *cluster;
7905 +
7906 + TRACE_TASK(t, "block at %llu\n", litmus_clock());
7907 +
7908 + cluster = task_cpu_cluster(t);
7909 +
7910 + /* unlink if necessary */
7911 + raw_spin_lock_irqsave(&cluster->cfifo_lock, flags);
7912 + unlink(t);
7913 + raw_spin_unlock_irqrestore(&cluster->cfifo_lock, flags);
7914 +
7915 + BUG_ON(!is_realtime(t));
7916 +}
7917 +
7918 +
7919 +static void cfifo_task_exit(struct task_struct * t)
7920 +{
7921 + unsigned long flags;
7922 + cfifo_domain_t *cluster = task_cpu_cluster(t);
7923 +
7924 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
7925 + flush_tasklets(cluster, t);
7926 +#endif
7927 +
7928 + /* unlink if necessary */
7929 + raw_spin_lock_irqsave(&cluster->cfifo_lock, flags);
7930 + unlink(t);
7931 + if (tsk_rt(t)->scheduled_on != NO_CPU) {
7932 + cpu_entry_t *cpu;
7933 + cpu = &per_cpu(cfifo_cpu_entries, tsk_rt(t)->scheduled_on);
7934 + cpu->scheduled = NULL;
7935 + tsk_rt(t)->scheduled_on = NO_CPU;
7936 + }
7937 + raw_spin_unlock_irqrestore(&cluster->cfifo_lock, flags);
7938 +
7939 + BUG_ON(!is_realtime(t));
7940 + TRACE_TASK(t, "RIP\n");
7941 +}
7942 +
7943 +static long cfifo_admit_task(struct task_struct* tsk)
7944 +{
7945 + return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
7946 +}
7947 +
7948 +
7949 +
7950 +
7951 +
7952 +
7953 +
7954 +
7955 +
7956 +
7957 +
7958 +
7959 +
7960 +#ifdef CONFIG_LITMUS_LOCKING
7961 +
7962 +#include <litmus/fdso.h>
7963 +
7964 +
7965 +static void __set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
7966 +{
7967 + int linked_on;
7968 + int check_preempt = 0;
7969 +
7970 + cfifo_domain_t* cluster = task_cpu_cluster(t);
7971 +
7972 + if(prio_inh != NULL)
7973 + TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid);
7974 + else
7975 + TRACE_TASK(t, "inherits priority from %p\n", prio_inh);
7976 +
7977 + sched_trace_eff_prio_change(t, prio_inh);
7978 +
7979 + tsk_rt(t)->inh_task = prio_inh;
7980 +
7981 + linked_on = tsk_rt(t)->linked_on;
7982 +
7983 + /* If it is scheduled, then we need to reorder the CPU heap. */
7984 + if (linked_on != NO_CPU) {
7985 + TRACE_TASK(t, "%s: linked on %d\n",
7986 + __FUNCTION__, linked_on);
7987 + /* Holder is scheduled; need to re-order CPUs.
7988 + * We can't use heap_decrease() here since
7989 + * the cpu_heap is ordered in reverse direction, so
7990 + * it is actually an increase. */
7991 + bheap_delete(cpu_lower_prio, &cluster->cpu_heap,
7992 + per_cpu(cfifo_cpu_entries, linked_on).hn);
7993 + bheap_insert(cpu_lower_prio, &cluster->cpu_heap,
7994 + per_cpu(cfifo_cpu_entries, linked_on).hn);
7995 + } else {
7996 + /* holder may be queued: first stop queue changes */
7997 + raw_spin_lock(&cluster->domain.release_lock);
7998 + if (is_queued(t)) {
7999 + TRACE_TASK(t, "%s: is queued\n", __FUNCTION__);
8000 +
8001 + /* We need to update the position of holder in some
8002 + * heap. Note that this could be a release heap if
8003 + * budget enforcement is used and this job overran. */
8004 + check_preempt = !bheap_decrease(fifo_ready_order, tsk_rt(t)->heap_node);
8005 +
8006 + } else {
8007 + /* Nothing to do: if it is not queued and not linked
8008 + * then it is either sleeping or currently being moved
8009 + * by other code (e.g., a timer interrupt handler) that
8010 + * will use the correct priority when enqueuing the
8011 + * task. */
8012 + TRACE_TASK(t, "%s: is NOT queued => Done.\n", __FUNCTION__);
8013 + }
8014 + raw_spin_unlock(&cluster->domain.release_lock);
8015 +
8016 + /* If holder was enqueued in a release heap, then the following
8017 + * preemption check is pointless, but we can't easily detect
8018 + * that case. If you want to fix this, then consider that
8019 + * simply adding a state flag requires O(n) time to update when
8020 + * releasing n tasks, which conflicts with the goal to have
8021 + * O(log n) merges. */
8022 + if (check_preempt) {
8023 + /* heap_decrease() hit the top level of the heap: make
8024 + * sure preemption checks get the right task, not the
8025 + * potentially stale cache. */
8026 + bheap_uncache_min(fifo_ready_order, &cluster->domain.ready_queue);
8027 + check_for_preemptions(cluster);
8028 + }
8029 + }
8030 +}
8031 +
8032 +/* called with IRQs off */
8033 +static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
8034 +{
8035 + cfifo_domain_t* cluster = task_cpu_cluster(t);
8036 +
8037 + raw_spin_lock(&cluster->cfifo_lock);
8038 +
8039 + __set_priority_inheritance(t, prio_inh);
8040 +
8041 +#ifdef CONFIG_LITMUS_SOFTIRQD
8042 + if(tsk_rt(t)->cur_klitirqd != NULL)
8043 + {
8044 + TRACE_TASK(t, "%s/%d inherits a new priority!\n",
8045 + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
8046 +
8047 + __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh);
8048 + }
8049 +#endif
8050 +
8051 + raw_spin_unlock(&cluster->cfifo_lock);
8052 +}
8053 +
8054 +
8055 +/* called with IRQs off */
8056 +static void __clear_priority_inheritance(struct task_struct* t)
8057 +{
8058 + TRACE_TASK(t, "priority restored\n");
8059 +
8060 + if(tsk_rt(t)->scheduled_on != NO_CPU)
8061 + {
8062 + sched_trace_eff_prio_change(t, NULL);
8063 +
8064 + tsk_rt(t)->inh_task = NULL;
8065 +
8066 + /* Check if rescheduling is necessary. We can't use heap_decrease()
8067 + * since the priority was effectively lowered. */
8068 + unlink(t);
8069 + cfifo_job_arrival(t);
8070 + }
8071 + else
8072 + {
8073 + __set_priority_inheritance(t, NULL);
8074 + }
8075 +
8076 +#ifdef CONFIG_LITMUS_SOFTIRQD
8077 + if(tsk_rt(t)->cur_klitirqd != NULL)
8078 + {
8079 + TRACE_TASK(t, "%s/%d inheritance set back to owner.\n",
8080 + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
8081 +
8082 + if(tsk_rt(tsk_rt(t)->cur_klitirqd)->scheduled_on != NO_CPU)
8083 + {
8084 + sched_trace_eff_prio_change(tsk_rt(t)->cur_klitirqd, t);
8085 +
8086 + tsk_rt(tsk_rt(t)->cur_klitirqd)->inh_task = t;
8087 +
8088 + /* Check if rescheduling is necessary. We can't use heap_decrease()
8089 + * since the priority was effectively lowered. */
8090 + unlink(tsk_rt(t)->cur_klitirqd);
8091 + cfifo_job_arrival(tsk_rt(t)->cur_klitirqd);
8092 + }
8093 + else
8094 + {
8095 + __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, t);
8096 + }
8097 + }
8098 +#endif
8099 +}
8100 +
8101 +/* called with IRQs off */
8102 +static void clear_priority_inheritance(struct task_struct* t)
8103 +{
8104 + cfifo_domain_t* cluster = task_cpu_cluster(t);
8105 +
8106 + raw_spin_lock(&cluster->cfifo_lock);
8107 + __clear_priority_inheritance(t);
8108 + raw_spin_unlock(&cluster->cfifo_lock);
8109 +}
8110 +
8111 +
8112 +
8113 +#ifdef CONFIG_LITMUS_SOFTIRQD
8114 +/* called with IRQs off */
8115 +static void set_priority_inheritance_klitirqd(struct task_struct* klitirqd,
8116 + struct task_struct* old_owner,
8117 + struct task_struct* new_owner)
8118 +{
8119 + cfifo_domain_t* cluster = task_cpu_cluster(klitirqd);
8120 +
8121 + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
8122 +
8123 + raw_spin_lock(&cluster->cfifo_lock);
8124 +
8125 + if(old_owner != new_owner)
8126 + {
8127 + if(old_owner)
8128 + {
8129 + // unreachable?
8130 + tsk_rt(old_owner)->cur_klitirqd = NULL;
8131 + }
8132 +
8133 + TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n",
8134 + new_owner->comm, new_owner->pid);
8135 +
8136 + tsk_rt(new_owner)->cur_klitirqd = klitirqd;
8137 + }
8138 +
8139 + __set_priority_inheritance(klitirqd,
8140 + (tsk_rt(new_owner)->inh_task == NULL) ?
8141 + new_owner :
8142 + tsk_rt(new_owner)->inh_task);
8143 +
8144 + raw_spin_unlock(&cluster->cfifo_lock);
8145 +}
8146 +
8147 +/* called with IRQs off */
8148 +static void clear_priority_inheritance_klitirqd(struct task_struct* klitirqd,
8149 + struct task_struct* old_owner)
8150 +{
8151 + cfifo_domain_t* cluster = task_cpu_cluster(klitirqd);
8152 +
8153 + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
8154 +
8155 + raw_spin_lock(&cluster->cfifo_lock);
8156 +
8157 + TRACE_TASK(klitirqd, "priority restored\n");
8158 +
8159 + if(tsk_rt(klitirqd)->scheduled_on != NO_CPU)
8160 + {
8161 + tsk_rt(klitirqd)->inh_task = NULL;
8162 +
8163 + /* Check if rescheduling is necessary. We can't use heap_decrease()
8164 + * since the priority was effectively lowered. */
8165 + unlink(klitirqd);
8166 + cfifo_job_arrival(klitirqd);
8167 + }
8168 + else
8169 + {
8170 + __set_priority_inheritance(klitirqd, NULL);
8171 + }
8172 +
8173 + tsk_rt(old_owner)->cur_klitirqd = NULL;
8174 +
8175 + raw_spin_unlock(&cluster->cfifo_lock);
8176 +}
8177 +#endif // CONFIG_LITMUS_SOFTIRQD
8178 +
8179 +
8180 +/* ******************** KFMLP support ********************** */
8181 +
8182 +/* struct for semaphore with priority inheritance */
8183 +struct kfmlp_queue
8184 +{
8185 + wait_queue_head_t wait;
8186 + struct task_struct* owner;
8187 + struct task_struct* hp_waiter;
8188 + int count; /* number of waiters + holder */
8189 +};
8190 +
8191 +struct kfmlp_semaphore
8192 +{
8193 + struct litmus_lock litmus_lock;
8194 +
8195 + spinlock_t lock;
8196 +
8197 + int num_resources; /* aka k */
8198 + struct kfmlp_queue *queues; /* array */
8199 + struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */
8200 +};
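+/*
+ * Layout sketch: a KFMLP semaphore guards k (= num_resources) replicas of a
+ * resource.  queues[i] serializes access to replica i; count includes the
+ * holder plus all waiters.  shortest_queue caches the least-loaded queue so
+ * that cfifo_kfmlp_lock() can pick a queue without scanning; it is kept up
+ * to date on every enqueue and dequeue.
+ */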
8201 +
8202 +static inline struct kfmlp_semaphore* kfmlp_from_lock(struct litmus_lock* lock)
8203 +{
8204 + return container_of(lock, struct kfmlp_semaphore, litmus_lock);
8205 +}
8206 +
8207 +static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem,
8208 + struct kfmlp_queue* queue)
8209 +{
8210 + return (queue - &sem->queues[0]);
8211 +}
8212 +
8213 +static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem,
8214 + struct task_struct* holder)
8215 +{
8216 + int i;
8217 + for(i = 0; i < sem->num_resources; ++i)
8218 + if(sem->queues[i].owner == holder)
8219 + return(&sem->queues[i]);
8220 + return(NULL);
8221 +}
8222 +
8223 +/* caller is responsible for locking */
8224 +static struct task_struct* kfmlp_find_hp_waiter(struct kfmlp_queue *kqueue,
8225 + struct task_struct *skip)
8226 +{
8227 + struct list_head *pos;
8228 + struct task_struct *queued, *found = NULL;
8229 +
8230 + list_for_each(pos, &kqueue->wait.task_list) {
8231 + queued = (struct task_struct*) list_entry(pos, wait_queue_t,
8232 + task_list)->private;
8233 +
8234 + /* Compare task prios, find high prio task. */
8235 + if (queued != skip && fifo_higher_prio(queued, found))
8236 + found = queued;
8237 + }
8238 + return found;
8239 +}
8240 +
8241 +static inline struct kfmlp_queue* kfmlp_find_shortest(
8242 + struct kfmlp_semaphore* sem,
8243 + struct kfmlp_queue* search_start)
8244 +{
8245 + // we start our search at search_start instead of at the beginning of the
8246 + // queue list to load-balance across all resources.
8247 + struct kfmlp_queue* step = search_start;
8248 + struct kfmlp_queue* shortest = sem->shortest_queue;
8249 +
8250 + do
8251 + {
8252 + step = (step+1 != &sem->queues[sem->num_resources]) ?
8253 + step+1 : &sem->queues[0];
8254 + if(step->count < shortest->count)
8255 + {
8256 + shortest = step;
8257 + if(step->count == 0)
8258 + break; /* can't get any shorter */
8259 + }
8260 + }while(step != search_start);
8261 +
8262 + return(shortest);
8263 +}
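+/*
+ * Example (assuming sem->shortest_queue currently points at queues[0]): with
+ * k = 3 and queue counts {2, 0, 1}, a call with search_start = &queues[0]
+ * steps to queues[1], sees count == 0 and returns it immediately.  Starting
+ * the scan at search_start rather than at queues[0] spreads ties across the
+ * replicas.
+ */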
8264 +
8265 +static struct task_struct* kfmlp_remove_hp_waiter(struct kfmlp_semaphore* sem)
8266 +{
8267 + /* must hold sem->lock */
8268 +
8269 + struct kfmlp_queue *my_queue = NULL;
8270 + struct task_struct *max_hp = NULL;
8271 +
8272 +
8273 + struct list_head *pos;
8274 + struct task_struct *queued;
8275 + int i;
8276 +
8277 + for(i = 0; i < sem->num_resources; ++i)
8278 + {
8279 + if( (sem->queues[i].count > 1) &&
8280 + ((my_queue == NULL) ||
8281 + (fifo_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) )
8282 + {
8283 + my_queue = &sem->queues[i];
8284 + }
8285 + }
8286 +
8287 + if(my_queue)
8288 + {
8289 + cfifo_domain_t* cluster;
8290 +
8291 + max_hp = my_queue->hp_waiter;
8292 + BUG_ON(!max_hp);
8293 +
8294 + TRACE_CUR("queue %d: stealing %s/%d from queue %d\n",
8295 + kfmlp_get_idx(sem, my_queue),
8296 + max_hp->comm, max_hp->pid,
8297 + kfmlp_get_idx(sem, my_queue));
8298 +
8299 + my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, max_hp);
8300 +
8301 + /*
8302 + if(my_queue->hp_waiter)
8303 + TRACE_CUR("queue %d: new hp_waiter is %s/%d\n",
8304 + kfmlp_get_idx(sem, my_queue),
8305 + my_queue->hp_waiter->comm,
8306 + my_queue->hp_waiter->pid);
8307 + else
8308 + TRACE_CUR("queue %d: new hp_waiter is %p\n",
8309 + kfmlp_get_idx(sem, my_queue), NULL);
8310 + */
8311 +
8312 + cluster = task_cpu_cluster(max_hp);
8313 +
8314 + raw_spin_lock(&cluster->cfifo_lock);
8315 +
8316 + /*
8317 + if(my_queue->owner)
8318 + TRACE_CUR("queue %d: owner is %s/%d\n",
8319 + kfmlp_get_idx(sem, my_queue),
8320 + my_queue->owner->comm,
8321 + my_queue->owner->pid);
8322 + else
8323 + TRACE_CUR("queue %d: owner is %p\n",
8324 + kfmlp_get_idx(sem, my_queue),
8325 + NULL);
8326 + */
8327 +
8328 + if(tsk_rt(my_queue->owner)->inh_task == max_hp)
8329 + {
8330 + __clear_priority_inheritance(my_queue->owner);
8331 + if(my_queue->hp_waiter != NULL)
8332 + {
8333 + __set_priority_inheritance(my_queue->owner, my_queue->hp_waiter);
8334 + }
8335 + }
8336 + raw_spin_unlock(&cluster->cfifo_lock);
8337 +
8338 + list_for_each(pos, &my_queue->wait.task_list)
8339 + {
8340 + queued = (struct task_struct*) list_entry(pos, wait_queue_t,
8341 + task_list)->private;
8342 + /* Compare task prios, find high prio task. */
8343 + if (queued == max_hp)
8344 + {
8345 + /*
8346 + TRACE_CUR("queue %d: found entry in wait queue. REMOVING!\n",
8347 + kfmlp_get_idx(sem, my_queue));
8348 + */
8349 + __remove_wait_queue(&my_queue->wait,
8350 + list_entry(pos, wait_queue_t, task_list));
8351 + break;
8352 + }
8353 + }
8354 + --(my_queue->count);
8355 + }
8356 +
8357 + return(max_hp);
8358 +}
8359 +
8360 +int cfifo_kfmlp_lock(struct litmus_lock* l)
8361 +{
8362 + struct task_struct* t = current;
8363 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
8364 + struct kfmlp_queue* my_queue;
8365 + wait_queue_t wait;
8366 + unsigned long flags;
8367 +
8368 + if (!is_realtime(t))
8369 + return -EPERM;
8370 +
8371 + spin_lock_irqsave(&sem->lock, flags);
8372 +
8373 + my_queue = sem->shortest_queue;
8374 +
8375 + if (my_queue->owner) {
8376 + /* resource is not free => must suspend and wait */
8377 + TRACE_CUR("queue %d: Resource is not free => must suspend and wait.\n",
8378 + kfmlp_get_idx(sem, my_queue));
8379 +
8380 + init_waitqueue_entry(&wait, t);
8381 +
8382 + /* FIXME: interruptible would be nice some day */
8383 + set_task_state(t, TASK_UNINTERRUPTIBLE);
8384 +
8385 + __add_wait_queue_tail_exclusive(&my_queue->wait, &wait);
8386 +
8387 + /* check if we need to activate priority inheritance */
8388 + if (fifo_higher_prio(t, my_queue->hp_waiter))
8389 + {
8390 + my_queue->hp_waiter = t;
8391 + if (fifo_higher_prio(t, my_queue->owner))
8392 + {
8393 + set_priority_inheritance(my_queue->owner, my_queue->hp_waiter);
8394 + }
8395 + }
8396 +
8397 + ++(my_queue->count);
8398 + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
8399 +
8400 + /* release lock before sleeping */
8401 + spin_unlock_irqrestore(&sem->lock, flags);
8402 +
8403 + /* We depend on the FIFO order. Thus, we don't need to recheck
8404 + * when we wake up; we are guaranteed to have the lock since
8405 + * there is only one wake up per release (or steal).
8406 + */
8407 + schedule();
8408 +
8409 +
8410 + if(my_queue->owner == t)
8411 + {
8412 + TRACE_CUR("queue %d: acquired through waiting\n",
8413 + kfmlp_get_idx(sem, my_queue));
8414 + }
8415 + else
8416 + {
8417 + /* this case may happen if our wait entry was stolen
8418 + between queues. record where we went.*/
8419 + my_queue = kfmlp_get_queue(sem, t);
8420 + BUG_ON(!my_queue);
8421 + TRACE_CUR("queue %d: acquired through stealing\n",
8422 + kfmlp_get_idx(sem, my_queue));
8423 + }
8424 + }
8425 + else
8426 + {
8427 + TRACE_CUR("queue %d: acquired immediately\n",
8428 + kfmlp_get_idx(sem, my_queue));
8429 +
8430 + my_queue->owner = t;
8431 +
8432 + ++(my_queue->count);
8433 + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
8434 +
8435 + spin_unlock_irqrestore(&sem->lock, flags);
8436 + }
8437 +
8438 + return kfmlp_get_idx(sem, my_queue);
8439 +}
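+/*
+ * Note: on success the return value is the index of the queue/replica that
+ * was acquired (kfmlp_get_idx()), so the caller knows which of the k
+ * instances of the protected resource it now holds -- even if its wait entry
+ * was stolen into a different queue while it slept.
+ */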
8440 +
8441 +int cfifo_kfmlp_unlock(struct litmus_lock* l)
8442 +{
8443 + struct task_struct *t = current, *next;
8444 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
8445 + struct kfmlp_queue *my_queue;
8446 + unsigned long flags;
8447 + int err = 0;
8448 +
8449 + spin_lock_irqsave(&sem->lock, flags);
8450 +
8451 + my_queue = kfmlp_get_queue(sem, t);
8452 +
8453 + if (!my_queue) {
8454 + err = -EINVAL;
8455 + goto out;
8456 + }
8457 +
8458 + /* check if there are jobs waiting for this resource */
8459 + next = __waitqueue_remove_first(&my_queue->wait);
8460 + if (next) {
8461 + /*
8462 + TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n",
8463 + kfmlp_get_idx(sem, my_queue),
8464 + next->comm, next->pid);
8465 + */
8466 + /* next becomes the resource holder */
8467 + my_queue->owner = next;
8468 +
8469 + --(my_queue->count);
8470 + if(my_queue->count < sem->shortest_queue->count)
8471 + {
8472 + sem->shortest_queue = my_queue;
8473 + }
8474 +
8475 + TRACE_CUR("queue %d: lock ownership passed to %s/%d\n",
8476 + kfmlp_get_idx(sem, my_queue), next->comm, next->pid);
8477 +
8478 + /* determine new hp_waiter if necessary */
8479 + if (next == my_queue->hp_waiter) {
8480 + TRACE_TASK(next, "was highest-prio waiter\n");
8481 + /* next has the highest priority --- it doesn't need to
8482 + * inherit. However, we need to make sure that the
8483 + * next-highest priority in the queue is reflected in
8484 + * hp_waiter. */
8485 + my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next);
8486 + if (my_queue->hp_waiter)
8487 + TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue));
8488 + else
8489 + TRACE("queue %d: no further waiters\n", kfmlp_get_idx(sem, my_queue));
8490 + } else {
8491 + /* Well, if next is not the highest-priority waiter,
8492 + * then it ought to inherit the highest-priority
8493 + * waiter's priority. */
8494 + set_priority_inheritance(next, my_queue->hp_waiter);
8495 + }
8496 +
8497 + /* wake up next */
8498 + wake_up_process(next);
8499 + }
8500 + else
8501 + {
8502 + TRACE_CUR("queue %d: looking to steal someone...\n", kfmlp_get_idx(sem, my_queue));
8503 +
8504 + next = kfmlp_remove_hp_waiter(sem); /* returns NULL if nothing to steal */
8505 +
8506 + /*
8507 + if(next)
8508 + TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - steal\n",
8509 + kfmlp_get_idx(sem, my_queue),
8510 + next->comm, next->pid);
8511 + */
8512 +
8513 + my_queue->owner = next;
8514 +
8515 + if(next)
8516 + {
8517 + TRACE_CUR("queue %d: lock ownership passed to %s/%d (which was stolen)\n",
8518 + kfmlp_get_idx(sem, my_queue),
8519 + next->comm, next->pid);
8520 +
8521 + /* wake up next */
8522 + wake_up_process(next);
8523 + }
8524 + else
8525 + {
8526 + TRACE_CUR("queue %d: no one to steal.\n", kfmlp_get_idx(sem, my_queue));
8527 +
8528 + --(my_queue->count);
8529 + if(my_queue->count < sem->shortest_queue->count)
8530 + {
8531 + sem->shortest_queue = my_queue;
8532 + }
8533 + }
8534 + }
8535 +
8536 + /* we lose the benefit of priority inheritance (if any) */
8537 + if (tsk_rt(t)->inh_task)
8538 + clear_priority_inheritance(t);
8539 +
8540 +out:
8541 + spin_unlock_irqrestore(&sem->lock, flags);
8542 +
8543 + return err;
8544 +}
8545 +
8546 +int cfifo_kfmlp_close(struct litmus_lock* l)
8547 +{
8548 + struct task_struct *t = current;
8549 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
8550 + struct kfmlp_queue *my_queue;
8551 + unsigned long flags;
8552 +
8553 + int owner;
8554 +
8555 + spin_lock_irqsave(&sem->lock, flags);
8556 +
8557 + my_queue = kfmlp_get_queue(sem, t);
8558 + owner = (my_queue) ? (my_queue->owner == t) : 0;
8559 +
8560 + spin_unlock_irqrestore(&sem->lock, flags);
8561 +
8562 + if (owner)
8563 + cfifo_kfmlp_unlock(l);
8564 +
8565 + return 0;
8566 +}
8567 +
8568 +void cfifo_kfmlp_free(struct litmus_lock* l)
8569 +{
8570 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
8571 + kfree(sem->queues);
8572 + kfree(sem);
8573 +}
8574 +
8575 +static struct litmus_lock_ops cfifo_kfmlp_lock_ops = {
8576 + .close = cfifo_kfmlp_close,
8577 + .lock = cfifo_kfmlp_lock,
8578 + .unlock = cfifo_kfmlp_unlock,
8579 + .deallocate = cfifo_kfmlp_free,
8580 +};
8581 +
8582 +static struct litmus_lock* cfifo_new_kfmlp(void* __user arg, int* ret_code)
8583 +{
8584 + struct kfmlp_semaphore* sem;
8585 + int num_resources = 0;
8586 + int i;
8587 +
8588 + if(!access_ok(VERIFY_READ, arg, sizeof(num_resources)))
8589 + {
8590 + *ret_code = -EINVAL;
8591 + return(NULL);
8592 + }
8593 + if(__copy_from_user(&num_resources, arg, sizeof(num_resources)))
8594 + {
8595 + *ret_code = -EINVAL;
8596 + return(NULL);
8597 + }
8598 + if(num_resources < 1)
8599 + {
8600 + *ret_code = -EINVAL;
8601 + return(NULL);
8602 + }
8603 +
8604 + sem = kmalloc(sizeof(*sem), GFP_KERNEL);
8605 + if(!sem)
8606 + {
8607 + *ret_code = -ENOMEM;
8608 + return NULL;
8609 + }
8610 +
8611 + sem->queues = kmalloc(sizeof(struct kfmlp_queue)*num_resources, GFP_KERNEL);
8612 + if(!sem->queues)
8613 + {
8614 + kfree(sem);
8615 + *ret_code = -ENOMEM;
8616 + return NULL;
8617 + }
8618 +
8619 + sem->litmus_lock.ops = &cfifo_kfmlp_lock_ops;
8620 + spin_lock_init(&sem->lock);
8621 + sem->num_resources = num_resources;
8622 +
8623 + for(i = 0; i < num_resources; ++i)
8624 + {
8625 + sem->queues[i].owner = NULL;
8626 + sem->queues[i].hp_waiter = NULL;
8627 + init_waitqueue_head(&sem->queues[i].wait);
8628 + sem->queues[i].count = 0;
8629 + }
8630 +
8631 + sem->shortest_queue = &sem->queues[0];
8632 +
8633 + *ret_code = 0;
8634 + return &sem->litmus_lock;
8635 +}
8636 +
8637 +
8638 +/* **** lock constructor **** */
8639 +
8640 +static long cfifo_allocate_lock(struct litmus_lock **lock, int type,
8641 + void* __user arg)
8642 +{
8643 + int err = -ENXIO;
8644 +
8645 + /* C-FIFO currently only supports the FMLP for global resources
8646 + /* C-FIFO currently only supports the KFMLP for global resources
8647 + switch (type) {
8648 + case KFMLP_SEM:
8649 + *lock = cfifo_new_kfmlp(arg, &err);
8650 + break;
8651 + };
8652 +
8653 + return err;
8654 +}
8655 +
8656 +#endif // CONFIG_LITMUS_LOCKING
8657 +
8658 +
8659 +
8660 +
8661 +
8662 +
8663 +/* total number of clusters */
8664 +static int num_clusters;
8665 +/* we do not support clusters of different sizes */
8666 +static unsigned int cluster_size;
8667 +
8668 +#ifdef VERBOSE_INIT
8669 +static void print_cluster_topology(cpumask_var_t mask, int cpu)
8670 +{
8671 + int chk;
8672 + char buf[255];
8673 +
8674 + chk = cpulist_scnprintf(buf, 254, mask);
8675 + buf[chk] = '\0';
8676 + printk(KERN_INFO "CPU = %d, shared cpu(s) = %s\n", cpu, buf);
8677 +
8678 +}
8679 +#endif
8680 +
8681 +static int clusters_allocated = 0;
8682 +
8683 +static void cleanup_cfifo(void)
8684 +{
8685 + int i;
8686 +
8687 + if (clusters_allocated) {
8688 + for (i = 0; i < num_clusters; i++) {
8689 + kfree(cfifo[i].cpus);
8690 + kfree(cfifo[i].heap_node);
8691 + free_cpumask_var(cfifo[i].cpu_map);
8692 + }
8693 +
8694 + kfree(cfifo);
8695 + }
8696 +}
8697 +
8698 +static long cfifo_activate_plugin(void)
8699 +{
8700 + int i, j, cpu, ccpu, cpu_count;
8701 + cpu_entry_t *entry;
8702 +
8703 + cpumask_var_t mask;
8704 + int chk = 0;
8705 +
8706 + /* de-allocate old clusters, if any */
8707 + cleanup_cfifo();
8708 +
8709 + printk(KERN_INFO "C-FIFO: Activate Plugin, cluster configuration = %d\n",
8710 + cluster_config);
8711 +
8712 + /* need to get cluster_size first */
8713 + if(!zalloc_cpumask_var(&mask, GFP_ATOMIC))
8714 + return -ENOMEM;
8715 +
8716 + if (unlikely(cluster_config == GLOBAL_CLUSTER)) {
8717 + cluster_size = num_online_cpus();
8718 + } else {
8719 + chk = get_shared_cpu_map(mask, 0, cluster_config);
8720 + if (chk) {
8721 + /* if chk != 0 then it is the max allowed index */
8722 + printk(KERN_INFO "C-FIFO: Cluster configuration = %d "
8723 + "is not supported on this hardware.\n",
8724 + cluster_config);
8725 + /* User should notice that the configuration failed, so
8726 + * let's bail out. */
8727 + return -EINVAL;
8728 + }
8729 +
8730 + cluster_size = cpumask_weight(mask);
8731 + }
8732 +
8733 + if ((num_online_cpus() % cluster_size) != 0) {
8734 + /* this can't be right, some cpus are left out */
8735 + printk(KERN_ERR "C-FIFO: Trying to group %d cpus in %d!\n",
8736 + num_online_cpus(), cluster_size);
8737 + return -1;
8738 + }
8739 +
8740 + num_clusters = num_online_cpus() / cluster_size;
8741 + printk(KERN_INFO "C-FIFO: %d cluster(s) of size = %d\n",
8742 + num_clusters, cluster_size);
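+	/* e.g., 8 online CPUs with L2 caches shared by pairs of CPUs yield
+	 * cluster_size = 2 and num_clusters = 4; GLOBAL_CLUSTER instead
+	 * gives a single cluster spanning all 8 CPUs. */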
8743 +
8744 + /* initialize clusters */
8745 + cfifo = kmalloc(num_clusters * sizeof(cfifo_domain_t), GFP_ATOMIC);
8746 + for (i = 0; i < num_clusters; i++) {
8747 +
8748 + cfifo[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t),
8749 + GFP_ATOMIC);
8750 + cfifo[i].heap_node = kmalloc(
8751 + cluster_size * sizeof(struct bheap_node),
8752 + GFP_ATOMIC);
8753 + bheap_init(&(cfifo[i].cpu_heap));
8754 + fifo_domain_init(&(cfifo[i].domain), NULL, cfifo_release_jobs);
8755 +
8756 +
8757 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
8758 + cfifo[i].pending_tasklets.head = NULL;
8759 + cfifo[i].pending_tasklets.tail = &(cfifo[i].pending_tasklets.head);
8760 +#endif
8761 +
8762 + if(!zalloc_cpumask_var(&cfifo[i].cpu_map, GFP_ATOMIC))
8763 + return -ENOMEM;
8764 + }
8765 +
8766 + /* cycle through clusters and add cpus to them */
8767 + for (i = 0; i < num_clusters; i++) {
8768 +
8769 + for_each_online_cpu(cpu) {
8770 + /* check if the cpu is already in a cluster */
8771 + for (j = 0; j < num_clusters; j++)
8772 + if (cpumask_test_cpu(cpu, cfifo[j].cpu_map))
8773 + break;
8774 + /* if it is in a cluster go to next cpu */
8775 + if (j < num_clusters &&
8776 + cpumask_test_cpu(cpu, cfifo[j].cpu_map))
8777 + continue;
8778 +
8779 + /* this cpu isn't in any cluster */
8780 + /* get the shared cpus */
8781 + if (unlikely(cluster_config == GLOBAL_CLUSTER))
8782 + cpumask_copy(mask, cpu_online_mask);
8783 + else
8784 + get_shared_cpu_map(mask, cpu, cluster_config);
8785 +
8786 + cpumask_copy(cfifo[i].cpu_map, mask);
8787 +#ifdef VERBOSE_INIT
8788 + print_cluster_topology(mask, cpu);
8789 +#endif
8790 + /* add cpus to current cluster and init cpu_entry_t */
8791 + cpu_count = 0;
8792 + for_each_cpu(ccpu, cfifo[i].cpu_map) {
8793 +
8794 + entry = &per_cpu(cfifo_cpu_entries, ccpu);
8795 + cfifo[i].cpus[cpu_count] = entry;
8796 + atomic_set(&entry->will_schedule, 0);
8797 + entry->cpu = ccpu;
8798 + entry->cluster = &cfifo[i];
8799 + entry->hn = &(cfifo[i].heap_node[cpu_count]);
8800 + bheap_node_init(&entry->hn, entry);
8801 +
8802 + cpu_count++;
8803 +
8804 + entry->linked = NULL;
8805 + entry->scheduled = NULL;
8806 + update_cpu_position(entry);
8807 + }
8808 + /* done with this cluster */
8809 + break;
8810 + }
8811 + }
8812 +
8813 +#ifdef CONFIG_LITMUS_SOFTIRQD
8814 + {
8815 + /* distribute the daemons evenly across the clusters. */
8816 + int* affinity = kmalloc(NR_LITMUS_SOFTIRQD * sizeof(int), GFP_ATOMIC);
8817 + int num_daemons_per_cluster = NR_LITMUS_SOFTIRQD / num_clusters;
8818 + int left_over = NR_LITMUS_SOFTIRQD % num_clusters;
8819 +
8820 + int daemon = 0;
8821 + for(i = 0; i < num_clusters; ++i)
8822 + {
8823 + int num_on_this_cluster = num_daemons_per_cluster;
8824 + if(left_over)
8825 + {
8826 + ++num_on_this_cluster;
8827 + --left_over;
8828 + }
8829 +
8830 + for(j = 0; j < num_on_this_cluster; ++j)
8831 + {
8832 + // first CPU of this cluster
8833 + affinity[daemon++] = i*cluster_size;
8834 + }
8835 + }
8836 +
8837 + spawn_klitirqd(affinity);
8838 +
8839 + kfree(affinity);
8840 + }
8841 +#endif
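+/*
+ * Example of the distribution above: NR_LITMUS_SOFTIRQD = 4 daemons and
+ * num_clusters = 3 gives two daemons for cluster 0 and one each for clusters
+ * 1 and 2; each daemon is given affinity for the first CPU of its cluster
+ * (i * cluster_size).
+ */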
8842 +
8843 +#ifdef CONFIG_LITMUS_NVIDIA
8844 + init_nvidia_info();
8845 +#endif
8846 +
8847 + free_cpumask_var(mask);
8848 + clusters_allocated = 1;
8849 + return 0;
8850 +}
8851 +
8852 +/* Plugin object */
8853 +static struct sched_plugin cfifo_plugin __cacheline_aligned_in_smp = {
8854 + .plugin_name = "C-FIFO",
8855 + .finish_switch = cfifo_finish_switch,
8856 + .tick = cfifo_tick,
8857 + .task_new = cfifo_task_new,
8858 + .complete_job = complete_job,
8859 + .task_exit = cfifo_task_exit,
8860 + .schedule = cfifo_schedule,
8861 + .task_wake_up = cfifo_task_wake_up,
8862 + .task_block = cfifo_task_block,
8863 + .admit_task = cfifo_admit_task,
8864 + .activate_plugin = cfifo_activate_plugin,
8865 +#ifdef CONFIG_LITMUS_LOCKING
8866 + .allocate_lock = cfifo_allocate_lock,
8867 + .set_prio_inh = set_priority_inheritance,
8868 + .clear_prio_inh = clear_priority_inheritance,
8869 +#endif
8870 +#ifdef CONFIG_LITMUS_SOFTIRQD
8871 + .set_prio_inh_klitirqd = set_priority_inheritance_klitirqd,
8872 + .clear_prio_inh_klitirqd = clear_priority_inheritance_klitirqd,
8873 +#endif
8874 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
8875 + .enqueue_pai_tasklet = enqueue_pai_tasklet,
8876 + .run_tasklets = run_tasklets,
8877 +#endif
8878 +};
8879 +
8880 +static struct proc_dir_entry *cluster_file = NULL, *cfifo_dir = NULL;
8881 +
8882 +static int __init init_cfifo(void)
8883 +{
8884 + int err, fs;
8885 +
8886 + err = register_sched_plugin(&cfifo_plugin);
8887 + if (!err) {
8888 + fs = make_plugin_proc_dir(&cfifo_plugin, &cfifo_dir);
8889 + if (!fs)
8890 + cluster_file = create_cluster_file(cfifo_dir, &cluster_config);
8891 + else
8892 + printk(KERN_ERR "Could not allocate C-FIFO procfs dir.\n");
8893 + }
8894 + return err;
8895 +}
8896 +
8897 +static void clean_cfifo(void)
8898 +{
8899 + cleanup_cfifo();
8900 + if (cluster_file)
8901 + remove_proc_entry("cluster", cfifo_dir);
8902 + if (cfifo_dir)
8903 + remove_plugin_proc_dir(&cfifo_plugin);
8904 +}
8905 +
8906 +module_init(init_cfifo);
8907 +module_exit(clean_cfifo);
8908 diff --git a/litmus/sched_crm.c b/litmus/sched_crm.c
8909 new file mode 100644
8910 index 0000000..e51de10
8911 --- /dev/null
8912 +++ b/litmus/sched_crm.c
8913 @@ -0,0 +1,2099 @@
8914 +/*
8915 + * litmus/sched_crm.c
8916 + *
8917 + * Implementation of the C-RM scheduling algorithm.
8918 + *
8919 + * This implementation is based on G-EDF:
8920 + * - CPUs are clustered around L2 or L3 caches.
8921 + * - Cluster topology is automatically detected (this is arch dependent
8922 + * and works only on x86 at the moment --- and only with modern
8923 + * cpus that export cpuid4 information)
8924 + * - The plugin _does not_ attempt to put tasks in the right cluster, i.e.,
8925 + * the programmer needs to be aware of the topology to place tasks
8926 + * in the desired cluster
8927 + * - default clustering is around L2 cache (cache index = 2)
8928 + * supported clusters are: L1 (private cache: pedf), L2, L3, ALL (all
8929 + * online_cpus are placed in a single cluster).
8930 + *
8931 + * For details on functions, take a look at sched_gsn_edf.c
8932 + *
8933 + * Currently, we do not support changes in the number of online cpus.
8934 + * If the num_online_cpus() dynamically changes, the plugin is broken.
8935 + *
8936 + * This version uses the simple approach and serializes all scheduling
8937 + * decisions by the use of a queue lock. This is probably not the
8938 + * best way to do it, but it should suffice for now.
8939 + */
8940 +
8941 +#include <linux/spinlock.h>
8942 +#include <linux/percpu.h>
8943 +#include <linux/sched.h>
8944 +#include <linux/slab.h>
8945 +#include <linux/uaccess.h>
8946 +
8947 +#include <linux/module.h>
8948 +
8949 +#include <litmus/litmus.h>
8950 +#include <litmus/jobs.h>
8951 +#include <litmus/preempt.h>
8952 +#include <litmus/sched_plugin.h>
8953 +#include <litmus/rm_common.h>
8954 +#include <litmus/sched_trace.h>
8955 +
8956 +#include <litmus/clustered.h>
8957 +
8958 +#include <litmus/bheap.h>
8959 +
8960 +/* to configure the cluster size */
8961 +#include <litmus/litmus_proc.h>
8962 +
8963 +#ifdef CONFIG_SCHED_CPU_AFFINITY
8964 +#include <litmus/affinity.h>
8965 +#endif
8966 +
8967 +#ifdef CONFIG_LITMUS_SOFTIRQD
8968 +#include <litmus/litmus_softirq.h>
8969 +#endif
8970 +
8971 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
8972 +#include <linux/interrupt.h>
8973 +#include <litmus/trace.h>
8974 +#endif
8975 +
8976 +#ifdef CONFIG_LITMUS_NVIDIA
8977 +#include <litmus/nvidia_info.h>
8978 +#endif
8979 +
8980 +/* Reference configuration variable. Determines which cache level is used to
8981 + * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that
8982 + * all CPUs form a single cluster (just like GSN-EDF).
8983 + */
8984 +static enum cache_level cluster_config = GLOBAL_CLUSTER;
8985 +
8986 +struct clusterdomain;
8987 +
8988 +/* cpu_entry_t - maintain the linked and scheduled state
8989 + *
8990 + * A cpu also contains a pointer to the crm_domain_t cluster
8991 + * that owns it (struct clusterdomain*)
8992 + */
8993 +typedef struct {
8994 + int cpu;
8995 + struct clusterdomain* cluster; /* owning cluster */
8996 + struct task_struct* linked; /* only RT tasks */
8997 + struct task_struct* scheduled; /* only RT tasks */
8998 + atomic_t will_schedule; /* prevent unneeded IPIs */
8999 + struct bheap_node* hn;
9000 +} cpu_entry_t;
9001 +
9002 +/* one cpu_entry_t per CPU */
9003 +DEFINE_PER_CPU(cpu_entry_t, crm_cpu_entries);
9004 +
9005 +#define set_will_schedule() \
9006 + (atomic_set(&__get_cpu_var(crm_cpu_entries).will_schedule, 1))
9007 +#define clear_will_schedule() \
9008 + (atomic_set(&__get_cpu_var(crm_cpu_entries).will_schedule, 0))
9009 +#define test_will_schedule(cpu) \
9010 + (atomic_read(&per_cpu(crm_cpu_entries, cpu).will_schedule))
9011 +
9012 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
9013 +struct tasklet_head
9014 +{
9015 + struct tasklet_struct *head;
9016 + struct tasklet_struct **tail;
9017 +};
9018 +#endif
9019 +
9020 +/*
9021 + * In C-RM there is a crm domain _per_ cluster
9022 + * The number of clusters is dynamically determined according to the
9023 + * total cpu number and the cluster size
9024 + */
9025 +typedef struct clusterdomain {
9026 + /* rt_domain for this cluster */
9027 + rt_domain_t domain;
9028 + /* cpus in this cluster */
9029 + cpu_entry_t* *cpus;
9030 + /* map of this cluster cpus */
9031 + cpumask_var_t cpu_map;
9032 + /* the cpus queue themselves according to priority in here */
9033 + struct bheap_node *heap_node;
9034 + struct bheap cpu_heap;
9035 + /* lock for this cluster */
9036 +#define crm_lock domain.ready_lock
9037 +
9038 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
9039 + struct tasklet_head pending_tasklets;
9040 +#endif
9041 +} crm_domain_t;
9042 +
9043 +/* a crm_domain per cluster; allocation is done at init/activation time */
9044 +crm_domain_t *crm;
9045 +
9046 +#define remote_cluster(cpu) ((crm_domain_t *) per_cpu(crm_cpu_entries, cpu).cluster)
9047 +#define task_cpu_cluster(task) remote_cluster(get_partition(task))
9048 +
9049 +/* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling
9050 + * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose
9051 + * information during the initialization of the plugin (e.g., topology)
9052 +#define WANT_ALL_SCHED_EVENTS
9053 + */
9054 +#define VERBOSE_INIT
9055 +
9056 +static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
9057 +{
9058 + cpu_entry_t *a, *b;
9059 + a = _a->value;
9060 + b = _b->value;
9061 + /* Note that a and b are inverted: we want the lowest-priority CPU at
9062 + * the top of the heap.
9063 + */
9064 + return rm_higher_prio(b->linked, a->linked);
9065 +}
9066 +
9067 +/* update_cpu_position - Move the cpu entry to the correct place to maintain
9068 + * order in the cpu queue. Caller must hold crm lock.
9069 + */
9070 +static void update_cpu_position(cpu_entry_t *entry)
9071 +{
9072 + crm_domain_t *cluster = entry->cluster;
9073 +
9074 + if (likely(bheap_node_in_heap(entry->hn)))
9075 + bheap_delete(cpu_lower_prio,
9076 + &cluster->cpu_heap,
9077 + entry->hn);
9078 +
9079 + bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn);
9080 +}
9081 +
9082 +/* caller must hold crm lock */
9083 +static cpu_entry_t* lowest_prio_cpu(crm_domain_t *cluster)
9084 +{
9085 + struct bheap_node* hn;
9086 + hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap);
9087 + return hn->value;
9088 +}
9089 +
9090 +
9091 +/* link_task_to_cpu - Update the link of a CPU.
9092 + * Handles the case where the to-be-linked task is already
9093 + * scheduled on a different CPU.
9094 + */
9095 +static noinline void link_task_to_cpu(struct task_struct* linked,
9096 + cpu_entry_t *entry)
9097 +{
9098 + cpu_entry_t *sched;
9099 + struct task_struct* tmp;
9100 + int on_cpu;
9101 +
9102 + BUG_ON(linked && !is_realtime(linked));
9103 +
9104 + /* Currently linked task is set to be unlinked. */
9105 + if (entry->linked) {
9106 + entry->linked->rt_param.linked_on = NO_CPU;
9107 + }
9108 +
9109 + /* Link new task to CPU. */
9110 + if (linked) {
9111 + set_rt_flags(linked, RT_F_RUNNING);
9112 + /* handle the case where the task is already scheduled somewhere! */
9113 + on_cpu = linked->rt_param.scheduled_on;
9114 + if (on_cpu != NO_CPU) {
9115 + sched = &per_cpu(crm_cpu_entries, on_cpu);
9116 + /* this should only happen if not linked already */
9117 + BUG_ON(sched->linked == linked);
9118 +
9119 + /* If we are already scheduled on the CPU to which we
9120 + * wanted to link, we don't need to do the swap --
9121 + * we just link ourselves to the CPU and depend on
9122 + * the caller to get things right.
9123 + */
9124 + if (entry != sched) {
9125 + TRACE_TASK(linked,
9126 + "already scheduled on %d, updating link.\n",
9127 + sched->cpu);
9128 + tmp = sched->linked;
9129 + linked->rt_param.linked_on = sched->cpu;
9130 + sched->linked = linked;
9131 + update_cpu_position(sched);
9132 + linked = tmp;
9133 + }
9134 + }
9135 + if (linked) /* might be NULL due to swap */
9136 + linked->rt_param.linked_on = entry->cpu;
9137 + }
9138 + entry->linked = linked;
9139 +#ifdef WANT_ALL_SCHED_EVENTS
9140 + if (linked)
9141 + TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
9142 + else
9143 + TRACE("NULL linked to %d.\n", entry->cpu);
9144 +#endif
9145 + update_cpu_position(entry);
9146 +}
9147 +
9148 +/* unlink - Make sure a task is not linked any longer to an entry
9149 + * where it was linked before. Must hold crm_lock.
9150 + */
9151 +static noinline void unlink(struct task_struct* t)
9152 +{
9153 + cpu_entry_t *entry;
9154 +
9155 + if (t->rt_param.linked_on != NO_CPU) {
9156 + /* unlink */
9157 + entry = &per_cpu(crm_cpu_entries, t->rt_param.linked_on);
9158 + t->rt_param.linked_on = NO_CPU;
9159 + link_task_to_cpu(NULL, entry);
9160 + } else if (is_queued(t)) {
9161 + /* This is an interesting situation: t is scheduled,
9162 + * but was just recently unlinked. It cannot be
9163 + * linked anywhere else (because then it would have
9164 + * been relinked to this CPU), thus it must be in some
9165 + * queue. We must remove it from the list in this
9166 + * case.
9167 + *
9168 + * in the C-RM case it should be somewhere in the queue for
9169 + * its domain, therefore we can get the domain using
9170 + * task_cpu_cluster
9171 + */
9172 + remove(&(task_cpu_cluster(t))->domain, t);
9173 + }
9174 +}
9175 +
9176 +
9177 +/* preempt - force a CPU to reschedule
9178 + */
9179 +static void preempt(cpu_entry_t *entry)
9180 +{
9181 + preempt_if_preemptable(entry->scheduled, entry->cpu);
9182 +}
9183 +
9184 +/* requeue - Put an unlinked task into c-rm domain.
9185 + * Caller must hold crm_lock.
9186 + */
9187 +static noinline void requeue(struct task_struct* task)
9188 +{
9189 + crm_domain_t *cluster = task_cpu_cluster(task);
9190 + BUG_ON(!task);
9191 + /* sanity check before insertion */
9192 + BUG_ON(is_queued(task));
9193 +
9194 + if (is_released(task, litmus_clock()))
9195 + __add_ready(&cluster->domain, task);
9196 + else {
9197 + /* it has got to wait */
9198 + add_release(&cluster->domain, task);
9199 + }
9200 +}
9201 +
9202 +#ifdef CONFIG_SCHED_CPU_AFFINITY
9203 +static cpu_entry_t* crm_get_nearest_available_cpu(
9204 + crm_domain_t *cluster, cpu_entry_t* start)
9205 +{
9206 + cpu_entry_t* affinity;
9207 +
9208 + get_nearest_available_cpu(affinity, start, crm_cpu_entries, -1);
9209 +
9210 + /* make sure CPU is in our cluster */
9211 + if(affinity && cpu_isset(affinity->cpu, *cluster->cpu_map))
9212 + return(affinity);
9213 + else
9214 + return(NULL);
9215 +}
9216 +#endif
9217 +
9218 +
9219 +/* check for any necessary preemptions */
9220 +static void check_for_preemptions(crm_domain_t *cluster)
9221 +{
9222 + struct task_struct *task;
9223 + cpu_entry_t *last;
9224 +
9225 + for(last = lowest_prio_cpu(cluster);
9226 + rm_preemption_needed(&cluster->domain, last->linked);
9227 + last = lowest_prio_cpu(cluster)) {
9228 + /* preemption necessary */
9229 + task = __take_ready(&cluster->domain);
9230 +#ifdef CONFIG_SCHED_CPU_AFFINITY
9231 + {
9232 + cpu_entry_t* affinity =
9233 + crm_get_nearest_available_cpu(cluster,
9234 + &per_cpu(crm_cpu_entries, task_cpu(task)));
9235 + if(affinity)
9236 + last = affinity;
9237 + else if(last->linked)
9238 + requeue(last->linked);
9239 + }
9240 +#else
9241 + if (last->linked)
9242 + requeue(last->linked);
9243 +#endif
9244 + TRACE("check_for_preemptions: attempting to link task %d to %d\n",
9245 + task->pid, last->cpu);
9246 + link_task_to_cpu(task, last);
9247 + preempt(last);
9248 + }
9249 +}
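+/*
+ * Sketch of the loop above: while the highest-priority ready job outranks the
+ * job linked to the cluster's lowest-priority CPU, dequeue it and link it
+ * either to a free CPU near its last CPU (with CONFIG_SCHED_CPU_AFFINITY) or
+ * to that lowest-priority CPU after requeueing its previously linked job,
+ * then trigger a preemption on the chosen CPU.
+ */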
9250 +
9251 +/* crm_job_arrival: task is either resumed or released */
9252 +static noinline void crm_job_arrival(struct task_struct* task)
9253 +{
9254 + crm_domain_t *cluster = task_cpu_cluster(task);
9255 + BUG_ON(!task);
9256 +
9257 + requeue(task);
9258 + check_for_preemptions(cluster);
9259 +}
9260 +
9261 +static void crm_release_jobs(rt_domain_t* rt, struct bheap* tasks)
9262 +{
9263 + crm_domain_t* cluster = container_of(rt, crm_domain_t, domain);
9264 + unsigned long flags;
9265 +
9266 + raw_spin_lock_irqsave(&cluster->crm_lock, flags);
9267 +
9268 + __merge_ready(&cluster->domain, tasks);
9269 + check_for_preemptions(cluster);
9270 +
9271 + raw_spin_unlock_irqrestore(&cluster->crm_lock, flags);
9272 +}
9273 +
9274 +/* caller holds crm_lock */
9275 +static noinline void job_completion(struct task_struct *t, int forced)
9276 +{
9277 + BUG_ON(!t);
9278 +
9279 + sched_trace_task_completion(t, forced);
9280 +
9281 +#ifdef CONFIG_LITMUS_NVIDIA
9282 + atomic_set(&tsk_rt(t)->nv_int_count, 0);
9283 +#endif
9284 +
9285 + TRACE_TASK(t, "job_completion().\n");
9286 +
9287 + /* set flags */
9288 + set_rt_flags(t, RT_F_SLEEP);
9289 + /* prepare for next period */
9290 + prepare_for_next_period(t);
9291 + if (is_released(t, litmus_clock()))
9292 + sched_trace_task_release(t);
9293 + /* unlink */
9294 + unlink(t);
9295 + /* requeue
9296 + * But don't requeue a blocking task. */
9297 + if (is_running(t))
9298 + crm_job_arrival(t);
9299 +}
9300 +
9301 +/* crm_tick - this function is called for every local timer
9302 + * interrupt.
9303 + *
9304 + * checks whether the current task has expired and checks
9305 + * whether we need to preempt it if it has not expired
9306 + */
9307 +static void crm_tick(struct task_struct* t)
9308 +{
9309 + if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
9310 + if (!is_np(t)) {
9311 + /* np tasks will be preempted when they become
9312 + * preemptable again
9313 + */
9314 + litmus_reschedule_local();
9315 + set_will_schedule();
9316 + TRACE("crm_scheduler_tick: "
9317 + "%d is preemptable "
9318 + " => FORCE_RESCHED\n", t->pid);
9319 + } else if (is_user_np(t)) {
9320 + TRACE("crm_scheduler_tick: "
9321 + "%d is non-preemptable, "
9322 + "preemption delayed.\n", t->pid);
9323 + request_exit_np(t);
9324 + }
9325 + }
9326 +}
9327 +
9328 +
9329 +
9330 +
9331 +
9332 +
9333 +
9334 +
9335 +
9336 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
9337 +
9338 +
9339 +static void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed)
9340 +{
9341 + if (!atomic_read(&tasklet->count)) {
9342 + if(tasklet->owner) {
9343 + sched_trace_tasklet_begin(tasklet->owner);
9344 + }
9345 +
9346 + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state))
9347 + {
9348 + BUG();
9349 + }
9350 + TRACE("%s: Invoking tasklet with owner pid = %d (flushed = %d).\n",
9351 + __FUNCTION__,
9352 + (tasklet->owner) ? tasklet->owner->pid : -1,
9353 + (tasklet->owner) ? 0 : 1);
9354 + tasklet->func(tasklet->data);
9355 + tasklet_unlock(tasklet);
9356 +
9357 + if(tasklet->owner) {
9358 + sched_trace_tasklet_end(tasklet->owner, flushed);
9359 + }
9360 + }
9361 + else {
9362 + BUG();
9363 + }
9364 +}
9365 +
9366 +
9367 +static void __extract_tasklets(crm_domain_t* cluster, struct task_struct* task, struct tasklet_head* task_tasklets)
9368 +{
9369 + struct tasklet_struct* step;
9370 + struct tasklet_struct* tasklet;
9371 + struct tasklet_struct* prev;
9372 +
9373 + task_tasklets->head = NULL;
9374 + task_tasklets->tail = &(task_tasklets->head);
9375 +
9376 + prev = NULL;
9377 + for(step = cluster->pending_tasklets.head; step != NULL; step = step->next)
9378 + {
9379 + if(step->owner == task)
9380 + {
9381 + TRACE("%s: Found tasklet to flush: %d\n", __FUNCTION__, step->owner->pid);
9382 +
9383 + tasklet = step;
9384 +
9385 + if(prev) {
9386 + prev->next = tasklet->next;
9387 + }
9388 + else if(cluster->pending_tasklets.head == tasklet) {
9389 + // we're at the head.
9390 + cluster->pending_tasklets.head = tasklet->next;
9391 + }
9392 +
9393 + if(cluster->pending_tasklets.tail == &tasklet) {
9394 + // we're at the tail
9395 + if(prev) {
9396 + cluster->pending_tasklets.tail = &prev;
9397 + }
9398 + else {
9399 + cluster->pending_tasklets.tail = &(cluster->pending_tasklets.head);
9400 + }
9401 + }
9402 +
9403 + tasklet->next = NULL;
9404 + *(task_tasklets->tail) = tasklet;
9405 + task_tasklets->tail = &(tasklet->next);
9406 + }
9407 + else {
9408 + prev = step;
9409 + }
9410 + }
9411 +}
9412 +
9413 +static void flush_tasklets(crm_domain_t* cluster, struct task_struct* task)
9414 +{
9415 +#if 0
9416 + unsigned long flags;
9417 + struct tasklet_head task_tasklets;
9418 + struct tasklet_struct* step;
9419 +
9420 + raw_spin_lock_irqsave(&cluster->crm_lock, flags);
9421 + __extract_tasklets(cluster, task, &task_tasklets);
9422 + raw_spin_unlock_irqrestore(&cluster->crm_lock, flags);
9423 +
9424 + if(cluster->pending_tasklets.head != NULL) {
9425 + TRACE("%s: Flushing tasklets for %d...\n", __FUNCTION__, task->pid);
9426 + }
9427 +
9428 + // now execute any flushed tasklets.
9429 + for(step = cluster->pending_tasklets.head; step != NULL; /**/)
9430 + {
9431 + struct tasklet_struct* temp = step->next;
9432 +
9433 + step->next = NULL;
9434 + __do_lit_tasklet(step, 1ul);
9435 +
9436 + step = temp;
9437 + }
9438 +#endif
9439 +
9440 + // lazy flushing.
9441 + // just change ownership to NULL and let an idle processor
9442 + // take care of it. :P
9443 +
9444 + struct tasklet_struct* step;
9445 + unsigned long flags;
9446 +
9447 + raw_spin_lock_irqsave(&cluster->crm_lock, flags);
9448 +
9449 + for(step = cluster->pending_tasklets.head; step != NULL; step = step->next)
9450 + {
9451 + if(step->owner == task)
9452 + {
9453 + TRACE("%s: Found tasklet to flush: %d\n", __FUNCTION__, step->owner->pid);
9454 + step->owner = NULL;
9455 + }
9456 + }
9457 +
9458 + raw_spin_unlock_irqrestore(&cluster->crm_lock, flags);
9459 +}
9460 +
9461 +
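+/* Drain the cluster's pending tasklet list: repeatedly dequeue and execute
+ * the tasklet at the head as long as its owner outranks sched_task; stop as
+ * soon as the head no longer has higher priority or the list is empty. The
+ * cluster lock is only held while manipulating the list, not while the
+ * tasklet body runs. */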
9462 +static void do_lit_tasklets(crm_domain_t* cluster, struct task_struct* sched_task)
9463 +{
9464 + int work_to_do = 1;
9465 + struct tasklet_struct *tasklet = NULL;
9466 + //struct tasklet_struct *step;
9467 + unsigned long flags;
9468 +
9469 + while(work_to_do) {
9470 +
9471 + TS_NV_SCHED_BOTISR_START;
9472 +
9473 + // remove tasklet at head of list if it has higher priority.
9474 + raw_spin_lock_irqsave(&cluster->crm_lock, flags);
9475 +
9476 + /*
9477 + step = cluster->pending_tasklets.head;
9478 + TRACE("%s: (BEFORE) dumping tasklet queue...\n", __FUNCTION__);
9479 + while(step != NULL){
9480 + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid);
9481 + step = step->next;
9482 + }
9483 + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1);
9484 + TRACE("%s: done.\n", __FUNCTION__);
9485 + */
9486 +
9487 + if(cluster->pending_tasklets.head != NULL) {
9488 + // remove tasklet at head.
9489 + tasklet = cluster->pending_tasklets.head;
9490 +
9491 + if(rm_higher_prio(tasklet->owner, sched_task)) {
9492 +
9493 + if(NULL == tasklet->next) {
9494 + // tasklet is at the head, list only has one element
9495 +				// tasklet is at the head and the list has only one element
9496 + cluster->pending_tasklets.tail = &(cluster->pending_tasklets.head);
9497 + }
9498 +
9499 + // remove the tasklet from the queue
9500 + cluster->pending_tasklets.head = tasklet->next;
9501 +
9502 + TRACE("%s: Removed tasklet for %d from tasklet queue.\n", __FUNCTION__, (tasklet->owner) ? tasklet->owner->pid : -1);
9503 + }
9504 + else {
9505 + TRACE("%s: Pending tasklet (%d) does not have priority to run on this CPU (%d).\n", __FUNCTION__, (tasklet->owner) ? tasklet->owner->pid : -1, smp_processor_id());
9506 + tasklet = NULL;
9507 + }
9508 + }
9509 + else {
9510 + TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__);
9511 + }
9512 +
9513 + /*
9514 + step = cluster->pending_tasklets.head;
9515 + TRACE("%s: (AFTER) dumping tasklet queue...\n", __FUNCTION__);
9516 + while(step != NULL){
9517 + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid);
9518 + step = step->next;
9519 + }
9520 + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1);
9521 + TRACE("%s: done.\n", __FUNCTION__);
9522 + */
9523 +
9524 + raw_spin_unlock_irqrestore(&cluster->crm_lock, flags);
9525 +
9526 + TS_NV_SCHED_BOTISR_END;
9527 +
9528 + if(tasklet) {
9529 + __do_lit_tasklet(tasklet, 0ul);
9530 + tasklet = NULL;
9531 + }
9532 + else {
9533 + work_to_do = 0;
9534 + }
9535 + }
9536 +
9537 + //TRACE("%s: exited.\n", __FUNCTION__);
9538 +}
9539 +
9540 +
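+/* Plugin entry point for bottom-half processing: run the pending PAI
+ * tasklets of the cluster that the currently scheduled task belongs to
+ * (or of this CPU's cluster if the scheduled task is not real-time). */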
9541 +static void run_tasklets(struct task_struct* sched_task)
9542 +{
9543 + crm_domain_t* cluster;
9544 +
9545 +#if 0
9546 + int task_is_rt = is_realtime(sched_task);
9547 + crm_domain_t* cluster;
9548 +
9549 + if(is_realtime(sched_task)) {
9550 + cluster = task_cpu_cluster(sched_task);
9551 + }
9552 + else {
9553 + cluster = remote_cluster(get_cpu());
9554 + }
9555 +
9556 + if(cluster && cluster->pending_tasklets.head != NULL) {
9557 + TRACE("%s: There are tasklets to process.\n", __FUNCTION__);
9558 +
9559 + do_lit_tasklets(cluster, sched_task);
9560 + }
9561 +
9562 + if(!task_is_rt) {
9563 + put_cpu_no_resched();
9564 + }
9565 +#else
9566 +
9567 + preempt_disable();
9568 +
9569 + cluster = (is_realtime(sched_task)) ?
9570 + task_cpu_cluster(sched_task) :
9571 + remote_cluster(smp_processor_id());
9572 +
9573 + if(cluster && cluster->pending_tasklets.head != NULL) {
9574 + TRACE("%s: There are tasklets to process.\n", __FUNCTION__);
9575 + do_lit_tasklets(cluster, sched_task);
9576 + }
9577 +
9578 + preempt_enable_no_resched();
9579 +
9580 +#endif
9581 +}
9582 +
9583 +
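+/* Insert a tasklet into the cluster's pending list, keeping the list sorted
+ * by the RM priority of the tasklet owners (highest-priority owner at the
+ * head). Caller must hold the cluster's crm_lock. */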
9584 +static void __add_pai_tasklet(struct tasklet_struct* tasklet, crm_domain_t* cluster)
9585 +{
9586 + struct tasklet_struct* step;
9587 +
9588 + /*
9589 + step = cluster->pending_tasklets.head;
9590 + TRACE("%s: (BEFORE) dumping tasklet queue...\n", __FUNCTION__);
9591 + while(step != NULL){
9592 + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid);
9593 + step = step->next;
9594 + }
9595 + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1);
9596 + TRACE("%s: done.\n", __FUNCTION__);
9597 + */
9598 +
9599 + tasklet->next = NULL; // make sure there are no old values floating around
9600 +
9601 + step = cluster->pending_tasklets.head;
9602 + if(step == NULL) {
9603 + TRACE("%s: tasklet queue empty. inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid);
9604 + // insert at tail.
9605 + *(cluster->pending_tasklets.tail) = tasklet;
9606 + cluster->pending_tasklets.tail = &(tasklet->next);
9607 + }
9608 +	else if(rm_higher_prio(tasklet->owner, step->owner)) {
9609 +		// the new tasklet outranks every pending tasklet:
9610 +		// insert at head.
9611 +		TRACE("%s: tasklet belongs at front. inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid);
9612 +
9613 +		tasklet->next = step;
9614 +		cluster->pending_tasklets.head = tasklet;
9615 +	}
9616 + else {
9617 +
9618 + //WARN_ON(1 == 1);
9619 +
9620 + // insert the tasklet somewhere in the middle.
9621 +
9622 + TRACE("%s: tasklet belongs somewhere in the middle.\n", __FUNCTION__);
9623 +
9624 + while(step->next && rm_higher_prio(step->next->owner, tasklet->owner)) {
9625 + step = step->next;
9626 + }
9627 +
9628 + // insert tasklet right before step->next.
9629 +
9630 + TRACE("%s: inserting tasklet for %d between %d and %d.\n", __FUNCTION__,
9631 + tasklet->owner->pid,
9632 + (step->owner) ?
9633 + step->owner->pid :
9634 + -1,
9635 + (step->next) ?
9636 + ((step->next->owner) ?
9637 + step->next->owner->pid :
9638 + -1) :
9639 + -1);
9640 +
9641 + tasklet->next = step->next;
9642 + step->next = tasklet;
9643 +
9644 +		// patch up the tail if we appended at the end of the queue.
9645 +		if(tasklet->next == NULL)
9646 +		{
9647 +			TRACE("%s: %d is the new tasklet queue tail.\n", __FUNCTION__, tasklet->owner->pid);
9648 +			cluster->pending_tasklets.tail = &(tasklet->next);
9649 +		}
9650 + }
9651 +
9652 + /*
9653 + step = cluster->pending_tasklets.head;
9654 + TRACE("%s: (AFTER) dumping tasklet queue...\n", __FUNCTION__);
9655 + while(step != NULL){
9656 + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid);
9657 + step = step->next;
9658 + }
9659 + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1);
9660 + TRACE("%s: done.\n", __FUNCTION__);
9661 + */
9662 +
9663 +	// Old, unordered tail insertion kept for reference:
9664 + // tasklet->next = NULL;
9665 + // *(cluster->pending_tasklets.tail) = tasklet;
9666 + // cluster->pending_tasklets.tail = &tasklet->next;
9667 +}
9668 +
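+/* Plugin callback for a newly raised tasklet. If the owner outranks what the
+ * chosen CPU is currently running, the tasklet is executed immediately
+ * (locally, or by enqueueing it and preempting the target CPU); otherwise it
+ * is queued until a CPU becomes available. Returns 1 if the tasklet was
+ * handled here, 0 if it has no real-time owner. */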
9669 +static int enqueue_pai_tasklet(struct tasklet_struct* tasklet)
9670 +{
9671 + crm_domain_t *cluster = NULL;
9672 + cpu_entry_t *targetCPU = NULL;
9673 + int thisCPU;
9674 + int runLocal = 0;
9675 + int runNow = 0;
9676 + unsigned long flags;
9677 +
9678 + if(unlikely((tasklet->owner == NULL) || !is_realtime(tasklet->owner)))
9679 + {
9680 + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
9681 + return 0;
9682 + }
9683 +
9684 + cluster = task_cpu_cluster(tasklet->owner);
9685 +
9686 + raw_spin_lock_irqsave(&cluster->crm_lock, flags);
9687 +
9688 + thisCPU = smp_processor_id();
9689 +
9690 +#if 1
9691 +#ifdef CONFIG_SCHED_CPU_AFFINITY
9692 + {
9693 + cpu_entry_t* affinity = NULL;
9694 +
9695 + // use this CPU if it is in our cluster and isn't running any RT work.
9696 + if(cpu_isset(thisCPU, *cluster->cpu_map) && (__get_cpu_var(crm_cpu_entries).linked == NULL)) {
9697 + affinity = &(__get_cpu_var(crm_cpu_entries));
9698 + }
9699 + else {
9700 +			// this CPU is busy or shouldn't run tasklets for this cluster;
9701 +			// look for an available nearby CPU.
9702 + // NOTE: Affinity towards owner and not this CPU. Is this right?
9703 + affinity =
9704 + crm_get_nearest_available_cpu(cluster,
9705 + &per_cpu(crm_cpu_entries, task_cpu(tasklet->owner)));
9706 + }
9707 +
9708 + targetCPU = affinity;
9709 + }
9710 +#endif
9711 +#endif
9712 +
9713 + if (targetCPU == NULL) {
9714 + targetCPU = lowest_prio_cpu(cluster);
9715 + }
9716 +
9717 + if (rm_higher_prio(tasklet->owner, targetCPU->linked)) {
9718 + if (thisCPU == targetCPU->cpu) {
9719 + TRACE("%s: Run tasklet locally (and now).\n", __FUNCTION__);
9720 + runLocal = 1;
9721 + runNow = 1;
9722 + }
9723 + else {
9724 + TRACE("%s: Run tasklet remotely (and now).\n", __FUNCTION__);
9725 + runLocal = 0;
9726 + runNow = 1;
9727 + }
9728 + }
9729 + else {
9730 + runLocal = 0;
9731 + runNow = 0;
9732 + }
9733 +
9734 + if(!runLocal) {
9735 + // enqueue the tasklet
9736 + __add_pai_tasklet(tasklet, cluster);
9737 + }
9738 +
9739 + raw_spin_unlock_irqrestore(&cluster->crm_lock, flags);
9740 +
9741 +
9742 + if (runLocal /*&& runNow */) { // runNow == 1 is implied
9743 + TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__);
9744 + __do_lit_tasklet(tasklet, 0ul);
9745 + }
9746 + else if (runNow /*&& !runLocal */) { // runLocal == 0 is implied
9747 + TRACE("%s: Triggering CPU %d to run tasklet.\n", __FUNCTION__, targetCPU->cpu);
9748 + preempt(targetCPU); // need to be protected by crm_lock?
9749 + }
9750 + else {
9751 + TRACE("%s: Scheduling of tasklet was deferred.\n", __FUNCTION__);
9752 + }
9753 +
9754 + return(1); // success
9755 +}
9756 +
9757 +
9758 +#endif
9759 +
9760 +
9761 +
9762 +
9763 +
9764 +
9765 +
9766 +
9767 +
9768 +
9769 +
9770 +
9771 +
9772 +
9773 +
9774 +
9775 +
9776 +
9777 +
9778 +
9779 +
9780 +
9781 +
9782 +
9783 +
9784 +
9785 +
9786 +/* Getting schedule() right is a bit tricky. schedule() may not make any
9787 + * assumptions on the state of the current task since it may be called for a
9788 + * number of reasons. The reasons include a scheduler_tick() determined that it
9789 + * number of reasons. The reasons include: a scheduler_tick() determined that it
9790 + * was necessary, sys_exit_np() was called, some Linux
9791 + * subsystem determined so, or even (in the worst case) there is a bug
9792 + * current state is.
9793 + *
9794 + * The CPU could currently be scheduling a task (or not), be linked (or not).
9795 + *
9796 + * The following assertions for the scheduled task could hold:
9797 + *
9798 + * - !is_running(scheduled) // the job blocks
9799 + * - scheduled->timeslice == 0 // the job completed (forcefully)
9800 + * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall)
9801 + * - linked != scheduled // we need to reschedule (for any reason)
9802 + * - is_np(scheduled) // rescheduling must be delayed,
9803 + * sys_exit_np must be requested
9804 + *
9805 + * Any of these can occur together.
9806 + */
9807 +static struct task_struct* crm_schedule(struct task_struct * prev)
9808 +{
9809 + cpu_entry_t* entry = &__get_cpu_var(crm_cpu_entries);
9810 + crm_domain_t *cluster = entry->cluster;
9811 + int out_of_time, sleep, preempt, np, exists, blocks;
9812 + struct task_struct* next = NULL;
9813 +
9814 + raw_spin_lock(&cluster->crm_lock);
9815 + clear_will_schedule();
9816 +
9817 + /* sanity checking */
9818 + BUG_ON(entry->scheduled && entry->scheduled != prev);
9819 + BUG_ON(entry->scheduled && !is_realtime(prev));
9820 + BUG_ON(is_realtime(prev) && !entry->scheduled);
9821 +
9822 + /* (0) Determine state */
9823 + exists = entry->scheduled != NULL;
9824 + blocks = exists && !is_running(entry->scheduled);
9825 + out_of_time = exists &&
9826 + budget_enforced(entry->scheduled) &&
9827 + budget_exhausted(entry->scheduled);
9828 + np = exists && is_np(entry->scheduled);
9829 + sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
9830 + preempt = entry->scheduled != entry->linked;
9831 +
9832 +#ifdef WANT_ALL_SCHED_EVENTS
9833 + TRACE_TASK(prev, "invoked crm_schedule.\n");
9834 +#endif
9835 +
9836 + if (exists)
9837 + TRACE_TASK(prev,
9838 + "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
9839 + "state:%d sig:%d\n",
9840 + blocks, out_of_time, np, sleep, preempt,
9841 + prev->state, signal_pending(prev));
9842 + if (entry->linked && preempt)
9843 + TRACE_TASK(prev, "will be preempted by %s/%d\n",
9844 + entry->linked->comm, entry->linked->pid);
9845 +
9846 +
9847 + /* If a task blocks we have no choice but to reschedule.
9848 + */
9849 + if (blocks)
9850 + unlink(entry->scheduled);
9851 +
9852 + /* Request a sys_exit_np() call if we would like to preempt but cannot.
9853 + * We need to make sure to update the link structure anyway in case
9854 + * that we are still linked. Multiple calls to request_exit_np() don't
9855 + * hurt.
9856 + */
9857 + if (np && (out_of_time || preempt || sleep)) {
9858 + unlink(entry->scheduled);
9859 + request_exit_np(entry->scheduled);
9860 + }
9861 +
9862 + /* Any task that is preemptable and either exhausts its execution
9863 + * budget or wants to sleep completes. We may have to reschedule after
9864 + * this. Don't do a job completion if we block (can't have timers running
9865 + * for blocked jobs). Preemptions go first for the same reason.
9866 + */
9867 + if (!np && (out_of_time || sleep) && !blocks && !preempt)
9868 + job_completion(entry->scheduled, !sleep);
9869 +
9870 + /* Link pending task if we became unlinked.
9871 + */
9872 + if (!entry->linked)
9873 + link_task_to_cpu(__take_ready(&cluster->domain), entry);
9874 +
9875 + /* The final scheduling decision. Do we need to switch for some reason?
9876 + * If linked is different from scheduled, then select linked as next.
9877 + */
9878 + if ((!np || blocks) &&
9879 + entry->linked != entry->scheduled) {
9880 + /* Schedule a linked job? */
9881 + if (entry->linked) {
9882 + entry->linked->rt_param.scheduled_on = entry->cpu;
9883 + next = entry->linked;
9884 + }
9885 + if (entry->scheduled) {
9886 + /* not gonna be scheduled soon */
9887 + entry->scheduled->rt_param.scheduled_on = NO_CPU;
9888 + TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
9889 + }
9890 + } else
9891 + /* Only override Linux scheduler if we have a real-time task
9892 + * scheduled that needs to continue.
9893 + */
9894 + if (exists)
9895 + next = prev;
9896 +
9897 + sched_state_task_picked();
9898 + raw_spin_unlock(&cluster->crm_lock);
9899 +
9900 +#ifdef WANT_ALL_SCHED_EVENTS
9901 + TRACE("crm_lock released, next=0x%p\n", next);
9902 +
9903 + if (next)
9904 + TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
9905 + else if (exists && !next)
9906 + TRACE("becomes idle at %llu.\n", litmus_clock());
9907 +#endif
9908 +
9909 +
9910 + return next;
9911 +}
9912 +
9913 +
9914 +/* _finish_switch - we just finished the switch away from prev
9915 + */
9916 +static void crm_finish_switch(struct task_struct *prev)
9917 +{
9918 + cpu_entry_t* entry = &__get_cpu_var(crm_cpu_entries);
9919 +
9920 + entry->scheduled = is_realtime(current) ? current : NULL;
9921 +#ifdef WANT_ALL_SCHED_EVENTS
9922 + TRACE_TASK(prev, "switched away from\n");
9923 +#endif
9924 +}
9925 +
9926 +
9927 +/* Prepare a task for running in RT mode
9928 + */
9929 +static void crm_task_new(struct task_struct * t, int on_rq, int running)
9930 +{
9931 + unsigned long flags;
9932 + cpu_entry_t* entry;
9933 + crm_domain_t* cluster;
9934 +
9935 + TRACE("crm: task new %d\n", t->pid);
9936 +
9937 + /* the cluster doesn't change even if t is running */
9938 + cluster = task_cpu_cluster(t);
9939 +
9940 + raw_spin_lock_irqsave(&cluster->crm_lock, flags);
9941 +
9942 + /* setup job params */
9943 + release_at(t, litmus_clock());
9944 +
9945 + if (running) {
9946 + entry = &per_cpu(crm_cpu_entries, task_cpu(t));
9947 + BUG_ON(entry->scheduled);
9948 +
9949 + entry->scheduled = t;
9950 + tsk_rt(t)->scheduled_on = task_cpu(t);
9951 + } else {
9952 + t->rt_param.scheduled_on = NO_CPU;
9953 + }
9954 + t->rt_param.linked_on = NO_CPU;
9955 +
9956 + crm_job_arrival(t);
9957 + raw_spin_unlock_irqrestore(&cluster->crm_lock, flags);
9958 +}
9959 +
9960 +static void crm_task_wake_up(struct task_struct *task)
9961 +{
9962 + unsigned long flags;
9963 + //lt_t now;
9964 + crm_domain_t *cluster;
9965 +
9966 + TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
9967 +
9968 + cluster = task_cpu_cluster(task);
9969 +
9970 + raw_spin_lock_irqsave(&cluster->crm_lock, flags);
9971 +
9972 +#if 0 // sporadic task model
9973 + /* We need to take suspensions because of semaphores into
9974 + * account! If a job resumes after being suspended due to acquiring
9975 + * a semaphore, it should never be treated as a new job release.
9976 + */
9977 + if (get_rt_flags(task) == RT_F_EXIT_SEM) {
9978 + set_rt_flags(task, RT_F_RUNNING);
9979 + } else {
9980 + now = litmus_clock();
9981 + if (is_tardy(task, now)) {
9982 + /* new sporadic release */
9983 + release_at(task, now);
9984 + sched_trace_task_release(task);
9985 + }
9986 + else {
9987 + if (task->rt.time_slice) {
9988 + /* came back in time before deadline
9989 + */
9990 + set_rt_flags(task, RT_F_RUNNING);
9991 + }
9992 + }
9993 + }
9994 +#endif
9995 +
9996 + //BUG_ON(tsk_rt(task)->linked_on != NO_CPU);
9997 + set_rt_flags(task, RT_F_RUNNING); // periodic model
9998 +
9999 + if(tsk_rt(task)->linked_on == NO_CPU)
10000 + crm_job_arrival(task);
10001 + else
10002 +		TRACE_TASK(task, "still linked on a CPU at wake-up; skipping job arrival.\n");
10003 +
10004 + raw_spin_unlock_irqrestore(&cluster->crm_lock, flags);
10005 +}
10006 +
10007 +static void crm_task_block(struct task_struct *t)
10008 +{
10009 + unsigned long flags;
10010 + crm_domain_t *cluster;
10011 +
10012 + TRACE_TASK(t, "block at %llu\n", litmus_clock());
10013 +
10014 + cluster = task_cpu_cluster(t);
10015 +
10016 + /* unlink if necessary */
10017 + raw_spin_lock_irqsave(&cluster->crm_lock, flags);
10018 + unlink(t);
10019 + raw_spin_unlock_irqrestore(&cluster->crm_lock, flags);
10020 +
10021 + BUG_ON(!is_realtime(t));
10022 +}
10023 +
10024 +
10025 +static void crm_task_exit(struct task_struct * t)
10026 +{
10027 + unsigned long flags;
10028 + crm_domain_t *cluster = task_cpu_cluster(t);
10029 +
10030 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
10031 + flush_tasklets(cluster, t);
10032 +#endif
10033 +
10034 + /* unlink if necessary */
10035 + raw_spin_lock_irqsave(&cluster->crm_lock, flags);
10036 + unlink(t);
10037 + if (tsk_rt(t)->scheduled_on != NO_CPU) {
10038 + cpu_entry_t *cpu;
10039 + cpu = &per_cpu(crm_cpu_entries, tsk_rt(t)->scheduled_on);
10040 + cpu->scheduled = NULL;
10041 + tsk_rt(t)->scheduled_on = NO_CPU;
10042 + }
10043 + raw_spin_unlock_irqrestore(&cluster->crm_lock, flags);
10044 +
10045 + BUG_ON(!is_realtime(t));
10046 + TRACE_TASK(t, "RIP\n");
10047 +}
10048 +
10049 +static long crm_admit_task(struct task_struct* tsk)
10050 +{
10051 + return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
10052 +}
10053 +
10054 +
10055 +
10056 +
10057 +
10058 +
10059 +
10060 +
10061 +
10062 +
10063 +
10064 +
10065 +
10066 +#ifdef CONFIG_LITMUS_LOCKING
10067 +
10068 +#include <litmus/fdso.h>
10069 +
10070 +
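+/* Grant t the effective priority of prio_inh and re-position it in whatever
+ * structure currently holds it: the per-cluster CPU heap if t is linked to a
+ * CPU, or the ready/release heaps if t is queued. Caller must hold the
+ * cluster's crm_lock. */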
10071 +static void __set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
10072 +{
10073 + int linked_on;
10074 + int check_preempt = 0;
10075 +
10076 + crm_domain_t* cluster = task_cpu_cluster(t);
10077 +
10078 + if(prio_inh != NULL)
10079 + TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid);
10080 + else
10081 + TRACE_TASK(t, "inherits priority from %p\n", prio_inh);
10082 +
10083 + sched_trace_eff_prio_change(t, prio_inh);
10084 +
10085 + tsk_rt(t)->inh_task = prio_inh;
10086 +
10087 + linked_on = tsk_rt(t)->linked_on;
10088 +
10089 + /* If it is scheduled, then we need to reorder the CPU heap. */
10090 + if (linked_on != NO_CPU) {
10091 + TRACE_TASK(t, "%s: linked on %d\n",
10092 + __FUNCTION__, linked_on);
10093 + /* Holder is scheduled; need to re-order CPUs.
10094 + * We can't use heap_decrease() here since
10095 + * the cpu_heap is ordered in reverse direction, so
10096 + * it is actually an increase. */
10097 + bheap_delete(cpu_lower_prio, &cluster->cpu_heap,
10098 + per_cpu(crm_cpu_entries, linked_on).hn);
10099 + bheap_insert(cpu_lower_prio, &cluster->cpu_heap,
10100 + per_cpu(crm_cpu_entries, linked_on).hn);
10101 + } else {
10102 + /* holder may be queued: first stop queue changes */
10103 + raw_spin_lock(&cluster->domain.release_lock);
10104 + if (is_queued(t)) {
10105 + TRACE_TASK(t, "%s: is queued\n", __FUNCTION__);
10106 +
10107 + /* We need to update the position of holder in some
10108 +			 * heap. Note that this could be a release heap if
10109 +			 * budget enforcement is used and this job overran. */
10110 + check_preempt = !bheap_decrease(rm_ready_order, tsk_rt(t)->heap_node);
10111 +
10112 + } else {
10113 + /* Nothing to do: if it is not queued and not linked
10114 + * then it is either sleeping or currently being moved
10115 + * by other code (e.g., a timer interrupt handler) that
10116 + * will use the correct priority when enqueuing the
10117 + * task. */
10118 + TRACE_TASK(t, "%s: is NOT queued => Done.\n", __FUNCTION__);
10119 + }
10120 + raw_spin_unlock(&cluster->domain.release_lock);
10121 +
10122 + /* If holder was enqueued in a release heap, then the following
10123 + * preemption check is pointless, but we can't easily detect
10124 + * that case. If you want to fix this, then consider that
10125 + * simply adding a state flag requires O(n) time to update when
10126 + * releasing n tasks, which conflicts with the goal to have
10127 + * O(log n) merges. */
10128 + if (check_preempt) {
10129 + /* heap_decrease() hit the top level of the heap: make
10130 + * sure preemption checks get the right task, not the
10131 + * potentially stale cache. */
10132 + bheap_uncache_min(rm_ready_order, &cluster->domain.ready_queue);
10133 + check_for_preemptions(cluster);
10134 + }
10135 + }
10136 +}
10137 +
10138 +/* called with IRQs off */
10139 +static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
10140 +{
10141 + crm_domain_t* cluster = task_cpu_cluster(t);
10142 +
10143 + raw_spin_lock(&cluster->crm_lock);
10144 +
10145 + __set_priority_inheritance(t, prio_inh);
10146 +
10147 +#ifdef CONFIG_LITMUS_SOFTIRQD
10148 + if(tsk_rt(t)->cur_klitirqd != NULL)
10149 + {
10150 + TRACE_TASK(t, "%s/%d inherits a new priority!\n",
10151 + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
10152 +
10153 + __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh);
10154 + }
10155 +#endif
10156 +
10157 + raw_spin_unlock(&cluster->crm_lock);
10158 +}
10159 +
10160 +
10161 +/* called with IRQs off */
10162 +static void __clear_priority_inheritance(struct task_struct* t)
10163 +{
10164 + TRACE_TASK(t, "priority restored\n");
10165 +
10166 + if(tsk_rt(t)->scheduled_on != NO_CPU)
10167 + {
10168 + sched_trace_eff_prio_change(t, NULL);
10169 +
10170 + tsk_rt(t)->inh_task = NULL;
10171 +
10172 + /* Check if rescheduling is necessary. We can't use heap_decrease()
10173 + * since the priority was effectively lowered. */
10174 + unlink(t);
10175 + crm_job_arrival(t);
10176 + }
10177 + else
10178 + {
10179 + __set_priority_inheritance(t, NULL);
10180 + }
10181 +
10182 +#ifdef CONFIG_LITMUS_SOFTIRQD
10183 + if(tsk_rt(t)->cur_klitirqd != NULL)
10184 + {
10185 + TRACE_TASK(t, "%s/%d inheritance set back to owner.\n",
10186 + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
10187 +
10188 + if(tsk_rt(tsk_rt(t)->cur_klitirqd)->scheduled_on != NO_CPU)
10189 + {
10190 + sched_trace_eff_prio_change(tsk_rt(t)->cur_klitirqd, t);
10191 +
10192 + tsk_rt(tsk_rt(t)->cur_klitirqd)->inh_task = t;
10193 +
10194 + /* Check if rescheduling is necessary. We can't use heap_decrease()
10195 + * since the priority was effectively lowered. */
10196 + unlink(tsk_rt(t)->cur_klitirqd);
10197 + crm_job_arrival(tsk_rt(t)->cur_klitirqd);
10198 + }
10199 + else
10200 + {
10201 + __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, t);
10202 + }
10203 + }
10204 +#endif
10205 +}
10206 +
10207 +/* called with IRQs off */
10208 +static void clear_priority_inheritance(struct task_struct* t)
10209 +{
10210 + crm_domain_t* cluster = task_cpu_cluster(t);
10211 +
10212 + raw_spin_lock(&cluster->crm_lock);
10213 + __clear_priority_inheritance(t);
10214 + raw_spin_unlock(&cluster->crm_lock);
10215 +}
10216 +
10217 +
10218 +
10219 +#ifdef CONFIG_LITMUS_SOFTIRQD
10220 +/* called with IRQs off */
10221 +static void set_priority_inheritance_klitirqd(struct task_struct* klitirqd,
10222 + struct task_struct* old_owner,
10223 + struct task_struct* new_owner)
10224 +{
10225 + crm_domain_t* cluster = task_cpu_cluster(klitirqd);
10226 +
10227 + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
10228 +
10229 + raw_spin_lock(&cluster->crm_lock);
10230 +
10231 + if(old_owner != new_owner)
10232 + {
10233 + if(old_owner)
10234 + {
10235 + // unreachable?
10236 + tsk_rt(old_owner)->cur_klitirqd = NULL;
10237 + }
10238 +
10239 + TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n",
10240 + new_owner->comm, new_owner->pid);
10241 +
10242 + tsk_rt(new_owner)->cur_klitirqd = klitirqd;
10243 + }
10244 +
10245 + __set_priority_inheritance(klitirqd,
10246 + (tsk_rt(new_owner)->inh_task == NULL) ?
10247 + new_owner :
10248 + tsk_rt(new_owner)->inh_task);
10249 +
10250 + raw_spin_unlock(&cluster->crm_lock);
10251 +}
10252 +
10253 +/* called with IRQs off */
10254 +static void clear_priority_inheritance_klitirqd(struct task_struct* klitirqd,
10255 + struct task_struct* old_owner)
10256 +{
10257 + crm_domain_t* cluster = task_cpu_cluster(klitirqd);
10258 +
10259 + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
10260 +
10261 + raw_spin_lock(&cluster->crm_lock);
10262 +
10263 + TRACE_TASK(klitirqd, "priority restored\n");
10264 +
10265 + if(tsk_rt(klitirqd)->scheduled_on != NO_CPU)
10266 + {
10267 + tsk_rt(klitirqd)->inh_task = NULL;
10268 +
10269 + /* Check if rescheduling is necessary. We can't use heap_decrease()
10270 + * since the priority was effectively lowered. */
10271 + unlink(klitirqd);
10272 + crm_job_arrival(klitirqd);
10273 + }
10274 + else
10275 + {
10276 + __set_priority_inheritance(klitirqd, NULL);
10277 + }
10278 +
10279 + tsk_rt(old_owner)->cur_klitirqd = NULL;
10280 +
10281 + raw_spin_unlock(&cluster->crm_lock);
10282 +}
10283 +#endif // CONFIG_LITMUS_SOFTIRQD
10284 +
10285 +
10286 +/* ******************** KFMLP support ********************** */
10287 +
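+/* The k-FMLP manages 'num_resources' (k) replicas of a resource, each with
+ * its own FIFO wait queue. A task acquires whichever replica currently has
+ * the shortest queue; when a replica is released and its queue is empty,
+ * the highest-priority waiter of another replica may be stolen so that no
+ * replica sits idle while tasks are still waiting. */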
10288 +/* struct for semaphore with priority inheritance */
10289 +struct kfmlp_queue
10290 +{
10291 + wait_queue_head_t wait;
10292 + struct task_struct* owner;
10293 + struct task_struct* hp_waiter;
10294 + int count; /* number of waiters + holder */
10295 +};
10296 +
10297 +struct kfmlp_semaphore
10298 +{
10299 + struct litmus_lock litmus_lock;
10300 +
10301 + spinlock_t lock;
10302 +
10303 + int num_resources; /* aka k */
10304 + struct kfmlp_queue *queues; /* array */
10305 + struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */
10306 +};
10307 +
10308 +static inline struct kfmlp_semaphore* kfmlp_from_lock(struct litmus_lock* lock)
10309 +{
10310 + return container_of(lock, struct kfmlp_semaphore, litmus_lock);
10311 +}
10312 +
10313 +static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem,
10314 + struct kfmlp_queue* queue)
10315 +{
10316 + return (queue - &sem->queues[0]);
10317 +}
10318 +
10319 +static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem,
10320 + struct task_struct* holder)
10321 +{
10322 + int i;
10323 + for(i = 0; i < sem->num_resources; ++i)
10324 + if(sem->queues[i].owner == holder)
10325 + return(&sem->queues[i]);
10326 + return(NULL);
10327 +}
10328 +
10329 +/* caller is responsible for locking */
10330 +static struct task_struct* kfmlp_find_hp_waiter(struct kfmlp_queue *kqueue,
10331 + struct task_struct *skip)
10332 +{
10333 + struct list_head *pos;
10334 + struct task_struct *queued, *found = NULL;
10335 +
10336 + list_for_each(pos, &kqueue->wait.task_list) {
10337 + queued = (struct task_struct*) list_entry(pos, wait_queue_t,
10338 + task_list)->private;
10339 +
10340 + /* Compare task prios, find high prio task. */
10341 + if (queued != skip && rm_higher_prio(queued, found))
10342 + found = queued;
10343 + }
10344 + return found;
10345 +}
10346 +
10347 +static inline struct kfmlp_queue* kfmlp_find_shortest(
10348 + struct kfmlp_semaphore* sem,
10349 + struct kfmlp_queue* search_start)
10350 +{
10351 + // we start our search at search_start instead of at the beginning of the
10352 + // queue list to load-balance across all resources.
10353 + struct kfmlp_queue* step = search_start;
10354 + struct kfmlp_queue* shortest = sem->shortest_queue;
10355 +
10356 + do
10357 + {
10358 + step = (step+1 != &sem->queues[sem->num_resources]) ?
10359 + step+1 : &sem->queues[0];
10360 + if(step->count < shortest->count)
10361 + {
10362 + shortest = step;
10363 + if(step->count == 0)
10364 + break; /* can't get any shorter */
10365 + }
10366 + }while(step != search_start);
10367 +
10368 + return(shortest);
10369 +}
10370 +
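+/* Steal a waiter: among all queues with more than one task (owner plus
+ * waiters), pick the one whose hp_waiter has the highest priority, remove
+ * that waiter from its wait queue (fixing up the owner's inherited priority
+ * if it came from the stolen task), and return it. Returns NULL if there is
+ * nothing to steal. */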
10371 +static struct task_struct* kfmlp_remove_hp_waiter(struct kfmlp_semaphore* sem)
10372 +{
10373 + /* must hold sem->lock */
10374 +
10375 + struct kfmlp_queue *my_queue = NULL;
10376 + struct task_struct *max_hp = NULL;
10377 +
10378 +
10379 + struct list_head *pos;
10380 + struct task_struct *queued;
10381 + int i;
10382 +
10383 + for(i = 0; i < sem->num_resources; ++i)
10384 + {
10385 + if( (sem->queues[i].count > 1) &&
10386 + ((my_queue == NULL) ||
10387 + (rm_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) )
10388 + {
10389 + my_queue = &sem->queues[i];
10390 + }
10391 + }
10392 +
10393 + if(my_queue)
10394 + {
10395 + crm_domain_t* cluster;
10396 +
10397 + max_hp = my_queue->hp_waiter;
10398 + BUG_ON(!max_hp);
10399 +
10400 + TRACE_CUR("queue %d: stealing %s/%d from queue %d\n",
10401 + kfmlp_get_idx(sem, my_queue),
10402 + max_hp->comm, max_hp->pid,
10403 + kfmlp_get_idx(sem, my_queue));
10404 +
10405 + my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, max_hp);
10406 +
10407 + /*
10408 + if(my_queue->hp_waiter)
10409 + TRACE_CUR("queue %d: new hp_waiter is %s/%d\n",
10410 + kfmlp_get_idx(sem, my_queue),
10411 + my_queue->hp_waiter->comm,
10412 + my_queue->hp_waiter->pid);
10413 + else
10414 + TRACE_CUR("queue %d: new hp_waiter is %p\n",
10415 + kfmlp_get_idx(sem, my_queue), NULL);
10416 + */
10417 +
10418 + cluster = task_cpu_cluster(max_hp);
10419 +
10420 + raw_spin_lock(&cluster->crm_lock);
10421 +
10422 + /*
10423 + if(my_queue->owner)
10424 + TRACE_CUR("queue %d: owner is %s/%d\n",
10425 + kfmlp_get_idx(sem, my_queue),
10426 + my_queue->owner->comm,
10427 + my_queue->owner->pid);
10428 + else
10429 + TRACE_CUR("queue %d: owner is %p\n",
10430 + kfmlp_get_idx(sem, my_queue),
10431 + NULL);
10432 + */
10433 +
10434 + if(tsk_rt(my_queue->owner)->inh_task == max_hp)
10435 + {
10436 + __clear_priority_inheritance(my_queue->owner);
10437 + if(my_queue->hp_waiter != NULL)
10438 + {
10439 + __set_priority_inheritance(my_queue->owner, my_queue->hp_waiter);
10440 + }
10441 + }
10442 + raw_spin_unlock(&cluster->crm_lock);
10443 +
10444 + list_for_each(pos, &my_queue->wait.task_list)
10445 + {
10446 + queued = (struct task_struct*) list_entry(pos, wait_queue_t,
10447 + task_list)->private;
10448 + /* Compare task prios, find high prio task. */
10449 + if (queued == max_hp)
10450 + {
10451 + /*
10452 + TRACE_CUR("queue %d: found entry in wait queue. REMOVING!\n",
10453 + kfmlp_get_idx(sem, my_queue));
10454 + */
10455 + __remove_wait_queue(&my_queue->wait,
10456 + list_entry(pos, wait_queue_t, task_list));
10457 + break;
10458 + }
10459 + }
10460 + --(my_queue->count);
10461 + }
10462 +
10463 + return(max_hp);
10464 +}
10465 +
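+/* Acquire one of the k replicas: the caller joins the currently shortest
+ * queue. If that replica is held, the caller blocks in FIFO order behind the
+ * owner and, if it is the new highest-priority waiter, passes its priority
+ * to the owner via priority inheritance. */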
10466 +int crm_kfmlp_lock(struct litmus_lock* l)
10467 +{
10468 + struct task_struct* t = current;
10469 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
10470 + struct kfmlp_queue* my_queue;
10471 + wait_queue_t wait;
10472 + unsigned long flags;
10473 +
10474 + if (!is_realtime(t))
10475 + return -EPERM;
10476 +
10477 + spin_lock_irqsave(&sem->lock, flags);
10478 +
10479 + my_queue = sem->shortest_queue;
10480 +
10481 + if (my_queue->owner) {
10482 + /* resource is not free => must suspend and wait */
10483 + TRACE_CUR("queue %d: Resource is not free => must suspend and wait.\n",
10484 + kfmlp_get_idx(sem, my_queue));
10485 +
10486 + init_waitqueue_entry(&wait, t);
10487 +
10488 + /* FIXME: interruptible would be nice some day */
10489 + set_task_state(t, TASK_UNINTERRUPTIBLE);
10490 +
10491 + __add_wait_queue_tail_exclusive(&my_queue->wait, &wait);
10492 +
10493 + /* check if we need to activate priority inheritance */
10494 + if (rm_higher_prio(t, my_queue->hp_waiter))
10495 + {
10496 + my_queue->hp_waiter = t;
10497 + if (rm_higher_prio(t, my_queue->owner))
10498 + {
10499 + set_priority_inheritance(my_queue->owner, my_queue->hp_waiter);
10500 + }
10501 + }
10502 +
10503 + ++(my_queue->count);
10504 + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
10505 +
10506 + /* release lock before sleeping */
10507 + spin_unlock_irqrestore(&sem->lock, flags);
10508 +
10509 + /* We depend on the FIFO order. Thus, we don't need to recheck
10510 + * when we wake up; we are guaranteed to have the lock since
10511 + * there is only one wake up per release (or steal).
10512 + */
10513 + schedule();
10514 +
10515 +
10516 + if(my_queue->owner == t)
10517 + {
10518 + TRACE_CUR("queue %d: acquired through waiting\n",
10519 + kfmlp_get_idx(sem, my_queue));
10520 + }
10521 + else
10522 + {
10523 + /* this case may happen if our wait entry was stolen
10524 + between queues. record where we went.*/
10525 + my_queue = kfmlp_get_queue(sem, t);
10526 + BUG_ON(!my_queue);
10527 + TRACE_CUR("queue %d: acquired through stealing\n",
10528 + kfmlp_get_idx(sem, my_queue));
10529 + }
10530 + }
10531 + else
10532 + {
10533 + TRACE_CUR("queue %d: acquired immediately\n",
10534 + kfmlp_get_idx(sem, my_queue));
10535 +
10536 + my_queue->owner = t;
10537 +
10538 + ++(my_queue->count);
10539 + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
10540 +
10541 + spin_unlock_irqrestore(&sem->lock, flags);
10542 + }
10543 +
10544 + return kfmlp_get_idx(sem, my_queue);
10545 +}
10546 +
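+/* Release a replica: ownership passes to the next FIFO waiter of this queue,
+ * or, if the queue is empty, to a waiter stolen from another queue. Any
+ * priority inherited while holding the replica is dropped afterwards. */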
10547 +int crm_kfmlp_unlock(struct litmus_lock* l)
10548 +{
10549 + struct task_struct *t = current, *next;
10550 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
10551 + struct kfmlp_queue *my_queue;
10552 + unsigned long flags;
10553 + int err = 0;
10554 +
10555 + spin_lock_irqsave(&sem->lock, flags);
10556 +
10557 + my_queue = kfmlp_get_queue(sem, t);
10558 +
10559 + if (!my_queue) {
10560 + err = -EINVAL;
10561 + goto out;
10562 + }
10563 +
10564 + /* check if there are jobs waiting for this resource */
10565 + next = __waitqueue_remove_first(&my_queue->wait);
10566 + if (next) {
10567 + /*
10568 + TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n",
10569 + kfmlp_get_idx(sem, my_queue),
10570 + next->comm, next->pid);
10571 + */
10572 +		/* next becomes the resource holder */
10573 + my_queue->owner = next;
10574 +
10575 + --(my_queue->count);
10576 + if(my_queue->count < sem->shortest_queue->count)
10577 + {
10578 + sem->shortest_queue = my_queue;
10579 + }
10580 +
10581 + TRACE_CUR("queue %d: lock ownership passed to %s/%d\n",
10582 + kfmlp_get_idx(sem, my_queue), next->comm, next->pid);
10583 +
10584 + /* determine new hp_waiter if necessary */
10585 + if (next == my_queue->hp_waiter) {
10586 + TRACE_TASK(next, "was highest-prio waiter\n");
10587 + /* next has the highest priority --- it doesn't need to
10588 + * inherit. However, we need to make sure that the
10589 + * next-highest priority in the queue is reflected in
10590 + * hp_waiter. */
10591 + my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next);
10592 + if (my_queue->hp_waiter)
10593 + TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue));
10594 + else
10595 + TRACE("queue %d: no further waiters\n", kfmlp_get_idx(sem, my_queue));
10596 + } else {
10597 + /* Well, if next is not the highest-priority waiter,
10598 + * then it ought to inherit the highest-priority
10599 + * waiter's priority. */
10600 + set_priority_inheritance(next, my_queue->hp_waiter);
10601 + }
10602 +
10603 + /* wake up next */
10604 + wake_up_process(next);
10605 + }
10606 + else
10607 + {
10608 + TRACE_CUR("queue %d: looking to steal someone...\n", kfmlp_get_idx(sem, my_queue));
10609 +
10610 + next = kfmlp_remove_hp_waiter(sem); /* returns NULL if nothing to steal */
10611 +
10612 + /*
10613 + if(next)
10614 + TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - steal\n",
10615 + kfmlp_get_idx(sem, my_queue),
10616 + next->comm, next->pid);
10617 + */
10618 +
10619 + my_queue->owner = next;
10620 +
10621 + if(next)
10622 + {
10623 + TRACE_CUR("queue %d: lock ownership passed to %s/%d (which was stolen)\n",
10624 + kfmlp_get_idx(sem, my_queue),
10625 + next->comm, next->pid);
10626 +
10627 + /* wake up next */
10628 + wake_up_process(next);
10629 + }
10630 + else
10631 + {
10632 + TRACE_CUR("queue %d: no one to steal.\n", kfmlp_get_idx(sem, my_queue));
10633 +
10634 + --(my_queue->count);
10635 + if(my_queue->count < sem->shortest_queue->count)
10636 + {
10637 + sem->shortest_queue = my_queue;
10638 + }
10639 + }
10640 + }
10641 +
10642 + /* we lose the benefit of priority inheritance (if any) */
10643 + if (tsk_rt(t)->inh_task)
10644 + clear_priority_inheritance(t);
10645 +
10646 +out:
10647 + spin_unlock_irqrestore(&sem->lock, flags);
10648 +
10649 + return err;
10650 +}
10651 +
10652 +int crm_kfmlp_close(struct litmus_lock* l)
10653 +{
10654 + struct task_struct *t = current;
10655 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
10656 + struct kfmlp_queue *my_queue;
10657 + unsigned long flags;
10658 +
10659 + int owner;
10660 +
10661 + spin_lock_irqsave(&sem->lock, flags);
10662 +
10663 + my_queue = kfmlp_get_queue(sem, t);
10664 + owner = (my_queue) ? (my_queue->owner == t) : 0;
10665 +
10666 + spin_unlock_irqrestore(&sem->lock, flags);
10667 +
10668 + if (owner)
10669 + crm_kfmlp_unlock(l);
10670 +
10671 + return 0;
10672 +}
10673 +
10674 +void crm_kfmlp_free(struct litmus_lock* l)
10675 +{
10676 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
10677 + kfree(sem->queues);
10678 + kfree(sem);
10679 +}
10680 +
10681 +static struct litmus_lock_ops crm_kfmlp_lock_ops = {
10682 + .close = crm_kfmlp_close,
10683 + .lock = crm_kfmlp_lock,
10684 + .unlock = crm_kfmlp_unlock,
10685 + .deallocate = crm_kfmlp_free,
10686 +};
10687 +
10688 +static struct litmus_lock* crm_new_kfmlp(void* __user arg, int* ret_code)
10689 +{
10690 + struct kfmlp_semaphore* sem;
10691 + int num_resources = 0;
10692 + int i;
10693 +
10694 + if(!access_ok(VERIFY_READ, arg, sizeof(num_resources)))
10695 + {
10696 + *ret_code = -EINVAL;
10697 + return(NULL);
10698 + }
10699 + if(__copy_from_user(&num_resources, arg, sizeof(num_resources)))
10700 + {
10701 + *ret_code = -EINVAL;
10702 + return(NULL);
10703 + }
10704 + if(num_resources < 1)
10705 + {
10706 + *ret_code = -EINVAL;
10707 + return(NULL);
10708 + }
10709 +
10710 + sem = kmalloc(sizeof(*sem), GFP_KERNEL);
10711 + if(!sem)
10712 + {
10713 + *ret_code = -ENOMEM;
10714 + return NULL;
10715 + }
10716 +
10717 + sem->queues = kmalloc(sizeof(struct kfmlp_queue)*num_resources, GFP_KERNEL);
10718 + if(!sem->queues)
10719 + {
10720 + kfree(sem);
10721 + *ret_code = -ENOMEM;
10722 + return NULL;
10723 + }
10724 +
10725 + sem->litmus_lock.ops = &crm_kfmlp_lock_ops;
10726 + spin_lock_init(&sem->lock);
10727 + sem->num_resources = num_resources;
10728 +
10729 + for(i = 0; i < num_resources; ++i)
10730 + {
10731 + sem->queues[i].owner = NULL;
10732 + sem->queues[i].hp_waiter = NULL;
10733 + init_waitqueue_head(&sem->queues[i].wait);
10734 + sem->queues[i].count = 0;
10735 + }
10736 +
10737 + sem->shortest_queue = &sem->queues[0];
10738 +
10739 + *ret_code = 0;
10740 + return &sem->litmus_lock;
10741 +}
10742 +
10743 +
10744 +/* **** lock constructor **** */
10745 +
10746 +static long crm_allocate_lock(struct litmus_lock **lock, int type,
10747 + void* __user arg)
10748 +{
10749 + int err = -ENXIO;
10750 +
10751 + /* C-RM currently only supports the FMLP for global resources
10752 + WITHIN a given cluster. DO NOT USE CROSS-CLUSTER! */
10753 + switch (type) {
10754 + case KFMLP_SEM:
10755 + *lock = crm_new_kfmlp(arg, &err);
10756 + break;
10757 + };
10758 +
10759 + return err;
10760 +}
10761 +
10762 +#endif // CONFIG_LITMUS_LOCKING
10763 +
10764 +
10765 +
10766 +
10767 +
10768 +
10769 +/* total number of clusters */
10770 +static int num_clusters;
10771 +/* we do not support clusters of different sizes */
10772 +static unsigned int cluster_size;
10773 +
10774 +#ifdef VERBOSE_INIT
10775 +static void print_cluster_topology(cpumask_var_t mask, int cpu)
10776 +{
10777 + int chk;
10778 + char buf[255];
10779 +
10780 + chk = cpulist_scnprintf(buf, 254, mask);
10781 + buf[chk] = '\0';
10782 + printk(KERN_INFO "CPU = %d, shared cpu(s) = %s\n", cpu, buf);
10783 +
10784 +}
10785 +#endif
10786 +
10787 +static int clusters_allocated = 0;
10788 +
10789 +static void cleanup_crm(void)
10790 +{
10791 + int i;
10792 +
10793 + if (clusters_allocated) {
10794 + for (i = 0; i < num_clusters; i++) {
10795 + kfree(crm[i].cpus);
10796 + kfree(crm[i].heap_node);
10797 + free_cpumask_var(crm[i].cpu_map);
10798 + }
10799 +
10800 + kfree(crm);
10801 + }
10802 +}
10803 +
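+/* Build the cluster structures for the configured cache level: determine the
+ * cluster size from the shared-cpu maps, allocate one crm_domain_t per
+ * cluster, assign each online CPU to its cluster, and (when
+ * CONFIG_LITMUS_SOFTIRQD is enabled) distribute the klitirqd daemons evenly
+ * across the clusters. */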
10804 +static long crm_activate_plugin(void)
10805 +{
10806 + int i, j, cpu, ccpu, cpu_count;
10807 + cpu_entry_t *entry;
10808 +
10809 + cpumask_var_t mask;
10810 + int chk = 0;
10811 +
10812 + /* de-allocate old clusters, if any */
10813 + cleanup_crm();
10814 +
10815 + printk(KERN_INFO "C-RM: Activate Plugin, cluster configuration = %d\n",
10816 + cluster_config);
10817 +
10818 + /* need to get cluster_size first */
10819 + if(!zalloc_cpumask_var(&mask, GFP_ATOMIC))
10820 + return -ENOMEM;
10821 +
10822 + if (unlikely(cluster_config == GLOBAL_CLUSTER)) {
10823 + cluster_size = num_online_cpus();
10824 + } else {
10825 + chk = get_shared_cpu_map(mask, 0, cluster_config);
10826 + if (chk) {
10827 + /* if chk != 0 then it is the max allowed index */
10828 + printk(KERN_INFO "C-RM: Cluster configuration = %d "
10829 + "is not supported on this hardware.\n",
10830 + cluster_config);
10831 + /* User should notice that the configuration failed, so
10832 + * let's bail out. */
10833 + return -EINVAL;
10834 + }
10835 +
10836 + cluster_size = cpumask_weight(mask);
10837 + }
10838 +
10839 + if ((num_online_cpus() % cluster_size) != 0) {
10840 + /* this can't be right, some cpus are left out */
10841 + printk(KERN_ERR "C-RM: Trying to group %d cpus in %d!\n",
10842 + num_online_cpus(), cluster_size);
10843 + return -1;
10844 + }
10845 +
10846 + num_clusters = num_online_cpus() / cluster_size;
10847 + printk(KERN_INFO "C-RM: %d cluster(s) of size = %d\n",
10848 + num_clusters, cluster_size);
10849 +
10850 + /* initialize clusters */
10851 + crm = kmalloc(num_clusters * sizeof(crm_domain_t), GFP_ATOMIC);
10852 + for (i = 0; i < num_clusters; i++) {
10853 +
10854 + crm[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t),
10855 + GFP_ATOMIC);
10856 + crm[i].heap_node = kmalloc(
10857 + cluster_size * sizeof(struct bheap_node),
10858 + GFP_ATOMIC);
10859 + bheap_init(&(crm[i].cpu_heap));
10860 + rm_domain_init(&(crm[i].domain), NULL, crm_release_jobs);
10861 +
10862 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
10863 + crm[i].pending_tasklets.head = NULL;
10864 + crm[i].pending_tasklets.tail = &(crm[i].pending_tasklets.head);
10865 +#endif
10866 +
10867 + if(!zalloc_cpumask_var(&crm[i].cpu_map, GFP_ATOMIC))
10868 + return -ENOMEM;
10869 + }
10870 +
10871 + /* cycle through cluster and add cpus to them */
10872 + for (i = 0; i < num_clusters; i++) {
10873 +
10874 + for_each_online_cpu(cpu) {
10875 + /* check if the cpu is already in a cluster */
10876 + for (j = 0; j < num_clusters; j++)
10877 + if (cpumask_test_cpu(cpu, crm[j].cpu_map))
10878 + break;
10879 + /* if it is in a cluster go to next cpu */
10880 + if (j < num_clusters &&
10881 + cpumask_test_cpu(cpu, crm[j].cpu_map))
10882 + continue;
10883 +
10884 + /* this cpu isn't in any cluster */
10885 + /* get the shared cpus */
10886 + if (unlikely(cluster_config == GLOBAL_CLUSTER))
10887 + cpumask_copy(mask, cpu_online_mask);
10888 + else
10889 + get_shared_cpu_map(mask, cpu, cluster_config);
10890 +
10891 + cpumask_copy(crm[i].cpu_map, mask);
10892 +#ifdef VERBOSE_INIT
10893 + print_cluster_topology(mask, cpu);
10894 +#endif
10895 + /* add cpus to current cluster and init cpu_entry_t */
10896 + cpu_count = 0;
10897 + for_each_cpu(ccpu, crm[i].cpu_map) {
10898 +
10899 + entry = &per_cpu(crm_cpu_entries, ccpu);
10900 + crm[i].cpus[cpu_count] = entry;
10901 + atomic_set(&entry->will_schedule, 0);
10902 + entry->cpu = ccpu;
10903 + entry->cluster = &crm[i];
10904 + entry->hn = &(crm[i].heap_node[cpu_count]);
10905 + bheap_node_init(&entry->hn, entry);
10906 +
10907 + cpu_count++;
10908 +
10909 + entry->linked = NULL;
10910 + entry->scheduled = NULL;
10911 + update_cpu_position(entry);
10912 + }
10913 + /* done with this cluster */
10914 + break;
10915 + }
10916 + }
10917 +
10918 +#ifdef CONFIG_LITMUS_SOFTIRQD
10919 + {
10920 + /* distribute the daemons evenly across the clusters. */
10921 + int* affinity = kmalloc(NR_LITMUS_SOFTIRQD * sizeof(int), GFP_ATOMIC);
10922 + int num_daemons_per_cluster = NR_LITMUS_SOFTIRQD / num_clusters;
10923 + int left_over = NR_LITMUS_SOFTIRQD % num_clusters;
10924 +
10925 + int daemon = 0;
10926 + for(i = 0; i < num_clusters; ++i)
10927 + {
10928 + int num_on_this_cluster = num_daemons_per_cluster;
10929 + if(left_over)
10930 + {
10931 + ++num_on_this_cluster;
10932 + --left_over;
10933 + }
10934 +
10935 + for(j = 0; j < num_on_this_cluster; ++j)
10936 + {
10937 + // first CPU of this cluster
10938 + affinity[daemon++] = i*cluster_size;
10939 + }
10940 + }
10941 +
10942 + spawn_klitirqd(affinity);
10943 +
10944 + kfree(affinity);
10945 + }
10946 +#endif
10947 +
10948 +#ifdef CONFIG_LITMUS_NVIDIA
10949 + init_nvidia_info();
10950 +#endif
10951 +
10952 + free_cpumask_var(mask);
10953 + clusters_allocated = 1;
10954 + return 0;
10955 +}
10956 +
10957 +/* Plugin object */
10958 +static struct sched_plugin crm_plugin __cacheline_aligned_in_smp = {
10959 + .plugin_name = "C-RM",
10960 + .finish_switch = crm_finish_switch,
10961 + .tick = crm_tick,
10962 + .task_new = crm_task_new,
10963 + .complete_job = complete_job,
10964 + .task_exit = crm_task_exit,
10965 + .schedule = crm_schedule,
10966 + .task_wake_up = crm_task_wake_up,
10967 + .task_block = crm_task_block,
10968 + .admit_task = crm_admit_task,
10969 + .activate_plugin = crm_activate_plugin,
10970 +#ifdef CONFIG_LITMUS_LOCKING
10971 + .allocate_lock = crm_allocate_lock,
10972 + .set_prio_inh = set_priority_inheritance,
10973 + .clear_prio_inh = clear_priority_inheritance,
10974 +#endif
10975 +#ifdef CONFIG_LITMUS_SOFTIRQD
10976 + .set_prio_inh_klitirqd = set_priority_inheritance_klitirqd,
10977 + .clear_prio_inh_klitirqd = clear_priority_inheritance_klitirqd,
10978 +#endif
10979 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
10980 + .enqueue_pai_tasklet = enqueue_pai_tasklet,
10981 + .run_tasklets = run_tasklets,
10982 +#endif
10983 +};
10984 +
10985 +static struct proc_dir_entry *cluster_file = NULL, *crm_dir = NULL;
10986 +
10987 +static int __init init_crm(void)
10988 +{
10989 + int err, fs;
10990 +
10991 + err = register_sched_plugin(&crm_plugin);
10992 + if (!err) {
10993 + fs = make_plugin_proc_dir(&crm_plugin, &crm_dir);
10994 + if (!fs)
10995 + cluster_file = create_cluster_file(crm_dir, &cluster_config);
10996 + else
10997 + printk(KERN_ERR "Could not allocate C-RM procfs dir.\n");
10998 + }
10999 + return err;
11000 +}
11001 +
11002 +static void clean_crm(void)
11003 +{
11004 + cleanup_crm();
11005 + if (cluster_file)
11006 + remove_proc_entry("cluster", crm_dir);
11007 + if (crm_dir)
11008 + remove_plugin_proc_dir(&crm_plugin);
11009 +}
11010 +
11011 +module_init(init_crm);
11012 +module_exit(clean_crm);
11013 diff --git a/litmus/sched_crm_srt.c b/litmus/sched_crm_srt.c
11014 new file mode 100644
11015 index 0000000..f0064d4
11016 --- /dev/null
11017 +++ b/litmus/sched_crm_srt.c
11018 @@ -0,0 +1,2058 @@
11019 +/*
11020 + * litmus/sched_crm_srt.c
11021 + *
11022 + * Implementation of the C-RM-SRT scheduling algorithm.
11023 + *
11024 + * This implementation is based on G-EDF:
11025 + * - CPUs are clustered around L2 or L3 caches.
11026 + * - Cluster topology is automatically detected (this is arch dependent
11027 + * and works only on x86 at the moment --- and only with modern
11028 + * cpus that export cpuid4 information)
11029 + * - The plugin _does not_ attempt to put tasks in the right cluster, i.e.,
11030 + * the programmer needs to be aware of the topology to place tasks
11031 + * in the desired cluster
11032 + * - default clustering is around L2 cache (cache index = 2)
11033 + * supported clusters are: L1 (private cache: pedf), L2, L3, ALL (all
11034 + * online_cpus are placed in a single cluster).
11035 + *
11036 + * For details on functions, take a look at sched_gsn_edf.c
11037 + *
11038 + * Currently, we do not support changes in the number of online cpus.
11039 + * If the num_online_cpus() dynamically changes, the plugin is broken.
11040 + *
11041 + * This version uses the simple approach and serializes all scheduling
11042 + * decisions by the use of a queue lock. This is probably not the
11043 + * best way to do it, but it should suffice for now.
11044 + */
11045 +
11046 +#include <linux/spinlock.h>
11047 +#include <linux/percpu.h>
11048 +#include <linux/sched.h>
11049 +#include <linux/slab.h>
11050 +#include <linux/uaccess.h>
11051 +
11052 +#include <linux/module.h>
11053 +
11054 +#include <litmus/litmus.h>
11055 +#include <litmus/jobs.h>
11056 +#include <litmus/preempt.h>
11057 +#include <litmus/sched_plugin.h>
11058 +#include <litmus/rm_srt_common.h>
11059 +#include <litmus/sched_trace.h>
11060 +
11061 +#include <litmus/clustered.h>
11062 +
11063 +#include <litmus/bheap.h>
11064 +
11065 +/* to configure the cluster size */
11066 +#include <litmus/litmus_proc.h>
11067 +
11068 +#ifdef CONFIG_SCHED_CPU_AFFINITY
11069 +#include <litmus/affinity.h>
11070 +#endif
11071 +
11072 +#ifdef CONFIG_LITMUS_SOFTIRQD
11073 +#include <litmus/litmus_softirq.h>
11074 +#endif
11075 +
11076 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
11077 +#include <linux/interrupt.h>
11078 +#include <litmus/trace.h>
11079 +#endif
11080 +
11081 +#ifdef CONFIG_LITMUS_NVIDIA
11082 +#include <litmus/nvidia_info.h>
11083 +#endif
11084 +
11085 +/* Reference configuration variable. Determines which cache level is used to
11086 + * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that
11087 + * all CPUs form a single cluster (just like GSN-EDF).
11088 + */
11089 +static enum cache_level cluster_config = GLOBAL_CLUSTER;
11090 +
11091 +struct clusterdomain;
11092 +
11093 +/* cpu_entry_t - maintain the linked and scheduled state
11094 + *
11095 + * A cpu also contains a pointer to the crm_srt_domain_t cluster
11096 + * that owns it (struct clusterdomain*)
11097 + */
11098 +typedef struct {
11099 + int cpu;
11100 + struct clusterdomain* cluster; /* owning cluster */
11101 + struct task_struct* linked; /* only RT tasks */
11102 + struct task_struct* scheduled; /* only RT tasks */
11103 + atomic_t will_schedule; /* prevent unneeded IPIs */
11104 + struct bheap_node* hn;
11105 +} cpu_entry_t;
11106 +
11107 +/* one cpu_entry_t per CPU */
11108 +DEFINE_PER_CPU(cpu_entry_t, crm_srt_cpu_entries);
11109 +
11110 +#define set_will_schedule() \
11111 + (atomic_set(&__get_cpu_var(crm_srt_cpu_entries).will_schedule, 1))
11112 +#define clear_will_schedule() \
11113 + (atomic_set(&__get_cpu_var(crm_srt_cpu_entries).will_schedule, 0))
11114 +#define test_will_schedule(cpu) \
11115 + (atomic_read(&per_cpu(crm_srt_cpu_entries, cpu).will_schedule))
11116 +
11117 +
11118 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
11119 +struct tasklet_head
11120 +{
11121 + struct tasklet_struct *head;
11122 + struct tasklet_struct **tail;
11123 +};
11124 +#endif
11125 +
11126 +/*
11127 + * In C-RM-SRT there is a crm_srt domain _per_ cluster
11128 + * The number of clusters is dynamically determined according to the
11129 + * total cpu number and the cluster size
11130 + */
11131 +typedef struct clusterdomain {
11132 + /* rt_domain for this cluster */
11133 + rt_domain_t domain;
11134 + /* cpus in this cluster */
11135 + cpu_entry_t* *cpus;
11136 + /* map of this cluster cpus */
11137 + cpumask_var_t cpu_map;
11138 + /* the cpus queue themselves according to priority in here */
11139 + struct bheap_node *heap_node;
11140 + struct bheap cpu_heap;
11141 + /* lock for this cluster */
11142 +#define crm_srt_lock domain.ready_lock
11143 +
11144 +
11145 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
11146 + struct tasklet_head pending_tasklets;
11147 +#endif
11148 +
11149 +} crm_srt_domain_t;
11150 +
11151 +/* a crm_srt_domain per cluster; allocation is done at init/activation time */
11152 +crm_srt_domain_t *crm_srt;
11153 +
11154 +#define remote_cluster(cpu) ((crm_srt_domain_t *) per_cpu(crm_srt_cpu_entries, cpu).cluster)
11155 +#define task_cpu_cluster(task) remote_cluster(get_partition(task))
11156 +
11157 +/* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling
11158 + * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose
11159 + * information during the initialization of the plugin (e.g., topology)
11160 +#define WANT_ALL_SCHED_EVENTS
11161 + */
11162 +#define VERBOSE_INIT
11163 +
11164 +static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
11165 +{
11166 + cpu_entry_t *a, *b;
11167 + a = _a->value;
11168 + b = _b->value;
11169 + /* Note that a and b are inverted: we want the lowest-priority CPU at
11170 + * the top of the heap.
11171 + */
11172 + return rm_srt_higher_prio(b->linked, a->linked);
11173 +}
11174 +
11175 +/* update_cpu_position - Move the cpu entry to the correct place to maintain
11176 + * order in the cpu queue. Caller must hold crm_srt lock.
11177 + */
11178 +static void update_cpu_position(cpu_entry_t *entry)
11179 +{
11180 + crm_srt_domain_t *cluster = entry->cluster;
11181 +
11182 + if (likely(bheap_node_in_heap(entry->hn)))
11183 + bheap_delete(cpu_lower_prio,
11184 + &cluster->cpu_heap,
11185 + entry->hn);
11186 +
11187 + bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn);
11188 +}
11189 +
11190 +/* caller must hold crm_srt lock */
11191 +static cpu_entry_t* lowest_prio_cpu(crm_srt_domain_t *cluster)
11192 +{
11193 + struct bheap_node* hn;
11194 + hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap);
11195 + return hn->value;
11196 +}
11197 +
11198 +
11199 +/* link_task_to_cpu - Update the link of a CPU.
11200 + * Handles the case where the to-be-linked task is already
11201 + * scheduled on a different CPU.
11202 + */
11203 +static noinline void link_task_to_cpu(struct task_struct* linked,
11204 + cpu_entry_t *entry)
11205 +{
11206 + cpu_entry_t *sched;
11207 + struct task_struct* tmp;
11208 + int on_cpu;
11209 +
11210 + BUG_ON(linked && !is_realtime(linked));
11211 +
11212 + /* Currently linked task is set to be unlinked. */
11213 + if (entry->linked) {
11214 + entry->linked->rt_param.linked_on = NO_CPU;
11215 + }
11216 +
11217 + /* Link new task to CPU. */
11218 + if (linked) {
11219 + set_rt_flags(linked, RT_F_RUNNING);
11220 +		/* handle the case where the task is already scheduled somewhere! */
11221 + on_cpu = linked->rt_param.scheduled_on;
11222 + if (on_cpu != NO_CPU) {
11223 + sched = &per_cpu(crm_srt_cpu_entries, on_cpu);
11224 + /* this should only happen if not linked already */
11225 + BUG_ON(sched->linked == linked);
11226 +
11227 + /* If we are already scheduled on the CPU to which we
11228 + * wanted to link, we don't need to do the swap --
11229 + * we just link ourselves to the CPU and depend on
11230 + * the caller to get things right.
11231 + */
11232 + if (entry != sched) {
11233 + TRACE_TASK(linked,
11234 + "already scheduled on %d, updating link.\n",
11235 + sched->cpu);
11236 + tmp = sched->linked;
11237 + linked->rt_param.linked_on = sched->cpu;
11238 + sched->linked = linked;
11239 + update_cpu_position(sched);
11240 + linked = tmp;
11241 + }
11242 + }
11243 + if (linked) /* might be NULL due to swap */
11244 + linked->rt_param.linked_on = entry->cpu;
11245 + }
11246 + entry->linked = linked;
11247 +#ifdef WANT_ALL_SCHED_EVENTS
11248 + if (linked)
11249 + TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
11250 + else
11251 + TRACE("NULL linked to %d.\n", entry->cpu);
11252 +#endif
11253 + update_cpu_position(entry);
11254 +}
11255 +
11256 +/* unlink - Make sure a task is not linked any longer to an entry
11257 + * where it was linked before. Must hold crm_srt_lock.
11258 + */
11259 +static noinline void unlink(struct task_struct* t)
11260 +{
11261 + cpu_entry_t *entry;
11262 +
11263 + if (t->rt_param.linked_on != NO_CPU) {
11264 + /* unlink */
11265 + entry = &per_cpu(crm_srt_cpu_entries, t->rt_param.linked_on);
11266 + t->rt_param.linked_on = NO_CPU;
11267 + link_task_to_cpu(NULL, entry);
11268 + } else if (is_queued(t)) {
11269 + /* This is an interesting situation: t is scheduled,
11270 + * but was just recently unlinked. It cannot be
11271 + * linked anywhere else (because then it would have
11272 + * been relinked to this CPU), thus it must be in some
11273 + * queue. We must remove it from the list in this
11274 + * case.
11275 + *
11276 +	 * In the C-RM-SRT case it should be somewhere in the queue for
11277 +	 * its domain, therefore we can get the domain using
11278 +	 * task_cpu_cluster.
11279 + */
11280 + remove(&(task_cpu_cluster(t))->domain, t);
11281 + }
11282 +}
11283 +
11284 +
11285 +/* preempt - force a CPU to reschedule
11286 + */
11287 +static void preempt(cpu_entry_t *entry)
11288 +{
11289 + preempt_if_preemptable(entry->scheduled, entry->cpu);
11290 +}
11291 +
11292 +/* requeue - Put an unlinked task into c-rm-srt domain.
11293 + * Caller must hold crm_srt_lock.
11294 + */
11295 +static noinline void requeue(struct task_struct* task)
11296 +{
11297 + crm_srt_domain_t *cluster = task_cpu_cluster(task);
11298 + BUG_ON(!task);
11299 + /* sanity check before insertion */
11300 + BUG_ON(is_queued(task));
11301 +
11302 + if (is_released(task, litmus_clock()))
11303 + __add_ready(&cluster->domain, task);
11304 + else {
11305 + /* it has got to wait */
11306 + add_release(&cluster->domain, task);
11307 + }
11308 +}
11309 +
11310 +#ifdef CONFIG_SCHED_CPU_AFFINITY
11311 +static cpu_entry_t* crm_srt_get_nearest_available_cpu(
11312 + crm_srt_domain_t *cluster, cpu_entry_t* start)
11313 +{
11314 + cpu_entry_t* affinity;
11315 +
11316 + get_nearest_available_cpu(affinity, start, crm_srt_cpu_entries, -1);
11317 +
11318 + /* make sure CPU is in our cluster */
11319 + if(affinity && cpu_isset(affinity->cpu, *cluster->cpu_map))
11320 + return(affinity);
11321 + else
11322 + return(NULL);
11323 +}
11324 +#endif
11325 +
11326 +
11327 +/* check for any necessary preemptions */
11328 +static void check_for_preemptions(crm_srt_domain_t *cluster)
11329 +{
11330 + struct task_struct *task;
11331 + cpu_entry_t *last;
11332 +
11333 + for(last = lowest_prio_cpu(cluster);
11334 + rm_srt_preemption_needed(&cluster->domain, last->linked);
11335 + last = lowest_prio_cpu(cluster)) {
11336 + /* preemption necessary */
11337 + task = __take_ready(&cluster->domain);
11338 +#ifdef CONFIG_SCHED_CPU_AFFINITY
11339 + {
11340 + cpu_entry_t* affinity =
11341 + crm_srt_get_nearest_available_cpu(cluster,
11342 + &per_cpu(crm_srt_cpu_entries, task_cpu(task)));
11343 + if(affinity)
11344 + last = affinity;
11345 + else if(last->linked)
11346 + requeue(last->linked);
11347 + }
11348 +#else
11349 + if (last->linked)
11350 + requeue(last->linked);
11351 +#endif
11352 + TRACE("check_for_preemptions: attempting to link task %d to %d\n",
11353 + task->pid, last->cpu);
11354 + link_task_to_cpu(task, last);
11355 + preempt(last);
11356 + }
11357 +}
11358 +
11359 +/* crm_srt_job_arrival: task is either resumed or released */
11360 +static noinline void crm_srt_job_arrival(struct task_struct* task)
11361 +{
11362 + crm_srt_domain_t *cluster = task_cpu_cluster(task);
11363 + BUG_ON(!task);
11364 +
11365 + requeue(task);
11366 + check_for_preemptions(cluster);
11367 +}
11368 +
11369 +static void crm_srt_release_jobs(rt_domain_t* rt, struct bheap* tasks)
11370 +{
11371 + crm_srt_domain_t* cluster = container_of(rt, crm_srt_domain_t, domain);
11372 + unsigned long flags;
11373 +
11374 + raw_spin_lock_irqsave(&cluster->crm_srt_lock, flags);
11375 +
11376 + __merge_ready(&cluster->domain, tasks);
11377 + check_for_preemptions(cluster);
11378 +
11379 + raw_spin_unlock_irqrestore(&cluster->crm_srt_lock, flags);
11380 +}
11381 +
11382 +/* caller holds crm_srt_lock */
11383 +static noinline void job_completion(struct task_struct *t, int forced)
11384 +{
11385 + BUG_ON(!t);
11386 +
11387 + sched_trace_task_completion(t, forced);
11388 +
11389 +#ifdef CONFIG_LITMUS_NVIDIA
11390 + atomic_set(&tsk_rt(t)->nv_int_count, 0);
11391 +#endif
11392 +
11393 + TRACE_TASK(t, "job_completion().\n");
11394 +
11395 + /* set flags */
11396 + set_rt_flags(t, RT_F_SLEEP);
11397 + /* prepare for next period */
11398 + prepare_for_next_period(t);
11399 + if (is_released(t, litmus_clock()))
11400 + sched_trace_task_release(t);
11401 + /* unlink */
11402 + unlink(t);
11403 + /* requeue
11404 + * But don't requeue a blocking task. */
11405 + if (is_running(t))
11406 + crm_srt_job_arrival(t);
11407 +}
11408 +
11409 +/* crm_srt_tick - this function is called for every local timer
11410 + * interrupt.
11411 + *
11412 + * checks whether the current task has expired and checks
11413 + * whether we need to preempt it if it has not expired
11414 + */
11415 +static void crm_srt_tick(struct task_struct* t)
11416 +{
11417 + if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
11418 + if (!is_np(t)) {
11419 + /* np tasks will be preempted when they become
11420 + * preemptable again
11421 + */
11422 + litmus_reschedule_local();
11423 + set_will_schedule();
11424 + TRACE("crm_srt_scheduler_tick: "
11425 + "%d is preemptable "
11426 + " => FORCE_RESCHED\n", t->pid);
11427 + } else if (is_user_np(t)) {
11428 + TRACE("crm_srt_scheduler_tick: "
11429 + "%d is non-preemptable, "
11430 + "preemption delayed.\n", t->pid);
11431 + request_exit_np(t);
11432 + }
11433 + }
11434 +}
11435 +
11436 +
11437 +
11438 +
11439 +
11440 +
11441 +
11442 +
11443 +
11444 +
11445 +
11446 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
11447 +
11448 +
11449 +static void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed)
11450 +{
11451 + if (!atomic_read(&tasklet->count)) {
11452 + sched_trace_tasklet_begin(tasklet->owner);
11453 +
11454 + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state))
11455 + {
11456 + BUG();
11457 + }
11458 +		TRACE("%s: Invoking tasklet with owner pid = %d (flushed = %lu).\n", __FUNCTION__, tasklet->owner->pid, flushed);
11459 + tasklet->func(tasklet->data);
11460 + tasklet_unlock(tasklet);
11461 +
11462 + sched_trace_tasklet_end(tasklet->owner, flushed);
11463 + }
11464 + else {
11465 + BUG();
11466 + }
11467 +}
11468 +
11469 +
11470 +static void __extract_tasklets(crm_srt_domain_t* cluster, struct task_struct* task, struct tasklet_head* task_tasklets)
11471 +{
11472 + struct tasklet_struct* step;
11473 + struct tasklet_struct* tasklet;
11474 + struct tasklet_struct* prev;
11475 +
11476 + task_tasklets->head = NULL;
11477 + task_tasklets->tail = &(task_tasklets->head);
11478 +
11479 + prev = NULL;
11480 + for(step = cluster->pending_tasklets.head; step != NULL; step = step->next)
11481 + {
11482 + if(step->owner == task)
11483 + {
11484 + TRACE("%s: Found tasklet to flush: %d\n", __FUNCTION__, step->owner->pid);
11485 +
11486 + tasklet = step;
11487 +
11488 + if(prev) {
11489 + prev->next = tasklet->next;
11490 + }
11491 + else if(cluster->pending_tasklets.head == tasklet) {
11492 + // we're at the head.
11493 + cluster->pending_tasklets.head = tasklet->next;
11494 + }
11495 +
11496 +		if(cluster->pending_tasklets.tail == &(tasklet->next)) {
11497 +			// we're at the tail
11498 +			if(prev) {
11499 +				cluster->pending_tasklets.tail = &(prev->next);
11500 + }
11501 + else {
11502 + cluster->pending_tasklets.tail = &(cluster->pending_tasklets.head);
11503 + }
11504 + }
11505 +
11506 + tasklet->next = NULL;
11507 + *(task_tasklets->tail) = tasklet;
11508 + task_tasklets->tail = &(tasklet->next);
11509 + }
11510 + else {
11511 + prev = step;
11512 + }
11513 + }
11514 +}
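/* Editorial sketch (not part of the patch): the queues manipulated above and
 * in __add_pai_tasklet() follow the usual tasklet_head convention, which the
 * list surgery in __extract_tasklets() assumes:
 *
 *   - pending_tasklets.tail always points at the 'next' field of the last
 *     queued tasklet, or at pending_tasklets.head when the queue is empty;
 *   - appending a tasklet t is therefore always:
 *
 *         t->next = NULL;
 *         *(q->tail) = t;
 *         q->tail = &(t->next);
 */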
11515 +
11516 +static void flush_tasklets(crm_srt_domain_t* cluster, struct task_struct* task)
11517 +{
11518 + unsigned long flags;
11519 + struct tasklet_head task_tasklets;
11520 + struct tasklet_struct* step;
11521 +
11522 + raw_spin_lock_irqsave(&cluster->crm_srt_lock, flags);
11523 + __extract_tasklets(cluster, task, &task_tasklets);
11524 + raw_spin_unlock_irqrestore(&cluster->crm_srt_lock, flags);
11525 +
11526 +	if(task_tasklets.head != NULL) {
11527 +		TRACE("%s: Flushing tasklets for %d...\n", __FUNCTION__, task->pid);
11528 +	}
11529 +
11530 +	// now execute the tasklets extracted for this task.
11531 +	for(step = task_tasklets.head; step != NULL; /**/)
11532 + {
11533 + struct tasklet_struct* temp = step->next;
11534 +
11535 + step->next = NULL;
11536 + __do_lit_tasklet(step, 1ul);
11537 +
11538 + step = temp;
11539 + }
11540 +}
11541 +
11542 +
11543 +static void do_lit_tasklets(crm_srt_domain_t* cluster, struct task_struct* sched_task)
11544 +{
11545 + int work_to_do = 1;
11546 + struct tasklet_struct *tasklet = NULL;
11547 + //struct tasklet_struct *step;
11548 + unsigned long flags;
11549 +
11550 + while(work_to_do) {
11551 +
11552 + TS_NV_SCHED_BOTISR_START;
11553 +
11554 + // remove tasklet at head of list if it has higher priority.
11555 + raw_spin_lock_irqsave(&cluster->crm_srt_lock, flags);
11556 +
11557 + /*
11558 + step = cluster->pending_tasklets.head;
11559 + TRACE("%s: (BEFORE) dumping tasklet queue...\n", __FUNCTION__);
11560 + while(step != NULL){
11561 + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid);
11562 + step = step->next;
11563 + }
11564 + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1);
11565 + TRACE("%s: done.\n", __FUNCTION__);
11566 + */
11567 +
11568 + if(cluster->pending_tasklets.head != NULL) {
11569 + // remove tasklet at head.
11570 + tasklet = cluster->pending_tasklets.head;
11571 +
11572 + if(rm_srt_higher_prio(tasklet->owner, sched_task)) {
11573 +
11574 + if(NULL == tasklet->next) {
11575 + // tasklet is at the head, list only has one element
11576 + TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
11577 + cluster->pending_tasklets.tail = &(cluster->pending_tasklets.head);
11578 + }
11579 +
11580 + // remove the tasklet from the queue
11581 + cluster->pending_tasklets.head = tasklet->next;
11582 +
11583 + TRACE("%s: Removed tasklet for %d from tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
11584 + }
11585 + else {
11586 + TRACE("%s: Pending tasklet (%d) does not have priority to run on this CPU (%d).\n", __FUNCTION__, tasklet->owner->pid, smp_processor_id());
11587 + tasklet = NULL;
11588 + }
11589 + }
11590 + else {
11591 + TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__);
11592 + }
11593 +
11594 + /*
11595 + step = cluster->pending_tasklets.head;
11596 + TRACE("%s: (AFTER) dumping tasklet queue...\n", __FUNCTION__);
11597 + while(step != NULL){
11598 + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid);
11599 + step = step->next;
11600 + }
11601 + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1);
11602 + TRACE("%s: done.\n", __FUNCTION__);
11603 + */
11604 +
11605 + raw_spin_unlock_irqrestore(&cluster->crm_srt_lock, flags);
11606 +
11607 + TS_NV_SCHED_BOTISR_END;
11608 +
11609 + if(tasklet) {
11610 + __do_lit_tasklet(tasklet, 0ul);
11611 + tasklet = NULL;
11612 + }
11613 + else {
11614 + work_to_do = 0;
11615 + }
11616 + }
11617 +
11618 + //TRACE("%s: exited.\n", __FUNCTION__);
11619 +}
11620 +
11621 +
11622 +static void run_tasklets(struct task_struct* sched_task)
11623 +{
11624 + crm_srt_domain_t* cluster;
11625 +
11626 +#if 0
11627 + int task_is_rt = is_realtime(sched_task);
11628 + crm_srt_domain_t* cluster;
11629 +
11630 + if(is_realtime(sched_task)) {
11631 + cluster = task_cpu_cluster(sched_task);
11632 + }
11633 + else {
11634 + cluster = remote_cluster(get_cpu());
11635 + }
11636 +
11637 + if(cluster && cluster->pending_tasklets.head != NULL) {
11638 + TRACE("%s: There are tasklets to process.\n", __FUNCTION__);
11639 +
11640 + do_lit_tasklets(cluster, sched_task);
11641 + }
11642 +
11643 + if(!task_is_rt) {
11644 + put_cpu_no_resched();
11645 + }
11646 +#else
11647 +
11648 + preempt_disable();
11649 +
11650 + cluster = (is_realtime(sched_task)) ?
11651 + task_cpu_cluster(sched_task) :
11652 + remote_cluster(smp_processor_id());
11653 +
11654 + if(cluster && cluster->pending_tasklets.head != NULL) {
11655 + TRACE("%s: There are tasklets to process.\n", __FUNCTION__);
11656 + do_lit_tasklets(cluster, sched_task);
11657 + }
11658 +
11659 + preempt_enable_no_resched();
11660 +
11661 +#endif
11662 +}
11663 +
11664 +
11665 +static void __add_pai_tasklet(struct tasklet_struct* tasklet, crm_srt_domain_t* cluster)
11666 +{
11667 + struct tasklet_struct* step;
11668 +
11669 + /*
11670 + step = cluster->pending_tasklets.head;
11671 + TRACE("%s: (BEFORE) dumping tasklet queue...\n", __FUNCTION__);
11672 + while(step != NULL){
11673 + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid);
11674 + step = step->next;
11675 + }
11676 + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1);
11677 + TRACE("%s: done.\n", __FUNCTION__);
11678 + */
11679 +
11680 + tasklet->next = NULL; // make sure there are no old values floating around
11681 +
11682 + step = cluster->pending_tasklets.head;
11683 + if(step == NULL) {
11684 + TRACE("%s: tasklet queue empty. inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid);
11685 + // insert at tail.
11686 + *(cluster->pending_tasklets.tail) = tasklet;
11687 + cluster->pending_tasklets.tail = &(tasklet->next);
11688 + }
11689 + else if((*(cluster->pending_tasklets.tail) != NULL) &&
11690 + rm_srt_higher_prio((*(cluster->pending_tasklets.tail))->owner, tasklet->owner)) {
11691 + // insert at tail.
11692 + TRACE("%s: tasklet belongs at end. inserting tasklet for %d at tail.\n", __FUNCTION__, tasklet->owner->pid);
11693 +
11694 + *(cluster->pending_tasklets.tail) = tasklet;
11695 + cluster->pending_tasklets.tail = &(tasklet->next);
11696 + }
11697 + else {
11698 +
11699 + //WARN_ON(1 == 1);
11700 +
11701 + // insert the tasklet somewhere in the middle.
11702 +
11703 + TRACE("%s: tasklet belongs somewhere in the middle.\n", __FUNCTION__);
11704 +
11705 + while(step->next && rm_srt_higher_prio(step->next->owner, tasklet->owner)) {
11706 + step = step->next;
11707 + }
11708 +
11709 + // insert tasklet right before step->next.
11710 +
11711 + TRACE("%s: inserting tasklet for %d between %d and %d.\n", __FUNCTION__, tasklet->owner->pid, step->owner->pid, (step->next) ? step->next->owner->pid : -1);
11712 +
11713 + tasklet->next = step->next;
11714 + step->next = tasklet;
11715 +
11716 + // patch up the head if needed.
11717 + if(cluster->pending_tasklets.head == step)
11718 + {
11719 + TRACE("%s: %d is the new tasklet queue head.\n", __FUNCTION__, tasklet->owner->pid);
11720 + cluster->pending_tasklets.head = tasklet;
11721 + }
11722 + }
11723 +
11724 + /*
11725 + step = cluster->pending_tasklets.head;
11726 + TRACE("%s: (AFTER) dumping tasklet queue...\n", __FUNCTION__);
11727 + while(step != NULL){
11728 + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid);
11729 + step = step->next;
11730 + }
11731 + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1);
11732 + TRACE("%s: done.\n", __FUNCTION__);
11733 + */
11734 +
11735 +	// NOTE: the insertion logic above already keeps this list in priority order;
11736 + // tasklet->next = NULL;
11737 + // *(cluster->pending_tasklets.tail) = tasklet;
11738 + // cluster->pending_tasklets.tail = &tasklet->next;
11739 +}
11740 +
11741 +static int enqueue_pai_tasklet(struct tasklet_struct* tasklet)
11742 +{
11743 + crm_srt_domain_t *cluster = NULL;
11744 + cpu_entry_t *targetCPU = NULL;
11745 + int thisCPU;
11746 + int runLocal = 0;
11747 + int runNow = 0;
11748 + unsigned long flags;
11749 +
11750 + if(unlikely((tasklet->owner == NULL) || !is_realtime(tasklet->owner)))
11751 + {
11752 + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
11753 + return 0;
11754 + }
11755 +
11756 + cluster = task_cpu_cluster(tasklet->owner);
11757 +
11758 + raw_spin_lock_irqsave(&cluster->crm_srt_lock, flags);
11759 +
11760 + thisCPU = smp_processor_id();
11761 +
11762 +#if 1
11763 +#ifdef CONFIG_SCHED_CPU_AFFINITY
11764 + {
11765 + cpu_entry_t* affinity = NULL;
11766 +
11767 + // use this CPU if it is in our cluster and isn't running any RT work.
11768 + if(cpu_isset(thisCPU, *cluster->cpu_map) && (__get_cpu_var(crm_srt_cpu_entries).linked == NULL)) {
11769 + affinity = &(__get_cpu_var(crm_srt_cpu_entries));
11770 + }
11771 + else {
11772 +			// this CPU is busy or shouldn't run tasklets in this cluster.
11773 +			// look for an available nearby CPU.
11774 + // NOTE: Affinity towards owner and not this CPU. Is this right?
11775 + affinity =
11776 + crm_srt_get_nearest_available_cpu(cluster,
11777 + &per_cpu(crm_srt_cpu_entries, task_cpu(tasklet->owner)));
11778 + }
11779 +
11780 + targetCPU = affinity;
11781 + }
11782 +#endif
11783 +#endif
11784 +
11785 + if (targetCPU == NULL) {
11786 + targetCPU = lowest_prio_cpu(cluster);
11787 + }
11788 +
11789 + if (rm_srt_higher_prio(tasklet->owner, targetCPU->linked)) {
11790 + if (thisCPU == targetCPU->cpu) {
11791 + TRACE("%s: Run tasklet locally (and now).\n", __FUNCTION__);
11792 + runLocal = 1;
11793 + runNow = 1;
11794 + }
11795 + else {
11796 + TRACE("%s: Run tasklet remotely (and now).\n", __FUNCTION__);
11797 + runLocal = 0;
11798 + runNow = 1;
11799 + }
11800 + }
11801 + else {
11802 + runLocal = 0;
11803 + runNow = 0;
11804 + }
11805 +
11806 + if(!runLocal) {
11807 + // enqueue the tasklet
11808 + __add_pai_tasklet(tasklet, cluster);
11809 + }
11810 +
11811 + raw_spin_unlock_irqrestore(&cluster->crm_srt_lock, flags);
11812 +
11813 +
11814 + if (runLocal /*&& runNow */) { // runNow == 1 is implied
11815 + TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__);
11816 + __do_lit_tasklet(tasklet, 0ul);
11817 + }
11818 + else if (runNow /*&& !runLocal */) { // runLocal == 0 is implied
11819 + TRACE("%s: Triggering CPU %d to run tasklet.\n", __FUNCTION__, targetCPU->cpu);
11820 + preempt(targetCPU); // need to be protected by crm_srt_lock?
11821 + }
11822 + else {
11823 + TRACE("%s: Scheduling of tasklet was deferred.\n", __FUNCTION__);
11824 + }
11825 +
11826 + return(1); // success
11827 +}
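/* Editorial summary (assumes the semantics of enqueue_pai_tasklet() above):
 *
 *   owner outranks targetCPU->linked?   targetCPU is this CPU?   action taken
 *   ---------------------------------   ----------------------   ------------------------------------
 *   yes                                 yes                      run the tasklet inline, immediately
 *   yes                                 no                       enqueue it, then preempt targetCPU
 *   no                                  (either)                 enqueue it; it runs later via
 *                                                                run_tasklets()/do_lit_tasklets()
 */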
11828 +
11829 +
11830 +#endif
11831 +
11832 +
11833 +
11834 +
11835 +
11836 +
11837 +
11838 +
11839 +
11840 +
11841 +
11842 +
11843 +
11844 +
11845 +
11846 +
11847 +
11848 +
11849 +
11850 +/* Getting schedule() right is a bit tricky. schedule() may not make any
11851 + * assumptions on the state of the current task since it may be called for a
11852 + * number of reasons. The reasons include a scheduler_tick() determined that it
11853 + * was necessary, because sys_exit_np() was called, because some Linux
11854 + * subsystem determined so, or even (in the worst case) because there is a bug
11855 + * hidden somewhere. Thus, we must take extreme care to determine what the
11856 + * current state is.
11857 + *
11858 + * The CPU could currently be scheduling a task (or not), be linked (or not).
11859 + *
11860 + * The following assertions for the scheduled task could hold:
11861 + *
11862 + * - !is_running(scheduled) // the job blocks
11863 + * - scheduled->timeslice == 0 // the job completed (forcefully)
11864 + * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall)
11865 + * - linked != scheduled // we need to reschedule (for any reason)
11866 + * - is_np(scheduled) // rescheduling must be delayed,
11867 + * sys_exit_np must be requested
11868 + *
11869 + * Any of these can occur together.
11870 + */
11871 +static struct task_struct* crm_srt_schedule(struct task_struct * prev)
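/* Editorial reading aid (mirrors, but is not part of, the logic below; names
 * refer to the local state bits computed in step (0) of crm_srt_schedule()):
 *
 *   job completes now:   !np && (out_of_time || sleep) && !blocks && !preempt
 *   pull next ready job: entry->linked == NULL (after unlink/completion)
 *   context switch:      (!np || blocks) && entry->linked != entry->scheduled
 */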
11872 +{
11873 + cpu_entry_t* entry = &__get_cpu_var(crm_srt_cpu_entries);
11874 + crm_srt_domain_t *cluster = entry->cluster;
11875 + int out_of_time, sleep, preempt, np, exists, blocks;
11876 + struct task_struct* next = NULL;
11877 +
11878 + raw_spin_lock(&cluster->crm_srt_lock);
11879 + clear_will_schedule();
11880 +
11881 + /* sanity checking */
11882 + BUG_ON(entry->scheduled && entry->scheduled != prev);
11883 + BUG_ON(entry->scheduled && !is_realtime(prev));
11884 + BUG_ON(is_realtime(prev) && !entry->scheduled);
11885 +
11886 + /* (0) Determine state */
11887 + exists = entry->scheduled != NULL;
11888 + blocks = exists && !is_running(entry->scheduled);
11889 + out_of_time = exists &&
11890 + budget_enforced(entry->scheduled) &&
11891 + budget_exhausted(entry->scheduled);
11892 + np = exists && is_np(entry->scheduled);
11893 + sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
11894 + preempt = entry->scheduled != entry->linked;
11895 +
11896 +#ifdef WANT_ALL_SCHED_EVENTS
11897 + TRACE_TASK(prev, "invoked crm_srt_schedule.\n");
11898 +#endif
11899 +
11900 + if (exists)
11901 + TRACE_TASK(prev,
11902 + "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
11903 + "state:%d sig:%d\n",
11904 + blocks, out_of_time, np, sleep, preempt,
11905 + prev->state, signal_pending(prev));
11906 + if (entry->linked && preempt)
11907 + TRACE_TASK(prev, "will be preempted by %s/%d\n",
11908 + entry->linked->comm, entry->linked->pid);
11909 +
11910 +
11911 + /* If a task blocks we have no choice but to reschedule.
11912 + */
11913 + if (blocks)
11914 + unlink(entry->scheduled);
11915 +
11916 + /* Request a sys_exit_np() call if we would like to preempt but cannot.
11917 + * We need to make sure to update the link structure anyway in case
11918 + * that we are still linked. Multiple calls to request_exit_np() don't
11919 + * hurt.
11920 + */
11921 + if (np && (out_of_time || preempt || sleep)) {
11922 + unlink(entry->scheduled);
11923 + request_exit_np(entry->scheduled);
11924 + }
11925 +
11926 + /* Any task that is preemptable and either exhausts its execution
11927 + * budget or wants to sleep completes. We may have to reschedule after
11928 + * this. Don't do a job completion if we block (can't have timers running
11929 + * for blocked jobs). Preemption go first for the same reason.
11930 +	 * for blocked jobs). Preemptions go first for the same reason.
11931 + if (!np && (out_of_time || sleep) && !blocks && !preempt)
11932 + job_completion(entry->scheduled, !sleep);
11933 +
11934 + /* Link pending task if we became unlinked.
11935 + */
11936 + if (!entry->linked)
11937 + link_task_to_cpu(__take_ready(&cluster->domain), entry);
11938 +
11939 + /* The final scheduling decision. Do we need to switch for some reason?
11940 + * If linked is different from scheduled, then select linked as next.
11941 + */
11942 + if ((!np || blocks) &&
11943 + entry->linked != entry->scheduled) {
11944 + /* Schedule a linked job? */
11945 + if (entry->linked) {
11946 + entry->linked->rt_param.scheduled_on = entry->cpu;
11947 + next = entry->linked;
11948 + }
11949 + if (entry->scheduled) {
11950 + /* not gonna be scheduled soon */
11951 + entry->scheduled->rt_param.scheduled_on = NO_CPU;
11952 + TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
11953 + }
11954 + } else
11955 + /* Only override Linux scheduler if we have a real-time task
11956 + * scheduled that needs to continue.
11957 + */
11958 + if (exists)
11959 + next = prev;
11960 +
11961 + sched_state_task_picked();
11962 + raw_spin_unlock(&cluster->crm_srt_lock);
11963 +
11964 +#ifdef WANT_ALL_SCHED_EVENTS
11965 + TRACE("crm_srt_lock released, next=0x%p\n", next);
11966 +
11967 + if (next)
11968 + TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
11969 + else if (exists && !next)
11970 + TRACE("becomes idle at %llu.\n", litmus_clock());
11971 +#endif
11972 +
11973 +
11974 + return next;
11975 +}
11976 +
11977 +
11978 +/* _finish_switch - we just finished the switch away from prev
11979 + */
11980 +static void crm_srt_finish_switch(struct task_struct *prev)
11981 +{
11982 + cpu_entry_t* entry = &__get_cpu_var(crm_srt_cpu_entries);
11983 +
11984 + entry->scheduled = is_realtime(current) ? current : NULL;
11985 +#ifdef WANT_ALL_SCHED_EVENTS
11986 + TRACE_TASK(prev, "switched away from\n");
11987 +#endif
11988 +}
11989 +
11990 +
11991 +/* Prepare a task for running in RT mode
11992 + */
11993 +static void crm_srt_task_new(struct task_struct * t, int on_rq, int running)
11994 +{
11995 + unsigned long flags;
11996 + cpu_entry_t* entry;
11997 + crm_srt_domain_t* cluster;
11998 +
11999 + TRACE("crm srt: task new %d\n", t->pid);
12000 +
12001 + /* the cluster doesn't change even if t is running */
12002 + cluster = task_cpu_cluster(t);
12003 +
12004 + raw_spin_lock_irqsave(&cluster->crm_srt_lock, flags);
12005 +
12006 + /* setup job params */
12007 + release_at(t, litmus_clock());
12008 +
12009 + if (running) {
12010 + entry = &per_cpu(crm_srt_cpu_entries, task_cpu(t));
12011 + BUG_ON(entry->scheduled);
12012 +
12013 + entry->scheduled = t;
12014 + tsk_rt(t)->scheduled_on = task_cpu(t);
12015 + } else {
12016 + t->rt_param.scheduled_on = NO_CPU;
12017 + }
12018 + t->rt_param.linked_on = NO_CPU;
12019 +
12020 + crm_srt_job_arrival(t);
12021 + raw_spin_unlock_irqrestore(&cluster->crm_srt_lock, flags);
12022 +}
12023 +
12024 +static void crm_srt_task_wake_up(struct task_struct *task)
12025 +{
12026 + unsigned long flags;
12027 + //lt_t now;
12028 + crm_srt_domain_t *cluster;
12029 +
12030 + TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
12031 +
12032 + cluster = task_cpu_cluster(task);
12033 +
12034 + raw_spin_lock_irqsave(&cluster->crm_srt_lock, flags);
12035 +
12036 +#if 0 // sporadic task model
12037 + /* We need to take suspensions because of semaphores into
12038 + * account! If a job resumes after being suspended due to acquiring
12039 + * a semaphore, it should never be treated as a new job release.
12040 + */
12041 + if (get_rt_flags(task) == RT_F_EXIT_SEM) {
12042 + set_rt_flags(task, RT_F_RUNNING);
12043 + } else {
12044 + now = litmus_clock();
12045 + if (is_tardy(task, now)) {
12046 + /* new sporadic release */
12047 + release_at(task, now);
12048 + sched_trace_task_release(task);
12049 + }
12050 + else {
12051 + if (task->rt.time_slice) {
12052 + /* came back in time before deadline
12053 + */
12054 + set_rt_flags(task, RT_F_RUNNING);
12055 + }
12056 + }
12057 + }
12058 +#endif
12059 +
12060 + //BUG_ON(tsk_rt(task)->linked_on != NO_CPU);
12061 + set_rt_flags(task, RT_F_RUNNING); // periodic model
12062 +
12063 + if(tsk_rt(task)->linked_on == NO_CPU)
12064 + crm_srt_job_arrival(task);
12065 + else
12066 +		TRACE_TASK(task, "already linked to CPU %d on wake-up; skipping job arrival.\n", tsk_rt(task)->linked_on);
12067 +
12068 + raw_spin_unlock_irqrestore(&cluster->crm_srt_lock, flags);
12069 +}
12070 +
12071 +static void crm_srt_task_block(struct task_struct *t)
12072 +{
12073 + unsigned long flags;
12074 + crm_srt_domain_t *cluster;
12075 +
12076 + TRACE_TASK(t, "block at %llu\n", litmus_clock());
12077 +
12078 + cluster = task_cpu_cluster(t);
12079 +
12080 + /* unlink if necessary */
12081 + raw_spin_lock_irqsave(&cluster->crm_srt_lock, flags);
12082 + unlink(t);
12083 + raw_spin_unlock_irqrestore(&cluster->crm_srt_lock, flags);
12084 +
12085 + BUG_ON(!is_realtime(t));
12086 +}
12087 +
12088 +
12089 +static void crm_srt_task_exit(struct task_struct * t)
12090 +{
12091 + unsigned long flags;
12092 + crm_srt_domain_t *cluster = task_cpu_cluster(t);
12093 +
12094 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
12095 + flush_tasklets(cluster, t);
12096 +#endif
12097 +
12098 + /* unlink if necessary */
12099 + raw_spin_lock_irqsave(&cluster->crm_srt_lock, flags);
12100 + unlink(t);
12101 + if (tsk_rt(t)->scheduled_on != NO_CPU) {
12102 + cpu_entry_t *cpu;
12103 + cpu = &per_cpu(crm_srt_cpu_entries, tsk_rt(t)->scheduled_on);
12104 + cpu->scheduled = NULL;
12105 + tsk_rt(t)->scheduled_on = NO_CPU;
12106 + }
12107 + raw_spin_unlock_irqrestore(&cluster->crm_srt_lock, flags);
12108 +
12109 + BUG_ON(!is_realtime(t));
12110 + TRACE_TASK(t, "RIP\n");
12111 +}
12112 +
12113 +static long crm_srt_admit_task(struct task_struct* tsk)
12114 +{
12115 + return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
12116 +}
12117 +
12118 +
12119 +
12120 +
12121 +
12122 +
12123 +
12124 +
12125 +
12126 +
12127 +
12128 +
12129 +
12130 +#ifdef CONFIG_LITMUS_LOCKING
12131 +
12132 +#include <litmus/fdso.h>
12133 +
12134 +
12135 +static void __set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
12136 +{
12137 + int linked_on;
12138 + int check_preempt = 0;
12139 +
12140 + crm_srt_domain_t* cluster = task_cpu_cluster(t);
12141 +
12142 + if(prio_inh != NULL)
12143 + TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid);
12144 + else
12145 + TRACE_TASK(t, "inherits priority from %p\n", prio_inh);
12146 +
12147 + sched_trace_eff_prio_change(t, prio_inh);
12148 +
12149 + tsk_rt(t)->inh_task = prio_inh;
12150 +
12151 + linked_on = tsk_rt(t)->linked_on;
12152 +
12153 + /* If it is scheduled, then we need to reorder the CPU heap. */
12154 + if (linked_on != NO_CPU) {
12155 + TRACE_TASK(t, "%s: linked on %d\n",
12156 + __FUNCTION__, linked_on);
12157 + /* Holder is scheduled; need to re-order CPUs.
12158 + * We can't use heap_decrease() here since
12159 + * the cpu_heap is ordered in reverse direction, so
12160 + * it is actually an increase. */
12161 + bheap_delete(cpu_lower_prio, &cluster->cpu_heap,
12162 + per_cpu(crm_srt_cpu_entries, linked_on).hn);
12163 + bheap_insert(cpu_lower_prio, &cluster->cpu_heap,
12164 + per_cpu(crm_srt_cpu_entries, linked_on).hn);
12165 + } else {
12166 + /* holder may be queued: first stop queue changes */
12167 + raw_spin_lock(&cluster->domain.release_lock);
12168 + if (is_queued(t)) {
12169 + TRACE_TASK(t, "%s: is queued\n", __FUNCTION__);
12170 +
12171 + /* We need to update the position of holder in some
12172 +			 * heap. Note that this could be a release heap if
12173 + * budget enforcement is used and this job overran. */
12174 + check_preempt = !bheap_decrease(rm_srt_ready_order, tsk_rt(t)->heap_node);
12175 +
12176 + } else {
12177 + /* Nothing to do: if it is not queued and not linked
12178 + * then it is either sleeping or currently being moved
12179 + * by other code (e.g., a timer interrupt handler) that
12180 + * will use the correct priority when enqueuing the
12181 + * task. */
12182 + TRACE_TASK(t, "%s: is NOT queued => Done.\n", __FUNCTION__);
12183 + }
12184 + raw_spin_unlock(&cluster->domain.release_lock);
12185 +
12186 + /* If holder was enqueued in a release heap, then the following
12187 + * preemption check is pointless, but we can't easily detect
12188 + * that case. If you want to fix this, then consider that
12189 + * simply adding a state flag requires O(n) time to update when
12190 + * releasing n tasks, which conflicts with the goal to have
12191 + * O(log n) merges. */
12192 + if (check_preempt) {
12193 + /* heap_decrease() hit the top level of the heap: make
12194 + * sure preemption checks get the right task, not the
12195 + * potentially stale cache. */
12196 + bheap_uncache_min(rm_srt_ready_order, &cluster->domain.ready_queue);
12197 + check_for_preemptions(cluster);
12198 + }
12199 + }
12200 +}
12201 +
12202 +/* called with IRQs off */
12203 +static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
12204 +{
12205 + crm_srt_domain_t* cluster = task_cpu_cluster(t);
12206 +
12207 + raw_spin_lock(&cluster->crm_srt_lock);
12208 +
12209 + __set_priority_inheritance(t, prio_inh);
12210 +
12211 +#ifdef CONFIG_LITMUS_SOFTIRQD
12212 + if(tsk_rt(t)->cur_klitirqd != NULL)
12213 + {
12214 + TRACE_TASK(t, "%s/%d inherits a new priority!\n",
12215 + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
12216 +
12217 + __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh);
12218 + }
12219 +#endif
12220 +
12221 + raw_spin_unlock(&cluster->crm_srt_lock);
12222 +}
12223 +
12224 +
12225 +/* called with IRQs off */
12226 +static void __clear_priority_inheritance(struct task_struct* t)
12227 +{
12228 + TRACE_TASK(t, "priority restored\n");
12229 +
12230 + if(tsk_rt(t)->scheduled_on != NO_CPU)
12231 + {
12232 + sched_trace_eff_prio_change(t, NULL);
12233 +
12234 + tsk_rt(t)->inh_task = NULL;
12235 +
12236 + /* Check if rescheduling is necessary. We can't use heap_decrease()
12237 + * since the priority was effectively lowered. */
12238 + unlink(t);
12239 + crm_srt_job_arrival(t);
12240 + }
12241 + else
12242 + {
12243 + __set_priority_inheritance(t, NULL);
12244 + }
12245 +
12246 +#ifdef CONFIG_LITMUS_SOFTIRQD
12247 + if(tsk_rt(t)->cur_klitirqd != NULL)
12248 + {
12249 + TRACE_TASK(t, "%s/%d inheritance set back to owner.\n",
12250 + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
12251 +
12252 + if(tsk_rt(tsk_rt(t)->cur_klitirqd)->scheduled_on != NO_CPU)
12253 + {
12254 + sched_trace_eff_prio_change(tsk_rt(t)->cur_klitirqd, t);
12255 +
12256 + tsk_rt(tsk_rt(t)->cur_klitirqd)->inh_task = t;
12257 +
12258 + /* Check if rescheduling is necessary. We can't use heap_decrease()
12259 + * since the priority was effectively lowered. */
12260 + unlink(tsk_rt(t)->cur_klitirqd);
12261 + crm_srt_job_arrival(tsk_rt(t)->cur_klitirqd);
12262 + }
12263 + else
12264 + {
12265 + __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, t);
12266 + }
12267 + }
12268 +#endif
12269 +}
12270 +
12271 +/* called with IRQs off */
12272 +static void clear_priority_inheritance(struct task_struct* t)
12273 +{
12274 + crm_srt_domain_t* cluster = task_cpu_cluster(t);
12275 +
12276 + raw_spin_lock(&cluster->crm_srt_lock);
12277 + __clear_priority_inheritance(t);
12278 + raw_spin_unlock(&cluster->crm_srt_lock);
12279 +}
12280 +
12281 +
12282 +
12283 +#ifdef CONFIG_LITMUS_SOFTIRQD
12284 +/* called with IRQs off */
12285 +static void set_priority_inheritance_klitirqd(struct task_struct* klitirqd,
12286 + struct task_struct* old_owner,
12287 + struct task_struct* new_owner)
12288 +{
12289 + crm_srt_domain_t* cluster = task_cpu_cluster(klitirqd);
12290 +
12291 + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
12292 +
12293 + raw_spin_lock(&cluster->crm_srt_lock);
12294 +
12295 + if(old_owner != new_owner)
12296 + {
12297 + if(old_owner)
12298 + {
12299 + // unreachable?
12300 + tsk_rt(old_owner)->cur_klitirqd = NULL;
12301 + }
12302 +
12303 + TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n",
12304 + new_owner->comm, new_owner->pid);
12305 +
12306 + tsk_rt(new_owner)->cur_klitirqd = klitirqd;
12307 + }
12308 +
12309 + __set_priority_inheritance(klitirqd,
12310 + (tsk_rt(new_owner)->inh_task == NULL) ?
12311 + new_owner :
12312 + tsk_rt(new_owner)->inh_task);
12313 +
12314 + raw_spin_unlock(&cluster->crm_srt_lock);
12315 +}
12316 +
12317 +/* called with IRQs off */
12318 +static void clear_priority_inheritance_klitirqd(struct task_struct* klitirqd,
12319 + struct task_struct* old_owner)
12320 +{
12321 + crm_srt_domain_t* cluster = task_cpu_cluster(klitirqd);
12322 +
12323 + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
12324 +
12325 + raw_spin_lock(&cluster->crm_srt_lock);
12326 +
12327 + TRACE_TASK(klitirqd, "priority restored\n");
12328 +
12329 + if(tsk_rt(klitirqd)->scheduled_on != NO_CPU)
12330 + {
12331 + tsk_rt(klitirqd)->inh_task = NULL;
12332 +
12333 + /* Check if rescheduling is necessary. We can't use heap_decrease()
12334 + * since the priority was effectively lowered. */
12335 + unlink(klitirqd);
12336 + crm_srt_job_arrival(klitirqd);
12337 + }
12338 + else
12339 + {
12340 + __set_priority_inheritance(klitirqd, NULL);
12341 + }
12342 +
12343 + tsk_rt(old_owner)->cur_klitirqd = NULL;
12344 +
12345 + raw_spin_unlock(&cluster->crm_srt_lock);
12346 +}
12347 +#endif // CONFIG_LITMUS_SOFTIRQD
12348 +
12349 +
12350 +/* ******************** KFMLP support ********************** */
12351 +
12352 +/* struct for semaphore with priority inheritance */
12353 +struct kfmlp_queue
12354 +{
12355 + wait_queue_head_t wait;
12356 + struct task_struct* owner;
12357 + struct task_struct* hp_waiter;
12358 + int count; /* number of waiters + holder */
12359 +};
12360 +
12361 +struct kfmlp_semaphore
12362 +{
12363 + struct litmus_lock litmus_lock;
12364 +
12365 + spinlock_t lock;
12366 +
12367 + int num_resources; /* aka k */
12368 + struct kfmlp_queue *queues; /* array */
12369 + struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */
12370 +};
12371 +
12372 +static inline struct kfmlp_semaphore* kfmlp_from_lock(struct litmus_lock* lock)
12373 +{
12374 + return container_of(lock, struct kfmlp_semaphore, litmus_lock);
12375 +}
12376 +
12377 +static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem,
12378 + struct kfmlp_queue* queue)
12379 +{
12380 + return (queue - &sem->queues[0]);
12381 +}
12382 +
12383 +static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem,
12384 + struct task_struct* holder)
12385 +{
12386 + int i;
12387 + for(i = 0; i < sem->num_resources; ++i)
12388 + if(sem->queues[i].owner == holder)
12389 + return(&sem->queues[i]);
12390 + return(NULL);
12391 +}
12392 +
12393 +/* caller is responsible for locking */
12394 +static struct task_struct* kfmlp_find_hp_waiter(struct kfmlp_queue *kqueue,
12395 + struct task_struct *skip)
12396 +{
12397 + struct list_head *pos;
12398 + struct task_struct *queued, *found = NULL;
12399 +
12400 + list_for_each(pos, &kqueue->wait.task_list) {
12401 + queued = (struct task_struct*) list_entry(pos, wait_queue_t,
12402 + task_list)->private;
12403 +
12404 + /* Compare task prios, find high prio task. */
12405 + if (queued != skip && rm_srt_higher_prio(queued, found))
12406 + found = queued;
12407 + }
12408 + return found;
12409 +}
12410 +
12411 +static inline struct kfmlp_queue* kfmlp_find_shortest(
12412 + struct kfmlp_semaphore* sem,
12413 + struct kfmlp_queue* search_start)
12414 +{
12415 + // we start our search at search_start instead of at the beginning of the
12416 + // queue list to load-balance across all resources.
12417 + struct kfmlp_queue* step = search_start;
12418 + struct kfmlp_queue* shortest = sem->shortest_queue;
12419 +
12420 + do
12421 + {
12422 + step = (step+1 != &sem->queues[sem->num_resources]) ?
12423 + step+1 : &sem->queues[0];
12424 + if(step->count < shortest->count)
12425 + {
12426 + shortest = step;
12427 + if(step->count == 0)
12428 + break; /* can't get any shorter */
12429 + }
12430 + }while(step != search_start);
12431 +
12432 + return(shortest);
12433 +}
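/* Editorial example (illustration only; assumes sem->shortest_queue has gone
 * stale and still points at queue 0): with num_resources = 3 and per-queue
 * counts {2, 0, 1}, a search started at queue 0 steps to queue 1, sees
 * count == 0 and stops immediately, while a search started at queue 2 walks
 * queue 0 and then queue 1 before stopping.  Rotating the starting point is
 * what load-balances contending jobs across the k queue replicas.
 */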
12434 +
12435 +static struct task_struct* kfmlp_remove_hp_waiter(struct kfmlp_semaphore* sem)
12436 +{
12437 + /* must hold sem->lock */
12438 +
12439 + struct kfmlp_queue *my_queue = NULL;
12440 + struct task_struct *max_hp = NULL;
12441 +
12442 +
12443 + struct list_head *pos;
12444 + struct task_struct *queued;
12445 + int i;
12446 +
12447 + for(i = 0; i < sem->num_resources; ++i)
12448 + {
12449 + if( (sem->queues[i].count > 1) &&
12450 + ((my_queue == NULL) ||
12451 + (rm_srt_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) )
12452 + {
12453 + my_queue = &sem->queues[i];
12454 + }
12455 + }
12456 +
12457 + if(my_queue)
12458 + {
12459 + crm_srt_domain_t* cluster;
12460 +
12461 + max_hp = my_queue->hp_waiter;
12462 + BUG_ON(!max_hp);
12463 +
12464 + TRACE_CUR("queue %d: stealing %s/%d from queue %d\n",
12465 + kfmlp_get_idx(sem, my_queue),
12466 + max_hp->comm, max_hp->pid,
12467 + kfmlp_get_idx(sem, my_queue));
12468 +
12469 + my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, max_hp);
12470 +
12471 + /*
12472 + if(my_queue->hp_waiter)
12473 + TRACE_CUR("queue %d: new hp_waiter is %s/%d\n",
12474 + kfmlp_get_idx(sem, my_queue),
12475 + my_queue->hp_waiter->comm,
12476 + my_queue->hp_waiter->pid);
12477 + else
12478 + TRACE_CUR("queue %d: new hp_waiter is %p\n",
12479 + kfmlp_get_idx(sem, my_queue), NULL);
12480 + */
12481 +
12482 + cluster = task_cpu_cluster(max_hp);
12483 +
12484 + raw_spin_lock(&cluster->crm_srt_lock);
12485 +
12486 + /*
12487 + if(my_queue->owner)
12488 + TRACE_CUR("queue %d: owner is %s/%d\n",
12489 + kfmlp_get_idx(sem, my_queue),
12490 + my_queue->owner->comm,
12491 + my_queue->owner->pid);
12492 + else
12493 + TRACE_CUR("queue %d: owner is %p\n",
12494 + kfmlp_get_idx(sem, my_queue),
12495 + NULL);
12496 + */
12497 +
12498 + if(tsk_rt(my_queue->owner)->inh_task == max_hp)
12499 + {
12500 + __clear_priority_inheritance(my_queue->owner);
12501 + if(my_queue->hp_waiter != NULL)
12502 + {
12503 + __set_priority_inheritance(my_queue->owner, my_queue->hp_waiter);
12504 + }
12505 + }
12506 + raw_spin_unlock(&cluster->crm_srt_lock);
12507 +
12508 + list_for_each(pos, &my_queue->wait.task_list)
12509 + {
12510 + queued = (struct task_struct*) list_entry(pos, wait_queue_t,
12511 + task_list)->private;
12512 + /* Compare task prios, find high prio task. */
12513 + if (queued == max_hp)
12514 + {
12515 + /*
12516 + TRACE_CUR("queue %d: found entry in wait queue. REMOVING!\n",
12517 + kfmlp_get_idx(sem, my_queue));
12518 + */
12519 + __remove_wait_queue(&my_queue->wait,
12520 + list_entry(pos, wait_queue_t, task_list));
12521 + break;
12522 + }
12523 + }
12524 + --(my_queue->count);
12525 + }
12526 +
12527 + return(max_hp);
12528 +}
12529 +
12530 +int crm_srt_kfmlp_lock(struct litmus_lock* l)
12531 +{
12532 + struct task_struct* t = current;
12533 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
12534 + struct kfmlp_queue* my_queue;
12535 + wait_queue_t wait;
12536 + unsigned long flags;
12537 +
12538 + if (!is_realtime(t))
12539 + return -EPERM;
12540 +
12541 + spin_lock_irqsave(&sem->lock, flags);
12542 +
12543 + my_queue = sem->shortest_queue;
12544 +
12545 + if (my_queue->owner) {
12546 + /* resource is not free => must suspend and wait */
12547 + TRACE_CUR("queue %d: Resource is not free => must suspend and wait.\n",
12548 + kfmlp_get_idx(sem, my_queue));
12549 +
12550 + init_waitqueue_entry(&wait, t);
12551 +
12552 + /* FIXME: interruptible would be nice some day */
12553 + set_task_state(t, TASK_UNINTERRUPTIBLE);
12554 +
12555 + __add_wait_queue_tail_exclusive(&my_queue->wait, &wait);
12556 +
12557 + /* check if we need to activate priority inheritance */
12558 + if (rm_srt_higher_prio(t, my_queue->hp_waiter))
12559 + {
12560 + my_queue->hp_waiter = t;
12561 + if (rm_srt_higher_prio(t, my_queue->owner))
12562 + {
12563 + set_priority_inheritance(my_queue->owner, my_queue->hp_waiter);
12564 + }
12565 + }
12566 +
12567 + ++(my_queue->count);
12568 + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
12569 +
12570 + /* release lock before sleeping */
12571 + spin_unlock_irqrestore(&sem->lock, flags);
12572 +
12573 + /* We depend on the FIFO order. Thus, we don't need to recheck
12574 + * when we wake up; we are guaranteed to have the lock since
12575 + * there is only one wake up per release (or steal).
12576 + */
12577 + schedule();
12578 +
12579 +
12580 + if(my_queue->owner == t)
12581 + {
12582 + TRACE_CUR("queue %d: acquired through waiting\n",
12583 + kfmlp_get_idx(sem, my_queue));
12584 + }
12585 + else
12586 + {
12587 + /* this case may happen if our wait entry was stolen
12588 + between queues. record where we went.*/
12589 + my_queue = kfmlp_get_queue(sem, t);
12590 + BUG_ON(!my_queue);
12591 + TRACE_CUR("queue %d: acquired through stealing\n",
12592 + kfmlp_get_idx(sem, my_queue));
12593 + }
12594 + }
12595 + else
12596 + {
12597 + TRACE_CUR("queue %d: acquired immediately\n",
12598 + kfmlp_get_idx(sem, my_queue));
12599 +
12600 + my_queue->owner = t;
12601 +
12602 + ++(my_queue->count);
12603 + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
12604 +
12605 + spin_unlock_irqrestore(&sem->lock, flags);
12606 + }
12607 +
12608 + return kfmlp_get_idx(sem, my_queue);
12609 +}
12610 +
12611 +int crm_srt_kfmlp_unlock(struct litmus_lock* l)
12612 +{
12613 + struct task_struct *t = current, *next;
12614 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
12615 + struct kfmlp_queue *my_queue;
12616 + unsigned long flags;
12617 + int err = 0;
12618 +
12619 + spin_lock_irqsave(&sem->lock, flags);
12620 +
12621 + my_queue = kfmlp_get_queue(sem, t);
12622 +
12623 + if (!my_queue) {
12624 + err = -EINVAL;
12625 + goto out;
12626 + }
12627 +
12628 + /* check if there are jobs waiting for this resource */
12629 + next = __waitqueue_remove_first(&my_queue->wait);
12630 + if (next) {
12631 + /*
12632 + TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n",
12633 + kfmlp_get_idx(sem, my_queue),
12634 + next->comm, next->pid);
12635 + */
12636 +		/* next becomes the resource holder */
12637 + my_queue->owner = next;
12638 +
12639 + --(my_queue->count);
12640 + if(my_queue->count < sem->shortest_queue->count)
12641 + {
12642 + sem->shortest_queue = my_queue;
12643 + }
12644 +
12645 + TRACE_CUR("queue %d: lock ownership passed to %s/%d\n",
12646 + kfmlp_get_idx(sem, my_queue), next->comm, next->pid);
12647 +
12648 + /* determine new hp_waiter if necessary */
12649 + if (next == my_queue->hp_waiter) {
12650 + TRACE_TASK(next, "was highest-prio waiter\n");
12651 + /* next has the highest priority --- it doesn't need to
12652 + * inherit. However, we need to make sure that the
12653 + * next-highest priority in the queue is reflected in
12654 + * hp_waiter. */
12655 + my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next);
12656 + if (my_queue->hp_waiter)
12657 + TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue));
12658 + else
12659 + TRACE("queue %d: no further waiters\n", kfmlp_get_idx(sem, my_queue));
12660 + } else {
12661 + /* Well, if next is not the highest-priority waiter,
12662 + * then it ought to inherit the highest-priority
12663 + * waiter's priority. */
12664 + set_priority_inheritance(next, my_queue->hp_waiter);
12665 + }
12666 +
12667 + /* wake up next */
12668 + wake_up_process(next);
12669 + }
12670 + else
12671 + {
12672 + TRACE_CUR("queue %d: looking to steal someone...\n", kfmlp_get_idx(sem, my_queue));
12673 +
12674 + next = kfmlp_remove_hp_waiter(sem); /* returns NULL if nothing to steal */
12675 +
12676 + /*
12677 + if(next)
12678 + TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - steal\n",
12679 + kfmlp_get_idx(sem, my_queue),
12680 + next->comm, next->pid);
12681 + */
12682 +
12683 + my_queue->owner = next;
12684 +
12685 + if(next)
12686 + {
12687 + TRACE_CUR("queue %d: lock ownership passed to %s/%d (which was stolen)\n",
12688 + kfmlp_get_idx(sem, my_queue),
12689 + next->comm, next->pid);
12690 +
12691 + /* wake up next */
12692 + wake_up_process(next);
12693 + }
12694 + else
12695 + {
12696 + TRACE_CUR("queue %d: no one to steal.\n", kfmlp_get_idx(sem, my_queue));
12697 +
12698 + --(my_queue->count);
12699 + if(my_queue->count < sem->shortest_queue->count)
12700 + {
12701 + sem->shortest_queue = my_queue;
12702 + }
12703 + }
12704 + }
12705 +
12706 + /* we lose the benefit of priority inheritance (if any) */
12707 + if (tsk_rt(t)->inh_task)
12708 + clear_priority_inheritance(t);
12709 +
12710 +out:
12711 + spin_unlock_irqrestore(&sem->lock, flags);
12712 +
12713 + return err;
12714 +}
12715 +
12716 +int crm_srt_kfmlp_close(struct litmus_lock* l)
12717 +{
12718 + struct task_struct *t = current;
12719 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
12720 + struct kfmlp_queue *my_queue;
12721 + unsigned long flags;
12722 +
12723 + int owner;
12724 +
12725 + spin_lock_irqsave(&sem->lock, flags);
12726 +
12727 + my_queue = kfmlp_get_queue(sem, t);
12728 + owner = (my_queue) ? (my_queue->owner == t) : 0;
12729 +
12730 + spin_unlock_irqrestore(&sem->lock, flags);
12731 +
12732 + if (owner)
12733 + crm_srt_kfmlp_unlock(l);
12734 +
12735 + return 0;
12736 +}
12737 +
12738 +void crm_srt_kfmlp_free(struct litmus_lock* l)
12739 +{
12740 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
12741 + kfree(sem->queues);
12742 + kfree(sem);
12743 +}
12744 +
12745 +static struct litmus_lock_ops crm_srt_kfmlp_lock_ops = {
12746 + .close = crm_srt_kfmlp_close,
12747 + .lock = crm_srt_kfmlp_lock,
12748 + .unlock = crm_srt_kfmlp_unlock,
12749 + .deallocate = crm_srt_kfmlp_free,
12750 +};
12751 +
12752 +static struct litmus_lock* crm_srt_new_kfmlp(void* __user arg, int* ret_code)
12753 +{
12754 + struct kfmlp_semaphore* sem;
12755 + int num_resources = 0;
12756 + int i;
12757 +
12758 + if(!access_ok(VERIFY_READ, arg, sizeof(num_resources)))
12759 + {
12760 + *ret_code = -EINVAL;
12761 + return(NULL);
12762 + }
12763 + if(__copy_from_user(&num_resources, arg, sizeof(num_resources)))
12764 + {
12765 + *ret_code = -EINVAL;
12766 + return(NULL);
12767 + }
12768 + if(num_resources < 1)
12769 + {
12770 + *ret_code = -EINVAL;
12771 + return(NULL);
12772 + }
12773 +
12774 + sem = kmalloc(sizeof(*sem), GFP_KERNEL);
12775 + if(!sem)
12776 + {
12777 + *ret_code = -ENOMEM;
12778 + return NULL;
12779 + }
12780 +
12781 + sem->queues = kmalloc(sizeof(struct kfmlp_queue)*num_resources, GFP_KERNEL);
12782 + if(!sem->queues)
12783 + {
12784 + kfree(sem);
12785 + *ret_code = -ENOMEM;
12786 + return NULL;
12787 + }
12788 +
12789 + sem->litmus_lock.ops = &crm_srt_kfmlp_lock_ops;
12790 + spin_lock_init(&sem->lock);
12791 + sem->num_resources = num_resources;
12792 +
12793 + for(i = 0; i < num_resources; ++i)
12794 + {
12795 + sem->queues[i].owner = NULL;
12796 + sem->queues[i].hp_waiter = NULL;
12797 + init_waitqueue_head(&sem->queues[i].wait);
12798 + sem->queues[i].count = 0;
12799 + }
12800 +
12801 + sem->shortest_queue = &sem->queues[0];
12802 +
12803 + *ret_code = 0;
12804 + return &sem->litmus_lock;
12805 +}
12806 +
12807 +
12808 +/* **** lock constructor **** */
12809 +
12810 +static long crm_srt_allocate_lock(struct litmus_lock **lock, int type,
12811 + void* __user arg)
12812 +{
12813 + int err = -ENXIO;
12814 +
12815 + /* C-RM-SRT currently only supports the FMLP for global resources
12816 + WITHIN a given cluster. DO NOT USE CROSS-CLUSTER! */
12817 + switch (type) {
12818 + case KFMLP_SEM:
12819 + *lock = crm_srt_new_kfmlp(arg, &err);
12820 + break;
12821 + };
12822 +
12823 + return err;
12824 +}
12825 +
12826 +#endif // CONFIG_LITMUS_LOCKING
12827 +
12828 +
12829 +
12830 +
12831 +
12832 +
12833 +/* total number of clusters */
12834 +static int num_clusters;
12835 +/* we do not support clusters of different sizes */
12836 +static unsigned int cluster_size;
12837 +
12838 +#ifdef VERBOSE_INIT
12839 +static void print_cluster_topology(cpumask_var_t mask, int cpu)
12840 +{
12841 + int chk;
12842 + char buf[255];
12843 +
12844 + chk = cpulist_scnprintf(buf, 254, mask);
12845 + buf[chk] = '\0';
12846 + printk(KERN_INFO "CPU = %d, shared cpu(s) = %s\n", cpu, buf);
12847 +
12848 +}
12849 +#endif
12850 +
12851 +static int clusters_allocated = 0;
12852 +
12853 +static void cleanup_crm_srt(void)
12854 +{
12855 + int i;
12856 +
12857 + if (clusters_allocated) {
12858 + for (i = 0; i < num_clusters; i++) {
12859 + kfree(crm_srt[i].cpus);
12860 + kfree(crm_srt[i].heap_node);
12861 + free_cpumask_var(crm_srt[i].cpu_map);
12862 + }
12863 +
12864 + kfree(crm_srt);
12865 + }
12866 +}
12867 +
12868 +static long crm_srt_activate_plugin(void)
12869 +{
12870 + int i, j, cpu, ccpu, cpu_count;
12871 + cpu_entry_t *entry;
12872 +
12873 + cpumask_var_t mask;
12874 + int chk = 0;
12875 +
12876 + /* de-allocate old clusters, if any */
12877 + cleanup_crm_srt();
12878 +
12879 + printk(KERN_INFO "C-RM-SRT: Activate Plugin, cluster configuration = %d\n",
12880 + cluster_config);
12881 +
12882 + /* need to get cluster_size first */
12883 + if(!zalloc_cpumask_var(&mask, GFP_ATOMIC))
12884 + return -ENOMEM;
12885 +
12886 + if (unlikely(cluster_config == GLOBAL_CLUSTER)) {
12887 + cluster_size = num_online_cpus();
12888 + } else {
12889 + chk = get_shared_cpu_map(mask, 0, cluster_config);
12890 + if (chk) {
12891 + /* if chk != 0 then it is the max allowed index */
12892 + printk(KERN_INFO "C-RM-SRT: Cluster configuration = %d "
12893 + "is not supported on this hardware.\n",
12894 + cluster_config);
12895 + /* User should notice that the configuration failed, so
12896 + * let's bail out. */
12897 + return -EINVAL;
12898 + }
12899 +
12900 + cluster_size = cpumask_weight(mask);
12901 + }
12902 +
12903 + if ((num_online_cpus() % cluster_size) != 0) {
12904 + /* this can't be right, some cpus are left out */
12905 + printk(KERN_ERR "C-RM-SRT: Trying to group %d cpus in %d!\n",
12906 + num_online_cpus(), cluster_size);
12907 + return -1;
12908 + }
12909 +
12910 + num_clusters = num_online_cpus() / cluster_size;
12911 + printk(KERN_INFO "C-RM-SRT: %d cluster(s) of size = %d\n",
12912 + num_clusters, cluster_size);
12913 +
12914 + /* initialize clusters */
12915 + crm_srt = kmalloc(num_clusters * sizeof(crm_srt_domain_t), GFP_ATOMIC);
12916 + for (i = 0; i < num_clusters; i++) {
12917 +
12918 + crm_srt[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t),
12919 + GFP_ATOMIC);
12920 + crm_srt[i].heap_node = kmalloc(
12921 + cluster_size * sizeof(struct bheap_node),
12922 + GFP_ATOMIC);
12923 + bheap_init(&(crm_srt[i].cpu_heap));
12924 + rm_srt_domain_init(&(crm_srt[i].domain), NULL, crm_srt_release_jobs);
12925 +
12926 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
12927 + crm_srt[i].pending_tasklets.head = NULL;
12928 + crm_srt[i].pending_tasklets.tail = &(crm_srt[i].pending_tasklets.head);
12929 +#endif
12930 +
12931 + if(!zalloc_cpumask_var(&crm_srt[i].cpu_map, GFP_ATOMIC))
12932 + return -ENOMEM;
12933 + }
12934 +
12935 +	/* cycle through clusters and add cpus to them */
12936 + for (i = 0; i < num_clusters; i++) {
12937 +
12938 + for_each_online_cpu(cpu) {
12939 + /* check if the cpu is already in a cluster */
12940 + for (j = 0; j < num_clusters; j++)
12941 + if (cpumask_test_cpu(cpu, crm_srt[j].cpu_map))
12942 + break;
12943 + /* if it is in a cluster go to next cpu */
12944 + if (j < num_clusters &&
12945 + cpumask_test_cpu(cpu, crm_srt[j].cpu_map))
12946 + continue;
12947 +
12948 + /* this cpu isn't in any cluster */
12949 + /* get the shared cpus */
12950 + if (unlikely(cluster_config == GLOBAL_CLUSTER))
12951 + cpumask_copy(mask, cpu_online_mask);
12952 + else
12953 + get_shared_cpu_map(mask, cpu, cluster_config);
12954 +
12955 + cpumask_copy(crm_srt[i].cpu_map, mask);
12956 +#ifdef VERBOSE_INIT
12957 + print_cluster_topology(mask, cpu);
12958 +#endif
12959 + /* add cpus to current cluster and init cpu_entry_t */
12960 + cpu_count = 0;
12961 + for_each_cpu(ccpu, crm_srt[i].cpu_map) {
12962 +
12963 + entry = &per_cpu(crm_srt_cpu_entries, ccpu);
12964 + crm_srt[i].cpus[cpu_count] = entry;
12965 + atomic_set(&entry->will_schedule, 0);
12966 + entry->cpu = ccpu;
12967 + entry->cluster = &crm_srt[i];
12968 + entry->hn = &(crm_srt[i].heap_node[cpu_count]);
12969 + bheap_node_init(&entry->hn, entry);
12970 +
12971 + cpu_count++;
12972 +
12973 + entry->linked = NULL;
12974 + entry->scheduled = NULL;
12975 + update_cpu_position(entry);
12976 + }
12977 + /* done with this cluster */
12978 + break;
12979 + }
12980 + }
12981 +
12982 +#ifdef CONFIG_LITMUS_SOFTIRQD
12983 + {
12984 + /* distribute the daemons evenly across the clusters. */
12985 + int* affinity = kmalloc(NR_LITMUS_SOFTIRQD * sizeof(int), GFP_ATOMIC);
12986 + int num_daemons_per_cluster = NR_LITMUS_SOFTIRQD / num_clusters;
12987 + int left_over = NR_LITMUS_SOFTIRQD % num_clusters;
12988 +
12989 + int daemon = 0;
12990 + for(i = 0; i < num_clusters; ++i)
12991 + {
12992 + int num_on_this_cluster = num_daemons_per_cluster;
12993 + if(left_over)
12994 + {
12995 + ++num_on_this_cluster;
12996 + --left_over;
12997 + }
12998 +
12999 + for(j = 0; j < num_on_this_cluster; ++j)
13000 + {
13001 + // first CPU of this cluster
13002 + affinity[daemon++] = i*cluster_size;
13003 + }
13004 + }
13005 +
13006 + spawn_klitirqd(affinity);
13007 +
13008 + kfree(affinity);
13009 + }
13010 +#endif
13011 +
13012 +#ifdef CONFIG_LITMUS_NVIDIA
13013 + init_nvidia_info();
13014 +#endif
13015 +
13016 + free_cpumask_var(mask);
13017 + clusters_allocated = 1;
13018 + return 0;
13019 +}
13020 +
13021 +/* Plugin object */
13022 +static struct sched_plugin crm_srt_plugin __cacheline_aligned_in_smp = {
13023 + .plugin_name = "C-RM-SRT",
13024 + .finish_switch = crm_srt_finish_switch,
13025 + .tick = crm_srt_tick,
13026 + .task_new = crm_srt_task_new,
13027 + .complete_job = complete_job,
13028 + .task_exit = crm_srt_task_exit,
13029 + .schedule = crm_srt_schedule,
13030 + .task_wake_up = crm_srt_task_wake_up,
13031 + .task_block = crm_srt_task_block,
13032 + .admit_task = crm_srt_admit_task,
13033 + .activate_plugin = crm_srt_activate_plugin,
13034 +#ifdef CONFIG_LITMUS_LOCKING
13035 + .allocate_lock = crm_srt_allocate_lock,
13036 + .set_prio_inh = set_priority_inheritance,
13037 + .clear_prio_inh = clear_priority_inheritance,
13038 +#endif
13039 +#ifdef CONFIG_LITMUS_SOFTIRQD
13040 + .set_prio_inh_klitirqd = set_priority_inheritance_klitirqd,
13041 + .clear_prio_inh_klitirqd = clear_priority_inheritance_klitirqd,
13042 +#endif
13043 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
13044 + .enqueue_pai_tasklet = enqueue_pai_tasklet,
13045 + .run_tasklets = run_tasklets,
13046 +#endif
13047 +};
13048 +
13049 +static struct proc_dir_entry *cluster_file = NULL, *crm_srt_dir = NULL;
13050 +
13051 +static int __init init_crm_srt(void)
13052 +{
13053 + int err, fs;
13054 +
13055 + err = register_sched_plugin(&crm_srt_plugin);
13056 + if (!err) {
13057 + fs = make_plugin_proc_dir(&crm_srt_plugin, &crm_srt_dir);
13058 + if (!fs)
13059 + cluster_file = create_cluster_file(crm_srt_dir, &cluster_config);
13060 + else
13061 + printk(KERN_ERR "Could not allocate C-RM-SRT procfs dir.\n");
13062 + }
13063 + return err;
13064 +}
13065 +
13066 +static void clean_crm_srt(void)
13067 +{
13068 + cleanup_crm_srt();
13069 + if (cluster_file)
13070 + remove_proc_entry("cluster", crm_srt_dir);
13071 + if (crm_srt_dir)
13072 + remove_plugin_proc_dir(&crm_srt_plugin);
13073 +}
13074 +
13075 +module_init(init_crm_srt);
13076 +module_exit(clean_crm_srt);
13077 diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c
13078 index 3092797..30c745f 100644
13079 --- a/litmus/sched_gsn_edf.c
13080 +++ b/litmus/sched_gsn_edf.c
13081 @@ -12,6 +12,8 @@
13082 #include <linux/percpu.h>
13083 #include <linux/sched.h>
13084 #include <linux/slab.h>
13085 +#include <linux/uaccess.h>
13086 +
13087
13088 #include <litmus/litmus.h>
13089 #include <litmus/jobs.h>
13090 @@ -25,6 +27,24 @@
13091
13092 #include <linux/module.h>
13093
13094 +#ifdef CONFIG_SCHED_CPU_AFFINITY
13095 +#include <litmus/affinity.h>
13096 +#endif
13097 +
13098 +#ifdef CONFIG_LITMUS_SOFTIRQD
13099 +#include <litmus/litmus_softirq.h>
13100 +#endif
13101 +
13102 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
13103 +#include <linux/interrupt.h>
13104 +#include <litmus/trace.h>
13105 +#endif
13106 +
13107 +#ifdef CONFIG_LITMUS_NVIDIA
13108 +#include <litmus/nvidia_info.h>
13109 +#endif
13110 +
13111 +
13112 /* Overview of GSN-EDF operations.
13113 *
13114 * For a detailed explanation of GSN-EDF have a look at the FMLP paper. This
13115 @@ -111,6 +131,16 @@ static struct bheap gsnedf_cpu_heap;
13116 static rt_domain_t gsnedf;
13117 #define gsnedf_lock (gsnedf.ready_lock)
13118
13119 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
13120 +struct tasklet_head
13121 +{
13122 + struct tasklet_struct *head;
13123 + struct tasklet_struct **tail;
13124 +};
13125 +
13126 +struct tasklet_head gsnedf_pending_tasklets;
13127 +#endif
13128 +
13129
13130 /* Uncomment this if you want to see all scheduling decisions in the
13131 * TRACE() log.
13132 @@ -253,21 +283,52 @@ static noinline void requeue(struct task_struct* task)
13133 }
13134 }
13135
13136 +#ifdef CONFIG_SCHED_CPU_AFFINITY
13137 +static cpu_entry_t* gsnedf_get_nearest_available_cpu(cpu_entry_t* start)
13138 +{
13139 + cpu_entry_t* affinity;
13140 +
13141 + get_nearest_available_cpu(affinity, start, gsnedf_cpu_entries,
13142 +#ifdef CONFIG_RELEASE_MASTER
13143 + gsnedf.release_master
13144 +#else
13145 + -1
13146 +#endif
13147 + );
13148 +
13149 + return(affinity);
13150 +}
13151 +#endif
13152 +
13153 /* check for any necessary preemptions */
13154 static void check_for_preemptions(void)
13155 {
13156 struct task_struct *task;
13157 - cpu_entry_t* last;
13158 + cpu_entry_t *last;
13159
13160 for(last = lowest_prio_cpu();
13161 edf_preemption_needed(&gsnedf, last->linked);
13162 last = lowest_prio_cpu()) {
13163 /* preemption necessary */
13164 task = __take_ready(&gsnedf);
13165 - TRACE("check_for_preemptions: attempting to link task %d to %d\n",
13166 - task->pid, last->cpu);
13167 +
13168 +#ifdef CONFIG_SCHED_CPU_AFFINITY
13169 + {
13170 + cpu_entry_t* affinity = gsnedf_get_nearest_available_cpu(
13171 + &per_cpu(gsnedf_cpu_entries, task_cpu(task)));
13172 + if(affinity)
13173 + last = affinity;
13174 + else if(last->linked)
13175 + requeue(last->linked);
13176 + }
13177 +#else
13178 if (last->linked)
13179 requeue(last->linked);
13180 +#endif
13181 +
13182 + TRACE("check_for_preemptions: attempting to link task %d to %d\n",
13183 + task->pid, last->cpu);
13184 +
13185 link_task_to_cpu(task, last);
13186 preempt(last);
13187 }
13188 @@ -277,7 +338,7 @@ static void check_for_preemptions(void)
13189 static noinline void gsnedf_job_arrival(struct task_struct* task)
13190 {
13191 BUG_ON(!task);
13192 -
13193 +
13194 requeue(task);
13195 check_for_preemptions();
13196 }
13197 @@ -298,9 +359,13 @@ static void gsnedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
13198 static noinline void job_completion(struct task_struct *t, int forced)
13199 {
13200 BUG_ON(!t);
13201 -
13202 +
13203 sched_trace_task_completion(t, forced);
13204
13205 +#ifdef CONFIG_LITMUS_NVIDIA
13206 + atomic_set(&tsk_rt(t)->nv_int_count, 0);
13207 +#endif
13208 +
13209 TRACE_TASK(t, "job_completion().\n");
13210
13211 /* set flags */
13212 @@ -343,6 +408,414 @@ static void gsnedf_tick(struct task_struct* t)
13213 }
13214 }
13215
13216 +
13217 +
13218 +
13219 +
13220 +
13221 +
13222 +
13223 +
13224 +
13225 +
13226 +
13227 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
13228 +
13229 +
13230 +static void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed)
13231 +{
13232 + if (!atomic_read(&tasklet->count)) {
13233 + sched_trace_tasklet_begin(tasklet->owner);
13234 +
13235 + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state))
13236 + {
13237 + BUG();
13238 + }
13239 + TRACE("%s: Invoking tasklet with owner pid = %d (flushed = %d).\n", __FUNCTION__, tasklet->owner->pid, flushed);
13240 + tasklet->func(tasklet->data);
13241 + tasklet_unlock(tasklet);
13242 +
13243 + sched_trace_tasklet_end(tasklet->owner, flushed);
13244 + }
13245 + else {
13246 + BUG();
13247 + }
13248 +}
13249 +
13250 +
13251 +static void __extract_tasklets(struct task_struct* task, struct tasklet_head* task_tasklets)
13252 +{
13253 + struct tasklet_struct* step;
13254 + struct tasklet_struct* tasklet;
13255 + struct tasklet_struct* prev;
13256 +
13257 + task_tasklets->head = NULL;
13258 + task_tasklets->tail = &(task_tasklets->head);
13259 +
13260 + prev = NULL;
13261 + for(step = gsnedf_pending_tasklets.head; step != NULL; step = step->next)
13262 + {
13263 + if(step->owner == task)
13264 + {
13265 + TRACE("%s: Found tasklet to flush: %d\n", __FUNCTION__, step->owner->pid);
13266 +
13267 + tasklet = step;
13268 +
13269 + if(prev) {
13270 + prev->next = tasklet->next;
13271 + }
13272 + else if(gsnedf_pending_tasklets.head == tasklet) {
13273 + // we're at the head.
13274 + gsnedf_pending_tasklets.head = tasklet->next;
13275 + }
13276 +
13277 + if(gsnedf_pending_tasklets.tail == &(tasklet->next)) {
13278 + // we're at the tail
13279 + if(prev) {
13280 + gsnedf_pending_tasklets.tail = &(prev->next);
13281 + }
13282 + else {
13283 + gsnedf_pending_tasklets.tail = &(gsnedf_pending_tasklets.head);
13284 + }
13285 + }
13286 +
13287 + tasklet->next = NULL;
13288 + *(task_tasklets->tail) = tasklet;
13289 + task_tasklets->tail = &(tasklet->next);
13290 + }
13291 + else {
13292 + prev = step;
13293 + }
13294 + }
13295 +}
13296 +
13297 +static void flush_tasklets(struct task_struct* task)
13298 +{
13299 + unsigned long flags;
13300 + struct tasklet_head task_tasklets;
13301 + struct tasklet_struct* step;
13302 +
13303 + raw_spin_lock_irqsave(&gsnedf_lock, flags);
13304 + __extract_tasklets(task, &task_tasklets);
13305 + raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
13306 +
13307 + if(task_tasklets.head != NULL) {
13308 + TRACE("%s: Flushing tasklets for %d...\n", __FUNCTION__, task->pid);
13309 + }
13310 +
13311 + // now execute any flushed tasklets.
13312 + for(step = task_tasklets.head; step != NULL; /**/)
13313 + {
13314 + struct tasklet_struct* temp = step->next;
13315 +
13316 + step->next = NULL;
13317 + __do_lit_tasklet(step, 1ul);
13318 +
13319 + step = temp;
13320 + }
13321 +}
13322 +
13323 +
13324 +static void do_lit_tasklets(struct task_struct* sched_task)
13325 +{
13326 + int work_to_do = 1;
13327 + struct tasklet_struct *tasklet = NULL;
13328 + //struct tasklet_struct *step;
13329 + unsigned long flags;
13330 +
13331 + while(work_to_do) {
13332 +
13333 + TS_NV_SCHED_BOTISR_START;
13334 +
13335 + // remove tasklet at head of list if it has higher priority.
13336 + raw_spin_lock_irqsave(&gsnedf_lock, flags);
13337 +
13338 + /*
13339 + step = gsnedf_pending_tasklets.head;
13340 + TRACE("%s: (BEFORE) dumping tasklet queue...\n", __FUNCTION__);
13341 + while(step != NULL){
13342 + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid);
13343 + step = step->next;
13344 + }
13345 + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(gsnedf_pending_tasklets.tail), (*(gsnedf_pending_tasklets.tail) != NULL) ? (*(gsnedf_pending_tasklets.tail))->owner->pid : -1);
13346 + TRACE("%s: done.\n", __FUNCTION__);
13347 + */
13348 +
13349 +
13350 + if(gsnedf_pending_tasklets.head != NULL) {
13351 + // remove tasklet at head.
13352 + tasklet = gsnedf_pending_tasklets.head;
13353 +
13354 + if(edf_higher_prio(tasklet->owner, sched_task)) {
13355 +
13356 + if(NULL == tasklet->next) {
13357 + // tasklet is at the head, list only has one element
13358 + TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
13359 + gsnedf_pending_tasklets.tail = &(gsnedf_pending_tasklets.head);
13360 + }
13361 +
13362 + // remove the tasklet from the queue
13363 + gsnedf_pending_tasklets.head = tasklet->next;
13364 +
13365 + TRACE("%s: Removed tasklet for %d from tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
13366 + }
13367 + else {
13368 + TRACE("%s: Pending tasklet (%d) does not have priority to run on this CPU (%d).\n", __FUNCTION__, tasklet->owner->pid, smp_processor_id());
13369 + tasklet = NULL;
13370 + }
13371 + }
13372 + else {
13373 + TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__);
13374 + }
13375 +
13376 +
13377 + /*
13378 + step = gsnedf_pending_tasklets.head;
13379 + TRACE("%s: (AFTER) dumping tasklet queue...\n", __FUNCTION__);
13380 + while(step != NULL){
13381 + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid);
13382 + step = step->next;
13383 + }
13384 + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(gsnedf_pending_tasklets.tail), (*(gsnedf_pending_tasklets.tail) != NULL) ? (*(gsnedf_pending_tasklets.tail))->owner->pid : -1);
13385 + TRACE("%s: done.\n", __FUNCTION__);
13386 + */
13387 +
13388 + raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
13389 +
13390 + TS_NV_SCHED_BOTISR_END;
13391 +
13392 + if(tasklet) {
13393 + __do_lit_tasklet(tasklet, 0ul);
13394 + tasklet = NULL;
13395 + }
13396 + else {
13397 + work_to_do = 0;
13398 + }
13399 + }
13400 +
13401 + //TRACE("%s: exited.\n", __FUNCTION__);
13402 +}
13403 +
13404 +
13405 +static void run_tasklets(struct task_struct* sched_task)
13406 +{
13407 +#if 0
13408 + int task_is_rt = is_realtime(sched_task);
13409 + cedf_domain_t* cluster;
13410 +
13411 + if(is_realtime(sched_task)) {
13412 + cluster = task_cpu_cluster(sched_task);
13413 + }
13414 + else {
13415 + cluster = remote_cluster(get_cpu());
13416 + }
13417 +
13418 + if(cluster && gsnedf_pending_tasklets.head != NULL) {
13419 + TRACE("%s: There are tasklets to process.\n", __FUNCTION__);
13420 +
13421 + do_lit_tasklets(cluster, sched_task);
13422 + }
13423 +
13424 + if(!task_is_rt) {
13425 + put_cpu_no_resched();
13426 + }
13427 +#else
13428 +
13429 + preempt_disable();
13430 +
13431 + if(gsnedf_pending_tasklets.head != NULL) {
13432 + TRACE("%s: There are tasklets to process.\n", __FUNCTION__);
13433 + do_lit_tasklets(sched_task);
13434 + }
13435 +
13436 + preempt_enable_no_resched();
13437 +
13438 +#endif
13439 +}
13440 +
13441 +
13442 +static void __add_pai_tasklet(struct tasklet_struct* tasklet)
13443 +{
13444 + struct tasklet_struct* step;
13445 +
13446 + /*
13447 + step = gsnedf_pending_tasklets.head;
13448 + TRACE("%s: (BEFORE) dumping tasklet queue...\n", __FUNCTION__);
13449 + while(step != NULL){
13450 + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid);
13451 + step = step->next;
13452 + }
13453 + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(gsnedf_pending_tasklets.tail), (*(gsnedf_pending_tasklets.tail) != NULL) ? (*(gsnedf_pending_tasklets.tail))->owner->pid : -1);
13454 + TRACE("%s: done.\n", __FUNCTION__);
13455 + */
13456 +
13457 +
13458 + tasklet->next = NULL; // make sure there are no old values floating around
13459 +
13460 + step = gsnedf_pending_tasklets.head;
13461 + if(step == NULL) {
13462 + TRACE("%s: tasklet queue empty. inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid);
13463 + // insert at tail.
13464 + *(gsnedf_pending_tasklets.tail) = tasklet;
13465 + gsnedf_pending_tasklets.tail = &(tasklet->next);
13466 + }
13467 + else if((*(gsnedf_pending_tasklets.tail) != NULL) &&
13468 + edf_higher_prio((*(gsnedf_pending_tasklets.tail))->owner, tasklet->owner)) {
13469 + // insert at tail.
13470 + TRACE("%s: tasklet belongs at end. inserting tasklet for %d at tail.\n", __FUNCTION__, tasklet->owner->pid);
13471 +
13472 + *(gsnedf_pending_tasklets.tail) = tasklet;
13473 + gsnedf_pending_tasklets.tail = &(tasklet->next);
13474 + }
13475 + else {
13476 +
13477 + //WARN_ON(1 == 1);
13478 +
13479 + // insert the tasklet somewhere in the middle.
13480 +
13481 + TRACE("%s: tasklet belongs somewhere in the middle.\n", __FUNCTION__);
13482 +
13483 + while(step->next && edf_higher_prio(step->next->owner, tasklet->owner)) {
13484 + step = step->next;
13485 + }
13486 +
13487 + // insert tasklet right before step->next.
13488 +
13489 + TRACE("%s: inserting tasklet for %d between %d and %d.\n", __FUNCTION__, tasklet->owner->pid, step->owner->pid, (step->next) ? step->next->owner->pid : -1);
13490 +
13491 + tasklet->next = step->next;
13492 + step->next = tasklet;
13493 +
13494 + // patch up the head if needed.
13495 + if(gsnedf_pending_tasklets.head == step)
13496 + {
13497 + TRACE("%s: %d is the new tasklet queue head.\n", __FUNCTION__, tasklet->owner->pid);
13498 + gsnedf_pending_tasklets.head = tasklet;
13499 + }
13500 + }
13501 +
13502 + /*
13503 + step = gsnedf_pending_tasklets.head;
13504 + TRACE("%s: (AFTER) dumping tasklet queue...\n", __FUNCTION__);
13505 + while(step != NULL){
13506 + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid);
13507 + step = step->next;
13508 + }
13509 + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(gsnedf_pending_tasklets.tail), (*(gsnedf_pending_tasklets.tail) != NULL) ? (*(gsnedf_pending_tasklets.tail))->owner->pid : -1);
13510 + TRACE("%s: done.\n", __FUNCTION__);
13511 + */
13512 +
13513 + // TODO: Maintain this list in priority order.
13514 + // tasklet->next = NULL;
13515 + // *(gsnedf_pending_tasklets.tail) = tasklet;
13516 + // gsnedf_pending_tasklets.tail = &tasklet->next;
13517 +}
13518 +
13519 +static int enqueue_pai_tasklet(struct tasklet_struct* tasklet)
13520 +{
13521 + cpu_entry_t *targetCPU = NULL;
13522 + int thisCPU;
13523 + int runLocal = 0;
13524 + int runNow = 0;
13525 + unsigned long flags;
13526 +
13527 + if(unlikely((tasklet->owner == NULL) || !is_realtime(tasklet->owner)))
13528 + {
13529 + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
13530 + return 0;
13531 + }
13532 +
13533 +
13534 + raw_spin_lock_irqsave(&gsnedf_lock, flags);
13535 +
13536 + thisCPU = smp_processor_id();
13537 +
13538 +#if 1
13539 +#ifdef CONFIG_SCHED_CPU_AFFINITY
13540 + {
13541 + cpu_entry_t* affinity = NULL;
13542 +
13543 + // use this CPU if it is in our cluster and isn't running any RT work.
13544 + if(
13545 +#ifdef CONFIG_RELEASE_MASTER
13546 + (thisCPU != gsnedf.release_master) &&
13547 +#endif
13548 + (__get_cpu_var(gsnedf_cpu_entries).linked == NULL)) {
13549 + affinity = &(__get_cpu_var(gsnedf_cpu_entries));
13550 + }
13551 + else {
13552 + // this CPU is busy or shouldn't run tasklet in this cluster.
13553 + // look for available nearby CPUs.
13554 + // NOTE: Affinity towards owner and not this CPU. Is this right?
13555 + affinity =
13556 + gsnedf_get_nearest_available_cpu(
13557 + &per_cpu(gsnedf_cpu_entries, task_cpu(tasklet->owner)));
13558 + }
13559 +
13560 + targetCPU = affinity;
13561 + }
13562 +#endif
13563 +#endif
13564 +
13565 + if (targetCPU == NULL) {
13566 + targetCPU = lowest_prio_cpu();
13567 + }
13568 +
13569 + if (edf_higher_prio(tasklet->owner, targetCPU->linked)) {
13570 + if (thisCPU == targetCPU->cpu) {
13571 + TRACE("%s: Run tasklet locally (and now).\n", __FUNCTION__);
13572 + runLocal = 1;
13573 + runNow = 1;
13574 + }
13575 + else {
13576 + TRACE("%s: Run tasklet remotely (and now).\n", __FUNCTION__);
13577 + runLocal = 0;
13578 + runNow = 1;
13579 + }
13580 + }
13581 + else {
13582 + runLocal = 0;
13583 + runNow = 0;
13584 + }
13585 +
13586 + if(!runLocal) {
13587 + // enqueue the tasklet
13588 + __add_pai_tasklet(tasklet);
13589 + }
13590 +
13591 + raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
13592 +
13593 +
13594 + if (runLocal /*&& runNow */) { // runNow == 1 is implied
13595 + TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__);
13596 + __do_lit_tasklet(tasklet, 0ul);
13597 + }
13598 + else if (runNow /*&& !runLocal */) { // runLocal == 0 is implied
13599 + TRACE("%s: Triggering CPU %d to run tasklet.\n", __FUNCTION__, targetCPU->cpu);
13600 + preempt(targetCPU); // need to be protected by gsnedf_lock?
13601 + }
13602 + else {
13603 + TRACE("%s: Scheduling of tasklet was deferred.\n", __FUNCTION__);
13604 + }
13605 +
13606 + return(1); // success
13607 +}
13608 +
13609 +
13610 +#endif
13611 +
13612 +
13613 +
13614 +
13615 +
13616 +
13617 +
13618 +
13619 +
13620 +
13621 +
13622 +
13623 +
13624 /* Getting schedule() right is a bit tricky. schedule() may not make any
13625 * assumptions on the state of the current task since it may be called for a
13626 * number of reasons. The reasons include a scheduler_tick() determined that it
13627 @@ -401,17 +874,19 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev)
13628 TRACE_TASK(prev, "invoked gsnedf_schedule.\n");
13629 #endif
13630
13631 + /*
13632 if (exists)
13633 TRACE_TASK(prev,
13634 "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
13635 "state:%d sig:%d\n",
13636 blocks, out_of_time, np, sleep, preempt,
13637 prev->state, signal_pending(prev));
13638 + */
13639 +
13640 if (entry->linked && preempt)
13641 TRACE_TASK(prev, "will be preempted by %s/%d\n",
13642 entry->linked->comm, entry->linked->pid);
13643
13644 -
13645 /* If a task blocks we have no choice but to reschedule.
13646 */
13647 if (blocks)
13648 @@ -456,12 +931,15 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev)
13649 entry->scheduled->rt_param.scheduled_on = NO_CPU;
13650 TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
13651 }
13652 - } else
13653 + }
13654 + else
13655 + {
13656 /* Only override Linux scheduler if we have a real-time task
13657 * scheduled that needs to continue.
13658 */
13659 if (exists)
13660 next = prev;
13661 + }
13662
13663 sched_state_task_picked();
13664
13665 @@ -486,8 +964,9 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev)
13666 static void gsnedf_finish_switch(struct task_struct *prev)
13667 {
13668 cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
13669 -
13670 +
13671 entry->scheduled = is_realtime(current) ? current : NULL;
13672 +
13673 #ifdef WANT_ALL_SCHED_EVENTS
13674 TRACE_TASK(prev, "switched away from\n");
13675 #endif
13676 @@ -536,11 +1015,14 @@ static void gsnedf_task_new(struct task_struct * t, int on_rq, int running)
13677 static void gsnedf_task_wake_up(struct task_struct *task)
13678 {
13679 unsigned long flags;
13680 - lt_t now;
13681 -
13682 + //lt_t now;
13683 +
13684 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
13685
13686 raw_spin_lock_irqsave(&gsnedf_lock, flags);
13687 +
13688 +
13689 +#if 0 // sporadic task model
13690 /* We need to take suspensions because of semaphores into
13691 * account! If a job resumes after being suspended due to acquiring
13692 * a semaphore, it should never be treated as a new job release.
13693 @@ -562,19 +1044,26 @@ static void gsnedf_task_wake_up(struct task_struct *task)
13694 }
13695 }
13696 }
13697 +#else // periodic task model
13698 + set_rt_flags(task, RT_F_RUNNING);
13699 +#endif
13700 +
13701 gsnedf_job_arrival(task);
13702 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
13703 }
13704
13705 static void gsnedf_task_block(struct task_struct *t)
13706 {
13707 + // TODO: is this called on preemption??
13708 unsigned long flags;
13709
13710 TRACE_TASK(t, "block at %llu\n", litmus_clock());
13711
13712 /* unlink if necessary */
13713 raw_spin_lock_irqsave(&gsnedf_lock, flags);
13714 +
13715 unlink(t);
13716 +
13717 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
13718
13719 BUG_ON(!is_realtime(t));
13720 @@ -585,6 +1074,10 @@ static void gsnedf_task_exit(struct task_struct * t)
13721 {
13722 unsigned long flags;
13723
13724 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
13725 + flush_tasklets(t);
13726 +#endif
13727 +
13728 /* unlink if necessary */
13729 raw_spin_lock_irqsave(&gsnedf_lock, flags);
13730 unlink(t);
13731 @@ -593,7 +1086,7 @@ static void gsnedf_task_exit(struct task_struct * t)
13732 tsk_rt(t)->scheduled_on = NO_CPU;
13733 }
13734 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
13735 -
13736 +
13737 BUG_ON(!is_realtime(t));
13738 TRACE_TASK(t, "RIP\n");
13739 }
13740 @@ -608,51 +1101,53 @@ static long gsnedf_admit_task(struct task_struct* tsk)
13741
13742 #include <litmus/fdso.h>
13743
13744 -/* called with IRQs off */
13745 -static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
13746 +
13747 +static void __set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
13748 {
13749 int linked_on;
13750 - int check_preempt = 0;
13751 -
13752 - raw_spin_lock(&gsnedf_lock);
13753 -
13754 - TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid);
13755 + int check_preempt = 0;
13756 +
13757 + if(prio_inh != NULL)
13758 + TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid);
13759 + else
13760 + TRACE_TASK(t, "inherits priority from %p\n", prio_inh);
13761 +
13762 + sched_trace_eff_prio_change(t, prio_inh);
13763 +
13764 tsk_rt(t)->inh_task = prio_inh;
13765 -
13766 +
13767 linked_on = tsk_rt(t)->linked_on;
13768 -
13769 +
13770 /* If it is scheduled, then we need to reorder the CPU heap. */
13771 if (linked_on != NO_CPU) {
13772 TRACE_TASK(t, "%s: linked on %d\n",
13773 - __FUNCTION__, linked_on);
13774 + __FUNCTION__, linked_on);
13775 /* Holder is scheduled; need to re-order CPUs.
13776 * We can't use heap_decrease() here since
13777 * the cpu_heap is ordered in reverse direction, so
13778 * it is actually an increase. */
13779 bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap,
13780 - gsnedf_cpus[linked_on]->hn);
13781 + gsnedf_cpus[linked_on]->hn);
13782 bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap,
13783 - gsnedf_cpus[linked_on]->hn);
13784 + gsnedf_cpus[linked_on]->hn);
13785 } else {
13786 /* holder may be queued: first stop queue changes */
13787 raw_spin_lock(&gsnedf.release_lock);
13788 if (is_queued(t)) {
13789 - TRACE_TASK(t, "%s: is queued\n",
13790 - __FUNCTION__);
13791 + TRACE_TASK(t, "%s: is queued\n", __FUNCTION__);
13792 +
13793 /* We need to update the position of holder in some
13794 * heap. Note that this could be a release heap if we
13795 * budget enforcement is used and this job overran. */
13796 - check_preempt =
13797 - !bheap_decrease(edf_ready_order,
13798 - tsk_rt(t)->heap_node);
13799 + check_preempt = !bheap_decrease(edf_ready_order, tsk_rt(t)->heap_node);
13800 +
13801 } else {
13802 /* Nothing to do: if it is not queued and not linked
13803 * then it is either sleeping or currently being moved
13804 * by other code (e.g., a timer interrupt handler) that
13805 * will use the correct priority when enqueuing the
13806 * task. */
13807 - TRACE_TASK(t, "%s: is NOT queued => Done.\n",
13808 - __FUNCTION__);
13809 + TRACE_TASK(t, "%s: is NOT queued => Done.\n", __FUNCTION__);
13810 }
13811 raw_spin_unlock(&gsnedf.release_lock);
13812
13813 @@ -666,34 +1161,148 @@ static void set_priority_inheritance(struct task_struct* t, struct task_struct*
13814 /* heap_decrease() hit the top level of the heap: make
13815 * sure preemption checks get the right task, not the
13816 * potentially stale cache. */
13817 - bheap_uncache_min(edf_ready_order,
13818 - &gsnedf.ready_queue);
13819 + bheap_uncache_min(edf_ready_order, &gsnedf.ready_queue);
13820 check_for_preemptions();
13821 }
13822 }
13823 +}
13824
13825 +/* called with IRQs off */
13826 +static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
13827 +{
13828 + raw_spin_lock(&gsnedf_lock);
13829 +
13830 + __set_priority_inheritance(t, prio_inh);
13831 +
13832 +#ifdef CONFIG_LITMUS_SOFTIRQD
13833 + if(tsk_rt(t)->cur_klitirqd != NULL)
13834 + {
13835 + TRACE_TASK(t, "%s/%d inherits a new priority!\n",
13836 + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
13837 +
13838 + __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh);
13839 + }
13840 +#endif
13841 +
13842 raw_spin_unlock(&gsnedf_lock);
13843 }
13844
13845 +
13846 +/* called with IRQs off */
13847 +static void __clear_priority_inheritance(struct task_struct* t)
13848 +{
13849 + TRACE_TASK(t, "priority restored\n");
13850 +
13851 + if(tsk_rt(t)->scheduled_on != NO_CPU)
13852 + {
13853 + sched_trace_eff_prio_change(t, NULL);
13854 +
13855 + tsk_rt(t)->inh_task = NULL;
13856 +
13857 + /* Check if rescheduling is necessary. We can't use heap_decrease()
13858 + * since the priority was effectively lowered. */
13859 + unlink(t);
13860 + gsnedf_job_arrival(t);
13861 + }
13862 + else
13863 + {
13864 + __set_priority_inheritance(t, NULL);
13865 + }
13866 +
13867 +#ifdef CONFIG_LITMUS_SOFTIRQD
13868 + if(tsk_rt(t)->cur_klitirqd != NULL)
13869 + {
13870 + TRACE_TASK(t, "%s/%d inheritance set back to owner.\n",
13871 + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
13872 +
13873 + if(tsk_rt(tsk_rt(t)->cur_klitirqd)->scheduled_on != NO_CPU)
13874 + {
13875 + sched_trace_eff_prio_change(tsk_rt(t)->cur_klitirqd, t);
13876 +
13877 + tsk_rt(tsk_rt(t)->cur_klitirqd)->inh_task = t;
13878 +
13879 + /* Check if rescheduling is necessary. We can't use heap_decrease()
13880 + * since the priority was effectively lowered. */
13881 + unlink(tsk_rt(t)->cur_klitirqd);
13882 + gsnedf_job_arrival(tsk_rt(t)->cur_klitirqd);
13883 + }
13884 + else
13885 + {
13886 + __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, t);
13887 + }
13888 + }
13889 +#endif
13890 +}
13891 +
13892 /* called with IRQs off */
13893 static void clear_priority_inheritance(struct task_struct* t)
13894 {
13895 raw_spin_lock(&gsnedf_lock);
13896 + __clear_priority_inheritance(t);
13897 + raw_spin_unlock(&gsnedf_lock);
13898 +}
13899
13900 - /* A job only stops inheriting a priority when it releases a
13901 - * resource. Thus we can make the following assumption.*/
13902 - BUG_ON(tsk_rt(t)->scheduled_on == NO_CPU);
13903 -
13904 - TRACE_TASK(t, "priority restored\n");
13905 - tsk_rt(t)->inh_task = NULL;
13906 +#ifdef CONFIG_LITMUS_SOFTIRQD
13907 +/* called with IRQs off */
13908 +static void set_priority_inheritance_klitirqd(struct task_struct* klitirqd,
13909 + struct task_struct* old_owner,
13910 + struct task_struct* new_owner)
13911 +{
13912 + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
13913 +
13914 + raw_spin_lock(&gsnedf_lock);
13915 +
13916 + if(old_owner != new_owner)
13917 + {
13918 + if(old_owner)
13919 + {
13920 + // unreachable?
13921 + tsk_rt(old_owner)->cur_klitirqd = NULL;
13922 + }
13923 +
13924 + TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n",
13925 + new_owner->comm, new_owner->pid);
13926
13927 - /* Check if rescheduling is necessary. We can't use heap_decrease()
13928 - * since the priority was effectively lowered. */
13929 - unlink(t);
13930 - gsnedf_job_arrival(t);
13931 + tsk_rt(new_owner)->cur_klitirqd = klitirqd;
13932 + }
13933 +
13934 + __set_priority_inheritance(klitirqd,
13935 + (tsk_rt(new_owner)->inh_task == NULL) ?
13936 + new_owner :
13937 + tsk_rt(new_owner)->inh_task);
13938 +
13939 + raw_spin_unlock(&gsnedf_lock);
13940 +}
13941
13942 +/* called with IRQs off */
13943 +static void clear_priority_inheritance_klitirqd(struct task_struct* klitirqd,
13944 + struct task_struct* old_owner)
13945 +{
13946 + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
13947 +
13948 + raw_spin_lock(&gsnedf_lock);
13949 +
13950 + TRACE_TASK(klitirqd, "priority restored\n");
13951 +
13952 + if(tsk_rt(klitirqd)->scheduled_on != NO_CPU)
13953 + {
13954 + tsk_rt(klitirqd)->inh_task = NULL;
13955 +
13956 + /* Check if rescheduling is necessary. We can't use heap_decrease()
13957 + * since the priority was effectively lowered. */
13958 + unlink(klitirqd);
13959 + gsnedf_job_arrival(klitirqd);
13960 + }
13961 + else
13962 + {
13963 + __set_priority_inheritance(klitirqd, NULL);
13964 + }
13965 +
13966 + tsk_rt(old_owner)->cur_klitirqd = NULL;
13967 +
13968 raw_spin_unlock(&gsnedf_lock);
13969 }
13970 +#endif
13971
13972
13973 /* ******************** FMLP support ********************** */
13974 @@ -892,11 +1501,483 @@ static struct litmus_lock* gsnedf_new_fmlp(void)
13975 return &sem->litmus_lock;
13976 }
13977
13978 +
13979 +
13980 +
13981 +
13982 +
13983 +
13984 +/* ******************** KFMLP support ********************** */
13985 +
13986 +/* struct for semaphore with priority inheritance */
13987 +struct kfmlp_queue
13988 +{
13989 + wait_queue_head_t wait;
13990 + struct task_struct* owner;
13991 + struct task_struct* hp_waiter;
13992 + int count; /* number of waiters + holder */
13993 +};
13994 +
13995 +struct kfmlp_semaphore
13996 +{
13997 + struct litmus_lock litmus_lock;
13998 +
13999 + spinlock_t lock;
14000 +
14001 + int num_resources; /* aka k */
14002 +
14003 + struct kfmlp_queue *queues; /* array */
14004 + struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */
14005 +};
14006 +
14007 +static inline struct kfmlp_semaphore* kfmlp_from_lock(struct litmus_lock* lock)
14008 +{
14009 + return container_of(lock, struct kfmlp_semaphore, litmus_lock);
14010 +}
14011 +
14012 +static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem,
14013 + struct kfmlp_queue* queue)
14014 +{
14015 + return (queue - &sem->queues[0]);
14016 +}
14017 +
14018 +static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem,
14019 + struct task_struct* holder)
14020 +{
14021 + int i;
14022 + for(i = 0; i < sem->num_resources; ++i)
14023 + if(sem->queues[i].owner == holder)
14024 + return(&sem->queues[i]);
14025 + return(NULL);
14026 +}
14027 +
14028 +/* caller is responsible for locking */
14029 +static struct task_struct* kfmlp_find_hp_waiter(struct kfmlp_queue *kqueue,
14030 + struct task_struct *skip)
14031 +{
14032 + struct list_head *pos;
14033 + struct task_struct *queued, *found = NULL;
14034 +
14035 + list_for_each(pos, &kqueue->wait.task_list) {
14036 + queued = (struct task_struct*) list_entry(pos, wait_queue_t,
14037 + task_list)->private;
14038 +
14039 + /* Compare task prios, find high prio task. */
14040 + if (queued != skip && edf_higher_prio(queued, found))
14041 + found = queued;
14042 + }
14043 + return found;
14044 +}
14045 +
14046 +static inline struct kfmlp_queue* kfmlp_find_shortest(
14047 + struct kfmlp_semaphore* sem,
14048 + struct kfmlp_queue* search_start)
14049 +{
14050 + // we start our search at search_start instead of at the beginning of the
14051 + // queue list to load-balance across all resources.
14052 + struct kfmlp_queue* step = search_start;
14053 + struct kfmlp_queue* shortest = sem->shortest_queue;
14054 +
14055 + do
14056 + {
14057 + step = (step+1 != &sem->queues[sem->num_resources]) ?
14058 + step+1 : &sem->queues[0];
14059 +
14060 + if(step->count < shortest->count)
14061 + {
14062 + shortest = step;
14063 + if(step->count == 0)
14064 + break; /* can't get any shorter */
14065 + }
14066 +
14067 + }while(step != search_start);
14068 +
14069 + return(shortest);
14070 +}
14071 +
14072 +static struct task_struct* kfmlp_remove_hp_waiter(struct kfmlp_semaphore* sem)
14073 +{
14074 + /* must hold sem->lock */
14075 +
14076 + struct kfmlp_queue *my_queue = NULL;
14077 + struct task_struct *max_hp = NULL;
14078 +
14079 +
14080 + struct list_head *pos;
14081 + struct task_struct *queued;
14082 + int i;
14083 +
14084 + for(i = 0; i < sem->num_resources; ++i)
14085 + {
14086 + if( (sem->queues[i].count > 1) &&
14087 + ((my_queue == NULL) ||
14088 + (edf_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) )
14089 + {
14090 + my_queue = &sem->queues[i];
14091 + }
14092 + }
14093 +
14094 + if(my_queue)
14095 + {
14096 + max_hp = my_queue->hp_waiter;
14097 +
14098 + BUG_ON(!max_hp);
14099 +
14100 + TRACE_CUR("queue %d: stealing %s/%d from queue %d\n",
14101 + kfmlp_get_idx(sem, my_queue),
14102 + max_hp->comm, max_hp->pid,
14103 + kfmlp_get_idx(sem, my_queue));
14104 +
14105 + my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, max_hp);
14106 +
14107 + /*
14108 + if(my_queue->hp_waiter)
14109 + TRACE_CUR("queue %d: new hp_waiter is %s/%d\n",
14110 + kfmlp_get_idx(sem, my_queue),
14111 + my_queue->hp_waiter->comm,
14112 + my_queue->hp_waiter->pid);
14113 + else
14114 + TRACE_CUR("queue %d: new hp_waiter is %p\n",
14115 + kfmlp_get_idx(sem, my_queue), NULL);
14116 + */
14117 +
14118 + raw_spin_lock(&gsnedf_lock);
14119 +
14120 + /*
14121 + if(my_queue->owner)
14122 + TRACE_CUR("queue %d: owner is %s/%d\n",
14123 + kfmlp_get_idx(sem, my_queue),
14124 + my_queue->owner->comm,
14125 + my_queue->owner->pid);
14126 + else
14127 + TRACE_CUR("queue %d: owner is %p\n",
14128 + kfmlp_get_idx(sem, my_queue),
14129 + NULL);
14130 + */
14131 +
14132 + if(tsk_rt(my_queue->owner)->inh_task == max_hp)
14133 + {
14134 + __clear_priority_inheritance(my_queue->owner);
14135 + if(my_queue->hp_waiter != NULL)
14136 + {
14137 + __set_priority_inheritance(my_queue->owner, my_queue->hp_waiter);
14138 + }
14139 + }
14140 + raw_spin_unlock(&gsnedf_lock);
14141 +
14142 + list_for_each(pos, &my_queue->wait.task_list)
14143 + {
14144 + queued = (struct task_struct*) list_entry(pos, wait_queue_t,
14145 + task_list)->private;
14146 + /* Compare task prios, find high prio task. */
14147 + if (queued == max_hp)
14148 + {
14149 + /*
14150 + TRACE_CUR("queue %d: found entry in wait queue. REMOVING!\n",
14151 + kfmlp_get_idx(sem, my_queue));
14152 + */
14153 + __remove_wait_queue(&my_queue->wait,
14154 + list_entry(pos, wait_queue_t, task_list));
14155 + break;
14156 + }
14157 + }
14158 + --(my_queue->count);
14159 + }
14160 +
14161 + return(max_hp);
14162 +}
14163 +
14164 +int gsnedf_kfmlp_lock(struct litmus_lock* l)
14165 +{
14166 + struct task_struct* t = current;
14167 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
14168 + struct kfmlp_queue* my_queue;
14169 + wait_queue_t wait;
14170 + unsigned long flags;
14171 +
14172 + if (!is_realtime(t))
14173 + return -EPERM;
14174 +
14175 + spin_lock_irqsave(&sem->lock, flags);
14176 +
14177 + my_queue = sem->shortest_queue;
14178 +
14179 + if (my_queue->owner) {
14180 + /* resource is not free => must suspend and wait */
14181 + TRACE_CUR("queue %d: Resource is not free => must suspend and wait.\n",
14182 + kfmlp_get_idx(sem, my_queue));
14183 +
14184 + init_waitqueue_entry(&wait, t);
14185 +
14186 + /* FIXME: interruptible would be nice some day */
14187 + set_task_state(t, TASK_UNINTERRUPTIBLE);
14188 +
14189 + __add_wait_queue_tail_exclusive(&my_queue->wait, &wait);
14190 +
14191 + /* check if we need to activate priority inheritance */
14192 + if (edf_higher_prio(t, my_queue->hp_waiter))
14193 + {
14194 + my_queue->hp_waiter = t;
14195 + if (edf_higher_prio(t, my_queue->owner))
14196 + {
14197 + set_priority_inheritance(my_queue->owner, my_queue->hp_waiter);
14198 + }
14199 + }
14200 +
14201 + ++(my_queue->count);
14202 + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
14203 +
14204 + /* release lock before sleeping */
14205 + spin_unlock_irqrestore(&sem->lock, flags);
14206 +
14207 + /* We depend on the FIFO order. Thus, we don't need to recheck
14208 + * when we wake up; we are guaranteed to have the lock since
14209 + * there is only one wake up per release (or steal).
14210 + */
14211 + schedule();
14212 +
14213 +
14214 + if(my_queue->owner == t)
14215 + {
14216 + TRACE_CUR("queue %d: acquired through waiting\n",
14217 + kfmlp_get_idx(sem, my_queue));
14218 + }
14219 + else
14220 + {
14221 + /* this case may happen if our wait entry was stolen
14222 + between queues. record where we went. */
14223 + my_queue = kfmlp_get_queue(sem, t);
14224 +
14225 + BUG_ON(!my_queue);
14226 + TRACE_CUR("queue %d: acquired through stealing\n",
14227 + kfmlp_get_idx(sem, my_queue));
14228 + }
14229 + }
14230 + else
14231 + {
14232 + TRACE_CUR("queue %d: acquired immediately\n",
14233 + kfmlp_get_idx(sem, my_queue));
14234 +
14235 + my_queue->owner = t;
14236 +
14237 + ++(my_queue->count);
14238 + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
14239 +
14240 + spin_unlock_irqrestore(&sem->lock, flags);
14241 + }
14242 +
14243 + return kfmlp_get_idx(sem, my_queue);
14244 +}
14245 +
14246 +int gsnedf_kfmlp_unlock(struct litmus_lock* l)
14247 +{
14248 + struct task_struct *t = current, *next;
14249 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
14250 + struct kfmlp_queue *my_queue;
14251 + unsigned long flags;
14252 + int err = 0;
14253 +
14254 + spin_lock_irqsave(&sem->lock, flags);
14255 +
14256 + my_queue = kfmlp_get_queue(sem, t);
14257 +
14258 + if (!my_queue) {
14259 + err = -EINVAL;
14260 + goto out;
14261 + }
14262 +
14263 + /* check if there are jobs waiting for this resource */
14264 + next = __waitqueue_remove_first(&my_queue->wait);
14265 + if (next) {
14266 + /*
14267 + TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n",
14268 + kfmlp_get_idx(sem, my_queue),
14269 + next->comm, next->pid);
14270 + */
14271 + /* next becomes the resource holder */
14272 + my_queue->owner = next;
14273 +
14274 + --(my_queue->count);
14275 + // the '=' of '<=' is a dumb method to attempt to build
14276 + // affinity until tasks can tell us where they ran last...
14277 + if(my_queue->count <= sem->shortest_queue->count)
14278 + {
14279 + sem->shortest_queue = my_queue;
14280 + }
14281 +
14282 + TRACE_CUR("queue %d: lock ownership passed to %s/%d\n",
14283 + kfmlp_get_idx(sem, my_queue), next->comm, next->pid);
14284 +
14285 + /* determine new hp_waiter if necessary */
14286 + if (next == my_queue->hp_waiter) {
14287 + TRACE_TASK(next, "was highest-prio waiter\n");
14288 + /* next has the highest priority --- it doesn't need to
14289 + * inherit. However, we need to make sure that the
14290 + * next-highest priority in the queue is reflected in
14291 + * hp_waiter. */
14292 + my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next);
14293 + if (my_queue->hp_waiter)
14294 + TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue));
14295 + else
14296 + TRACE("queue %d: no further waiters\n", kfmlp_get_idx(sem, my_queue));
14297 + } else {
14298 + /* Well, if next is not the highest-priority waiter,
14299 + * then it ought to inherit the highest-priority
14300 + * waiter's priority. */
14301 + set_priority_inheritance(next, my_queue->hp_waiter);
14302 + }
14303 +
14304 + /* wake up next */
14305 + wake_up_process(next);
14306 + }
14307 + else
14308 + {
14309 + TRACE_CUR("queue %d: looking to steal someone...\n", kfmlp_get_idx(sem, my_queue));
14310 +
14311 + next = kfmlp_remove_hp_waiter(sem); /* returns NULL if nothing to steal */
14312 +
14313 + /*
14314 + if(next)
14315 + TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - steal\n",
14316 + kfmlp_get_idx(sem, my_queue),
14317 + next->comm, next->pid);
14318 + */
14319 +
14320 + my_queue->owner = next;
14321 +
14322 + if(next)
14323 + {
14324 + TRACE_CUR("queue %d: lock ownership passed to %s/%d (which was stolen)\n",
14325 + kfmlp_get_idx(sem, my_queue),
14326 + next->comm, next->pid);
14327 +
14328 + /* wake up next */
14329 + wake_up_process(next);
14330 + }
14331 + else
14332 + {
14333 + TRACE_CUR("queue %d: no one to steal.\n", kfmlp_get_idx(sem, my_queue));
14334 +
14335 + --(my_queue->count);
14336 + // the '=' of '<=' is a dumb method to attempt to build
14337 + // affinity until tasks can tell us where they ran last...
14338 + if(my_queue->count <= sem->shortest_queue->count)
14339 + {
14340 + sem->shortest_queue = my_queue;
14341 + }
14342 + }
14343 + }
14344 +
14345 + /* we lose the benefit of priority inheritance (if any) */
14346 + if (tsk_rt(t)->inh_task)
14347 + clear_priority_inheritance(t);
14348 +
14349 +out:
14350 + spin_unlock_irqrestore(&sem->lock, flags);
14351 +
14352 + return err;
14353 +}
14354 +
14355 +int gsnedf_kfmlp_close(struct litmus_lock* l)
14356 +{
14357 + struct task_struct *t = current;
14358 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
14359 + struct kfmlp_queue *my_queue;
14360 + unsigned long flags;
14361 +
14362 + int owner;
14363 +
14364 + spin_lock_irqsave(&sem->lock, flags);
14365 +
14366 + my_queue = kfmlp_get_queue(sem, t);
14367 + owner = (my_queue) ? (my_queue->owner == t) : 0;
14368 +
14369 + spin_unlock_irqrestore(&sem->lock, flags);
14370 +
14371 + if (owner)
14372 + gsnedf_kfmlp_unlock(l);
14373 +
14374 + return 0;
14375 +}
14376 +
14377 +void gsnedf_kfmlp_free(struct litmus_lock* l)
14378 +{
14379 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
14380 + kfree(sem->queues);
14381 + kfree(sem);
14382 +}
14383 +
14384 +static struct litmus_lock_ops gsnedf_kfmlp_lock_ops = {
14385 + .close = gsnedf_kfmlp_close,
14386 + .lock = gsnedf_kfmlp_lock,
14387 + .unlock = gsnedf_kfmlp_unlock,
14388 + .deallocate = gsnedf_kfmlp_free,
14389 +};
14390 +
14391 +static struct litmus_lock* gsnedf_new_kfmlp(void* __user arg, int* ret_code)
14392 +{
14393 + struct kfmlp_semaphore* sem;
14394 + int num_resources = 0;
14395 + int i;
14396 +
14397 + if(!access_ok(VERIFY_READ, arg, sizeof(num_resources)))
14398 + {
14399 + *ret_code = -EINVAL;
14400 + return(NULL);
14401 + }
14402 + if(__copy_from_user(&num_resources, arg, sizeof(num_resources)))
14403 + {
14404 + *ret_code = -EINVAL;
14405 + return(NULL);
14406 + }
14407 + if(num_resources < 1)
14408 + {
14409 + *ret_code = -EINVAL;
14410 + return(NULL);
14411 + }
14412 +
14413 + sem = kmalloc(sizeof(*sem), GFP_KERNEL);
14414 + if(!sem)
14415 + {
14416 + *ret_code = -ENOMEM;
14417 + return NULL;
14418 + }
14419 +
14420 + sem->queues = kmalloc(sizeof(struct kfmlp_queue)*num_resources, GFP_KERNEL);
14421 + if(!sem->queues)
14422 + {
14423 + kfree(sem);
14424 + *ret_code = -ENOMEM;
14425 + return NULL;
14426 + }
14427 +
14428 + sem->litmus_lock.ops = &gsnedf_kfmlp_lock_ops;
14429 + spin_lock_init(&sem->lock);
14430 + sem->num_resources = num_resources;
14431 +
14432 + for(i = 0; i < num_resources; ++i)
14433 + {
14434 + sem->queues[i].owner = NULL;
14435 + sem->queues[i].hp_waiter = NULL;
14436 + init_waitqueue_head(&sem->queues[i].wait);
14437 + sem->queues[i].count = 0;
14438 + }
14439 +
14440 + sem->shortest_queue = &sem->queues[0];
14441 +
14442 + *ret_code = 0;
14443 + return &sem->litmus_lock;
14444 +}
14445 +
14446 +
14447 +
14448 +
14449 +
14450 /* **** lock constructor **** */
14451
14452
14453 static long gsnedf_allocate_lock(struct litmus_lock **lock, int type,
14454 - void* __user unused)
14455 + void* __user arg)
14456 {
14457 int err = -ENXIO;
14458
14459 @@ -911,7 +1992,10 @@ static long gsnedf_allocate_lock(struct litmus_lock **lock, int type,
14460 else
14461 err = -ENOMEM;
14462 break;
14463 -
14464 +
14465 + case KFMLP_SEM:
14466 + *lock = gsnedf_new_kfmlp(arg, &err);
14467 + break;
14468 };
14469
14470 return err;
14471 @@ -919,7 +2003,6 @@ static long gsnedf_allocate_lock(struct litmus_lock **lock, int type,
14472
14473 #endif
14474
14475 -
14476 static long gsnedf_activate_plugin(void)
14477 {
14478 int cpu;
14479 @@ -946,6 +2029,20 @@ static long gsnedf_activate_plugin(void)
14480 }
14481 #endif
14482 }
14483 +
14484 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
14485 + gsnedf_pending_tasklets.head = NULL;
14486 + gsnedf_pending_tasklets.tail = &(gsnedf_pending_tasklets.head);
14487 +#endif
14488 +
14489 +#ifdef CONFIG_LITMUS_SOFTIRQD
14490 + spawn_klitirqd(NULL);
14491 +#endif
14492 +
14493 +#ifdef CONFIG_LITMUS_NVIDIA
14494 + init_nvidia_info();
14495 +#endif
14496 +
14497 return 0;
14498 }
14499
14500 @@ -963,7 +2060,17 @@ static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = {
14501 .admit_task = gsnedf_admit_task,
14502 .activate_plugin = gsnedf_activate_plugin,
14503 #ifdef CONFIG_LITMUS_LOCKING
14504 - .allocate_lock = gsnedf_allocate_lock,
14505 + .allocate_lock = gsnedf_allocate_lock,
14506 + .set_prio_inh = set_priority_inheritance,
14507 + .clear_prio_inh = clear_priority_inheritance,
14508 +#endif
14509 +#ifdef CONFIG_LITMUS_SOFTIRQD
14510 + .set_prio_inh_klitirqd = set_priority_inheritance_klitirqd,
14511 + .clear_prio_inh_klitirqd = clear_priority_inheritance_klitirqd,
14512 +#endif
14513 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
14514 + .enqueue_pai_tasklet = enqueue_pai_tasklet,
14515 + .run_tasklets = run_tasklets,
14516 #endif
14517 };
14518
14519 diff --git a/litmus/sched_litmus.c b/litmus/sched_litmus.c
14520 index e695289..1bca2e1 100644
14521 --- a/litmus/sched_litmus.c
14522 +++ b/litmus/sched_litmus.c
14523 @@ -103,7 +103,9 @@ litmus_schedule(struct rq *rq, struct task_struct *prev)
14524 }
14525 #ifdef __ARCH_WANT_UNLOCKED_CTXSW
14526 if (next->oncpu)
14527 + {
14528 TRACE_TASK(next, "waiting for !oncpu");
14529 + }
14530 while (next->oncpu) {
14531 cpu_relax();
14532 mb();
14533 diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
14534 index d54886d..d977e80 100644
14535 --- a/litmus/sched_plugin.c
14536 +++ b/litmus/sched_plugin.c
14537 @@ -129,6 +129,40 @@ static long litmus_dummy_allocate_lock(struct litmus_lock **lock, int type,
14538 return -ENXIO;
14539 }
14540
14541 +static void litmus_dummy_set_prio_inh(struct task_struct* a, struct task_struct* b)
14542 +{
14543 +}
14544 +
14545 +static void litmus_dummy_clear_prio_inh(struct task_struct* t)
14546 +{
14547 +}
14548 +
14549 +#endif
14550 +
14551 +#ifdef CONFIG_LITMUS_SOFTIRQD
14552 +static void litmus_dummy_set_prio_inh_klitirq(struct task_struct* klitirqd,
14553 + struct task_struct* old_owner,
14554 + struct task_struct* new_owner)
14555 +{
14556 +}
14557 +
14558 +static void litmus_dummy_clear_prio_inh_klitirqd(struct task_struct* klitirqd,
14559 + struct task_struct* old_owner)
14560 +{
14561 +}
14562 +#endif
14563 +
14564 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
14565 +static int litmus_dummy_enqueue_pai_tasklet(struct tasklet_struct* t)
14566 +{
14567 + TRACE("%s: PAI Tasklet unsupported in this plugin!!!!!!\n", __FUNCTION__);
14568 + return(0); // failure.
14569 +}
14570 +
14571 +static void litmus_dummy_run_tasklets(struct task_struct* t)
14572 +{
14573 + //TRACE("%s: PAI Tasklet unsupported in this plugin!!!!!!\n", __FUNCTION__);
14574 +}
14575 #endif
14576
14577
14578 @@ -149,6 +183,16 @@ struct sched_plugin linux_sched_plugin = {
14579 .deactivate_plugin = litmus_dummy_deactivate_plugin,
14580 #ifdef CONFIG_LITMUS_LOCKING
14581 .allocate_lock = litmus_dummy_allocate_lock,
14582 + .set_prio_inh = litmus_dummy_set_prio_inh,
14583 + .clear_prio_inh = litmus_dummy_clear_prio_inh,
14584 +#endif
14585 +#ifdef CONFIG_LITMUS_SOFTIRQD
14586 + .set_prio_inh_klitirqd = litmus_dummy_set_prio_inh_klitirq,
14587 + .clear_prio_inh_klitirqd = litmus_dummy_clear_prio_inh_klitirqd,
14588 +#endif
14589 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
14590 + .enqueue_pai_tasklet = litmus_dummy_enqueue_pai_tasklet,
14591 + .run_tasklets = litmus_dummy_run_tasklets,
14592 #endif
14593 .admit_task = litmus_dummy_admit_task
14594 };
14595 @@ -187,6 +231,8 @@ int register_sched_plugin(struct sched_plugin* plugin)
14596 CHECK(deactivate_plugin);
14597 #ifdef CONFIG_LITMUS_LOCKING
14598 CHECK(allocate_lock);
14599 + CHECK(set_prio_inh);
14600 + CHECK(clear_prio_inh);
14601 #endif
14602 CHECK(admit_task);
14603
14604 diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c
14605 index 5ef8d09..d079df2 100644
14606 --- a/litmus/sched_task_trace.c
14607 +++ b/litmus/sched_task_trace.c
14608 @@ -7,6 +7,7 @@
14609 #include <linux/module.h>
14610 #include <linux/sched.h>
14611 #include <linux/percpu.h>
14612 +#include <linux/hardirq.h>
14613
14614 #include <litmus/ftdev.h>
14615 #include <litmus/litmus.h>
14616 @@ -16,13 +17,13 @@
14617 #include <litmus/ftdev.h>
14618
14619
14620 -#define NO_EVENTS (1 << CONFIG_SCHED_TASK_TRACE_SHIFT)
14621 +#define NUM_EVENTS (1 << (CONFIG_SCHED_TASK_TRACE_SHIFT+11))
14622
14623 #define now() litmus_clock()
14624
14625 struct local_buffer {
14626 - struct st_event_record record[NO_EVENTS];
14627 - char flag[NO_EVENTS];
14628 + struct st_event_record record[NUM_EVENTS];
14629 + char flag[NUM_EVENTS];
14630 struct ft_buffer ftbuf;
14631 };
14632
14633 @@ -41,7 +42,7 @@ static int __init init_sched_task_trace(void)
14634 int i, ok = 0, err;
14635 printk("Allocated %u sched_trace_xxx() events per CPU "
14636 "(buffer size: %d bytes)\n",
14637 - NO_EVENTS, (int) sizeof(struct local_buffer));
14638 + NUM_EVENTS, (int) sizeof(struct local_buffer));
14639
14640 err = ftdev_init(&st_dev, THIS_MODULE,
14641 num_online_cpus(), "sched_trace");
14642 @@ -50,7 +51,7 @@ static int __init init_sched_task_trace(void)
14643
14644 for (i = 0; i < st_dev.minor_cnt; i++) {
14645 buf = &per_cpu(st_event_buffer, i);
14646 - ok += init_ft_buffer(&buf->ftbuf, NO_EVENTS,
14647 + ok += init_ft_buffer(&buf->ftbuf, NUM_EVENTS,
14648 sizeof(struct st_event_record),
14649 buf->flag,
14650 buf->record);
14651 @@ -154,7 +155,8 @@ feather_callback void do_sched_trace_task_switch_to(unsigned long id,
14652 {
14653 struct task_struct *t = (struct task_struct*) _task;
14654 struct st_event_record* rec;
14655 - if (is_realtime(t)) {
14656 + //if (is_realtime(t)) /* comment out to trace EVERYTHING */
14657 + {
14658 rec = get_record(ST_SWITCH_TO, t);
14659 if (rec) {
14660 rec->data.switch_to.when = now();
14661 @@ -169,7 +171,8 @@ feather_callback void do_sched_trace_task_switch_away(unsigned long id,
14662 {
14663 struct task_struct *t = (struct task_struct*) _task;
14664 struct st_event_record* rec;
14665 - if (is_realtime(t)) {
14666 + //if (is_realtime(t)) /* comment out to trace EVERYTHING */
14667 + {
14668 rec = get_record(ST_SWITCH_AWAY, t);
14669 if (rec) {
14670 rec->data.switch_away.when = now();
14671 @@ -188,6 +191,9 @@ feather_callback void do_sched_trace_task_completion(unsigned long id,
14672 if (rec) {
14673 rec->data.completion.when = now();
14674 rec->data.completion.forced = forced;
14675 +#ifdef CONFIG_LITMUS_NVIDIA
14676 + rec->data.completion.nv_int_count = (u16)atomic_read(&tsk_rt(t)->nv_int_count);
14677 +#endif
14678 put_record(rec);
14679 }
14680 }
14681 @@ -239,3 +245,215 @@ feather_callback void do_sched_trace_action(unsigned long id,
14682 put_record(rec);
14683 }
14684 }
14685 +
14686 +
14687 +feather_callback void do_sched_trace_tasklet_release(unsigned long id,
14688 + unsigned long _owner)
14689 +{
14690 + struct task_struct *t = (struct task_struct*) _owner;
14691 + struct st_event_record *rec = get_record(ST_TASKLET_RELEASE, t);
14692 +
14693 + if (rec) {
14694 + rec->data.tasklet_release.when = now();
14695 + put_record(rec);
14696 + }
14697 +}
14698 +
14699 +
14700 +feather_callback void do_sched_trace_tasklet_begin(unsigned long id,
14701 + unsigned long _owner)
14702 +{
14703 + struct task_struct *t = (struct task_struct*) _owner;
14704 + struct st_event_record *rec = get_record(ST_TASKLET_BEGIN, t);
14705 +
14706 + if (rec) {
14707 + rec->data.tasklet_begin.when = now();
14708 +
14709 + if(!in_interrupt())
14710 + rec->data.tasklet_begin.exe_pid = current->pid;
14711 + else
14712 + rec->data.tasklet_begin.exe_pid = 0;
14713 +
14714 + put_record(rec);
14715 + }
14716 +}
14717 +EXPORT_SYMBOL(do_sched_trace_tasklet_begin);
14718 +
14719 +
14720 +feather_callback void do_sched_trace_tasklet_end(unsigned long id,
14721 + unsigned long _owner,
14722 + unsigned long _flushed)
14723 +{
14724 + struct task_struct *t = (struct task_struct*) _owner;
14725 + struct st_event_record *rec = get_record(ST_TASKLET_END, t);
14726 +
14727 + if (rec) {
14728 + rec->data.tasklet_end.when = now();
14729 + rec->data.tasklet_end.flushed = _flushed;
14730 +
14731 + if(!in_interrupt())
14732 + rec->data.tasklet_end.exe_pid = current->pid;
14733 + else
14734 + rec->data.tasklet_end.exe_pid = 0;
14735 +
14736 + put_record(rec);
14737 + }
14738 +}
14739 +EXPORT_SYMBOL(do_sched_trace_tasklet_end);
14740 +
14741 +
14742 +feather_callback void do_sched_trace_work_release(unsigned long id,
14743 + unsigned long _owner)
14744 +{
14745 + struct task_struct *t = (struct task_struct*) _owner;
14746 + struct st_event_record *rec = get_record(ST_WORK_RELEASE, t);
14747 +
14748 + if (rec) {
14749 + rec->data.work_release.when = now();
14750 + put_record(rec);
14751 + }
14752 +}
14753 +
14754 +
14755 +feather_callback void do_sched_trace_work_begin(unsigned long id,
14756 + unsigned long _owner,
14757 + unsigned long _exe)
14758 +{
14759 + struct task_struct *t = (struct task_struct*) _owner;
14760 + struct st_event_record *rec = get_record(ST_WORK_BEGIN, t);
14761 +
14762 + if (rec) {
14763 + struct task_struct *exe = (struct task_struct*) _exe;
14764 + rec->data.work_begin.exe_pid = exe->pid;
14765 + rec->data.work_begin.when = now();
14766 + put_record(rec);
14767 + }
14768 +}
14769 +EXPORT_SYMBOL(do_sched_trace_work_begin);
14770 +
14771 +
14772 +feather_callback void do_sched_trace_work_end(unsigned long id,
14773 + unsigned long _owner,
14774 + unsigned long _exe,
14775 + unsigned long _flushed)
14776 +{
14777 + struct task_struct *t = (struct task_struct*) _owner;
14778 + struct st_event_record *rec = get_record(ST_WORK_END, t);
14779 +
14780 + if (rec) {
14781 + struct task_struct *exe = (struct task_struct*) _exe;
14782 + rec->data.work_end.exe_pid = exe->pid;
14783 + rec->data.work_end.flushed = _flushed;
14784 + rec->data.work_end.when = now();
14785 + put_record(rec);
14786 + }
14787 +}
14788 +EXPORT_SYMBOL(do_sched_trace_work_end);
14789 +
14790 +
14791 +feather_callback void do_sched_trace_eff_prio_change(unsigned long id,
14792 + unsigned long _task,
14793 + unsigned long _inh)
14794 +{
14795 + struct task_struct *t = (struct task_struct*) _task;
14796 + struct st_event_record *rec = get_record(ST_EFF_PRIO_CHANGE, t);
14797 +
14798 + if (rec) {
14799 + struct task_struct *inh = (struct task_struct*) _inh;
14800 + rec->data.effective_priority_change.when = now();
14801 + rec->data.effective_priority_change.inh_pid = (inh != NULL) ?
14802 + inh->pid :
14803 + 0xffff;
14804 +
14805 + put_record(rec);
14806 + }
14807 +}
14808 +
14809 +/* pray for no nesting of nv interrupts on same CPU... */
14810 +struct tracing_interrupt_map
14811 +{
14812 + int active;
14813 + int count;
14814 + unsigned long data[128]; // assume nesting less than 128...
14815 + unsigned long serial[128];
14816 +};
14817 +DEFINE_PER_CPU(struct tracing_interrupt_map, active_interrupt_tracing);
14818 +
14819 +
14820 +DEFINE_PER_CPU(u32, intCounter);
14821 +
14822 +feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id,
14823 + unsigned long _device)
14824 +{
14825 + struct st_event_record *rec;
14826 + u32 serialNum;
14827 +
14828 + {
14829 + u32* serial;
14830 + struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id());
14831 + if(int_map->active == 0xcafebabe)
14832 + {
14833 + int_map->count++;
14834 + }
14835 + else
14836 + {
14837 + int_map->active = 0xcafebabe;
14838 + int_map->count = 1;
14839 + }
14840 + //int_map->data[int_map->count-1] = _device;
14841 +
14842 + serial = &per_cpu(intCounter, smp_processor_id());
14843 + *serial += num_online_cpus();
14844 + serialNum = *serial;
14845 + int_map->serial[int_map->count-1] = serialNum;
14846 + }
14847 +
14848 + rec = get_record(ST_NV_INTERRUPT_BEGIN, NULL);
14849 + if(rec) {
14850 + u32 device = _device;
14851 + rec->data.nv_interrupt_begin.when = now();
14852 + rec->data.nv_interrupt_begin.device = device;
14853 + rec->data.nv_interrupt_begin.serialNumber = serialNum;
14854 + put_record(rec);
14855 + }
14856 +}
14857 +EXPORT_SYMBOL(do_sched_trace_nv_interrupt_begin);
14858 +
14859 +/*
14860 +int is_interrupt_tracing_active(void)
14861 +{
14862 + struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id());
14863 + if(int_map->active == 0xcafebabe)
14864 + return 1;
14865 + return 0;
14866 +}
14867 +*/
14868 +
14869 +feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id, unsigned long _device)
14870 +{
14871 + struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id());
14872 + if(int_map->active == 0xcafebabe)
14873 + {
14874 + struct st_event_record *rec = get_record(ST_NV_INTERRUPT_END, NULL);
14875 +
14876 + int_map->count--;
14877 + if(int_map->count == 0)
14878 + int_map->active = 0;
14879 +
14880 + if(rec) {
14881 + u32 device = _device;
14882 + rec->data.nv_interrupt_end.when = now();
14883 + //rec->data.nv_interrupt_end.device = int_map->data[int_map->count];
14884 + rec->data.nv_interrupt_end.device = device;
14885 + rec->data.nv_interrupt_end.serialNumber = int_map->serial[int_map->count];
14886 + put_record(rec);
14887 + }
14888 + }
14889 +}
14890 +EXPORT_SYMBOL(do_sched_trace_nv_interrupt_end);
14891 +
14892 +
14893 +
14894 +
14895 +
14896 +
14897 diff --git a/litmus/sched_trace_external.c b/litmus/sched_trace_external.c
14898 new file mode 100644
14899 index 0000000..cf8e1d7
14900 --- /dev/null
14901 +++ b/litmus/sched_trace_external.c
14902 @@ -0,0 +1,64 @@
14903 +#include <linux/module.h>
14904 +
14905 +#include <litmus/trace.h>
14906 +#include <litmus/sched_trace.h>
14907 +#include <litmus/litmus.h>
14908 +
14909 +void __sched_trace_tasklet_begin_external(struct task_struct* t)
14910 +{
14911 + sched_trace_tasklet_begin(t);
14912 +}
14913 +EXPORT_SYMBOL(__sched_trace_tasklet_begin_external);
14914 +
14915 +void __sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed)
14916 +{
14917 + sched_trace_tasklet_end(t, flushed);
14918 +}
14919 +EXPORT_SYMBOL(__sched_trace_tasklet_end_external);
14920 +
14921 +
14922 +
14923 +void __sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e)
14924 +{
14925 + sched_trace_work_begin(t, e);
14926 +}
14927 +EXPORT_SYMBOL(__sched_trace_work_begin_external);
14928 +
14929 +void __sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f)
14930 +{
14931 + sched_trace_work_end(t, e, f);
14932 +}
14933 +EXPORT_SYMBOL(__sched_trace_work_end_external);
14934 +
14935 +
14936 +
14937 +void __sched_trace_nv_interrupt_begin_external(u32 device)
14938 +{
14939 + //unsigned long _device = device;
14940 + sched_trace_nv_interrupt_begin((unsigned long)device);
14941 +}
14942 +EXPORT_SYMBOL(__sched_trace_nv_interrupt_begin_external);
14943 +
14944 +void __sched_trace_nv_interrupt_end_external(u32 device)
14945 +{
14946 + //unsigned long _device = device;
14947 + sched_trace_nv_interrupt_end((unsigned long)device);
14948 +}
14949 +EXPORT_SYMBOL(__sched_trace_nv_interrupt_end_external);
14950 +
14951 +
14952 +#ifdef CONFIG_LITMUS_NVIDIA
14953 +
14954 +#define EXX_TS(evt) \
14955 +void __##evt(void) { evt; } \
14956 +EXPORT_SYMBOL(__##evt);
14957 +
14958 +EXX_TS(TS_NV_TOPISR_START)
14959 +EXX_TS(TS_NV_TOPISR_END)
14960 +EXX_TS(TS_NV_BOTISR_START)
14961 +EXX_TS(TS_NV_BOTISR_END)
14962 +EXX_TS(TS_NV_RELEASE_BOTISR_START)
14963 +EXX_TS(TS_NV_RELEASE_BOTISR_END)
14964 +
14965 +#endif
14966 +
14967