Attachment 'gpusync-rtss12.patch'
1 From a463f9a9e04385f0729f7435a0a6dff7d89b25de Mon Sep 17 00:00:00 2001
2 From: Glenn Elliott <gelliott@cs.unc.edu>
3 Date: Sat, 26 May 2012 17:29:58 -0400
4 Subject: [PATCH] GPUSync patch for Litmus 2012.1.
5
6 ---
7 arch/x86/kernel/irq.c | 4 +
8 arch/x86/kernel/syscall_table_32.S | 1 +
9 include/linux/completion.h | 1 +
10 include/linux/interrupt.h | 10 +-
11 include/linux/mutex.h | 10 +
12 include/linux/semaphore.h | 9 +
13 include/linux/workqueue.h | 18 +
14 include/litmus/binheap.h | 207 +++
15 include/litmus/edf_common.h | 12 +
16 include/litmus/fdso.h | 14 +-
17 include/litmus/fpmath.h | 145 ++
18 include/litmus/gpu_affinity.h | 49 +
19 include/litmus/ikglp_lock.h | 160 ++
20 include/litmus/kexclu_affinity.h | 35 +
21 include/litmus/kfmlp_lock.h | 97 ++
22 include/litmus/litmus.h | 9 +-
23 include/litmus/litmus_softirq.h | 199 +++
24 include/litmus/locking.h | 142 +-
25 include/litmus/nvidia_info.h | 46 +
26 include/litmus/preempt.h | 2 +-
27 include/litmus/rsm_lock.h | 54 +
28 include/litmus/rt_param.h | 100 +-
29 include/litmus/sched_plugin.h | 76 +-
30 include/litmus/sched_trace.h | 218 ++-
31 include/litmus/sched_trace_external.h | 78 +
32 include/litmus/trace.h | 34 +-
33 include/litmus/unistd_32.h | 5 +-
34 include/litmus/unistd_64.h | 9 +-
35 kernel/lockdep.c | 7 +-
36 kernel/mutex.c | 125 ++
37 kernel/sched.c | 27 +
38 kernel/semaphore.c | 13 +-
39 kernel/softirq.c | 322 +++-
40 kernel/workqueue.c | 71 +-
41 litmus/Kconfig | 148 +-
42 litmus/Makefile | 11 +-
43 litmus/affinity.c | 2 +-
44 litmus/binheap.c | 443 +++++
45 litmus/edf_common.c | 147 +-
46 litmus/fdso.c | 13 +
47 litmus/gpu_affinity.c | 113 ++
48 litmus/ikglp_lock.c | 2838 +++++++++++++++++++++++++++++++++
49 litmus/jobs.c | 17 +-
50 litmus/kexclu_affinity.c | 92 ++
51 litmus/kfmlp_lock.c | 1002 ++++++++++++
52 litmus/litmus.c | 126 +-
53 litmus/litmus_pai_softirq.c | 64 +
54 litmus/litmus_proc.c | 17 +
55 litmus/litmus_softirq.c | 1582 ++++++++++++++++++
56 litmus/locking.c | 393 ++++-
57 litmus/nvidia_info.c | 597 +++++++
58 litmus/preempt.c | 5 +
59 litmus/rsm_lock.c | 796 +++++++++
60 litmus/sched_cedf.c | 1062 +++++++++++-
61 litmus/sched_gsn_edf.c | 1032 ++++++++++--
62 litmus/sched_litmus.c | 2 +
63 litmus/sched_plugin.c | 135 +-
64 litmus/sched_task_trace.c | 282 +++-
65 litmus/sched_trace_external.c | 64 +
66 59 files changed, 13012 insertions(+), 280 deletions(-)
67 create mode 100644 include/litmus/binheap.h
68 create mode 100644 include/litmus/fpmath.h
69 create mode 100644 include/litmus/gpu_affinity.h
70 create mode 100644 include/litmus/ikglp_lock.h
71 create mode 100644 include/litmus/kexclu_affinity.h
72 create mode 100644 include/litmus/kfmlp_lock.h
73 create mode 100644 include/litmus/litmus_softirq.h
74 create mode 100644 include/litmus/nvidia_info.h
75 create mode 100644 include/litmus/rsm_lock.h
76 create mode 100644 include/litmus/sched_trace_external.h
77 create mode 100644 litmus/binheap.c
78 create mode 100644 litmus/gpu_affinity.c
79 create mode 100644 litmus/ikglp_lock.c
80 create mode 100644 litmus/kexclu_affinity.c
81 create mode 100644 litmus/kfmlp_lock.c
82 create mode 100644 litmus/litmus_pai_softirq.c
83 create mode 100644 litmus/litmus_softirq.c
84 create mode 100644 litmus/nvidia_info.c
85 create mode 100644 litmus/rsm_lock.c
86 create mode 100644 litmus/sched_trace_external.c
87
88 diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
89 index 6c0802e..680a5cb 100644
90 --- a/arch/x86/kernel/irq.c
91 +++ b/arch/x86/kernel/irq.c
92 @@ -10,6 +10,10 @@
93 #include <linux/ftrace.h>
94 #include <linux/delay.h>
95
96 +#ifdef CONFIG_LITMUS_NVIDIA
97 +#include <litmus/sched_trace.h>
98 +#endif
99 +
100 #include <asm/apic.h>
101 #include <asm/io_apic.h>
102 #include <asm/irq.h>
103 diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
104 index d012622..0cb4373 100644
105 --- a/arch/x86/kernel/syscall_table_32.S
106 +++ b/arch/x86/kernel/syscall_table_32.S
107 @@ -358,3 +358,4 @@ ENTRY(sys_call_table)
108 .long sys_wait_for_ts_release
109 .long sys_release_ts /* +10 */
110 .long sys_null_call
111 + .long sys_register_nv_device
112 diff --git a/include/linux/completion.h b/include/linux/completion.h
113 index 9d72727..cff405c 100644
114 --- a/include/linux/completion.h
115 +++ b/include/linux/completion.h
116 @@ -76,6 +76,7 @@ static inline void init_completion(struct completion *x)
117 init_waitqueue_head(&x->wait);
118 }
119
120 +extern void __wait_for_completion_locked(struct completion *);
121 extern void wait_for_completion(struct completion *);
122 extern int wait_for_completion_interruptible(struct completion *x);
123 extern int wait_for_completion_killable(struct completion *x);
124 diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
125 index f6efed0..57a7bc8 100644
126 --- a/include/linux/interrupt.h
127 +++ b/include/linux/interrupt.h
128 @@ -445,6 +445,7 @@ static inline void __raise_softirq_irqoff(unsigned int nr)
129
130 extern void raise_softirq_irqoff(unsigned int nr);
131 extern void raise_softirq(unsigned int nr);
132 +extern void wakeup_softirqd(void);
133
134 /* This is the worklist that queues up per-cpu softirq work.
135 *
136 @@ -500,6 +501,10 @@ struct tasklet_struct
137 atomic_t count;
138 void (*func)(unsigned long);
139 unsigned long data;
140 +
141 +#if defined(CONFIG_LITMUS_SOFTIRQD) || defined(CONFIG_LITMUS_PAI_SOFTIRQD)
142 + struct task_struct *owner;
143 +#endif
144 };
145
146 #define DECLARE_TASKLET(name, func, data) \
147 @@ -537,6 +542,7 @@ static inline void tasklet_unlock_wait(struct tasklet_struct *t)
148 #define tasklet_unlock(t) do { } while (0)
149 #endif
150
151 +extern void ___tasklet_schedule(struct tasklet_struct *t);
152 extern void __tasklet_schedule(struct tasklet_struct *t);
153
154 static inline void tasklet_schedule(struct tasklet_struct *t)
155 @@ -545,6 +551,7 @@ static inline void tasklet_schedule(struct tasklet_struct *t)
156 __tasklet_schedule(t);
157 }
158
159 +extern void ___tasklet_hi_schedule(struct tasklet_struct *t);
160 extern void __tasklet_hi_schedule(struct tasklet_struct *t);
161
162 static inline void tasklet_hi_schedule(struct tasklet_struct *t)
163 @@ -553,6 +560,7 @@ static inline void tasklet_hi_schedule(struct tasklet_struct *t)
164 __tasklet_hi_schedule(t);
165 }
166
167 +extern void ___tasklet_hi_schedule_first(struct tasklet_struct *t);
168 extern void __tasklet_hi_schedule_first(struct tasklet_struct *t);
169
170 /*
171 @@ -582,7 +590,7 @@ static inline void tasklet_disable(struct tasklet_struct *t)
172 }
173
174 static inline void tasklet_enable(struct tasklet_struct *t)
175 -{
176 +{
177 smp_mb__before_atomic_dec();
178 atomic_dec(&t->count);
179 }
180 diff --git a/include/linux/mutex.h b/include/linux/mutex.h
181 index a940fe4..cb47deb 100644
182 --- a/include/linux/mutex.h
183 +++ b/include/linux/mutex.h
184 @@ -126,6 +126,15 @@ static inline int mutex_is_locked(struct mutex *lock)
185 return atomic_read(&lock->count) != 1;
186 }
187
188 +/* return non-zero to abort. only pre-side-effects may abort */
189 +typedef int (*side_effect_t)(unsigned long);
190 +extern void mutex_lock_sfx(struct mutex *lock,
191 + side_effect_t pre, unsigned long pre_arg,
192 + side_effect_t post, unsigned long post_arg);
193 +extern void mutex_unlock_sfx(struct mutex *lock,
194 + side_effect_t pre, unsigned long pre_arg,
195 + side_effect_t post, unsigned long post_arg);
196 +
197 /*
198 * See kernel/mutex.c for detailed documentation of these APIs.
199 * Also see Documentation/mutex-design.txt.
200 @@ -153,6 +162,7 @@ extern void mutex_lock(struct mutex *lock);
201 extern int __must_check mutex_lock_interruptible(struct mutex *lock);
202 extern int __must_check mutex_lock_killable(struct mutex *lock);
203
204 +
205 # define mutex_lock_nested(lock, subclass) mutex_lock(lock)
206 # define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock)
207 # define mutex_lock_killable_nested(lock, subclass) mutex_lock_killable(lock)
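
A minimal sketch of how the side-effect hooks above might be used (hypothetical, not taken from the patch; the hook names and arguments are illustrative, and exactly when the hooks run is defined by the mutex_lock_sfx()/mutex_unlock_sfx() implementations in kernel/mutex.c later in this patch):

static int my_pre(unsigned long arg)
{
	/* per the comment above, only the pre hook may abort by
	 * returning non-zero; returning 0 proceeds normally */
	return 0;
}

static int my_post(unsigned long arg)
{
	return 0;
}

static void locked_region(struct mutex *m)
{
	mutex_lock_sfx(m, my_pre, 0, my_post, 0);
	/* ... critical section ... */
	mutex_unlock_sfx(m, my_pre, 0, my_post, 0);
}
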
208 diff --git a/include/linux/semaphore.h b/include/linux/semaphore.h
209 index 39fa049..c83fc2b 100644
210 --- a/include/linux/semaphore.h
211 +++ b/include/linux/semaphore.h
212 @@ -43,4 +43,13 @@ extern int __must_check down_trylock(struct semaphore *sem);
213 extern int __must_check down_timeout(struct semaphore *sem, long jiffies);
214 extern void up(struct semaphore *sem);
215
216 +extern void __down(struct semaphore *sem);
217 +extern void __up(struct semaphore *sem);
218 +
219 +struct semaphore_waiter {
220 + struct list_head list;
221 + struct task_struct *task;
222 + int up;
223 +};
224 +
225 #endif /* __LINUX_SEMAPHORE_H */
226 diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
227 index f584aba..1ec2ec7 100644
228 --- a/include/linux/workqueue.h
229 +++ b/include/linux/workqueue.h
230 @@ -83,6 +83,9 @@ struct work_struct {
231 #ifdef CONFIG_LOCKDEP
232 struct lockdep_map lockdep_map;
233 #endif
234 +#ifdef CONFIG_LITMUS_SOFTIRQD
235 + struct task_struct *owner;
236 +#endif
237 };
238
239 #define WORK_DATA_INIT() ATOMIC_LONG_INIT(WORK_STRUCT_NO_CPU)
240 @@ -115,11 +118,25 @@ struct execute_work {
241 #define __WORK_INIT_LOCKDEP_MAP(n, k)
242 #endif
243
244 +#ifdef CONFIG_LITMUS_SOFTIRQD
245 +#define __WORK_INIT_OWNER() \
246 + .owner = NULL,
247 +
248 +#define PREPARE_OWNER(_work, _owner) \
249 + do { \
250 + (_work)->owner = (_owner); \
251 + } while(0)
252 +#else
253 +#define __WORK_INIT_OWNER()
254 +#define PREPARE_OWNER(_work, _owner)
255 +#endif
256 +
257 #define __WORK_INITIALIZER(n, f) { \
258 .data = WORK_DATA_STATIC_INIT(), \
259 .entry = { &(n).entry, &(n).entry }, \
260 .func = (f), \
261 __WORK_INIT_LOCKDEP_MAP(#n, &(n)) \
262 + __WORK_INIT_OWNER() \
263 }
264
265 #define __DELAYED_WORK_INITIALIZER(n, f) { \
266 @@ -357,6 +374,7 @@ extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
267 extern void flush_workqueue(struct workqueue_struct *wq);
268 extern void flush_scheduled_work(void);
269
270 +extern int __schedule_work(struct work_struct *work);
271 extern int schedule_work(struct work_struct *work);
272 extern int schedule_work_on(int cpu, struct work_struct *work);
273 extern int schedule_delayed_work(struct delayed_work *work, unsigned long delay);
274 diff --git a/include/litmus/binheap.h b/include/litmus/binheap.h
275 new file mode 100644
276 index 0000000..9e966e3
277 --- /dev/null
278 +++ b/include/litmus/binheap.h
279 @@ -0,0 +1,207 @@
280 +#ifndef LITMUS_BINARY_HEAP_H
281 +#define LITMUS_BINARY_HEAP_H
282 +
283 +#include <linux/kernel.h>
284 +
285 +/**
286 + * Simple binary heap with add, arbitrary delete, delete_root, and top
287 + * operations.
288 + *
289 + * Style meant to conform with list.h.
290 + *
291 + * Motivation: Linux's prio_heap.h is of fixed size. Litmus's binomial
292 + * heap may be overkill (and perhaps not general enough) for some applications.
293 + *
294 + * Note: In order to make node swaps fast, a node inserted with a data pointer
295 + * may not always hold said data pointer. This is similar to the binomial heap
296 + * implementation. This does make node deletion tricky since we have to
297 + * (1) locate the node that holds the data pointer to delete, and (2) the
298 + * node that was originally inserted with said data pointer. These have to be
299 + * coalesced into a single node before removal (see usage of
300 + * __binheap_safe_swap()). We have to track node references to accomplish this.
301 + */
302 +
303 +struct binheap_node {
304 + void *data;
305 + struct binheap_node *parent;
306 + struct binheap_node *left;
307 + struct binheap_node *right;
308 +
309 + /* pointer to binheap_node that holds *data for which this binheap_node
310 + * was originally inserted. (*data "owns" this node)
311 + */
312 + struct binheap_node *ref;
313 + struct binheap_node **ref_ptr;
314 +};
315 +
316 +/**
317 + * Signature of compator function. Assumed 'less-than' (min-heap).
318 + * Pass in 'greater-than' for max-heap.
319 + *
320 + * TODO: Consider macro-based implementation that allows comparator to be
321 + * inlined (similar to Linux red/black tree) for greater efficiency.
322 + */
323 +typedef int (*binheap_order_t)(struct binheap_node *a,
324 + struct binheap_node *b);
325 +
326 +
327 +struct binheap_handle {
328 + struct binheap_node *root;
329 +
330 + /* pointer to node to take next inserted child */
331 + struct binheap_node *next;
332 +
333 + /* pointer to last node in complete binary tree */
334 + struct binheap_node *last;
335 +
336 + /* comparator function pointer */
337 + binheap_order_t compare;
338 +};
339 +
340 +
341 +#define BINHEAP_POISON ((void*)(0xdeadbeef))
342 +
343 +
344 +/**
345 + * binheap_entry - get the struct for this heap node.
346 + * Only valid when called upon heap nodes other than the root handle.
347 + * @ptr: the heap node.
348 + * @type: the type of struct pointed to by binheap_node::data.
349 + * @member: unused.
350 + */
351 +#define binheap_entry(ptr, type, member) \
352 +((type *)((ptr)->data))
353 +
354 +/**
355 + * binheap_node_container - get the struct that contains this node.
356 + * Only valid when called upon heap nodes other than the root handle.
357 + * @ptr: the heap node.
358 + * @type: the type of struct the node is embedded in.
359 + * @member: the name of the binheap_struct within the (type) struct.
360 + */
361 +#define binheap_node_container(ptr, type, member) \
362 +container_of((ptr), type, member)
363 +
364 +/**
365 + * binheap_top_entry - get the struct for the node at the top of the heap.
366 + * Only valid when called upon the heap handle node.
367 + * @ptr: the special heap-handle node.
368 + * @type: the type of the struct the head is embedded in.
369 + * @member: the name of the binheap_struct within the (type) struct.
370 + */
371 +#define binheap_top_entry(ptr, type, member) \
372 +binheap_entry((ptr)->root, type, member)
373 +
374 +/**
375 + * binheap_delete_root - remove the root element from the heap.
376 + * @handle: handle to the heap.
377 + * @type: the type of the struct the head is embedded in.
378 + * @member: the name of the binheap_struct within the (type) struct.
379 + */
380 +#define binheap_delete_root(handle, type, member) \
381 +__binheap_delete_root((handle), &((type *)((handle)->root->data))->member)
382 +
383 +/**
384 + * binheap_delete - remove an arbitrary element from the heap.
385 + * @to_delete: pointer to node to be removed.
386 + * @handle: handle to the heap.
387 + */
388 +#define binheap_delete(to_delete, handle) \
389 +__binheap_delete((to_delete), (handle))
390 +
391 +/**
392 + * binheap_add - insert an element to the heap
393 + * new_node: node to add.
394 + * @handle: handle to the heap.
395 + * @type: the type of the struct the head is embedded in.
396 + * @member: the name of the binheap_struct within the (type) struct.
397 + */
398 +#define binheap_add(new_node, handle, type, member) \
399 +__binheap_add((new_node), (handle), container_of((new_node), type, member))
400 +
401 +/**
402 + * binheap_decrease - re-eval the position of a node (based upon its
403 + * original data pointer).
404 + * @handle: handle to the heap.
405 + * @orig_node: node that was associated with the data pointer
406 + * (whose value has changed) when said pointer was
407 + * added to the heap.
408 + */
409 +#define binheap_decrease(orig_node, handle) \
410 +__binheap_decrease((orig_node), (handle))
411 +
412 +#define BINHEAP_NODE_INIT() { NULL, BINHEAP_POISON, NULL, NULL , NULL, NULL}
413 +
414 +#define BINHEAP_NODE(name) \
415 + struct binheap_node name = BINHEAP_NODE_INIT()
416 +
417 +
418 +static inline void INIT_BINHEAP_NODE(struct binheap_node *n)
419 +{
420 + n->data = NULL;
421 + n->parent = BINHEAP_POISON;
422 + n->left = NULL;
423 + n->right = NULL;
424 + n->ref = NULL;
425 + n->ref_ptr = NULL;
426 +}
427 +
428 +static inline void INIT_BINHEAP_HANDLE(
429 + struct binheap_handle *handle,
430 + binheap_order_t compare)
431 +{
432 + handle->root = NULL;
433 + handle->next = NULL;
434 + handle->last = NULL;
435 + handle->compare = compare;
436 +}
437 +
438 +/* Returns true (1) if binheap is empty. */
439 +static inline int binheap_empty(struct binheap_handle *handle)
440 +{
441 + return(handle->root == NULL);
442 +}
443 +
444 +/* Returns true (1) if binheap node is in a heap. */
445 +static inline int binheap_is_in_heap(struct binheap_node *node)
446 +{
447 + return (node->parent != BINHEAP_POISON);
448 +}
449 +
450 +
451 +int binheap_is_in_this_heap(struct binheap_node *node, struct binheap_handle* heap);
452 +
453 +
454 +
455 +void __binheap_add(struct binheap_node *new_node,
456 + struct binheap_handle *handle,
457 + void *data);
458 +
459 +
460 +/**
461 + * Removes the root node from the heap. The node is removed after coalescing
462 + * the binheap_node with its original data pointer at the root of the tree.
463 + *
464 + * The 'last' node in the tree is then swapped up to the root and bubbled
465 + * down.
466 + */
467 +void __binheap_delete_root(struct binheap_handle *handle,
468 + struct binheap_node *container);
469 +
470 +/**
471 + * Delete an arbitrary node. Bubble node to delete up to the root,
472 + * and then delete to root.
473 + */
474 +void __binheap_delete(
475 + struct binheap_node *node_to_delete,
476 + struct binheap_handle *handle);
477 +
478 +/**
479 + * Bubble up a node whose pointer has decreased in value.
480 + */
481 +void __binheap_decrease(struct binheap_node *orig_node,
482 + struct binheap_handle *handle);
483 +
484 +
485 +#endif
486 +
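
A minimal usage sketch of the heap API above (illustrative only, not part of the patch; the struct, field, and comparator names are made up): a structure embeds a binheap_node, the comparator recovers the containing structs via binheap_entry(), and elements are added and removed through the macros.

struct demo_item {
	int prio;
	struct binheap_node node;	/* embedded heap node */
};

/* 'less-than' comparator => min-heap ordered by prio */
static int demo_order(struct binheap_node *a, struct binheap_node *b)
{
	struct demo_item *x = binheap_entry(a, struct demo_item, node);
	struct demo_item *y = binheap_entry(b, struct demo_item, node);
	return x->prio < y->prio;
}

static void demo(void)
{
	struct binheap_handle heap;
	struct demo_item a = { .prio = 2 };
	struct demo_item b = { .prio = 1 };
	struct demo_item *top;

	INIT_BINHEAP_HANDLE(&heap, demo_order);
	INIT_BINHEAP_NODE(&a.node);
	INIT_BINHEAP_NODE(&b.node);

	binheap_add(&a.node, &heap, struct demo_item, node);
	binheap_add(&b.node, &heap, struct demo_item, node);

	top = binheap_top_entry(&heap, struct demo_item, node);	/* b (prio 1) */
	binheap_delete_root(&heap, struct demo_item, node);		/* removes b */
	binheap_delete(&a.node, &heap);					/* removes a */
	(void)top;
}
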
487 diff --git a/include/litmus/edf_common.h b/include/litmus/edf_common.h
488 index bbaf22e..63dff7e 100644
489 --- a/include/litmus/edf_common.h
490 +++ b/include/litmus/edf_common.h
491 @@ -20,6 +20,18 @@ int edf_higher_prio(struct task_struct* first,
492
493 int edf_ready_order(struct bheap_node* a, struct bheap_node* b);
494
495 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
496 +/* binheap_nodes must be embedded within 'struct litmus_lock' */
497 +int edf_max_heap_order(struct binheap_node *a, struct binheap_node *b);
498 +int edf_min_heap_order(struct binheap_node *a, struct binheap_node *b);
499 +int edf_max_heap_base_priority_order(struct binheap_node *a, struct binheap_node *b);
500 +int edf_min_heap_base_priority_order(struct binheap_node *a, struct binheap_node *b);
501 +
502 +int __edf_higher_prio(struct task_struct* first, comparison_mode_t first_mode,
503 + struct task_struct* second, comparison_mode_t second_mode);
504 +
505 +#endif
506 +
507 int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t);
508
509 #endif
510 diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h
511 index caf2a1e..1f5d3bd 100644
512 --- a/include/litmus/fdso.h
513 +++ b/include/litmus/fdso.h
514 @@ -20,7 +20,16 @@ typedef enum {
515 FMLP_SEM = 0,
516 SRP_SEM = 1,
517
518 - MAX_OBJ_TYPE = 1
519 + RSM_MUTEX = 2,
520 + IKGLP_SEM = 3,
521 + KFMLP_SEM = 4,
522 +
523 + IKGLP_SIMPLE_GPU_AFF_OBS = 5,
524 + IKGLP_GPU_AFF_OBS = 6,
525 + KFMLP_SIMPLE_GPU_AFF_OBS = 7,
526 + KFMLP_GPU_AFF_OBS = 8,
527 +
528 + MAX_OBJ_TYPE = 8
529 } obj_type_t;
530
531 struct inode_obj_id {
532 @@ -64,8 +73,11 @@ static inline void* od_lookup(int od, obj_type_t type)
533 }
534
535 #define lookup_fmlp_sem(od)((struct pi_semaphore*) od_lookup(od, FMLP_SEM))
536 +#define lookup_kfmlp_sem(od)((struct pi_semaphore*) od_lookup(od, KFMLP_SEM))
537 #define lookup_srp_sem(od) ((struct srp_semaphore*) od_lookup(od, SRP_SEM))
538 #define lookup_ics(od) ((struct ics*) od_lookup(od, ICS_ID))
539
540 +#define lookup_rsm_mutex(od)((struct litmus_lock*) od_lookup(od, FMLP_SEM))
541 +
542
543 #endif
544 diff --git a/include/litmus/fpmath.h b/include/litmus/fpmath.h
545 new file mode 100644
546 index 0000000..04d4bca
547 --- /dev/null
548 +++ b/include/litmus/fpmath.h
549 @@ -0,0 +1,145 @@
550 +#ifndef __FP_MATH_H__
551 +#define __FP_MATH_H__
552 +
553 +#ifndef __KERNEL__
554 +#include <stdint.h>
555 +#define abs(x) (((x) < 0) ? -(x) : x)
556 +#endif
557 +
558 +// Use 64-bit because we want to track things at the nanosecond scale.
559 +// This can lead to very large numbers.
560 +typedef int64_t fpbuf_t;
561 +typedef struct
562 +{
563 + fpbuf_t val;
564 +} fp_t;
565 +
566 +#define FP_SHIFT 10
567 +#define ROUND_BIT (FP_SHIFT - 1)
568 +
569 +#define _fp(x) ((fp_t) {x})
570 +
571 +#ifdef __KERNEL__
572 +static const fp_t LITMUS_FP_ZERO = {.val = 0};
573 +static const fp_t LITMUS_FP_ONE = {.val = (1 << FP_SHIFT)};
574 +#endif
575 +
576 +static inline fp_t FP(fpbuf_t x)
577 +{
578 + return _fp(((fpbuf_t) x) << FP_SHIFT);
579 +}
580 +
581 +/* divide two integers to obtain a fixed point value */
582 +static inline fp_t _frac(fpbuf_t a, fpbuf_t b)
583 +{
584 + return _fp(FP(a).val / (b));
585 +}
586 +
587 +static inline fpbuf_t _point(fp_t x)
588 +{
589 + return (x.val % (1 << FP_SHIFT));
590 +
591 +}
592 +
593 +#define fp2str(x) x.val
594 +/*(x.val >> FP_SHIFT), (x.val % (1 << FP_SHIFT)) */
595 +#define _FP_ "%ld/1024"
596 +
597 +static inline fpbuf_t _floor(fp_t x)
598 +{
599 + return x.val >> FP_SHIFT;
600 +}
601 +
602 +/* FIXME: negative rounding */
603 +static inline fpbuf_t _round(fp_t x)
604 +{
605 + return _floor(x) + ((x.val >> ROUND_BIT) & 1);
606 +}
607 +
608 +/* multiply two fixed point values */
609 +static inline fp_t _mul(fp_t a, fp_t b)
610 +{
611 + return _fp((a.val * b.val) >> FP_SHIFT);
612 +}
613 +
614 +static inline fp_t _div(fp_t a, fp_t b)
615 +{
616 +#if !defined(__KERNEL__) && !defined(unlikely)
617 +#define unlikely(x) (x)
618 +#define DO_UNDEF_UNLIKELY
619 +#endif
620 + /* try not to overflow */
621 + if (unlikely( a.val > (2l << ((sizeof(fpbuf_t)*8) - FP_SHIFT)) ))
622 + return _fp((a.val / b.val) << FP_SHIFT);
623 + else
624 + return _fp((a.val << FP_SHIFT) / b.val);
625 +#ifdef DO_UNDEF_UNLIKELY
626 +#undef unlikely
627 +#undef DO_UNDEF_UNLIKELY
628 +#endif
629 +}
630 +
631 +static inline fp_t _add(fp_t a, fp_t b)
632 +{
633 + return _fp(a.val + b.val);
634 +}
635 +
636 +static inline fp_t _sub(fp_t a, fp_t b)
637 +{
638 + return _fp(a.val - b.val);
639 +}
640 +
641 +static inline fp_t _neg(fp_t x)
642 +{
643 + return _fp(-x.val);
644 +}
645 +
646 +static inline fp_t _abs(fp_t x)
647 +{
648 + return _fp(abs(x.val));
649 +}
650 +
651 +/* works the same as casting float/double to integer */
652 +static inline fpbuf_t _fp_to_integer(fp_t x)
653 +{
654 + return _floor(_abs(x)) * ((x.val > 0) ? 1 : -1);
655 +}
656 +
657 +static inline fp_t _integer_to_fp(fpbuf_t x)
658 +{
659 + return _frac(x,1);
660 +}
661 +
662 +static inline int _leq(fp_t a, fp_t b)
663 +{
664 + return a.val <= b.val;
665 +}
666 +
667 +static inline int _geq(fp_t a, fp_t b)
668 +{
669 + return a.val >= b.val;
670 +}
671 +
672 +static inline int _lt(fp_t a, fp_t b)
673 +{
674 + return a.val < b.val;
675 +}
676 +
677 +static inline int _gt(fp_t a, fp_t b)
678 +{
679 + return a.val > b.val;
680 +}
681 +
682 +static inline int _eq(fp_t a, fp_t b)
683 +{
684 + return a.val == b.val;
685 +}
686 +
687 +static inline fp_t _max(fp_t a, fp_t b)
688 +{
689 + if (a.val < b.val)
690 + return b;
691 + else
692 + return a;
693 +}
694 +#endif
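
A short worked example of the FP_SHIFT = 10 representation above, for orientation (illustrative only): the raw .val field is the value scaled by 1024, so the resolution is 1/1024.

static void fp_example(void)
{
	fp_t third = _frac(1, 3);	/* .val = 1024/3  = 341   (~0.333) */
	fp_t nine  = _integer_to_fp(9);	/* .val = 9 << 10 = 9216           */
	fp_t prod  = _mul(third, nine);	/* .val = (341*9216) >> 10 = 3069,
					 * i.e. ~2.997                     */

	/* truncation vs. rounding back to integers: */
	fpbuf_t t = _fp_to_integer(prod);	/* 3069 >> 10 = 2            */
	fpbuf_t r = _round(prod);		/* 2 + ((3069 >> 9) & 1) = 3 */
	(void)t; (void)r;
}
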
695 diff --git a/include/litmus/gpu_affinity.h b/include/litmus/gpu_affinity.h
696 new file mode 100644
697 index 0000000..6b3fb8b
698 --- /dev/null
699 +++ b/include/litmus/gpu_affinity.h
700 @@ -0,0 +1,49 @@
701 +#ifndef LITMUS_GPU_AFFINITY_H
702 +#define LITMUS_GPU_AFFINITY_H
703 +
704 +#include <litmus/rt_param.h>
705 +#include <litmus/sched_plugin.h>
706 +#include <litmus/litmus.h>
707 +
708 +void update_gpu_estimate(struct task_struct* t, lt_t observed);
709 +gpu_migration_dist_t gpu_migration_distance(int a, int b);
710 +
711 +static inline void reset_gpu_tracker(struct task_struct* t)
712 +{
713 + t->rt_param.accum_gpu_time = 0;
714 +}
715 +
716 +static inline void start_gpu_tracker(struct task_struct* t)
717 +{
718 + t->rt_param.gpu_time_stamp = litmus_clock();
719 +}
720 +
721 +static inline void stop_gpu_tracker(struct task_struct* t)
722 +{
723 + lt_t now = litmus_clock();
724 + t->rt_param.accum_gpu_time += (now - t->rt_param.gpu_time_stamp);
725 +}
726 +
727 +static inline lt_t get_gpu_time(struct task_struct* t)
728 +{
729 + return t->rt_param.accum_gpu_time;
730 +}
731 +
732 +static inline lt_t get_gpu_estimate(struct task_struct* t, gpu_migration_dist_t dist)
733 +{
734 + int i;
735 + fpbuf_t temp = _fp_to_integer(t->rt_param.gpu_migration_est[dist].est);
736 + lt_t val = (temp >= 0) ? temp : 0; // never allow negative estimates...
737 +
738 + WARN_ON(temp < 0);
739 +
740 + // lower-bound a distant migration to be at least equal to the level
741 + // below it.
742 + for(i = dist-1; (val == 0) && (i >= MIG_LOCAL); --i) {
743 + val = _fp_to_integer(t->rt_param.gpu_migration_est[i].est);
744 + }
745 +
746 + return ((val > 0) ? val : dist+1);
747 +}
748 +
749 +#endif
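
The expected call pattern for the tracker and estimator above is roughly as follows (a sketch under assumptions, not code from the patch; the real call sites, including locking and handling of suspensions, are in the plugin and lock code further down in this patch):

static void account_gpu_section(struct task_struct *t)
{
	lt_t observed;

	reset_gpu_tracker(t);
	start_gpu_tracker(t);

	/* ... task holds a GPU token and uses the GPU; the plugin may
	 * stop/restart the tracker across suspensions ... */

	stop_gpu_tracker(t);
	observed = get_gpu_time(t);

	/* feed the observed time back into the feedback estimator; later
	 * requests consult get_gpu_estimate(t, dist), where dist comes from
	 * gpu_migration_distance(tsk_rt(t)->last_gpu, target_gpu) */
	update_gpu_estimate(t, observed);
}
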
750 diff --git a/include/litmus/ikglp_lock.h b/include/litmus/ikglp_lock.h
751 new file mode 100644
752 index 0000000..af6f151
753 --- /dev/null
754 +++ b/include/litmus/ikglp_lock.h
755 @@ -0,0 +1,160 @@
756 +#ifndef LITMUS_IKGLP_H
757 +#define LITMUS_IKGLP_H
758 +
759 +#include <litmus/litmus.h>
760 +#include <litmus/binheap.h>
761 +#include <litmus/locking.h>
762 +
763 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
764 +#include <litmus/kexclu_affinity.h>
765 +
766 +struct ikglp_affinity;
767 +#endif
768 +
769 +typedef struct ikglp_heap_node
770 +{
771 + struct task_struct *task;
772 + struct binheap_node node;
773 +} ikglp_heap_node_t;
774 +
775 +struct fifo_queue;
776 +struct ikglp_wait_state;
777 +
778 +typedef struct ikglp_donee_heap_node
779 +{
780 + struct task_struct *task;
781 + struct fifo_queue *fq;
782 + struct ikglp_wait_state *donor_info; // cross-linked with ikglp_wait_state_t of donor
783 +
784 + struct binheap_node node;
785 +} ikglp_donee_heap_node_t;
786 +
787 +// Maintains the state of a request as it goes through the IKGLP
788 +typedef struct ikglp_wait_state {
789 + struct task_struct *task; // pointer back to the requesting task
790 +
791 + // Data for while waiting in FIFO Queue
792 + wait_queue_t fq_node;
793 + ikglp_heap_node_t global_heap_node;
794 + ikglp_donee_heap_node_t donee_heap_node;
795 +
796 + // Data for while waiting in PQ
797 + ikglp_heap_node_t pq_node;
798 +
799 + // Data for while waiting as a donor
800 + ikglp_donee_heap_node_t *donee_info; // cross-linked with donee's ikglp_donee_heap_node_t
801 + struct nested_info prio_donation;
802 + struct binheap_node node;
803 +} ikglp_wait_state_t;
804 +
805 +/* struct for semaphore with priority inheritance */
806 +struct fifo_queue
807 +{
808 + wait_queue_head_t wait;
809 + struct task_struct* owner;
810 +
811 + // used for bookkeepping
812 + ikglp_heap_node_t global_heap_node;
813 + ikglp_donee_heap_node_t donee_heap_node;
814 +
815 + struct task_struct* hp_waiter;
816 + int count; /* number of waiters + holder */
817 +
818 + struct nested_info nest;
819 +};
820 +
821 +struct ikglp_semaphore
822 +{
823 + struct litmus_lock litmus_lock;
824 +
825 + raw_spinlock_t lock;
826 + raw_spinlock_t real_lock;
827 +
828 + int nr_replicas; // AKA k
829 + int m;
830 +
831 + int max_fifo_len; // max len of a fifo queue
832 + int nr_in_fifos;
833 +
834 + struct binheap_handle top_m; // min heap, base prio
835 + int top_m_size; // number of nodes in top_m
836 +
837 + struct binheap_handle not_top_m; // max heap, base prio
838 +
839 + struct binheap_handle donees; // min-heap, base prio
840 + struct fifo_queue *shortest_fifo_queue; // pointer to shortest fifo queue
841 +
842 + /* data structures for holding requests */
843 + struct fifo_queue *fifo_queues; // array nr_replicas in length
844 + struct binheap_handle priority_queue; // max-heap, base prio
845 + struct binheap_handle donors; // max-heap, base prio
846 +
847 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
848 + struct ikglp_affinity *aff_obs;
849 +#endif
850 +};
851 +
852 +static inline struct ikglp_semaphore* ikglp_from_lock(struct litmus_lock* lock)
853 +{
854 + return container_of(lock, struct ikglp_semaphore, litmus_lock);
855 +}
856 +
857 +int ikglp_lock(struct litmus_lock* l);
858 +int ikglp_unlock(struct litmus_lock* l);
859 +int ikglp_close(struct litmus_lock* l);
860 +void ikglp_free(struct litmus_lock* l);
861 +struct litmus_lock* ikglp_new(int m, struct litmus_lock_ops*, void* __user arg);
862 +
863 +
864 +
865 +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
866 +
867 +struct ikglp_queue_info
868 +{
869 + struct fifo_queue* q;
870 + lt_t estimated_len;
871 + int *nr_cur_users;
872 +};
873 +
874 +struct ikglp_affinity_ops
875 +{
876 + struct fifo_queue* (*advise_enqueue)(struct ikglp_affinity* aff, struct task_struct* t); // select FIFO
877 + ikglp_wait_state_t* (*advise_steal)(struct ikglp_affinity* aff, struct fifo_queue* dst); // select steal from FIFO
878 + ikglp_donee_heap_node_t* (*advise_donee_selection)(struct ikglp_affinity* aff, struct task_struct* t); // select a donee
879 + ikglp_wait_state_t* (*advise_donor_to_fq)(struct ikglp_affinity* aff, struct fifo_queue* dst); // select a donor to move to PQ
880 +
881 + void (*notify_enqueue)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // fifo enqueue
882 + void (*notify_dequeue)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // fifo dequeue
883 + void (*notify_acquired)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // replica acquired
884 + void (*notify_freed)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // replica freed
885 + int (*replica_to_resource)(struct ikglp_affinity* aff, struct fifo_queue* fq); // convert a replica # to a GPU (includes offsets and simult user folding)
886 +};
887 +
888 +struct ikglp_affinity
889 +{
890 + struct affinity_observer obs;
891 + struct ikglp_affinity_ops *ops;
892 + struct ikglp_queue_info *q_info;
893 + int *nr_cur_users_on_rsrc;
894 + int offset;
895 + int nr_simult;
896 + int nr_rsrc;
897 + int relax_max_fifo_len;
898 +};
899 +
900 +static inline struct ikglp_affinity* ikglp_aff_obs_from_aff_obs(struct affinity_observer* aff_obs)
901 +{
902 + return container_of(aff_obs, struct ikglp_affinity, obs);
903 +}
904 +
905 +int ikglp_aff_obs_close(struct affinity_observer*);
906 +void ikglp_aff_obs_free(struct affinity_observer*);
907 +struct affinity_observer* ikglp_gpu_aff_obs_new(struct affinity_observer_ops*,
908 + void* __user arg);
909 +struct affinity_observer* ikglp_simple_gpu_aff_obs_new(struct affinity_observer_ops*,
910 + void* __user arg);
911 +#endif
912 +
913 +
914 +
915 +#endif
916 diff --git a/include/litmus/kexclu_affinity.h b/include/litmus/kexclu_affinity.h
917 new file mode 100644
918 index 0000000..f6355de
919 --- /dev/null
920 +++ b/include/litmus/kexclu_affinity.h
921 @@ -0,0 +1,35 @@
922 +#ifndef LITMUS_AFF_OBS_H
923 +#define LITMUS_AFF_OBS_H
924 +
925 +#include <litmus/locking.h>
926 +
927 +struct affinity_observer_ops;
928 +
929 +struct affinity_observer
930 +{
931 + struct affinity_observer_ops* ops;
932 + int type;
933 + int ident;
934 +
935 + struct litmus_lock* lock; // the lock under observation
936 +};
937 +
938 +typedef int (*aff_obs_open_t)(struct affinity_observer* aff_obs,
939 + void* __user arg);
940 +typedef int (*aff_obs_close_t)(struct affinity_observer* aff_obs);
941 +typedef void (*aff_obs_free_t)(struct affinity_observer* aff_obs);
942 +
943 +struct affinity_observer_ops
944 +{
945 + aff_obs_open_t open;
946 + aff_obs_close_t close;
947 + aff_obs_free_t deallocate;
948 +};
949 +
950 +struct litmus_lock* get_lock_from_od(int od);
951 +
952 +void affinity_observer_new(struct affinity_observer* aff,
953 + struct affinity_observer_ops* ops,
954 + struct affinity_observer_args* args);
955 +
956 +#endif
957 diff --git a/include/litmus/kfmlp_lock.h b/include/litmus/kfmlp_lock.h
958 new file mode 100644
959 index 0000000..5f0aae6
960 --- /dev/null
961 +++ b/include/litmus/kfmlp_lock.h
962 @@ -0,0 +1,97 @@
963 +#ifndef LITMUS_KFMLP_H
964 +#define LITMUS_KFMLP_H
965 +
966 +#include <litmus/litmus.h>
967 +#include <litmus/locking.h>
968 +
969 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
970 +#include <litmus/kexclu_affinity.h>
971 +
972 +struct kfmlp_affinity;
973 +#endif
974 +
975 +/* struct for semaphore with priority inheritance */
976 +struct kfmlp_queue
977 +{
978 + wait_queue_head_t wait;
979 + struct task_struct* owner;
980 + struct task_struct* hp_waiter;
981 + int count; /* number of waiters + holder */
982 +};
983 +
984 +struct kfmlp_semaphore
985 +{
986 + struct litmus_lock litmus_lock;
987 +
988 + spinlock_t lock;
989 +
990 + int num_resources; /* aka k */
991 +
992 + struct kfmlp_queue *queues; /* array */
993 + struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */
994 +
995 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
996 + struct kfmlp_affinity *aff_obs;
997 +#endif
998 +};
999 +
1000 +static inline struct kfmlp_semaphore* kfmlp_from_lock(struct litmus_lock* lock)
1001 +{
1002 + return container_of(lock, struct kfmlp_semaphore, litmus_lock);
1003 +}
1004 +
1005 +int kfmlp_lock(struct litmus_lock* l);
1006 +int kfmlp_unlock(struct litmus_lock* l);
1007 +int kfmlp_close(struct litmus_lock* l);
1008 +void kfmlp_free(struct litmus_lock* l);
1009 +struct litmus_lock* kfmlp_new(struct litmus_lock_ops*, void* __user arg);
1010 +
1011 +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
1012 +
1013 +struct kfmlp_queue_info
1014 +{
1015 + struct kfmlp_queue* q;
1016 + lt_t estimated_len;
1017 + int *nr_cur_users;
1018 +};
1019 +
1020 +struct kfmlp_affinity_ops
1021 +{
1022 + struct kfmlp_queue* (*advise_enqueue)(struct kfmlp_affinity* aff, struct task_struct* t);
1023 + struct task_struct* (*advise_steal)(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from);
1024 + void (*notify_enqueue)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t);
1025 + void (*notify_dequeue)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t);
1026 + void (*notify_acquired)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t);
1027 + void (*notify_freed)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t);
1028 + int (*replica_to_resource)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq);
1029 +};
1030 +
1031 +struct kfmlp_affinity
1032 +{
1033 + struct affinity_observer obs;
1034 + struct kfmlp_affinity_ops *ops;
1035 + struct kfmlp_queue_info *q_info;
1036 + int *nr_cur_users_on_rsrc;
1037 + int offset;
1038 + int nr_simult;
1039 + int nr_rsrc;
1040 +};
1041 +
1042 +static inline struct kfmlp_affinity* kfmlp_aff_obs_from_aff_obs(struct affinity_observer* aff_obs)
1043 +{
1044 + return container_of(aff_obs, struct kfmlp_affinity, obs);
1045 +}
1046 +
1047 +int kfmlp_aff_obs_close(struct affinity_observer*);
1048 +void kfmlp_aff_obs_free(struct affinity_observer*);
1049 +struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops*,
1050 + void* __user arg);
1051 +struct affinity_observer* kfmlp_simple_gpu_aff_obs_new(struct affinity_observer_ops*,
1052 + void* __user arg);
1053 +
1054 +
1055 +#endif
1056 +
1057 +#endif
1058 +
1059 +
1060 diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h
1061 index 0b071fd..71df378 100644
1062 --- a/include/litmus/litmus.h
1063 +++ b/include/litmus/litmus.h
1064 @@ -26,6 +26,7 @@ static inline int in_list(struct list_head* list)
1065 );
1066 }
1067
1068 +
1069 struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq);
1070
1071 #define NO_CPU 0xffffffff
1072 @@ -53,12 +54,16 @@ void litmus_exit_task(struct task_struct *tsk);
1073 #define get_rt_phase(t) (tsk_rt(t)->task_params.phase)
1074 #define get_partition(t) (tsk_rt(t)->task_params.cpu)
1075 #define get_deadline(t) (tsk_rt(t)->job_params.deadline)
1076 +#define get_period(t) (tsk_rt(t)->task_params.period)
1077 #define get_release(t) (tsk_rt(t)->job_params.release)
1078 #define get_class(t) (tsk_rt(t)->task_params.cls)
1079
1080 #define is_priority_boosted(t) (tsk_rt(t)->priority_boosted)
1081 #define get_boost_start(t) (tsk_rt(t)->boost_start_time)
1082
1083 +#define effective_priority(t) ((!(tsk_rt(t)->inh_task)) ? t : tsk_rt(t)->inh_task)
1084 +#define base_priority(t) (t)
1085 +
1086 inline static int budget_exhausted(struct task_struct* t)
1087 {
1088 return get_exec_time(t) >= get_exec_cost(t);
1089 @@ -114,10 +119,12 @@ static inline lt_t litmus_clock(void)
1090 #define earlier_deadline(a, b) (lt_before(\
1091 (a)->rt_param.job_params.deadline,\
1092 (b)->rt_param.job_params.deadline))
1093 +#define shorter_period(a, b) (lt_before(\
1094 + (a)->rt_param.task_params.period,\
1095 + (b)->rt_param.task_params.period))
1096 #define earlier_release(a, b) (lt_before(\
1097 (a)->rt_param.job_params.release,\
1098 (b)->rt_param.job_params.release))
1099 -
1100 void preempt_if_preemptable(struct task_struct* t, int on_cpu);
1101
1102 #ifdef CONFIG_LITMUS_LOCKING
1103 diff --git a/include/litmus/litmus_softirq.h b/include/litmus/litmus_softirq.h
1104 new file mode 100644
1105 index 0000000..1eb5ea1
1106 --- /dev/null
1107 +++ b/include/litmus/litmus_softirq.h
1108 @@ -0,0 +1,199 @@
1109 +#ifndef __LITMUS_SOFTIRQ_H
1110 +#define __LITMUS_SOFTIRQ_H
1111 +
1112 +#include <linux/interrupt.h>
1113 +#include <linux/workqueue.h>
1114 +
1115 +/*
1116 + Threaded tasklet handling for Litmus. Tasklets
1117 + are scheduled with the priority of the tasklet's
1118 + owner---that is, the RT task on behalf the tasklet
1119 + runs.
1120 +
1121 + Tasklets are current scheduled in FIFO order with
1122 + NO priority inheritance for "blocked" tasklets.
1123 +
1124 + klitirqd assumes the priority of the owner of the
1125 + tasklet when the tasklet is next to execute.
1126 +
1127 + Currently, hi-tasklets are scheduled before
1128 + low-tasklets, regardless of priority of low-tasklets.
1129 + And likewise, low-tasklets are scheduled before work
1130 + queue objects. This priority inversion probably needs
1131 + to be fixed, though it is not an issue if our work with
1132 + GPUs as GPUs are owned (and associated klitirqds) for
1133 + exclusive time periods, thus no inversions can
1134 + occur.
1135 + */
1136 +
1137 +
1138 +
1139 +#define NR_LITMUS_SOFTIRQD CONFIG_NR_LITMUS_SOFTIRQD
1140 +
1141 +/* Spawns NR_LITMUS_SOFTIRQD klitirqd daemons.
1142 + Actual launch of threads is deffered to kworker's
1143 + workqueue, so daemons will likely not be immediately
1144 + running when this function returns, though the required
1145 + data will be initialized.
1146 +
1147 + @affinity_set: an array expressing the processor affinity
1148 + for each of the NR_LITMUS_SOFTIRQD daemons. May be set
1149 + to NULL for global scheduling.
1150 +
1151 + - Examples -
1152 + 8-CPU system with two CPU clusters:
1153 + affinity[] = {0, 0, 0, 0, 3, 3, 3, 3}
1154 + NOTE: Daemons not actually bound to specified CPU, but rather
1155 + cluster in which the CPU resides.
1156 +
1157 + 8-CPU system, partitioned:
1158 + affinity[] = {0, 1, 2, 3, 4, 5, 6, 7}
1159 +
1160 + FIXME: change array to a CPU topology or array of cpumasks
1161 +
1162 + */
1163 +void spawn_klitirqd(int* affinity);
1164 +
1165 +
1166 +/* Raises a flag to tell klitirqds to terminate.
1167 + Termination is async, so some threads may be running
1168 + after function return. */
1169 +void kill_klitirqd(void);
1170 +
1171 +
1172 +/* Returns 1 if all NR_LITMUS_SOFTIRQD klitirqs are ready
1173 + to handle tasklets. 0, otherwise.*/
1174 +int klitirqd_is_ready(void);
1175 +
1176 +/* Returns 1 if no NR_LITMUS_SOFTIRQD klitirqs are ready
1177 + to handle tasklets. 0, otherwise.*/
1178 +int klitirqd_is_dead(void);
1179 +
1180 +/* Flushes all pending work out to the OS for regular
1181 + * tasklet/work processing of the specified 'owner'
1182 + *
1183 + * PRECOND: klitirqd_thread must have a clear entry
1184 + * in the GPU registry, otherwise this call will become
1185 + * a no-op as work will loop back to the klitirqd_thread.
1186 + *
1187 + * Pass NULL for owner to flush ALL pending items.
1188 + */
1189 +void flush_pending(struct task_struct* klitirqd_thread,
1190 + struct task_struct* owner);
1191 +
1192 +struct task_struct* get_klitirqd(unsigned int k_id);
1193 +
1194 +
1195 +extern int __litmus_tasklet_schedule(
1196 + struct tasklet_struct *t,
1197 + unsigned int k_id);
1198 +
1199 +/* schedule a tasklet on klitirqd #k_id */
1200 +static inline int litmus_tasklet_schedule(
1201 + struct tasklet_struct *t,
1202 + unsigned int k_id)
1203 +{
1204 + int ret = 0;
1205 + if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
1206 + ret = __litmus_tasklet_schedule(t, k_id);
1207 + return(ret);
1208 +}
1209 +
1210 +/* for use by __tasklet_schedule() */
1211 +static inline int _litmus_tasklet_schedule(
1212 + struct tasklet_struct *t,
1213 + unsigned int k_id)
1214 +{
1215 + return(__litmus_tasklet_schedule(t, k_id));
1216 +}
1217 +
1218 +
1219 +
1220 +
1221 +extern int __litmus_tasklet_hi_schedule(struct tasklet_struct *t,
1222 + unsigned int k_id);
1223 +
1224 +/* schedule a hi tasklet on klitirqd #k_id */
1225 +static inline int litmus_tasklet_hi_schedule(struct tasklet_struct *t,
1226 + unsigned int k_id)
1227 +{
1228 + int ret = 0;
1229 + if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
1230 + ret = __litmus_tasklet_hi_schedule(t, k_id);
1231 + return(ret);
1232 +}
1233 +
1234 +/* for use by __tasklet_hi_schedule() */
1235 +static inline int _litmus_tasklet_hi_schedule(struct tasklet_struct *t,
1236 + unsigned int k_id)
1237 +{
1238 + return(__litmus_tasklet_hi_schedule(t, k_id));
1239 +}
1240 +
1241 +
1242 +
1243 +
1244 +
1245 +extern int __litmus_tasklet_hi_schedule_first(
1246 + struct tasklet_struct *t,
1247 + unsigned int k_id);
1248 +
1249 +/* schedule a hi tasklet on klitirqd #k_id on next go-around */
1250 +/* PRECONDITION: Interrupts must be disabled. */
1251 +static inline int litmus_tasklet_hi_schedule_first(
1252 + struct tasklet_struct *t,
1253 + unsigned int k_id)
1254 +{
1255 + int ret = 0;
1256 + if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
1257 + ret = __litmus_tasklet_hi_schedule_first(t, k_id);
1258 + return(ret);
1259 +}
1260 +
1261 +/* for use by __tasklet_hi_schedule_first() */
1262 +static inline int _litmus_tasklet_hi_schedule_first(
1263 + struct tasklet_struct *t,
1264 + unsigned int k_id)
1265 +{
1266 + return(__litmus_tasklet_hi_schedule_first(t, k_id));
1267 +}
1268 +
1269 +
1270 +
1271 +//////////////
1272 +
1273 +extern int __litmus_schedule_work(
1274 + struct work_struct* w,
1275 + unsigned int k_id);
1276 +
1277 +static inline int litmus_schedule_work(
1278 + struct work_struct* w,
1279 + unsigned int k_id)
1280 +{
1281 + return(__litmus_schedule_work(w, k_id));
1282 +}
1283 +
1284 +
1285 +
1286 +///////////// mutex operations for client threads.
1287 +
1288 +void down_and_set_stat(struct task_struct* t,
1289 + enum klitirqd_sem_status to_set,
1290 + struct mutex* sem);
1291 +
1292 +void __down_and_reset_and_set_stat(struct task_struct* t,
1293 + enum klitirqd_sem_status to_reset,
1294 + enum klitirqd_sem_status to_set,
1295 + struct mutex* sem);
1296 +
1297 +void up_and_set_stat(struct task_struct* t,
1298 + enum klitirqd_sem_status to_set,
1299 + struct mutex* sem);
1300 +
1301 +
1302 +
1303 +void release_klitirqd_lock(struct task_struct* t);
1304 +
1305 +int reacquire_klitirqd_lock(struct task_struct* t);
1306 +
1307 +#endif
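
Tying this interface back to the 'owner' field added to struct tasklet_struct earlier in this patch, a driver-side sketch might look like the following (hypothetical names; only litmus_tasklet_schedule() and the owner field are defined by the patch, and the field exists only under CONFIG_LITMUS_SOFTIRQD / CONFIG_LITMUS_PAI_SOFTIRQD):

static void my_tasklet_fn(unsigned long data)
{
	/* bottom-half work, executed by the chosen klitirqd thread at the
	 * priority of the owning real-time task */
}

static DECLARE_TASKLET(my_tasklet, my_tasklet_fn, 0);

static void defer_to_klitirqd(struct task_struct *owner, unsigned int k_id)
{
	my_tasklet.owner = owner;		/* field added by this patch */
	litmus_tasklet_schedule(&my_tasklet, k_id);
}
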
1308 diff --git a/include/litmus/locking.h b/include/litmus/locking.h
1309 index 4d7b870..36647fe 100644
1310 --- a/include/litmus/locking.h
1311 +++ b/include/litmus/locking.h
1312 @@ -1,28 +1,160 @@
1313 #ifndef LITMUS_LOCKING_H
1314 #define LITMUS_LOCKING_H
1315
1316 +#include <linux/list.h>
1317 +
1318 struct litmus_lock_ops;
1319
1320 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
1321 +struct nested_info
1322 +{
1323 + struct litmus_lock *lock;
1324 + struct task_struct *hp_waiter_eff_prio;
1325 + struct task_struct **hp_waiter_ptr;
1326 + struct binheap_node hp_binheap_node;
1327 +};
1328 +
1329 +static inline struct task_struct* top_priority(struct binheap_handle* handle) {
1330 + if(!binheap_empty(handle)) {
1331 + return (struct task_struct*)(binheap_top_entry(handle, struct nested_info, hp_binheap_node)->hp_waiter_eff_prio);
1332 + }
1333 + return NULL;
1334 +}
1335 +
1336 +void print_hp_waiters(struct binheap_node* n, int depth);
1337 +#endif
1338 +
1339 +
1340 /* Generic base struct for LITMUS^RT userspace semaphores.
1341 * This structure should be embedded in protocol-specific semaphores.
1342 */
1343 struct litmus_lock {
1344 struct litmus_lock_ops *ops;
1345 int type;
1346 +
1347 + int ident;
1348 +
1349 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
1350 + struct nested_info nest;
1351 +//#ifdef CONFIG_DEBUG_SPINLOCK
1352 + char cheat_lockdep[2];
1353 + struct lock_class_key key;
1354 +//#endif
1355 +#endif
1356 };
1357
1358 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
1359 +
1360 +#define MAX_DGL_SIZE CONFIG_LITMUS_MAX_DGL_SIZE
1361 +
1362 +typedef struct dgl_wait_state {
1363 + struct task_struct *task; /* task waiting on DGL */
1364 + struct litmus_lock *locks[MAX_DGL_SIZE]; /* requested locks in DGL */
1365 + int size; /* size of the DGL */
1366 + int nr_remaining; /* nr locks remainging before DGL is complete */
1367 + int last_primary; /* index lock in locks[] that has active priority */
1368 + wait_queue_t wq_nodes[MAX_DGL_SIZE];
1369 +} dgl_wait_state_t;
1370 +
1371 +void wake_or_wait_on_next_lock(dgl_wait_state_t *dgl_wait);
1372 +void select_next_lock(dgl_wait_state_t* dgl_wait /*, struct litmus_lock* prev_lock*/);
1373 +
1374 +void init_dgl_waitqueue_entry(wait_queue_t *wq_node, dgl_wait_state_t* dgl_wait);
1375 +int dgl_wake_up(wait_queue_t *wq_node, unsigned mode, int sync, void *key);
1376 +void __waitqueue_dgl_remove_first(wait_queue_head_t *wq, dgl_wait_state_t** dgl_wait, struct task_struct **task);
1377 +#endif
1378 +
1379 +typedef int (*lock_op_t)(struct litmus_lock *l);
1380 +typedef lock_op_t lock_close_t;
1381 +typedef lock_op_t lock_lock_t;
1382 +typedef lock_op_t lock_unlock_t;
1383 +
1384 +typedef int (*lock_open_t)(struct litmus_lock *l, void* __user arg);
1385 +typedef void (*lock_free_t)(struct litmus_lock *l);
1386 +
1387 struct litmus_lock_ops {
1388 /* Current task tries to obtain / drop a reference to a lock.
1389 * Optional methods, allowed by default. */
1390 - int (*open)(struct litmus_lock*, void* __user);
1391 - int (*close)(struct litmus_lock*);
1392 + lock_open_t open;
1393 + lock_close_t close;
1394
1395 /* Current tries to lock/unlock this lock (mandatory methods). */
1396 - int (*lock)(struct litmus_lock*);
1397 - int (*unlock)(struct litmus_lock*);
1398 + lock_lock_t lock;
1399 + lock_unlock_t unlock;
1400
1401 /* The lock is no longer being referenced (mandatory method). */
1402 - void (*deallocate)(struct litmus_lock*);
1403 + lock_free_t deallocate;
1404 +
1405 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
1406 + void (*propagate_increase_inheritance)(struct litmus_lock* l, struct task_struct* t, raw_spinlock_t* to_unlock, unsigned long irqflags);
1407 + void (*propagate_decrease_inheritance)(struct litmus_lock* l, struct task_struct* t, raw_spinlock_t* to_unlock, unsigned long irqflags);
1408 +#endif
1409 +
1410 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
1411 + raw_spinlock_t* (*get_dgl_spin_lock)(struct litmus_lock *l);
1412 + int (*dgl_lock)(struct litmus_lock *l, dgl_wait_state_t* dgl_wait, wait_queue_t* wq_node);
1413 + int (*is_owner)(struct litmus_lock *l, struct task_struct *t);
1414 + void (*enable_priority)(struct litmus_lock *l, dgl_wait_state_t* dgl_wait);
1415 +#endif
1416 };
1417
1418 +
1419 +/*
1420 + Nested inheritance can be achieved with fine-grain locking when there is
1421 + no need for DGL support, presuming locks are acquired in a partial order
1422 + (no cycles!). However, DGLs allow locks to be acquired in any order. This
1423 + makes nested inheritance very difficult (we don't yet know a solution) to
1424 + realize with fine-grain locks, so we use a big lock instead.
1425 +
1426 + Code contains both fine-grain and coarse-grain methods together, side-by-side.
1427 + Each lock operation *IS NOT* surrounded by ifdef/endif to help make code more
1428 + readable. However, this leads to the odd situation where both code paths
1429 + appear together in code as if they were both active together.
1430 +
1431 + THIS IS NOT REALLY THE CASE! ONLY ONE CODE PATH IS ACTUALLY ACTIVE!
1432 +
1433 + Example:
1434 + lock_global_irqsave(coarseLock, flags);
1435 + lock_fine_irqsave(fineLock, flags);
1436 +
1437 + Reality (coarse):
1438 + lock_global_irqsave(coarseLock, flags);
1439 + //lock_fine_irqsave(fineLock, flags);
1440 +
1441 + Reality (fine):
1442 + //lock_global_irqsave(coarseLock, flags);
1443 + lock_fine_irqsave(fineLock, flags);
1444 +
1445 + Be careful when you read code involving nested inheritance.
1446 + */
1447 +#if defined(CONFIG_LITMUS_DGL_SUPPORT)
1448 +/* DGL requires a big lock to implement nested inheritance */
1449 +#define lock_global_irqsave(lock, flags) raw_spin_lock_irqsave((lock), (flags))
1450 +#define lock_global(lock) raw_spin_lock((lock))
1451 +#define unlock_global_irqrestore(lock, flags) raw_spin_unlock_irqrestore((lock), (flags))
1452 +#define unlock_global(lock) raw_spin_unlock((lock))
1453 +
1454 +/* fine-grain locking are no-ops with DGL support */
1455 +#define lock_fine_irqsave(lock, flags)
1456 +#define lock_fine(lock)
1457 +#define unlock_fine_irqrestore(lock, flags)
1458 +#define unlock_fine(lock)
1459 +
1460 +#elif defined(CONFIG_LITMUS_NESTED_LOCKING)
1461 +/* Use fine-grain locking when DGLs are disabled. */
1462 +/* global locking are no-ops without DGL support */
1463 +#define lock_global_irqsave(lock, flags)
1464 +#define lock_global(lock)
1465 +#define unlock_global_irqrestore(lock, flags)
1466 +#define unlock_global(lock)
1467 +
1468 +#define lock_fine_irqsave(lock, flags) raw_spin_lock_irqsave((lock), (flags))
1469 +#define lock_fine(lock) raw_spin_lock((lock))
1470 +#define unlock_fine_irqrestore(lock, flags) raw_spin_unlock_irqrestore((lock), (flags))
1471 +#define unlock_fine(lock) raw_spin_unlock((lock))
1472 +
1473 #endif
1474 +
1475 +
1476 +#endif
1477 +
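
Following the convention described in the comment above, a critical section in the locking code writes both calls back to back, and the kernel configuration decides which one expands to a real lock (sketch only; see rsm_lock.c and ikglp_lock.c in this patch for the actual call sites):

static void example_critical_section(raw_spinlock_t *dgl_lock,
				     raw_spinlock_t *fine_lock)
{
	unsigned long flags;

	lock_global_irqsave(dgl_lock, flags);	/* real lock only with DGL support    */
	lock_fine_irqsave(fine_lock, flags);	/* real lock only without DGL support */

	/* ... update protocol state / priority inheritance ... */

	unlock_fine_irqrestore(fine_lock, flags);
	unlock_global_irqrestore(dgl_lock, flags);
}
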
1478 diff --git a/include/litmus/nvidia_info.h b/include/litmus/nvidia_info.h
1479 new file mode 100644
1480 index 0000000..97c9577
1481 --- /dev/null
1482 +++ b/include/litmus/nvidia_info.h
1483 @@ -0,0 +1,46 @@
1484 +#ifndef __LITMUS_NVIDIA_H
1485 +#define __LITMUS_NVIDIA_H
1486 +
1487 +#include <linux/interrupt.h>
1488 +
1489 +
1490 +#include <litmus/litmus_softirq.h>
1491 +
1492 +
1493 +//#define NV_DEVICE_NUM NR_LITMUS_SOFTIRQD
1494 +#define NV_DEVICE_NUM CONFIG_NV_DEVICE_NUM
1495 +#define NV_MAX_SIMULT_USERS CONFIG_NV_MAX_SIMULT_USERS
1496 +
1497 +int init_nvidia_info(void);
1498 +void shutdown_nvidia_info(void);
1499 +
1500 +int is_nvidia_func(void* func_addr);
1501 +
1502 +void dump_nvidia_info(const struct tasklet_struct *t);
1503 +
1504 +
1505 +// Returns the Nvidia device # associated with provided tasklet and work_struct.
1506 +u32 get_tasklet_nv_device_num(const struct tasklet_struct *t);
1507 +u32 get_work_nv_device_num(const struct work_struct *t);
1508 +
1509 +
1510 +int init_nv_device_reg(void);
1511 +//int get_nv_device_id(struct task_struct* owner);
1512 +
1513 +
1514 +int reg_nv_device(int reg_device_id, int register_device, struct task_struct *t);
1515 +
1516 +struct task_struct* get_nv_max_device_owner(u32 target_device_id);
1517 +//int is_nv_device_owner(u32 target_device_id);
1518 +
1519 +void lock_nv_registry(u32 reg_device_id, unsigned long* flags);
1520 +void unlock_nv_registry(u32 reg_device_id, unsigned long* flags);
1521 +
1522 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1523 +void pai_check_priority_increase(struct task_struct *t, int reg_device_id);
1524 +void pai_check_priority_decrease(struct task_struct *t, int reg_device_id);
1525 +#endif
1526 +
1527 +//void increment_nv_int_count(u32 device);
1528 +
1529 +#endif
1530 diff --git a/include/litmus/preempt.h b/include/litmus/preempt.h
1531 index 380b886..8f3a9ca 100644
1532 --- a/include/litmus/preempt.h
1533 +++ b/include/litmus/preempt.h
1534 @@ -26,12 +26,12 @@ const char* sched_state_name(int s);
1535 (x), #x, __FUNCTION__); \
1536 } while (0);
1537
1538 +//#define TRACE_SCHED_STATE_CHANGE(x, y, cpu) /* ignore */
1539 #define TRACE_SCHED_STATE_CHANGE(x, y, cpu) \
1540 TRACE_STATE("[P%d] 0x%x (%s) -> 0x%x (%s)\n", \
1541 cpu, (x), sched_state_name(x), \
1542 (y), sched_state_name(y))
1543
1544 -
1545 typedef enum scheduling_state {
1546 TASK_SCHEDULED = (1 << 0), /* The currently scheduled task is the one that
1547 * should be scheduled, and the processor does not
1548 diff --git a/include/litmus/rsm_lock.h b/include/litmus/rsm_lock.h
1549 new file mode 100644
1550 index 0000000..a151896
1551 --- /dev/null
1552 +++ b/include/litmus/rsm_lock.h
1553 @@ -0,0 +1,54 @@
1554 +#ifndef LITMUS_RSM_H
1555 +#define LITMUS_RSM_H
1556 +
1557 +#include <litmus/litmus.h>
1558 +#include <litmus/binheap.h>
1559 +#include <litmus/locking.h>
1560 +
1561 +/* struct for semaphore with priority inheritance */
1562 +struct rsm_mutex {
1563 + struct litmus_lock litmus_lock;
1564 +
1565 + /* current resource holder */
1566 + struct task_struct *owner;
1567 +
1568 + /* highest-priority waiter */
1569 + struct task_struct *hp_waiter;
1570 +
1571 + /* FIFO queue of waiting tasks -- for now. time stamp in the future. */
1572 + wait_queue_head_t wait;
1573 +
1574 + /* we do some nesting within spinlocks, so we can't use the normal
1575 + sleeplocks found in wait_queue_head_t. */
1576 + raw_spinlock_t lock;
1577 +};
1578 +
1579 +static inline struct rsm_mutex* rsm_mutex_from_lock(struct litmus_lock* lock)
1580 +{
1581 + return container_of(lock, struct rsm_mutex, litmus_lock);
1582 +}
1583 +
1584 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
1585 +int rsm_mutex_is_owner(struct litmus_lock *l, struct task_struct *t);
1586 +int rsm_mutex_dgl_lock(struct litmus_lock *l, dgl_wait_state_t* dgl_wait, wait_queue_t* wq_node);
1587 +void rsm_mutex_enable_priority(struct litmus_lock *l, dgl_wait_state_t* dgl_wait);
1588 +#endif
1589 +
1590 +void rsm_mutex_propagate_increase_inheritance(struct litmus_lock* l,
1591 + struct task_struct* t,
1592 + raw_spinlock_t* to_unlock,
1593 + unsigned long irqflags);
1594 +
1595 +void rsm_mutex_propagate_decrease_inheritance(struct litmus_lock* l,
1596 + struct task_struct* t,
1597 + raw_spinlock_t* to_unlock,
1598 + unsigned long irqflags);
1599 +
1600 +int rsm_mutex_lock(struct litmus_lock* l);
1601 +int rsm_mutex_unlock(struct litmus_lock* l);
1602 +int rsm_mutex_close(struct litmus_lock* l);
1603 +void rsm_mutex_free(struct litmus_lock* l);
1604 +struct litmus_lock* rsm_mutex_new(struct litmus_lock_ops*);
1605 +
1606 +
1607 +#endif
1608 \ No newline at end of file
1609 diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
1610 index d6d7991..0198884 100644
1611 --- a/include/litmus/rt_param.h
1612 +++ b/include/litmus/rt_param.h
1613 @@ -5,6 +5,8 @@
1614 #ifndef _LINUX_RT_PARAM_H_
1615 #define _LINUX_RT_PARAM_H_
1616
1617 +#include <litmus/fpmath.h>
1618 +
1619 /* Litmus time type. */
1620 typedef unsigned long long lt_t;
1621
1622 @@ -24,6 +26,7 @@ static inline int lt_after_eq(lt_t a, lt_t b)
1623 typedef enum {
1624 RT_CLASS_HARD,
1625 RT_CLASS_SOFT,
1626 + RT_CLASS_SOFT_W_SLIP,
1627 RT_CLASS_BEST_EFFORT
1628 } task_class_t;
1629
1630 @@ -52,6 +55,19 @@ union np_flag {
1631 } np;
1632 };
1633
1634 +struct affinity_observer_args
1635 +{
1636 + int lock_od;
1637 +};
1638 +
1639 +struct gpu_affinity_observer_args
1640 +{
1641 + struct affinity_observer_args obs;
1642 + int replica_to_gpu_offset;
1643 + int nr_simult_users;
1644 + int relaxed_rules;
1645 +};
1646 +
1647 /* The definition of the data that is shared between the kernel and real-time
1648 * tasks via a shared page (see litmus/ctrldev.c).
1649 *
1650 @@ -75,6 +91,9 @@ struct control_page {
1651 /* don't export internal data structures to user space (liblitmus) */
1652 #ifdef __KERNEL__
1653
1654 +#include <litmus/binheap.h>
1655 +#include <linux/semaphore.h>
1656 +
1657 struct _rt_domain;
1658 struct bheap_node;
1659 struct release_heap;
1660 @@ -100,6 +119,31 @@ struct rt_job {
1661
1662 struct pfair_param;
1663
1664 +enum klitirqd_sem_status
1665 +{
1666 + NEED_TO_REACQUIRE,
1667 + REACQUIRING,
1668 + NOT_HELD,
1669 + HELD
1670 +};
1671 +
1672 +typedef enum gpu_migration_dist
1673 +{
1674 + // TODO: Make this variable against NR_NVIDIA_GPUS
1675 + MIG_LOCAL = 0,
1676 + MIG_NEAR = 1,
1677 + MIG_MED = 2,
1678 + MIG_FAR = 3, // 8 GPUs in a binary tree hierarchy
1679 + MIG_NONE = 4,
1680 +
1681 + MIG_LAST = MIG_NONE
1682 +} gpu_migration_dist_t;
1683 +
1684 +typedef struct feedback_est{
1685 + fp_t est;
1686 + fp_t accum_err;
1687 +} feedback_est_t;
1688 +
1689 /* RT task parameters for scheduling extensions
1690 * These parameters are inherited during clone and therefore must
1691 * be explicitly set up before the task set is launched.
1692 @@ -114,6 +158,52 @@ struct rt_param {
1693 /* is the task present? (true if it can be scheduled) */
1694 unsigned int present:1;
1695
1696 +#ifdef CONFIG_LITMUS_SOFTIRQD
1697 + /* proxy threads have minimum priority by default */
1698 + unsigned int is_proxy_thread:1;
1699 +
1700 + /* pointer to klitirqd currently working on this
1701 + task_struct's behalf. only set by the task pointed
1702 + to by klitirqd.
1703 +
1704 + ptr only valid if is_proxy_thread == 0
1705 + */
1706 + struct task_struct* cur_klitirqd;
1707 +
 1708 +	/* Used to implement mutual exclusion between
 1709 +	 * job and klitirqd execution. A job must always hold
 1710 +	 * its klitirqd_sem to execute. A klitirqd instance
1711 + * must hold the semaphore before executing on behalf
1712 + * of a job.
1713 + */
1714 + struct mutex klitirqd_sem;
1715 +
1716 + /* status of held klitirqd_sem, even if the held klitirqd_sem is from
1717 + another task (only proxy threads do this though).
1718 + */
1719 + atomic_t klitirqd_sem_stat;
1720 +#endif
1721 +
1722 +#ifdef CONFIG_LITMUS_NVIDIA
1723 + /* number of top-half interrupts handled on behalf of current job */
1724 + atomic_t nv_int_count;
1725 + long unsigned int held_gpus; // bitmap of held GPUs.
1726 +
1727 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1728 + fp_t gpu_fb_param_a[MIG_LAST+1];
1729 + fp_t gpu_fb_param_b[MIG_LAST+1];
1730 +
1731 + gpu_migration_dist_t gpu_migration;
1732 + int last_gpu;
1733 + feedback_est_t gpu_migration_est[MIG_LAST+1]; // local, near, med, far
1734 +
1735 + lt_t accum_gpu_time;
1736 + lt_t gpu_time_stamp;
1737 +
1738 + unsigned int suspend_gpu_tracker_on_block:1;
1739 +#endif
1740 +#endif
1741 +
1742 #ifdef CONFIG_LITMUS_LOCKING
1743 /* Is the task being priority-boosted by a locking protocol? */
1744 unsigned int priority_boosted:1;
1745 @@ -133,7 +223,15 @@ struct rt_param {
1746 * could point to self if PI does not result in
1747 * an increased task priority.
1748 */
1749 - struct task_struct* inh_task;
1750 + struct task_struct* inh_task;
1751 +
1752 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
1753 + raw_spinlock_t hp_blocked_tasks_lock;
1754 + struct binheap_handle hp_blocked_tasks;
1755 +
1756 + /* pointer to lock upon which is currently blocked */
1757 + struct litmus_lock* blocked_lock;
1758 +#endif
1759
1760 #ifdef CONFIG_NP_SECTION
1761 /* For the FMLP under PSN-EDF, it is required to make the task
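The MIG_* levels defined in the rt_param.h hunk above grade how far a job's state has to move between GPUs. For the 8-GPU binary-tree hierarchy mentioned in the MIG_FAR comment, one plausible mapping from a pair of GPU ids to a distance is sketched below; this is illustrative only and is not necessarily the heuristic implemented in litmus/gpu_affinity.c:

#include <litmus/rt_param.h>

/* GPU ids 0..7; distance = level of the lowest common ancestor in the tree. */
static gpu_migration_dist_t example_gpu_migration_dist(int last_gpu, int next_gpu)
{
	if (last_gpu < 0)
		return MIG_NONE;		/* no previous GPU recorded */
	if (last_gpu == next_gpu)
		return MIG_LOCAL;
	if ((last_gpu >> 1) == (next_gpu >> 1))
		return MIG_NEAR;		/* siblings under the same parent */
	if ((last_gpu >> 2) == (next_gpu >> 2))
		return MIG_MED;			/* share a grandparent */
	return MIG_FAR;				/* paths only meet at the root */
}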
1762 diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h
1763 index 6e7cabd..24a6858 100644
1764 --- a/include/litmus/sched_plugin.h
1765 +++ b/include/litmus/sched_plugin.h
1766 @@ -11,6 +11,12 @@
1767 #include <litmus/locking.h>
1768 #endif
1769
1770 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1771 +#include <litmus/kexclu_affinity.h>
1772 +#endif
1773 +
1774 +#include <linux/interrupt.h>
1775 +
1776 /************************ setup/tear down ********************/
1777
1778 typedef long (*activate_plugin_t) (void);
1779 @@ -29,7 +35,6 @@ typedef struct task_struct* (*schedule_t)(struct task_struct * prev);
1780 */
1781 typedef void (*finish_switch_t)(struct task_struct *prev);
1782
1783 -
1784 /********************* task state changes ********************/
1785
1786 /* Called to setup a new real-time task.
1787 @@ -58,6 +63,47 @@ typedef void (*task_exit_t) (struct task_struct *);
1788 typedef long (*allocate_lock_t) (struct litmus_lock **lock, int type,
1789 void* __user config);
1790
1791 +struct affinity_observer;
1792 +typedef long (*allocate_affinity_observer_t) (
1793 + struct affinity_observer **aff_obs, int type,
1794 + void* __user config);
1795 +
1796 +typedef void (*increase_prio_t)(struct task_struct* t, struct task_struct* prio_inh);
1797 +typedef void (*decrease_prio_t)(struct task_struct* t, struct task_struct* prio_inh);
1798 +typedef void (*nested_increase_prio_t)(struct task_struct* t, struct task_struct* prio_inh,
1799 + raw_spinlock_t *to_unlock, unsigned long irqflags);
1800 +typedef void (*nested_decrease_prio_t)(struct task_struct* t, struct task_struct* prio_inh,
1801 + raw_spinlock_t *to_unlock, unsigned long irqflags);
1802 +
1803 +typedef void (*increase_prio_klitirq_t)(struct task_struct* klitirqd,
1804 + struct task_struct* old_owner,
1805 + struct task_struct* new_owner);
1806 +typedef void (*decrease_prio_klitirqd_t)(struct task_struct* klitirqd,
1807 + struct task_struct* old_owner);
1808 +
1809 +
1810 +typedef int (*enqueue_pai_tasklet_t)(struct tasklet_struct* tasklet);
1811 +typedef void (*change_prio_pai_tasklet_t)(struct task_struct *old_prio,
1812 + struct task_struct *new_prio);
1813 +typedef void (*run_tasklets_t)(struct task_struct* next);
1814 +
1815 +typedef raw_spinlock_t* (*get_dgl_spinlock_t) (struct task_struct *t);
1816 +
1817 +
1818 +typedef int (*higher_prio_t)(struct task_struct* a, struct task_struct* b);
1819 +
1820 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
1821 +
1822 +typedef enum
1823 +{
1824 + BASE,
1825 + EFFECTIVE
1826 +} comparison_mode_t;
1827 +
1828 +typedef int (*__higher_prio_t)(struct task_struct* a, comparison_mode_t a_mod,
1829 + struct task_struct* b, comparison_mode_t b_mod);
1830 +#endif
1831 +
1832
1833 /********************* sys call backends ********************/
1834 /* This function causes the caller to sleep until the next release */
1835 @@ -88,14 +134,40 @@ struct sched_plugin {
1836 /* task state changes */
1837 admit_task_t admit_task;
1838
1839 - task_new_t task_new;
1840 + task_new_t task_new;
1841 task_wake_up_t task_wake_up;
1842 task_block_t task_block;
1843 task_exit_t task_exit;
1844
1845 + higher_prio_t compare;
1846 +
1847 #ifdef CONFIG_LITMUS_LOCKING
1848 /* locking protocols */
1849 allocate_lock_t allocate_lock;
1850 + increase_prio_t increase_prio;
1851 + decrease_prio_t decrease_prio;
1852 +#endif
1853 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
1854 + nested_increase_prio_t nested_increase_prio;
1855 + nested_decrease_prio_t nested_decrease_prio;
1856 + __higher_prio_t __compare;
1857 +#endif
1858 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
1859 + get_dgl_spinlock_t get_dgl_spinlock;
1860 +#endif
1861 +
1862 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1863 + allocate_affinity_observer_t allocate_aff_obs;
1864 +#endif
1865 +
1866 +#ifdef CONFIG_LITMUS_SOFTIRQD
1867 + increase_prio_klitirq_t increase_prio_klitirqd;
1868 + decrease_prio_klitirqd_t decrease_prio_klitirqd;
1869 +#endif
1870 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1871 + enqueue_pai_tasklet_t enqueue_pai_tasklet;
1872 + change_prio_pai_tasklet_t change_prio_pai_tasklet;
1873 + run_tasklets_t run_tasklets;
1874 #endif
1875 } __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
1876
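The new struct sched_plugin members are ordinary function pointers, so a plugin opts into a feature simply by filling the corresponding slot. A sketch of the locking-related wiring with placeholder bodies (everything named example_* is hypothetical; the real implementations live in the scheduler plugins modified elsewhere by this patch):

#include <litmus/sched_plugin.h>

static int example_higher_prio(struct task_struct *a, struct task_struct *b)
{
	return 0;	/* placeholder: compare the (effective) priorities of a and b */
}

static void example_increase_prio(struct task_struct *t, struct task_struct *prio_inh)
{
	/* placeholder: make t inherit prio_inh's priority */
}

static void example_decrease_prio(struct task_struct *t, struct task_struct *prio_inh)
{
	/* placeholder: revert t to prio_inh (or its base priority) */
}

static struct sched_plugin example_plugin __attribute__ ((__aligned__(SMP_CACHE_BYTES))) = {
	/* ... admit_task, task_new, task_wake_up, task_block, task_exit ... */
	.compare       = example_higher_prio,
#ifdef CONFIG_LITMUS_LOCKING
	.increase_prio = example_increase_prio,
	.decrease_prio = example_decrease_prio,
#endif
};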
1877 diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h
1878 index 7ca34cb..b1b71f6 100644
1879 --- a/include/litmus/sched_trace.h
1880 +++ b/include/litmus/sched_trace.h
1881 @@ -10,13 +10,14 @@ struct st_trace_header {
1882 u8 type; /* Of what type is this record? */
1883 u8 cpu; /* On which CPU was it recorded? */
1884 u16 pid; /* PID of the task. */
1885 - u32 job; /* The job sequence number. */
1886 -};
1887 + u32 job:24; /* The job sequence number. */
1888 + u8 extra;
1889 +} __attribute__((packed));
1890
1891 #define ST_NAME_LEN 16
1892 struct st_name_data {
1893 char cmd[ST_NAME_LEN];/* The name of the executable of this process. */
1894 -};
1895 +} __attribute__((packed));
1896
1897 struct st_param_data { /* regular params */
1898 u32 wcet;
1899 @@ -25,30 +26,29 @@ struct st_param_data { /* regular params */
1900 u8 partition;
1901 u8 class;
1902 u8 __unused[2];
1903 -};
1904 +} __attribute__((packed));
1905
 1906 struct st_release_data { /* A job was/is going to be released. */
1907 u64 release; /* What's the release time? */
1908 u64 deadline; /* By when must it finish? */
1909 -};
1910 +} __attribute__((packed));
1911
 1912 struct st_assigned_data { /* A job was assigned to a CPU. */
1913 u64 when;
1914 u8 target; /* Where should it execute? */
1915 u8 __unused[7];
1916 -};
1917 +} __attribute__((packed));
1918
1919 struct st_switch_to_data { /* A process was switched to on a given CPU. */
1920 u64 when; /* When did this occur? */
1921 u32 exec_time; /* Time the current job has executed. */
1922 u8 __unused[4];
1923 -
1924 -};
1925 +} __attribute__((packed));
1926
1927 struct st_switch_away_data { /* A process was switched away from on a given CPU. */
1928 u64 when;
1929 u64 exec_time;
1930 -};
1931 +} __attribute__((packed));
1932
1933 struct st_completion_data { /* A job completed. */
1934 u64 when;
1935 @@ -56,35 +56,108 @@ struct st_completion_data { /* A job completed. */
1936 * next task automatically; set to 0 otherwise.
1937 */
1938 u8 __uflags:7;
1939 - u8 __unused[7];
1940 -};
1941 + u16 nv_int_count;
1942 + u8 __unused[5];
1943 +} __attribute__((packed));
1944
1945 struct st_block_data { /* A task blocks. */
1946 u64 when;
1947 u64 __unused;
1948 -};
1949 +} __attribute__((packed));
1950
1951 struct st_resume_data { /* A task resumes. */
1952 u64 when;
1953 u64 __unused;
1954 -};
1955 +} __attribute__((packed));
1956
1957 struct st_action_data {
1958 u64 when;
1959 u8 action;
1960 u8 __unused[7];
1961 -};
1962 +} __attribute__((packed));
1963
1964 struct st_sys_release_data {
1965 u64 when;
1966 u64 release;
1967 -};
1968 +} __attribute__((packed));
1969 +
1970 +
1971 +struct st_tasklet_release_data {
1972 + u64 when;
1973 + u64 __unused;
1974 +} __attribute__((packed));
1975 +
1976 +struct st_tasklet_begin_data {
1977 + u64 when;
1978 + u16 exe_pid;
1979 + u8 __unused[6];
1980 +} __attribute__((packed));
1981 +
1982 +struct st_tasklet_end_data {
1983 + u64 when;
1984 + u16 exe_pid;
1985 + u8 flushed;
1986 + u8 __unused[5];
1987 +} __attribute__((packed));
1988 +
1989 +
1990 +struct st_work_release_data {
1991 + u64 when;
1992 + u64 __unused;
1993 +} __attribute__((packed));
1994 +
1995 +struct st_work_begin_data {
1996 + u64 when;
1997 + u16 exe_pid;
1998 + u8 __unused[6];
1999 +} __attribute__((packed));
2000 +
2001 +struct st_work_end_data {
2002 + u64 when;
2003 + u16 exe_pid;
2004 + u8 flushed;
2005 + u8 __unused[5];
2006 +} __attribute__((packed));
2007 +
2008 +struct st_effective_priority_change_data {
2009 + u64 when;
2010 + u16 inh_pid;
2011 + u8 __unused[6];
2012 +} __attribute__((packed));
2013 +
2014 +struct st_nv_interrupt_begin_data {
2015 + u64 when;
2016 + u32 device;
2017 + u32 serialNumber;
2018 +} __attribute__((packed));
2019 +
2020 +struct st_nv_interrupt_end_data {
2021 + u64 when;
2022 + u32 device;
2023 + u32 serialNumber;
2024 +} __attribute__((packed));
2025 +
2026 +struct st_prediction_err_data {
2027 + u64 distance;
2028 + u64 rel_err;
2029 +} __attribute__((packed));
2030 +
2031 +struct st_migration_data {
2032 + u64 observed;
2033 + u64 estimated;
2034 +} __attribute__((packed));
2035 +
2036 +struct migration_info {
2037 + u64 observed;
2038 + u64 estimated;
2039 + u8 distance;
2040 +} __attribute__((packed));
2041
2042 #define DATA(x) struct st_ ## x ## _data x;
2043
2044 typedef enum {
2045 - ST_NAME = 1, /* Start at one, so that we can spot
2046 - * uninitialized records. */
2047 + ST_NAME = 1, /* Start at one, so that we can spot
2048 + * uninitialized records. */
2049 ST_PARAM,
2050 ST_RELEASE,
2051 ST_ASSIGNED,
2052 @@ -94,7 +167,19 @@ typedef enum {
2053 ST_BLOCK,
2054 ST_RESUME,
2055 ST_ACTION,
2056 - ST_SYS_RELEASE
2057 + ST_SYS_RELEASE,
2058 + ST_TASKLET_RELEASE,
2059 + ST_TASKLET_BEGIN,
2060 + ST_TASKLET_END,
2061 + ST_WORK_RELEASE,
2062 + ST_WORK_BEGIN,
2063 + ST_WORK_END,
2064 + ST_EFF_PRIO_CHANGE,
2065 + ST_NV_INTERRUPT_BEGIN,
2066 + ST_NV_INTERRUPT_END,
2067 +
2068 + ST_PREDICTION_ERR,
2069 + ST_MIGRATION,
2070 } st_event_record_type_t;
2071
2072 struct st_event_record {
2073 @@ -113,8 +198,20 @@ struct st_event_record {
2074 DATA(resume);
2075 DATA(action);
2076 DATA(sys_release);
2077 + DATA(tasklet_release);
2078 + DATA(tasklet_begin);
2079 + DATA(tasklet_end);
2080 + DATA(work_release);
2081 + DATA(work_begin);
2082 + DATA(work_end);
2083 + DATA(effective_priority_change);
2084 + DATA(nv_interrupt_begin);
2085 + DATA(nv_interrupt_end);
2086 +
2087 + DATA(prediction_err);
2088 + DATA(migration);
2089 } data;
2090 -};
2091 +} __attribute__((packed));
2092
2093 #undef DATA
2094
2095 @@ -129,6 +226,8 @@ struct st_event_record {
2096 ft_event1(id, callback, task)
2097 #define SCHED_TRACE2(id, callback, task, xtra) \
2098 ft_event2(id, callback, task, xtra)
2099 +#define SCHED_TRACE3(id, callback, task, xtra1, xtra2) \
2100 + ft_event3(id, callback, task, xtra1, xtra2)
2101
2102 /* provide prototypes; needed on sparc64 */
2103 #ifndef NO_TASK_TRACE_DECLS
2104 @@ -155,12 +254,58 @@ feather_callback void do_sched_trace_action(unsigned long id,
2105 feather_callback void do_sched_trace_sys_release(unsigned long id,
2106 lt_t* start);
2107
2108 +
2109 +feather_callback void do_sched_trace_tasklet_release(unsigned long id,
2110 + struct task_struct* owner);
2111 +feather_callback void do_sched_trace_tasklet_begin(unsigned long id,
2112 + struct task_struct* owner);
2113 +feather_callback void do_sched_trace_tasklet_end(unsigned long id,
2114 + struct task_struct* owner,
2115 + unsigned long flushed);
2116 +
2117 +feather_callback void do_sched_trace_work_release(unsigned long id,
2118 + struct task_struct* owner);
2119 +feather_callback void do_sched_trace_work_begin(unsigned long id,
2120 + struct task_struct* owner,
2121 + struct task_struct* exe);
2122 +feather_callback void do_sched_trace_work_end(unsigned long id,
2123 + struct task_struct* owner,
2124 + struct task_struct* exe,
2125 + unsigned long flushed);
2126 +
2127 +feather_callback void do_sched_trace_eff_prio_change(unsigned long id,
2128 + struct task_struct* task,
2129 + struct task_struct* inh);
2130 +
2131 +feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id,
2132 + u32 device);
2133 +feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id,
2134 + unsigned long unused);
2135 +
2136 +feather_callback void do_sched_trace_prediction_err(unsigned long id,
2137 + struct task_struct* task,
2138 + gpu_migration_dist_t* distance,
2139 + fp_t* rel_err);
2140 +
2141 +
2142 +
2143 +
2144 +
2145 +feather_callback void do_sched_trace_migration(unsigned long id,
2146 + struct task_struct* task,
2147 + struct migration_info* mig_info);
2148 +
2149 +
2150 +/* returns true if we're tracing an interrupt on current CPU */
2151 +/* int is_interrupt_tracing_active(void); */
2152 +
2153 #endif
2154
2155 #else
2156
2157 #define SCHED_TRACE(id, callback, task) /* no tracing */
2158 #define SCHED_TRACE2(id, callback, task, xtra) /* no tracing */
2159 +#define SCHED_TRACE3(id, callback, task, xtra1, xtra2)
2160
2161 #endif
2162
2163 @@ -193,6 +338,41 @@ feather_callback void do_sched_trace_sys_release(unsigned long id,
2164 SCHED_TRACE(SCHED_TRACE_BASE_ID + 10, do_sched_trace_sys_release, when)
2165
2166
2167 +#define sched_trace_tasklet_release(t) \
2168 + SCHED_TRACE(SCHED_TRACE_BASE_ID + 11, do_sched_trace_tasklet_release, t)
2169 +
2170 +#define sched_trace_tasklet_begin(t) \
2171 + SCHED_TRACE(SCHED_TRACE_BASE_ID + 12, do_sched_trace_tasklet_begin, t)
2172 +
2173 +#define sched_trace_tasklet_end(t, flushed) \
2174 + SCHED_TRACE2(SCHED_TRACE_BASE_ID + 13, do_sched_trace_tasklet_end, t, flushed)
2175 +
2176 +
2177 +#define sched_trace_work_release(t) \
2178 + SCHED_TRACE(SCHED_TRACE_BASE_ID + 14, do_sched_trace_work_release, t)
2179 +
2180 +#define sched_trace_work_begin(t, e) \
2181 + SCHED_TRACE2(SCHED_TRACE_BASE_ID + 15, do_sched_trace_work_begin, t, e)
2182 +
2183 +#define sched_trace_work_end(t, e, flushed) \
2184 + SCHED_TRACE3(SCHED_TRACE_BASE_ID + 16, do_sched_trace_work_end, t, e, flushed)
2185 +
2186 +
2187 +#define sched_trace_eff_prio_change(t, inh) \
2188 + SCHED_TRACE2(SCHED_TRACE_BASE_ID + 17, do_sched_trace_eff_prio_change, t, inh)
2189 +
2190 +
2191 +#define sched_trace_nv_interrupt_begin(d) \
2192 + SCHED_TRACE(SCHED_TRACE_BASE_ID + 18, do_sched_trace_nv_interrupt_begin, d)
2193 +#define sched_trace_nv_interrupt_end(d) \
2194 + SCHED_TRACE(SCHED_TRACE_BASE_ID + 19, do_sched_trace_nv_interrupt_end, d)
2195 +
2196 +#define sched_trace_prediction_err(t, dist, rel_err) \
2197 + SCHED_TRACE3(SCHED_TRACE_BASE_ID + 20, do_sched_trace_prediction_err, t, dist, rel_err)
2198 +
2199 +#define sched_trace_migration(t, mig_info) \
2200 + SCHED_TRACE2(SCHED_TRACE_BASE_ID + 21, do_sched_trace_migration, t, mig_info)
2201 +
2202 #define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */
2203
2204 #endif /* __KERNEL__ */
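A short usage sketch for the new tasklet trace events: they are meant to bracket the execution of a bottom half run on behalf of a real-time task, with t->owner (the owner field this patch threads through struct tasklet_struct) identifying that task. The wrapper function is hypothetical:

#include <linux/interrupt.h>
#include <litmus/sched_trace.h>

static void example_run_deferred_tasklet(struct tasklet_struct *t)
{
	sched_trace_tasklet_begin(t->owner);	/* charged to the owning task   */
	t->func(t->data);
	sched_trace_tasklet_end(t->owner, 0ul);	/* flushed = 0: it actually ran */
}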
2205 diff --git a/include/litmus/sched_trace_external.h b/include/litmus/sched_trace_external.h
2206 new file mode 100644
2207 index 0000000..e70e45e
2208 --- /dev/null
2209 +++ b/include/litmus/sched_trace_external.h
2210 @@ -0,0 +1,78 @@
2211 +/*
 2212 + * sched_trace_external.h -- record scheduler events to a byte stream for offline analysis.
2213 + */
2214 +#ifndef _LINUX_SCHED_TRACE_EXTERNAL_H_
2215 +#define _LINUX_SCHED_TRACE_EXTERNAL_H_
2216 +
2217 +
2218 +#ifdef CONFIG_SCHED_TASK_TRACE
2219 +extern void __sched_trace_tasklet_begin_external(struct task_struct* t);
2220 +static inline void sched_trace_tasklet_begin_external(struct task_struct* t)
2221 +{
2222 + __sched_trace_tasklet_begin_external(t);
2223 +}
2224 +
2225 +extern void __sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed);
2226 +static inline void sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed)
2227 +{
2228 + __sched_trace_tasklet_end_external(t, flushed);
2229 +}
2230 +
2231 +extern void __sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e);
2232 +static inline void sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e)
2233 +{
2234 + __sched_trace_work_begin_external(t, e);
2235 +}
2236 +
2237 +extern void __sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f);
2238 +static inline void sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f)
2239 +{
2240 + __sched_trace_work_end_external(t, e, f);
2241 +}
2242 +
2243 +#ifdef CONFIG_LITMUS_NVIDIA
2244 +extern void __sched_trace_nv_interrupt_begin_external(u32 device);
2245 +static inline void sched_trace_nv_interrupt_begin_external(u32 device)
2246 +{
2247 + __sched_trace_nv_interrupt_begin_external(device);
2248 +}
2249 +
2250 +extern void __sched_trace_nv_interrupt_end_external(u32 device);
2251 +static inline void sched_trace_nv_interrupt_end_external(u32 device)
2252 +{
2253 + __sched_trace_nv_interrupt_end_external(device);
2254 +}
2255 +#endif
2256 +
2257 +#else
2258 +
2259 +// no tracing.
2260 +static inline void sched_trace_tasklet_begin_external(struct task_struct* t){}
2261 +static inline void sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed){}
2262 +static inline void sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e){}
2263 +static inline void sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f){}
2264 +
2265 +#ifdef CONFIG_LITMUS_NVIDIA
2266 +static inline void sched_trace_nv_interrupt_begin_external(u32 device){}
2267 +static inline void sched_trace_nv_interrupt_end_external(u32 device){}
2268 +#endif
2269 +
2270 +#endif
2271 +
2272 +
2273 +#ifdef CONFIG_LITMUS_NVIDIA
2274 +
2275 +#define EX_TS(evt) \
2276 +extern void __##evt(void); \
2277 +static inline void EX_##evt(void) { __##evt(); }
2278 +
2279 +EX_TS(TS_NV_TOPISR_START)
2280 +EX_TS(TS_NV_TOPISR_END)
2281 +EX_TS(TS_NV_BOTISR_START)
2282 +EX_TS(TS_NV_BOTISR_END)
2283 +EX_TS(TS_NV_RELEASE_BOTISR_START)
2284 +EX_TS(TS_NV_RELEASE_BOTISR_END)
2285 +
2286 +#endif
2287 +
2288 +#endif
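For reference, each EX_TS() use above generates a tiny extern/inline pair. EX_TS(TS_NV_TOPISR_START), for instance, expands to the following, which lets the NVIDIA interrupt glue take overhead timestamps without including the Feather-Trace headers directly:

extern void __TS_NV_TOPISR_START(void);
static inline void EX_TS_NV_TOPISR_START(void)
{
	__TS_NV_TOPISR_START();
}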
2289 diff --git a/include/litmus/trace.h b/include/litmus/trace.h
2290 index e809376..e078aee 100644
2291 --- a/include/litmus/trace.h
2292 +++ b/include/litmus/trace.h
2293 @@ -103,14 +103,46 @@ feather_callback void save_task_latency(unsigned long event, unsigned long when_
2294 #define TS_LOCK_START TIMESTAMP(170)
2295 #define TS_LOCK_SUSPEND TIMESTAMP(171)
2296 #define TS_LOCK_RESUME TIMESTAMP(172)
2297 -#define TS_LOCK_END TIMESTAMP(173)
2298 +#define TS_LOCK_END TIMESTAMP(173)
2299 +
2300 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
2301 +#define TS_DGL_LOCK_START TIMESTAMP(175)
2302 +#define TS_DGL_LOCK_SUSPEND TIMESTAMP(176)
2303 +#define TS_DGL_LOCK_RESUME TIMESTAMP(177)
2304 +#define TS_DGL_LOCK_END TIMESTAMP(178)
2305 +#endif
2306
2307 #define TS_UNLOCK_START TIMESTAMP(180)
2308 #define TS_UNLOCK_END TIMESTAMP(181)
2309
2310 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
2311 +#define TS_DGL_UNLOCK_START TIMESTAMP(185)
2312 +#define TS_DGL_UNLOCK_END TIMESTAMP(186)
2313 +#endif
2314 +
2315 #define TS_SEND_RESCHED_START(c) CTIMESTAMP(190, c)
2316 #define TS_SEND_RESCHED_END DTIMESTAMP(191, TSK_UNKNOWN)
2317
2318 #define TS_RELEASE_LATENCY(when) LTIMESTAMP(208, &(when))
2319
2320 +
2321 +#ifdef CONFIG_LITMUS_NVIDIA
2322 +
2323 +#define TS_NV_TOPISR_START TIMESTAMP(200)
2324 +#define TS_NV_TOPISR_END TIMESTAMP(201)
2325 +
2326 +#define TS_NV_BOTISR_START TIMESTAMP(202)
2327 +#define TS_NV_BOTISR_END TIMESTAMP(203)
2328 +
2329 +#define TS_NV_RELEASE_BOTISR_START TIMESTAMP(204)
2330 +#define TS_NV_RELEASE_BOTISR_END TIMESTAMP(205)
2331 +
2332 +#endif
2333 +
2334 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
2335 +#define TS_NV_SCHED_BOTISR_START TIMESTAMP(206)
2336 +#define TS_NV_SCHED_BOTISR_END TIMESTAMP(207)
2337 +#endif
2338 +
2339 +
2340 #endif /* !_SYS_TRACE_H_ */
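The DGL timestamps follow the same pairing convention as the existing TS_LOCK_* events: START/END bracket the whole acquire path, and SUSPEND/RESUME bracket any suspension within it. A sketch of the intended placement, assuming CONFIG_LITMUS_DGL_SUPPORT (the function is hypothetical; the real instrumentation sits in litmus/locking.c, not shown in this excerpt):

#include <litmus/trace.h>

static long example_dgl_lock_path(void)
{
	TS_DGL_LOCK_START;
	/* ... attempt to acquire every lock in the group ... */
	TS_DGL_LOCK_SUSPEND;	/* immediately before suspending      */
	/* ... suspended until all requested locks are held ...      */
	TS_DGL_LOCK_RESUME;	/* immediately after being woken up   */
	TS_DGL_LOCK_END;
	return 0;
}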
2341 diff --git a/include/litmus/unistd_32.h b/include/litmus/unistd_32.h
2342 index 94264c2..4fa514c 100644
2343 --- a/include/litmus/unistd_32.h
2344 +++ b/include/litmus/unistd_32.h
2345 @@ -17,5 +17,8 @@
2346 #define __NR_wait_for_ts_release __LSC(9)
2347 #define __NR_release_ts __LSC(10)
2348 #define __NR_null_call __LSC(11)
2349 +#define __NR_litmus_dgl_lock __LSC(12)
2350 +#define __NR_litmus_dgl_unlock __LSC(13)
2351 +#define __NR_register_nv_device __LSC(14)
2352
2353 -#define NR_litmus_syscalls 12
2354 +#define NR_litmus_syscalls 15
2355 diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h
2356 index d5ced0d..f80dc45 100644
2357 --- a/include/litmus/unistd_64.h
2358 +++ b/include/litmus/unistd_64.h
2359 @@ -29,5 +29,12 @@ __SYSCALL(__NR_wait_for_ts_release, sys_wait_for_ts_release)
2360 __SYSCALL(__NR_release_ts, sys_release_ts)
2361 #define __NR_null_call __LSC(11)
2362 __SYSCALL(__NR_null_call, sys_null_call)
2363 +#define __NR_litmus_dgl_lock __LSC(12)
2364 +__SYSCALL(__NR_litmus_dgl_lock, sys_litmus_dgl_lock)
2365 +#define __NR_litmus_dgl_unlock __LSC(13)
2366 +__SYSCALL(__NR_litmus_dgl_unlock, sys_litmus_dgl_unlock)
2367 +#define __NR_register_nv_device __LSC(14)
2368 +__SYSCALL(__NR_register_nv_device, sys_register_nv_device)
2369
2370 -#define NR_litmus_syscalls 12
2371 +
2372 +#define NR_litmus_syscalls 15
2373 diff --git a/kernel/lockdep.c b/kernel/lockdep.c
2374 index 298c927..2bdcdc3 100644
2375 --- a/kernel/lockdep.c
2376 +++ b/kernel/lockdep.c
2377 @@ -542,7 +542,7 @@ static void print_lock(struct held_lock *hlock)
2378 print_ip_sym(hlock->acquire_ip);
2379 }
2380
2381 -static void lockdep_print_held_locks(struct task_struct *curr)
2382 +void lockdep_print_held_locks(struct task_struct *curr)
2383 {
2384 int i, depth = curr->lockdep_depth;
2385
2386 @@ -558,6 +558,7 @@ static void lockdep_print_held_locks(struct task_struct *curr)
2387 print_lock(curr->held_locks + i);
2388 }
2389 }
2390 +EXPORT_SYMBOL(lockdep_print_held_locks);
2391
2392 static void print_kernel_version(void)
2393 {
2394 @@ -583,6 +584,10 @@ static int static_obj(void *obj)
2395 end = (unsigned long) &_end,
2396 addr = (unsigned long) obj;
2397
2398 + // GLENN
2399 + return 1;
2400 +
2401 +
2402 /*
2403 * static variable?
2404 */
2405 diff --git a/kernel/mutex.c b/kernel/mutex.c
2406 index d607ed5..2f363b9 100644
2407 --- a/kernel/mutex.c
2408 +++ b/kernel/mutex.c
2409 @@ -498,3 +498,128 @@ int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock)
2410 return 1;
2411 }
2412 EXPORT_SYMBOL(atomic_dec_and_mutex_lock);
2413 +
2414 +
2415 +
2416 +
2417 +void mutex_lock_sfx(struct mutex *lock,
2418 + side_effect_t pre, unsigned long pre_arg,
2419 + side_effect_t post, unsigned long post_arg)
2420 +{
2421 + long state = TASK_UNINTERRUPTIBLE;
2422 +
2423 + struct task_struct *task = current;
2424 + struct mutex_waiter waiter;
2425 + unsigned long flags;
2426 +
2427 + preempt_disable();
 2428 +	mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
2429 +
2430 + spin_lock_mutex(&lock->wait_lock, flags);
2431 +
2432 + if(pre)
2433 + {
2434 + if(unlikely(pre(pre_arg)))
2435 + {
 2436 +			// this will confuse lockdep's CONFIG_PROVE_LOCKING...
2437 + spin_unlock_mutex(&lock->wait_lock, flags);
2438 + preempt_enable();
2439 + return;
2440 + }
2441 + }
2442 +
2443 + debug_mutex_lock_common(lock, &waiter);
2444 + debug_mutex_add_waiter(lock, &waiter, task_thread_info(task));
2445 +
2446 + /* add waiting tasks to the end of the waitqueue (FIFO): */
2447 + list_add_tail(&waiter.list, &lock->wait_list);
2448 + waiter.task = task;
2449 +
2450 + if (atomic_xchg(&lock->count, -1) == 1)
2451 + goto done;
2452 +
 2453 +	lock_contended(&lock->dep_map, _RET_IP_);
2454 +
2455 + for (;;) {
2456 + /*
 2457 +		 * Let's try to take the lock again - this is needed even if
2458 + * we get here for the first time (shortly after failing to
2459 + * acquire the lock), to make sure that we get a wakeup once
2460 + * it's unlocked. Later on, if we sleep, this is the
2461 + * operation that gives us the lock. We xchg it to -1, so
2462 + * that when we release the lock, we properly wake up the
2463 + * other waiters:
2464 + */
2465 + if (atomic_xchg(&lock->count, -1) == 1)
2466 + break;
2467 +
2468 + __set_task_state(task, state);
2469 +
 2470 +		/* didn't get the lock, go to sleep: */
2471 + spin_unlock_mutex(&lock->wait_lock, flags);
2472 + preempt_enable_no_resched();
2473 + schedule();
2474 + preempt_disable();
2475 + spin_lock_mutex(&lock->wait_lock, flags);
2476 + }
2477 +
2478 +done:
 2479 +	lock_acquired(&lock->dep_map, _RET_IP_);
2480 + /* got the lock - rejoice! */
2481 + mutex_remove_waiter(lock, &waiter, current_thread_info());
2482 + mutex_set_owner(lock);
2483 +
2484 + /* set it to 0 if there are no waiters left: */
2485 + if (likely(list_empty(&lock->wait_list)))
2486 + atomic_set(&lock->count, 0);
2487 +
2488 + if(post)
2489 + post(post_arg);
2490 +
2491 + spin_unlock_mutex(&lock->wait_lock, flags);
2492 +
2493 + debug_mutex_free_waiter(&waiter);
2494 + preempt_enable();
2495 +}
2496 +EXPORT_SYMBOL(mutex_lock_sfx);
2497 +
2498 +void mutex_unlock_sfx(struct mutex *lock,
2499 + side_effect_t pre, unsigned long pre_arg,
2500 + side_effect_t post, unsigned long post_arg)
2501 +{
2502 + unsigned long flags;
2503 +
2504 + spin_lock_mutex(&lock->wait_lock, flags);
2505 +
2506 + if(pre)
2507 + pre(pre_arg);
2508 +
2509 + //mutex_release(&lock->dep_map, nested, _RET_IP_);
2510 + mutex_release(&lock->dep_map, 1, _RET_IP_);
2511 + debug_mutex_unlock(lock);
2512 +
2513 + /*
2514 + * some architectures leave the lock unlocked in the fastpath failure
 2515 +	 * case, others need to leave it locked. In the latter case we have to
2516 + * unlock it here
2517 + */
2518 + if (__mutex_slowpath_needs_to_unlock())
2519 + atomic_set(&lock->count, 1);
2520 +
2521 + if (!list_empty(&lock->wait_list)) {
2522 + /* get the first entry from the wait-list: */
2523 + struct mutex_waiter *waiter =
2524 + list_entry(lock->wait_list.next,
2525 + struct mutex_waiter, list);
2526 +
2527 + debug_mutex_wake_waiter(lock, waiter);
2528 +
2529 + wake_up_process(waiter->task);
2530 + }
2531 +
2532 + if(post)
2533 + post(post_arg);
2534 +
2535 + spin_unlock_mutex(&lock->wait_lock, flags);
2536 +}
2537 +EXPORT_SYMBOL(mutex_unlock_sfx);
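mutex_lock_sfx() and mutex_unlock_sfx() let the caller run a callback under lock->wait_lock right around the slow-path acquire and release. A usage sketch follows; side_effect_t is declared in the include/linux/mutex.h hunk of this patch (not shown in this excerpt) and is assumed here to take and return an unsigned long, with a non-zero return from the pre hook aborting the acquisition (see the early-return path above):

#include <linux/mutex.h>

static unsigned long example_pre(unsigned long arg)
{
	/* runs under lock->wait_lock before queuing as a waiter;
	 * returning non-zero skips the acquisition entirely */
	return 0;
}

static unsigned long example_post(unsigned long arg)
{
	/* runs under lock->wait_lock once the mutex is held (lock path),
	 * or right after the next waiter is woken (unlock path) */
	return 0;
}

static void example_usage(struct mutex *m)
{
	mutex_lock_sfx(m, example_pre, 0, example_post, 0);
	/* ... critical section ... */
	mutex_unlock_sfx(m, NULL, 0, NULL, 0);	/* NULL hooks are allowed */
}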
2538 diff --git a/kernel/sched.c b/kernel/sched.c
2539 index baaca61..f3d9a69 100644
2540 --- a/kernel/sched.c
2541 +++ b/kernel/sched.c
2542 @@ -83,6 +83,10 @@
2543 #include <litmus/sched_trace.h>
2544 #include <litmus/trace.h>
2545
2546 +#ifdef CONFIG_LITMUS_SOFTIRQD
2547 +#include <litmus/litmus_softirq.h>
2548 +#endif
2549 +
2550 static void litmus_tick(struct rq*, struct task_struct*);
2551
2552 #define CREATE_TRACE_POINTS
2553 @@ -4305,6 +4309,7 @@ pick_next_task(struct rq *rq)
2554 BUG(); /* the idle class will always have a runnable task */
2555 }
2556
2557 +
2558 /*
2559 * schedule() is the main scheduler function.
2560 */
2561 @@ -4323,6 +4328,10 @@ need_resched:
2562 rcu_note_context_switch(cpu);
2563 prev = rq->curr;
2564
2565 +#ifdef CONFIG_LITMUS_SOFTIRQD
2566 + release_klitirqd_lock(prev);
2567 +#endif
2568 +
2569 /* LITMUS^RT: quickly re-evaluate the scheduling decision
2570 * if the previous one is no longer valid after CTX.
2571 */
2572 @@ -4411,13 +4420,24 @@ litmus_need_resched_nonpreemptible:
2573 goto litmus_need_resched_nonpreemptible;
2574
2575 preempt_enable_no_resched();
2576 +
2577 if (need_resched())
2578 goto need_resched;
2579
 2580 +#ifdef CONFIG_LITMUS_SOFTIRQD
2581 + reacquire_klitirqd_lock(prev);
2582 +#endif
2583 +
2584 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
2585 + litmus->run_tasklets(prev);
2586 +#endif
2587 +
2588 srp_ceiling_block();
2589 }
2590 EXPORT_SYMBOL(schedule);
2591
2592 +
2593 +
2594 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
2595
2596 static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
2597 @@ -4561,6 +4581,7 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
2598 }
2599 }
2600
2601 +
2602 /**
2603 * __wake_up - wake up threads blocked on a waitqueue.
2604 * @q: the waitqueue
2605 @@ -4747,6 +4768,12 @@ void __sched wait_for_completion(struct completion *x)
2606 }
2607 EXPORT_SYMBOL(wait_for_completion);
2608
2609 +void __sched __wait_for_completion_locked(struct completion *x)
2610 +{
2611 + do_wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
2612 +}
2613 +EXPORT_SYMBOL(__wait_for_completion_locked);
2614 +
2615 /**
2616 * wait_for_completion_timeout: - waits for completion of a task (w/timeout)
2617 * @x: holds the state of this particular completion
2618 diff --git a/kernel/semaphore.c b/kernel/semaphore.c
2619 index 94a62c0..c947a04 100644
2620 --- a/kernel/semaphore.c
2621 +++ b/kernel/semaphore.c
2622 @@ -33,11 +33,11 @@
2623 #include <linux/spinlock.h>
2624 #include <linux/ftrace.h>
2625
2626 -static noinline void __down(struct semaphore *sem);
2627 +noinline void __down(struct semaphore *sem);
2628 static noinline int __down_interruptible(struct semaphore *sem);
2629 static noinline int __down_killable(struct semaphore *sem);
2630 static noinline int __down_timeout(struct semaphore *sem, long jiffies);
2631 -static noinline void __up(struct semaphore *sem);
2632 +noinline void __up(struct semaphore *sem);
2633
2634 /**
2635 * down - acquire the semaphore
2636 @@ -190,11 +190,13 @@ EXPORT_SYMBOL(up);
2637
2638 /* Functions for the contended case */
2639
2640 +/*
2641 struct semaphore_waiter {
2642 struct list_head list;
2643 struct task_struct *task;
2644 int up;
2645 };
2646 + */
2647
2648 /*
2649 * Because this function is inlined, the 'state' parameter will be
2650 @@ -233,10 +235,12 @@ static inline int __sched __down_common(struct semaphore *sem, long state,
2651 return -EINTR;
2652 }
2653
2654 -static noinline void __sched __down(struct semaphore *sem)
2655 +noinline void __sched __down(struct semaphore *sem)
2656 {
2657 __down_common(sem, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
2658 }
2659 +EXPORT_SYMBOL(__down);
2660 +
2661
2662 static noinline int __sched __down_interruptible(struct semaphore *sem)
2663 {
2664 @@ -253,7 +257,7 @@ static noinline int __sched __down_timeout(struct semaphore *sem, long jiffies)
2665 return __down_common(sem, TASK_UNINTERRUPTIBLE, jiffies);
2666 }
2667
2668 -static noinline void __sched __up(struct semaphore *sem)
2669 +noinline void __sched __up(struct semaphore *sem)
2670 {
2671 struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list,
2672 struct semaphore_waiter, list);
2673 @@ -261,3 +265,4 @@ static noinline void __sched __up(struct semaphore *sem)
2674 waiter->up = 1;
2675 wake_up_process(waiter->task);
2676 }
2677 +EXPORT_SYMBOL(__up);
2678 \ No newline at end of file
2679 diff --git a/kernel/softirq.c b/kernel/softirq.c
2680 index fca82c3..5ce2716 100644
2681 --- a/kernel/softirq.c
2682 +++ b/kernel/softirq.c
2683 @@ -29,6 +29,15 @@
2684 #include <trace/events/irq.h>
2685
2686 #include <asm/irq.h>
2687 +
2688 +#include <litmus/litmus.h>
2689 +#include <litmus/sched_trace.h>
2690 +
2691 +#ifdef CONFIG_LITMUS_NVIDIA
2692 +#include <litmus/nvidia_info.h>
2693 +#include <litmus/trace.h>
2694 +#endif
2695 +
2696 /*
2697 - No shared variables, all the data are CPU local.
2698 - If a softirq needs serialization, let it serialize itself
2699 @@ -67,7 +76,7 @@ char *softirq_to_name[NR_SOFTIRQS] = {
2700 * to the pending events, so lets the scheduler to balance
2701 * the softirq load for us.
2702 */
2703 -static void wakeup_softirqd(void)
2704 +void wakeup_softirqd(void)
2705 {
2706 /* Interrupts are disabled: no need to stop preemption */
2707 struct task_struct *tsk = __this_cpu_read(ksoftirqd);
2708 @@ -193,6 +202,7 @@ void local_bh_enable_ip(unsigned long ip)
2709 }
2710 EXPORT_SYMBOL(local_bh_enable_ip);
2711
2712 +
2713 /*
2714 * We restart softirq processing MAX_SOFTIRQ_RESTART times,
2715 * and we fall back to softirqd after that.
2716 @@ -206,65 +216,65 @@ EXPORT_SYMBOL(local_bh_enable_ip);
2717
2718 asmlinkage void __do_softirq(void)
2719 {
2720 - struct softirq_action *h;
2721 - __u32 pending;
2722 - int max_restart = MAX_SOFTIRQ_RESTART;
2723 - int cpu;
2724 + struct softirq_action *h;
2725 + __u32 pending;
2726 + int max_restart = MAX_SOFTIRQ_RESTART;
2727 + int cpu;
2728
2729 - pending = local_softirq_pending();
2730 - account_system_vtime(current);
2731 + pending = local_softirq_pending();
2732 + account_system_vtime(current);
2733
2734 - __local_bh_disable((unsigned long)__builtin_return_address(0),
2735 - SOFTIRQ_OFFSET);
2736 - lockdep_softirq_enter();
2737 + __local_bh_disable((unsigned long)__builtin_return_address(0),
2738 + SOFTIRQ_OFFSET);
2739 + lockdep_softirq_enter();
2740
2741 - cpu = smp_processor_id();
2742 + cpu = smp_processor_id();
2743 restart:
2744 - /* Reset the pending bitmask before enabling irqs */
2745 - set_softirq_pending(0);
2746 + /* Reset the pending bitmask before enabling irqs */
2747 + set_softirq_pending(0);
2748
2749 - local_irq_enable();
2750 + local_irq_enable();
2751
2752 - h = softirq_vec;
2753 -
2754 - do {
2755 - if (pending & 1) {
2756 - unsigned int vec_nr = h - softirq_vec;
2757 - int prev_count = preempt_count();
2758 -
2759 - kstat_incr_softirqs_this_cpu(vec_nr);
2760 -
2761 - trace_softirq_entry(vec_nr);
2762 - h->action(h);
2763 - trace_softirq_exit(vec_nr);
2764 - if (unlikely(prev_count != preempt_count())) {
2765 - printk(KERN_ERR "huh, entered softirq %u %s %p"
2766 - "with preempt_count %08x,"
2767 - " exited with %08x?\n", vec_nr,
2768 - softirq_to_name[vec_nr], h->action,
2769 - prev_count, preempt_count());
2770 - preempt_count() = prev_count;
2771 - }
2772 + h = softirq_vec;
2773
2774 - rcu_bh_qs(cpu);
2775 - }
2776 - h++;
2777 - pending >>= 1;
2778 - } while (pending);
2779 + do {
2780 + if (pending & 1) {
2781 + unsigned int vec_nr = h - softirq_vec;
2782 + int prev_count = preempt_count();
2783
2784 - local_irq_disable();
2785 + kstat_incr_softirqs_this_cpu(vec_nr);
2786
2787 - pending = local_softirq_pending();
2788 - if (pending && --max_restart)
2789 - goto restart;
2790 + trace_softirq_entry(vec_nr);
2791 + h->action(h);
2792 + trace_softirq_exit(vec_nr);
2793 + if (unlikely(prev_count != preempt_count())) {
 2794 +				printk(KERN_ERR "huh, entered softirq %u %s %p "
2795 + "with preempt_count %08x,"
2796 + " exited with %08x?\n", vec_nr,
2797 + softirq_to_name[vec_nr], h->action,
2798 + prev_count, preempt_count());
2799 + preempt_count() = prev_count;
2800 + }
2801
2802 - if (pending)
2803 - wakeup_softirqd();
2804 + rcu_bh_qs(cpu);
2805 + }
2806 + h++;
2807 + pending >>= 1;
2808 + } while (pending);
2809
2810 - lockdep_softirq_exit();
2811 + local_irq_disable();
2812
2813 - account_system_vtime(current);
2814 - __local_bh_enable(SOFTIRQ_OFFSET);
2815 + pending = local_softirq_pending();
2816 + if (pending && --max_restart)
2817 + goto restart;
2818 +
2819 + if (pending)
2820 + wakeup_softirqd();
2821 +
2822 + lockdep_softirq_exit();
2823 +
2824 + account_system_vtime(current);
2825 + __local_bh_enable(SOFTIRQ_OFFSET);
2826 }
2827
2828 #ifndef __ARCH_HAS_DO_SOFTIRQ
2829 @@ -402,8 +412,99 @@ struct tasklet_head
2830 static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
2831 static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
2832
2833 +#ifdef CONFIG_LITMUS_NVIDIA
2834 +static int __do_nv_now(struct tasklet_struct* tasklet)
2835 +{
2836 + int success = 1;
2837 +
2838 + if(tasklet_trylock(tasklet)) {
2839 + if (!atomic_read(&tasklet->count)) {
2840 + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state)) {
2841 + BUG();
2842 + }
2843 + tasklet->func(tasklet->data);
2844 + tasklet_unlock(tasklet);
2845 + }
2846 + else {
2847 + success = 0;
2848 + }
2849 +
2850 + tasklet_unlock(tasklet);
2851 + }
2852 + else {
2853 + success = 0;
2854 + }
2855 +
2856 + return success;
2857 +}
2858 +#endif
2859 +
2860 +
2861 void __tasklet_schedule(struct tasklet_struct *t)
2862 {
2863 +#ifdef CONFIG_LITMUS_NVIDIA
2864 + if(is_nvidia_func(t->func))
2865 + {
2866 +#if 0
2867 + // do nvidia tasklets right away and return
2868 + if(__do_nv_now(t))
2869 + return;
2870 +#else
2871 + u32 nvidia_device = get_tasklet_nv_device_num(t);
2872 + // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n",
2873 + // __FUNCTION__, nvidia_device,litmus_clock());
2874 +
2875 + unsigned long flags;
2876 + struct task_struct* device_owner;
2877 +
2878 + lock_nv_registry(nvidia_device, &flags);
2879 +
2880 + device_owner = get_nv_max_device_owner(nvidia_device);
2881 +
2882 + if(device_owner==NULL)
2883 + {
2884 + t->owner = NULL;
2885 + }
2886 + else
2887 + {
2888 + if(is_realtime(device_owner))
2889 + {
2890 + TRACE("%s: Handling NVIDIA tasklet for device %u at %llu\n",
2891 + __FUNCTION__, nvidia_device,litmus_clock());
2892 + TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n",
2893 + __FUNCTION__,device_owner->pid,nvidia_device);
2894 +
2895 + t->owner = device_owner;
2896 + sched_trace_tasklet_release(t->owner);
2897 +
2898 + if(likely(_litmus_tasklet_schedule(t,nvidia_device)))
2899 + {
2900 + unlock_nv_registry(nvidia_device, &flags);
2901 + return;
2902 + }
2903 + else
2904 + {
2905 + t->owner = NULL; /* fall through to normal scheduling */
2906 + }
2907 + }
2908 + else
2909 + {
2910 + t->owner = NULL;
2911 + }
2912 + }
2913 + unlock_nv_registry(nvidia_device, &flags);
2914 +#endif
2915 + }
2916 +
2917 +#endif
2918 +
2919 + ___tasklet_schedule(t);
2920 +}
2921 +EXPORT_SYMBOL(__tasklet_schedule);
2922 +
2923 +
2924 +void ___tasklet_schedule(struct tasklet_struct *t)
2925 +{
2926 unsigned long flags;
2927
2928 local_irq_save(flags);
2929 @@ -413,11 +514,65 @@ void __tasklet_schedule(struct tasklet_struct *t)
2930 raise_softirq_irqoff(TASKLET_SOFTIRQ);
2931 local_irq_restore(flags);
2932 }
2933 +EXPORT_SYMBOL(___tasklet_schedule);
2934
2935 -EXPORT_SYMBOL(__tasklet_schedule);
2936
2937 void __tasklet_hi_schedule(struct tasklet_struct *t)
2938 {
2939 +#ifdef CONFIG_LITMUS_NVIDIA
2940 + if(is_nvidia_func(t->func))
2941 + {
2942 + u32 nvidia_device = get_tasklet_nv_device_num(t);
2943 + // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n",
2944 + // __FUNCTION__, nvidia_device,litmus_clock());
2945 +
2946 + unsigned long flags;
2947 + struct task_struct* device_owner;
2948 +
2949 + lock_nv_registry(nvidia_device, &flags);
2950 +
2951 + device_owner = get_nv_max_device_owner(nvidia_device);
2952 +
2953 + if(device_owner==NULL)
2954 + {
2955 + t->owner = NULL;
2956 + }
2957 + else
2958 + {
2959 + if( is_realtime(device_owner))
2960 + {
2961 + TRACE("%s: Handling NVIDIA tasklet for device %u\tat %llu\n",
2962 + __FUNCTION__, nvidia_device,litmus_clock());
2963 + TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n",
2964 + __FUNCTION__,device_owner->pid,nvidia_device);
2965 +
2966 + t->owner = device_owner;
2967 + sched_trace_tasklet_release(t->owner);
2968 + if(likely(_litmus_tasklet_hi_schedule(t,nvidia_device)))
2969 + {
2970 + unlock_nv_registry(nvidia_device, &flags);
2971 + return;
2972 + }
2973 + else
2974 + {
2975 + t->owner = NULL; /* fall through to normal scheduling */
2976 + }
2977 + }
2978 + else
2979 + {
2980 + t->owner = NULL;
2981 + }
2982 + }
2983 + unlock_nv_registry(nvidia_device, &flags);
2984 + }
2985 +#endif
2986 +
2987 + ___tasklet_hi_schedule(t);
2988 +}
2989 +EXPORT_SYMBOL(__tasklet_hi_schedule);
2990 +
2991 +void ___tasklet_hi_schedule(struct tasklet_struct* t)
2992 +{
2993 unsigned long flags;
2994
2995 local_irq_save(flags);
2996 @@ -427,19 +582,72 @@ void __tasklet_hi_schedule(struct tasklet_struct *t)
2997 raise_softirq_irqoff(HI_SOFTIRQ);
2998 local_irq_restore(flags);
2999 }
3000 -
3001 -EXPORT_SYMBOL(__tasklet_hi_schedule);
3002 +EXPORT_SYMBOL(___tasklet_hi_schedule);
3003
3004 void __tasklet_hi_schedule_first(struct tasklet_struct *t)
3005 {
3006 BUG_ON(!irqs_disabled());
3007 +#ifdef CONFIG_LITMUS_NVIDIA
3008 + if(is_nvidia_func(t->func))
3009 + {
3010 + u32 nvidia_device = get_tasklet_nv_device_num(t);
3011 + // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n",
3012 + // __FUNCTION__, nvidia_device,litmus_clock());
3013 + unsigned long flags;
3014 + struct task_struct* device_owner;
3015 +
3016 + lock_nv_registry(nvidia_device, &flags);
3017 +
3018 + device_owner = get_nv_max_device_owner(nvidia_device);
3019 +
3020 + if(device_owner==NULL)
3021 + {
3022 + t->owner = NULL;
3023 + }
3024 + else
3025 + {
3026 + if(is_realtime(device_owner))
3027 + {
3028 + TRACE("%s: Handling NVIDIA tasklet for device %u at %llu\n",
3029 + __FUNCTION__, nvidia_device,litmus_clock());
3030 +
3031 + TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n",
3032 + __FUNCTION__,device_owner->pid,nvidia_device);
3033 +
3034 + t->owner = device_owner;
3035 + sched_trace_tasklet_release(t->owner);
3036 + if(likely(_litmus_tasklet_hi_schedule_first(t,nvidia_device)))
3037 + {
3038 + unlock_nv_registry(nvidia_device, &flags);
3039 + return;
3040 + }
3041 + else
3042 + {
3043 + t->owner = NULL; /* fall through to normal scheduling */
3044 + }
3045 + }
3046 + else
3047 + {
3048 + t->owner = NULL;
3049 + }
3050 + }
3051 + unlock_nv_registry(nvidia_device, &flags);
3052 + }
3053 +#endif
3054 +
3055 + ___tasklet_hi_schedule_first(t);
3056 +}
3057 +EXPORT_SYMBOL(__tasklet_hi_schedule_first);
3058 +
3059 +void ___tasklet_hi_schedule_first(struct tasklet_struct* t)
3060 +{
3061 + BUG_ON(!irqs_disabled());
3062
3063 t->next = __this_cpu_read(tasklet_hi_vec.head);
3064 __this_cpu_write(tasklet_hi_vec.head, t);
3065 __raise_softirq_irqoff(HI_SOFTIRQ);
3066 }
3067 -
3068 -EXPORT_SYMBOL(__tasklet_hi_schedule_first);
3069 +EXPORT_SYMBOL(___tasklet_hi_schedule_first);
3070
3071 static void tasklet_action(struct softirq_action *a)
3072 {
3073 @@ -495,6 +703,7 @@ static void tasklet_hi_action(struct softirq_action *a)
3074 if (!atomic_read(&t->count)) {
3075 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
3076 BUG();
3077 +
3078 t->func(t->data);
3079 tasklet_unlock(t);
3080 continue;
3081 @@ -518,8 +727,13 @@ void tasklet_init(struct tasklet_struct *t,
3082 t->next = NULL;
3083 t->state = 0;
3084 atomic_set(&t->count, 0);
3085 +
3086 t->func = func;
3087 t->data = data;
3088 +
3089 +#ifdef CONFIG_LITMUS_SOFTIRQD
3090 + t->owner = NULL;
3091 +#endif
3092 }
3093
3094 EXPORT_SYMBOL(tasklet_init);
3095 @@ -534,6 +748,7 @@ void tasklet_kill(struct tasklet_struct *t)
3096 yield();
3097 } while (test_bit(TASKLET_STATE_SCHED, &t->state));
3098 }
3099 +
3100 tasklet_unlock_wait(t);
3101 clear_bit(TASKLET_STATE_SCHED, &t->state);
3102 }
3103 @@ -808,6 +1023,7 @@ void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
3104 for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
3105 if (*i == t) {
3106 *i = t->next;
3107 +
3108 /* If this was the tail element, move the tail ptr */
3109 if (*i == NULL)
3110 per_cpu(tasklet_vec, cpu).tail = i;
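__tasklet_schedule(), __tasklet_hi_schedule() and __tasklet_hi_schedule_first() above repeat the same owner-lookup logic before falling back to the stock path. For reference, the shared pattern boils down to the sketch below (illustrative only: the patch keeps the three copies inline, and the function-pointer type for the Litmus scheduling hook is assumed):

#ifdef CONFIG_LITMUS_NVIDIA
#include <litmus/litmus.h>
#include <litmus/nvidia_info.h>
#include <litmus/sched_trace.h>

/* Returns non-zero if the tasklet was handed off to Litmus. */
static int example_dispatch_nv_tasklet(struct tasklet_struct *t,
				       int (*litmus_sched)(struct tasklet_struct *, u32))
{
	unsigned long flags;
	struct task_struct *device_owner;
	u32 nvidia_device;
	int handed_off = 0;

	if (!is_nvidia_func(t->func))
		return 0;

	nvidia_device = get_tasklet_nv_device_num(t);
	lock_nv_registry(nvidia_device, &flags);

	device_owner = get_nv_max_device_owner(nvidia_device);
	if (device_owner && is_realtime(device_owner)) {
		t->owner = device_owner;
		sched_trace_tasklet_release(t->owner);
		handed_off = litmus_sched(t, nvidia_device);
	}
	if (!handed_off)
		t->owner = NULL;	/* caller falls through to the stock path */

	unlock_nv_registry(nvidia_device, &flags);
	return handed_off;
}
#endif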
3111 diff --git a/kernel/workqueue.c b/kernel/workqueue.c
3112 index 0400553..6b59d59 100644
3113 --- a/kernel/workqueue.c
3114 +++ b/kernel/workqueue.c
3115 @@ -44,6 +44,13 @@
3116
3117 #include "workqueue_sched.h"
3118
3119 +#ifdef CONFIG_LITMUS_NVIDIA
3120 +#include <litmus/litmus.h>
3121 +#include <litmus/sched_trace.h>
3122 +#include <litmus/nvidia_info.h>
3123 +#endif
3124 +
3125 +
3126 enum {
3127 /* global_cwq flags */
3128 GCWQ_MANAGE_WORKERS = 1 << 0, /* need to manage workers */
3129 @@ -1047,9 +1054,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
3130 work_flags |= WORK_STRUCT_DELAYED;
3131 worklist = &cwq->delayed_works;
3132 }
3133 -
3134 insert_work(cwq, work, worklist, work_flags);
3135 -
3136 spin_unlock_irqrestore(&gcwq->lock, flags);
3137 }
3138
3139 @@ -2687,10 +2692,70 @@ EXPORT_SYMBOL(cancel_delayed_work_sync);
3140 */
3141 int schedule_work(struct work_struct *work)
3142 {
3143 - return queue_work(system_wq, work);
3144 +#if 0
3145 +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD)
3146 + if(is_nvidia_func(work->func))
3147 + {
3148 + u32 nvidiaDevice = get_work_nv_device_num(work);
3149 +
3150 + //1) Ask Litmus which task owns GPU <nvidiaDevice>. (API to be defined.)
3151 + unsigned long flags;
3152 + struct task_struct* device_owner;
3153 +
3154 + lock_nv_registry(nvidiaDevice, &flags);
3155 +
3156 + device_owner = get_nv_max_device_owner(nvidiaDevice);
3157 +
3158 + //2) If there is an owner, set work->owner to the owner's task struct.
3159 + if(device_owner==NULL)
3160 + {
3161 + work->owner = NULL;
3162 + //TRACE("%s: the owner task of NVIDIA Device %u is NULL\n",__FUNCTION__,nvidiaDevice);
3163 + }
3164 + else
3165 + {
3166 + if( is_realtime(device_owner))
3167 + {
3168 + TRACE("%s: Handling NVIDIA work for device\t%u\tat\t%llu\n",
3169 + __FUNCTION__, nvidiaDevice,litmus_clock());
3170 + TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n",
3171 + __FUNCTION__,
3172 + device_owner->pid,
3173 + nvidiaDevice);
3174 +
3175 + //3) Call litmus_schedule_work() and return (don't execute the rest
 3176 +			// of schedule_work()).
3177 + work->owner = device_owner;
3178 + sched_trace_work_release(work->owner);
3179 + if(likely(litmus_schedule_work(work, nvidiaDevice)))
3180 + {
3181 + unlock_nv_registry(nvidiaDevice, &flags);
3182 + return 1;
3183 + }
3184 + else
3185 + {
3186 + work->owner = NULL; /* fall through to normal work scheduling */
3187 + }
3188 + }
3189 + else
3190 + {
3191 + work->owner = NULL;
3192 + }
3193 + }
3194 + unlock_nv_registry(nvidiaDevice, &flags);
3195 + }
3196 +#endif
3197 +#endif
3198 + return(__schedule_work(work));
3199 }
3200 EXPORT_SYMBOL(schedule_work);
3201
3202 +int __schedule_work(struct work_struct* work)
3203 +{
3204 + return queue_work(system_wq, work);
3205 +}
3206 +EXPORT_SYMBOL(__schedule_work);
3207 +
3208 /*
3209 * schedule_work_on - put work task on a specific cpu
3210 * @cpu: cpu to put the work task on
3211 diff --git a/litmus/Kconfig b/litmus/Kconfig
3212 index 94b48e1..8c156e4 100644
3213 --- a/litmus/Kconfig
3214 +++ b/litmus/Kconfig
3215 @@ -60,6 +60,42 @@ config LITMUS_LOCKING
3216 Say Yes if you want to include locking protocols such as the FMLP and
3217 Baker's SRP.
3218
3219 +config LITMUS_AFFINITY_LOCKING
3220 + bool "Enable affinity infrastructure in k-exclusion locking protocols."
3221 + depends on LITMUS_LOCKING
3222 + default n
3223 + help
3224 + Enable affinity tracking infrastructure in k-exclusion locking protocols.
 3225 +	  This only enables the *infrastructure*, not actual affinity algorithms.
3226 +
3227 + If unsure, say No.
3228 +
3229 +config LITMUS_NESTED_LOCKING
3230 + bool "Support for nested inheritance in locking protocols"
3231 + depends on LITMUS_LOCKING
3232 + default n
3233 + help
3234 + Enable nested priority inheritance.
3235 +
3236 +config LITMUS_DGL_SUPPORT
3237 + bool "Support for dynamic group locks"
3238 + depends on LITMUS_NESTED_LOCKING
3239 + default n
3240 + help
3241 + Enable dynamic group lock support.
3242 +
3243 +config LITMUS_MAX_DGL_SIZE
3244 + int "Maximum size of a dynamic group lock."
3245 + depends on LITMUS_DGL_SUPPORT
3246 + range 1 128
3247 + default "10"
3248 + help
3249 + Dynamic group lock data structures are allocated on the process
 3250 +	  stack when a group is requested. We set a maximum number of
 3251 +	  locks in a dynamic group lock to avoid dynamic allocation.
3252 +
3253 + TODO: Batch DGL requests exceeding LITMUS_MAX_DGL_SIZE.
3254 +
3255 endmenu
3256
3257 menu "Performance Enhancements"
3258 @@ -121,7 +157,7 @@ config SCHED_TASK_TRACE
3259 config SCHED_TASK_TRACE_SHIFT
3260 int "Buffer size for sched_trace_xxx() events"
3261 depends on SCHED_TASK_TRACE
3262 - range 8 13
3263 + range 8 15
3264 default 9
3265 help
3266
3267 @@ -215,4 +251,114 @@ config PREEMPT_STATE_TRACE
3268
3269 endmenu
3270
3271 +menu "Interrupt Handling"
3272 +
3273 +choice
3274 + prompt "Scheduling of interrupt bottom-halves in Litmus."
3275 + default LITMUS_SOFTIRQD_NONE
3276 + depends on LITMUS_LOCKING && !LITMUS_THREAD_ALL_SOFTIRQ
3277 + help
3278 + Schedule tasklets with known priorities in Litmus.
3279 +
3280 +config LITMUS_SOFTIRQD_NONE
3281 + bool "No tasklet scheduling in Litmus."
3282 + help
3283 + Don't schedule tasklets in Litmus. Default.
3284 +
3285 +config LITMUS_SOFTIRQD
3286 + bool "Spawn klitirqd interrupt handling threads."
3287 + help
3288 + Create klitirqd interrupt handling threads. Work must be
3289 + specifically dispatched to these workers. (Softirqs for
3290 + Litmus tasks are not magically redirected to klitirqd.)
3291 +
3292 + G-EDF/RM, C-EDF/RM ONLY for now!
3293 +
3294 +
3295 +config LITMUS_PAI_SOFTIRQD
3296 + bool "Defer tasklets to context switch points."
3297 + help
3298 + Only execute scheduled tasklet bottom halves at
 3299 +	  scheduling points. Saves context-switch overhead
 3300 +	  at the cost of non-preemptive durations of bottom-half
 3301 +	  processing.
3302 +
3303 + G-EDF/RM, C-EDF/RM ONLY for now!
3304 +
3305 +endchoice
3306 +
3307 +
3308 +config NR_LITMUS_SOFTIRQD
3309 + int "Number of klitirqd."
3310 + depends on LITMUS_SOFTIRQD
3311 + range 1 4096
3312 + default "1"
3313 + help
 3314 +	  Should be <= the number of CPUs in your system.
3315 +
3316 +config LITMUS_NVIDIA
3317 + bool "Litmus handling of NVIDIA interrupts."
3318 + default n
3319 + help
3320 + Direct tasklets from NVIDIA devices to Litmus's klitirqd
3321 + or PAI interrupt handling routines.
3322 +
3323 + If unsure, say No.
3324 +
3325 +config LITMUS_AFFINITY_AWARE_GPU_ASSINGMENT
3326 + bool "Enable affinity-aware heuristics to improve GPU assignment."
3327 + depends on LITMUS_NVIDIA && LITMUS_AFFINITY_LOCKING
3328 + default n
3329 + help
3330 + Enable several heuristics to improve the assignment
3331 + of GPUs to real-time tasks to reduce the overheads
3332 + of memory migrations.
3333 +
3334 + If unsure, say No.
3335 +
3336 +config NV_DEVICE_NUM
3337 + int "Number of NVIDIA GPUs."
3338 + depends on LITMUS_SOFTIRQD || LITMUS_PAI_SOFTIRQD
3339 + range 1 4096
3340 + default "1"
3341 + help
 3342 +	  Should be (<= the number of CPUs) and
 3343 +	  (<= the number of GPUs) in your system.
3344 +
3345 +config NV_MAX_SIMULT_USERS
 3346 +	int "Maximum number of threads sharing a GPU simultaneously"
3347 + depends on LITMUS_SOFTIRQD || LITMUS_PAI_SOFTIRQD
3348 + range 1 3
3349 + default "2"
3350 + help
3351 + Should be equal to the #copy_engines + #execution_engines
3352 + of the GPUs in your system.
3353 +
3354 + Scientific/Professional GPUs = 3 (ex. M2070, Quadro 6000?)
3355 + Consumer Fermi/Kepler GPUs = 2 (GTX-4xx thru -6xx)
3356 + Older = 1 (ex. GTX-2xx)
3357 +
3358 +choice
3359 + prompt "CUDA/Driver Version Support"
3360 + default CUDA_4_0
3361 + depends on LITMUS_NVIDIA
3362 + help
3363 + Select the version of CUDA/driver to support.
3364 +
3365 +config CUDA_4_0
3366 + bool "CUDA 4.0"
3367 + depends on LITMUS_NVIDIA
3368 + help
3369 + Support CUDA 4.0 RC2 (dev. driver version: x86_64-270.40)
3370 +
3371 +config CUDA_3_2
3372 + bool "CUDA 3.2"
3373 + depends on LITMUS_NVIDIA
3374 + help
3375 + Support CUDA 3.2 (dev. driver version: x86_64-260.24)
3376 +
3377 +endchoice
3378 +
3379 +endmenu
3380 +
3381 endmenu
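Taken together, a .config fragment exercising the full interrupt-handling and locking stack described by these options might look as follows (a sketch; the GPU-assignment symbol spelling matches its definition above):

CONFIG_LITMUS_LOCKING=y
CONFIG_LITMUS_AFFINITY_LOCKING=y
CONFIG_LITMUS_NESTED_LOCKING=y
CONFIG_LITMUS_DGL_SUPPORT=y
CONFIG_LITMUS_MAX_DGL_SIZE=10
CONFIG_LITMUS_SOFTIRQD=y
CONFIG_NR_LITMUS_SOFTIRQD=4
CONFIG_LITMUS_NVIDIA=y
CONFIG_LITMUS_AFFINITY_AWARE_GPU_ASSINGMENT=y
CONFIG_NV_DEVICE_NUM=8
CONFIG_NV_MAX_SIMULT_USERS=2
CONFIG_CUDA_4_0=y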
3382 diff --git a/litmus/Makefile b/litmus/Makefile
3383 index 7338180..080cbf6 100644
3384 --- a/litmus/Makefile
3385 +++ b/litmus/Makefile
3386 @@ -15,9 +15,11 @@ obj-y = sched_plugin.o litmus.o \
3387 locking.o \
3388 srp.o \
3389 bheap.o \
3390 + binheap.o \
3391 ctrldev.o \
3392 sched_gsn_edf.o \
3393 - sched_psn_edf.o
3394 + sched_psn_edf.o \
3395 + kfmlp_lock.o
3396
3397 obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
3398 obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o
3399 @@ -27,3 +29,10 @@ obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
3400 obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o
3401 obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o
3402 obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o
3403 +
3404 +obj-$(CONFIG_LITMUS_NESTED_LOCKING) += rsm_lock.o ikglp_lock.o
3405 +obj-$(CONFIG_LITMUS_SOFTIRQD) += litmus_softirq.o
3406 +obj-$(CONFIG_LITMUS_PAI_SOFTIRQD) += litmus_pai_softirq.o
3407 +obj-$(CONFIG_LITMUS_NVIDIA) += nvidia_info.o sched_trace_external.o
3408 +
3409 +obj-$(CONFIG_LITMUS_AFFINITY_LOCKING) += kexclu_affinity.o gpu_affinity.o
3410 diff --git a/litmus/affinity.c b/litmus/affinity.c
3411 index 3fa6dd7..cd93249 100644
3412 --- a/litmus/affinity.c
3413 +++ b/litmus/affinity.c
3414 @@ -26,7 +26,7 @@ void init_topology(void) {
3415 cpumask_weight((struct cpumask *)&neigh_info[cpu].neighbors[i]);
3416 }
3417 printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n",
3418 - cpu, neigh_info[cpu].size[i], i,
3419 + cpu, neigh_info[cpu].size[i], i,
3420 *cpumask_bits(neigh_info[cpu].neighbors[i]));
3421 }
3422
3423 diff --git a/litmus/binheap.c b/litmus/binheap.c
3424 new file mode 100644
3425 index 0000000..8d42403
3426 --- /dev/null
3427 +++ b/litmus/binheap.c
3428 @@ -0,0 +1,443 @@
3429 +#include <litmus/binheap.h>
3430 +
3431 +//extern void dump_node_data(struct binheap_node* parent, struct binheap_node* child);
3432 +//extern void dump_node_data2(struct binheap_handle *handle, struct binheap_node* bad_node);
3433 +
3434 +int binheap_is_in_this_heap(struct binheap_node *node,
3435 + struct binheap_handle* heap)
3436 +{
3437 + if(!binheap_is_in_heap(node)) {
3438 + return 0;
3439 + }
3440 +
3441 + while(node->parent != NULL) {
3442 + node = node->parent;
3443 + }
3444 +
3445 + return (node == heap->root);
3446 +}
3447 +
3448 +/* Update the node reference pointers. Same logic as Litmus binomial heap. */
3449 +static void __update_ref(struct binheap_node *parent,
3450 + struct binheap_node *child)
3451 +{
3452 + *(parent->ref_ptr) = child;
3453 + *(child->ref_ptr) = parent;
3454 +
3455 + swap(parent->ref_ptr, child->ref_ptr);
3456 +}
3457 +
3458 +/* Swaps data between two nodes. */
3459 +static void __binheap_swap(struct binheap_node *parent,
3460 + struct binheap_node *child)
3461 +{
3462 +// if(parent == BINHEAP_POISON || child == BINHEAP_POISON) {
3463 +// dump_node_data(parent, child);
3464 +// BUG();
3465 +// }
3466 +
3467 + swap(parent->data, child->data);
3468 + __update_ref(parent, child);
3469 +}
3470 +
3471 +
3472 +/* Swaps memory and data between two nodes. Actual nodes swap instead of
3473 + * just data. Needed when we delete nodes from the heap.
3474 + */
3475 +static void __binheap_swap_safe(struct binheap_handle *handle,
3476 + struct binheap_node *a,
3477 + struct binheap_node *b)
3478 +{
3479 + swap(a->data, b->data);
3480 + __update_ref(a, b);
3481 +
3482 + if((a->parent != NULL) && (a->parent == b->parent)) {
3483 + /* special case: shared parent */
3484 + swap(a->parent->left, a->parent->right);
3485 + }
3486 + else {
3487 + /* Update pointers to swap parents. */
3488 +
3489 + if(a->parent) {
3490 + if(a == a->parent->left) {
3491 + a->parent->left = b;
3492 + }
3493 + else {
3494 + a->parent->right = b;
3495 + }
3496 + }
3497 +
3498 + if(b->parent) {
3499 + if(b == b->parent->left) {
3500 + b->parent->left = a;
3501 + }
3502 + else {
3503 + b->parent->right = a;
3504 + }
3505 + }
3506 +
3507 + swap(a->parent, b->parent);
3508 + }
3509 +
3510 + /* swap children */
3511 +
3512 + if(a->left) {
3513 + a->left->parent = b;
3514 +
3515 + if(a->right) {
3516 + a->right->parent = b;
3517 + }
3518 + }
3519 +
3520 + if(b->left) {
3521 + b->left->parent = a;
3522 +
3523 + if(b->right) {
3524 + b->right->parent = a;
3525 + }
3526 + }
3527 +
3528 + swap(a->left, b->left);
3529 + swap(a->right, b->right);
3530 +
3531 +
3532 + /* update next/last/root pointers */
3533 +
3534 + if(a == handle->next) {
3535 + handle->next = b;
3536 + }
3537 + else if(b == handle->next) {
3538 + handle->next = a;
3539 + }
3540 +
3541 + if(a == handle->last) {
3542 + handle->last = b;
3543 + }
3544 + else if(b == handle->last) {
3545 + handle->last = a;
3546 + }
3547 +
3548 + if(a == handle->root) {
3549 + handle->root = b;
3550 + }
3551 + else if(b == handle->root) {
3552 + handle->root = a;
3553 + }
3554 +}
3555 +
3556 +
3557 +/**
3558 + * Update the pointer to the last node in the complete binary tree.
3559 + * Called internally after the root node has been deleted.
3560 + */
3561 +static void __binheap_update_last(struct binheap_handle *handle)
3562 +{
3563 + struct binheap_node *temp = handle->last;
3564 +
3565 + /* find a "bend" in the tree. */
3566 + while(temp->parent && (temp == temp->parent->left)) {
3567 + temp = temp->parent;
3568 + }
3569 +
3570 + /* step over to sibling if we're not at root */
3571 + if(temp->parent != NULL) {
3572 + temp = temp->parent->left;
3573 + }
3574 +
3575 + /* now travel right as far as possible. */
3576 + while(temp->right != NULL) {
3577 + temp = temp->right;
3578 + }
3579 +
3580 + /* take one step to the left if we're not at the bottom-most level. */
3581 + if(temp->left != NULL) {
3582 + temp = temp->left;
3583 + }
3584 +
3585 + //BUG_ON(!(temp->left == NULL && temp->right == NULL));
3586 +
3587 + handle->last = temp;
3588 +}
3589 +
3590 +/**
3591 + * Update the pointer to the node that will take the next inserted node.
3592 + * Called internally after a node has been inserted.
3593 + */
3594 +static void __binheap_update_next(struct binheap_handle *handle)
3595 +{
3596 + struct binheap_node *temp = handle->next;
3597 +
3598 + /* find a "bend" in the tree. */
3599 + while(temp->parent && (temp == temp->parent->right)) {
3600 + temp = temp->parent;
3601 + }
3602 +
3603 + /* step over to sibling if we're not at root */
3604 + if(temp->parent != NULL) {
3605 + temp = temp->parent->right;
3606 + }
3607 +
3608 + /* now travel left as far as possible. */
3609 + while(temp->left != NULL) {
3610 + temp = temp->left;
3611 + }
3612 +
3613 + handle->next = temp;
3614 +}
3615 +
3616 +
3617 +
3618 +/* bubble node up towards root */
3619 +static void __binheap_bubble_up(
3620 + struct binheap_handle *handle,
3621 + struct binheap_node *node)
3622 +{
3623 + //BUG_ON(!binheap_is_in_heap(node));
3624 +// if(!binheap_is_in_heap(node))
3625 +// {
3626 +// dump_node_data2(handle, node);
3627 +// BUG();
3628 +// }
3629 +
3630 + while((node->parent != NULL) &&
3631 + ((node->data == BINHEAP_POISON) /* let BINHEAP_POISON data bubble to the top */ ||
3632 + handle->compare(node, node->parent))) {
3633 + __binheap_swap(node->parent, node);
3634 + node = node->parent;
3635 +
3636 +// if(!binheap_is_in_heap(node))
3637 +// {
3638 +// dump_node_data2(handle, node);
3639 +// BUG();
3640 +// }
3641 + }
3642 +}
3643 +
3644 +
3645 +/* bubble node down, swapping with min-child */
3646 +static void __binheap_bubble_down(struct binheap_handle *handle)
3647 +{
3648 + struct binheap_node *node = handle->root;
3649 +
3650 + while(node->left != NULL) {
3651 + if(node->right && handle->compare(node->right, node->left)) {
3652 + if(handle->compare(node->right, node)) {
3653 + __binheap_swap(node, node->right);
3654 + node = node->right;
3655 + }
3656 + else {
3657 + break;
3658 + }
3659 + }
3660 + else {
3661 + if(handle->compare(node->left, node)) {
3662 + __binheap_swap(node, node->left);
3663 + node = node->left;
3664 + }
3665 + else {
3666 + break;
3667 + }
3668 + }
3669 + }
3670 +}
3671 +
3672 +
3673 +
3674 +void __binheap_add(struct binheap_node *new_node,
3675 + struct binheap_handle *handle,
3676 + void *data)
3677 +{
3678 +// if(binheap_is_in_heap(new_node))
3679 +// {
3680 +// dump_node_data2(handle, new_node);
3681 +// BUG();
3682 +// }
3683 +
3684 + new_node->data = data;
3685 + new_node->ref = new_node;
3686 + new_node->ref_ptr = &(new_node->ref);
3687 +
3688 + if(!binheap_empty(handle)) {
3689 + /* insert left side first */
3690 + if(handle->next->left == NULL) {
3691 + handle->next->left = new_node;
3692 + new_node->parent = handle->next;
3693 + new_node->left = NULL;
3694 + new_node->right = NULL;
3695 +
3696 + handle->last = new_node;
3697 +
3698 + __binheap_bubble_up(handle, new_node);
3699 + }
3700 + else {
3701 + /* left occupied. insert right. */
3702 + handle->next->right = new_node;
3703 + new_node->parent = handle->next;
3704 + new_node->left = NULL;
3705 + new_node->right = NULL;
3706 +
3707 + handle->last = new_node;
3708 +
3709 + __binheap_update_next(handle);
3710 + __binheap_bubble_up(handle, new_node);
3711 + }
3712 + }
3713 + else {
3714 + /* first node in heap */
3715 +
3716 + new_node->parent = NULL;
3717 + new_node->left = NULL;
3718 + new_node->right = NULL;
3719 +
3720 + handle->root = new_node;
3721 + handle->next = new_node;
3722 + handle->last = new_node;
3723 + }
3724 +}
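A minimal usage sketch of the binheap interface implemented above. The wrapper macros (INIT_BINHEAP_NODE, binheap_add, binheap_empty, binheap_top_entry, binheap_delete_root, binheap_entry) come from include/litmus/binheap.h; the handle initialization and compare-callback wiring are assumed rather than shown here.

#if 0	/* usage sketch, for illustration only */
struct demo_item {
	int key;
	struct binheap_node node;
};

/* Nonzero if 'a' should be ordered above 'b' (min-heap on key). */
static int demo_min_order(struct binheap_node *a, struct binheap_node *b)
{
	struct demo_item *ia = binheap_entry(a, struct demo_item, node);
	struct demo_item *ib = binheap_entry(b, struct demo_item, node);
	return ia->key < ib->key;
}

static void demo(struct binheap_handle *heap, struct demo_item *item)
{
	/* 'heap' is assumed to have been initialized elsewhere with
	 * demo_min_order as its compare callback. */
	INIT_BINHEAP_NODE(&item->node);
	binheap_add(&item->node, heap, struct demo_item, node);

	if (!binheap_empty(heap)) {
		struct demo_item *top =
			binheap_top_entry(heap, struct demo_item, node);
		binheap_delete_root(heap, struct demo_item, node);	/* pop */
		(void)top;
	}
}
#endif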
3725 +
3726 +
3727 +
3728 +/**
3729 + * Removes the root node from the heap. Before removal, the root's data is
3730 + * coalesced back into its original container node, which the caller may free.
3731 + *
3732 + * The 'last' node in the tree is then swapped up to the root and bubbled
3733 + * down.
3734 + */
3735 +void __binheap_delete_root(struct binheap_handle *handle,
3736 + struct binheap_node *container)
3737 +{
3738 + struct binheap_node *root = handle->root;
3739 +
3740 +// if(!binheap_is_in_heap(container))
3741 +// {
3742 +// dump_node_data2(handle, container);
3743 +// BUG();
3744 +// }
3745 +
3746 + if(root != container) {
3747 + /* coalesce */
3748 + __binheap_swap_safe(handle, root, container);
3749 + root = container;
3750 + }
3751 +
3752 + if(handle->last != root) {
3753 + /* swap 'last' node up to root and bubble it down. */
3754 +
3755 + struct binheap_node *to_move = handle->last;
3756 +
3757 + if(to_move->parent != root) {
3758 + handle->next = to_move->parent;
3759 +
3760 + if(handle->next->right == to_move) {
3761 + /* disconnect from parent */
3762 + to_move->parent->right = NULL;
3763 + handle->last = handle->next->left;
3764 + }
3765 + else {
3766 + /* find new 'last' before we disconnect */
3767 + __binheap_update_last(handle);
3768 +
3769 + /* disconnect from parent */
3770 + to_move->parent->left = NULL;
3771 + }
3772 + }
3773 + else {
3774 + /* 'last' is direct child of root */
3775 +
3776 + handle->next = to_move;
3777 +
3778 + if(to_move == to_move->parent->right) {
3779 + to_move->parent->right = NULL;
3780 + handle->last = to_move->parent->left;
3781 + }
3782 + else {
3783 + to_move->parent->left = NULL;
3784 + handle->last = to_move;
3785 + }
3786 + }
3787 + to_move->parent = NULL;
3788 +
3789 + /* reconnect as root. We can't just swap data ptrs since root node
3790 + * may be freed after this function returns.
3791 + */
3792 + to_move->left = root->left;
3793 + to_move->right = root->right;
3794 + if(to_move->left != NULL) {
3795 + to_move->left->parent = to_move;
3796 + }
3797 + if(to_move->right != NULL) {
3798 + to_move->right->parent = to_move;
3799 + }
3800 +
3801 + handle->root = to_move;
3802 +
3803 + /* bubble down */
3804 + __binheap_bubble_down(handle);
3805 + }
3806 + else {
3807 + /* removing last node in tree */
3808 + handle->root = NULL;
3809 + handle->next = NULL;
3810 + handle->last = NULL;
3811 + }
3812 +
3813 + /* mark as removed */
3814 + container->parent = BINHEAP_POISON;
3815 +}
3816 +
3817 +
3818 +/**
3819 + * Delete an arbitrary node. Bubble node to delete up to the root,
3820 + * and then delete the root.
3821 + */
3822 +void __binheap_delete(struct binheap_node *node_to_delete,
3823 + struct binheap_handle *handle)
3824 +{
3825 + struct binheap_node *target = node_to_delete->ref;
3826 + void *temp_data = target->data;
3827 +
3828 +// if(!binheap_is_in_heap(node_to_delete))
3829 +// {
3830 +// dump_node_data2(handle, node_to_delete);
3831 +// BUG();
3832 +// }
3833 +//
3834 +// if(!binheap_is_in_heap(target))
3835 +// {
3836 +// dump_node_data2(handle, target);
3837 +// BUG();
3838 +// }
3839 +
3840 + /* temporarily set data to BINHEAP_POISON so the node bubbles up to the root. */
3841 + target->data = BINHEAP_POISON;
3842 +
3843 + __binheap_bubble_up(handle, target);
3844 + __binheap_delete_root(handle, node_to_delete);
3845 +
3846 + node_to_delete->data = temp_data; /* restore node data pointer */
3847 + //node_to_delete->parent = BINHEAP_POISON; /* poison the node */
3848 +}
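For reference, arbitrary removal is layered on the root-removal path: the target's data is temporarily poisoned so it wins every comparison in __binheap_bubble_up(), rises to the root, and is then removed via __binheap_delete_root(). Callers normally go through the binheap_delete() wrapper, as in the ikglp_lock.c code further below; a sketch:

#if 0	/* illustration only: external callers use the wrapper macro */
	/* remove an arbitrary element that is currently in 'heap' */
	binheap_delete(&item->node, heap);
#endif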
3849 +
3850 +/**
3851 + * Bubble up a node whose key has decreased in value (i.e., whose priority has increased).
3852 + */
3853 +void __binheap_decrease(struct binheap_node *orig_node,
3854 + struct binheap_handle *handle)
3855 +{
3856 + struct binheap_node *target = orig_node->ref;
3857 +
3858 +// if(!binheap_is_in_heap(orig_node))
3859 +// {
3860 +// dump_node_data2(handle, orig_node);
3861 +// BUG();
3862 +// }
3863 +//
3864 +// if(!binheap_is_in_heap(target))
3865 +// {
3866 +// dump_node_data2(handle, target);
3867 +// BUG();
3868 +// }
3869 +//
3870 + __binheap_bubble_up(handle, target);
3871 +}
3872 diff --git a/litmus/edf_common.c b/litmus/edf_common.c
3873 index 9b44dc2..b346bdd 100644
3874 --- a/litmus/edf_common.c
3875 +++ b/litmus/edf_common.c
3876 @@ -12,40 +12,61 @@
3877 #include <litmus/sched_plugin.h>
3878 #include <litmus/sched_trace.h>
3879
3880 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
3881 +#include <litmus/locking.h>
3882 +#endif
3883 +
3884 #include <litmus/edf_common.h>
3885
3886 +
3887 +
3888 /* edf_higher_prio - returns true if first has a higher EDF priority
3889 * than second. Deadline ties are broken by PID.
3890 *
3891 * both first and second may be NULL
3892 */
3893 -int edf_higher_prio(struct task_struct* first,
3894 - struct task_struct* second)
3895 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
3896 +int __edf_higher_prio(
3897 + struct task_struct* first, comparison_mode_t first_mode,
3898 + struct task_struct* second, comparison_mode_t second_mode)
3899 +#else
3900 +int edf_higher_prio(struct task_struct* first, struct task_struct* second)
3901 +#endif
3902 {
3903 struct task_struct *first_task = first;
3904 struct task_struct *second_task = second;
3905
3906 /* There is no point in comparing a task to itself. */
3907 if (first && first == second) {
3908 - TRACE_TASK(first,
3909 - "WARNING: pointless edf priority comparison.\n");
3910 + TRACE_CUR("WARNING: pointless edf priority comparison: %s/%d\n", first->comm, first->pid);
3911 + WARN_ON(1);
3912 return 0;
3913 }
3914
3915
3916 /* check for NULL tasks */
3917 - if (!first || !second)
3918 + if (!first || !second) {
3919 return first && !second;
3920 + }
3921
3922 #ifdef CONFIG_LITMUS_LOCKING
3923 -
3924 - /* Check for inherited priorities. Change task
3925 + /* Check for EFFECTIVE priorities. Change task
3926 * used for comparison in such a case.
3927 */
3928 - if (unlikely(first->rt_param.inh_task))
3929 + if (unlikely(first->rt_param.inh_task)
3930 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
3931 + && (first_mode == EFFECTIVE)
3932 +#endif
3933 + ) {
3934 first_task = first->rt_param.inh_task;
3935 - if (unlikely(second->rt_param.inh_task))
3936 + }
3937 + if (unlikely(second->rt_param.inh_task)
3938 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
3939 + && (second_mode == EFFECTIVE)
3940 +#endif
3941 + ) {
3942 second_task = second->rt_param.inh_task;
3943 + }
3944
3945 /* Check for priority boosting. Tie-break by start of boosting.
3946 */
3947 @@ -53,37 +74,109 @@ int edf_higher_prio(struct task_struct* first,
3948 /* first_task is boosted, how about second_task? */
3949 if (!is_priority_boosted(second_task) ||
3950 lt_before(get_boost_start(first_task),
3951 - get_boost_start(second_task)))
3952 + get_boost_start(second_task))) {
3953 return 1;
3954 - else
3955 + }
3956 + else {
3957 return 0;
3958 - } else if (unlikely(is_priority_boosted(second_task)))
3959 + }
3960 + }
3961 + else if (unlikely(is_priority_boosted(second_task))) {
3962 /* second_task is boosted, first is not*/
3963 return 0;
3964 + }
3965
3966 #endif
3967
3968 +// // rate-monotonic for testing
3969 +// if (!is_realtime(second_task)) {
3970 +// return true;
3971 +// }
3972 +//
3973 +// if (shorter_period(first_task, second_task)) {
3974 +// return true;
3975 +// }
3976 +//
3977 +// if (get_period(first_task) == get_period(second_task)) {
3978 +// if (first_task->pid < second_task->pid) {
3979 +// return true;
3980 +// }
3981 +// else if (first_task->pid == second_task->pid) {
3982 +// return !second->rt_param.inh_task;
3983 +// }
3984 +// }
3985 +
3986 + if (!is_realtime(second_task)) {
3987 + return true;
3988 + }
3989 +
3990 + if (earlier_deadline(first_task, second_task)) {
3991 + return true;
3992 + }
3993 + if (get_deadline(first_task) == get_deadline(second_task)) {
3994 +
3995 + if (shorter_period(first_task, second_task)) {
3996 + return true;
3997 + }
3998 + if (get_rt_period(first_task) == get_rt_period(second_task)) {
3999 + if (first_task->pid < second_task->pid) {
4000 + return true;
4001 + }
4002 + if (first_task->pid == second_task->pid) {
4003 +#ifdef CONFIG_LITMUS_SOFTIRQD
4004 + if (first_task->rt_param.is_proxy_thread <
4005 + second_task->rt_param.is_proxy_thread) {
4006 + return true;
4007 + }
4008 + if(first_task->rt_param.is_proxy_thread == second_task->rt_param.is_proxy_thread) {
4009 + return !second->rt_param.inh_task;
4010 + }
4011 +#else
4012 + return !second->rt_param.inh_task;
4013 +#endif
4014 + }
4015 +
4016 + }
4017 + }
4018 +
4019 + return false;
4020 +}
4021 +
4022 +
4023 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
4024 +int edf_higher_prio(struct task_struct* first, struct task_struct* second)
4025 +{
4026 + return __edf_higher_prio(first, EFFECTIVE, second, EFFECTIVE);
4027 +}
4028 +
4029 +int edf_max_heap_order(struct binheap_node *a, struct binheap_node *b)
4030 +{
4031 + struct nested_info *l_a = (struct nested_info *)binheap_entry(a, struct nested_info, hp_binheap_node);
4032 + struct nested_info *l_b = (struct nested_info *)binheap_entry(b, struct nested_info, hp_binheap_node);
4033
4034 - return !is_realtime(second_task) ||
4035 + return __edf_higher_prio(l_a->hp_waiter_eff_prio, EFFECTIVE, l_b->hp_waiter_eff_prio, EFFECTIVE);
4036 +}
4037
4038 - /* is the deadline of the first task earlier?
4039 - * Then it has higher priority.
4040 - */
4041 - earlier_deadline(first_task, second_task) ||
4042 +int edf_min_heap_order(struct binheap_node *a, struct binheap_node *b)
4043 +{
4044 + return edf_max_heap_order(b, a); // swap comparison
4045 +}
4046
4047 - /* Do we have a deadline tie?
4048 - * Then break by PID.
4049 - */
4050 - (get_deadline(first_task) == get_deadline(second_task) &&
4051 - (first_task->pid < second_task->pid ||
4052 +int edf_max_heap_base_priority_order(struct binheap_node *a, struct binheap_node *b)
4053 +{
4054 + struct nested_info *l_a = (struct nested_info *)binheap_entry(a, struct nested_info, hp_binheap_node);
4055 + struct nested_info *l_b = (struct nested_info *)binheap_entry(b, struct nested_info, hp_binheap_node);
4056
4057 - /* If the PIDs are the same then the task with the inherited
4058 - * priority wins.
4059 - */
4060 - (first_task->pid == second_task->pid &&
4061 - !second->rt_param.inh_task)));
4062 + return __edf_higher_prio(l_a->hp_waiter_eff_prio, BASE, l_b->hp_waiter_eff_prio, BASE);
4063 }
4064
4065 +int edf_min_heap_base_priority_order(struct binheap_node *a, struct binheap_node *b)
4066 +{
4067 + return edf_max_heap_base_priority_order(b, a); // swap comparison
4068 +}
4069 +#endif
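The rewritten comparison above resolves ties in a fixed order: earlier deadline, then shorter period, then lower PID, then (under CONFIG_LITMUS_SOFTIRQD) the non-proxy thread, and finally the inheritance check, where 'first' wins only if 'second' carries no inherited priority. A sketch with hypothetical tasks A and B:

#if 0	/* illustrative only; A and B are hypothetical task_struct pointers */
	/* deadline(A) == deadline(B), period(A) < period(B)
	 *   => __edf_higher_prio(A, BASE, B, BASE) is true (A wins on period)
	 * equal deadlines and periods, pid(A) < pid(B)
	 *   => A wins on PID
	 * equal PIDs (a task compared against its own inherited alias)
	 *   => A wins only if B has no rt_param.inh_task
	 */
	if (__edf_higher_prio(A, BASE, B, BASE))
		/* A is ordered ahead of B in the ready queue */;
#endif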
4070 +
4071 +
4072 int edf_ready_order(struct bheap_node* a, struct bheap_node* b)
4073 {
4074 return edf_higher_prio(bheap2task(a), bheap2task(b));
4075 diff --git a/litmus/fdso.c b/litmus/fdso.c
4076 index aa7b384..18fc61b 100644
4077 --- a/litmus/fdso.c
4078 +++ b/litmus/fdso.c
4079 @@ -20,9 +20,22 @@
4080
4081 extern struct fdso_ops generic_lock_ops;
4082
4083 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
4084 +extern struct fdso_ops generic_affinity_ops;
4085 +#endif
4086 +
4087 static const struct fdso_ops* fdso_ops[] = {
4088 &generic_lock_ops, /* FMLP_SEM */
4089 &generic_lock_ops, /* SRP_SEM */
4090 + &generic_lock_ops, /* RSM_MUTEX */
4091 + &generic_lock_ops, /* IKGLP_SEM */
4092 + &generic_lock_ops, /* KFMLP_SEM */
4093 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
4094 + &generic_affinity_ops, /* IKGLP_SIMPLE_GPU_AFF_OBS */
4095 + &generic_affinity_ops, /* IKGLP_GPU_AFF_OBS */
4096 + &generic_affinity_ops, /* KFMLP_SIMPLE_GPU_AFF_OBS */
4097 + &generic_affinity_ops, /* KFMLP_GPU_AFF_OBS */
4098 +#endif
4099 };
4100
4101 static int fdso_create(void** obj_ref, obj_type_t type, void* __user config)
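The new fdso_ops[] slots are indexed by object type, so their order must mirror the obj_type_t enumeration in include/litmus/fdso.h, presumably extended to match. The sketch below is a hypothetical excerpt inferred from the slot comments above, not the actual header.

#if 0	/* hypothetical excerpt -- the real enum lives in include/litmus/fdso.h */
typedef enum {
	FMLP_SEM,
	SRP_SEM,
	RSM_MUTEX,
	IKGLP_SEM,
	KFMLP_SEM,
	IKGLP_SIMPLE_GPU_AFF_OBS,
	IKGLP_GPU_AFF_OBS,
	KFMLP_SIMPLE_GPU_AFF_OBS,
	KFMLP_GPU_AFF_OBS,
} obj_type_t;
#endif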
4102 diff --git a/litmus/gpu_affinity.c b/litmus/gpu_affinity.c
4103 new file mode 100644
4104 index 0000000..9762be1
4105 --- /dev/null
4106 +++ b/litmus/gpu_affinity.c
4107 @@ -0,0 +1,113 @@
4108 +
4109 +#ifdef CONFIG_LITMUS_NVIDIA
4110 +
4111 +#include <linux/sched.h>
4112 +#include <litmus/litmus.h>
4113 +#include <litmus/gpu_affinity.h>
4114 +
4115 +#include <litmus/sched_trace.h>
4116 +
4117 +#define OBSERVATION_CAP (2ULL * 1000000000ULL)
4118 +
4119 +static fp_t update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed)
4120 +{
4121 + fp_t relative_err;
4122 + fp_t err, new;
4123 + fp_t actual = _integer_to_fp(observed);
4124 +
4125 + err = _sub(actual, fb->est);
4126 + new = _add(_mul(a, err), _mul(b, fb->accum_err));
4127 +
4128 + relative_err = _div(err, actual);
4129 +
4130 + fb->est = new;
4131 + fb->accum_err = _add(fb->accum_err, err);
4132 +
4133 + return relative_err;
4134 +}
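The update above is a PI-controller-style feedback rule on the execution-time estimate. A sketch of the same step in plain doubles (the kernel code uses the litmus fp_t fixed-point helpers _sub, _mul, _add, _div instead); demo_update and its parameters are illustrative names only:

#if 0	/* editorial sketch, for clarity only */
/* One step of the estimator:
 *   err        = observed - est
 *   est'       = a * err + b * accum_err
 *   accum_err' = accum_err + err
 *   returns err / observed (the relative error)
 */
static double demo_update(double *est, double *accum_err,
			  double a, double b, double observed)
{
	double err = observed - *est;
	double rel_err = err / observed;

	*est = a * err + b * (*accum_err);
	*accum_err += err;

	return rel_err;
}
#endif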
4135 +
4136 +void update_gpu_estimate(struct task_struct *t, lt_t observed)
4137 +{
4138 + feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]);
4139 +
4140 + BUG_ON(tsk_rt(t)->gpu_migration > MIG_LAST);
4141 +
4142 + if(unlikely(fb->est.val == 0)) {
4143 + // kludge-- cap observed values to prevent whacky estimations.
4144 + // whacky stuff happens during the first few jobs.
4145 + if(unlikely(observed > OBSERVATION_CAP)) {
4146 + TRACE_TASK(t, "Crazy observation was capped: %llu -> %llu\n",
4147 + observed, OBSERVATION_CAP);
4148 + observed = OBSERVATION_CAP;
4149 + }
4150 +
4151 + // take the first observation as our estimate
4152 + // (initial value of 0 was bogus anyhow)
4153 + fb->est = _integer_to_fp(observed);
4154 + fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work.
4155 + }
4156 + else {
4157 + fp_t rel_err = update_estimate(fb,
4158 + tsk_rt(t)->gpu_fb_param_a[tsk_rt(t)->gpu_migration],
4159 + tsk_rt(t)->gpu_fb_param_b[tsk_rt(t)->gpu_migration],
4160 + observed);
4161 +
4162 + if(unlikely(_fp_to_integer(fb->est) <= 0)) {
4163 + TRACE_TASK(t, "Invalid estimate. Patching.\n");
4164 + fb->est = _integer_to_fp(observed);
4165 + fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work.
4166 + }
4167 + else {
4168 +// struct migration_info mig_info;
4169 +
4170 + sched_trace_prediction_err(t,
4171 + &(tsk_rt(t)->gpu_migration),
4172 + &rel_err);
4173 +
4174 +// mig_info.observed = observed;
4175 +// mig_info.estimated = get_gpu_estimate(t, tsk_rt(t)->gpu_migration);
4176 +// mig_info.distance = tsk_rt(t)->gpu_migration;
4177 +//
4178 +// sched_trace_migration(t, &mig_info);
4179 + }
4180 + }
4181 +
4182 + TRACE_TASK(t, "GPU est update after (dist = %d, obs = %llu): %d.%d\n",
4183 + tsk_rt(t)->gpu_migration,
4184 + observed,
4185 + _fp_to_integer(fb->est),
4186 + _point(fb->est));
4187 +}
4188 +
4189 +gpu_migration_dist_t gpu_migration_distance(int a, int b)
4190 +{
4191 + // GPUs organized in a binary hierarchy, no more than 2^MIG_FAR GPUs
4192 + int i;
4193 + int dist;
4194 +
4195 + if(likely(a >= 0 && b >= 0)) {
4196 + for(i = 0; i <= MIG_FAR; ++i) {
4197 + if(a>>i == b>>i) {
4198 + dist = i;
4199 + goto out;
4200 + }
4201 + }
4202 + dist = MIG_NONE; // hopefully never reached.
4203 + TRACE_CUR("WARNING: GPU distance too far! %d -> %d\n", a, b);
4204 + }
4205 + else {
4206 + dist = MIG_NONE;
4207 + }
4208 +
4209 +out:
4210 + TRACE_CUR("Distance %d -> %d is %d\n",
4211 + a, b, dist);
4212 +
4213 + return dist;
4214 +}
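A worked example of the distance metric above, assuming eight GPUs numbered 0..7 and MIG_FAR of at least 3:

#if 0	/* illustrative only */
	/* dist(a, b) = smallest i in [0, MIG_FAR] with (a >> i) == (b >> i):
	 *   gpu 2 -> gpu 2 : 0   (same GPU)
	 *   gpu 2 -> gpu 3 : 1   (siblings in the binary hierarchy)
	 *   gpu 0 -> gpu 5 : 3   (only the root of the hierarchy is shared)
	 * A negative index (no GPU assigned) yields MIG_NONE.
	 */
#endif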
4215 +
4216 +
4217 +
4218 +
4219 +#endif
4220 +
4221 diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c
4222 new file mode 100644
4223 index 0000000..83b708a
4224 --- /dev/null
4225 +++ b/litmus/ikglp_lock.c
4226 @@ -0,0 +1,2838 @@
4227 +#include <linux/slab.h>
4228 +#include <linux/uaccess.h>
4229 +
4230 +#include <litmus/trace.h>
4231 +#include <litmus/sched_plugin.h>
4232 +#include <litmus/fdso.h>
4233 +
4234 +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
4235 +#include <litmus/gpu_affinity.h>
4236 +#include <litmus/nvidia_info.h>
4237 +#endif
4238 +
4239 +#include <litmus/ikglp_lock.h>
4240 +
4241 +// big signed value.
4242 +#define IKGLP_INVAL_DISTANCE 0x7FFFFFFF
4243 +
4244 +int ikglp_max_heap_base_priority_order(struct binheap_node *a,
4245 + struct binheap_node *b)
4246 +{
4247 + ikglp_heap_node_t *d_a = binheap_entry(a, ikglp_heap_node_t, node);
4248 + ikglp_heap_node_t *d_b = binheap_entry(b, ikglp_heap_node_t, node);
4249 +
4250 + BUG_ON(!d_a);
4251 + BUG_ON(!d_b);
4252 +
4253 + return litmus->__compare(d_a->task, BASE, d_b->task, BASE);
4254 +}
4255 +
4256 +int ikglp_min_heap_base_priority_order(struct binheap_node *a,
4257 + struct binheap_node *b)
4258 +{
4259 + ikglp_heap_node_t *d_a = binheap_entry(a, ikglp_heap_node_t, node);
4260 + ikglp_heap_node_t *d_b = binheap_entry(b, ikglp_heap_node_t, node);
4261 +
4262 + return litmus->__compare(d_b->task, BASE, d_a->task, BASE);
4263 +}
4264 +
4265 +int ikglp_donor_max_heap_base_priority_order(struct binheap_node *a,
4266 + struct binheap_node *b)
4267 +{
4268 + ikglp_wait_state_t *d_a = binheap_entry(a, ikglp_wait_state_t, node);
4269 + ikglp_wait_state_t *d_b = binheap_entry(b, ikglp_wait_state_t, node);
4270 +
4271 + return litmus->__compare(d_a->task, BASE, d_b->task, BASE);
4272 +}
4273 +
4274 +
4275 +int ikglp_min_heap_donee_order(struct binheap_node *a,
4276 + struct binheap_node *b)
4277 +{
4278 + struct task_struct *prio_a, *prio_b;
4279 +
4280 + ikglp_donee_heap_node_t *d_a =
4281 + binheap_entry(a, ikglp_donee_heap_node_t, node);
4282 + ikglp_donee_heap_node_t *d_b =
4283 + binheap_entry(b, ikglp_donee_heap_node_t, node);
4284 +
4285 + if(!d_a->donor_info) {
4286 + prio_a = d_a->task;
4287 + }
4288 + else {
4289 + prio_a = d_a->donor_info->task;
4290 + BUG_ON(d_a->task != d_a->donor_info->donee_info->task);
4291 + }
4292 +
4293 + if(!d_b->donor_info) {
4294 + prio_b = d_b->task;
4295 + }
4296 + else {
4297 + prio_b = d_b->donor_info->task;
4298 + BUG_ON(d_b->task != d_b->donor_info->donee_info->task);
4299 + }
4300 +
4301 + // note reversed order
4302 + return litmus->__compare(prio_b, BASE, prio_a, BASE);
4303 +}
4304 +
4305 +
4306 +
4307 +static inline int ikglp_get_idx(struct ikglp_semaphore *sem,
4308 + struct fifo_queue *queue)
4309 +{
4310 + return (queue - &sem->fifo_queues[0]);
4311 +}
4312 +
4313 +static inline struct fifo_queue* ikglp_get_queue(struct ikglp_semaphore *sem,
4314 + struct task_struct *holder)
4315 +{
4316 + int i;
4317 + for(i = 0; i < sem->nr_replicas; ++i)
4318 + if(sem->fifo_queues[i].owner == holder)
4319 + return(&sem->fifo_queues[i]);
4320 + return(NULL);
4321 +}
4322 +
4323 +
4324 +
4325 +static struct task_struct* ikglp_find_hp_waiter(struct fifo_queue *kqueue,
4326 + struct task_struct *skip)
4327 +{
4328 + struct list_head *pos;
4329 + struct task_struct *queued, *found = NULL;
4330 +
4331 + list_for_each(pos, &kqueue->wait.task_list) {
4332 + queued = (struct task_struct*) list_entry(pos,
4333 + wait_queue_t, task_list)->private;
4334 +
4335 + /* Compare task prios, find high prio task. */
4336 + if(queued != skip && litmus->compare(queued, found))
4337 + found = queued;
4338 + }
4339 + return found;
4340 +}
4341 +
4342 +static struct fifo_queue* ikglp_find_shortest(struct ikglp_semaphore *sem,
4343 + struct fifo_queue *search_start)
4344 +{
4345 + // we start our search at search_start instead of at the beginning of the
4346 + // queue list to load-balance across all resources.
4347 + struct fifo_queue* step = search_start;
4348 + struct fifo_queue* shortest = sem->shortest_fifo_queue;
4349 +
4350 + do {
4351 + step = (step+1 != &sem->fifo_queues[sem->nr_replicas]) ?
4352 + step+1 : &sem->fifo_queues[0];
4353 +
4354 + if(step->count < shortest->count) {
4355 + shortest = step;
4356 + if(step->count == 0)
4357 + break; /* can't get any shorter */
4358 + }
4359 +
4360 + }while(step != search_start);
4361 +
4362 + return(shortest);
4363 +}
4364 +
4365 +static inline struct task_struct* ikglp_mth_highest(struct ikglp_semaphore *sem)
4366 +{
4367 + return binheap_top_entry(&sem->top_m, ikglp_heap_node_t, node)->task;
4368 +}
4369 +
4370 +
4371 +
4372 +#if 0
4373 +static void print_global_list(struct binheap_node* n, int depth)
4374 +{
4375 + ikglp_heap_node_t *global_heap_node;
4376 + char padding[81] = " ";
4377 +
4378 + if(n == NULL) {
4379 + TRACE_CUR("+-> %p\n", NULL);
4380 + return;
4381 + }
4382 +
4383 + global_heap_node = binheap_entry(n, ikglp_heap_node_t, node);
4384 +
4385 + if(depth*2 <= 80)
4386 + padding[depth*2] = '\0';
4387 +
4388 + TRACE_CUR("%s+-> %s/%d\n",
4389 + padding,
4390 + global_heap_node->task->comm,
4391 + global_heap_node->task->pid);
4392 +
4393 + if(n->left) print_global_list(n->left, depth+1);
4394 + if(n->right) print_global_list(n->right, depth+1);
4395 +}
4396 +
4397 +static void print_donees(struct ikglp_semaphore *sem, struct binheap_node *n, int depth)
4398 +{
4399 + ikglp_donee_heap_node_t *donee_node;
4400 + char padding[81] = " ";
4401 + struct task_struct* donor = NULL;
4402 +
4403 + if(n == NULL) {
4404 + TRACE_CUR("+-> %p\n", NULL);
4405 + return;
4406 + }
4407 +
4408 + donee_node = binheap_entry(n, ikglp_donee_heap_node_t, node);
4409 +
4410 + if(depth*2 <= 80)
4411 + padding[depth*2] = '\0';
4412 +
4413 + if(donee_node->donor_info) {
4414 + donor = donee_node->donor_info->task;
4415 + }
4416 +
4417 + TRACE_CUR("%s+-> %s/%d (d: %s/%d) (fq: %d)\n",
4418 + padding,
4419 + donee_node->task->comm,
4420 + donee_node->task->pid,
4421 + (donor) ? donor->comm : "nil",
4422 + (donor) ? donor->pid : -1,
4423 + ikglp_get_idx(sem, donee_node->fq));
4424 +
4425 + if(n->left) print_donees(sem, n->left, depth+1);
4426 + if(n->right) print_donees(sem, n->right, depth+1);
4427 +}
4428 +
4429 +static void print_donors(struct binheap_node *n, int depth)
4430 +{
4431 + ikglp_wait_state_t *donor_node;
4432 + char padding[81] = " ";
4433 +
4434 + if(n == NULL) {
4435 + TRACE_CUR("+-> %p\n", NULL);
4436 + return;
4437 + }
4438 +
4439 + donor_node = binheap_entry(n, ikglp_wait_state_t, node);
4440 +
4441 + if(depth*2 <= 80)
4442 + padding[depth*2] = '\0';
4443 +
4444 +
4445 + TRACE_CUR("%s+-> %s/%d (donee: %s/%d)\n",
4446 + padding,
4447 + donor_node->task->comm,
4448 + donor_node->task->pid,
4449 + donor_node->donee_info->task->comm,
4450 + donor_node->donee_info->task->pid);
4451 +
4452 + if(n->left) print_donors(n->left, depth+1);
4453 + if(n->right) print_donors(n->right, depth+1);
4454 +}
4455 +#endif
4456 +
4457 +static void ikglp_add_global_list(struct ikglp_semaphore *sem,
4458 + struct task_struct *t,
4459 + ikglp_heap_node_t *node)
4460 +{
4461 +
4462 +
4463 + node->task = t;
4464 + INIT_BINHEAP_NODE(&node->node);
4465 +
4466 + if(sem->top_m_size < sem->m) {
4467 + TRACE_CUR("Trivially adding %s/%d to top-m global list.\n",
4468 + t->comm, t->pid);
4469 +// TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size);
4470 +// print_global_list(sem->top_m.root, 1);
4471 +
4472 + binheap_add(&node->node, &sem->top_m, ikglp_heap_node_t, node);
4473 + ++(sem->top_m_size);
4474 +
4475 +// TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size);
4476 +// print_global_list(sem->top_m.root, 1);
4477 + }
4478 + else if(litmus->__compare(t, BASE, ikglp_mth_highest(sem), BASE)) {
4479 + ikglp_heap_node_t *evicted =
4480 + binheap_top_entry(&sem->top_m, ikglp_heap_node_t, node);
4481 +
4482 + TRACE_CUR("Adding %s/%d to top-m and evicting %s/%d.\n",
4483 + t->comm, t->pid,
4484 + evicted->task->comm, evicted->task->pid);
4485 +
4486 +// TRACE_CUR("Not-Top-M Before:\n");
4487 +// print_global_list(sem->not_top_m.root, 1);
4488 +// TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size);
4489 +// print_global_list(sem->top_m.root, 1);
4490 +
4491 +
4492 + binheap_delete_root(&sem->top_m, ikglp_heap_node_t, node);
4493 + INIT_BINHEAP_NODE(&evicted->node);
4494 + binheap_add(&evicted->node, &sem->not_top_m, ikglp_heap_node_t, node);
4495 +
4496 + binheap_add(&node->node, &sem->top_m, ikglp_heap_node_t, node);
4497 +
4498 +// TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size);
4499 +// print_global_list(sem->top_m.root, 1);
4500 +// TRACE_CUR("Not-Top-M After:\n");
4501 +// print_global_list(sem->not_top_m.root, 1);
4502 + }
4503 + else {
4504 + TRACE_CUR("Trivially adding %s/%d to not-top-m global list.\n",
4505 + t->comm, t->pid);
4506 +// TRACE_CUR("Not-Top-M Before:\n");
4507 +// print_global_list(sem->not_top_m.root, 1);
4508 +
4509 + binheap_add(&node->node, &sem->not_top_m, ikglp_heap_node_t, node);
4510 +
4511 +// TRACE_CUR("Not-Top-M After:\n");
4512 +// print_global_list(sem->not_top_m.root, 1);
4513 + }
4514 +}
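The two heaps maintain a running partition of all outstanding requests by base priority: sem->top_m keeps the m strongest requests with the weakest of them at its root (the "m-th highest"), while sem->not_top_m holds everyone else with its strongest request at the root, ready for promotion. A small scenario, assuming m = 2 and base priorities P1 > P2 > P3:

#if 0	/* illustrative scenario only */
	/* add P2: top_m = {P2},         not_top_m = {}
	 * add P3: top_m = {P2, P3},     not_top_m = {}     (root of top_m: P3)
	 * add P1: P1 beats the m-th highest (P3), so P3 is evicted:
	 *         top_m = {P1, P2},     not_top_m = {P3}
	 * ikglp_mth_highest() always returns the root of top_m (here P2).
	 */
#endif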
4515 +
4516 +
4517 +static void ikglp_del_global_list(struct ikglp_semaphore *sem,
4518 + struct task_struct *t,
4519 + ikglp_heap_node_t *node)
4520 +{
4521 + BUG_ON(!binheap_is_in_heap(&node->node));
4522 +
4523 + TRACE_CUR("Removing %s/%d from global list.\n", t->comm, t->pid);
4524 +
4525 + if(binheap_is_in_this_heap(&node->node, &sem->top_m)) {
4526 + TRACE_CUR("%s/%d is in top-m\n", t->comm, t->pid);
4527 +
4528 +// TRACE_CUR("Not-Top-M Before:\n");
4529 +// print_global_list(sem->not_top_m.root, 1);
4530 +// TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size);
4531 +// print_global_list(sem->top_m.root, 1);
4532 +
4533 +
4534 + binheap_delete(&node->node, &sem->top_m);
4535 +
4536 + if(!binheap_empty(&sem->not_top_m)) {
4537 + ikglp_heap_node_t *promoted =
4538 + binheap_top_entry(&sem->not_top_m, ikglp_heap_node_t, node);
4539 +
4540 + TRACE_CUR("Promoting %s/%d to top-m\n",
4541 + promoted->task->comm, promoted->task->pid);
4542 +
4543 + binheap_delete_root(&sem->not_top_m, ikglp_heap_node_t, node);
4544 + INIT_BINHEAP_NODE(&promoted->node);
4545 +
4546 + binheap_add(&promoted->node, &sem->top_m, ikglp_heap_node_t, node);
4547 + }
4548 + else {
4549 + TRACE_CUR("No one to promote to top-m.\n");
4550 + --(sem->top_m_size);
4551 + }
4552 +
4553 +// TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size);
4554 +// print_global_list(sem->top_m.root, 1);
4555 +// TRACE_CUR("Not-Top-M After:\n");
4556 +// print_global_list(sem->not_top_m.root, 1);
4557 + }
4558 + else {
4559 + TRACE_CUR("%s/%d is in not-top-m\n", t->comm, t->pid);
4560 +// TRACE_CUR("Not-Top-M Before:\n");
4561 +// print_global_list(sem->not_top_m.root, 1);
4562 +
4563 + binheap_delete(&node->node, &sem->not_top_m);
4564 +
4565 +// TRACE_CUR("Not-Top-M After:\n");
4566 +// print_global_list(sem->not_top_m.root, 1);
4567 + }
4568 +}
4569 +
4570 +
4571 +static void ikglp_add_donees(struct ikglp_semaphore *sem,
4572 + struct fifo_queue *fq,
4573 + struct task_struct *t,
4574 + ikglp_donee_heap_node_t* node)
4575 +{
4576 +// TRACE_CUR("Adding %s/%d to donee list.\n", t->comm, t->pid);
4577 +// TRACE_CUR("donees Before:\n");
4578 +// print_donees(sem, sem->donees.root, 1);
4579 +
4580 + node->task = t;
4581 + node->donor_info = NULL;
4582 + node->fq = fq;
4583 + INIT_BINHEAP_NODE(&node->node);
4584 +
4585 + binheap_add(&node->node, &sem->donees, ikglp_donee_heap_node_t, node);
4586 +
4587 +// TRACE_CUR("donees After:\n");
4588 +// print_donees(sem, sem->donees.root, 1);
4589 +}
4590 +
4591 +
4592 +static void ikglp_refresh_owners_prio_increase(struct task_struct *t,
4593 + struct fifo_queue *fq,
4594 + struct ikglp_semaphore *sem,
4595 + unsigned long flags)
4596 +{
4597 + // priority of 't' has increased (note: 't' might already be hp_waiter).
4598 + if ((t == fq->hp_waiter) || litmus->compare(t, fq->hp_waiter)) {
4599 + struct task_struct *old_max_eff_prio;
4600 + struct task_struct *new_max_eff_prio;
4601 + struct task_struct *new_prio = NULL;
4602 + struct task_struct *owner = fq->owner;
4603 +
4604 + if(fq->hp_waiter)
4605 + TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n",
4606 + fq->hp_waiter->comm, fq->hp_waiter->pid);
4607 + else
4608 + TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n");
4609 +
4610 + if(owner)
4611 + {
4612 + raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
4613 +
4614 +// TRACE_TASK(owner, "Heap Before:\n");
4615 +// print_hp_waiters(tsk_rt(owner)->hp_blocked_tasks.root, 0);
4616 +
4617 + old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
4618 +
4619 + fq->hp_waiter = t;
4620 + fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter);
4621 +
4622 + binheap_decrease(&fq->nest.hp_binheap_node,
4623 + &tsk_rt(owner)->hp_blocked_tasks);
4624 +
4625 +// TRACE_TASK(owner, "Heap After:\n");
4626 +// print_hp_waiters(tsk_rt(owner)->hp_blocked_tasks.root, 0);
4627 +
4628 + new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
4629 +
4630 + if(new_max_eff_prio != old_max_eff_prio) {
4631 + TRACE_TASK(t, "is new hp_waiter.\n");
4632 +
4633 + if ((effective_priority(owner) == old_max_eff_prio) ||
4634 + (litmus->__compare(new_max_eff_prio, BASE,
4635 + owner, EFFECTIVE))){
4636 + new_prio = new_max_eff_prio;
4637 + }
4638 + }
4639 + else {
4640 + TRACE_TASK(t, "no change in max_eff_prio of heap.\n");
4641 + }
4642 +
4643 + if(new_prio) {
4644 + // set new inheritance and propagate
4645 + TRACE_TASK(t, "Effective priority changed for owner %s/%d to %s/%d\n",
4646 + owner->comm, owner->pid,
4647 + new_prio->comm, new_prio->pid);
4648 + litmus->nested_increase_prio(owner, new_prio, &sem->lock,
4649 + flags); // unlocks lock.
4650 + }
4651 + else {
4652 + TRACE_TASK(t, "No change in effective priority (is %s/%d). Propagation halted.\n",
4653 + new_max_eff_prio->comm, new_max_eff_prio->pid);
4654 + raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
4655 + unlock_fine_irqrestore(&sem->lock, flags);
4656 + }
4657 + }
4658 + else {
4659 + fq->hp_waiter = t;
4660 + fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter);
4661 +
4662 + TRACE_TASK(t, "no owner.\n");
4663 + unlock_fine_irqrestore(&sem->lock, flags);
4664 + }
4665 + }
4666 + else {
4667 + TRACE_TASK(t, "hp_waiter is unaffected.\n");
4668 + unlock_fine_irqrestore(&sem->lock, flags);
4669 + }
4670 +}
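For context, each lock contributes one nested_info node (fq->nest here) to its owner's tsk_rt(owner)->hp_blocked_tasks heap, and the top of that heap is the owner's strongest pending inheritance. The refresh above only propagates when the heap's maximum actually changes and the owner's effective priority depends on it; a condensed view of the pattern (locals old_max/new_max are shorthand, and the fine-grained sem->lock release is elided):

#if 0	/* condensed restatement of the propagation pattern used above */
	raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
	old_max = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
	/* ... update/re-order this lock's nested_info node ... */
	new_max = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
	if (new_max != old_max &&
	    (effective_priority(owner) == old_max ||
	     litmus->__compare(new_max, BASE, owner, EFFECTIVE)))
		litmus->nested_increase_prio(owner, new_max, &sem->lock, flags);
	else
		raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
#endif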
4671 +
4672 +// hp_waiter has decreased
4673 +static void ikglp_refresh_owners_prio_decrease(struct fifo_queue *fq,
4674 + struct ikglp_semaphore *sem,
4675 + unsigned long flags)
4676 +{
4677 + struct task_struct *owner = fq->owner;
4678 +
4679 + struct task_struct *old_max_eff_prio;
4680 + struct task_struct *new_max_eff_prio;
4681 +
4682 + if(!owner) {
4683 + TRACE_CUR("No owner. Returning.\n");
4684 + unlock_fine_irqrestore(&sem->lock, flags);
4685 + return;
4686 + }
4687 +
4688 + TRACE_CUR("ikglp_refresh_owners_prio_decrease\n");
4689 +
4690 + raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
4691 +
4692 + old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
4693 +
4694 + binheap_delete(&fq->nest.hp_binheap_node, &tsk_rt(owner)->hp_blocked_tasks);
4695 + fq->nest.hp_waiter_eff_prio = fq->hp_waiter;
4696 + binheap_add(&fq->nest.hp_binheap_node, &tsk_rt(owner)->hp_blocked_tasks,
4697 + struct nested_info, hp_binheap_node);
4698 +
4699 + new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
4700 +
4701 + if((old_max_eff_prio != new_max_eff_prio) &&
4702 + (effective_priority(owner) == old_max_eff_prio))
4703 + {
4704 + // Need to set new effective_priority for owner
4705 + struct task_struct *decreased_prio;
4706 +
4707 + TRACE_CUR("Propagating decreased inheritance to holder of fq %d.\n",
4708 + ikglp_get_idx(sem, fq));
4709 +
4710 + if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) {
4711 + TRACE_CUR("%s/%d has greater base priority than base priority of owner (%s/%d) of fq %d.\n",
4712 + (new_max_eff_prio) ? new_max_eff_prio->comm : "nil",
4713 + (new_max_eff_prio) ? new_max_eff_prio->pid : -1,
4714 + owner->comm,
4715 + owner->pid,
4716 + ikglp_get_idx(sem, fq));
4717 +
4718 + decreased_prio = new_max_eff_prio;
4719 + }
4720 + else {
4721 + TRACE_CUR("%s/%d has lesser base priority than base priority of owner (%s/%d) of fq %d.\n",
4722 + (new_max_eff_prio) ? new_max_eff_prio->comm : "nil",
4723 + (new_max_eff_prio) ? new_max_eff_prio->pid : -1,
4724 + owner->comm,
4725 + owner->pid,
4726 + ikglp_get_idx(sem, fq));
4727 +
4728 + decreased_prio = NULL;
4729 + }
4730 +
4731 + // beware: recursion
4732 + litmus->nested_decrease_prio(owner, decreased_prio, &sem->lock, flags); // will unlock mutex->lock
4733 + }
4734 + else {
4735 + TRACE_TASK(owner, "No need to propagate priority decrease forward.\n");
4736 + raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
4737 + unlock_fine_irqrestore(&sem->lock, flags);
4738 + }
4739 +}
4740 +
4741 +
4742 +static void ikglp_remove_donation_from_owner(struct binheap_node *n,
4743 + struct fifo_queue *fq,
4744 + struct ikglp_semaphore *sem,
4745 + unsigned long flags)
4746 +{
4747 + struct task_struct *owner = fq->owner;
4748 +
4749 + struct task_struct *old_max_eff_prio;
4750 + struct task_struct *new_max_eff_prio;
4751 +
4752 + BUG_ON(!owner);
4753 +
4754 + raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
4755 +
4756 + old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
4757 +
4758 + binheap_delete(n, &tsk_rt(owner)->hp_blocked_tasks);
4759 +
4760 + new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
4761 +
4762 + if((old_max_eff_prio != new_max_eff_prio) &&
4763 + (effective_priority(owner) == old_max_eff_prio))
4764 + {
4765 + // Need to set new effective_priority for owner
4766 + struct task_struct *decreased_prio;
4767 +
4768 + TRACE_CUR("Propagating decreased inheritance to holder of fq %d.\n",
4769 + ikglp_get_idx(sem, fq));
4770 +
4771 + if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) {
4772 + TRACE_CUR("has greater base priority than base priority of owner of fq %d.\n",
4773 + ikglp_get_idx(sem, fq));
4774 + decreased_prio = new_max_eff_prio;
4775 + }
4776 + else {
4777 + TRACE_CUR("has lesser base priority than base priority of owner of fq %d.\n",
4778 + ikglp_get_idx(sem, fq));
4779 + decreased_prio = NULL;
4780 + }
4781 +
4782 + // beware: recursion
4783 + litmus->nested_decrease_prio(owner, decreased_prio, &sem->lock, flags); // will unlock mutex->lock
4784 + }
4785 + else {
4786 + TRACE_TASK(owner, "No need to propagate priority decrease forward.\n");
4787 + raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
4788 + unlock_fine_irqrestore(&sem->lock, flags);
4789 + }
4790 +}
4791 +
4792 +static void ikglp_remove_donation_from_fq_waiter(struct task_struct *t,
4793 + struct binheap_node *n)
4794 +{
4795 + struct task_struct *old_max_eff_prio;
4796 + struct task_struct *new_max_eff_prio;
4797 +
4798 + raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock);
4799 +
4800 + old_max_eff_prio = top_priority(&tsk_rt(t)->hp_blocked_tasks);
4801 +
4802 + binheap_delete(n, &tsk_rt(t)->hp_blocked_tasks);
4803 +
4804 + new_max_eff_prio = top_priority(&tsk_rt(t)->hp_blocked_tasks);
4805 +
4806 + if((old_max_eff_prio != new_max_eff_prio) &&
4807 + (effective_priority(t) == old_max_eff_prio))
4808 + {
4809 + // Need to set new effective_priority for owner
4810 + struct task_struct *decreased_prio;
4811 +
4812 + if(litmus->__compare(new_max_eff_prio, BASE, t, BASE)) {
4813 + decreased_prio = new_max_eff_prio;
4814 + }
4815 + else {
4816 + decreased_prio = NULL;
4817 + }
4818 +
4819 + tsk_rt(t)->inh_task = decreased_prio;
4820 + }
4821 +
4822 + raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);
4823 +}
4824 +
4825 +static void ikglp_get_immediate(struct task_struct* t,
4826 + struct fifo_queue *fq,
4827 + struct ikglp_semaphore *sem,
4828 + unsigned long flags)
4829 +{
4830 + // resource available now
4831 + TRACE_CUR("queue %d: acquired immediately\n", ikglp_get_idx(sem, fq));
4832 +
4833 + fq->owner = t;
4834 +
4835 + raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock);
4836 + binheap_add(&fq->nest.hp_binheap_node, &tsk_rt(t)->hp_blocked_tasks,
4837 + struct nested_info, hp_binheap_node);
4838 + raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);
4839 +
4840 + ++(fq->count);
4841 +
4842 + ikglp_add_global_list(sem, t, &fq->global_heap_node);
4843 + ikglp_add_donees(sem, fq, t, &fq->donee_heap_node);
4844 +
4845 + sem->shortest_fifo_queue = ikglp_find_shortest(sem, sem->shortest_fifo_queue);
4846 +
4847 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
4848 + if(sem->aff_obs) {
4849 + sem->aff_obs->ops->notify_enqueue(sem->aff_obs, fq, t);
4850 + sem->aff_obs->ops->notify_acquired(sem->aff_obs, fq, t);
4851 + }
4852 +#endif
4853 +
4854 + unlock_fine_irqrestore(&sem->lock, flags);
4855 +}
4856 +
4857 +
4858 +
4859 +
4860 +
4861 +static void __ikglp_enqueue_on_fq(struct ikglp_semaphore *sem,
4862 + struct fifo_queue* fq,
4863 + struct task_struct* t,
4864 + wait_queue_t *wait,
4865 + ikglp_heap_node_t *global_heap_node,
4866 + ikglp_donee_heap_node_t *donee_heap_node)
4867 +{
4868 + /* resource is not free => must suspend and wait */
4869 + TRACE_TASK(t, "Enqueuing on fq %d.\n",
4870 + ikglp_get_idx(sem, fq));
4871 +
4872 + init_waitqueue_entry(wait, t);
4873 +
4874 + __add_wait_queue_tail_exclusive(&fq->wait, wait);
4875 +
4876 + ++(fq->count);
4877 + ++(sem->nr_in_fifos);
4878 +
4879 + // update global list.
4880 + if(likely(global_heap_node)) {
4881 + if(binheap_is_in_heap(&global_heap_node->node)) {
4882 + WARN_ON(1);
4883 + ikglp_del_global_list(sem, t, global_heap_node);
4884 + }
4885 + ikglp_add_global_list(sem, t, global_heap_node);
4886 + }
4887 + // update donor eligibility list.
4888 + if(likely(donee_heap_node)) {
4889 +// if(binheap_is_in_heap(&donee_heap_node->node)) {
4890 +// WARN_ON(1);
4891 +// }
4892 + ikglp_add_donees(sem, fq, t, donee_heap_node);
4893 + }
4894 +
4895 + if(sem->shortest_fifo_queue == fq) {
4896 + sem->shortest_fifo_queue = ikglp_find_shortest(sem, fq);
4897 + }
4898 +
4899 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
4900 + if(sem->aff_obs) {
4901 + sem->aff_obs->ops->notify_enqueue(sem->aff_obs, fq, t);
4902 + }
4903 +#endif
4904 +
4905 + TRACE_TASK(t, "shortest queue is now %d\n", ikglp_get_idx(sem, fq));
4906 +}
4907 +
4908 +
4909 +static void ikglp_enqueue_on_fq(
4910 + struct ikglp_semaphore *sem,
4911 + struct fifo_queue *fq,
4912 + ikglp_wait_state_t *wait,
4913 + unsigned long flags)
4914 +{
4915 + /* resource is not free => must suspend and wait */
4916 + TRACE_TASK(wait->task, "queue %d: Resource is not free => must suspend and wait.\n",
4917 + ikglp_get_idx(sem, fq));
4918 +
4919 + INIT_BINHEAP_NODE(&wait->global_heap_node.node);
4920 + INIT_BINHEAP_NODE(&wait->donee_heap_node.node);
4921 +
4922 + __ikglp_enqueue_on_fq(sem, fq, wait->task, &wait->fq_node,
4923 + &wait->global_heap_node, &wait->donee_heap_node);
4924 +
4925 + ikglp_refresh_owners_prio_increase(wait->task, fq, sem, flags); // unlocks sem->lock
4926 +}
4927 +
4928 +
4929 +static void __ikglp_enqueue_on_pq(struct ikglp_semaphore *sem,
4930 + ikglp_wait_state_t *wait)
4931 +{
4932 + TRACE_TASK(wait->task, "goes to PQ.\n");
4933 +
4934 + wait->pq_node.task = wait->task; // copy over task (a little redundant...)
4935 +
4936 + binheap_add(&wait->pq_node.node, &sem->priority_queue,
4937 + ikglp_heap_node_t, node);
4938 +}
4939 +
4940 +static void ikglp_enqueue_on_pq(struct ikglp_semaphore *sem,
4941 + ikglp_wait_state_t *wait)
4942 +{
4943 + INIT_BINHEAP_NODE(&wait->global_heap_node.node);
4944 + INIT_BINHEAP_NODE(&wait->donee_heap_node.node);
4945 + INIT_BINHEAP_NODE(&wait->pq_node.node);
4946 +
4947 + __ikglp_enqueue_on_pq(sem, wait);
4948 +}
4949 +
4950 +static void ikglp_enqueue_on_donor(struct ikglp_semaphore *sem,
4951 + ikglp_wait_state_t* wait,
4952 + unsigned long flags)
4953 +{
4954 + struct task_struct *t = wait->task;
4955 + ikglp_donee_heap_node_t *donee_node = NULL;
4956 + struct task_struct *donee;
4957 +
4958 + struct task_struct *old_max_eff_prio;
4959 + struct task_struct *new_max_eff_prio;
4960 + struct task_struct *new_prio = NULL;
4961 +
4962 + INIT_BINHEAP_NODE(&wait->global_heap_node.node);
4963 + INIT_BINHEAP_NODE(&wait->donee_heap_node.node);
4964 + INIT_BINHEAP_NODE(&wait->pq_node.node);
4965 + INIT_BINHEAP_NODE(&wait->node);
4966 +
4967 +// TRACE_CUR("Adding %s/%d as donor.\n", t->comm, t->pid);
4968 +// TRACE_CUR("donors Before:\n");
4969 +// print_donors(sem->donors.root, 1);
4970 +
4971 + // Add donor to the global list.
4972 + ikglp_add_global_list(sem, t, &wait->global_heap_node);
4973 +
4974 + // Select a donee
4975 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
4976 + donee_node = (sem->aff_obs) ?
4977 + sem->aff_obs->ops->advise_donee_selection(sem->aff_obs, t) :
4978 + binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node);
4979 +#else
4980 + donee_node = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node);
4981 +#endif
4982 +
4983 + donee = donee_node->task;
4984 +
4985 + TRACE_TASK(t, "Donee selected: %s/%d\n", donee->comm, donee->pid);
4986 +
4987 + TRACE_CUR("Temporarily removing %s/%d to donee list.\n",
4988 + donee->comm, donee->pid);
4989 +// TRACE_CUR("donees Before:\n");
4990 +// print_donees(sem, sem->donees.root, 1);
4991 +
4992 + //binheap_delete_root(&sem->donees, ikglp_donee_heap_node_t, node); // will re-add it shortly
4993 + binheap_delete(&donee_node->node, &sem->donees);
4994 +
4995 +// TRACE_CUR("donees After:\n");
4996 +// print_donees(sem, sem->donees.root, 1);
4997 +
4998 +
4999 + wait->donee_info = donee_node;
5000 +
5001 + // Add t to donor heap.
5002 + binheap_add(&wait->node, &sem->donors, ikglp_wait_state_t, node);
5003 +
5004 + // Now adjust the donee's priority.
5005 +
5006 + // Lock the donee's inheritance heap.
5007 + raw_spin_lock(&tsk_rt(donee)->hp_blocked_tasks_lock);
5008 +
5009 + old_max_eff_prio = top_priority(&tsk_rt(donee)->hp_blocked_tasks);
5010 +
5011 + if(donee_node->donor_info) {
5012 + // Steal donation relation. Evict old donor to PQ.
5013 +
5014 + // Remove old donor from donor heap
5015 + ikglp_wait_state_t *old_wait = donee_node->donor_info;
5016 + struct task_struct *old_donor = old_wait->task;
5017 +
5018 + TRACE_TASK(t, "Donee (%s/%d) had donor %s/%d. Moving old donor to PQ.\n",
5019 + donee->comm, donee->pid, old_donor->comm, old_donor->pid);
5020 +
5021 + binheap_delete(&old_wait->node, &sem->donors);
5022 +
5023 + // Remove donation from donee's inheritance heap.
5024 + binheap_delete(&old_wait->prio_donation.hp_binheap_node,
5025 + &tsk_rt(donee)->hp_blocked_tasks);
5026 + // WARNING: have not updated inh_prio!
5027 +
5028 + // Add old donor to PQ.
5029 + __ikglp_enqueue_on_pq(sem, old_wait);
5030 +
5031 + // Remove old donor from the global heap.
5032 + ikglp_del_global_list(sem, old_donor, &old_wait->global_heap_node);
5033 + }
5034 +
5035 + // Add back donee's node to the donees heap with increased prio
5036 + donee_node->donor_info = wait;
5037 + INIT_BINHEAP_NODE(&donee_node->node);
5038 +
5039 +
5040 + TRACE_CUR("Adding %s/%d back to donee list.\n", donee->comm, donee->pid);
5041 +// TRACE_CUR("donees Before:\n");
5042 +// print_donees(sem, sem->donees.root, 1);
5043 +
5044 + binheap_add(&donee_node->node, &sem->donees, ikglp_donee_heap_node_t, node);
5045 +
5046 +// TRACE_CUR("donees After:\n");
5047 +// print_donees(sem, sem->donees.root, 1);
5048 +
5049 + // Add an inheritance/donation to the donee's inheritance heap.
5050 + wait->prio_donation.lock = (struct litmus_lock*)sem;
5051 + wait->prio_donation.hp_waiter_eff_prio = t;
5052 + wait->prio_donation.hp_waiter_ptr = NULL;
5053 + INIT_BINHEAP_NODE(&wait->prio_donation.hp_binheap_node);
5054 +
5055 + binheap_add(&wait->prio_donation.hp_binheap_node,
5056 + &tsk_rt(donee)->hp_blocked_tasks,
5057 + struct nested_info, hp_binheap_node);
5058 +
5059 + new_max_eff_prio = top_priority(&tsk_rt(donee)->hp_blocked_tasks);
5060 +
5061 + if(new_max_eff_prio != old_max_eff_prio) {
5062 + if ((effective_priority(donee) == old_max_eff_prio) ||
5063 + (litmus->__compare(new_max_eff_prio, BASE, donee, EFFECTIVE))){
5064 + TRACE_TASK(t, "Donation increases %s/%d's effective priority\n",
5065 + donee->comm, donee->pid);
5066 + new_prio = new_max_eff_prio;
5067 + }
5068 +// else {
5069 +// // should be bug. donor would not be in top-m.
5070 +// TRACE_TASK(t, "Donation is not greater than base prio of %s/%d?\n", donee->comm, donee->pid);
5071 +// WARN_ON(1);
5072 +// }
5073 +// }
5074 +// else {
5075 +// // should be bug. donor would not be in top-m.
5076 +// TRACE_TASK(t, "No change in %s/%d's inheritance heap?\n", donee->comm, donee->pid);
5077 +// WARN_ON(1);
5078 + }
5079 +
5080 + if(new_prio) {
5081 + struct fifo_queue *donee_fq = donee_node->fq;
5082 +
5083 + if(donee != donee_fq->owner) {
5084 + TRACE_TASK(t, "%s/%d is not the owner. Propagating priority to owner %s/%d.\n",
5085 + donee->comm, donee->pid,
5086 + donee_fq->owner->comm, donee_fq->owner->pid);
5087 +
5088 + raw_spin_unlock(&tsk_rt(donee)->hp_blocked_tasks_lock);
5089 + ikglp_refresh_owners_prio_increase(donee, donee_fq, sem, flags); // unlocks sem->lock
5090 + }
5091 + else {
5092 + TRACE_TASK(t, "%s/%d is the owner. Propagating priority immediately.\n",
5093 + donee->comm, donee->pid);
5094 + litmus->nested_increase_prio(donee, new_prio, &sem->lock, flags); // unlocks sem->lock and donee's heap lock
5095 + }
5096 + }
5097 + else {
5098 + TRACE_TASK(t, "No change in effective priority (it is %s/%d). BUG?\n",
5099 + new_max_eff_prio->comm, new_max_eff_prio->pid);
5100 + raw_spin_unlock(&tsk_rt(donee)->hp_blocked_tasks_lock);
5101 + unlock_fine_irqrestore(&sem->lock, flags);
5102 + }
5103 +
5104 +
5105 +// TRACE_CUR("donors After:\n");
5106 +// print_donors(sem->donors.root, 1);
5107 +}
5108 +
5109 +int ikglp_lock(struct litmus_lock* l)
5110 +{
5111 + struct task_struct* t = current;
5112 + struct ikglp_semaphore *sem = ikglp_from_lock(l);
5113 + unsigned long flags = 0, real_flags;
5114 + struct fifo_queue *fq = NULL;
5115 + int replica = -EINVAL;
5116 +
5117 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
5118 + raw_spinlock_t *dgl_lock;
5119 +#endif
5120 +
5121 + ikglp_wait_state_t wait;
5122 +
5123 + if (!is_realtime(t))
5124 + return -EPERM;
5125 +
5126 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
5127 + dgl_lock = litmus->get_dgl_spinlock(t);
5128 +#endif
5129 +
5130 + raw_spin_lock_irqsave(&sem->real_lock, real_flags);
5131 +
5132 + lock_global_irqsave(dgl_lock, flags);
5133 + lock_fine_irqsave(&sem->lock, flags);
5134 +
5135 + if(sem->nr_in_fifos < sem->m) {
5136 + // enqueue somewhere
5137 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
5138 + fq = (sem->aff_obs) ?
5139 + sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t) :
5140 + sem->shortest_fifo_queue;
5141 +#else
5142 + fq = sem->shortest_fifo_queue;
5143 +#endif
5144 + if(fq->count == 0) {
5145 + // take available resource
5146 + replica = ikglp_get_idx(sem, fq);
5147 +
5148 + ikglp_get_immediate(t, fq, sem, flags); // unlocks sem->lock
5149 +
5150 + unlock_global_irqrestore(dgl_lock, flags);
5151 + raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
5152 + goto acquired;
5153 + }
5154 + else {
5155 + wait.task = t; // THIS IS CRITICALLY IMPORTANT!!!
5156 +
5157 + tsk_rt(t)->blocked_lock = (struct litmus_lock*)sem; // record where we are blocked
5158 + mb();
5159 +
5160 + /* FIXME: interruptible would be nice some day */
5161 + set_task_state(t, TASK_UNINTERRUPTIBLE);
5162 +
5163 + ikglp_enqueue_on_fq(sem, fq, &wait, flags); // unlocks sem->lock
5164 + }
5165 + }
5166 + else {
5167 + // donor!
5168 + wait.task = t; // THIS IS CRITICALLY IMPORTANT!!!
5169 +
5170 + tsk_rt(t)->blocked_lock = (struct litmus_lock*)sem; // record where we are blocked
5171 + mb();
5172 +
5173 + /* FIXME: interruptible would be nice some day */
5174 + set_task_state(t, TASK_UNINTERRUPTIBLE);
5175 +
5176 + if(litmus->__compare(ikglp_mth_highest(sem), BASE, t, BASE)) {
5177 + // enqueue on PQ
5178 + ikglp_enqueue_on_pq(sem, &wait);
5179 + unlock_fine_irqrestore(&sem->lock, flags);
5180 + }
5181 + else {
5182 + // enqueue as donor
5183 + ikglp_enqueue_on_donor(sem, &wait, flags); // unlocks sem->lock
5184 + }
5185 + }
5186 +
5187 + unlock_global_irqrestore(dgl_lock, flags);
5188 + raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
5189 +
5190 + TS_LOCK_SUSPEND;
5191 +
5192 + schedule();
5193 +
5194 + TS_LOCK_RESUME;
5195 +
5196 + fq = ikglp_get_queue(sem, t);
5197 + BUG_ON(!fq);
5198 +
5199 + replica = ikglp_get_idx(sem, fq);
5200 +
5201 +acquired:
5202 + TRACE_CUR("Acquired lock %d, queue %d\n",
5203 + l->ident, replica);
5204 +
5205 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
5206 + if(sem->aff_obs) {
5207 + return sem->aff_obs->ops->replica_to_resource(sem->aff_obs, fq);
5208 + }
5209 +#endif
5210 +
5211 + return replica;
5212 +}
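A summary of the three admission paths taken above, for orientation (illustrative comment only):

#if 0
	/* A request by task t is routed as follows:
	 *   1. fewer than m requests in the FIFOs:
	 *        - the chosen (advised or shortest) FIFO is empty
	 *            -> acquire a replica immediately
	 *        - otherwise
	 *            -> suspend on that FIFO
	 *   2. m requests already in the FIFOs and the m-th highest waiter
	 *      has higher base priority than t
	 *            -> suspend on the priority queue (PQ)
	 *   3. m requests already in the FIFOs and t is among the m highest
	 *            -> suspend as a donor, lending its priority to a
	 *               selected donee already in a FIFO
	 * The return value is the replica index, or, with an affinity
	 * observer attached, the resource that replica maps to.
	 */
#endif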
5213 +
5214 +//int ikglp_lock(struct litmus_lock* l)
5215 +//{
5216 +// struct task_struct* t = current;
5217 +// struct ikglp_semaphore *sem = ikglp_from_lock(l);
5218 +// unsigned long flags = 0, real_flags;
5219 +// struct fifo_queue *fq = NULL;
5220 +// int replica = -EINVAL;
5221 +//
5222 +//#ifdef CONFIG_LITMUS_DGL_SUPPORT
5223 +// raw_spinlock_t *dgl_lock;
5224 +//#endif
5225 +//
5226 +// ikglp_wait_state_t wait;
5227 +//
5228 +// if (!is_realtime(t))
5229 +// return -EPERM;
5230 +//
5231 +//#ifdef CONFIG_LITMUS_DGL_SUPPORT
5232 +// dgl_lock = litmus->get_dgl_spinlock(t);
5233 +//#endif
5234 +//
5235 +// raw_spin_lock_irqsave(&sem->real_lock, real_flags);
5236 +//
5237 +// lock_global_irqsave(dgl_lock, flags);
5238 +// lock_fine_irqsave(&sem->lock, flags);
5239 +//
5240 +//
5241 +//#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
5242 +// fq = (sem->aff_obs) ?
5243 +// sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t) :
5244 +// sem->shortest_fifo_queue;
5245 +//#else
5246 +// fq = sem->shortest_fifo_queue;
5247 +//#endif
5248 +//
5249 +// if(fq->count == 0) {
5250 +// // take available resource
5251 +// replica = ikglp_get_idx(sem, fq);
5252 +//
5253 +// ikglp_get_immediate(t, fq, sem, flags); // unlocks sem->lock
5254 +//
5255 +// unlock_global_irqrestore(dgl_lock, flags);
5256 +// raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
5257 +// }
5258 +// else
5259 +// {
5260 +// // we have to suspend.
5261 +//
5262 +// wait.task = t; // THIS IS CRITICALLY IMPORTANT!!!
5263 +//
5264 +// tsk_rt(t)->blocked_lock = (struct litmus_lock*)sem; // record where we are blocked
5265 +// mb();
5266 +//
5267 +// /* FIXME: interruptible would be nice some day */
5268 +// set_task_state(t, TASK_UNINTERRUPTIBLE);
5269 +//
5270 +// if(fq->count < sem->max_fifo_len) {
5271 +// // enqueue on fq
5272 +// ikglp_enqueue_on_fq(sem, fq, &wait, flags); // unlocks sem->lock
5273 +// }
5274 +// else {
5275 +//
5276 +// TRACE_CUR("IKGLP fifo queues are full (at least they better be).\n");
5277 +//
5278 +// // no room in fifos. Go to PQ or donors.
5279 +//
5280 +// if(litmus->__compare(ikglp_mth_highest(sem), BASE, t, BASE)) {
5281 +// // enqueue on PQ
5282 +// ikglp_enqueue_on_pq(sem, &wait);
5283 +// unlock_fine_irqrestore(&sem->lock, flags);
5284 +// }
5285 +// else {
5286 +// // enqueue as donor
5287 +// ikglp_enqueue_on_donor(sem, &wait, flags); // unlocks sem->lock
5288 +// }
5289 +// }
5290 +//
5291 +// unlock_global_irqrestore(dgl_lock, flags);
5292 +// raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
5293 +//
5294 +// TS_LOCK_SUSPEND;
5295 +//
5296 +// schedule();
5297 +//
5298 +// TS_LOCK_RESUME;
5299 +//
5300 +// fq = ikglp_get_queue(sem, t);
5301 +// BUG_ON(!fq);
5302 +//
5303 +// replica = ikglp_get_idx(sem, fq);
5304 +// }
5305 +//
5306 +// TRACE_CUR("Acquired lock %d, queue %d\n",
5307 +// l->ident, replica);
5308 +//
5309 +//#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
5310 +// if(sem->aff_obs) {
5311 +// return sem->aff_obs->ops->replica_to_resource(sem->aff_obs, fq);
5312 +// }
5313 +//#endif
5314 +//
5315 +// return replica;
5316 +//}
5317 +
5318 +static void ikglp_move_donor_to_fq(struct ikglp_semaphore *sem,
5319 + struct fifo_queue *fq,
5320 + ikglp_wait_state_t *donor_info)
5321 +{
5322 + struct task_struct *t = donor_info->task;
5323 +
5324 + TRACE_CUR("Donor %s/%d being moved to fq %d\n",
5325 + t->comm,
5326 + t->pid,
5327 + ikglp_get_idx(sem, fq));
5328 +
5329 + binheap_delete(&donor_info->node, &sem->donors);
5330 +
5331 + __ikglp_enqueue_on_fq(sem, fq, t,
5332 + &donor_info->fq_node,
5333 + NULL, // already in global_list, so pass null to prevent adding 2nd time.
5334 + &donor_info->donee_heap_node);
5335 +
5336 + // warning:
5337 + // ikglp_update_owners_prio(t, fq, sem, flags) has not been called.
5338 +}
5339 +
5340 +static void ikglp_move_pq_to_fq(struct ikglp_semaphore *sem,
5341 + struct fifo_queue *fq,
5342 + ikglp_wait_state_t *wait)
5343 +{
5344 + struct task_struct *t = wait->task;
5345 +
5346 + TRACE_CUR("PQ request %s/%d being moved to fq %d\n",
5347 + t->comm,
5348 + t->pid,
5349 + ikglp_get_idx(sem, fq));
5350 +
5351 + binheap_delete(&wait->pq_node.node, &sem->priority_queue);
5352 +
5353 + __ikglp_enqueue_on_fq(sem, fq, t,
5354 + &wait->fq_node,
5355 + &wait->global_heap_node,
5356 + &wait->donee_heap_node);
5357 + // warning:
5358 + // ikglp_update_owners_prio(t, fq, sem, flags) has not been called.
5359 +}
5360 +
5361 +static ikglp_wait_state_t* ikglp_find_hp_waiter_to_steal(
5362 + struct ikglp_semaphore* sem)
5363 +{
5364 + /* must hold sem->lock */
5365 +
5366 + struct fifo_queue *fq = NULL;
5367 + struct list_head *pos;
5368 + struct task_struct *queued;
5369 + int i;
5370 +
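+	// A queue is a candidate only if it holds at least one waiter beyond its
+	// owner (count > 1); among candidates, pick the queue whose hp_waiter has
+	// the highest priority.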
5371 + for(i = 0; i < sem->nr_replicas; ++i) {
5372 + if( (sem->fifo_queues[i].count > 1) &&
5373 + (!fq || litmus->compare(sem->fifo_queues[i].hp_waiter, fq->hp_waiter)) ) {
5374 +
5375 + TRACE_CUR("hp_waiter on fq %d (%s/%d) has higher prio than hp_waiter on fq %d (%s/%d)\n",
5376 + ikglp_get_idx(sem, &sem->fifo_queues[i]),
5377 + sem->fifo_queues[i].hp_waiter->comm,
5378 + sem->fifo_queues[i].hp_waiter->pid,
5379 + (fq) ? ikglp_get_idx(sem, fq) : -1,
5380 + (fq) ? ((fq->hp_waiter) ? fq->hp_waiter->comm : "nil") : "nilXX",
5381 + (fq) ? ((fq->hp_waiter) ? fq->hp_waiter->pid : -1) : -2);
5382 +
5383 + fq = &sem->fifo_queues[i];
5384 +
5385 + WARN_ON(!(fq->hp_waiter));
5386 + }
5387 + }
5388 +
5389 + if(fq) {
5390 + struct task_struct *max_hp = fq->hp_waiter;
5391 + ikglp_wait_state_t* ret = NULL;
5392 +
5393 + TRACE_CUR("Searching for %s/%d on fq %d\n",
5394 + max_hp->comm,
5395 + max_hp->pid,
5396 + ikglp_get_idx(sem, fq));
5397 +
5398 + BUG_ON(!max_hp);
5399 +
5400 + list_for_each(pos, &fq->wait.task_list) {
5401 + wait_queue_t *wait = list_entry(pos, wait_queue_t, task_list);
5402 +
5403 + queued = (struct task_struct*) wait->private;
5404 +
5405 + TRACE_CUR("fq %d entry: %s/%d\n",
5406 + ikglp_get_idx(sem, fq),
5407 + queued->comm,
5408 + queued->pid);
5409 +
5410 + /* Compare task prios, find high prio task. */
5411 + if (queued == max_hp) {
5412 + TRACE_CUR("Found it!\n");
5413 + ret = container_of(wait, ikglp_wait_state_t, fq_node);
5414 + }
5415 + }
5416 +
5417 + WARN_ON(!ret);
5418 + return ret;
5419 + }
5420 +
5421 + return(NULL);
5422 +}
5423 +
5424 +static void ikglp_steal_to_fq(struct ikglp_semaphore *sem,
5425 + struct fifo_queue *fq,
5426 + ikglp_wait_state_t *fq_wait)
5427 +{
5428 + struct task_struct *t = fq_wait->task;
5429 + struct fifo_queue *fq_steal = fq_wait->donee_heap_node.fq;
5430 +
5431 + TRACE_CUR("FQ request %s/%d being moved to fq %d\n",
5432 + t->comm,
5433 + t->pid,
5434 + ikglp_get_idx(sem, fq));
5435 +
5436 + fq_wait->donee_heap_node.fq = fq; // just to be safe
5437 +
5438 +
5439 + __remove_wait_queue(&fq_steal->wait, &fq_wait->fq_node);
5440 + --(fq_steal->count);
5441 +
5442 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
5443 + if(sem->aff_obs) {
5444 + sem->aff_obs->ops->notify_dequeue(sem->aff_obs, fq_steal, t);
5445 + }
5446 +#endif
5447 +
5448 + if(t == fq_steal->hp_waiter) {
5449 + fq_steal->hp_waiter = ikglp_find_hp_waiter(fq_steal, NULL);
5450 + TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n",
5451 + ikglp_get_idx(sem, fq_steal),
5452 + (fq_steal->hp_waiter) ? fq_steal->hp_waiter->comm : "nil",
5453 + (fq_steal->hp_waiter) ? fq_steal->hp_waiter->pid : -1);
5454 + }
5455 +
5456 +
5457 + // Update shortest.
5458 + if(fq_steal->count < sem->shortest_fifo_queue->count) {
5459 + sem->shortest_fifo_queue = fq_steal;
5460 + }
5461 +
5462 + __ikglp_enqueue_on_fq(sem, fq, t,
5463 + &fq_wait->fq_node,
5464 + NULL,
5465 + NULL);
5466 +
5467 + // warning: We have not checked the priority inheritance of fq's owner yet.
5468 +}
5469 +
5470 +
5471 +static void ikglp_migrate_fq_to_owner_heap_nodes(struct ikglp_semaphore *sem,
5472 + struct fifo_queue *fq,
5473 + ikglp_wait_state_t *old_wait)
5474 +{
5475 + struct task_struct *t = old_wait->task;
5476 +
5477 + BUG_ON(old_wait->donee_heap_node.fq != fq);
5478 +
5479 + TRACE_TASK(t, "Migrating wait_state to memory of queue %d.\n",
5480 + ikglp_get_idx(sem, fq));
5481 +
5482 + // need to migrate global_heap_node and donee_heap_node off of the stack
5483 + // to the nodes allocated for the owner of this fq.
5484 +
5485 + // TODO: Enhance binheap() to perform this operation in place.
5486 +
5487 + ikglp_del_global_list(sem, t, &old_wait->global_heap_node); // remove
5488 + fq->global_heap_node = old_wait->global_heap_node; // copy
5489 + ikglp_add_global_list(sem, t, &fq->global_heap_node); // re-add
5490 +
5491 + binheap_delete(&old_wait->donee_heap_node.node, &sem->donees); // remove
5492 + fq->donee_heap_node = old_wait->donee_heap_node; // copy
5493 +
5494 + if(fq->donee_heap_node.donor_info) {
5495 + // let donor know that our location has changed
5496 + BUG_ON(fq->donee_heap_node.donor_info->donee_info->task != t); // validate cross-link
5497 + fq->donee_heap_node.donor_info->donee_info = &fq->donee_heap_node;
5498 + }
5499 + INIT_BINHEAP_NODE(&fq->donee_heap_node.node);
5500 + binheap_add(&fq->donee_heap_node.node, &sem->donees,
5501 + ikglp_donee_heap_node_t, node); // re-add
5502 +}
5503 +
5504 +int ikglp_unlock(struct litmus_lock* l)
5505 +{
5506 + struct ikglp_semaphore *sem = ikglp_from_lock(l);
5507 + struct task_struct *t = current;
5508 + struct task_struct *donee = NULL;
5509 + struct task_struct *next = NULL;
5510 + struct task_struct *new_on_fq = NULL;
5511 + struct fifo_queue *fq_of_new_on_fq = NULL;
5512 +
5513 + ikglp_wait_state_t *other_donor_info = NULL;
5514 + struct fifo_queue *to_steal = NULL;
5515 + int need_steal_prio_reeval = 0;
5516 + struct fifo_queue *fq;
5517 +
5518 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
5519 + raw_spinlock_t *dgl_lock;
5520 +#endif
5521 +
5522 + unsigned long flags = 0, real_flags;
5523 +
5524 + int err = 0;
5525 +
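+	// Unlock proceeds in stages: (1) release this task's replica and remove it
+	// from the global/donee heaps; (2) pull the next request into a FIFO
+	// queue -- our own donor first, then any other donor, then the PQ, then
+	// a request stolen from another queue; (3) drop all priority this task
+	// inherited; (4) re-evaluate any inheritance disturbed by step (2); and
+	// (5) wake up the new owner(s).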
5526 + fq = ikglp_get_queue(sem, t); // returns NULL if 't' is not owner.
5527 +
5528 + if (!fq) {
5529 + err = -EINVAL;
5530 + goto out;
5531 + }
5532 +
5533 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
5534 + dgl_lock = litmus->get_dgl_spinlock(t);
5535 +#endif
5536 + raw_spin_lock_irqsave(&sem->real_lock, real_flags);
5537 +
5538 + lock_global_irqsave(dgl_lock, flags); // TODO: Push this deeper
5539 + lock_fine_irqsave(&sem->lock, flags);
5540 +
5541 + TRACE_TASK(t, "Freeing replica %d.\n", ikglp_get_idx(sem, fq));
5542 +
5543 +
5544 + // Remove 't' from the heaps, but data in nodes will still be good.
5545 + ikglp_del_global_list(sem, t, &fq->global_heap_node);
5546 + binheap_delete(&fq->donee_heap_node.node, &sem->donees);
5547 +
5548 + fq->owner = NULL; // no longer owned!!
5549 + --(fq->count);
5550 + if(fq->count < sem->shortest_fifo_queue->count) {
5551 + sem->shortest_fifo_queue = fq;
5552 + }
5553 + --(sem->nr_in_fifos);
5554 +
5555 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
5556 + if(sem->aff_obs) {
5557 + sem->aff_obs->ops->notify_dequeue(sem->aff_obs, fq, t);
5558 + sem->aff_obs->ops->notify_freed(sem->aff_obs, fq, t);
5559 + }
5560 +#endif
5561 +
5562 + // Move the next request into the FQ and update heaps as needed.
5563 + // We defer re-evaluation of priorities to later in the function.
5564 + if(fq->donee_heap_node.donor_info) { // move my donor to FQ
5565 + ikglp_wait_state_t *donor_info = fq->donee_heap_node.donor_info;
5566 +
5567 + new_on_fq = donor_info->task;
5568 +
5569 + // donor moved to FQ
5570 + donee = t;
5571 +
5572 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
5573 + if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) {
5574 + fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq);
5575 + if(fq_of_new_on_fq->count == 0) {
5576 + // ignore it?
5577 +// fq_of_new_on_fq = fq;
5578 + }
5579 + }
5580 + else {
5581 + fq_of_new_on_fq = fq;
5582 + }
5583 +#else
5584 + fq_of_new_on_fq = fq;
5585 +#endif
5586 +
5587 + TRACE_TASK(t, "Moving MY donor (%s/%d) to fq %d (non-aff wanted fq %d).\n",
5588 + new_on_fq->comm, new_on_fq->pid,
5589 + ikglp_get_idx(sem, fq_of_new_on_fq),
5590 + ikglp_get_idx(sem, fq));
5591 +
5592 +
5593 + ikglp_move_donor_to_fq(sem, fq_of_new_on_fq, donor_info);
5594 + }
5595 +	else if(!binheap_empty(&sem->donors)) { // I have no donor; move another task's donor to this FQ
5596 + // move other donor to FQ
5597 + // Select a donor
5598 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
5599 + other_donor_info = (sem->aff_obs) ?
5600 + sem->aff_obs->ops->advise_donor_to_fq(sem->aff_obs, fq) :
5601 + binheap_top_entry(&sem->donors, ikglp_wait_state_t, node);
5602 +#else
5603 + other_donor_info = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node);
5604 +#endif
5605 +
5606 + new_on_fq = other_donor_info->task;
5607 + donee = other_donor_info->donee_info->task;
5608 +
5609 + // update the donee's heap position.
5610 + other_donor_info->donee_info->donor_info = NULL; // clear the cross-link
5611 + binheap_decrease(&other_donor_info->donee_info->node, &sem->donees);
5612 +
5613 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
5614 + if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) {
5615 + fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq);
5616 + if(fq_of_new_on_fq->count == 0) {
5617 + // ignore it?
5618 +// fq_of_new_on_fq = fq;
5619 + }
5620 + }
5621 + else {
5622 + fq_of_new_on_fq = fq;
5623 + }
5624 +#else
5625 + fq_of_new_on_fq = fq;
5626 +#endif
5627 +
5628 + TRACE_TASK(t, "Moving a donor (%s/%d) to fq %d (non-aff wanted fq %d).\n",
5629 + new_on_fq->comm, new_on_fq->pid,
5630 + ikglp_get_idx(sem, fq_of_new_on_fq),
5631 + ikglp_get_idx(sem, fq));
5632 +
5633 + ikglp_move_donor_to_fq(sem, fq_of_new_on_fq, other_donor_info);
5634 + }
5635 + else if(!binheap_empty(&sem->priority_queue)) { // No donors, so move PQ
5636 + ikglp_heap_node_t *pq_node = binheap_top_entry(&sem->priority_queue,
5637 + ikglp_heap_node_t, node);
5638 + ikglp_wait_state_t *pq_wait = container_of(pq_node, ikglp_wait_state_t,
5639 + pq_node);
5640 +
5641 + new_on_fq = pq_wait->task;
5642 +
5643 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
5644 + if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) {
5645 + fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq);
5646 + if(fq_of_new_on_fq->count == 0) {
5647 + // ignore it?
5648 +// fq_of_new_on_fq = fq;
5649 + }
5650 + }
5651 + else {
5652 + fq_of_new_on_fq = fq;
5653 + }
5654 +#else
5655 + fq_of_new_on_fq = fq;
5656 +#endif
5657 +
5658 + TRACE_TASK(t, "Moving a pq waiter (%s/%d) to fq %d (non-aff wanted fq %d).\n",
5659 + new_on_fq->comm, new_on_fq->pid,
5660 + ikglp_get_idx(sem, fq_of_new_on_fq),
5661 + ikglp_get_idx(sem, fq));
5662 +
5663 + ikglp_move_pq_to_fq(sem, fq_of_new_on_fq, pq_wait);
5664 + }
5665 + else if(fq->count == 0) { // No PQ and this queue is empty, so steal.
5666 + ikglp_wait_state_t *fq_wait;
5667 +
5668 + TRACE_TASK(t, "Looking to steal a request for fq %d...\n",
5669 + ikglp_get_idx(sem, fq));
5670 +
5671 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
5672 + fq_wait = (sem->aff_obs) ?
5673 + sem->aff_obs->ops->advise_steal(sem->aff_obs, fq) :
5674 + ikglp_find_hp_waiter_to_steal(sem);
5675 +#else
5676 + fq_wait = ikglp_find_hp_waiter_to_steal(sem);
5677 +#endif
5678 +
5679 + if(fq_wait) {
5680 + to_steal = fq_wait->donee_heap_node.fq;
5681 +
5682 + new_on_fq = fq_wait->task;
5683 + fq_of_new_on_fq = fq;
5684 + need_steal_prio_reeval = (new_on_fq == to_steal->hp_waiter);
5685 +
5686 + TRACE_TASK(t, "Found %s/%d of fq %d to steal for fq %d...\n",
5687 + new_on_fq->comm, new_on_fq->pid,
5688 + ikglp_get_idx(sem, to_steal),
5689 + ikglp_get_idx(sem, fq));
5690 +
5691 + ikglp_steal_to_fq(sem, fq, fq_wait);
5692 + }
5693 + else {
5694 + TRACE_TASK(t, "Found nothing to steal for fq %d.\n",
5695 + ikglp_get_idx(sem, fq));
5696 + }
5697 + }
5698 + else { // move no one
5699 + }
5700 +
5701 + // 't' must drop all priority and clean up data structures before hand-off.
5702 +
5703 + // DROP ALL INHERITANCE. IKGLP MUST BE OUTER-MOST
5704 + raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock);
5705 + {
5706 + int count = 0;
5707 + while(!binheap_empty(&tsk_rt(t)->hp_blocked_tasks)) {
5708 + binheap_delete_root(&tsk_rt(t)->hp_blocked_tasks,
5709 + struct nested_info, hp_binheap_node);
5710 + ++count;
5711 + }
5712 + litmus->decrease_prio(t, NULL);
5713 +		WARN_ON(count > 2); // should never exceed 2: only local FQ inheritance and one donation are possible.
5714 + }
5715 + raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);
5716 +
5717 +
5718 +
5719 + // Now patch up other priorities.
5720 + //
5721 + // At most one of the following:
5722 + // if(donee && donee != t), decrease prio, propagate to owner, or onward
5723 + // if(to_steal), update owner's prio (hp_waiter has already been set)
5724 + //
5725 +
5726 + BUG_ON((other_donor_info != NULL) && (to_steal != NULL));
5727 +
5728 + if(other_donor_info) {
5729 + struct fifo_queue *other_fq = other_donor_info->donee_info->fq;
5730 +
5731 + BUG_ON(!donee);
5732 + BUG_ON(donee == t);
5733 +
5734 + TRACE_TASK(t, "Terminating donation relation of donor %s/%d to donee %s/%d!\n",
5735 + other_donor_info->task->comm, other_donor_info->task->pid,
5736 + donee->comm, donee->pid);
5737 +
5738 + // need to terminate donation relation.
5739 + if(donee == other_fq->owner) {
5740 + TRACE_TASK(t, "Donee %s/%d is an owner of fq %d.\n",
5741 + donee->comm, donee->pid,
5742 + ikglp_get_idx(sem, other_fq));
5743 +
5744 + ikglp_remove_donation_from_owner(&other_donor_info->prio_donation.hp_binheap_node, other_fq, sem, flags);
5745 + lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!!
5746 + }
5747 + else {
5748 +			TRACE_TASK(t, "Donee %s/%d is blocked in fq %d.\n",
5749 + donee->comm, donee->pid,
5750 + ikglp_get_idx(sem, other_fq));
5751 +
5752 + ikglp_remove_donation_from_fq_waiter(donee, &other_donor_info->prio_donation.hp_binheap_node);
5753 + if(donee == other_fq->hp_waiter) {
5754 + TRACE_TASK(t, "Donee %s/%d was an hp_waiter of fq %d. Rechecking hp_waiter.\n",
5755 + donee->comm, donee->pid,
5756 + ikglp_get_idx(sem, other_fq));
5757 +
5758 + other_fq->hp_waiter = ikglp_find_hp_waiter(other_fq, NULL);
5759 + TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n",
5760 + ikglp_get_idx(sem, other_fq),
5761 + (other_fq->hp_waiter) ? other_fq->hp_waiter->comm : "nil",
5762 + (other_fq->hp_waiter) ? other_fq->hp_waiter->pid : -1);
5763 +
5764 + ikglp_refresh_owners_prio_decrease(other_fq, sem, flags); // unlocks sem->lock. reacquire it.
5765 + lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!!
5766 + }
5767 + }
5768 + }
5769 + else if(to_steal) {
5770 + TRACE_TASK(t, "Rechecking priority inheritance of fq %d, triggered by stealing.\n",
5771 + ikglp_get_idx(sem, to_steal));
5772 +
5773 + if(need_steal_prio_reeval) {
5774 + ikglp_refresh_owners_prio_decrease(to_steal, sem, flags); // unlocks sem->lock. reacquire it.
5775 + lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!!
5776 + }
5777 + }
5778 +
5779 + // check for new HP waiter.
5780 + if(new_on_fq) {
5781 + if(fq == fq_of_new_on_fq) {
5782 + // fq->owner is null, so just update the hp_waiter without locking.
5783 + if(new_on_fq == fq->hp_waiter) {
5784 +				TRACE_TASK(t, "new_on_fq (%s/%d) is already hp_waiter.\n",
5785 + fq->hp_waiter->comm, fq->hp_waiter->pid);
5786 + fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter); // set this just to be sure...
5787 + }
5788 + else if(litmus->compare(new_on_fq, fq->hp_waiter)) {
5789 + if(fq->hp_waiter)
5790 + TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n",
5791 + fq->hp_waiter->comm, fq->hp_waiter->pid);
5792 + else
5793 + TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n");
5794 +
5795 + fq->hp_waiter = new_on_fq;
5796 + fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter);
5797 +
5798 + TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n",
5799 + ikglp_get_idx(sem, fq),
5800 + (fq->hp_waiter) ? fq->hp_waiter->comm : "nil",
5801 + (fq->hp_waiter) ? fq->hp_waiter->pid : -1);
5802 + }
5803 + }
5804 + else {
5805 + ikglp_refresh_owners_prio_increase(new_on_fq, fq_of_new_on_fq, sem, flags); // unlocks sem->lock. reacquire it.
5806 + lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!!
5807 + }
5808 + }
5809 +
5810 +wake_kludge:
5811 + if(waitqueue_active(&fq->wait))
5812 + {
5813 + wait_queue_t *wait = list_entry(fq->wait.task_list.next, wait_queue_t, task_list);
5814 + ikglp_wait_state_t *fq_wait = container_of(wait, ikglp_wait_state_t, fq_node);
5815 + next = (struct task_struct*) wait->private;
5816 +
5817 + __remove_wait_queue(&fq->wait, wait);
5818 +
5819 + TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n",
5820 + ikglp_get_idx(sem, fq),
5821 + next->comm, next->pid);
5822 +
5823 + // migrate wait-state to fifo-memory.
5824 + ikglp_migrate_fq_to_owner_heap_nodes(sem, fq, fq_wait);
5825 +
5826 +		/* next becomes the resource holder */
5827 + fq->owner = next;
5828 + tsk_rt(next)->blocked_lock = NULL;
5829 +
5830 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
5831 + if(sem->aff_obs) {
5832 + sem->aff_obs->ops->notify_acquired(sem->aff_obs, fq, next);
5833 + }
5834 +#endif
5835 +
5836 + /* determine new hp_waiter if necessary */
5837 + if (next == fq->hp_waiter) {
5838 +
5839 + TRACE_TASK(next, "was highest-prio waiter\n");
5840 + /* next has the highest priority --- it doesn't need to
5841 + * inherit. However, we need to make sure that the
5842 + * next-highest priority in the queue is reflected in
5843 + * hp_waiter. */
5844 + fq->hp_waiter = ikglp_find_hp_waiter(fq, NULL);
5845 + TRACE_TASK(next, "New hp_waiter for fq %d is %s/%d!\n",
5846 + ikglp_get_idx(sem, fq),
5847 + (fq->hp_waiter) ? fq->hp_waiter->comm : "nil",
5848 + (fq->hp_waiter) ? fq->hp_waiter->pid : -1);
5849 +
5850 + fq->nest.hp_waiter_eff_prio = (fq->hp_waiter) ?
5851 + effective_priority(fq->hp_waiter) : NULL;
5852 +
5853 + if (fq->hp_waiter)
5854 + TRACE_TASK(fq->hp_waiter, "is new highest-prio waiter\n");
5855 + else
5856 + TRACE("no further waiters\n");
5857 +
5858 + raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock);
5859 +
5860 +// TRACE_TASK(next, "Heap Before:\n");
5861 +// print_hp_waiters(tsk_rt(next)->hp_blocked_tasks.root, 0);
5862 +
5863 + binheap_add(&fq->nest.hp_binheap_node,
5864 + &tsk_rt(next)->hp_blocked_tasks,
5865 + struct nested_info,
5866 + hp_binheap_node);
5867 +
5868 +// TRACE_TASK(next, "Heap After:\n");
5869 +// print_hp_waiters(tsk_rt(next)->hp_blocked_tasks.root, 0);
5870 +
5871 + raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock);
5872 + }
5873 + else {
5874 + /* Well, if 'next' is not the highest-priority waiter,
5875 + * then it (probably) ought to inherit the highest-priority
5876 + * waiter's priority. */
5877 + TRACE_TASK(next, "is not hp_waiter of replica %d. hp_waiter is %s/%d\n",
5878 + ikglp_get_idx(sem, fq),
5879 + (fq->hp_waiter) ? fq->hp_waiter->comm : "nil",
5880 + (fq->hp_waiter) ? fq->hp_waiter->pid : -1);
5881 +
5882 + raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock);
5883 +
5884 + binheap_add(&fq->nest.hp_binheap_node,
5885 + &tsk_rt(next)->hp_blocked_tasks,
5886 + struct nested_info,
5887 + hp_binheap_node);
5888 +
5889 + /* It is possible that 'next' *should* be the hp_waiter, but isn't
5890 + * because that update hasn't yet executed (update operation is
5891 + * probably blocked on mutex->lock). So only inherit if the top of
5892 + * 'next's top heap node is indeed the effective prio. of hp_waiter.
5893 + * (We use fq->hp_waiter_eff_prio instead of effective_priority(hp_waiter)
5894 + * since the effective priority of hp_waiter can change (and the
5895 + * update has not made it to this lock).)
5896 + */
5897 + if(likely(top_priority(&tsk_rt(next)->hp_blocked_tasks) ==
5898 + fq->nest.hp_waiter_eff_prio))
5899 + {
5900 + if(fq->nest.hp_waiter_eff_prio)
5901 + litmus->increase_prio(next, fq->nest.hp_waiter_eff_prio);
5902 + else
5903 + WARN_ON(1);
5904 + }
5905 +
5906 + raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock);
5907 + }
5908 +
5909 +
5910 + // wake up the new resource holder!
5911 + wake_up_process(next);
5912 + }
5913 + if(fq_of_new_on_fq && fq_of_new_on_fq != fq && fq_of_new_on_fq->count == 1) {
5914 +		// The task we promoted went to an empty FQ. (Why didn't stealing pick this up?)
5915 +		// Wake up that task too.
5916 +
5917 + BUG_ON(fq_of_new_on_fq->owner != NULL);
5918 +
5919 + fq = fq_of_new_on_fq;
5920 + fq_of_new_on_fq = NULL;
5921 + goto wake_kludge;
5922 + }
5923 +
5924 + unlock_fine_irqrestore(&sem->lock, flags);
5925 + unlock_global_irqrestore(dgl_lock, flags);
5926 +
5927 + raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
5928 +
5929 +out:
5930 + return err;
5931 +}
5932 +
5933 +
5934 +
5935 +int ikglp_close(struct litmus_lock* l)
5936 +{
5937 + struct task_struct *t = current;
5938 + struct ikglp_semaphore *sem = ikglp_from_lock(l);
5939 + unsigned long flags;
5940 +
5941 + int owner = 0;
5942 + int i;
5943 +
5944 + raw_spin_lock_irqsave(&sem->real_lock, flags);
5945 +
5946 + for(i = 0; i < sem->nr_replicas; ++i) {
5947 + if(sem->fifo_queues[i].owner == t) {
5948 + owner = 1;
5949 + break;
5950 + }
5951 + }
5952 +
5953 + raw_spin_unlock_irqrestore(&sem->real_lock, flags);
5954 +
5955 + if (owner)
5956 + ikglp_unlock(l);
5957 +
5958 + return 0;
5959 +}
5960 +
5961 +void ikglp_free(struct litmus_lock* l)
5962 +{
5963 + struct ikglp_semaphore *sem = ikglp_from_lock(l);
5964 +
5965 + kfree(sem->fifo_queues);
5966 + kfree(sem);
5967 +}
5968 +
5969 +
5970 +
5971 +struct litmus_lock* ikglp_new(int m,
5972 + struct litmus_lock_ops* ops,
5973 + void* __user arg)
5974 +{
5975 + struct ikglp_semaphore* sem;
5976 + int nr_replicas = 0;
5977 + int i;
5978 +
5979 + if(!access_ok(VERIFY_READ, arg, sizeof(nr_replicas)))
5980 + {
5981 + return(NULL);
5982 + }
5983 + if(__copy_from_user(&nr_replicas, arg, sizeof(nr_replicas)))
5984 + {
5985 + return(NULL);
5986 + }
5987 + if(nr_replicas < 1)
5988 + {
5989 + return(NULL);
5990 + }
5991 +
5992 + sem = kmalloc(sizeof(*sem), GFP_KERNEL);
5993 + if(!sem)
5994 + {
5995 + return NULL;
5996 + }
5997 +
5998 + sem->fifo_queues = kmalloc(sizeof(struct fifo_queue)*nr_replicas, GFP_KERNEL);
5999 + if(!sem->fifo_queues)
6000 + {
6001 + kfree(sem);
6002 + return NULL;
6003 + }
6004 +
6005 + sem->litmus_lock.ops = ops;
6006 +
6007 +#ifdef CONFIG_DEBUG_SPINLOCK
6008 + {
6009 + __raw_spin_lock_init(&sem->lock, ((struct litmus_lock*)sem)->cheat_lockdep, &((struct litmus_lock*)sem)->key);
6010 + }
6011 +#else
6012 + raw_spin_lock_init(&sem->lock);
6013 +#endif
6014 +
6015 + raw_spin_lock_init(&sem->real_lock);
6016 +
6017 + sem->nr_replicas = nr_replicas;
6018 + sem->m = m;
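+	// max_fifo_len = ceil(m/k): e.g., m = 9 requests and k = 4 replicas
+	// gives 9/4 + 1 = 3 slots per FIFO queue.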
6019 + sem->max_fifo_len = (sem->m/nr_replicas) + ((sem->m%nr_replicas) != 0);
6020 + sem->nr_in_fifos = 0;
6021 +
6022 + TRACE("New IKGLP Sem: m = %d, k = %d, max fifo_len = %d\n",
6023 + sem->m,
6024 + sem->nr_replicas,
6025 + sem->max_fifo_len);
6026 +
6027 + for(i = 0; i < nr_replicas; ++i)
6028 + {
6029 + struct fifo_queue* q = &(sem->fifo_queues[i]);
6030 +
6031 + q->owner = NULL;
6032 + q->hp_waiter = NULL;
6033 + init_waitqueue_head(&q->wait);
6034 + q->count = 0;
6035 +
6036 + q->global_heap_node.task = NULL;
6037 + INIT_BINHEAP_NODE(&q->global_heap_node.node);
6038 +
6039 + q->donee_heap_node.task = NULL;
6040 + q->donee_heap_node.donor_info = NULL;
6041 + q->donee_heap_node.fq = NULL;
6042 + INIT_BINHEAP_NODE(&q->donee_heap_node.node);
6043 +
6044 + q->nest.lock = (struct litmus_lock*)sem;
6045 + q->nest.hp_waiter_eff_prio = NULL;
6046 + q->nest.hp_waiter_ptr = &q->hp_waiter;
6047 + INIT_BINHEAP_NODE(&q->nest.hp_binheap_node);
6048 + }
6049 +
6050 + sem->shortest_fifo_queue = &sem->fifo_queues[0];
6051 +
6052 + sem->top_m_size = 0;
6053 +
6054 + // init heaps
6055 + INIT_BINHEAP_HANDLE(&sem->top_m, ikglp_min_heap_base_priority_order);
6056 + INIT_BINHEAP_HANDLE(&sem->not_top_m, ikglp_max_heap_base_priority_order);
6057 + INIT_BINHEAP_HANDLE(&sem->donees, ikglp_min_heap_donee_order);
6058 + INIT_BINHEAP_HANDLE(&sem->priority_queue, ikglp_max_heap_base_priority_order);
6059 + INIT_BINHEAP_HANDLE(&sem->donors, ikglp_donor_max_heap_base_priority_order);
6060 +
6061 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
6062 + sem->aff_obs = NULL;
6063 +#endif
6064 +
6065 + return &sem->litmus_lock;
6066 +}
6067 +
6068 +
6069 +
6070 +
6071 +
6072 +
6073 +
6074 +
6075 +
6076 +
6077 +
6078 +
6079 +
6080 +
6081 +
6082 +
6083 +
6084 +
6085 +
6086 +
6087 +
6088 +
6089 +
6090 +
6091 +
6092 +
6093 +
6094 +
6095 +
6096 +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
6097 +
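+// Replica <-> GPU mapping: with nr_rsrc GPUs and nr_simult replicas per GPU,
+// replica r is served by GPU (r % nr_rsrc) + offset.  For example, with
+// nr_rsrc = 2, offset = 0, and nr_simult = 2, replicas {0, 2} map to GPU 0
+// and replicas {1, 3} map to GPU 1.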
6098 +static inline int __replica_to_gpu(struct ikglp_affinity* aff, int replica)
6099 +{
6100 + int gpu = replica % aff->nr_rsrc;
6101 + return gpu;
6102 +}
6103 +
6104 +static inline int replica_to_gpu(struct ikglp_affinity* aff, int replica)
6105 +{
6106 + int gpu = __replica_to_gpu(aff, replica) + aff->offset;
6107 + return gpu;
6108 +}
6109 +
6110 +static inline int gpu_to_base_replica(struct ikglp_affinity* aff, int gpu)
6111 +{
6112 + int replica = gpu - aff->offset;
6113 + return replica;
6114 +}
6115 +
6116 +
6117 +int ikglp_aff_obs_close(struct affinity_observer* obs)
6118 +{
6119 + return 0;
6120 +}
6121 +
6122 +void ikglp_aff_obs_free(struct affinity_observer* obs)
6123 +{
6124 + struct ikglp_affinity *ikglp_aff = ikglp_aff_obs_from_aff_obs(obs);
6125 + kfree(ikglp_aff->nr_cur_users_on_rsrc);
6126 + kfree(ikglp_aff->q_info);
6127 + kfree(ikglp_aff);
6128 +}
6129 +
6130 +static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* ops,
6131 + struct ikglp_affinity_ops* ikglp_ops,
6132 + void* __user args)
6133 +{
6134 + struct ikglp_affinity* ikglp_aff;
6135 + struct gpu_affinity_observer_args aff_args;
6136 + struct ikglp_semaphore* sem;
6137 + int i;
6138 + unsigned long flags;
6139 +
6140 + if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) {
6141 + return(NULL);
6142 + }
6143 + if(__copy_from_user(&aff_args, args, sizeof(aff_args))) {
6144 + return(NULL);
6145 + }
6146 +
6147 + sem = (struct ikglp_semaphore*) get_lock_from_od(aff_args.obs.lock_od);
6148 +
6149 + if(sem->litmus_lock.type != IKGLP_SEM) {
6150 + TRACE_CUR("Lock type not supported. Type = %d\n", sem->litmus_lock.type);
6151 + return(NULL);
6152 + }
6153 +
6154 + if((aff_args.nr_simult_users <= 0) ||
6155 + (sem->nr_replicas%aff_args.nr_simult_users != 0)) {
6156 + TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users "
6157 + "(%d) per replica. #replicas should be evenly divisible "
6158 + "by #simult_users.\n",
6159 + sem->litmus_lock.ident,
6160 + sem->nr_replicas,
6161 + aff_args.nr_simult_users);
6162 + return(NULL);
6163 + }
6164 +
6165 + if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) {
6166 + TRACE_CUR("System does not support #simult_users > %d. %d requested.\n",
6167 + NV_MAX_SIMULT_USERS, aff_args.nr_simult_users);
6168 +// return(NULL);
6169 + }
6170 +
6171 + ikglp_aff = kmalloc(sizeof(*ikglp_aff), GFP_KERNEL);
6172 + if(!ikglp_aff) {
6173 + return(NULL);
6174 + }
6175 +
6176 + ikglp_aff->q_info = kmalloc(sizeof(struct ikglp_queue_info)*sem->nr_replicas, GFP_KERNEL);
6177 + if(!ikglp_aff->q_info) {
6178 + kfree(ikglp_aff);
6179 + return(NULL);
6180 + }
6181 +
6182 + ikglp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL);
6183 + if(!ikglp_aff->nr_cur_users_on_rsrc) {
6184 + kfree(ikglp_aff->q_info);
6185 + kfree(ikglp_aff);
6186 + return(NULL);
6187 + }
6188 +
6189 + affinity_observer_new(&ikglp_aff->obs, ops, &aff_args.obs);
6190 +
6191 + ikglp_aff->ops = ikglp_ops;
6192 + ikglp_aff->offset = aff_args.replica_to_gpu_offset;
6193 + ikglp_aff->nr_simult = aff_args.nr_simult_users;
6194 + ikglp_aff->nr_rsrc = sem->nr_replicas / ikglp_aff->nr_simult;
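+	// e.g., 8 replicas with nr_simult = 2 users per GPU -> nr_rsrc = 4 GPUs.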
6195 + ikglp_aff->relax_max_fifo_len = (aff_args.relaxed_rules) ? 1 : 0;
6196 +
6197 + TRACE_CUR("GPU affinity_observer: offset = %d, nr_simult = %d, "
6198 + "nr_rsrc = %d, relaxed_fifo_len = %d\n",
6199 + ikglp_aff->offset, ikglp_aff->nr_simult, ikglp_aff->nr_rsrc,
6200 + ikglp_aff->relax_max_fifo_len);
6201 +
6202 + memset(ikglp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc));
6203 +
6204 + for(i = 0; i < sem->nr_replicas; ++i) {
6205 + ikglp_aff->q_info[i].q = &sem->fifo_queues[i];
6206 + ikglp_aff->q_info[i].estimated_len = 0;
6207 +
6208 + // multiple q_info's will point to the same resource (aka GPU) if
6209 + // aff_args.nr_simult_users > 1
6210 + ikglp_aff->q_info[i].nr_cur_users = &ikglp_aff->nr_cur_users_on_rsrc[__replica_to_gpu(ikglp_aff,i)];
6211 + }
6212 +
6213 + // attach observer to the lock
6214 + raw_spin_lock_irqsave(&sem->real_lock, flags);
6215 + sem->aff_obs = ikglp_aff;
6216 + raw_spin_unlock_irqrestore(&sem->real_lock, flags);
6217 +
6218 + return &ikglp_aff->obs;
6219 +}
6220 +
6221 +
6222 +
6223 +
6224 +static int gpu_replica_to_resource(struct ikglp_affinity* aff,
6225 + struct fifo_queue* fq) {
6226 + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
6227 + return(replica_to_gpu(aff, ikglp_get_idx(sem, fq)));
6228 +}
6229 +
6230 +
6231 +// Smart IKGLP Affinity
6232 +
6233 +//static inline struct ikglp_queue_info* ikglp_aff_find_shortest(struct ikglp_affinity* aff)
6234 +//{
6235 +// struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
6236 +// struct ikglp_queue_info *shortest = &aff->q_info[0];
6237 +// int i;
6238 +//
6239 +// for(i = 1; i < sem->nr_replicas; ++i) {
6240 +// if(aff->q_info[i].estimated_len < shortest->estimated_len) {
6241 +// shortest = &aff->q_info[i];
6242 +// }
6243 +// }
6244 +//
6245 +// return(shortest);
6246 +//}
6247 +
6248 +struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t)
6249 +{
6250 +	// advise_enqueue must be careful not to break IKGLP rules:
6251 +	//  * No queue can be longer than ceil(m/k).  We may return
6252 +	//    such a queue, but IKGLP is smart enough to send requests
6253 +	//    to donors or the PQ instead.
6254 +	//  * A queue cannot be left idle if there are waiting PQ/donor requests
6255 +	//    -- needed to guarantee parallel progress of waiters.
6256 +	//
6257 +	// We may be able to relax some of these constraints, but this will have
6258 +	//  to be carefully evaluated.
6259 +	//
6260 +	// Heuristic strategy: Find the shortest queue that is not full.
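+	//
+	// The cost of enqueueing t on queue i is the queue's estimated backlog
+	// plus t's estimated critical-section length for the migration distance
+	// from t's last GPU to queue i's GPU; for instance, a 4ms backlog plus a
+	// 1ms estimate after a near migration yields a 5ms candidate cost.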
6261 +
6262 + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
6263 + lt_t min_len;
6264 + int min_nr_users;
6265 + struct ikglp_queue_info *shortest;
6266 + struct fifo_queue *to_enqueue;
6267 + int i;
6268 + int affinity_gpu;
6269 +
6270 + int max_fifo_len = (aff->relax_max_fifo_len) ?
6271 + sem->m : sem->max_fifo_len;
6272 +
6273 +	// simply pick the shortest queue if we have no affinity, or if we have
6274 +	// affinity with the shortest
6275 + if(unlikely(tsk_rt(t)->last_gpu < 0)) {
6276 + affinity_gpu = aff->offset; // first gpu
6277 + TRACE_CUR("no affinity\n");
6278 + }
6279 + else {
6280 + affinity_gpu = tsk_rt(t)->last_gpu;
6281 + }
6282 +
6283 + // all things being equal, let's start with the queue with which we have
6284 + // affinity. this helps us maintain affinity even when we don't have
6285 +	// an estimate for local-affinity execution time (i.e., 2nd time on GPU)
6286 + shortest = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)];
6287 +
6288 + // if(shortest == aff->shortest_queue) {
6289 + // TRACE_CUR("special case: have affinity with shortest queue\n");
6290 + // goto out;
6291 + // }
6292 +
6293 + min_len = shortest->estimated_len + get_gpu_estimate(t, MIG_LOCAL);
6294 + min_nr_users = *(shortest->nr_cur_users);
6295 +
6296 + TRACE_CUR("cs is %llu on queue %d (count = %d): est len = %llu\n",
6297 + get_gpu_estimate(t, MIG_LOCAL),
6298 + ikglp_get_idx(sem, shortest->q),
6299 + shortest->q->count,
6300 + min_len);
6301 +
6302 + for(i = 0; i < sem->nr_replicas; ++i) {
6303 + if(&aff->q_info[i] != shortest) {
6304 + if(aff->q_info[i].q->count < max_fifo_len) {
6305 +
6306 + lt_t est_len =
6307 + aff->q_info[i].estimated_len +
6308 + get_gpu_estimate(t,
6309 + gpu_migration_distance(tsk_rt(t)->last_gpu,
6310 + replica_to_gpu(aff, i)));
6311 +
6312 +			// the i-th queue is shorter, or the lengths are equal and the i-th
6313 +			// queue has fewer total users.
6314 +			//
6315 +			// tie-break on the smallest number of simultaneous users.  this only
6316 +			// kicks in when there is more than one empty queue.
6317 + if((shortest->q->count >= max_fifo_len) || /* 'shortest' is full and i-th queue is not */
6318 + (est_len < min_len) || /* i-th queue has shortest length */
6319 + ((est_len == min_len) && /* equal lengths, but one has fewer over-all users */
6320 + (*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
6321 +
6322 + shortest = &aff->q_info[i];
6323 + min_len = est_len;
6324 + min_nr_users = *(aff->q_info[i].nr_cur_users);
6325 + }
6326 +
6327 + TRACE_CUR("cs is %llu on queue %d (count = %d): est len = %llu\n",
6328 + get_gpu_estimate(t,
6329 + gpu_migration_distance(tsk_rt(t)->last_gpu,
6330 + replica_to_gpu(aff, i))),
6331 + ikglp_get_idx(sem, aff->q_info[i].q),
6332 + aff->q_info[i].q->count,
6333 + est_len);
6334 + }
6335 + else {
6336 + TRACE_CUR("queue %d is too long. ineligible for enqueue.\n",
6337 + ikglp_get_idx(sem, aff->q_info[i].q));
6338 + }
6339 + }
6340 + }
6341 +
6342 + if(shortest->q->count >= max_fifo_len) {
6343 + TRACE_CUR("selected fq %d is too long, but returning it anyway.\n",
6344 + ikglp_get_idx(sem, shortest->q));
6345 + }
6346 +
6347 + to_enqueue = shortest->q;
6348 + TRACE_CUR("enqueue on fq %d (count = %d) (non-aff wanted fq %d)\n",
6349 + ikglp_get_idx(sem, to_enqueue),
6350 + to_enqueue->count,
6351 + ikglp_get_idx(sem, sem->shortest_fifo_queue));
6352 +
6353 + return to_enqueue;
6354 +
6355 + //return(sem->shortest_fifo_queue);
6356 +}
6357 +
6358 +
6359 +
6360 +
6361 +static ikglp_wait_state_t* pick_steal(struct ikglp_affinity* aff,
6362 + int dest_gpu,
6363 + struct fifo_queue* fq)
6364 +{
6365 + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
6366 + ikglp_wait_state_t *wait = NULL;
6367 + int max_improvement = -(MIG_NONE+1);
6368 + int replica = ikglp_get_idx(sem, fq);
6369 +
6370 + if(waitqueue_active(&fq->wait)) {
6371 + int this_gpu = replica_to_gpu(aff, replica);
6372 + struct list_head *pos;
6373 +
6374 + list_for_each(pos, &fq->wait.task_list) {
6375 + wait_queue_t *fq_wait = list_entry(pos, wait_queue_t, task_list);
6376 + ikglp_wait_state_t *tmp_wait = container_of(fq_wait, ikglp_wait_state_t, fq_node);
6377 +
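+			// Affinity improvement from moving this waiter to dest_gpu:
+			// its migration distance to the GPU it currently waits on minus
+			// its migration distance to dest_gpu.  A positive value means
+			// dest_gpu is closer to the waiter's last-used GPU.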
6378 + int tmp_improvement =
6379 + gpu_migration_distance(this_gpu, tsk_rt(tmp_wait->task)->last_gpu) -
6380 + gpu_migration_distance(dest_gpu, tsk_rt(tmp_wait->task)->last_gpu);
6381 +
6382 + if(tmp_improvement > max_improvement) {
6383 + wait = tmp_wait;
6384 + max_improvement = tmp_improvement;
6385 +
6386 + if(max_improvement >= (MIG_NONE-1)) {
6387 + goto out;
6388 + }
6389 + }
6390 + }
6391 +
6392 + BUG_ON(!wait);
6393 + }
6394 + else {
6395 + TRACE_CUR("fq %d is empty!\n", replica);
6396 + }
6397 +
6398 +out:
6399 +
6400 + TRACE_CUR("Candidate victim from fq %d is %s/%d. aff improvement = %d.\n",
6401 + replica,
6402 + (wait) ? wait->task->comm : "nil",
6403 + (wait) ? wait->task->pid : -1,
6404 + max_improvement);
6405 +
6406 + return wait;
6407 +}
6408 +
6409 +
6410 +ikglp_wait_state_t* gpu_ikglp_advise_steal(struct ikglp_affinity* aff,
6411 + struct fifo_queue* dst)
6412 +{
6413 +	// Heuristic strategy: Find the task with the greatest improvement in affinity.
6414 + //
6415 + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
6416 + ikglp_wait_state_t *to_steal_state = NULL;
6417 +// ikglp_wait_state_t *default_to_steal_state = ikglp_find_hp_waiter_to_steal(sem);
6418 + int max_improvement = -(MIG_NONE+1);
6419 + int replica, i;
6420 + int dest_gpu;
6421 +
6422 + replica = ikglp_get_idx(sem, dst);
6423 + dest_gpu = replica_to_gpu(aff, replica);
6424 +
6425 + for(i = 0; i < sem->nr_replicas; ++i) {
6426 + ikglp_wait_state_t *tmp_to_steal_state =
6427 + pick_steal(aff, dest_gpu, &sem->fifo_queues[i]);
6428 +
6429 + if(tmp_to_steal_state) {
6430 + int tmp_improvement =
6431 + gpu_migration_distance(replica_to_gpu(aff, i), tsk_rt(tmp_to_steal_state->task)->last_gpu) -
6432 + gpu_migration_distance(dest_gpu, tsk_rt(tmp_to_steal_state->task)->last_gpu);
6433 +
6434 + if(tmp_improvement > max_improvement) {
6435 + to_steal_state = tmp_to_steal_state;
6436 + max_improvement = tmp_improvement;
6437 +
6438 + if(max_improvement >= (MIG_NONE-1)) {
6439 + goto out;
6440 + }
6441 + }
6442 + }
6443 + }
6444 +
6445 +out:
6446 + if(!to_steal_state) {
6447 + TRACE_CUR("Could not find anyone to steal.\n");
6448 + }
6449 + else {
6450 + TRACE_CUR("Selected victim %s/%d on fq %d (GPU %d) for fq %d (GPU %d): improvement = %d\n",
6451 + to_steal_state->task->comm, to_steal_state->task->pid,
6452 + ikglp_get_idx(sem, to_steal_state->donee_heap_node.fq),
6453 + replica_to_gpu(aff, ikglp_get_idx(sem, to_steal_state->donee_heap_node.fq)),
6454 + ikglp_get_idx(sem, dst),
6455 + dest_gpu,
6456 + max_improvement);
6457 +
6458 +// TRACE_CUR("Non-aff wanted to select victim %s/%d on fq %d (GPU %d) for fq %d (GPU %d): improvement = %d\n",
6459 +// default_to_steal_state->task->comm, default_to_steal_state->task->pid,
6460 +// ikglp_get_idx(sem, default_to_steal_state->donee_heap_node.fq),
6461 +// replica_to_gpu(aff, ikglp_get_idx(sem, default_to_steal_state->donee_heap_node.fq)),
6462 +// ikglp_get_idx(sem, dst),
6463 +// replica_to_gpu(aff, ikglp_get_idx(sem, dst)),
6464 +//
6465 +// gpu_migration_distance(
6466 +// replica_to_gpu(aff, ikglp_get_idx(sem, default_to_steal_state->donee_heap_node.fq)),
6467 +// tsk_rt(default_to_steal_state->task)->last_gpu) -
6468 +// gpu_migration_distance(dest_gpu, tsk_rt(default_to_steal_state->task)->last_gpu));
6469 + }
6470 +
6471 + return(to_steal_state);
6472 +}
6473 +
6474 +
6475 +static inline int has_donor(wait_queue_t* fq_wait)
6476 +{
6477 + ikglp_wait_state_t *wait = container_of(fq_wait, ikglp_wait_state_t, fq_node);
6478 + return(wait->donee_heap_node.donor_info != NULL);
6479 +}
6480 +
6481 +static ikglp_donee_heap_node_t* pick_donee(struct ikglp_affinity* aff,
6482 + struct fifo_queue* fq,
6483 + int* dist_from_head)
6484 +{
6485 + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
6486 + struct task_struct *donee;
6487 + ikglp_donee_heap_node_t *donee_node;
6488 + struct task_struct *mth_highest = ikglp_mth_highest(sem);
6489 +
6490 +// lt_t now = litmus_clock();
6491 +//
6492 +// TRACE_CUR("fq %d: mth_highest: %s/%d, deadline = %d: (donor) = ??? ",
6493 +// ikglp_get_idx(sem, fq),
6494 +// mth_highest->comm, mth_highest->pid,
6495 +// (int)get_deadline(mth_highest) - now);
6496 +
6497 + if(fq->owner &&
6498 + fq->donee_heap_node.donor_info == NULL &&
6499 + mth_highest != fq->owner &&
6500 + litmus->__compare(mth_highest, BASE, fq->owner, BASE)) {
6501 + donee = fq->owner;
6502 + donee_node = &(fq->donee_heap_node);
6503 + *dist_from_head = 0;
6504 +
6505 + BUG_ON(donee != donee_node->task);
6506 +
6507 + TRACE_CUR("picked owner of fq %d as donee\n",
6508 + ikglp_get_idx(sem, fq));
6509 +
6510 + goto out;
6511 + }
6512 + else if(waitqueue_active(&fq->wait)) {
6513 + struct list_head *pos;
6514 +
6515 +
6516 +// TRACE_CUR("fq %d: owner: %s/%d, deadline = %d: (donor) = %s/%d "
6517 +// "(mth_highest != fq->owner) = %d "
6518 +// "(mth_highest > fq->owner) = %d\n",
6519 +// ikglp_get_idx(sem, fq),
6520 +// (fq->owner) ? fq->owner->comm : "nil",
6521 +// (fq->owner) ? fq->owner->pid : -1,
6522 +// (fq->owner) ? (int)get_deadline(fq->owner) - now : -999,
6523 +// (fq->donee_heap_node.donor_info) ? fq->donee_heap_node.donor_info->task->comm : "nil",
6524 +// (fq->donee_heap_node.donor_info) ? fq->donee_heap_node.donor_info->task->pid : -1,
6525 +// (mth_highest != fq->owner),
6526 +// (litmus->__compare(mth_highest, BASE, fq->owner, BASE)));
6527 +
6528 +
6529 + *dist_from_head = 1;
6530 +
6531 + // iterating from the start of the queue is nice since this means
6532 + // the donee will be closer to obtaining a resource.
6533 + list_for_each(pos, &fq->wait.task_list) {
6534 + wait_queue_t *fq_wait = list_entry(pos, wait_queue_t, task_list);
6535 + ikglp_wait_state_t *wait = container_of(fq_wait, ikglp_wait_state_t, fq_node);
6536 +
6537 +// TRACE_CUR("fq %d: waiter %d: %s/%d, deadline = %d (donor) = %s/%d "
6538 +// "(mth_highest != wait->task) = %d "
6539 +// "(mth_highest > wait->task) = %d\n",
6540 +// ikglp_get_idx(sem, fq),
6541 +// dist_from_head,
6542 +// wait->task->comm, wait->task->pid,
6543 +// (int)get_deadline(wait->task) - now,
6544 +// (wait->donee_heap_node.donor_info) ? wait->donee_heap_node.donor_info->task->comm : "nil",
6545 +// (wait->donee_heap_node.donor_info) ? wait->donee_heap_node.donor_info->task->pid : -1,
6546 +// (mth_highest != wait->task),
6547 +// (litmus->__compare(mth_highest, BASE, wait->task, BASE)));
6548 +
6549 +
6550 + if(!has_donor(fq_wait) &&
6551 + mth_highest != wait->task &&
6552 + litmus->__compare(mth_highest, BASE, wait->task, BASE)) {
6553 + donee = (struct task_struct*) fq_wait->private;
6554 + donee_node = &wait->donee_heap_node;
6555 +
6556 + BUG_ON(donee != donee_node->task);
6557 +
6558 + TRACE_CUR("picked waiter in fq %d as donee\n",
6559 + ikglp_get_idx(sem, fq));
6560 +
6561 + goto out;
6562 + }
6563 + ++(*dist_from_head);
6564 + }
6565 + }
6566 +
6567 + donee = NULL;
6568 + donee_node = NULL;
6569 + //*dist_from_head = sem->max_fifo_len + 1;
6570 + *dist_from_head = IKGLP_INVAL_DISTANCE;
6571 +
6572 + TRACE_CUR("Found no one to be donee in fq %d!\n", ikglp_get_idx(sem, fq));
6573 +
6574 +out:
6575 +
6576 + TRACE_CUR("Candidate donee for fq %d is %s/%d (dist_from_head = %d)\n",
6577 + ikglp_get_idx(sem, fq),
6578 + (donee) ? (donee)->comm : "nil",
6579 + (donee) ? (donee)->pid : -1,
6580 + *dist_from_head);
6581 +
6582 + return donee_node;
6583 +}
6584 +
6585 +ikglp_donee_heap_node_t* gpu_ikglp_advise_donee_selection(
6586 + struct ikglp_affinity* aff,
6587 + struct task_struct* donor)
6588 +{
6589 +	// Heuristic strategy: Find the highest-priority donee that is waiting on
6590 + // a queue closest to our affinity. (1) The donee CANNOT already have a
6591 + // donor (exception: donee is the lowest-prio task in the donee heap).
6592 + // (2) Requests in 'top_m' heap are ineligible.
6593 + //
6594 +	// Further strategy: amongst eligible donees waiting for the same GPU, pick
6595 + // the one closest to the head of the FIFO queue (including owners).
6596 + //
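+	// The search below starts at the donor's last-used GPU and walks every
+	// GPU once (wrapping around), but only inspects queues on GPUs whose
+	// migration distance would improve on the best donee found so far.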
6597 + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
6598 + ikglp_donee_heap_node_t *donee_node;
6599 + gpu_migration_dist_t distance;
6600 + int start, i, j;
6601 +
6602 + ikglp_donee_heap_node_t *default_donee;
6603 + ikglp_wait_state_t *default_donee_donor_info;
6604 +
6605 + if(tsk_rt(donor)->last_gpu < 0) {
6606 + // no affinity. just return the min prio, like standard IKGLP
6607 + // TODO: Find something closer to the head of the queue??
6608 + donee_node = binheap_top_entry(&sem->donees,
6609 + ikglp_donee_heap_node_t,
6610 + node);
6611 + goto out;
6612 + }
6613 +
6614 +
6615 +	// Temporarily break any donation relation of the default donee (the lowest-
6616 +	// prio task in the FIFO queues) to make it eligible for selection below.
6617 +	//
6618 +	// NOTE: The original donor relation *must* be restored, even if we select
6619 +	// the default donee through affinity-aware selection, before returning
6620 + // from this function so we don't screw up our heap ordering.
6621 + // The standard IKGLP algorithm will steal the donor relationship if needed.
6622 + default_donee = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node);
6623 + default_donee_donor_info = default_donee->donor_info; // back-up donor relation
6624 + default_donee->donor_info = NULL; // temporarily break any donor relation.
6625 +
6626 + // initialize our search
6627 + donee_node = NULL;
6628 + distance = MIG_NONE;
6629 +
6630 + // TODO: The below search logic may work well for locating nodes to steal
6631 + // when an FQ goes idle. Validate this code and apply it to stealing.
6632 +
6633 + // begin search with affinity GPU.
6634 + start = gpu_to_base_replica(aff, tsk_rt(donor)->last_gpu);
6635 + i = start;
6636 + do { // "for each gpu" / "for each aff->nr_rsrc"
6637 + gpu_migration_dist_t temp_distance = gpu_migration_distance(start, i);
6638 +
6639 + // only interested in queues that will improve our distance
6640 + if(temp_distance < distance || donee_node == NULL) {
6641 + int dist_from_head = IKGLP_INVAL_DISTANCE;
6642 +
6643 +			TRACE_CUR("searching for donee on GPU %d\n", i);
6644 +
6645 + // visit each queue and pick a donee. bail as soon as we find
6646 + // one for this class.
6647 +
6648 + for(j = 0; j < aff->nr_simult; ++j) {
6649 + int temp_dist_from_head;
6650 + ikglp_donee_heap_node_t *temp_donee_node;
6651 + struct fifo_queue *fq;
6652 +
6653 + fq = &(sem->fifo_queues[i + j*aff->nr_rsrc]);
6654 + temp_donee_node = pick_donee(aff, fq, &temp_dist_from_head);
6655 +
6656 + if(temp_dist_from_head < dist_from_head)
6657 + {
6658 + // we check all the FQs for this GPU to spread priorities
6659 + // out across the queues. does this decrease jitter?
6660 + donee_node = temp_donee_node;
6661 + dist_from_head = temp_dist_from_head;
6662 + }
6663 + }
6664 +
6665 + if(dist_from_head != IKGLP_INVAL_DISTANCE) {
6666 +				TRACE_CUR("found donee %s/%d; it is the %d-th waiter.\n",
6667 + donee_node->task->comm, donee_node->task->pid,
6668 + dist_from_head);
6669 + }
6670 + else {
6671 +				TRACE_CUR("found no eligible donees on GPU %d\n", i);
6672 + }
6673 + }
6674 + else {
6675 + TRACE_CUR("skipping GPU %d (distance = %d, best donor "
6676 + "distance = %d)\n", i, temp_distance, distance);
6677 + }
6678 +
6679 + i = (i+1 < aff->nr_rsrc) ? i+1 : 0; // increment with wrap-around
6680 + } while (i != start);
6681 +
6682 +
6683 + // restore old donor info state.
6684 + default_donee->donor_info = default_donee_donor_info;
6685 +
6686 + if(!donee_node) {
6687 + donee_node = default_donee;
6688 +
6689 + TRACE_CUR("Could not find a donee. We have to steal one.\n");
6690 + WARN_ON(default_donee->donor_info == NULL);
6691 + }
6692 +
6693 +out:
6694 +
6695 + TRACE_CUR("Selected donee %s/%d on fq %d (GPU %d) for %s/%d with affinity for GPU %d\n",
6696 + donee_node->task->comm, donee_node->task->pid,
6697 + ikglp_get_idx(sem, donee_node->fq),
6698 + replica_to_gpu(aff, ikglp_get_idx(sem, donee_node->fq)),
6699 + donor->comm, donor->pid, tsk_rt(donor)->last_gpu);
6700 +
6701 + return(donee_node);
6702 +}
6703 +
6704 +
6705 +
6706 +static void __find_closest_donor(int target_gpu,
6707 + struct binheap_node* donor_node,
6708 + ikglp_wait_state_t** cur_closest,
6709 + int* cur_dist)
6710 +{
6711 + ikglp_wait_state_t *this_donor =
6712 + binheap_entry(donor_node, ikglp_wait_state_t, node);
6713 +
6714 + int this_dist =
6715 + gpu_migration_distance(target_gpu, tsk_rt(this_donor->task)->last_gpu);
6716 +
6717 +// TRACE_CUR("%s/%d: dist from target = %d\n",
6718 +// this_donor->task->comm,
6719 +// this_donor->task->pid,
6720 +// this_dist);
6721 +
6722 + if(this_dist < *cur_dist) {
6723 + // take this donor
6724 + *cur_dist = this_dist;
6725 + *cur_closest = this_donor;
6726 + }
6727 + else if(this_dist == *cur_dist) {
6728 + // priority tie-break. Even though this is a pre-order traversal,
6729 +		// priority tie-break.  Even though this is a pre-order traversal,
6730 +		// the donors are stored in a heap, not a sorted binary search tree,
6731 +		// so we still need to do a priority comparison.
6732 + litmus->compare(this_donor->task, (*cur_closest)->task)) {
6733 + *cur_dist = this_dist;
6734 + *cur_closest = this_donor;
6735 + }
6736 + }
6737 +
6738 + if(donor_node->left) __find_closest_donor(target_gpu, donor_node->left, cur_closest, cur_dist);
6739 + if(donor_node->right) __find_closest_donor(target_gpu, donor_node->right, cur_closest, cur_dist);
6740 +}
6741 +
6742 +ikglp_wait_state_t* gpu_ikglp_advise_donor_to_fq(struct ikglp_affinity* aff, struct fifo_queue* fq)
6743 +{
6744 +	// Heuristic strategy: Find the donor with the closest affinity to fq.
6745 +	// Tie-break on priority.
6746 +
6747 +	// We need to iterate over all the donors to do this.  Unfortunately,
6748 +	// our donors are organized in a heap.  We'll visit each node with a
6749 +	// recursive call.  This is relatively safe since there are only sem->m
6750 +	// donors, at most, so we won't recurse deeply enough to have to worry
6751 +	// about our stack.  (Even with 128 CPUs, the nesting depth is at most 7.)
6752 +
6753 + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
6754 + ikglp_wait_state_t *donor = NULL;
6755 + int distance = MIG_NONE;
6756 + int gpu = replica_to_gpu(aff, ikglp_get_idx(sem, fq));
6757 + ikglp_wait_state_t* default_donor = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node);
6758 +
6759 + __find_closest_donor(gpu, sem->donors.root, &donor, &distance);
6760 +
6761 + TRACE_CUR("Selected donor %s/%d (distance = %d) to move to fq %d "
6762 + "(non-aff wanted %s/%d). differs = %d\n",
6763 + donor->task->comm, donor->task->pid,
6764 + distance,
6765 + ikglp_get_idx(sem, fq),
6766 + default_donor->task->comm, default_donor->task->pid,
6767 + (donor->task != default_donor->task)
6768 + );
6769 +
6770 + return(donor);
6771 +}
6772 +
6773 +
6774 +
6775 +void gpu_ikglp_notify_enqueue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
6776 +{
6777 + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
6778 + int replica = ikglp_get_idx(sem, fq);
6779 + int gpu = replica_to_gpu(aff, replica);
6780 + struct ikglp_queue_info *info = &aff->q_info[replica];
6781 + lt_t est_time;
6782 + lt_t est_len_before;
6783 +
6784 + if(current == t) {
6785 + tsk_rt(t)->suspend_gpu_tracker_on_block = 1;
6786 + }
6787 +
6788 + est_len_before = info->estimated_len;
6789 + est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
6790 + info->estimated_len += est_time;
6791 +
6792 + TRACE_CUR("fq %d: q_len (%llu) + est_cs (%llu) = %llu\n",
6793 + ikglp_get_idx(sem, info->q),
6794 + est_len_before, est_time,
6795 + info->estimated_len);
6796 +
6797 + // if(aff->shortest_queue == info) {
6798 + // // we may no longer be the shortest
6799 + // aff->shortest_queue = ikglp_aff_find_shortest(aff);
6800 + //
6801 + // TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
6802 + // ikglp_get_idx(sem, aff->shortest_queue->q),
6803 + // aff->shortest_queue->q->count,
6804 + // aff->shortest_queue->estimated_len);
6805 + // }
6806 +}
6807 +
6808 +void gpu_ikglp_notify_dequeue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
6809 +{
6810 + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
6811 + int replica = ikglp_get_idx(sem, fq);
6812 + int gpu = replica_to_gpu(aff, replica);
6813 + struct ikglp_queue_info *info = &aff->q_info[replica];
6814 + lt_t est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
6815 +
6816 + if(est_time > info->estimated_len) {
6817 + WARN_ON(1);
6818 + info->estimated_len = 0;
6819 + }
6820 + else {
6821 + info->estimated_len -= est_time;
6822 + }
6823 +
6824 + TRACE_CUR("fq %d est len is now %llu\n",
6825 + ikglp_get_idx(sem, info->q),
6826 + info->estimated_len);
6827 +
6828 + // check to see if we're the shortest queue now.
6829 + // if((aff->shortest_queue != info) &&
6830 + // (aff->shortest_queue->estimated_len > info->estimated_len)) {
6831 + //
6832 + // aff->shortest_queue = info;
6833 + //
6834 + // TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
6835 + // ikglp_get_idx(sem, info->q),
6836 + // info->q->count,
6837 + // info->estimated_len);
6838 + // }
6839 +}
6840 +
6841 +void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff,
6842 + struct fifo_queue* fq,
6843 + struct task_struct* t)
6844 +{
6845 + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
6846 + int replica = ikglp_get_idx(sem, fq);
6847 + int gpu = replica_to_gpu(aff, replica);
6848 +
6849 + tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu); // record the type of migration
6850 +
6851 + TRACE_CUR("%s/%d acquired gpu %d (prev = %d). migration type = %d\n",
6852 + t->comm, t->pid, gpu, tsk_rt(t)->last_gpu, tsk_rt(t)->gpu_migration);
6853 +
6854 +	// count the number of resource holders
6855 + ++(*(aff->q_info[replica].nr_cur_users));
6856 +
6857 + reg_nv_device(gpu, 1, t); // register
6858 +
6859 + tsk_rt(t)->suspend_gpu_tracker_on_block = 0;
6860 + reset_gpu_tracker(t);
6861 + start_gpu_tracker(t);
6862 +}
6863 +
6864 +void gpu_ikglp_notify_freed(struct ikglp_affinity* aff,
6865 + struct fifo_queue* fq,
6866 + struct task_struct* t)
6867 +{
6868 + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
6869 + int replica = ikglp_get_idx(sem, fq);
6870 + int gpu = replica_to_gpu(aff, replica);
6871 + lt_t est_time;
6872 +
6873 + stop_gpu_tracker(t); // stop the tracker before we do anything else.
6874 +
6875 + est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
6876 +
6877 +	// count the number of resource holders
6878 + --(*(aff->q_info[replica].nr_cur_users));
6879 +
6880 + reg_nv_device(gpu, 0, t); // unregister
6881 +
6882 + // update estimates
6883 + update_gpu_estimate(t, get_gpu_time(t));
6884 +
6885 + TRACE_CUR("%s/%d freed gpu %d (prev = %d). mig type = %d. actual time was %llu. "
6886 + "estimated was %llu. diff is %d\n",
6887 + t->comm, t->pid, gpu, tsk_rt(t)->last_gpu,
6888 + tsk_rt(t)->gpu_migration,
6889 + get_gpu_time(t),
6890 + est_time,
6891 + (long long)get_gpu_time(t) - (long long)est_time);
6892 +
6893 + tsk_rt(t)->last_gpu = gpu;
6894 +}
6895 +
6896 +struct ikglp_affinity_ops gpu_ikglp_affinity =
6897 +{
6898 + .advise_enqueue = gpu_ikglp_advise_enqueue,
6899 + .advise_steal = gpu_ikglp_advise_steal,
6900 + .advise_donee_selection = gpu_ikglp_advise_donee_selection,
6901 + .advise_donor_to_fq = gpu_ikglp_advise_donor_to_fq,
6902 +
6903 + .notify_enqueue = gpu_ikglp_notify_enqueue,
6904 + .notify_dequeue = gpu_ikglp_notify_dequeue,
6905 + .notify_acquired = gpu_ikglp_notify_acquired,
6906 + .notify_freed = gpu_ikglp_notify_freed,
6907 +
6908 + .replica_to_resource = gpu_replica_to_resource,
6909 +};
6910 +
6911 +struct affinity_observer* ikglp_gpu_aff_obs_new(struct affinity_observer_ops* ops,
6912 + void* __user args)
6913 +{
6914 + return ikglp_aff_obs_new(ops, &gpu_ikglp_affinity, args);
6915 +}
6916 +
6917 +
6918 +
6919 +
6920 +
6921 +
6922 +
6923 +
6924 +// Simple ikglp Affinity (standard ikglp with auto-gpu registration)
6925 +
6926 +struct fifo_queue* simple_gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t)
6927 +{
6928 + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
6929 + int min_count;
6930 + int min_nr_users;
6931 + struct ikglp_queue_info *shortest;
6932 + struct fifo_queue *to_enqueue;
6933 + int i;
6934 +
6935 + // TRACE_CUR("Simple GPU ikglp advise_enqueue invoked\n");
6936 +
6937 + shortest = &aff->q_info[0];
6938 + min_count = shortest->q->count;
6939 + min_nr_users = *(shortest->nr_cur_users);
6940 +
6941 + TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
6942 + ikglp_get_idx(sem, shortest->q),
6943 + shortest->q->count,
6944 + min_nr_users);
6945 +
6946 + for(i = 1; i < sem->nr_replicas; ++i) {
6947 + int len = aff->q_info[i].q->count;
6948 +
6949 +		// the i-th queue is shorter, or the lengths are equal and the i-th
6950 +		// queue has fewer total users.
6951 +		//
6952 +		// tie-break on the smallest number of simultaneous users.  this only
6953 +		// kicks in when there is more than one empty queue.
6954 + if((len < min_count) ||
6955 + ((len == min_count) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
6956 + shortest = &aff->q_info[i];
6957 + min_count = shortest->q->count;
6958 + min_nr_users = *(aff->q_info[i].nr_cur_users);
6959 + }
6960 +
6961 + TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
6962 + ikglp_get_idx(sem, aff->q_info[i].q),
6963 + aff->q_info[i].q->count,
6964 + *(aff->q_info[i].nr_cur_users));
6965 + }
6966 +
6967 + to_enqueue = shortest->q;
6968 + TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n",
6969 + ikglp_get_idx(sem, to_enqueue),
6970 + ikglp_get_idx(sem, sem->shortest_fifo_queue));
6971 +
6972 + return to_enqueue;
6973 +}
6974 +
6975 +ikglp_wait_state_t* simple_gpu_ikglp_advise_steal(struct ikglp_affinity* aff,
6976 + struct fifo_queue* dst)
6977 +{
6978 + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
6979 + // TRACE_CUR("Simple GPU ikglp advise_steal invoked\n");
6980 + return ikglp_find_hp_waiter_to_steal(sem);
6981 +}
6982 +
6983 +ikglp_donee_heap_node_t* simple_gpu_ikglp_advise_donee_selection(struct ikglp_affinity* aff, struct task_struct* donor)
6984 +{
6985 + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
6986 + ikglp_donee_heap_node_t *donee = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node);
6987 + return(donee);
6988 +}
6989 +
6990 +ikglp_wait_state_t* simple_gpu_ikglp_advise_donor_to_fq(struct ikglp_affinity* aff, struct fifo_queue* fq)
6991 +{
6992 + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
6993 + ikglp_wait_state_t* donor = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node);
6994 + return(donor);
6995 +}
6996 +
6997 +void simple_gpu_ikglp_notify_enqueue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
6998 +{
6999 + // TRACE_CUR("Simple GPU ikglp notify_enqueue invoked\n");
7000 +}
7001 +
7002 +void simple_gpu_ikglp_notify_dequeue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
7003 +{
7004 + // TRACE_CUR("Simple GPU ikglp notify_dequeue invoked\n");
7005 +}
7006 +
7007 +void simple_gpu_ikglp_notify_acquired(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
7008 +{
7009 + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
7010 + int replica = ikglp_get_idx(sem, fq);
7011 + int gpu = replica_to_gpu(aff, replica);
7012 +
7013 + // TRACE_CUR("Simple GPU ikglp notify_acquired invoked\n");
7014 +
7015 + // count the number of resource holders
7016 + ++(*(aff->q_info[replica].nr_cur_users));
7017 +
7018 + reg_nv_device(gpu, 1, t); // register
7019 +}
7020 +
7021 +void simple_gpu_ikglp_notify_freed(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
7022 +{
7023 + struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
7024 + int replica = ikglp_get_idx(sem, fq);
7025 + int gpu = replica_to_gpu(aff, replica);
7026 +
7027 + // TRACE_CUR("Simple GPU ikglp notify_freed invoked\n");
7028 + // count the number of resource holders
7029 + --(*(aff->q_info[replica].nr_cur_users));
7030 +
7031 + reg_nv_device(gpu, 0, t); // unregister
7032 +}
7033 +
7034 +struct ikglp_affinity_ops simple_gpu_ikglp_affinity =
7035 +{
7036 + .advise_enqueue = simple_gpu_ikglp_advise_enqueue,
7037 + .advise_steal = simple_gpu_ikglp_advise_steal,
7038 + .advise_donee_selection = simple_gpu_ikglp_advise_donee_selection,
7039 + .advise_donor_to_fq = simple_gpu_ikglp_advise_donor_to_fq,
7040 +
7041 + .notify_enqueue = simple_gpu_ikglp_notify_enqueue,
7042 + .notify_dequeue = simple_gpu_ikglp_notify_dequeue,
7043 + .notify_acquired = simple_gpu_ikglp_notify_acquired,
7044 + .notify_freed = simple_gpu_ikglp_notify_freed,
7045 +
7046 + .replica_to_resource = gpu_replica_to_resource,
7047 +};
7048 +
7049 +struct affinity_observer* ikglp_simple_gpu_aff_obs_new(struct affinity_observer_ops* ops,
7050 + void* __user args)
7051 +{
7052 + return ikglp_aff_obs_new(ops, &simple_gpu_ikglp_affinity, args);
7053 +}
7054 +
7055 +#endif
7056 +
7057 +
7058 +
7059 +
7060 +
7061 +
7062 +
7063 +
7064 +
7065 diff --git a/litmus/jobs.c b/litmus/jobs.c
7066 index 36e3146..1d97462 100644
7067 --- a/litmus/jobs.c
7068 +++ b/litmus/jobs.c
7069 @@ -10,8 +10,21 @@ void prepare_for_next_period(struct task_struct *t)
7070 {
7071 BUG_ON(!t);
7072 /* prepare next release */
7073 - t->rt_param.job_params.release = t->rt_param.job_params.deadline;
7074 - t->rt_param.job_params.deadline += get_rt_period(t);
7075 +
7076 + if(tsk_rt(t)->task_params.cls == RT_CLASS_SOFT_W_SLIP) {
7077 + /* allow the release point to slip if we've passed our deadline. */
7078 + lt_t now = litmus_clock();
7079 + t->rt_param.job_params.release =
7080 + (t->rt_param.job_params.deadline < now) ?
7081 + now : t->rt_param.job_params.deadline;
7082 + t->rt_param.job_params.deadline =
7083 + t->rt_param.job_params.release + get_rt_period(t);
7084 + }
7085 + else {
7086 + t->rt_param.job_params.release = t->rt_param.job_params.deadline;
7087 + t->rt_param.job_params.deadline += get_rt_period(t);
7088 + }
7089 +
7090 t->rt_param.job_params.exec_time = 0;
7091 /* update job sequence number */
7092 t->rt_param.job_params.job_no++;
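The new branch above lets a SOFT_W_SLIP task's release point slip forward to the current time when its previous deadline has already passed, instead of releasing the next job in the past and making it tardy immediately. A minimal standalone sketch of the same arithmetic, using plain integers in place of lt_t and hypothetical values:

#include <stdio.h>

/* Sketch of the release-slip rule: if the old deadline has already passed,
 * the next release slips to "now"; otherwise it is the old deadline,
 * exactly as in the RT_CLASS_SOFT_W_SLIP branch above. */
static unsigned long long next_release(unsigned long long old_deadline,
                                        unsigned long long now)
{
	return (old_deadline < now) ? now : old_deadline;
}

int main(void)
{
	unsigned long long period = 10, deadline = 20, now = 23; /* hypothetical */
	unsigned long long release = next_release(deadline, now);

	/* with slipping:    release = 23, new deadline = 33
	 * without slipping: release = 20, new deadline = 30 (job is tardy at once) */
	printf("release=%llu deadline=%llu\n", release, release + period);
	return 0;
}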
7093 diff --git a/litmus/kexclu_affinity.c b/litmus/kexclu_affinity.c
7094 new file mode 100644
7095 index 0000000..5ef5e54
7096 --- /dev/null
7097 +++ b/litmus/kexclu_affinity.c
7098 @@ -0,0 +1,92 @@
7099 +#include <litmus/fdso.h>
7100 +#include <litmus/sched_plugin.h>
7101 +#include <litmus/trace.h>
7102 +#include <litmus/litmus.h>
7103 +#include <litmus/locking.h>
7104 +
7105 +#include <litmus/kexclu_affinity.h>
7106 +
7107 +static int create_generic_aff_obs(void** obj_ref, obj_type_t type, void* __user arg);
7108 +static int open_generic_aff_obs(struct od_table_entry* entry, void* __user arg);
7109 +static int close_generic_aff_obs(struct od_table_entry* entry);
7110 +static void destroy_generic_aff_obs(obj_type_t type, void* sem);
7111 +
7112 +struct fdso_ops generic_affinity_ops = {
7113 + .create = create_generic_aff_obs,
7114 + .open = open_generic_aff_obs,
7115 + .close = close_generic_aff_obs,
7116 + .destroy = destroy_generic_aff_obs
7117 +};
7118 +
7119 +static atomic_t aff_obs_id_gen = ATOMIC_INIT(0);
7120 +
7121 +static inline bool is_affinity_observer(struct od_table_entry *entry)
7122 +{
7123 + return (entry->class == &generic_affinity_ops);
7124 +}
7125 +
7126 +static inline struct affinity_observer* get_affinity_observer(struct od_table_entry* entry)
7127 +{
7128 + BUG_ON(!is_affinity_observer(entry));
7129 + return (struct affinity_observer*) entry->obj->obj;
7130 +}
7131 +
7132 +static int create_generic_aff_obs(void** obj_ref, obj_type_t type, void* __user arg)
7133 +{
7134 + struct affinity_observer* aff_obs;
7135 + int err;
7136 +
7137 + err = litmus->allocate_aff_obs(&aff_obs, type, arg);
7138 + if (err == 0) {
7139 + BUG_ON(!aff_obs->lock);
7140 + aff_obs->type = type;
7141 + *obj_ref = aff_obs;
7142 + }
7143 + return err;
7144 +}
7145 +
7146 +static int open_generic_aff_obs(struct od_table_entry* entry, void* __user arg)
7147 +{
7148 + struct affinity_observer* aff_obs = get_affinity_observer(entry);
7149 + if (aff_obs->ops->open)
7150 + return aff_obs->ops->open(aff_obs, arg);
7151 + else
7152 + return 0; /* default: any task can open it */
7153 +}
7154 +
7155 +static int close_generic_aff_obs(struct od_table_entry* entry)
7156 +{
7157 + struct affinity_observer* aff_obs = get_affinity_observer(entry);
7158 + if (aff_obs->ops->close)
7159 + return aff_obs->ops->close(aff_obs);
7160 + else
7161 + return 0; /* default: closing succeeds */
7162 +}
7163 +
7164 +static void destroy_generic_aff_obs(obj_type_t type, void* obj)
7165 +{
7166 + struct affinity_observer* aff_obs = (struct affinity_observer*) obj;
7167 + aff_obs->ops->deallocate(aff_obs);
7168 +}
7169 +
7170 +
7171 +struct litmus_lock* get_lock_from_od(int od)
7172 +{
7173 + extern struct fdso_ops generic_lock_ops;
7174 +
7175 + struct od_table_entry *entry = get_entry_for_od(od);
7176 +
7177 + if(entry && entry->class == &generic_lock_ops) {
7178 + return (struct litmus_lock*) entry->obj->obj;
7179 + }
7180 + return NULL;
7181 +}
7182 +
7183 +void affinity_observer_new(struct affinity_observer* aff,
7184 + struct affinity_observer_ops* ops,
7185 + struct affinity_observer_args* args)
7186 +{
7187 + aff->ops = ops;
7188 + aff->lock = get_lock_from_od(args->lock_od);
7189 + aff->ident = atomic_inc_return(&aff_obs_id_gen);
7190 +}
7191 \ No newline at end of file
7192 diff --git a/litmus/kfmlp_lock.c b/litmus/kfmlp_lock.c
7193 new file mode 100644
7194 index 0000000..bff857e
7195 --- /dev/null
7196 +++ b/litmus/kfmlp_lock.c
7197 @@ -0,0 +1,1002 @@
7198 +#include <linux/slab.h>
7199 +#include <linux/uaccess.h>
7200 +
7201 +#include <litmus/trace.h>
7202 +#include <litmus/sched_plugin.h>
7203 +#include <litmus/fdso.h>
7204 +
7205 +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
7206 +#include <litmus/gpu_affinity.h>
7207 +#include <litmus/nvidia_info.h>
7208 +#endif
7209 +
7210 +#include <litmus/kfmlp_lock.h>
7211 +
7212 +static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem,
7213 + struct kfmlp_queue* queue)
7214 +{
7215 + return (queue - &sem->queues[0]);
7216 +}
7217 +
7218 +static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem,
7219 + struct task_struct* holder)
7220 +{
7221 + int i;
7222 + for(i = 0; i < sem->num_resources; ++i)
7223 + if(sem->queues[i].owner == holder)
7224 + return(&sem->queues[i]);
7225 + return(NULL);
7226 +}
7227 +
7228 +/* caller is responsible for locking */
7229 +static struct task_struct* kfmlp_find_hp_waiter(struct kfmlp_queue *kqueue,
7230 + struct task_struct *skip)
7231 +{
7232 + struct list_head *pos;
7233 + struct task_struct *queued, *found = NULL;
7234 +
7235 + list_for_each(pos, &kqueue->wait.task_list) {
7236 + queued = (struct task_struct*) list_entry(pos, wait_queue_t,
7237 + task_list)->private;
7238 +
7239 + /* Compare task prios, find high prio task. */
7240 + //if (queued != skip && edf_higher_prio(queued, found))
7241 + if (queued != skip && litmus->compare(queued, found))
7242 + found = queued;
7243 + }
7244 + return found;
7245 +}
7246 +
7247 +static inline struct kfmlp_queue* kfmlp_find_shortest(struct kfmlp_semaphore* sem,
7248 + struct kfmlp_queue* search_start)
7249 +{
7250 + // we start our search at search_start instead of at the beginning of the
7251 + // queue list to load-balance across all resources.
7252 + struct kfmlp_queue* step = search_start;
7253 + struct kfmlp_queue* shortest = sem->shortest_queue;
7254 +
7255 + do
7256 + {
7257 + step = (step+1 != &sem->queues[sem->num_resources]) ?
7258 + step+1 : &sem->queues[0];
7259 +
7260 + if(step->count < shortest->count)
7261 + {
7262 + shortest = step;
7263 + if(step->count == 0)
7264 + break; /* can't get any shorter */
7265 + }
7266 +
7267 + }while(step != search_start);
7268 +
7269 + return(shortest);
7270 +}
7271 +
7272 +
7273 +static struct task_struct* kfmlp_select_hp_steal(struct kfmlp_semaphore* sem,
7274 + wait_queue_t** to_steal,
7275 + struct kfmlp_queue** to_steal_from)
7276 +{
7277 + /* must hold sem->lock */
7278 +
7279 + int i;
7280 +
7281 + *to_steal = NULL;
7282 + *to_steal_from = NULL;
7283 +
7284 + for(i = 0; i < sem->num_resources; ++i)
7285 + {
7286 + if( (sem->queues[i].count > 1) &&
7287 + ((*to_steal_from == NULL) ||
7288 + //(edf_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) )
7289 + (litmus->compare(sem->queues[i].hp_waiter, (*to_steal_from)->hp_waiter))) )
7290 + {
7291 + *to_steal_from = &sem->queues[i];
7292 + }
7293 + }
7294 +
7295 + if(*to_steal_from)
7296 + {
7297 + struct list_head *pos;
7298 + struct task_struct *target = (*to_steal_from)->hp_waiter;
7299 +
7300 + TRACE_CUR("want to steal hp_waiter (%s/%d) from queue %d\n",
7301 + target->comm,
7302 + target->pid,
7303 + kfmlp_get_idx(sem, *to_steal_from));
7304 +
7305 + list_for_each(pos, &(*to_steal_from)->wait.task_list)
7306 + {
7307 + wait_queue_t *node = list_entry(pos, wait_queue_t, task_list);
7308 + struct task_struct *queued = (struct task_struct*) node->private;
7309 + /* Compare task prios, find high prio task. */
7310 + if (queued == target)
7311 + {
7312 + *to_steal = node;
7313 +
7314 + TRACE_CUR("steal: selected %s/%d from queue %d\n",
7315 + queued->comm, queued->pid,
7316 + kfmlp_get_idx(sem, *to_steal_from));
7317 +
7318 + return queued;
7319 + }
7320 + }
7321 +
7322 + TRACE_CUR("Could not find %s/%d in queue %d!!! THIS IS A BUG!\n",
7323 + target->comm,
7324 + target->pid,
7325 + kfmlp_get_idx(sem, *to_steal_from));
7326 + }
7327 +
7328 + return NULL;
7329 +}
7330 +
7331 +static void kfmlp_steal_node(struct kfmlp_semaphore *sem,
7332 + struct kfmlp_queue *dst,
7333 + wait_queue_t *wait,
7334 + struct kfmlp_queue *src)
7335 +{
7336 + struct task_struct* t = (struct task_struct*) wait->private;
7337 +
7338 + __remove_wait_queue(&src->wait, wait);
7339 + --(src->count);
7340 +
7341 + if(t == src->hp_waiter) {
7342 + src->hp_waiter = kfmlp_find_hp_waiter(src, NULL);
7343 +
7344 + TRACE_CUR("queue %d: %s/%d is new hp_waiter\n",
7345 + kfmlp_get_idx(sem, src),
7346 + (src->hp_waiter) ? src->hp_waiter->comm : "nil",
7347 + (src->hp_waiter) ? src->hp_waiter->pid : -1);
7348 +
7349 + if(src->owner && tsk_rt(src->owner)->inh_task == t) {
7350 + litmus->decrease_prio(src->owner, src->hp_waiter);
7351 + }
7352 + }
7353 +
7354 + if(sem->shortest_queue->count > src->count) {
7355 + sem->shortest_queue = src;
7356 + TRACE_CUR("queue %d is the shortest\n", kfmlp_get_idx(sem, sem->shortest_queue));
7357 + }
7358 +
7359 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
7360 + if(sem->aff_obs) {
7361 + sem->aff_obs->ops->notify_dequeue(sem->aff_obs, src, t);
7362 + }
7363 +#endif
7364 +
7365 + init_waitqueue_entry(wait, t);
7366 + __add_wait_queue_tail_exclusive(&dst->wait, wait);
7367 + ++(dst->count);
7368 +
7369 + if(litmus->compare(t, dst->hp_waiter)) {
7370 + dst->hp_waiter = t;
7371 +
7372 + TRACE_CUR("queue %d: %s/%d is new hp_waiter\n",
7373 + kfmlp_get_idx(sem, dst),
7374 + t->comm, t->pid);
7375 +
7376 + if(dst->owner && litmus->compare(t, dst->owner))
7377 + {
7378 + litmus->increase_prio(dst->owner, t);
7379 + }
7380 + }
7381 +
7382 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
7383 + if(sem->aff_obs) {
7384 + sem->aff_obs->ops->notify_enqueue(sem->aff_obs, dst, t);
7385 + }
7386 +#endif
7387 +}
7388 +
7389 +
7390 +int kfmlp_lock(struct litmus_lock* l)
7391 +{
7392 + struct task_struct* t = current;
7393 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
7394 + struct kfmlp_queue* my_queue = NULL;
7395 + wait_queue_t wait;
7396 + unsigned long flags;
7397 +
7398 + if (!is_realtime(t))
7399 + return -EPERM;
7400 +
7401 + spin_lock_irqsave(&sem->lock, flags);
7402 +
7403 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
7404 + if(sem->aff_obs) {
7405 + my_queue = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t);
7406 + }
7407 + if(!my_queue) {
7408 + my_queue = sem->shortest_queue;
7409 + }
7410 +#else
7411 + my_queue = sem->shortest_queue;
7412 +#endif
7413 +
7414 + if (my_queue->owner) {
7415 + /* resource is not free => must suspend and wait */
7416 + TRACE_CUR("queue %d: Resource is not free => must suspend and wait. (queue size = %d)\n",
7417 + kfmlp_get_idx(sem, my_queue),
7418 + my_queue->count);
7419 +
7420 + init_waitqueue_entry(&wait, t);
7421 +
7422 + /* FIXME: interruptible would be nice some day */
7423 + set_task_state(t, TASK_UNINTERRUPTIBLE);
7424 +
7425 + __add_wait_queue_tail_exclusive(&my_queue->wait, &wait);
7426 +
7427 + TRACE_CUR("queue %d: hp_waiter is currently %s/%d\n",
7428 + kfmlp_get_idx(sem, my_queue),
7429 + (my_queue->hp_waiter) ? my_queue->hp_waiter->comm : "nil",
7430 + (my_queue->hp_waiter) ? my_queue->hp_waiter->pid : -1);
7431 +
7432 + /* check if we need to activate priority inheritance */
7433 + //if (edf_higher_prio(t, my_queue->hp_waiter))
7434 + if (litmus->compare(t, my_queue->hp_waiter)) {
7435 + my_queue->hp_waiter = t;
7436 + TRACE_CUR("queue %d: %s/%d is new hp_waiter\n",
7437 + kfmlp_get_idx(sem, my_queue),
7438 + t->comm, t->pid);
7439 +
7440 + //if (edf_higher_prio(t, my_queue->owner))
7441 + if (litmus->compare(t, my_queue->owner)) {
7442 + litmus->increase_prio(my_queue->owner, my_queue->hp_waiter);
7443 + }
7444 + }
7445 +
7446 + ++(my_queue->count);
7447 +
7448 + if(my_queue == sem->shortest_queue) {
7449 + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
7450 + TRACE_CUR("queue %d is the shortest\n",
7451 + kfmlp_get_idx(sem, sem->shortest_queue));
7452 + }
7453 +
7454 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
7455 + if(sem->aff_obs) {
7456 + sem->aff_obs->ops->notify_enqueue(sem->aff_obs, my_queue, t);
7457 + }
7458 +#endif
7459 +
7460 + /* release lock before sleeping */
7461 + spin_unlock_irqrestore(&sem->lock, flags);
7462 +
7463 + /* We depend on the FIFO order. Thus, we don't need to recheck
7464 + * when we wake up; we are guaranteed to have the lock since
7465 + * there is only one wake up per release (or steal).
7466 + */
7467 + schedule();
7468 +
7469 +
7470 + if(my_queue->owner == t) {
7471 + TRACE_CUR("queue %d: acquired through waiting\n",
7472 + kfmlp_get_idx(sem, my_queue));
7473 + }
7474 + else {
7475 + /* this case may happen if our wait entry was stolen
7476 + between queues. record where we went. */
7477 + my_queue = kfmlp_get_queue(sem, t);
7478 +
7479 + BUG_ON(!my_queue);
7480 + TRACE_CUR("queue %d: acquired through stealing\n",
7481 + kfmlp_get_idx(sem, my_queue));
7482 + }
7483 + }
7484 + else {
7485 + TRACE_CUR("queue %d: acquired immediately\n",
7486 + kfmlp_get_idx(sem, my_queue));
7487 +
7488 + my_queue->owner = t;
7489 +
7490 + ++(my_queue->count);
7491 +
7492 + if(my_queue == sem->shortest_queue) {
7493 + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
7494 + TRACE_CUR("queue %d is the shortest\n",
7495 + kfmlp_get_idx(sem, sem->shortest_queue));
7496 + }
7497 +
7498 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
7499 + if(sem->aff_obs) {
7500 + sem->aff_obs->ops->notify_enqueue(sem->aff_obs, my_queue, t);
7501 + sem->aff_obs->ops->notify_acquired(sem->aff_obs, my_queue, t);
7502 + }
7503 +#endif
7504 +
7505 + spin_unlock_irqrestore(&sem->lock, flags);
7506 + }
7507 +
7508 +
7509 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
7510 + if(sem->aff_obs) {
7511 + return sem->aff_obs->ops->replica_to_resource(sem->aff_obs, my_queue);
7512 + }
7513 +#endif
7514 + return kfmlp_get_idx(sem, my_queue);
7515 +}
7516 +
7517 +
7518 +int kfmlp_unlock(struct litmus_lock* l)
7519 +{
7520 + struct task_struct *t = current, *next;
7521 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
7522 + struct kfmlp_queue *my_queue, *to_steal_from;
7523 + unsigned long flags;
7524 + int err = 0;
7525 +
7526 + my_queue = kfmlp_get_queue(sem, t);
7527 +
7528 + if (!my_queue) {
7529 + err = -EINVAL;
7530 + goto out;
7531 + }
7532 +
7533 + spin_lock_irqsave(&sem->lock, flags);
7534 +
7535 + TRACE_CUR("queue %d: unlocking\n", kfmlp_get_idx(sem, my_queue));
7536 +
7537 + my_queue->owner = NULL; // clear ownership
7538 + --(my_queue->count);
7539 +
7540 + if(my_queue->count < sem->shortest_queue->count)
7541 + {
7542 + sem->shortest_queue = my_queue;
7543 + TRACE_CUR("queue %d is the shortest\n",
7544 + kfmlp_get_idx(sem, sem->shortest_queue));
7545 + }
7546 +
7547 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
7548 + if(sem->aff_obs) {
7549 + sem->aff_obs->ops->notify_dequeue(sem->aff_obs, my_queue, t);
7550 + sem->aff_obs->ops->notify_freed(sem->aff_obs, my_queue, t);
7551 + }
7552 +#endif
7553 +
7554 + /* we lose the benefit of priority inheritance (if any) */
7555 + if (tsk_rt(t)->inh_task)
7556 + litmus->decrease_prio(t, NULL);
7557 +
7558 +
7559 + /* check if there are jobs waiting for this resource */
7560 +RETRY:
7561 + next = __waitqueue_remove_first(&my_queue->wait);
7562 + if (next) {
7563 + /* next becomes the resource holder */
7564 + my_queue->owner = next;
7565 +
7566 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
7567 + if(sem->aff_obs) {
7568 + sem->aff_obs->ops->notify_acquired(sem->aff_obs, my_queue, next);
7569 + }
7570 +#endif
7571 +
7572 + TRACE_CUR("queue %d: lock ownership passed to %s/%d\n",
7573 + kfmlp_get_idx(sem, my_queue), next->comm, next->pid);
7574 +
7575 + /* determine new hp_waiter if necessary */
7576 + if (next == my_queue->hp_waiter) {
7577 + TRACE_TASK(next, "was highest-prio waiter\n");
7578 + my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next);
7579 + if (my_queue->hp_waiter)
7580 + TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue));
7581 + else
7582 + TRACE("queue %d: no further waiters\n", kfmlp_get_idx(sem, my_queue));
7583 + } else {
7584 + /* Well, if next is not the highest-priority waiter,
7585 + * then it ought to inherit the highest-priority
7586 + * waiter's priority. */
7587 + litmus->increase_prio(next, my_queue->hp_waiter);
7588 + }
7589 +
7590 + /* wake up next */
7591 + wake_up_process(next);
7592 + }
7593 + else {
7594 + // TODO: put this stealing logic before we attempt to release
7595 + // our resource. (This simplifies the code and gets rid of the ugly goto RETRY.)
7596 + wait_queue_t *wait;
7597 +
7598 + TRACE_CUR("queue %d: looking to steal someone...\n",
7599 + kfmlp_get_idx(sem, my_queue));
7600 +
7601 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
7602 + next = (sem->aff_obs) ?
7603 + sem->aff_obs->ops->advise_steal(sem->aff_obs, &wait, &to_steal_from) :
7604 + kfmlp_select_hp_steal(sem, &wait, &to_steal_from);
7605 +#else
7606 + next = kfmlp_select_hp_steal(sem, &wait, &to_steal_from);
7607 +#endif
7608 +
7609 + if(next) {
7610 + TRACE_CUR("queue %d: stealing %s/%d from queue %d\n",
7611 + kfmlp_get_idx(sem, my_queue),
7612 + next->comm, next->pid,
7613 + kfmlp_get_idx(sem, to_steal_from));
7614 +
7615 + kfmlp_steal_node(sem, my_queue, wait, to_steal_from);
7616 +
7617 + goto RETRY; // will succeed this time.
7618 + }
7619 + else {
7620 + TRACE_CUR("queue %d: no one to steal.\n",
7621 + kfmlp_get_idx(sem, my_queue));
7622 + }
7623 + }
7624 +
7625 + spin_unlock_irqrestore(&sem->lock, flags);
7626 +
7627 +out:
7628 + return err;
7629 +}
7630 +
7631 +int kfmlp_close(struct litmus_lock* l)
7632 +{
7633 + struct task_struct *t = current;
7634 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
7635 + struct kfmlp_queue *my_queue;
7636 + unsigned long flags;
7637 +
7638 + int owner;
7639 +
7640 + spin_lock_irqsave(&sem->lock, flags);
7641 +
7642 + my_queue = kfmlp_get_queue(sem, t);
7643 + owner = (my_queue) ? (my_queue->owner == t) : 0;
7644 +
7645 + spin_unlock_irqrestore(&sem->lock, flags);
7646 +
7647 + if (owner)
7648 + kfmlp_unlock(l);
7649 +
7650 + return 0;
7651 +}
7652 +
7653 +void kfmlp_free(struct litmus_lock* l)
7654 +{
7655 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
7656 + kfree(sem->queues);
7657 + kfree(sem);
7658 +}
7659 +
7660 +
7661 +
7662 +struct litmus_lock* kfmlp_new(struct litmus_lock_ops* ops, void* __user args)
7663 +{
7664 + struct kfmlp_semaphore* sem;
7665 + int num_resources = 0;
7666 + int i;
7667 +
7668 + if(!access_ok(VERIFY_READ, args, sizeof(num_resources)))
7669 + {
7670 + return(NULL);
7671 + }
7672 + if(__copy_from_user(&num_resources, args, sizeof(num_resources)))
7673 + {
7674 + return(NULL);
7675 + }
7676 + if(num_resources < 1)
7677 + {
7678 + return(NULL);
7679 + }
7680 +
7681 + sem = kmalloc(sizeof(*sem), GFP_KERNEL);
7682 + if(!sem)
7683 + {
7684 + return(NULL);
7685 + }
7686 +
7687 + sem->queues = kmalloc(sizeof(struct kfmlp_queue)*num_resources, GFP_KERNEL);
7688 + if(!sem->queues)
7689 + {
7690 + kfree(sem);
7691 + return(NULL);
7692 + }
7693 +
7694 + sem->litmus_lock.ops = ops;
7695 + spin_lock_init(&sem->lock);
7696 + sem->num_resources = num_resources;
7697 +
7698 + for(i = 0; i < num_resources; ++i)
7699 + {
7700 + sem->queues[i].owner = NULL;
7701 + sem->queues[i].hp_waiter = NULL;
7702 + init_waitqueue_head(&sem->queues[i].wait);
7703 + sem->queues[i].count = 0;
7704 + }
7705 +
7706 + sem->shortest_queue = &sem->queues[0];
7707 +
7708 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
7709 + sem->aff_obs = NULL;
7710 +#endif
7711 +
7712 + return &sem->litmus_lock;
7713 +}
7714 +
7715 +
7716 +
7717 +
7718 +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
7719 +
7720 +static inline int __replica_to_gpu(struct kfmlp_affinity* aff, int replica)
7721 +{
7722 + int gpu = replica % aff->nr_rsrc;
7723 + return gpu;
7724 +}
7725 +
7726 +static inline int replica_to_gpu(struct kfmlp_affinity* aff, int replica)
7727 +{
7728 + int gpu = __replica_to_gpu(aff, replica) + aff->offset;
7729 + return gpu;
7730 +}
7731 +
7732 +static inline int gpu_to_base_replica(struct kfmlp_affinity* aff, int gpu)
7733 +{
7734 + int replica = gpu - aff->offset;
7735 + return replica;
7736 +}
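As a purely illustrative example of this mapping: with 6 replicas and nr_simult = 2 holders per GPU, nr_rsrc = 3, so replicas 0..5 map to base indices 0, 1, 2, 0, 1, 2; with offset = 4 they correspond to GPUs 4, 5, 6, 4, 5, 6, and gpu_to_base_replica() simply subtracts the offset again.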
7737 +
7738 +
7739 +int kfmlp_aff_obs_close(struct affinity_observer* obs)
7740 +{
7741 + return 0;
7742 +}
7743 +
7744 +void kfmlp_aff_obs_free(struct affinity_observer* obs)
7745 +{
7746 + struct kfmlp_affinity *kfmlp_aff = kfmlp_aff_obs_from_aff_obs(obs);
7747 + kfree(kfmlp_aff->nr_cur_users_on_rsrc);
7748 + kfree(kfmlp_aff->q_info);
7749 + kfree(kfmlp_aff);
7750 +}
7751 +
7752 +static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* ops,
7753 + struct kfmlp_affinity_ops* kfmlp_ops,
7754 + void* __user args)
7755 +{
7756 + struct kfmlp_affinity* kfmlp_aff;
7757 + struct gpu_affinity_observer_args aff_args;
7758 + struct kfmlp_semaphore* sem;
7759 + int i;
7760 + unsigned long flags;
7761 +
7762 + if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) {
7763 + return(NULL);
7764 + }
7765 + if(__copy_from_user(&aff_args, args, sizeof(aff_args))) {
7766 + return(NULL);
7767 + }
7768 +
7769 + sem = (struct kfmlp_semaphore*) get_lock_from_od(aff_args.obs.lock_od);
7770 +
7771 + if(sem->litmus_lock.type != KFMLP_SEM) {
7772 + TRACE_CUR("Lock type not supported. Type = %d\n", sem->litmus_lock.type);
7773 + return(NULL);
7774 + }
7775 +
7776 + if((aff_args.nr_simult_users <= 0) ||
7777 + (sem->num_resources%aff_args.nr_simult_users != 0)) {
7778 + TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users "
7779 + "(%d) per replica. #replicas should be evenly divisible "
7780 + "by #simult_users.\n",
7781 + sem->litmus_lock.ident,
7782 + sem->num_resources,
7783 + aff_args.nr_simult_users);
7784 + return(NULL);
7785 + }
7786 +
7787 + if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) {
7788 + TRACE_CUR("System does not support #simult_users > %d. %d requested.\n",
7789 + NV_MAX_SIMULT_USERS, aff_args.nr_simult_users);
7790 +// return(NULL);
7791 + }
7792 +
7793 + kfmlp_aff = kmalloc(sizeof(*kfmlp_aff), GFP_KERNEL);
7794 + if(!kfmlp_aff) {
7795 + return(NULL);
7796 + }
7797 +
7798 + kfmlp_aff->q_info = kmalloc(sizeof(struct kfmlp_queue_info)*sem->num_resources, GFP_KERNEL);
7799 + if(!kfmlp_aff->q_info) {
7800 + kfree(kfmlp_aff);
7801 + return(NULL);
7802 + }
7803 +
7804 + kfmlp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->num_resources / aff_args.nr_simult_users), GFP_KERNEL);
7805 + if(!kfmlp_aff->nr_cur_users_on_rsrc) {
7806 + kfree(kfmlp_aff->q_info);
7807 + kfree(kfmlp_aff);
7808 + return(NULL);
7809 + }
7810 +
7811 + affinity_observer_new(&kfmlp_aff->obs, ops, &aff_args.obs);
7812 +
7813 + kfmlp_aff->ops = kfmlp_ops;
7814 + kfmlp_aff->offset = aff_args.replica_to_gpu_offset;
7815 + kfmlp_aff->nr_simult = aff_args.nr_simult_users;
7816 + kfmlp_aff->nr_rsrc = sem->num_resources / kfmlp_aff->nr_simult;
7817 +
7818 + memset(kfmlp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(kfmlp_aff->nr_rsrc)); /* one counter per physical GPU */
7819 +
7820 + for(i = 0; i < sem->num_resources; ++i) {
7821 + kfmlp_aff->q_info[i].q = &sem->queues[i];
7822 + kfmlp_aff->q_info[i].estimated_len = 0;
7823 +
7824 + // multiple q_info's will point to the same resource (aka GPU) if
7825 + // aff_args.nr_simult_users > 1
7826 + kfmlp_aff->q_info[i].nr_cur_users = &kfmlp_aff->nr_cur_users_on_rsrc[__replica_to_gpu(kfmlp_aff,i)];
7827 + }
7828 +
7829 + // attach observer to the lock
7830 + spin_lock_irqsave(&sem->lock, flags);
7831 + sem->aff_obs = kfmlp_aff;
7832 + spin_unlock_irqrestore(&sem->lock, flags);
7833 +
7834 + return &kfmlp_aff->obs;
7835 +}
7836 +
7837 +
7838 +
7839 +
7840 +static int gpu_replica_to_resource(struct kfmlp_affinity* aff,
7841 + struct kfmlp_queue* fq) {
7842 + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
7843 + return(replica_to_gpu(aff, kfmlp_get_idx(sem, fq)));
7844 +}
7845 +
7846 +
7847 +// Smart KFMLP Affinity
7848 +
7849 +//static inline struct kfmlp_queue_info* kfmlp_aff_find_shortest(struct kfmlp_affinity* aff)
7850 +//{
7851 +// struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
7852 +// struct kfmlp_queue_info *shortest = &aff->q_info[0];
7853 +// int i;
7854 +//
7855 +// for(i = 1; i < sem->num_resources; ++i) {
7856 +// if(aff->q_info[i].estimated_len < shortest->estimated_len) {
7857 +// shortest = &aff->q_info[i];
7858 +// }
7859 +// }
7860 +//
7861 +// return(shortest);
7862 +//}
7863 +
7864 +struct kfmlp_queue* gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t)
7865 +{
7866 + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
7867 + lt_t min_len;
7868 + int min_nr_users;
7869 + struct kfmlp_queue_info *shortest;
7870 + struct kfmlp_queue *to_enqueue;
7871 + int i;
7872 + int affinity_gpu;
7873 +
7874 + // simply pick the shortest queue if we have no affinity, or if we have
7875 + // affinity with the shortest queue already.
7876 + if(unlikely(tsk_rt(t)->last_gpu < 0)) {
7877 + affinity_gpu = aff->offset; // first gpu
7878 + TRACE_CUR("no affinity\n");
7879 + }
7880 + else {
7881 + affinity_gpu = tsk_rt(t)->last_gpu;
7882 + }
7883 +
7884 + // all things being equal, let's start with the queue with which we have
7885 + // affinity. this helps us maintain affinity even when we don't have
7886 + // an estimate for local-affinity execution time (i.e., 2nd time on GPU)
7887 + shortest = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)];
7888 +
7889 +// if(shortest == aff->shortest_queue) {
7890 +// TRACE_CUR("special case: have affinity with shortest queue\n");
7891 +// goto out;
7892 +// }
7893 +
7894 + min_len = shortest->estimated_len + get_gpu_estimate(t, MIG_LOCAL);
7895 + min_nr_users = *(shortest->nr_cur_users);
7896 +
7897 + TRACE_CUR("cs is %llu on queue %d: est len = %llu\n",
7898 + get_gpu_estimate(t, MIG_LOCAL),
7899 + kfmlp_get_idx(sem, shortest->q),
7900 + min_len);
7901 +
7902 + for(i = 0; i < sem->num_resources; ++i) {
7903 + if(&aff->q_info[i] != shortest) {
7904 +
7905 + lt_t est_len =
7906 + aff->q_info[i].estimated_len +
7907 + get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i)));
7908 +
7909 + // queue is smaller, or they're equal and the other has a smaller number
7910 + // of total users.
7911 + //
7912 + // tie-break on the smallest number of simultaneous users. this only kicks in
7913 + // when there is more than one empty queue.
7914 + if((est_len < min_len) ||
7915 + ((est_len == min_len) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
7916 + shortest = &aff->q_info[i];
7917 + min_len = est_len;
7918 + min_nr_users = *(aff->q_info[i].nr_cur_users);
7919 + }
7920 +
7921 + TRACE_CUR("cs is %llu on queue %d: est len = %llu\n",
7922 + get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i))),
7923 + kfmlp_get_idx(sem, aff->q_info[i].q),
7924 + est_len);
7925 + }
7926 + }
7927 +
7928 + to_enqueue = shortest->q;
7929 + TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n",
7930 + kfmlp_get_idx(sem, to_enqueue),
7931 + kfmlp_get_idx(sem, sem->shortest_queue));
7932 +
7933 + return to_enqueue;
7934 +}
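The loop above weighs each queue by its estimated backlog plus this task's execution-time estimate for the migration distance to the backing GPU, breaking ties on the number of current holders. A minimal standalone sketch of that cost rule follows (hypothetical numbers, plain integers in place of lt_t, and without the detail that the real search is seeded with the task's last-used GPU):

#include <stdio.h>

struct cand {
	unsigned long long est_len;   /* estimated backlog on this queue */
	unsigned long long est_cs;    /* task's estimate for this migration distance */
	int nr_cur_users;             /* holders currently on the backing GPU */
};

/* pick the queue with the smallest (backlog + migration-weighted estimate),
 * breaking ties on the number of current holders */
static int pick(const struct cand *c, int n)
{
	int best = 0, i;
	for (i = 1; i < n; ++i) {
		unsigned long long cost  = c[i].est_len + c[i].est_cs;
		unsigned long long bcost = c[best].est_len + c[best].est_cs;
		if (cost < bcost ||
		    (cost == bcost && c[i].nr_cur_users < c[best].nr_cur_users))
			best = i;
	}
	return best;
}

int main(void)
{
	/* hypothetical: {backlog, migration cost, holders} per queue */
	struct cand c[3] = { {50, 10, 1}, {40, 30, 0}, {30, 30, 2} };
	printf("chosen queue: %d\n", pick(c, 3)); /* 0: 60 vs 70 vs 60, tie broken by fewer holders */
	return 0;
}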
7935 +
7936 +struct task_struct* gpu_kfmlp_advise_steal(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from)
7937 +{
7938 + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
7939 +
7940 + // For now, just steal highest priority waiter
7941 + // TODO: Implement affinity-aware stealing.
7942 +
7943 + return kfmlp_select_hp_steal(sem, to_steal, to_steal_from);
7944 +}
7945 +
7946 +
7947 +void gpu_kfmlp_notify_enqueue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
7948 +{
7949 + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
7950 + int replica = kfmlp_get_idx(sem, fq);
7951 + int gpu = replica_to_gpu(aff, replica);
7952 + struct kfmlp_queue_info *info = &aff->q_info[replica];
7953 + lt_t est_time;
7954 + lt_t est_len_before;
7955 +
7956 + if(current == t) {
7957 + tsk_rt(t)->suspend_gpu_tracker_on_block = 1;
7958 + }
7959 +
7960 + est_len_before = info->estimated_len;
7961 + est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
7962 + info->estimated_len += est_time;
7963 +
7964 + TRACE_CUR("fq %d: q_len (%llu) + est_cs (%llu) = %llu\n",
7965 + kfmlp_get_idx(sem, info->q),
7966 + est_len_before, est_time,
7967 + info->estimated_len);
7968 +
7969 +// if(aff->shortest_queue == info) {
7970 +// // we may no longer be the shortest
7971 +// aff->shortest_queue = kfmlp_aff_find_shortest(aff);
7972 +//
7973 +// TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
7974 +// kfmlp_get_idx(sem, aff->shortest_queue->q),
7975 +// aff->shortest_queue->q->count,
7976 +// aff->shortest_queue->estimated_len);
7977 +// }
7978 +}
7979 +
7980 +void gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
7981 +{
7982 + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
7983 + int replica = kfmlp_get_idx(sem, fq);
7984 + int gpu = replica_to_gpu(aff, replica);
7985 + struct kfmlp_queue_info *info = &aff->q_info[replica];
7986 + lt_t est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
7987 +
7988 + if(est_time > info->estimated_len) {
7989 + WARN_ON(1);
7990 + info->estimated_len = 0;
7991 + }
7992 + else {
7993 + info->estimated_len -= est_time;
7994 + }
7995 +
7996 + TRACE_CUR("fq %d est len is now %llu\n",
7997 + kfmlp_get_idx(sem, info->q),
7998 + info->estimated_len);
7999 +
8000 + // check to see if we're the shortest queue now.
8001 +// if((aff->shortest_queue != info) &&
8002 +// (aff->shortest_queue->estimated_len > info->estimated_len)) {
8003 +//
8004 +// aff->shortest_queue = info;
8005 +//
8006 +// TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
8007 +// kfmlp_get_idx(sem, info->q),
8008 +// info->q->count,
8009 +// info->estimated_len);
8010 +// }
8011 +}
8012 +
8013 +void gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
8014 +{
8015 + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
8016 + int replica = kfmlp_get_idx(sem, fq);
8017 + int gpu = replica_to_gpu(aff, replica);
8018 +
8019 + tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu); // record the type of migration
8020 +
8021 + TRACE_CUR("%s/%d acquired gpu %d. migration type = %d\n",
8022 + t->comm, t->pid, gpu, tsk_rt(t)->gpu_migration);
8023 +
8024 + // count the number of resource holders
8025 + ++(*(aff->q_info[replica].nr_cur_users));
8026 +
8027 + reg_nv_device(gpu, 1, t); // register
8028 +
8029 + tsk_rt(t)->suspend_gpu_tracker_on_block = 0;
8030 + reset_gpu_tracker(t);
8031 + start_gpu_tracker(t);
8032 +}
8033 +
8034 +void gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
8035 +{
8036 + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
8037 + int replica = kfmlp_get_idx(sem, fq);
8038 + int gpu = replica_to_gpu(aff, replica);
8039 + lt_t est_time;
8040 +
8041 + stop_gpu_tracker(t); // stop the tracker before we do anything else.
8042 +
8043 + est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
8044 +
8045 + tsk_rt(t)->last_gpu = gpu;
8046 +
8047 + // count the number of resource holders
8048 + --(*(aff->q_info[replica].nr_cur_users));
8049 +
8050 + reg_nv_device(gpu, 0, t); // unregister
8051 +
8052 + // update estimates
8053 + update_gpu_estimate(t, get_gpu_time(t));
8054 +
8055 + TRACE_CUR("%s/%d freed gpu %d. actual time was %llu. estimated was %llu. diff is %d\n",
8056 + t->comm, t->pid, gpu,
8057 + get_gpu_time(t),
8058 + est_time,
8059 + (long long)get_gpu_time(t) - (long long)est_time);
8060 +}
8061 +
8062 +struct kfmlp_affinity_ops gpu_kfmlp_affinity =
8063 +{
8064 + .advise_enqueue = gpu_kfmlp_advise_enqueue,
8065 + .advise_steal = gpu_kfmlp_advise_steal,
8066 + .notify_enqueue = gpu_kfmlp_notify_enqueue,
8067 + .notify_dequeue = gpu_kfmlp_notify_dequeue,
8068 + .notify_acquired = gpu_kfmlp_notify_acquired,
8069 + .notify_freed = gpu_kfmlp_notify_freed,
8070 + .replica_to_resource = gpu_replica_to_resource,
8071 +};
8072 +
8073 +struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops* ops,
8074 + void* __user args)
8075 +{
8076 + return kfmlp_aff_obs_new(ops, &gpu_kfmlp_affinity, args);
8077 +}
8078 +
8079 +
8080 +
8081 +
8082 +
8083 +
8084 +
8085 +
8086 +// Simple KFMLP Affinity (standard KFMLP with auto-gpu registration)
8087 +
8088 +struct kfmlp_queue* simple_gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t)
8089 +{
8090 + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
8091 + int min_count;
8092 + int min_nr_users;
8093 + struct kfmlp_queue_info *shortest;
8094 + struct kfmlp_queue *to_enqueue;
8095 + int i;
8096 +
8097 +// TRACE_CUR("Simple GPU KFMLP advise_enqueue invoked\n");
8098 +
8099 + shortest = &aff->q_info[0];
8100 + min_count = shortest->q->count;
8101 + min_nr_users = *(shortest->nr_cur_users);
8102 +
8103 + TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
8104 + kfmlp_get_idx(sem, shortest->q),
8105 + shortest->q->count,
8106 + min_nr_users);
8107 +
8108 + for(i = 1; i < sem->num_resources; ++i) {
8109 + int len = aff->q_info[i].q->count;
8110 +
8111 + // queue is smaller, or they're equal and the other has a smaller number
8112 + // of total users.
8113 + //
8114 + // tie-break on the smallest number of simultaneous users. this only kicks in
8115 + // when there is more than one empty queue.
8116 + if((len < min_count) ||
8117 + ((len == min_count) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
8118 + shortest = &aff->q_info[i];
8119 + min_count = shortest->q->count;
8120 + min_nr_users = *(aff->q_info[i].nr_cur_users);
8121 + }
8122 +
8123 + TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
8124 + kfmlp_get_idx(sem, aff->q_info[i].q),
8125 + aff->q_info[i].q->count,
8126 + *(aff->q_info[i].nr_cur_users));
8127 + }
8128 +
8129 + to_enqueue = shortest->q;
8130 + TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n",
8131 + kfmlp_get_idx(sem, to_enqueue),
8132 + kfmlp_get_idx(sem, sem->shortest_queue));
8133 +
8134 + return to_enqueue;
8135 +}
8136 +
8137 +struct task_struct* simple_gpu_kfmlp_advise_steal(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from)
8138 +{
8139 + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
8140 +// TRACE_CUR("Simple GPU KFMLP advise_steal invoked\n");
8141 + return kfmlp_select_hp_steal(sem, to_steal, to_steal_from);
8142 +}
8143 +
8144 +void simple_gpu_kfmlp_notify_enqueue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
8145 +{
8146 +// TRACE_CUR("Simple GPU KFMLP notify_enqueue invoked\n");
8147 +}
8148 +
8149 +void simple_gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
8150 +{
8151 +// TRACE_CUR("Simple GPU KFMLP notify_dequeue invoked\n");
8152 +}
8153 +
8154 +void simple_gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
8155 +{
8156 + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
8157 + int replica = kfmlp_get_idx(sem, fq);
8158 + int gpu = replica_to_gpu(aff, replica);
8159 +
8160 +// TRACE_CUR("Simple GPU KFMLP notify_acquired invoked\n");
8161 +
8162 + // count the number of resource holders
8163 + ++(*(aff->q_info[replica].nr_cur_users));
8164 +
8165 + reg_nv_device(gpu, 1, t); // register
8166 +}
8167 +
8168 +void simple_gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
8169 +{
8170 + struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
8171 + int replica = kfmlp_get_idx(sem, fq);
8172 + int gpu = replica_to_gpu(aff, replica);
8173 +
8174 +// TRACE_CUR("Simple GPU KFMLP notify_freed invoked\n");
8175 + // count the number of resource holders
8176 + --(*(aff->q_info[replica].nr_cur_users));
8177 +
8178 + reg_nv_device(gpu, 0, t); // unregister
8179 +}
8180 +
8181 +struct kfmlp_affinity_ops simple_gpu_kfmlp_affinity =
8182 +{
8183 + .advise_enqueue = simple_gpu_kfmlp_advise_enqueue,
8184 + .advise_steal = simple_gpu_kfmlp_advise_steal,
8185 + .notify_enqueue = simple_gpu_kfmlp_notify_enqueue,
8186 + .notify_dequeue = simple_gpu_kfmlp_notify_dequeue,
8187 + .notify_acquired = simple_gpu_kfmlp_notify_acquired,
8188 + .notify_freed = simple_gpu_kfmlp_notify_freed,
8189 + .replica_to_resource = gpu_replica_to_resource,
8190 +};
8191 +
8192 +struct affinity_observer* kfmlp_simple_gpu_aff_obs_new(struct affinity_observer_ops* ops,
8193 + void* __user args)
8194 +{
8195 + return kfmlp_aff_obs_new(ops, &simple_gpu_kfmlp_affinity, args);
8196 +}
8197 +
8198 +#endif
8199 +
8200 diff --git a/litmus/litmus.c b/litmus/litmus.c
8201 index 3013901..d1f836c 100644
8202 --- a/litmus/litmus.c
8203 +++ b/litmus/litmus.c
8204 @@ -21,6 +21,10 @@
8205 #include <litmus/affinity.h>
8206 #endif
8207
8208 +#ifdef CONFIG_LITMUS_NVIDIA
8209 +#include <litmus/nvidia_info.h>
8210 +#endif
8211 +
8212 /* Number of RT tasks that exist in the system */
8213 atomic_t rt_task_count = ATOMIC_INIT(0);
8214 static DEFINE_RAW_SPINLOCK(task_transition_lock);
8215 @@ -51,6 +55,28 @@ void bheap_node_free(struct bheap_node* hn)
8216 struct release_heap* release_heap_alloc(int gfp_flags);
8217 void release_heap_free(struct release_heap* rh);
8218
8219 +#ifdef CONFIG_LITMUS_NVIDIA
8220 +/*
8221 + * sys_register_nv_device
8222 + * @nv_device_id: The Nvidia device id that the task wants to register
8223 + * @reg_action: set to '1' to register the specified device. zero otherwise.
8224 + * Syscall for registering the task's designated nvidia device in the NV_DEVICE_REG array.
8225 + * Returns EFAULT if nv_device_id is out of range,
8226 + * 0 if successful.
8227 + */
8228 +asmlinkage long sys_register_nv_device(int nv_device_id, int reg_action)
8229 +{
8230 + /* register the device to caller (aka 'current') */
8231 + return(reg_nv_device(nv_device_id, reg_action, current));
8232 +}
8233 +#else
8234 +asmlinkage long sys_register_nv_device(int nv_device_id, int reg_action)
8235 +{
8236 + return(-EINVAL);
8237 +}
8238 +#endif
8239 +
8240 +
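The syscall above is intended to be issued from user space around GPU sections when no affinity observer performs the registration automatically. A minimal usage sketch follows; the syscall number is assigned in the patched unistd_{32,64}.h headers (not shown in this hunk), so __NR_register_nv_device below is only a placeholder.

#include <unistd.h>
#include <sys/syscall.h>

/* Placeholder: substitute the number assigned by the patched
 * include/litmus/unistd_{32,64}.h. -1 deliberately fails with ENOSYS. */
#ifndef __NR_register_nv_device
#define __NR_register_nv_device -1
#endif

/* register (reg_action = 1) or unregister (reg_action = 0) the calling
 * task's use of GPU nv_device_id */
static long register_nv_device(int nv_device_id, int reg_action)
{
	return syscall(__NR_register_nv_device, nv_device_id, reg_action);
}

/* typical pattern:
 *   register_nv_device(gpu, 1);   ... issue work on the GPU ...
 *   register_nv_device(gpu, 0);
 * (the *_notify_acquired/_freed callbacks earlier in this patch perform the
 * same calls automatically when an affinity observer is attached) */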
8241 /*
8242 * sys_set_task_rt_param
8243 * @pid: Pid of the task which scheduling parameters must be changed
8244 @@ -269,6 +295,7 @@ asmlinkage long sys_query_job_no(unsigned int __user *job)
8245 return retval;
8246 }
8247
8248 +
8249 /* sys_null_call() is only used for determining raw system call
8250 * overheads (kernel entry, kernel exit). It has no useful side effects.
8251 * If ts is non-NULL, then the current Feather-Trace time is recorded.
8252 @@ -286,12 +313,42 @@ asmlinkage long sys_null_call(cycles_t __user *ts)
8253 return ret;
8254 }
8255
8256 +
8257 +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
8258 +void init_gpu_affinity_state(struct task_struct* p)
8259 +{
8260 + // under-damped
8261 + //p->rt_param.gpu_fb_param_a = _frac(14008, 10000);
8262 + //p->rt_param.gpu_fb_param_b = _frac(16024, 10000);
8263 +
8264 + // empirical
8265 + p->rt_param.gpu_fb_param_a[0] = _frac(7550, 10000);
8266 + p->rt_param.gpu_fb_param_b[0] = _frac(45800, 10000);
8267 +
8268 + p->rt_param.gpu_fb_param_a[1] = _frac(8600, 10000);
8269 + p->rt_param.gpu_fb_param_b[1] = _frac(40000, 10000);
8270 +
8271 + p->rt_param.gpu_fb_param_a[2] = _frac(6890, 10000);
8272 + p->rt_param.gpu_fb_param_b[2] = _frac(40000, 10000);
8273 +
8274 + p->rt_param.gpu_fb_param_a[3] = _frac(7580, 10000);
8275 + p->rt_param.gpu_fb_param_b[3] = _frac(34590, 10000);
8276 +
8277 + p->rt_param.gpu_migration = MIG_NONE;
8278 + p->rt_param.last_gpu = -1;
8279 +}
8280 +#endif
8281 +
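A note on the constants above: _frac(x, 10000) presumably encodes the fixed-point fraction x/10000 (fpmath.h is added elsewhere in this patch but not shown here), so the four (a, b) pairs work out to roughly (0.755, 4.58), (0.86, 4.0), (0.689, 4.0), and (0.758, 3.459). Indices 0..3 appear to correspond to the migration distances returned by gpu_migration_distance(); the feedback estimator that consumes these gains lives in litmus/gpu_affinity.c and is not part of this hunk.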
8282 /* p is a real-time task. Re-init its state as a best-effort task. */
8283 static void reinit_litmus_state(struct task_struct* p, int restore)
8284 {
8285 struct rt_task user_config = {};
8286 void* ctrl_page = NULL;
8287
8288 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
8289 + binheap_order_t prio_order = NULL;
8290 +#endif
8291 +
8292 if (restore) {
8293 /* Safe user-space provided configuration data.
8294 * and allocated page. */
8295 @@ -299,11 +356,38 @@ static void reinit_litmus_state(struct task_struct* p, int restore)
8296 ctrl_page = p->rt_param.ctrl_page;
8297 }
8298
8299 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
8300 + prio_order = p->rt_param.hp_blocked_tasks.compare;
8301 +#endif
8302 +
8303 /* We probably should not be inheriting any task's priority
8304 * at this point in time.
8305 */
8306 WARN_ON(p->rt_param.inh_task);
8307
8308 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
8309 + WARN_ON(p->rt_param.blocked_lock);
8310 + WARN_ON(!binheap_empty(&p->rt_param.hp_blocked_tasks));
8311 +#endif
8312 +
8313 +#ifdef CONFIG_LITMUS_SOFTIRQD
8314 + /* We probably should not have any tasklets executing for
8315 + * us at this time.
8316 + */
8317 + WARN_ON(p->rt_param.cur_klitirqd);
8318 + WARN_ON(atomic_read(&p->rt_param.klitirqd_sem_stat) == HELD);
8319 +
8320 + if(p->rt_param.cur_klitirqd)
8321 + flush_pending(p->rt_param.cur_klitirqd, p);
8322 +
8323 + if(atomic_read(&p->rt_param.klitirqd_sem_stat) == HELD)
8324 + up_and_set_stat(p, NOT_HELD, &p->rt_param.klitirqd_sem);
8325 +#endif
8326 +
8327 +#ifdef CONFIG_LITMUS_NVIDIA
8328 + WARN_ON(p->rt_param.held_gpus != 0);
8329 +#endif
8330 +
8331 /* Cleanup everything else. */
8332 memset(&p->rt_param, 0, sizeof(p->rt_param));
8333
8334 @@ -312,6 +396,15 @@ static void reinit_litmus_state(struct task_struct* p, int restore)
8335 p->rt_param.task_params = user_config;
8336 p->rt_param.ctrl_page = ctrl_page;
8337 }
8338 +
8339 +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
8340 + init_gpu_affinity_state(p);
8341 +#endif
8342 +
8343 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
8344 + INIT_BINHEAP_HANDLE(&p->rt_param.hp_blocked_tasks, prio_order);
8345 + raw_spin_lock_init(&p->rt_param.hp_blocked_tasks_lock);
8346 +#endif
8347 }
8348
8349 long litmus_admit_task(struct task_struct* tsk)
8350 @@ -358,6 +451,26 @@ long litmus_admit_task(struct task_struct* tsk)
8351 bheap_node_init(&tsk_rt(tsk)->heap_node, tsk);
8352 }
8353
8354 +
8355 +#ifdef CONFIG_LITMUS_NVIDIA
8356 + atomic_set(&tsk_rt(tsk)->nv_int_count, 0);
8357 +#endif
8358 +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
8359 + init_gpu_affinity_state(tsk);
8360 +#endif
8361 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
8362 + tsk_rt(tsk)->blocked_lock = NULL;
8363 + raw_spin_lock_init(&tsk_rt(tsk)->hp_blocked_tasks_lock);
8364 + //INIT_BINHEAP_HANDLE(&tsk_rt(tsk)->hp_blocked_tasks, prio_order); // done by scheduler
8365 +#endif
8366 +#ifdef CONFIG_LITMUS_SOFTIRQD
8367 + /* proxy thread off by default */
8368 + tsk_rt(tsk)->is_proxy_thread = 0;
8369 + tsk_rt(tsk)->cur_klitirqd = NULL;
8370 + mutex_init(&tsk_rt(tsk)->klitirqd_sem);
8371 + atomic_set(&tsk_rt(tsk)->klitirqd_sem_stat, NOT_HELD);
8372 +#endif
8373 +
8374 retval = litmus->admit_task(tsk);
8375
8376 if (!retval) {
8377 @@ -403,7 +516,7 @@ static void synch_on_plugin_switch(void* info)
8378 */
8379 int switch_sched_plugin(struct sched_plugin* plugin)
8380 {
8381 - unsigned long flags;
8382 + //unsigned long flags;
8383 int ret = 0;
8384
8385 BUG_ON(!plugin);
8386 @@ -417,8 +530,15 @@ int switch_sched_plugin(struct sched_plugin* plugin)
8387 while (atomic_read(&cannot_use_plugin) < num_online_cpus())
8388 cpu_relax();
8389
8390 +#ifdef CONFIG_LITMUS_SOFTIRQD
8391 + if(!klitirqd_is_dead())
8392 + {
8393 + kill_klitirqd();
8394 + }
8395 +#endif
8396 +
8397 /* stop task transitions */
8398 - raw_spin_lock_irqsave(&task_transition_lock, flags);
8399 + //raw_spin_lock_irqsave(&task_transition_lock, flags);
8400
8401 /* don't switch if there are active real-time tasks */
8402 if (atomic_read(&rt_task_count) == 0) {
8403 @@ -436,7 +556,7 @@ int switch_sched_plugin(struct sched_plugin* plugin)
8404 } else
8405 ret = -EBUSY;
8406 out:
8407 - raw_spin_unlock_irqrestore(&task_transition_lock, flags);
8408 + //raw_spin_unlock_irqrestore(&task_transition_lock, flags);
8409 atomic_set(&cannot_use_plugin, 0);
8410 return ret;
8411 }
8412 diff --git a/litmus/litmus_pai_softirq.c b/litmus/litmus_pai_softirq.c
8413 new file mode 100644
8414 index 0000000..300571a
8415 --- /dev/null
8416 +++ b/litmus/litmus_pai_softirq.c
8417 @@ -0,0 +1,64 @@
8418 +#include <linux/interrupt.h>
8419 +#include <linux/percpu.h>
8420 +#include <linux/cpu.h>
8421 +#include <linux/kthread.h>
8422 +#include <linux/ftrace.h>
8423 +#include <linux/smp.h>
8424 +#include <linux/slab.h>
8425 +#include <linux/mutex.h>
8426 +
8427 +#include <linux/sched.h>
8428 +#include <linux/cpuset.h>
8429 +
8430 +#include <litmus/litmus.h>
8431 +#include <litmus/sched_trace.h>
8432 +#include <litmus/jobs.h>
8433 +#include <litmus/sched_plugin.h>
8434 +#include <litmus/litmus_softirq.h>
8435 +
8436 +
8437 +
8438 +int __litmus_tasklet_schedule(struct tasklet_struct *t, unsigned int k_id)
8439 +{
8440 + int ret = 0; /* assume failure */
8441 + if(unlikely((t->owner == NULL) || !is_realtime(t->owner)))
8442 + {
8443 + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
8444 + BUG();
8445 + }
8446 +
8447 + ret = litmus->enqueue_pai_tasklet(t);
8448 +
8449 + return(ret);
8450 +}
8451 +
8452 +EXPORT_SYMBOL(__litmus_tasklet_schedule);
8453 +
8454 +
8455 +
8456 +// failure causes default Linux handling.
8457 +int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, unsigned int k_id)
8458 +{
8459 + int ret = 0; /* assume failure */
8460 + return(ret);
8461 +}
8462 +EXPORT_SYMBOL(__litmus_tasklet_hi_schedule);
8463 +
8464 +
8465 +// failure causes default Linux handling.
8466 +int __litmus_tasklet_hi_schedule_first(struct tasklet_struct *t, unsigned int k_id)
8467 +{
8468 + int ret = 0; /* assume failure */
8469 + return(ret);
8470 +}
8471 +EXPORT_SYMBOL(__litmus_tasklet_hi_schedule_first);
8472 +
8473 +
8474 +// failure causes default Linux handling.
8475 +int __litmus_schedule_work(struct work_struct *w, unsigned int k_id)
8476 +{
8477 + int ret = 0; /* assume failure */
8478 + return(ret);
8479 +}
8480 +EXPORT_SYMBOL(__litmus_schedule_work);
8481 +
8482 diff --git a/litmus/litmus_proc.c b/litmus/litmus_proc.c
8483 index 4bf725a..9ab7e01 100644
8484 --- a/litmus/litmus_proc.c
8485 +++ b/litmus/litmus_proc.c
8486 @@ -20,11 +20,18 @@ static struct proc_dir_entry *litmus_dir = NULL,
8487 #ifdef CONFIG_RELEASE_MASTER
8488 *release_master_file = NULL,
8489 #endif
8490 +#ifdef CONFIG_LITMUS_SOFTIRQD
8491 + *klitirqd_file = NULL,
8492 +#endif
8493 *plugs_file = NULL;
8494
8495 /* in litmus/sync.c */
8496 int count_tasks_waiting_for_release(void);
8497
8498 +extern int proc_read_klitirqd_stats(char *page, char **start,
8499 + off_t off, int count,
8500 + int *eof, void *data);
8501 +
8502 static int proc_read_stats(char *page, char **start,
8503 off_t off, int count,
8504 int *eof, void *data)
8505 @@ -161,6 +168,12 @@ int __init init_litmus_proc(void)
8506 release_master_file->write_proc = proc_write_release_master;
8507 #endif
8508
8509 +#ifdef CONFIG_LITMUS_SOFTIRQD
8510 + klitirqd_file =
8511 + create_proc_read_entry("klitirqd_stats", 0444, litmus_dir,
8512 + proc_read_klitirqd_stats, NULL);
8513 +#endif
8514 +
8515 stat_file = create_proc_read_entry("stats", 0444, litmus_dir,
8516 proc_read_stats, NULL);
8517
8518 @@ -187,6 +200,10 @@ void exit_litmus_proc(void)
8519 remove_proc_entry("stats", litmus_dir);
8520 if (curr_file)
8521 remove_proc_entry("active_plugin", litmus_dir);
8522 +#ifdef CONFIG_LITMUS_SOFTIRQD
8523 + if (klitirqd_file)
8524 + remove_proc_entry("klitirqd_stats", litmus_dir);
8525 +#endif
8526 #ifdef CONFIG_RELEASE_MASTER
8527 if (release_master_file)
8528 remove_proc_entry("release_master", litmus_dir);
8529 diff --git a/litmus/litmus_softirq.c b/litmus/litmus_softirq.c
8530 new file mode 100644
8531 index 0000000..9f7d9da
8532 --- /dev/null
8533 +++ b/litmus/litmus_softirq.c
8534 @@ -0,0 +1,1582 @@
8535 +#include <linux/interrupt.h>
8536 +#include <linux/percpu.h>
8537 +#include <linux/cpu.h>
8538 +#include <linux/kthread.h>
8539 +#include <linux/ftrace.h>
8540 +#include <linux/smp.h>
8541 +#include <linux/slab.h>
8542 +#include <linux/mutex.h>
8543 +
8544 +#include <linux/sched.h>
8545 +#include <linux/cpuset.h>
8546 +
8547 +#include <litmus/litmus.h>
8548 +#include <litmus/sched_trace.h>
8549 +#include <litmus/jobs.h>
8550 +#include <litmus/sched_plugin.h>
8551 +#include <litmus/litmus_softirq.h>
8552 +
8553 +/* TODO: Remove unneeded mb() and other barriers. */
8554 +
8555 +
8556 +/* counts number of daemons ready to handle litmus irqs. */
8557 +static atomic_t num_ready_klitirqds = ATOMIC_INIT(0);
8558 +
8559 +enum pending_flags
8560 +{
8561 + LIT_TASKLET_LOW = 0x1,
8562 + LIT_TASKLET_HI = LIT_TASKLET_LOW<<1,
8563 + LIT_WORK = LIT_TASKLET_HI<<1
8564 +};
8565 +
8566 +/* only support tasklet processing for now. */
8567 +struct tasklet_head
8568 +{
8569 + struct tasklet_struct *head;
8570 + struct tasklet_struct **tail;
8571 +};
8572 +
8573 +struct klitirqd_info
8574 +{
8575 + struct task_struct* klitirqd;
8576 + struct task_struct* current_owner;
8577 + int terminating;
8578 +
8579 +
8580 + raw_spinlock_t lock;
8581 +
8582 + u32 pending;
8583 + atomic_t num_hi_pending;
8584 + atomic_t num_low_pending;
8585 + atomic_t num_work_pending;
8586 +
8587 + /* in order of priority */
8588 + struct tasklet_head pending_tasklets_hi;
8589 + struct tasklet_head pending_tasklets;
8590 + struct list_head worklist;
8591 +};
8592 +
8593 +/* one list for each klitirqd */
8594 +static struct klitirqd_info klitirqds[NR_LITMUS_SOFTIRQD];
8595 +
8596 +
8597 +
8598 +
8599 +
8600 +int proc_read_klitirqd_stats(char *page, char **start,
8601 + off_t off, int count,
8602 + int *eof, void *data)
8603 +{
8604 + int len = snprintf(page, PAGE_SIZE,
8605 + "num ready klitirqds: %d\n\n",
8606 + atomic_read(&num_ready_klitirqds));
8607 +
8608 + if(klitirqd_is_ready())
8609 + {
8610 + int i;
8611 + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
8612 + {
8613 + len +=
8614 + snprintf(page + len - 1, PAGE_SIZE, /* -1 to strip off \0 */
8615 + "klitirqd_th%d: %s/%d\n"
8616 + "\tcurrent_owner: %s/%d\n"
8617 + "\tpending: %x\n"
8618 + "\tnum hi: %d\n"
8619 + "\tnum low: %d\n"
8620 + "\tnum work: %d\n\n",
8621 + i,
8622 + klitirqds[i].klitirqd->comm, klitirqds[i].klitirqd->pid,
8623 + (klitirqds[i].current_owner != NULL) ?
8624 + klitirqds[i].current_owner->comm : "(null)",
8625 + (klitirqds[i].current_owner != NULL) ?
8626 + klitirqds[i].current_owner->pid : 0,
8627 + klitirqds[i].pending,
8628 + atomic_read(&klitirqds[i].num_hi_pending),
8629 + atomic_read(&klitirqds[i].num_low_pending),
8630 + atomic_read(&klitirqds[i].num_work_pending));
8631 + }
8632 + }
8633 +
8634 + return(len);
8635 +}
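Given the format strings above, /proc/litmus/klitirqd_stats output has the following shape (thread names, PIDs, and counts here are hypothetical; a pending value of 1 corresponds to the LIT_TASKLET_LOW bit defined earlier in this file):

num ready klitirqds: 1

klitirqd_th0: klitirqd_th0/1234
	current_owner: rtspin/1300
	pending: 1
	num hi: 0
	num low: 3
	num work: 0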
8636 +
8637 +
8638 +
8639 +
8640 +
8641 +#if 0
8642 +static atomic_t dump_id = ATOMIC_INIT(0);
8643 +
8644 +static void __dump_state(struct klitirqd_info* which, const char* caller)
8645 +{
8646 + struct tasklet_struct* list;
8647 +
8648 + int id = atomic_inc_return(&dump_id);
8649 +
8650 + //if(in_interrupt())
8651 + {
8652 + if(which->current_owner)
8653 + {
8654 + TRACE("(id: %d caller: %s)\n"
8655 + "klitirqd: %s/%d\n"
8656 + "current owner: %s/%d\n"
8657 + "pending: %x\n",
8658 + id, caller,
8659 + which->klitirqd->comm, which->klitirqd->pid,
8660 + which->current_owner->comm, which->current_owner->pid,
8661 + which->pending);
8662 + }
8663 + else
8664 + {
8665 + TRACE("(id: %d caller: %s)\n"
8666 + "klitirqd: %s/%d\n"
8667 + "current owner: %p\n"
8668 + "pending: %x\n",
8669 + id, caller,
8670 + which->klitirqd->comm, which->klitirqd->pid,
8671 + NULL,
8672 + which->pending);
8673 + }
8674 +
8675 + list = which->pending_tasklets.head;
8676 + while(list)
8677 + {
8678 + struct tasklet_struct *t = list;
8679 + list = list->next; /* advance */
8680 + if(t->owner)
8681 + TRACE("(id: %d caller: %s) Tasklet: %x, Owner = %s/%d\n", id, caller, t, t->owner->comm, t->owner->pid);
8682 + else
8683 + TRACE("(id: %d caller: %s) Tasklet: %x, Owner = %p\n", id, caller, t, NULL);
8684 + }
8685 + }
8686 +}
8687 +
8688 +static void dump_state(struct klitirqd_info* which, const char* caller)
8689 +{
8690 + unsigned long flags;
8691 +
8692 + raw_spin_lock_irqsave(&which->lock, flags);
8693 + __dump_state(which, caller);
8694 + raw_spin_unlock_irqrestore(&which->lock, flags);
8695 +}
8696 +#endif
8697 +
8698 +
8699 +/* forward declarations */
8700 +static void ___litmus_tasklet_schedule(struct tasklet_struct *t,
8701 + struct klitirqd_info *which,
8702 + int wakeup);
8703 +static void ___litmus_tasklet_hi_schedule(struct tasklet_struct *t,
8704 + struct klitirqd_info *which,
8705 + int wakeup);
8706 +static void ___litmus_schedule_work(struct work_struct *w,
8707 + struct klitirqd_info *which,
8708 + int wakeup);
8709 +
8710 +
8711 +
8712 +inline unsigned int klitirqd_id(struct task_struct* tsk)
8713 +{
8714 + int i;
8715 + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
8716 + {
8717 + if(klitirqds[i].klitirqd == tsk)
8718 + {
8719 + return i;
8720 + }
8721 + }
8722 +
8723 + BUG();
8724 +
8725 + return 0;
8726 +}
8727 +
8728 +
8729 +inline static u32 litirq_pending_hi_irqoff(struct klitirqd_info* which)
8730 +{
8731 + return (which->pending & LIT_TASKLET_HI);
8732 +}
8733 +
8734 +inline static u32 litirq_pending_low_irqoff(struct klitirqd_info* which)
8735 +{
8736 + return (which->pending & LIT_TASKLET_LOW);
8737 +}
8738 +
8739 +inline static u32 litirq_pending_work_irqoff(struct klitirqd_info* which)
8740 +{
8741 + return (which->pending & LIT_WORK);
8742 +}
8743 +
8744 +inline static u32 litirq_pending_irqoff(struct klitirqd_info* which)
8745 +{
8746 + return(which->pending);
8747 +}
8748 +
8749 +
8750 +inline static u32 litirq_pending(struct klitirqd_info* which)
8751 +{
8752 + unsigned long flags;
8753 + u32 pending;
8754 +
8755 + raw_spin_lock_irqsave(&which->lock, flags);
8756 + pending = litirq_pending_irqoff(which);
8757 + raw_spin_unlock_irqrestore(&which->lock, flags);
8758 +
8759 + return pending;
8760 +};
8761 +
8762 +inline static u32 litirq_pending_with_owner(struct klitirqd_info* which, struct task_struct* owner)
8763 +{
8764 + unsigned long flags;
8765 + u32 pending;
8766 +
8767 + raw_spin_lock_irqsave(&which->lock, flags);
8768 + pending = litirq_pending_irqoff(which);
8769 + if(pending)
8770 + {
8771 + if(which->current_owner != owner)
8772 + {
8773 + pending = 0; // owner switch!
8774 + }
8775 + }
8776 + raw_spin_unlock_irqrestore(&which->lock, flags);
8777 +
8778 + return pending;
8779 +}
8780 +
8781 +
8782 +inline static u32 litirq_pending_and_sem_and_owner(struct klitirqd_info* which,
8783 + struct mutex** sem,
8784 + struct task_struct** t)
8785 +{
8786 + unsigned long flags;
8787 + u32 pending;
8788 +
8789 + /* init values */
8790 + *sem = NULL;
8791 + *t = NULL;
8792 +
8793 + raw_spin_lock_irqsave(&which->lock, flags);
8794 +
8795 + pending = litirq_pending_irqoff(which);
8796 + if(pending)
8797 + {
8798 + if(which->current_owner != NULL)
8799 + {
8800 + *t = which->current_owner;
8801 + *sem = &tsk_rt(which->current_owner)->klitirqd_sem;
8802 + }
8803 + else
8804 + {
8805 + BUG();
8806 + }
8807 + }
8808 + raw_spin_unlock_irqrestore(&which->lock, flags);
8809 +
8810 + if(likely(*sem))
8811 + {
8812 + return pending;
8813 + }
8814 + else
8815 + {
8816 + return 0;
8817 + }
8818 +}
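+
+/* Expected calling protocol (inferred from run_klitirqd() below): the daemon
+ * snapshots (pending, current_owner, owner's klitirqd_sem) here under
+ * which->lock, acquires the semaphore *outside* the lock, and then
+ * re-validates with litirq_pending_with_owner() -- pending items may have
+ * been flushed, or the owner may have changed, while the daemon slept. */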
8819 +
8820 +/* returns true if the next piece of work to do is from a different owner.
8821 + */
8822 +static int tasklet_ownership_change(
8823 + struct klitirqd_info* which,
8824 + enum pending_flags taskletQ)
8825 +{
8826 + /* this function doesn't have to look at work objects since they have
8827 + priority below tasklets. */
8828 +
8829 + unsigned long flags;
8830 + int ret = 0;
8831 +
8832 + raw_spin_lock_irqsave(&which->lock, flags);
8833 +
8834 + switch(taskletQ)
8835 + {
8836 + case LIT_TASKLET_HI:
8837 + if(litirq_pending_hi_irqoff(which))
8838 + {
8839 + ret = (which->pending_tasklets_hi.head->owner !=
8840 + which->current_owner);
8841 + }
8842 + break;
8843 + case LIT_TASKLET_LOW:
8844 + if(litirq_pending_low_irqoff(which))
8845 + {
8846 + ret = (which->pending_tasklets.head->owner !=
8847 + which->current_owner);
8848 + }
8849 + break;
8850 + default:
8851 + break;
8852 + }
8853 +
8854 + raw_spin_unlock_irqrestore(&which->lock, flags);
8855 +
8856 + TRACE_TASK(which->klitirqd, "ownership change needed: %d\n", ret);
8857 +
8858 + return ret;
8859 +}
8860 +
8861 +
8862 +static void __reeval_prio(struct klitirqd_info* which)
8863 +{
8864 + struct task_struct* next_owner = NULL;
8865 + struct task_struct* klitirqd = which->klitirqd;
8866 +
8867 + /* Check in prio-order */
8868 + u32 pending = litirq_pending_irqoff(which);
8869 +
8870 + //__dump_state(which, "__reeval_prio: before");
8871 +
8872 + if(pending)
8873 + {
8874 + if(pending & LIT_TASKLET_HI)
8875 + {
8876 + next_owner = which->pending_tasklets_hi.head->owner;
8877 + }
8878 + else if(pending & LIT_TASKLET_LOW)
8879 + {
8880 + next_owner = which->pending_tasklets.head->owner;
8881 + }
8882 + else if(pending & LIT_WORK)
8883 + {
8884 + struct work_struct* work =
8885 + list_first_entry(&which->worklist, struct work_struct, entry);
8886 + next_owner = work->owner;
8887 + }
8888 + }
8889 +
8890 + if(next_owner != which->current_owner)
8891 + {
8892 + struct task_struct* old_owner = which->current_owner;
8893 +
8894 + /* bind the next owner. */
8895 + which->current_owner = next_owner;
8896 + mb();
8897 +
8898 + if(next_owner != NULL)
8899 + {
8900 + if(!in_interrupt())
8901 + {
8902 + TRACE_CUR("%s: Ownership change: %s/%d to %s/%d\n", __FUNCTION__,
8903 + ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->comm,
8904 + ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->pid,
8905 + next_owner->comm, next_owner->pid);
8906 + }
8907 + else
8908 + {
8909 + TRACE("%s: Ownership change: %s/%d to %s/%d\n", __FUNCTION__,
8910 + ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->comm,
8911 + ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->pid,
8912 + next_owner->comm, next_owner->pid);
8913 + }
8914 +
8915 + litmus->increase_prio_inheritance_klitirqd(klitirqd, old_owner, next_owner);
8916 + }
8917 + else
8918 + {
8919 + if(likely(!in_interrupt()))
8920 + {
8921 + TRACE_CUR("%s: Ownership change: %s/%d to NULL (reverting)\n",
8922 + __FUNCTION__, klitirqd->comm, klitirqd->pid);
8923 + }
8924 + else
8925 + {
8926 + // is this a bug?
8927 + TRACE("%s: Ownership change: %s/%d to NULL (reverting)\n",
8928 + __FUNCTION__, klitirqd->comm, klitirqd->pid);
8929 + }
8930 +
8931 + BUG_ON(pending != 0);
8932 + litmus->decrease_prio_inheritance_klitirqd(klitirqd, old_owner, NULL);
8933 + }
8934 + }
8935 +
8936 + //__dump_state(which, "__reeval_prio: after");
8937 +}
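+
+/* Note: __reeval_prio() is always called with which->lock already held
+ * (reeval_prio() below is the locking wrapper).  The next owner is chosen in
+ * the same order in which the queues are drained -- HI tasklets, then LOW
+ * tasklets, then work objects -- so the daemon inherits the priority of the
+ * owner whose item it will execute next. */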
8938 +
8939 +static void reeval_prio(struct klitirqd_info* which)
8940 +{
8941 + unsigned long flags;
8942 +
8943 + raw_spin_lock_irqsave(&which->lock, flags);
8944 + __reeval_prio(which);
8945 + raw_spin_unlock_irqrestore(&which->lock, flags);
8946 +}
8947 +
8948 +
8949 +static void wakeup_litirqd_locked(struct klitirqd_info* which)
8950 +{
8951 + /* Interrupts are disabled: no need to stop preemption */
8952 + if (which && which->klitirqd)
8953 + {
8954 + __reeval_prio(which); /* configure the proper priority */
8955 +
8956 + if(which->klitirqd->state != TASK_RUNNING)
8957 + {
8958 + TRACE("%s: Waking up klitirqd: %s/%d\n", __FUNCTION__,
8959 + which->klitirqd->comm, which->klitirqd->pid);
8960 +
8961 + wake_up_process(which->klitirqd);
8962 + }
8963 + }
8964 +}
8965 +
8966 +
8967 +static void do_lit_tasklet(struct klitirqd_info* which,
8968 + struct tasklet_head* pending_tasklets)
8969 +{
8970 + unsigned long flags;
8971 + struct tasklet_struct *list;
8972 + atomic_t* count;
8973 +
8974 + raw_spin_lock_irqsave(&which->lock, flags);
8975 +
8976 + //__dump_state(which, "do_lit_tasklet: before steal");
8977 +
8978 + /* copy out the tasklets for our private use. */
8979 + list = pending_tasklets->head;
8980 + pending_tasklets->head = NULL;
8981 + pending_tasklets->tail = &pending_tasklets->head;
8982 +
8983 + /* remove pending flag */
8984 + which->pending &= (pending_tasklets == &which->pending_tasklets) ?
8985 + ~LIT_TASKLET_LOW :
8986 + ~LIT_TASKLET_HI;
8987 +
8988 + count = (pending_tasklets == &which->pending_tasklets) ?
8989 + &which->num_low_pending:
8990 + &which->num_hi_pending;
8991 +
8992 + //__dump_state(which, "do_lit_tasklet: after steal");
8993 +
8994 + raw_spin_unlock_irqrestore(&which->lock, flags);
8995 +
8996 +
8997 + while(list)
8998 + {
8999 + struct tasklet_struct *t = list;
9000 +
9001 + /* advance, lest we forget */
9002 + list = list->next;
9003 +
9004 + /* execute tasklet if it has my priority and is free */
9005 + if ((t->owner == which->current_owner) && tasklet_trylock(t)) {
9006 + if (!atomic_read(&t->count)) {
9007 +
9008 + sched_trace_tasklet_begin(t->owner);
9009 +
9010 + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
9011 + {
9012 + BUG();
9013 + }
9014 + TRACE_CUR("%s: Invoking tasklet.\n", __FUNCTION__);
9015 + t->func(t->data);
9016 + tasklet_unlock(t);
9017 +
9018 + atomic_dec(count);
9019 +
9020 + sched_trace_tasklet_end(t->owner, 0ul);
9021 +
9022 + continue; /* process more tasklets */
9023 + }
9024 + tasklet_unlock(t);
9025 + }
9026 +
9027 + TRACE_CUR("%s: Could not invoke tasklet. Requeuing.\n", __FUNCTION__);
9028 +
9029 + /* couldn't process tasklet. put it back at the end of the queue. */
9030 + if(pending_tasklets == &which->pending_tasklets)
9031 + ___litmus_tasklet_schedule(t, which, 0);
9032 + else
9033 + ___litmus_tasklet_hi_schedule(t, which, 0);
9034 + }
9035 +}
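+
+/* A tasklet whose owner no longer matches current_owner (or that is locked or
+ * disabled) is not dropped above: it is re-queued at the tail with wakeup == 0,
+ * and the pending owner switch is detected by the tasklet_ownership_change()
+ * check in do_litirq() and handled by reeval_prio() in the daemon's main loop. */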
9036 +
9037 +
9038 +// returns 1 if priorities need to be changed to continue processing
9039 +// pending tasklets.
9040 +static int do_litirq(struct klitirqd_info* which)
9041 +{
9042 + u32 pending;
9043 + int resched = 0;
9044 +
9045 + if(in_interrupt())
9046 + {
9047 + TRACE("%s: exiting early: in interrupt context!\n", __FUNCTION__);
9048 + return(0);
9049 + }
9050 +
9051 + if(which->klitirqd != current)
9052 + {
9053 + TRACE_CUR("%s: exiting early: thread/info mismatch! Running %s/%d but given %s/%d.\n",
9054 + __FUNCTION__, current->comm, current->pid,
9055 + which->klitirqd->comm, which->klitirqd->pid);
9056 + return(0);
9057 + }
9058 +
9059 + if(!is_realtime(current))
9060 + {
9061 + TRACE_CUR("%s: exiting early: klitirqd is not real-time. Sched Policy = %d\n",
9062 + __FUNCTION__, current->policy);
9063 + return(0);
9064 + }
9065 +
9066 +
9067 + /* We only handle tasklets & work objects, no need for RCU triggers? */
9068 +
9069 + pending = litirq_pending(which);
9070 + if(pending)
9071 + {
9072 + /* extract the work to do and do it! */
9073 + if(pending & LIT_TASKLET_HI)
9074 + {
9075 + TRACE_CUR("%s: Invoking HI tasklets.\n", __FUNCTION__);
9076 + do_lit_tasklet(which, &which->pending_tasklets_hi);
9077 + resched = tasklet_ownership_change(which, LIT_TASKLET_HI);
9078 +
9079 + if(resched)
9080 + {
9081 + TRACE_CUR("%s: HI tasklets of another owner remain. "
9082 + "Skipping any LOW tasklets.\n", __FUNCTION__);
9083 + }
9084 + }
9085 +
9086 + if(!resched && (pending & LIT_TASKLET_LOW))
9087 + {
9088 + TRACE_CUR("%s: Invoking LOW tasklets.\n", __FUNCTION__);
9089 + do_lit_tasklet(which, &which->pending_tasklets);
9090 + resched = tasklet_ownership_change(which, LIT_TASKLET_LOW);
9091 +
9092 + if(resched)
9093 + {
9094 + TRACE_CUR("%s: LOW tasklets of another owner remain. "
9095 + "Skipping any work objects.\n", __FUNCTION__);
9096 + }
9097 + }
9098 + }
9099 +
9100 + return(resched);
9101 +}
9102 +
9103 +
9104 +static void do_work(struct klitirqd_info* which)
9105 +{
9106 + unsigned long flags;
9107 + work_func_t f;
9108 + struct work_struct* work;
9109 +
9110 + // only execute one work-queue item to yield to tasklets.
9111 + // ...is this a good idea, or should we just batch them?
9112 + raw_spin_lock_irqsave(&which->lock, flags);
9113 +
9114 + if(!litirq_pending_work_irqoff(which))
9115 + {
9116 + raw_spin_unlock_irqrestore(&which->lock, flags);
9117 + goto no_work;
9118 + }
9119 +
9120 + work = list_first_entry(&which->worklist, struct work_struct, entry);
9121 + list_del_init(&work->entry);
9122 +
9123 + if(list_empty(&which->worklist))
9124 + {
9125 + which->pending &= ~LIT_WORK;
9126 + }
9127 +
9128 + raw_spin_unlock_irqrestore(&which->lock, flags);
9129 +
9130 +
9131 +
9132 + /* safe to read current_owner outside of lock since only this thread
9133 + may write to the pointer. */
9134 + if(work->owner == which->current_owner)
9135 + {
9136 + TRACE_CUR("%s: Invoking work object.\n", __FUNCTION__);
9137 + // do the work!
9138 + work_clear_pending(work);
9139 + f = work->func;
9140 + f(work); /* can't touch 'work' after this point,
9141 + the user may have freed it. */
9142 +
9143 + atomic_dec(&which->num_work_pending);
9144 + }
9145 + else
9146 + {
9147 + TRACE_CUR("%s: Could not invoke work object. Requeuing.\n",
9148 + __FUNCTION__);
9149 + ___litmus_schedule_work(work, which, 0);
9150 + }
9151 +
9152 +no_work:
9153 + return;
9154 +}
9155 +
9156 +
9157 +static int set_litmus_daemon_sched(void)
9158 +{
9159 + /* set up a daemon job that will never complete.
9160 + it should only ever run on behalf of another
9161 + real-time task.
9162 +
9163 + TODO: Transition to a new job whenever a
9164 + new tasklet is handled */
9165 +
9166 + int ret = 0;
9167 +
9168 + struct rt_task tp = {
9169 + .exec_cost = 0,
9170 + .period = 1000000000, /* dummy 1 second period */
9171 + .phase = 0,
9172 + .cpu = task_cpu(current),
9173 + .budget_policy = NO_ENFORCEMENT,
9174 + .cls = RT_CLASS_BEST_EFFORT
9175 + };
9176 +
9177 + struct sched_param param = { .sched_priority = 0};
9178 +
9179 +
9180 + /* set task params, mark as proxy thread, and init other data */
9181 + tsk_rt(current)->task_params = tp;
9182 + tsk_rt(current)->is_proxy_thread = 1;
9183 + tsk_rt(current)->cur_klitirqd = NULL;
9184 + mutex_init(&tsk_rt(current)->klitirqd_sem);
9185 + atomic_set(&tsk_rt(current)->klitirqd_sem_stat, NOT_HELD);
9186 +
9187 + /* inform the OS we're SCHED_LITMUS --
9188 + sched_setscheduler_nocheck() calls litmus_admit_task(). */
9189 +	sched_setscheduler_nocheck(current, SCHED_LITMUS, &param);
9190 +
9191 + return ret;
9192 +}
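+
+/* The daemon is admitted as a best-effort LITMUS^RT task with zero execution
+ * cost and a dummy 1 s period: on its own it runs at background priority and
+ * only does useful work while it inherits a priority from a tasklet/work-object
+ * owner (see the increase_prio_inheritance_klitirqd() call in __reeval_prio()
+ * above). */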
9193 +
9194 +static void enter_execution_phase(struct klitirqd_info* which,
9195 + struct mutex* sem,
9196 + struct task_struct* t)
9197 +{
9198 + TRACE_CUR("%s: Trying to enter execution phase. "
9199 + "Acquiring semaphore of %s/%d\n", __FUNCTION__,
9200 + t->comm, t->pid);
9201 + down_and_set_stat(current, HELD, sem);
9202 + TRACE_CUR("%s: Execution phase entered! "
9203 + "Acquired semaphore of %s/%d\n", __FUNCTION__,
9204 + t->comm, t->pid);
9205 +}
9206 +
9207 +static void exit_execution_phase(struct klitirqd_info* which,
9208 + struct mutex* sem,
9209 + struct task_struct* t)
9210 +{
9211 + TRACE_CUR("%s: Exiting execution phase. "
9212 + "Releasing semaphore of %s/%d\n", __FUNCTION__,
9213 + t->comm, t->pid);
9214 + if(atomic_read(&tsk_rt(current)->klitirqd_sem_stat) == HELD)
9215 + {
9216 + up_and_set_stat(current, NOT_HELD, sem);
9217 + TRACE_CUR("%s: Execution phase exited! "
9218 + "Released semaphore of %s/%d\n", __FUNCTION__,
9219 + t->comm, t->pid);
9220 + }
9221 + else
9222 + {
9223 + TRACE_CUR("%s: COULDN'T RELEASE SEMAPHORE BECAUSE ONE IS NOT HELD!\n", __FUNCTION__);
9224 + }
9225 +}
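+
+/* "Execution phase" here means holding the current owner's klitirqd_sem: the
+ * mutex serializes the daemon with the task it is proxying for, and the
+ * HELD / NOT_HELD / NEED_TO_REACQUIRE state kept in klitirqd_sem_stat is what
+ * release_klitirqd_lock() and reacquire_klitirqd_lock() (at the end of this
+ * file) key off of. */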
9226 +
9227 +/* main loop for klitsoftirqd */
9228 +static int run_klitirqd(void* unused)
9229 +{
9230 + struct klitirqd_info* which = &klitirqds[klitirqd_id(current)];
9231 + struct mutex* sem;
9232 + struct task_struct* owner;
9233 +
9234 + int rt_status = set_litmus_daemon_sched();
9235 +
9236 + if(rt_status != 0)
9237 + {
9238 + TRACE_CUR("%s: Failed to transition to rt-task.\n", __FUNCTION__);
9239 + goto rt_failed;
9240 + }
9241 +
9242 + atomic_inc(&num_ready_klitirqds);
9243 +
9244 + set_current_state(TASK_INTERRUPTIBLE);
9245 +
9246 + while (!kthread_should_stop())
9247 + {
9248 + preempt_disable();
9249 + if (!litirq_pending(which))
9250 + {
9251 + /* sleep for work */
9252 + TRACE_CUR("%s: No more tasklets or work objects. Going to sleep.\n",
9253 + __FUNCTION__);
9254 + preempt_enable_no_resched();
9255 + schedule();
9256 +
9257 + if(kthread_should_stop()) /* bail out */
9258 + {
9259 + TRACE_CUR("%s:%d: Signaled to terminate.\n", __FUNCTION__, __LINE__);
9260 + continue;
9261 + }
9262 +
9263 + preempt_disable();
9264 + }
9265 +
9266 + __set_current_state(TASK_RUNNING);
9267 +
9268 + while (litirq_pending_and_sem_and_owner(which, &sem, &owner))
9269 + {
9270 + int needs_resched = 0;
9271 +
9272 + preempt_enable_no_resched();
9273 +
9274 + BUG_ON(sem == NULL);
9275 +
9276 + // wait to enter execution phase; wait for 'current_owner' to block.
9277 + enter_execution_phase(which, sem, owner);
9278 +
9279 + if(kthread_should_stop())
9280 + {
9281 + TRACE_CUR("%s:%d: Signaled to terminate.\n", __FUNCTION__, __LINE__);
9282 + break;
9283 + }
9284 +
9285 + preempt_disable();
9286 +
9287 + /* Double check that there's still pending work and the owner hasn't
9288 + * changed. Pending items may have been flushed while we were sleeping.
9289 + */
9290 + if(litirq_pending_with_owner(which, owner))
9291 + {
9292 + TRACE_CUR("%s: Executing tasklets and/or work objects.\n",
9293 + __FUNCTION__);
9294 +
9295 + needs_resched = do_litirq(which);
9296 +
9297 + preempt_enable_no_resched();
9298 +
9299 + // work objects are preemptible.
9300 + if(!needs_resched)
9301 + {
9302 + do_work(which);
9303 + }
9304 +
9305 + // exit execution phase.
9306 + exit_execution_phase(which, sem, owner);
9307 +
9308 + TRACE_CUR("%s: Setting up next priority.\n", __FUNCTION__);
9309 + reeval_prio(which); /* check if we need to change priority here */
9310 + }
9311 + else
9312 + {
9313 + TRACE_CUR("%s: Pending work was flushed! Prev owner was %s/%d\n",
9314 + __FUNCTION__,
9315 + owner->comm, owner->pid);
9316 + preempt_enable_no_resched();
9317 +
9318 + // exit execution phase.
9319 + exit_execution_phase(which, sem, owner);
9320 + }
9321 +
9322 + cond_resched();
9323 + preempt_disable();
9324 + }
9325 + preempt_enable();
9326 + set_current_state(TASK_INTERRUPTIBLE);
9327 + }
9328 + __set_current_state(TASK_RUNNING);
9329 +
9330 + atomic_dec(&num_ready_klitirqds);
9331 +
9332 +rt_failed:
9333 + litmus_exit_task(current);
9334 +
9335 + return rt_status;
9336 +}
9337 +
9338 +
9339 +struct klitirqd_launch_data
9340 +{
9341 + int* cpu_affinity;
9342 + struct work_struct work;
9343 +};
9344 +
9345 +/* executed by a kworker from workqueues */
9346 +static void launch_klitirqd(struct work_struct *work)
9347 +{
9348 + int i;
9349 +
9350 + struct klitirqd_launch_data* launch_data =
9351 + container_of(work, struct klitirqd_launch_data, work);
9352 +
9353 + TRACE("%s: Creating %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD);
9354 +
9355 + /* create the daemon threads */
9356 + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
9357 + {
9358 + if(launch_data->cpu_affinity)
9359 + {
9360 + klitirqds[i].klitirqd =
9361 + kthread_create(
9362 + run_klitirqd,
9363 + /* treat the affinity as a pointer, we'll cast it back later */
9364 + (void*)(long long)launch_data->cpu_affinity[i],
9365 + "klitirqd_th%d/%d",
9366 + i,
9367 + launch_data->cpu_affinity[i]);
9368 +
9369 +			/* litmus will put it in the right cluster. */
9370 + kthread_bind(klitirqds[i].klitirqd, launch_data->cpu_affinity[i]);
9371 + }
9372 + else
9373 + {
9374 + klitirqds[i].klitirqd =
9375 + kthread_create(
9376 + run_klitirqd,
9377 + /* treat the affinity as a pointer, we'll cast it back later */
9378 + (void*)(long long)(-1),
9379 + "klitirqd_th%d",
9380 + i);
9381 + }
9382 + }
9383 +
9384 + TRACE("%s: Launching %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD);
9385 +
9386 + /* unleash the daemons */
9387 + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
9388 + {
9389 + wake_up_process(klitirqds[i].klitirqd);
9390 + }
9391 +
9392 + if(launch_data->cpu_affinity)
9393 + kfree(launch_data->cpu_affinity);
9394 + kfree(launch_data);
9395 +}
9396 +
9397 +
9398 +void spawn_klitirqd(int* affinity)
9399 +{
9400 + int i;
9401 + struct klitirqd_launch_data* delayed_launch;
9402 +
9403 + if(atomic_read(&num_ready_klitirqds) != 0)
9404 + {
9405 +		TRACE("%s: At least one klitirqd is already running! Need to call kill_klitirqd()?\n", __FUNCTION__);
9406 + return;
9407 + }
9408 +
9409 + /* init the tasklet & work queues */
9410 + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
9411 + {
9412 + klitirqds[i].terminating = 0;
9413 + klitirqds[i].pending = 0;
9414 +
9415 + klitirqds[i].num_hi_pending.counter = 0;
9416 + klitirqds[i].num_low_pending.counter = 0;
9417 + klitirqds[i].num_work_pending.counter = 0;
9418 +
9419 + klitirqds[i].pending_tasklets_hi.head = NULL;
9420 + klitirqds[i].pending_tasklets_hi.tail = &klitirqds[i].pending_tasklets_hi.head;
9421 +
9422 + klitirqds[i].pending_tasklets.head = NULL;
9423 + klitirqds[i].pending_tasklets.tail = &klitirqds[i].pending_tasklets.head;
9424 +
9425 + INIT_LIST_HEAD(&klitirqds[i].worklist);
9426 +
9427 + raw_spin_lock_init(&klitirqds[i].lock);
9428 + }
9429 +
9430 + /* wait to flush the initializations to memory since other threads
9431 + will access it. */
9432 + mb();
9433 +
9434 + /* tell a work queue to launch the threads. we can't make scheduling
9435 + calls since we're in an atomic state. */
9436 + TRACE("%s: Setting callback up to launch klitirqds\n", __FUNCTION__);
9437 + delayed_launch = kmalloc(sizeof(struct klitirqd_launch_data), GFP_ATOMIC);
9438 + if(affinity)
9439 + {
9440 + delayed_launch->cpu_affinity =
9441 + kmalloc(sizeof(int)*NR_LITMUS_SOFTIRQD, GFP_ATOMIC);
9442 +
9443 + memcpy(delayed_launch->cpu_affinity, affinity,
9444 + sizeof(int)*NR_LITMUS_SOFTIRQD);
9445 + }
9446 + else
9447 + {
9448 + delayed_launch->cpu_affinity = NULL;
9449 + }
9450 + INIT_WORK(&delayed_launch->work, launch_klitirqd);
9451 + schedule_work(&delayed_launch->work);
9452 +}
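+
+/* Illustrative call site (a sketch only, not taken from this patch): a plugin
+ * could pin one daemon per CPU, or pass NULL to leave the daemons unbound:
+ *
+ *	int cpus[NR_LITMUS_SOFTIRQD];
+ *	int i;
+ *	for (i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
+ *		cpus[i] = i % num_online_cpus();
+ *	spawn_klitirqd(cpus);	(or spawn_klitirqd(NULL))
+ */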
9453 +
9454 +
9455 +void kill_klitirqd(void)
9456 +{
9457 + if(!klitirqd_is_dead())
9458 + {
9459 + int i;
9460 +
9461 + TRACE("%s: Killing %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD);
9462 +
9463 + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
9464 + {
9465 + if(klitirqds[i].terminating != 1)
9466 + {
9467 + klitirqds[i].terminating = 1;
9468 + mb(); /* just to be sure? */
9469 + flush_pending(klitirqds[i].klitirqd, NULL);
9470 +
9471 + /* signal termination */
9472 + kthread_stop(klitirqds[i].klitirqd);
9473 + }
9474 + }
9475 + }
9476 +}
9477 +
9478 +
9479 +int klitirqd_is_ready(void)
9480 +{
9481 + return(atomic_read(&num_ready_klitirqds) == NR_LITMUS_SOFTIRQD);
9482 +}
9483 +
9484 +int klitirqd_is_dead(void)
9485 +{
9486 + return(atomic_read(&num_ready_klitirqds) == 0);
9487 +}
9488 +
9489 +
9490 +struct task_struct* get_klitirqd(unsigned int k_id)
9491 +{
9492 + return(klitirqds[k_id].klitirqd);
9493 +}
9494 +
9495 +
9496 +void flush_pending(struct task_struct* klitirqd_thread,
9497 + struct task_struct* owner)
9498 +{
9499 + unsigned int k_id = klitirqd_id(klitirqd_thread);
9500 + struct klitirqd_info *which = &klitirqds[k_id];
9501 +
9502 + unsigned long flags;
9503 + struct tasklet_struct *list;
9504 +
9505 + u32 work_flushed = 0;
9506 +
9507 + raw_spin_lock_irqsave(&which->lock, flags);
9508 +
9509 + //__dump_state(which, "flush_pending: before");
9510 +
9511 + // flush hi tasklets.
9512 + if(litirq_pending_hi_irqoff(which))
9513 + {
9514 + which->pending &= ~LIT_TASKLET_HI;
9515 +
9516 + list = which->pending_tasklets_hi.head;
9517 + which->pending_tasklets_hi.head = NULL;
9518 + which->pending_tasklets_hi.tail = &which->pending_tasklets_hi.head;
9519 +
9520 + TRACE("%s: Handing HI tasklets back to Linux.\n", __FUNCTION__);
9521 +
9522 + while(list)
9523 + {
9524 + struct tasklet_struct *t = list;
9525 + list = list->next;
9526 +
9527 + if(likely((t->owner == owner) || (owner == NULL)))
9528 + {
9529 + if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)))
9530 + {
9531 + BUG();
9532 + }
9533 +
9534 + work_flushed |= LIT_TASKLET_HI;
9535 +
9536 + t->owner = NULL;
9537 +
9538 +				// the SCHED bit was just cleared above, so re-setting it should never fail
9539 + if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
9540 + {
9541 + atomic_dec(&which->num_hi_pending);
9542 + ___tasklet_hi_schedule(t);
9543 + }
9544 + else
9545 + {
9546 + TRACE("%s: dropped hi tasklet??\n", __FUNCTION__);
9547 + BUG();
9548 + }
9549 + }
9550 + else
9551 + {
9552 + TRACE("%s: Could not flush a HI tasklet.\n", __FUNCTION__);
9553 + // put back on queue.
9554 + ___litmus_tasklet_hi_schedule(t, which, 0);
9555 + }
9556 + }
9557 + }
9558 +
9559 + // flush low tasklets.
9560 + if(litirq_pending_low_irqoff(which))
9561 + {
9562 + which->pending &= ~LIT_TASKLET_LOW;
9563 +
9564 + list = which->pending_tasklets.head;
9565 + which->pending_tasklets.head = NULL;
9566 + which->pending_tasklets.tail = &which->pending_tasklets.head;
9567 +
9568 + TRACE("%s: Handing LOW tasklets back to Linux.\n", __FUNCTION__);
9569 +
9570 + while(list)
9571 + {
9572 + struct tasklet_struct *t = list;
9573 + list = list->next;
9574 +
9575 + if(likely((t->owner == owner) || (owner == NULL)))
9576 + {
9577 + if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)))
9578 + {
9579 + BUG();
9580 + }
9581 +
9582 + work_flushed |= LIT_TASKLET_LOW;
9583 +
9584 + t->owner = NULL;
9585 + sched_trace_tasklet_end(owner, 1ul);
9586 +
9587 + if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
9588 + {
9589 + atomic_dec(&which->num_low_pending);
9590 + ___tasklet_schedule(t);
9591 + }
9592 + else
9593 + {
9594 + TRACE("%s: dropped tasklet??\n", __FUNCTION__);
9595 + BUG();
9596 + }
9597 + }
9598 + else
9599 + {
9600 + TRACE("%s: Could not flush a LOW tasklet.\n", __FUNCTION__);
9601 + // put back on queue
9602 + ___litmus_tasklet_schedule(t, which, 0);
9603 + }
9604 + }
9605 + }
9606 +
9607 + // flush work objects
9608 + if(litirq_pending_work_irqoff(which))
9609 + {
9610 + which->pending &= ~LIT_WORK;
9611 +
9612 + TRACE("%s: Handing work objects back to Linux.\n", __FUNCTION__);
9613 +
9614 + while(!list_empty(&which->worklist))
9615 + {
9616 + struct work_struct* work =
9617 + list_first_entry(&which->worklist, struct work_struct, entry);
9618 + list_del_init(&work->entry);
9619 +
9620 + if(likely((work->owner == owner) || (owner == NULL)))
9621 + {
9622 + work_flushed |= LIT_WORK;
9623 + atomic_dec(&which->num_work_pending);
9624 +
9625 + work->owner = NULL;
9626 + sched_trace_work_end(owner, current, 1ul);
9627 + __schedule_work(work);
9628 + }
9629 + else
9630 + {
9631 + TRACE("%s: Could not flush a work object.\n", __FUNCTION__);
9632 + // put back on queue
9633 + ___litmus_schedule_work(work, which, 0);
9634 + }
9635 + }
9636 + }
9637 +
9638 + //__dump_state(which, "flush_pending: after (before reeval prio)");
9639 +
9640 +
9641 + mb(); /* commit changes to pending flags */
9642 +
9643 + /* reset the scheduling priority */
9644 + if(work_flushed)
9645 + {
9646 + __reeval_prio(which);
9647 +
9648 + /* Try to offload flushed tasklets to Linux's ksoftirqd. */
9649 + if(work_flushed & (LIT_TASKLET_LOW | LIT_TASKLET_HI))
9650 + {
9651 + wakeup_softirqd();
9652 + }
9653 + }
9654 + else
9655 + {
9656 + TRACE_CUR("%s: no work flushed, so __reeval_prio() skipped\n", __FUNCTION__);
9657 + }
9658 +
9659 + raw_spin_unlock_irqrestore(&which->lock, flags);
9660 +}
9661 +
9662 +
9663 +
9664 +
9665 +static void ___litmus_tasklet_schedule(struct tasklet_struct *t,
9666 + struct klitirqd_info *which,
9667 + int wakeup)
9668 +{
9669 + unsigned long flags;
9670 + u32 old_pending;
9671 +
9672 + t->next = NULL;
9673 +
9674 + raw_spin_lock_irqsave(&which->lock, flags);
9675 +
9676 + //__dump_state(which, "___litmus_tasklet_schedule: before queuing");
9677 +
9678 + *(which->pending_tasklets.tail) = t;
9679 + which->pending_tasklets.tail = &t->next;
9680 +
9681 + old_pending = which->pending;
9682 + which->pending |= LIT_TASKLET_LOW;
9683 +
9684 + atomic_inc(&which->num_low_pending);
9685 +
9686 + mb();
9687 +
9688 + if(!old_pending && wakeup)
9689 + {
9690 + wakeup_litirqd_locked(which); /* wake up the klitirqd */
9691 + }
9692 +
9693 + //__dump_state(which, "___litmus_tasklet_schedule: after queuing");
9694 +
9695 + raw_spin_unlock_irqrestore(&which->lock, flags);
9696 +}
9697 +
9698 +int __litmus_tasklet_schedule(struct tasklet_struct *t, unsigned int k_id)
9699 +{
9700 + int ret = 0; /* assume failure */
9701 + if(unlikely((t->owner == NULL) || !is_realtime(t->owner)))
9702 + {
9703 + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
9704 + BUG();
9705 + }
9706 +
9707 + if(unlikely(k_id >= NR_LITMUS_SOFTIRQD))
9708 + {
9709 + TRACE("%s: No klitirqd_th%d!\n", __FUNCTION__, k_id);
9710 + BUG();
9711 + }
9712 +
9713 + if(likely(!klitirqds[k_id].terminating))
9714 + {
9715 + /* Can't accept tasklets while we're processing a workqueue
9716 + because they're handled by the same thread. This case is
9717 + very RARE.
9718 +
9719 + TODO: Use a separate thread for work objects!!!!!!
9720 + */
9721 + if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0))
9722 + {
9723 + ret = 1;
9724 + ___litmus_tasklet_schedule(t, &klitirqds[k_id], 1);
9725 + }
9726 + else
9727 + {
9728 + TRACE("%s: rejected tasklet because of pending work.\n",
9729 + __FUNCTION__);
9730 + }
9731 + }
9732 + return(ret);
9733 +}
9734 +
9735 +EXPORT_SYMBOL(__litmus_tasklet_schedule);
9736 +
9737 +
9738 +static void ___litmus_tasklet_hi_schedule(struct tasklet_struct *t,
9739 + struct klitirqd_info *which,
9740 + int wakeup)
9741 +{
9742 + unsigned long flags;
9743 + u32 old_pending;
9744 +
9745 + t->next = NULL;
9746 +
9747 + raw_spin_lock_irqsave(&which->lock, flags);
9748 +
9749 + *(which->pending_tasklets_hi.tail) = t;
9750 + which->pending_tasklets_hi.tail = &t->next;
9751 +
9752 + old_pending = which->pending;
9753 + which->pending |= LIT_TASKLET_HI;
9754 +
9755 + atomic_inc(&which->num_hi_pending);
9756 +
9757 + mb();
9758 +
9759 + if(!old_pending && wakeup)
9760 + {
9761 + wakeup_litirqd_locked(which); /* wake up the klitirqd */
9762 + }
9763 +
9764 + raw_spin_unlock_irqrestore(&which->lock, flags);
9765 +}
9766 +
9767 +int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, unsigned int k_id)
9768 +{
9769 + int ret = 0; /* assume failure */
9770 + if(unlikely((t->owner == NULL) || !is_realtime(t->owner)))
9771 + {
9772 + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
9773 + BUG();
9774 + }
9775 +
9776 + if(unlikely(k_id >= NR_LITMUS_SOFTIRQD))
9777 + {
9778 + TRACE("%s: No klitirqd_th%d!\n", __FUNCTION__, k_id);
9779 + BUG();
9780 + }
9781 +
9782 + if(unlikely(!klitirqd_is_ready()))
9783 + {
9784 +		TRACE("%s: klitirqd is not ready!\n", __FUNCTION__);
9785 + BUG();
9786 + }
9787 +
9788 + if(likely(!klitirqds[k_id].terminating))
9789 + {
9790 + if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0))
9791 + {
9792 + ret = 1;
9793 + ___litmus_tasklet_hi_schedule(t, &klitirqds[k_id], 1);
9794 + }
9795 + else
9796 + {
9797 + TRACE("%s: rejected tasklet because of pending work.\n",
9798 + __FUNCTION__);
9799 + }
9800 + }
9801 + return(ret);
9802 +}
9803 +
9804 +EXPORT_SYMBOL(__litmus_tasklet_hi_schedule);
9805 +
9806 +
9807 +int __litmus_tasklet_hi_schedule_first(struct tasklet_struct *t, unsigned int k_id)
9808 +{
9809 + int ret = 0; /* assume failure */
9810 + u32 old_pending;
9811 +
9812 + BUG_ON(!irqs_disabled());
9813 +
9814 + if(unlikely((t->owner == NULL) || !is_realtime(t->owner)))
9815 + {
9816 + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
9817 + BUG();
9818 + }
9819 +
9820 + if(unlikely(k_id >= NR_LITMUS_SOFTIRQD))
9821 + {
9822 + TRACE("%s: No klitirqd_th%u!\n", __FUNCTION__, k_id);
9823 + BUG();
9824 + }
9825 +
9826 + if(unlikely(!klitirqd_is_ready()))
9827 + {
9828 +		TRACE("%s: klitirqd is not ready!\n", __FUNCTION__);
9829 + BUG();
9830 + }
9831 +
9832 + if(likely(!klitirqds[k_id].terminating))
9833 + {
9834 + raw_spin_lock(&klitirqds[k_id].lock);
9835 +
9836 + if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0))
9837 + {
9838 + ret = 1; // success!
9839 +
9840 + t->next = klitirqds[k_id].pending_tasklets_hi.head;
9841 + klitirqds[k_id].pending_tasklets_hi.head = t;
9842 +
9843 + old_pending = klitirqds[k_id].pending;
9844 + klitirqds[k_id].pending |= LIT_TASKLET_HI;
9845 +
9846 + atomic_inc(&klitirqds[k_id].num_hi_pending);
9847 +
9848 + mb();
9849 +
9850 + if(!old_pending)
9851 + wakeup_litirqd_locked(&klitirqds[k_id]); /* wake up the klitirqd */
9852 + }
9853 + else
9854 + {
9855 + TRACE("%s: rejected tasklet because of pending work.\n",
9856 + __FUNCTION__);
9857 + }
9858 +
9859 + raw_spin_unlock(&klitirqds[k_id].lock);
9860 + }
9861 + return(ret);
9862 +}
9863 +
9864 +EXPORT_SYMBOL(__litmus_tasklet_hi_schedule_first);
9865 +
9866 +
9867 +
9868 +static void ___litmus_schedule_work(struct work_struct *w,
9869 + struct klitirqd_info *which,
9870 + int wakeup)
9871 +{
9872 + unsigned long flags;
9873 + u32 old_pending;
9874 +
9875 + raw_spin_lock_irqsave(&which->lock, flags);
9876 +
9877 + work_pending(w);
9878 + list_add_tail(&w->entry, &which->worklist);
9879 +
9880 + old_pending = which->pending;
9881 + which->pending |= LIT_WORK;
9882 +
9883 + atomic_inc(&which->num_work_pending);
9884 +
9885 + mb();
9886 +
9887 + if(!old_pending && wakeup)
9888 + {
9889 + wakeup_litirqd_locked(which); /* wakeup the klitirqd */
9890 + }
9891 +
9892 + raw_spin_unlock_irqrestore(&which->lock, flags);
9893 +}
9894 +
9895 +int __litmus_schedule_work(struct work_struct *w, unsigned int k_id)
9896 +{
9897 + int ret = 1; /* assume success */
9898 + if(unlikely(w->owner == NULL) || !is_realtime(w->owner))
9899 + {
9900 + TRACE("%s: No owner associated with this work object!\n", __FUNCTION__);
9901 + BUG();
9902 + }
9903 +
9904 + if(unlikely(k_id >= NR_LITMUS_SOFTIRQD))
9905 + {
9906 +		TRACE("%s: No klitirqd_th%u!\n", __FUNCTION__, k_id);
9907 + BUG();
9908 + }
9909 +
9910 + if(unlikely(!klitirqd_is_ready()))
9911 + {
9912 +		TRACE("%s: klitirqd is not ready!\n", __FUNCTION__);
9913 + BUG();
9914 + }
9915 +
9916 + if(likely(!klitirqds[k_id].terminating))
9917 + ___litmus_schedule_work(w, &klitirqds[k_id], 1);
9918 + else
9919 + ret = 0;
9920 + return(ret);
9921 +}
9922 +EXPORT_SYMBOL(__litmus_schedule_work);
9923 +
9924 +
9925 +static int set_klitirqd_sem_status(unsigned long stat)
9926 +{
9927 + TRACE_CUR("SETTING STATUS FROM %d TO %d\n",
9928 + atomic_read(&tsk_rt(current)->klitirqd_sem_stat),
9929 + stat);
9930 + atomic_set(&tsk_rt(current)->klitirqd_sem_stat, stat);
9931 + //mb();
9932 +
9933 + return(0);
9934 +}
9935 +
9936 +static int set_klitirqd_sem_status_if_not_held(unsigned long stat)
9937 +{
9938 + if(atomic_read(&tsk_rt(current)->klitirqd_sem_stat) != HELD)
9939 + {
9940 + return(set_klitirqd_sem_status(stat));
9941 + }
9942 + return(-1);
9943 +}
9944 +
9945 +
9946 +void __down_and_reset_and_set_stat(struct task_struct* t,
9947 + enum klitirqd_sem_status to_reset,
9948 + enum klitirqd_sem_status to_set,
9949 + struct mutex* sem)
9950 +{
9951 +#if 0
9952 + struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem);
9953 + struct task_struct* task = container_of(param, struct task_struct, rt_param);
9954 +
9955 + TRACE_CUR("%s: entered. Locking semaphore of %s/%d\n",
9956 + __FUNCTION__, task->comm, task->pid);
9957 +#endif
9958 +
9959 + mutex_lock_sfx(sem,
9960 + set_klitirqd_sem_status_if_not_held, to_reset,
9961 + set_klitirqd_sem_status, to_set);
9962 +#if 0
9963 + TRACE_CUR("%s: exiting. Have semaphore of %s/%d\n",
9964 + __FUNCTION__, task->comm, task->pid);
9965 +#endif
9966 +}
9967 +
9968 +void down_and_set_stat(struct task_struct* t,
9969 + enum klitirqd_sem_status to_set,
9970 + struct mutex* sem)
9971 +{
9972 +#if 0
9973 + struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem);
9974 + struct task_struct* task = container_of(param, struct task_struct, rt_param);
9975 +
9976 + TRACE_CUR("%s: entered. Locking semaphore of %s/%d\n",
9977 + __FUNCTION__, task->comm, task->pid);
9978 +#endif
9979 +
9980 + mutex_lock_sfx(sem,
9981 + NULL, 0,
9982 + set_klitirqd_sem_status, to_set);
9983 +
9984 +#if 0
9985 + TRACE_CUR("%s: exiting. Have semaphore of %s/%d\n",
9986 + __FUNCTION__, task->comm, task->pid);
9987 +#endif
9988 +}
9989 +
9990 +
9991 +void up_and_set_stat(struct task_struct* t,
9992 + enum klitirqd_sem_status to_set,
9993 + struct mutex* sem)
9994 +{
9995 +#if 0
9996 + struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem);
9997 + struct task_struct* task = container_of(param, struct task_struct, rt_param);
9998 +
9999 + TRACE_CUR("%s: entered. Unlocking semaphore of %s/%d\n",
10000 + __FUNCTION__,
10001 + task->comm, task->pid);
10002 +#endif
10003 +
10004 + mutex_unlock_sfx(sem, NULL, 0,
10005 + set_klitirqd_sem_status, to_set);
10006 +
10007 +#if 0
10008 + TRACE_CUR("%s: exiting. Unlocked semaphore of %s/%d\n",
10009 + __FUNCTION__,
10010 + task->comm, task->pid);
10011 +#endif
10012 +}
10013 +
10014 +
10015 +
10016 +void release_klitirqd_lock(struct task_struct* t)
10017 +{
10018 + if(is_realtime(t) && (atomic_read(&tsk_rt(t)->klitirqd_sem_stat) == HELD))
10019 + {
10020 + struct mutex* sem;
10021 + struct task_struct* owner = t;
10022 +
10023 + if(t->state == TASK_RUNNING)
10024 + {
10025 + TRACE_TASK(t, "NOT giving up klitirqd_sem because we're not blocked!\n");
10026 + return;
10027 + }
10028 +
10029 + if(likely(!tsk_rt(t)->is_proxy_thread))
10030 + {
10031 + sem = &tsk_rt(t)->klitirqd_sem;
10032 + }
10033 + else
10034 + {
10035 + unsigned int k_id = klitirqd_id(t);
10036 + owner = klitirqds[k_id].current_owner;
10037 +
10038 + BUG_ON(t != klitirqds[k_id].klitirqd);
10039 +
10040 + if(likely(owner))
10041 + {
10042 + sem = &tsk_rt(owner)->klitirqd_sem;
10043 + }
10044 + else
10045 + {
10046 + BUG();
10047 +
10048 + // We had the rug pulled out from under us. Abort attempt
10049 + // to reacquire the lock since our client no longer needs us.
10050 + TRACE_CUR("HUH?! How did this happen?\n");
10051 + atomic_set(&tsk_rt(t)->klitirqd_sem_stat, NOT_HELD);
10052 + return;
10053 + }
10054 + }
10055 +
10056 + //TRACE_CUR("Releasing semaphore of %s/%d...\n", owner->comm, owner->pid);
10057 + up_and_set_stat(t, NEED_TO_REACQUIRE, sem);
10058 + //TRACE_CUR("Semaphore of %s/%d released!\n", owner->comm, owner->pid);
10059 + }
10060 + /*
10061 + else if(is_realtime(t))
10062 + {
10063 + TRACE_CUR("%s: Nothing to do. Stat = %d\n", __FUNCTION__, tsk_rt(t)->klitirqd_sem_stat);
10064 + }
10065 + */
10066 +}
10067 +
10068 +int reacquire_klitirqd_lock(struct task_struct* t)
10069 +{
10070 + int ret = 0;
10071 +
10072 + if(is_realtime(t) && (atomic_read(&tsk_rt(t)->klitirqd_sem_stat) == NEED_TO_REACQUIRE))
10073 + {
10074 + struct mutex* sem;
10075 + struct task_struct* owner = t;
10076 +
10077 + if(likely(!tsk_rt(t)->is_proxy_thread))
10078 + {
10079 + sem = &tsk_rt(t)->klitirqd_sem;
10080 + }
10081 + else
10082 + {
10083 + unsigned int k_id = klitirqd_id(t);
10084 + //struct task_struct* owner = klitirqds[k_id].current_owner;
10085 + owner = klitirqds[k_id].current_owner;
10086 +
10087 + BUG_ON(t != klitirqds[k_id].klitirqd);
10088 +
10089 + if(likely(owner))
10090 + {
10091 + sem = &tsk_rt(owner)->klitirqd_sem;
10092 + }
10093 + else
10094 + {
10095 + // We had the rug pulled out from under us. Abort attempt
10096 + // to reacquire the lock since our client no longer needs us.
10097 + TRACE_CUR("No longer needs to reacquire klitirqd_sem!\n");
10098 + atomic_set(&tsk_rt(t)->klitirqd_sem_stat, NOT_HELD);
10099 + return(0);
10100 + }
10101 + }
10102 +
10103 + //TRACE_CUR("Trying to reacquire semaphore of %s/%d\n", owner->comm, owner->pid);
10104 + __down_and_reset_and_set_stat(t, REACQUIRING, HELD, sem);
10105 + //TRACE_CUR("Reacquired semaphore %s/%d\n", owner->comm, owner->pid);
10106 + }
10107 + /*
10108 + else if(is_realtime(t))
10109 + {
10110 + TRACE_CUR("%s: Nothing to do. Stat = %d\n", __FUNCTION__, tsk_rt(t)->klitirqd_sem_stat);
10111 + }
10112 + */
10113 +
10114 + return(ret);
10115 +}
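+
+/* These two hooks pair up around suspensions: release_klitirqd_lock() gives
+ * up the owner's klitirqd_sem when its holder blocks (state != TASK_RUNNING)
+ * and marks it NEED_TO_REACQUIRE; reacquire_klitirqd_lock() takes it back on
+ * wake-up.  The call sites are expected to be in the scheduling/wake-up path,
+ * outside this file. */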
10116 +
10117 diff --git a/litmus/locking.c b/litmus/locking.c
10118 index 0c1aa6a..718a5a3 100644
10119 --- a/litmus/locking.c
10120 +++ b/litmus/locking.c
10121 @@ -4,6 +4,15 @@
10122
10123 #include <litmus/sched_plugin.h>
10124 #include <litmus/trace.h>
10125 +#include <litmus/litmus.h>
10126 +
10127 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
10128 +#include <linux/uaccess.h>
10129 +#endif
10130 +
10131 +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
10132 +#include <litmus/gpu_affinity.h>
10133 +#endif
10134
10135 static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg);
10136 static int open_generic_lock(struct od_table_entry* entry, void* __user arg);
10137 @@ -17,6 +26,9 @@ struct fdso_ops generic_lock_ops = {
10138 .destroy = destroy_generic_lock
10139 };
10140
10141 +static atomic_t lock_id_gen = ATOMIC_INIT(0);
10142 +
10143 +
10144 static inline bool is_lock(struct od_table_entry* entry)
10145 {
10146 return entry->class == &generic_lock_ops;
10147 @@ -34,8 +46,21 @@ static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user ar
10148 int err;
10149
10150 err = litmus->allocate_lock(&lock, type, arg);
10151 - if (err == 0)
10152 + if (err == 0) {
10153 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
10154 + lock->nest.lock = lock;
10155 + lock->nest.hp_waiter_eff_prio = NULL;
10156 +
10157 + INIT_BINHEAP_NODE(&lock->nest.hp_binheap_node);
10158 + if(!lock->nest.hp_waiter_ptr) {
10159 + TRACE_CUR("BEWARE: hp_waiter_ptr should probably not be NULL in "
10160 + "most uses. (exception: IKGLP donors)\n");
10161 + }
10162 +#endif
10163 + lock->type = type;
10164 + lock->ident = atomic_inc_return(&lock_id_gen);
10165 *obj_ref = lock;
10166 + }
10167 return err;
10168 }
10169
10170 @@ -74,7 +99,8 @@ asmlinkage long sys_litmus_lock(int lock_od)
10171 entry = get_entry_for_od(lock_od);
10172 if (entry && is_lock(entry)) {
10173 l = get_lock(entry);
10174 - TRACE_CUR("attempts to lock 0x%p\n", l);
10175 + //TRACE_CUR("attempts to lock 0x%p\n", l);
10176 + TRACE_CUR("attempts to lock %d\n", l->ident);
10177 err = l->ops->lock(l);
10178 }
10179
10180 @@ -96,7 +122,8 @@ asmlinkage long sys_litmus_unlock(int lock_od)
10181 entry = get_entry_for_od(lock_od);
10182 if (entry && is_lock(entry)) {
10183 l = get_lock(entry);
10184 - TRACE_CUR("attempts to unlock 0x%p\n", l);
10185 + //TRACE_CUR("attempts to unlock 0x%p\n", l);
10186 + TRACE_CUR("attempts to unlock %d\n", l->ident);
10187 err = l->ops->unlock(l);
10188 }
10189
10190 @@ -121,8 +148,366 @@ struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq)
10191 return(t);
10192 }
10193
10194 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
10195 +
10196 +void print_hp_waiters(struct binheap_node* n, int depth)
10197 +{
10198 + struct litmus_lock *l;
10199 + struct nested_info *nest;
10200 + char padding[81] = " ";
10201 + struct task_struct *hp = NULL;
10202 + struct task_struct *hp_eff = NULL;
10203 + struct task_struct *node_prio = NULL;
10204 +
10205 +
10206 + if(n == NULL) {
10207 + TRACE("+-> %p\n", NULL);
10208 + return;
10209 + }
10210 +
10211 + nest = binheap_entry(n, struct nested_info, hp_binheap_node);
10212 + l = nest->lock;
10213 +
10214 + if(depth*2 <= 80)
10215 + padding[depth*2] = '\0';
10216 +
10217 + if(nest->hp_waiter_ptr && *(nest->hp_waiter_ptr)) {
10218 + hp = *(nest->hp_waiter_ptr);
10219 +
10220 + if(tsk_rt(hp)->inh_task) {
10221 + hp_eff = tsk_rt(hp)->inh_task;
10222 + }
10223 + }
10224 +
10225 + node_prio = nest->hp_waiter_eff_prio;
10226 +
10227 + TRACE("%s+-> %s/%d [waiter = %s/%d] [waiter's inh = %s/%d] (lock = %d)\n",
10228 + padding,
10229 + (node_prio) ? node_prio->comm : "nil",
10230 + (node_prio) ? node_prio->pid : -1,
10231 + (hp) ? hp->comm : "nil",
10232 + (hp) ? hp->pid : -1,
10233 + (hp_eff) ? hp_eff->comm : "nil",
10234 + (hp_eff) ? hp_eff->pid : -1,
10235 + l->ident);
10236 +
10237 + if(n->left) print_hp_waiters(n->left, depth+1);
10238 + if(n->right) print_hp_waiters(n->right, depth+1);
10239 +}
10240 +#endif
10241 +
10242 +
10243 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
10244 +
10245 +void select_next_lock(dgl_wait_state_t* dgl_wait /*, struct litmus_lock* prev_lock*/)
10246 +{
10247 + /*
10248 + We pick the next lock in reverse order. This causes inheritance propagation
10249 + from locks received earlier to flow in the same direction as regular nested
10250 + locking. This might make fine-grain DGL easier in the future.
10251 + */
10252 +
10253 + BUG_ON(tsk_rt(dgl_wait->task)->blocked_lock);
10254 +
10255 + //WARN_ON(dgl_wait->locks[dgl_wait->last_primary] != prev_lock);
10256 +
10257 + // note reverse order
10258 + for(dgl_wait->last_primary = dgl_wait->last_primary - 1;
10259 + dgl_wait->last_primary >= 0;
10260 + --(dgl_wait->last_primary)){
10261 + if(!dgl_wait->locks[dgl_wait->last_primary]->ops->is_owner(
10262 + dgl_wait->locks[dgl_wait->last_primary], dgl_wait->task)) {
10263 +
10264 + tsk_rt(dgl_wait->task)->blocked_lock =
10265 + dgl_wait->locks[dgl_wait->last_primary];
10266 + mb();
10267 +
10268 + TRACE_CUR("New blocked lock is %d\n",
10269 + dgl_wait->locks[dgl_wait->last_primary]->ident);
10270 +
10271 + break;
10272 + }
10273 + }
10274 +}
10275 +
10276 +int dgl_wake_up(wait_queue_t *wq_node, unsigned mode, int sync, void *key)
10277 +{
10278 + // should never be called.
10279 + BUG();
10280 + return 1;
10281 +}
10282 +
10283 +void __waitqueue_dgl_remove_first(wait_queue_head_t *wq,
10284 + dgl_wait_state_t** dgl_wait,
10285 + struct task_struct **task)
10286 +{
10287 + wait_queue_t *q;
10288 +
10289 + *dgl_wait = NULL;
10290 + *task = NULL;
10291 +
10292 + if (waitqueue_active(wq)) {
10293 + q = list_entry(wq->task_list.next,
10294 + wait_queue_t, task_list);
10295 +
10296 + if(q->func == dgl_wake_up) {
10297 + *dgl_wait = (dgl_wait_state_t*) q->private;
10298 + }
10299 + else {
10300 + *task = (struct task_struct*) q->private;
10301 + }
10302 +
10303 + __remove_wait_queue(wq, q);
10304 + }
10305 +}
10306 +
10307 +void init_dgl_waitqueue_entry(wait_queue_t *wq_node, dgl_wait_state_t* dgl_wait)
10308 +{
10309 + init_waitqueue_entry(wq_node, dgl_wait->task);
10310 + wq_node->private = dgl_wait;
10311 + wq_node->func = dgl_wake_up;
10312 +}
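+
+/* dgl_wake_up() is never meant to run: it is installed as a sentinel in
+ * wq_node->func so that __waitqueue_dgl_remove_first() can distinguish a DGL
+ * waiter (q->private is a dgl_wait_state_t*) from an ordinary waiter
+ * (q->private is a task_struct*) on the same wait queue. */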
10313 +
10314 +
10315 +static long do_litmus_dgl_lock(dgl_wait_state_t *dgl_wait)
10316 +{
10317 + int i;
10318 + unsigned long irqflags; //, dummyflags;
10319 + raw_spinlock_t *dgl_lock = litmus->get_dgl_spinlock(dgl_wait->task);
10320 +
10321 + BUG_ON(dgl_wait->task != current);
10322 +
10323 + raw_spin_lock_irqsave(dgl_lock, irqflags);
10324 +
10325 +
10326 + dgl_wait->nr_remaining = dgl_wait->size;
10327 +
10328 + TRACE_CUR("Locking DGL with size %d\n", dgl_wait->size);
10329 +
10330 + // try to acquire each lock. enqueue (non-blocking) if it is unavailable.
10331 + for(i = 0; i < dgl_wait->size; ++i) {
10332 + struct litmus_lock *l = dgl_wait->locks[i];
10333 +
10334 + // dgl_lock() must set task state to TASK_UNINTERRUPTIBLE if task blocks.
10335 +
10336 + if(l->ops->dgl_lock(l, dgl_wait, &dgl_wait->wq_nodes[i])) {
10337 + --(dgl_wait->nr_remaining);
10338 +			TRACE_CUR("Acquired lock %d immediately.\n", l->ident);
10339 + }
10340 + }
10341 +
10342 + if(dgl_wait->nr_remaining == 0) {
10343 +		// acquired entire group immediately
10344 +		TRACE_CUR("Acquired all locks in DGL immediately!\n");
10345 + }
10346 + else {
10347 +
10348 + TRACE_CUR("As many as %d locks in DGL are pending. Suspending.\n",
10349 + dgl_wait->nr_remaining);
10350 +
10351 +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
10352 +		// KLUDGE: don't count this suspension as time spent in the
10353 +		// gpu critical section
10354 + if(tsk_rt(dgl_wait->task)->held_gpus) {
10355 + tsk_rt(dgl_wait->task)->suspend_gpu_tracker_on_block = 1;
10356 + }
10357 +#endif
10358 +
10359 + // note reverse order. see comments in select_next_lock for reason.
10360 + for(i = dgl_wait->size - 1; i >= 0; --i) {
10361 + struct litmus_lock *l = dgl_wait->locks[i];
10362 + if(!l->ops->is_owner(l, dgl_wait->task)) { // double-check to be thread safe
10363 +
10364 + TRACE_CUR("Activating priority inheritance on lock %d\n",
10365 + l->ident);
10366 +
10367 + TS_DGL_LOCK_SUSPEND;
10368 +
10369 + l->ops->enable_priority(l, dgl_wait);
10370 + dgl_wait->last_primary = i;
10371 +
10372 + TRACE_CUR("Suspending for lock %d\n", l->ident);
10373 +
10374 + raw_spin_unlock_irqrestore(dgl_lock, irqflags); // free dgl_lock before suspending
10375 +
10376 + schedule(); // suspend!!!
10377 +
10378 + TS_DGL_LOCK_RESUME;
10379 +
10380 + TRACE_CUR("Woken up from DGL suspension.\n");
10381 +
10382 + goto all_acquired; // we should hold all locks when we wake up.
10383 + }
10384 + }
10385 +
10386 + TRACE_CUR("Didn't have to suspend after all, but calling schedule() anyway.\n");
10387 + //BUG();
10388 + }
10389 +
10390 + raw_spin_unlock_irqrestore(dgl_lock, irqflags);
10391 +
10392 +all_acquired:
10393 +
10394 + // FOR SANITY CHECK FOR TESTING
10395 +// for(i = 0; i < dgl_wait->size; ++i) {
10396 +// struct litmus_lock *l = dgl_wait->locks[i];
10397 +// BUG_ON(!l->ops->is_owner(l, dgl_wait->task));
10398 +// }
10399 +
10400 + TRACE_CUR("Acquired entire DGL\n");
10401 +
10402 + return 0;
10403 +}
10404 +
10405 +static int supports_dgl(struct litmus_lock *l)
10406 +{
10407 + struct litmus_lock_ops* ops = l->ops;
10408 +
10409 + return (ops->dgl_lock &&
10410 + ops->is_owner &&
10411 + ops->enable_priority);
10412 +}
10413 +
10414 +asmlinkage long sys_litmus_dgl_lock(void* __user usr_dgl_ods, int dgl_size)
10415 +{
10416 + struct task_struct *t = current;
10417 + long err = -EINVAL;
10418 + int dgl_ods[MAX_DGL_SIZE];
10419 + int i;
10420 +
10421 + dgl_wait_state_t dgl_wait_state; // lives on the stack until all resources in DGL are held.
10422 +
10423 + if(dgl_size > MAX_DGL_SIZE || dgl_size < 1)
10424 + goto out;
10425 +
10426 + if(!access_ok(VERIFY_READ, usr_dgl_ods, dgl_size*(sizeof(int))))
10427 + goto out;
10428 +
10429 + if(__copy_from_user(&dgl_ods, usr_dgl_ods, dgl_size*(sizeof(int))))
10430 + goto out;
10431 +
10432 + if (!is_realtime(t)) {
10433 + err = -EPERM;
10434 + goto out;
10435 + }
10436 +
10437 + for(i = 0; i < dgl_size; ++i) {
10438 + struct od_table_entry *entry = get_entry_for_od(dgl_ods[i]);
10439 + if(entry && is_lock(entry)) {
10440 + dgl_wait_state.locks[i] = get_lock(entry);
10441 + if(!supports_dgl(dgl_wait_state.locks[i])) {
10442 + TRACE_CUR("Lock %d does not support all required DGL operations.\n",
10443 + dgl_wait_state.locks[i]->ident);
10444 + goto out;
10445 + }
10446 + }
10447 + else {
10448 + TRACE_CUR("Invalid lock identifier\n");
10449 + goto out;
10450 + }
10451 + }
10452 +
10453 + dgl_wait_state.task = t;
10454 + dgl_wait_state.size = dgl_size;
10455 +
10456 + TS_DGL_LOCK_START;
10457 + err = do_litmus_dgl_lock(&dgl_wait_state);
10458 +
10459 +	/* Note: task may have been suspended or preempted in between!  Take
10460 + * this into account when computing overheads. */
10461 + TS_DGL_LOCK_END;
10462 +
10463 +out:
10464 + return err;
10465 +}
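+
+/* Rough userspace usage (a sketch; the litmus_dgl_lock()/litmus_dgl_unlock()
+ * wrapper names are assumptions -- only the raw syscalls are defined here):
+ *
+ *	int ods[2] = { lock_a_od, lock_b_od };	(object descriptors of two locks)
+ *	litmus_dgl_lock(ods, 2);	(suspends until BOTH locks are held)
+ *	... critical section using both resources ...
+ *	litmus_dgl_unlock(ods, 2);
+ */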
10466 +
10467 +static long do_litmus_dgl_unlock(struct litmus_lock* dgl_locks[], int dgl_size)
10468 +{
10469 + int i;
10470 + long err = 0;
10471 +
10472 +	TRACE_CUR("Unlocking a DGL of size %d\n", dgl_size);
10473 +
10474 + for(i = dgl_size - 1; i >= 0; --i) { // unlock in reverse order
10475 +
10476 + struct litmus_lock *l = dgl_locks[i];
10477 + long tmp_err;
10478 +
10479 + TRACE_CUR("Unlocking lock %d of DGL.\n", l->ident);
10480 +
10481 + tmp_err = l->ops->unlock(l);
10482 +
10483 + if(tmp_err) {
10484 + TRACE_CUR("There was an error unlocking %d: %d.\n", l->ident, tmp_err);
10485 + err = tmp_err;
10486 + }
10487 + }
10488 +
10489 + TRACE_CUR("DGL unlocked. err = %d\n", err);
10490 +
10491 + return err;
10492 +}
10493 +
10494 +asmlinkage long sys_litmus_dgl_unlock(void* __user usr_dgl_ods, int dgl_size)
10495 +{
10496 + long err = -EINVAL;
10497 + int dgl_ods[MAX_DGL_SIZE];
10498 + struct od_table_entry* entry;
10499 + int i;
10500 +
10501 + struct litmus_lock* dgl_locks[MAX_DGL_SIZE];
10502 +
10503 + if(dgl_size > MAX_DGL_SIZE || dgl_size < 1)
10504 + goto out;
10505 +
10506 + if(!access_ok(VERIFY_READ, usr_dgl_ods, dgl_size*(sizeof(int))))
10507 + goto out;
10508 +
10509 + if(__copy_from_user(&dgl_ods, usr_dgl_ods, dgl_size*(sizeof(int))))
10510 + goto out;
10511 +
10512 + for(i = 0; i < dgl_size; ++i) {
10513 + entry = get_entry_for_od(dgl_ods[i]);
10514 + if(entry && is_lock(entry)) {
10515 + dgl_locks[i] = get_lock(entry);
10516 + if(!supports_dgl(dgl_locks[i])) {
10517 + TRACE_CUR("Lock %d does not support all required DGL operations.\n",
10518 + dgl_locks[i]->ident);
10519 + goto out;
10520 + }
10521 + }
10522 + else {
10523 + TRACE_CUR("Invalid lock identifier\n");
10524 + goto out;
10525 + }
10526 + }
10527 +
10528 + TS_DGL_UNLOCK_START;
10529 + err = do_litmus_dgl_unlock(dgl_locks, dgl_size);
10530 +
10531 +	/* Note: task may have been suspended or preempted in between!  Take
10532 + * this into account when computing overheads. */
10533 + TS_DGL_UNLOCK_END;
10534 +
10535 +out:
10536 + return err;
10537 +}
10538 +
10539 +#else // CONFIG_LITMUS_DGL_SUPPORT
10540 +
10541 +asmlinkage long sys_litmus_dgl_lock(void* __user usr_dgl_ods, int dgl_size)
10542 +{
10543 + return -ENOSYS;
10544 +}
10545 +
10546 +asmlinkage long sys_litmus_dgl_unlock(void* __user usr_dgl_ods, int dgl_size)
10547 +{
10548 + return -ENOSYS;
10549 +}
10550 +
10551 +#endif
10552
10553 -#else
10554 +#else // CONFIG_LITMUS_LOCKING
10555
10556 struct fdso_ops generic_lock_ops = {};
10557
10558 diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c
10559 new file mode 100644
10560 index 0000000..4b86a50
10561 --- /dev/null
10562 +++ b/litmus/nvidia_info.c
10563 @@ -0,0 +1,597 @@
10564 +#include <linux/module.h>
10565 +#include <linux/semaphore.h>
10566 +#include <linux/pci.h>
10567 +
10568 +#include <litmus/sched_trace.h>
10569 +#include <litmus/nvidia_info.h>
10570 +#include <litmus/litmus.h>
10571 +
10572 +#include <litmus/sched_plugin.h>
10573 +
10574 +#include <litmus/binheap.h>
10575 +
10576 +typedef unsigned char NvV8; /* "void": enumerated or multiple fields */
10577 +typedef unsigned short NvV16; /* "void": enumerated or multiple fields */
10578 +typedef unsigned char NvU8; /* 0 to 255 */
10579 +typedef unsigned short NvU16; /* 0 to 65535 */
10580 +typedef signed char NvS8; /* -128 to 127 */
10581 +typedef signed short NvS16; /* -32768 to 32767 */
10582 +typedef float NvF32; /* IEEE Single Precision (S1E8M23) */
10583 +typedef double NvF64; /* IEEE Double Precision (S1E11M52) */
10584 +typedef unsigned int NvV32; /* "void": enumerated or multiple fields */
10585 +typedef unsigned int NvU32; /* 0 to 4294967295 */
10586 +typedef unsigned long long NvU64; /* 0 to 18446744073709551615 */
10587 +typedef union
10588 +{
10589 + volatile NvV8 Reg008[1];
10590 + volatile NvV16 Reg016[1];
10591 + volatile NvV32 Reg032[1];
10592 +} litmus_nv_hwreg_t, * litmus_nv_phwreg_t;
10593 +
10594 +typedef struct
10595 +{
10596 + NvU64 address;
10597 + NvU64 size;
10598 + NvU32 offset;
10599 + NvU32 *map;
10600 + litmus_nv_phwreg_t map_u;
10601 +} litmus_nv_aperture_t;
10602 +
10603 +typedef struct
10604 +{
10605 + void *priv; /* private data */
10606 + void *os_state; /* os-specific device state */
10607 +
10608 + int rmInitialized;
10609 + int flags;
10610 +
10611 + /* PCI config info */
10612 + NvU32 domain;
10613 + NvU16 bus;
10614 + NvU16 slot;
10615 + NvU16 vendor_id;
10616 + NvU16 device_id;
10617 + NvU16 subsystem_id;
10618 + NvU32 gpu_id;
10619 + void *handle;
10620 +
10621 + NvU32 pci_cfg_space[16];
10622 +
10623 + /* physical characteristics */
10624 + litmus_nv_aperture_t bars[3];
10625 + litmus_nv_aperture_t *regs;
10626 + litmus_nv_aperture_t *fb, ud;
10627 + litmus_nv_aperture_t agp;
10628 +
10629 + NvU32 interrupt_line;
10630 +
10631 + NvU32 agp_config;
10632 + NvU32 agp_status;
10633 +
10634 + NvU32 primary_vga;
10635 +
10636 + NvU32 sim_env;
10637 +
10638 + NvU32 rc_timer_enabled;
10639 +
10640 + /* list of events allocated for this device */
10641 + void *event_list;
10642 +
10643 + void *kern_mappings;
10644 +
10645 +} litmus_nv_state_t;
10646 +
10647 +typedef struct work_struct litmus_nv_task_t;
10648 +
10649 +typedef struct litmus_nv_work_s {
10650 + litmus_nv_task_t task;
10651 + void *data;
10652 +} litmus_nv_work_t;
10653 +
10654 +typedef struct litmus_nv_linux_state_s {
10655 + litmus_nv_state_t nv_state;
10656 + atomic_t usage_count;
10657 +
10658 + struct pci_dev *dev;
10659 + void *agp_bridge;
10660 + void *alloc_queue;
10661 +
10662 + void *timer_sp;
10663 + void *isr_sp;
10664 + void *pci_cfgchk_sp;
10665 + void *isr_bh_sp;
10666 +
10667 +#ifdef CONFIG_CUDA_4_0
10668 + char registry_keys[512];
10669 +#endif
10670 +
10671 +	/* keep track of any pending bottom halves */
10672 + struct tasklet_struct tasklet;
10673 + litmus_nv_work_t work;
10674 +
10675 + /* get a timer callback every second */
10676 + struct timer_list rc_timer;
10677 +
10678 + /* lock for linux-specific data, not used by core rm */
10679 + struct semaphore ldata_lock;
10680 +
10681 + /* lock for linux-specific alloc queue */
10682 + struct semaphore at_lock;
10683 +
10684 +#if 0
10685 +#if defined(NV_USER_MAP)
10686 + /* list of user mappings */
10687 + struct nv_usermap_s *usermap_list;
10688 +
10689 + /* lock for VMware-specific mapping list */
10690 + struct semaphore mt_lock;
10691 +#endif /* defined(NV_USER_MAP) */
10692 +#if defined(NV_PM_SUPPORT_OLD_STYLE_APM)
10693 + void *apm_nv_dev;
10694 +#endif
10695 +#endif
10696 +
10697 + NvU32 device_num;
10698 + struct litmus_nv_linux_state_s *next;
10699 +} litmus_nv_linux_state_t;
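+
+/* The typedefs above mirror the NVIDIA binary driver's internal per-device
+ * state just closely enough to recover the embedded tasklet and device_num
+ * from a tasklet's ->data pointer (see get_tasklet_nv_device_num() below).
+ * The layout is driver/CUDA-version dependent -- note the CONFIG_CUDA_4_0
+ * registry_keys field -- so it must be kept in sync with the driver in use. */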
10700 +
10701 +void dump_nvidia_info(const struct tasklet_struct *t)
10702 +{
10703 + litmus_nv_state_t* nvstate = NULL;
10704 + litmus_nv_linux_state_t* linuxstate = NULL;
10705 + struct pci_dev* pci = NULL;
10706 +
10707 + nvstate = (litmus_nv_state_t*)(t->data);
10708 +
10709 + if(nvstate)
10710 + {
10711 + TRACE("NV State:\n"
10712 + "\ttasklet ptr = %p\n"
10713 + "\tstate ptr = %p\n"
10714 + "\tprivate data ptr = %p\n"
10715 + "\tos state ptr = %p\n"
10716 + "\tdomain = %u\n"
10717 + "\tbus = %u\n"
10718 + "\tslot = %u\n"
10719 +			"\tvendor_id = %u\n"
10720 + "\tdevice_id = %u\n"
10721 + "\tsubsystem_id = %u\n"
10722 + "\tgpu_id = %u\n"
10723 + "\tinterrupt_line = %u\n",
10724 + t,
10725 + nvstate,
10726 + nvstate->priv,
10727 + nvstate->os_state,
10728 + nvstate->domain,
10729 + nvstate->bus,
10730 + nvstate->slot,
10731 + nvstate->vendor_id,
10732 + nvstate->device_id,
10733 + nvstate->subsystem_id,
10734 + nvstate->gpu_id,
10735 + nvstate->interrupt_line);
10736 +
10737 + linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state);
10738 + }
10739 + else
10740 + {
10741 + TRACE("INVALID NVSTATE????\n");
10742 + }
10743 +
10744 + if(linuxstate)
10745 + {
10746 + int ls_offset = (void*)(&(linuxstate->device_num)) - (void*)(linuxstate);
10747 + int ns_offset_raw = (void*)(&(linuxstate->device_num)) - (void*)(&(linuxstate->nv_state));
10748 + int ns_offset_desired = (void*)(&(linuxstate->device_num)) - (void*)(nvstate);
10749 +
10750 +
10751 + TRACE("LINUX NV State:\n"
10752 + "\tlinux nv state ptr: %p\n"
10753 + "\taddress of tasklet: %p\n"
10754 + "\taddress of work: %p\n"
10755 + "\tusage_count: %d\n"
10756 + "\tdevice_num: %u\n"
10757 + "\ttasklet addr == this tasklet: %d\n"
10758 + "\tpci: %p\n",
10759 + linuxstate,
10760 + &(linuxstate->tasklet),
10761 + &(linuxstate->work),
10762 + atomic_read(&(linuxstate->usage_count)),
10763 + linuxstate->device_num,
10764 + (t == &(linuxstate->tasklet)),
10765 + linuxstate->dev);
10766 +
10767 + pci = linuxstate->dev;
10768 +
10769 + TRACE("Offsets:\n"
10770 + "\tOffset from LinuxState: %d, %x\n"
10771 + "\tOffset from NVState: %d, %x\n"
10772 + "\tOffset from parameter: %d, %x\n"
10773 + "\tdevice_num: %u\n",
10774 + ls_offset, ls_offset,
10775 + ns_offset_raw, ns_offset_raw,
10776 + ns_offset_desired, ns_offset_desired,
10777 + *((u32*)((void*)nvstate + ns_offset_desired)));
10778 + }
10779 + else
10780 + {
10781 + TRACE("INVALID LINUXNVSTATE?????\n");
10782 + }
10783 +
10784 +#if 0
10785 + if(pci)
10786 + {
10787 + TRACE("PCI DEV Info:\n"
10788 + "pci device ptr: %p\n"
10789 + "\tdevfn = %d\n"
10790 + "\tvendor = %d\n"
10791 + "\tdevice = %d\n"
10792 + "\tsubsystem_vendor = %d\n"
10793 + "\tsubsystem_device = %d\n"
10794 + "\tslot # = %d\n",
10795 + pci,
10796 + pci->devfn,
10797 + pci->vendor,
10798 + pci->device,
10799 + pci->subsystem_vendor,
10800 + pci->subsystem_device,
10801 + pci->slot->number);
10802 + }
10803 + else
10804 + {
10805 + TRACE("INVALID PCIDEV PTR?????\n");
10806 + }
10807 +#endif
10808 +}
10809 +
10810 +static struct module* nvidia_mod = NULL;
10811 +int init_nvidia_info(void)
10812 +{
10813 + mutex_lock(&module_mutex);
10814 + nvidia_mod = find_module("nvidia");
10815 + mutex_unlock(&module_mutex);
10816 + if(nvidia_mod != NULL)
10817 + {
10818 + TRACE("%s : Found NVIDIA module. Core Code: %p to %p\n", __FUNCTION__,
10819 + (void*)(nvidia_mod->module_core),
10820 + (void*)(nvidia_mod->module_core) + nvidia_mod->core_size);
10821 + init_nv_device_reg();
10822 + return(0);
10823 + }
10824 + else
10825 + {
10826 + TRACE("%s : Could not find NVIDIA module! Loaded?\n", __FUNCTION__);
10827 + return(-1);
10828 + }
10829 +}
10830 +
10831 +void shutdown_nvidia_info(void)
10832 +{
10833 + nvidia_mod = NULL;
10834 + mb();
10835 +}
10836 +
10837 +/* works with pointers to static data inside the module too. */
10838 +int is_nvidia_func(void* func_addr)
10839 +{
10840 + int ret = 0;
10841 + if(nvidia_mod)
10842 + {
10843 + ret = within_module_core((long unsigned int)func_addr, nvidia_mod);
10844 + /*
10845 + if(ret)
10846 + {
10847 + TRACE("%s : %p is in NVIDIA module: %d\n",
10848 + __FUNCTION__, func_addr, ret);
10849 + }*/
10850 + }
10851 +
10852 + return(ret);
10853 +}
10854 +
10855 +u32 get_tasklet_nv_device_num(const struct tasklet_struct *t)
10856 +{
10857 + // life is too short to use hard-coded offsets. update this later.
10858 + litmus_nv_state_t* nvstate = (litmus_nv_state_t*)(t->data);
10859 + litmus_nv_linux_state_t* linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state);
10860 +
10861 + BUG_ON(linuxstate->device_num >= NV_DEVICE_NUM);
10862 +
10863 + return(linuxstate->device_num);
10864 +
10865 + //int DEVICE_NUM_OFFSET = (void*)(&(linuxstate->device_num)) - (void*)(nvstate);
10866 +
10867 +#if 0
10868 + // offset determined through observed behavior of the NV driver.
10869 + //const int DEVICE_NUM_OFFSET = 0x480; // CUDA 4.0 RC1
10870 + //const int DEVICE_NUM_OFFSET = 0x510; // CUDA 4.0 RC2
10871 +
10872 + void* state = (void*)(t->data);
10873 + void* device_num_ptr = state + DEVICE_NUM_OFFSET;
10874 +
10875 + //dump_nvidia_info(t);
10876 + return(*((u32*)device_num_ptr));
10877 +#endif
10878 +}
10879 +
10880 +u32 get_work_nv_device_num(const struct work_struct *t)
10881 +{
10882 + // offset determined through observed behavior of the NV driver.
10883 + const int DEVICE_NUM_OFFSET = sizeof(struct work_struct);
10884 + void* state = (void*)(t);
10885 + void** device_num_ptr = state + DEVICE_NUM_OFFSET;
10886 + return(*((u32*)(*device_num_ptr)));
10887 +}
10888 +
10889 +
10890 +typedef struct {
10891 + raw_spinlock_t lock;
10892 + int nr_owners;
10893 + struct task_struct* max_prio_owner;
10894 + struct task_struct* owners[NV_MAX_SIMULT_USERS];
10895 +}nv_device_registry_t;
10896 +
10897 +static nv_device_registry_t NV_DEVICE_REG[NV_DEVICE_NUM];
10898 +
10899 +int init_nv_device_reg(void)
10900 +{
10901 + int i;
10902 +
10903 + memset(NV_DEVICE_REG, 0, sizeof(NV_DEVICE_REG));
10904 +
10905 + for(i = 0; i < NV_DEVICE_NUM; ++i)
10906 + {
10907 + raw_spin_lock_init(&NV_DEVICE_REG[i].lock);
10908 + }
10909 +
10910 + return(1);
10911 +}
10912 +
10913 +/* use to get the nv_device_id for a given owner.
10914 + (returns -1 if the associated device id cannot be found) */
10915 +/*
10916 +int get_nv_device_id(struct task_struct* owner)
10917 +{
10918 + int i;
10919 + if(!owner)
10920 + {
10921 + return(-1);
10922 + }
10923 + for(i = 0; i < NV_DEVICE_NUM; ++i)
10924 + {
10925 + if(NV_DEVICE_REG[i].device_owner == owner)
10926 + return(i);
10927 + }
10928 + return(-1);
10929 +}
10930 +*/
10931 +
10932 +static struct task_struct* find_hp_owner(nv_device_registry_t *reg, struct task_struct *skip) {
10933 + int i;
10934 + struct task_struct *found = NULL;
10935 + for(i = 0; i < reg->nr_owners; ++i) {
10936 + if(reg->owners[i] && reg->owners[i] != skip && litmus->compare(reg->owners[i], found)) {
10937 + found = reg->owners[i];
10938 + }
10939 + }
10940 + return found;
10941 +}
10942 +
10943 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
10944 +void pai_check_priority_increase(struct task_struct *t, int reg_device_id)
10945 +{
10946 + unsigned long flags;
10947 + nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id];
10948 +
10949 + if(reg->max_prio_owner != t) {
10950 +
10951 + raw_spin_lock_irqsave(&reg->lock, flags);
10952 +
10953 + if(reg->max_prio_owner != t) {
10954 + if(litmus->compare(t, reg->max_prio_owner)) {
10955 + litmus->change_prio_pai_tasklet(reg->max_prio_owner, t);
10956 + reg->max_prio_owner = t;
10957 + }
10958 + }
10959 +
10960 + raw_spin_unlock_irqrestore(&reg->lock, flags);
10961 + }
10962 +}
10963 +
10964 +
10965 +void pai_check_priority_decrease(struct task_struct *t, int reg_device_id)
10966 +{
10967 + unsigned long flags;
10968 + nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id];
10969 +
10970 + if(reg->max_prio_owner == t) {
10971 +
10972 + raw_spin_lock_irqsave(&reg->lock, flags);
10973 +
10974 + if(reg->max_prio_owner == t) {
10975 + reg->max_prio_owner = find_hp_owner(reg, NULL);
10976 + if(reg->max_prio_owner != t) {
10977 + litmus->change_prio_pai_tasklet(t, reg->max_prio_owner);
10978 + }
10979 + }
10980 +
10981 + raw_spin_unlock_irqrestore(&reg->lock, flags);
10982 + }
10983 +}
10984 +#endif
10985 +
10986 +static int __reg_nv_device(int reg_device_id, struct task_struct *t)
10987 +{
10988 + int ret = 0;
10989 + int i;
10990 + struct task_struct *old_max = NULL;
10991 + unsigned long flags;
10992 + nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id];
10993 +
10994 + if(test_bit(reg_device_id, &tsk_rt(t)->held_gpus)) {
10995 + // TODO: check if task is already registered.
10996 + return ret; // assume already registered.
10997 + }
10998 +
10999 +
11000 + raw_spin_lock_irqsave(&reg->lock, flags);
11001 +
11002 + if(reg->nr_owners < NV_MAX_SIMULT_USERS) {
11003 + TRACE_TASK(t, "registers GPU %d\n", reg_device_id);
11004 + for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) {
11005 + if(reg->owners[i] == NULL) {
11006 + reg->owners[i] = t;
11007 +
11008 + //if(edf_higher_prio(t, reg->max_prio_owner)) {
11009 + if(litmus->compare(t, reg->max_prio_owner)) {
11010 + old_max = reg->max_prio_owner;
11011 + reg->max_prio_owner = t;
11012 +
11013 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
11014 + litmus->change_prio_pai_tasklet(old_max, t);
11015 +#endif
11016 + }
11017 +
11018 +#ifdef CONFIG_LITMUS_SOFTIRQD
11019 + down_and_set_stat(t, HELD, &tsk_rt(t)->klitirqd_sem);
11020 +#endif
11021 + ++(reg->nr_owners);
11022 +
11023 + break;
11024 + }
11025 + }
11026 + }
11027 + else
11028 + {
11029 + TRACE_CUR("%s: device %d already has the maximum number of simultaneous users!\n", __FUNCTION__, reg_device_id);
11030 + //ret = -EBUSY;
11031 + }
11032 +
11033 + raw_spin_unlock_irqrestore(&reg->lock, flags);
11034 +
11035 + __set_bit(reg_device_id, &tsk_rt(t)->held_gpus);
11036 +
11037 + return(ret);
11038 +}
11039 +
11040 +static int __clear_reg_nv_device(int de_reg_device_id, struct task_struct *t)
11041 +{
11042 + int ret = 0;
11043 + int i;
11044 + unsigned long flags;
11045 + nv_device_registry_t *reg = &NV_DEVICE_REG[de_reg_device_id];
11046 +
11047 +#ifdef CONFIG_LITMUS_SOFTIRQD
11048 + struct task_struct* klitirqd_th = get_klitirqd(de_reg_device_id);
11049 +#endif
11050 +
11051 + if(!test_bit(de_reg_device_id, &tsk_rt(t)->held_gpus)) {
11052 + return ret;
11053 + }
11054 +
11055 + raw_spin_lock_irqsave(&reg->lock, flags);
11056 +
11057 + TRACE_TASK(t, "unregisters GPU %d\n", de_reg_device_id);
11058 +
11059 + for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) {
11060 + if(reg->owners[i] == t) {
11061 +#ifdef CONFIG_LITMUS_SOFTIRQD
11062 + flush_pending(klitirqd_th, t);
11063 +#endif
11064 + if(reg->max_prio_owner == t) {
11065 + reg->max_prio_owner = find_hp_owner(reg, t);
11066 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
11067 + litmus->change_prio_pai_tasklet(t, reg->max_prio_owner);
11068 +#endif
11069 + }
11070 +
11071 +#ifdef CONFIG_LITMUS_SOFTIRQD
11072 + up_and_set_stat(t, NOT_HELD, &tsk_rt(t)->klitirqd_sem);
11073 +#endif
11074 +
11075 + reg->owners[i] = NULL;
11076 + --(reg->nr_owners);
11077 +
11078 + break;
11079 + }
11080 + }
11081 +
11082 + raw_spin_unlock_irqrestore(&reg->lock, flags);
11083 +
11084 + __clear_bit(de_reg_device_id, &tsk_rt(t)->held_gpus);
11085 +
11086 + return(ret);
11087 +}
11088 +
11089 +
11090 +int reg_nv_device(int reg_device_id, int reg_action, struct task_struct *t)
11091 +{
11092 + int ret;
11093 +
11094 + if((reg_device_id < NV_DEVICE_NUM) && (reg_device_id >= 0))
11095 + {
11096 + if(reg_action)
11097 + ret = __reg_nv_device(reg_device_id, t);
11098 + else
11099 + ret = __clear_reg_nv_device(reg_device_id, t);
11100 + }
11101 + else
11102 + {
11103 + ret = -ENODEV;
11104 + }
11105 +
11106 + return(ret);
11107 +}
11108 +
11109 +/* use to get the owner of nv_device_id. */
11110 +struct task_struct* get_nv_max_device_owner(u32 target_device_id)
11111 +{
11112 + struct task_struct *owner = NULL;
11113 + BUG_ON(target_device_id >= NV_DEVICE_NUM);
11114 + owner = NV_DEVICE_REG[target_device_id].max_prio_owner;
11115 + return(owner);
11116 +}
11117 +
11118 +void lock_nv_registry(u32 target_device_id, unsigned long* flags)
11119 +{
11120 + BUG_ON(target_device_id >= NV_DEVICE_NUM);
11121 +
11122 + if(in_interrupt())
11123 + TRACE("Locking registry for %d.\n", target_device_id);
11124 + else
11125 + TRACE_CUR("Locking registry for %d.\n", target_device_id);
11126 +
11127 + raw_spin_lock_irqsave(&NV_DEVICE_REG[target_device_id].lock, *flags);
11128 +}
11129 +
11130 +void unlock_nv_registry(u32 target_device_id, unsigned long* flags)
11131 +{
11132 + BUG_ON(target_device_id >= NV_DEVICE_NUM);
11133 +
11134 + if(in_interrupt())
11135 + TRACE("Unlocking registry for %d.\n", target_device_id);
11136 + else
11137 + TRACE_CUR("Unlocking registry for %d.\n", target_device_id);
11138 +
11139 + raw_spin_unlock_irqrestore(&NV_DEVICE_REG[target_device_id].lock, *flags);
11140 +}
11141 +
11142 +
11143 +//void increment_nv_int_count(u32 device)
11144 +//{
11145 +// unsigned long flags;
11146 +// struct task_struct* owner;
11147 +//
11148 +// lock_nv_registry(device, &flags);
11149 +//
11150 +// owner = NV_DEVICE_REG[device].device_owner;
11151 +// if(owner)
11152 +// {
11153 +// atomic_inc(&tsk_rt(owner)->nv_int_count);
11154 +// }
11155 +//
11156 +// unlock_nv_registry(device, &flags);
11157 +//}
11158 +//EXPORT_SYMBOL(increment_nv_int_count);
11159 +
11160 +
11161 diff --git a/litmus/preempt.c b/litmus/preempt.c
11162 index 5704d0b..28368d5 100644
11163 --- a/litmus/preempt.c
11164 +++ b/litmus/preempt.c
11165 @@ -30,6 +30,7 @@ void sched_state_will_schedule(struct task_struct* tsk)
11166 /* Litmus tasks should never be subject to a remote
11167 * set_tsk_need_resched(). */
11168 BUG_ON(is_realtime(tsk));
11169 +
11170 #ifdef CONFIG_PREEMPT_STATE_TRACE
11171 TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n",
11172 __builtin_return_address(0));
11173 @@ -45,13 +46,17 @@ void sched_state_ipi(void)
11174 /* Cause scheduler to be invoked.
11175 * This will cause a transition to WILL_SCHEDULE. */
11176 set_tsk_need_resched(current);
11177 + /*
11178 TRACE_STATE("IPI -> set_tsk_need_resched(%s/%d)\n",
11179 current->comm, current->pid);
11180 + */
11181 } else {
11182 /* ignore */
11183 + /*
11184 TRACE_STATE("ignoring IPI in state %x (%s)\n",
11185 get_sched_state(),
11186 sched_state_name(get_sched_state()));
11187 + */
11188 }
11189 }
11190
11191 diff --git a/litmus/rsm_lock.c b/litmus/rsm_lock.c
11192 new file mode 100644
11193 index 0000000..75ed87c
11194 --- /dev/null
11195 +++ b/litmus/rsm_lock.c
11196 @@ -0,0 +1,796 @@
11197 +#include <linux/slab.h>
11198 +#include <linux/uaccess.h>
11199 +
11200 +#include <litmus/trace.h>
11201 +#include <litmus/sched_plugin.h>
11202 +#include <litmus/rsm_lock.h>
11203 +
11204 +//#include <litmus/edf_common.h>
11205 +
11206 +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
11207 +#include <litmus/gpu_affinity.h>
11208 +#endif
11209 +
11210 +
11211 +/* caller is responsible for locking */
11212 +static struct task_struct* rsm_mutex_find_hp_waiter(struct rsm_mutex *mutex,
11213 + struct task_struct* skip)
11214 +{
11215 + wait_queue_t *q;
11216 + struct list_head *pos;
11217 + struct task_struct *queued = NULL, *found = NULL;
11218 +
11219 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
11220 + dgl_wait_state_t *dgl_wait = NULL;
11221 +#endif
11222 +
11223 + list_for_each(pos, &mutex->wait.task_list) {
11224 + q = list_entry(pos, wait_queue_t, task_list);
11225 +
11226 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
11227 + if(q->func == dgl_wake_up) {
11228 + dgl_wait = (dgl_wait_state_t*) q->private;
11229 + if(tsk_rt(dgl_wait->task)->blocked_lock == &mutex->litmus_lock) {
11230 + queued = dgl_wait->task;
11231 + }
11232 + else {
11233 + queued = NULL; // skip it.
11234 + }
11235 + }
11236 + else {
11237 + queued = (struct task_struct*) q->private;
11238 + }
11239 +#else
11240 + queued = (struct task_struct*) q->private;
11241 +#endif
11242 +
11243 + /* Compare task prios, find high prio task. */
11244 + //if (queued && queued != skip && edf_higher_prio(queued, found)) {
11245 + if (queued && queued != skip && litmus->compare(queued, found)) {
11246 + found = queued;
11247 + }
11248 + }
11249 + return found;
11250 +}
11251 +
11252 +
11253 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
11254 +
11255 +int rsm_mutex_is_owner(struct litmus_lock *l, struct task_struct *t)
11256 +{
11257 + struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
11258 + return(mutex->owner == t);
11259 +}
11260 +
11261 +// return 1 if resource was immediately acquired.
11262 +// Assumes mutex->lock is held.
11263 +// Must set task state to TASK_UNINTERRUPTIBLE if task blocks.
11264 +int rsm_mutex_dgl_lock(struct litmus_lock *l, dgl_wait_state_t* dgl_wait,
11265 + wait_queue_t* wq_node)
11266 +{
11267 + struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
11268 + struct task_struct *t = dgl_wait->task;
11269 +
11270 + int acquired_immediatly = 0;
11271 +
11272 + BUG_ON(t != current);
11273 +
11274 + if (mutex->owner) {
11275 + TRACE_TASK(t, "Enqueuing on lock %d.\n", l->ident);
11276 +
11277 + init_dgl_waitqueue_entry(wq_node, dgl_wait);
11278 +
11279 + set_task_state(t, TASK_UNINTERRUPTIBLE);
11280 + __add_wait_queue_tail_exclusive(&mutex->wait, wq_node);
11281 + } else {
11282 + TRACE_TASK(t, "Acquired lock %d with no blocking.\n", l->ident);
11283 +
11284 + /* it's ours now */
11285 + mutex->owner = t;
11286 +
11287 + raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock);
11288 + binheap_add(&l->nest.hp_binheap_node, &tsk_rt(t)->hp_blocked_tasks,
11289 + struct nested_info, hp_binheap_node);
11290 + raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);
11291 +
11292 + acquired_immediatly = 1;
11293 + }
11294 +
11295 + return acquired_immediatly;
11296 +}
11297 +
11298 +void rsm_mutex_enable_priority(struct litmus_lock *l,
11299 + dgl_wait_state_t* dgl_wait)
11300 +{
11301 + struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
11302 + struct task_struct *t = dgl_wait->task;
11303 + struct task_struct *owner = mutex->owner;
11304 + unsigned long flags = 0; // these are unused under DGL coarse-grain locking
11305 +
11306 + BUG_ON(owner == t);
11307 +
11308 + tsk_rt(t)->blocked_lock = l;
11309 + mb();
11310 +
11311 + //if (edf_higher_prio(t, mutex->hp_waiter)) {
11312 + if (litmus->compare(t, mutex->hp_waiter)) {
11313 +
11314 + struct task_struct *old_max_eff_prio;
11315 + struct task_struct *new_max_eff_prio;
11316 + struct task_struct *new_prio = NULL;
11317 +
11318 + if(mutex->hp_waiter)
11319 + TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n",
11320 + mutex->hp_waiter->comm, mutex->hp_waiter->pid);
11321 + else
11322 + TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n");
11323 +
11324 + raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
11325 +
11326 + old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
11327 + mutex->hp_waiter = t;
11328 + l->nest.hp_waiter_eff_prio = effective_priority(mutex->hp_waiter);
11329 + binheap_decrease(&l->nest.hp_binheap_node,
11330 + &tsk_rt(owner)->hp_blocked_tasks);
11331 + new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
11332 +
11333 + if(new_max_eff_prio != old_max_eff_prio) {
11334 + TRACE_TASK(t, "is new hp_waiter.\n");
11335 +
11336 + if ((effective_priority(owner) == old_max_eff_prio) ||
11337 + //(__edf_higher_prio(new_max_eff_prio, BASE, owner, EFFECTIVE))){
11338 + (litmus->__compare(new_max_eff_prio, BASE, owner, EFFECTIVE))){
11339 + new_prio = new_max_eff_prio;
11340 + }
11341 + }
11342 + else {
11343 + TRACE_TASK(t, "no change in max_eff_prio of heap.\n");
11344 + }
11345 +
11346 + if(new_prio) {
11347 + litmus->nested_increase_prio(owner, new_prio,
11348 + &mutex->lock, flags); // unlocks lock.
11349 + }
11350 + else {
11351 + raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
11352 + unlock_fine_irqrestore(&mutex->lock, flags);
11353 + }
11354 + }
11355 + else {
11356 + TRACE_TASK(t, "no change in hp_waiter.\n");
11357 + unlock_fine_irqrestore(&mutex->lock, flags);
11358 + }
11359 +}
11360 +
11361 +static void select_next_lock_if_primary(struct litmus_lock *l,
11362 + dgl_wait_state_t *dgl_wait)
11363 +{
11364 + if(tsk_rt(dgl_wait->task)->blocked_lock == l) {
11365 + TRACE_CUR("Lock %d in DGL was primary for %s/%d.\n",
11366 + l->ident, dgl_wait->task->comm, dgl_wait->task->pid);
11367 + tsk_rt(dgl_wait->task)->blocked_lock = NULL;
11368 + mb();
11369 + select_next_lock(dgl_wait /*, l*/); // pick the next lock to be blocked on
11370 + }
11371 + else {
11372 + TRACE_CUR("Got lock early! Lock %d in DGL was NOT primary for %s/%d.\n",
11373 + l->ident, dgl_wait->task->comm, dgl_wait->task->pid);
11374 + }
11375 +}
11376 +#endif
11377 +
11378 +
11379 +
11380 +
11381 +int rsm_mutex_lock(struct litmus_lock* l)
11382 +{
11383 + struct task_struct *t = current;
11384 + struct task_struct *owner;
11385 + struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
11386 + wait_queue_t wait;
11387 + unsigned long flags;
11388 +
11389 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
11390 + raw_spinlock_t *dgl_lock;
11391 +#endif
11392 +
11393 + if (!is_realtime(t))
11394 + return -EPERM;
11395 +
11396 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
11397 + dgl_lock = litmus->get_dgl_spinlock(t);
11398 +#endif
11399 +
11400 + lock_global_irqsave(dgl_lock, flags);
11401 + lock_fine_irqsave(&mutex->lock, flags);
11402 +
11403 + if (mutex->owner) {
11404 + TRACE_TASK(t, "Blocking on lock %d.\n", l->ident);
11405 +
11406 +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
11407 + // KLUDGE: don't count this suspension as time in the GPU
11408 + // critical section
11409 + if(tsk_rt(t)->held_gpus) {
11410 + tsk_rt(t)->suspend_gpu_tracker_on_block = 1;
11411 + }
11412 +#endif
11413 +
11414 + /* resource is not free => must suspend and wait */
11415 +
11416 + owner = mutex->owner;
11417 +
11418 + init_waitqueue_entry(&wait, t);
11419 +
11420 + tsk_rt(t)->blocked_lock = l; /* record where we are blocked */
11421 + mb(); // needed?
11422 +
11423 + /* FIXME: interruptible would be nice some day */
11424 + set_task_state(t, TASK_UNINTERRUPTIBLE);
11425 +
11426 + __add_wait_queue_tail_exclusive(&mutex->wait, &wait);
11427 +
11428 + /* check if we need to activate priority inheritance */
11429 + //if (edf_higher_prio(t, mutex->hp_waiter)) {
11430 + if (litmus->compare(t, mutex->hp_waiter)) {
11431 +
11432 + struct task_struct *old_max_eff_prio;
11433 + struct task_struct *new_max_eff_prio;
11434 + struct task_struct *new_prio = NULL;
11435 +
11436 + if(mutex->hp_waiter)
11437 + TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n",
11438 + mutex->hp_waiter->comm, mutex->hp_waiter->pid);
11439 + else
11440 + TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n");
11441 +
11442 + raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
11443 +
11444 + old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
11445 + mutex->hp_waiter = t;
11446 + l->nest.hp_waiter_eff_prio = effective_priority(mutex->hp_waiter);
11447 + binheap_decrease(&l->nest.hp_binheap_node,
11448 + &tsk_rt(owner)->hp_blocked_tasks);
11449 + new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
11450 +
11451 + if(new_max_eff_prio != old_max_eff_prio) {
11452 + TRACE_TASK(t, "is new hp_waiter.\n");
11453 +
11454 + if ((effective_priority(owner) == old_max_eff_prio) ||
11455 + //(__edf_higher_prio(new_max_eff_prio, BASE, owner, EFFECTIVE))){
11456 + (litmus->__compare(new_max_eff_prio, BASE, owner, EFFECTIVE))){
11457 + new_prio = new_max_eff_prio;
11458 + }
11459 + }
11460 + else {
11461 + TRACE_TASK(t, "no change in max_eff_prio of heap.\n");
11462 + }
11463 +
11464 + if(new_prio) {
11465 + litmus->nested_increase_prio(owner, new_prio, &mutex->lock,
11466 + flags); // unlocks lock.
11467 + }
11468 + else {
11469 + raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
11470 + unlock_fine_irqrestore(&mutex->lock, flags);
11471 + }
11472 + }
11473 + else {
11474 + TRACE_TASK(t, "no change in hp_waiter.\n");
11475 +
11476 + unlock_fine_irqrestore(&mutex->lock, flags);
11477 + }
11478 +
11479 + unlock_global_irqrestore(dgl_lock, flags);
11480 +
11481 + TS_LOCK_SUSPEND;
11482 +
11483 + /* We depend on the FIFO order. Thus, we don't need to recheck
11484 + * when we wake up; we are guaranteed to have the lock since
11485 + * there is only one wake up per release.
11486 + */
11487 +
11488 + schedule();
11489 +
11490 + TS_LOCK_RESUME;
11491 +
11492 + /* Since we hold the lock, no other task will change
11493 + * ->owner. We can thus check it without acquiring the spin
11494 + * lock. */
11495 + BUG_ON(mutex->owner != t);
11496 +
11497 + TRACE_TASK(t, "Acquired lock %d.\n", l->ident);
11498 +
11499 + } else {
11500 + TRACE_TASK(t, "Acquired lock %d with no blocking.\n", l->ident);
11501 +
11502 + /* it's ours now */
11503 + mutex->owner = t;
11504 +
11505 + raw_spin_lock(&tsk_rt(mutex->owner)->hp_blocked_tasks_lock);
11506 + binheap_add(&l->nest.hp_binheap_node, &tsk_rt(t)->hp_blocked_tasks,
11507 + struct nested_info, hp_binheap_node);
11508 + raw_spin_unlock(&tsk_rt(mutex->owner)->hp_blocked_tasks_lock);
11509 +
11510 +
11511 + unlock_fine_irqrestore(&mutex->lock, flags);
11512 + unlock_global_irqrestore(dgl_lock, flags);
11513 + }
11514 +
11515 + return 0;
11516 +}
11517 +
11518 +
11519 +
11520 +int rsm_mutex_unlock(struct litmus_lock* l)
11521 +{
11522 + struct task_struct *t = current, *next = NULL;
11523 + struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
11524 + unsigned long flags;
11525 +
11526 + struct task_struct *old_max_eff_prio;
11527 +
11528 + int wake_up_task = 1;
11529 +
11530 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
11531 + dgl_wait_state_t *dgl_wait = NULL;
11532 + raw_spinlock_t *dgl_lock = litmus->get_dgl_spinlock(t);
11533 +#endif
11534 +
11535 + int err = 0;
11536 +
11537 + if (mutex->owner != t) {
11538 + err = -EINVAL;
11539 + return err;
11540 + }
11541 +
11542 + lock_global_irqsave(dgl_lock, flags);
11543 + lock_fine_irqsave(&mutex->lock, flags);
11544 +
11545 + raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock);
11546 +
11547 + TRACE_TASK(t, "Freeing lock %d\n", l->ident);
11548 +
11549 + old_max_eff_prio = top_priority(&tsk_rt(t)->hp_blocked_tasks);
11550 + binheap_delete(&l->nest.hp_binheap_node, &tsk_rt(t)->hp_blocked_tasks);
11551 +
11552 + if(tsk_rt(t)->inh_task){
11553 + struct task_struct *new_max_eff_prio =
11554 + top_priority(&tsk_rt(t)->hp_blocked_tasks);
11555 +
11556 + if((new_max_eff_prio == NULL) ||
11557 + /* there was a change in eff prio */
11558 + ( (new_max_eff_prio != old_max_eff_prio) &&
11559 + /* and owner had the old eff prio */
11560 + (effective_priority(t) == old_max_eff_prio)) )
11561 + {
11562 + // old_max_eff_prio > new_max_eff_prio
11563 +
11564 + //if(__edf_higher_prio(new_max_eff_prio, BASE, t, EFFECTIVE)) {
11565 + if(litmus->__compare(new_max_eff_prio, BASE, t, EFFECTIVE)) {
11566 + TRACE_TASK(t, "new_max_eff_prio > task's eff_prio-- new_max_eff_prio: %s/%d task: %s/%d [%s/%d]\n",
11567 + new_max_eff_prio->comm, new_max_eff_prio->pid,
11568 + t->comm, t->pid, tsk_rt(t)->inh_task->comm,
11569 + tsk_rt(t)->inh_task->pid);
11570 + WARN_ON(1);
11571 + }
11572 +
11573 + litmus->decrease_prio(t, new_max_eff_prio);
11574 + }
11575 + }
11576 +
11577 + if(binheap_empty(&tsk_rt(t)->hp_blocked_tasks) &&
11578 + tsk_rt(t)->inh_task != NULL)
11579 + {
11580 + WARN_ON(tsk_rt(t)->inh_task != NULL);
11581 + TRACE_TASK(t, "No more locks are held, but eff_prio = %s/%d\n",
11582 + tsk_rt(t)->inh_task->comm, tsk_rt(t)->inh_task->pid);
11583 + }
11584 +
11585 + raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);
11586 +
11587 +
11588 + /* check if there are jobs waiting for this resource */
11589 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
11590 + __waitqueue_dgl_remove_first(&mutex->wait, &dgl_wait, &next);
11591 + if(dgl_wait) {
11592 + next = dgl_wait->task;
11593 + //select_next_lock_if_primary(l, dgl_wait);
11594 + }
11595 +#else
11596 + next = __waitqueue_remove_first(&mutex->wait);
11597 +#endif
11598 + if (next) {
11599 + /* next becomes the resource holder */
11600 + mutex->owner = next;
11601 + TRACE_CUR("lock ownership passed to %s/%d\n", next->comm, next->pid);
11602 +
11603 + /* determine new hp_waiter if necessary */
11604 + if (next == mutex->hp_waiter) {
11605 +
11606 + TRACE_TASK(next, "was highest-prio waiter\n");
11607 + /* next has the highest priority --- it doesn't need to
11608 + * inherit. However, we need to make sure that the
11609 + * next-highest priority in the queue is reflected in
11610 + * hp_waiter. */
11611 + mutex->hp_waiter = rsm_mutex_find_hp_waiter(mutex, next);
11612 + l->nest.hp_waiter_eff_prio = (mutex->hp_waiter) ?
11613 + effective_priority(mutex->hp_waiter) :
11614 + NULL;
11615 +
11616 + if (mutex->hp_waiter)
11617 + TRACE_TASK(mutex->hp_waiter, "is new highest-prio waiter\n");
11618 + else
11619 + TRACE("no further waiters\n");
11620 +
11621 + raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock);
11622 +
11623 + binheap_add(&l->nest.hp_binheap_node,
11624 + &tsk_rt(next)->hp_blocked_tasks,
11625 + struct nested_info, hp_binheap_node);
11626 +
11627 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
11628 + if(dgl_wait) {
11629 + select_next_lock_if_primary(l, dgl_wait);
11630 + //wake_up_task = atomic_dec_and_test(&dgl_wait->nr_remaining);
11631 + --(dgl_wait->nr_remaining);
11632 + wake_up_task = (dgl_wait->nr_remaining == 0);
11633 + }
11634 +#endif
11635 + raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock);
11636 + }
11637 + else {
11638 + /* Well, if 'next' is not the highest-priority waiter,
11639 + * then it (probably) ought to inherit the highest-priority
11640 + * waiter's priority. */
11641 + TRACE_TASK(next, "is not hp_waiter of lock %d.\n", l->ident);
11642 +
11643 + raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock);
11644 +
11645 + binheap_add(&l->nest.hp_binheap_node,
11646 + &tsk_rt(next)->hp_blocked_tasks,
11647 + struct nested_info, hp_binheap_node);
11648 +
11649 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
11650 + if(dgl_wait) {
11651 + select_next_lock_if_primary(l, dgl_wait);
11652 + --(dgl_wait->nr_remaining);
11653 + wake_up_task = (dgl_wait->nr_remaining == 0);
11654 + }
11655 +#endif
11656 +
11657 + /* It is possible that 'next' *should* be the hp_waiter, but isn't
11658 + * because that update hasn't yet executed (update operation is
11659 + * probably blocked on mutex->lock). So only inherit if the top of
11660 + * 'next's top heap node is indeed the effective prio. of hp_waiter.
11661 + * (We use l->hp_waiter_eff_prio instead of effective_priority(hp_waiter)
11662 + * since the effective priority of hp_waiter can change (and the
11663 + * update has not made it to this lock).)
11664 + */
11665 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
11666 + if((l->nest.hp_waiter_eff_prio != NULL) &&
11667 + (top_priority(&tsk_rt(next)->hp_blocked_tasks) ==
11668 + l->nest.hp_waiter_eff_prio))
11669 + {
11670 + if(dgl_wait && tsk_rt(next)->blocked_lock) {
11671 + BUG_ON(wake_up_task);
11672 + //if(__edf_higher_prio(l->nest.hp_waiter_eff_prio, BASE, next, EFFECTIVE)) {
11673 + if(litmus->__compare(l->nest.hp_waiter_eff_prio, BASE, next, EFFECTIVE)) {
11674 + litmus->nested_increase_prio(next,
11675 + l->nest.hp_waiter_eff_prio, &mutex->lock, flags); // unlocks lock && hp_blocked_tasks_lock.
11676 + goto out; // all spinlocks are released. bail out now.
11677 + }
11678 + }
11679 + else {
11680 + litmus->increase_prio(next, l->nest.hp_waiter_eff_prio);
11681 + }
11682 + }
11683 +
11684 + raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock);
11685 +#else
11686 + if(likely(top_priority(&tsk_rt(next)->hp_blocked_tasks) ==
11687 + l->nest.hp_waiter_eff_prio))
11688 + {
11689 + litmus->increase_prio(next, l->nest.hp_waiter_eff_prio);
11690 + }
11691 + raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock);
11692 +#endif
11693 + }
11694 +
11695 + if(wake_up_task) {
11696 + TRACE_TASK(next, "waking up since it is no longer blocked.\n");
11697 +
11698 + tsk_rt(next)->blocked_lock = NULL;
11699 + mb();
11700 +
11701 +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
11702 + // re-enable tracking
11703 + if(tsk_rt(next)->held_gpus) {
11704 + tsk_rt(next)->suspend_gpu_tracker_on_block = 0;
11705 + }
11706 +#endif
11707 +
11708 + wake_up_process(next);
11709 + }
11710 + else {
11711 + TRACE_TASK(next, "is still blocked.\n");
11712 + }
11713 + }
11714 + else {
11715 + /* becomes available */
11716 + mutex->owner = NULL;
11717 + }
11718 +
11719 + unlock_fine_irqrestore(&mutex->lock, flags);
11720 +
11721 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
11722 +out:
11723 +#endif
11724 + unlock_global_irqrestore(dgl_lock, flags);
11725 +
11726 + return err;
11727 +}
11728 +
11729 +
11730 +void rsm_mutex_propagate_increase_inheritance(struct litmus_lock* l,
11731 + struct task_struct* t,
11732 + raw_spinlock_t* to_unlock,
11733 + unsigned long irqflags)
11734 +{
11735 + struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
11736 +
11737 + // relay-style locking
11738 + lock_fine(&mutex->lock);
11739 + unlock_fine(to_unlock);
11740 +
11741 + if(tsk_rt(t)->blocked_lock == l) { // prevent race on tsk_rt(t)->blocked
11742 + struct task_struct *owner = mutex->owner;
11743 +
11744 + struct task_struct *old_max_eff_prio;
11745 + struct task_struct *new_max_eff_prio;
11746 +
11747 + raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
11748 +
11749 + old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
11750 +
11751 + //if((t != mutex->hp_waiter) && edf_higher_prio(t, mutex->hp_waiter)) {
11752 + if((t != mutex->hp_waiter) && litmus->compare(t, mutex->hp_waiter)) {
11753 + TRACE_TASK(t, "is new highest-prio waiter by propagation.\n");
11754 + mutex->hp_waiter = t;
11755 + }
11756 + if(t == mutex->hp_waiter) {
11757 + // reflect the decreased priority in the heap node.
11758 + l->nest.hp_waiter_eff_prio = effective_priority(mutex->hp_waiter);
11759 +
11760 + BUG_ON(!binheap_is_in_heap(&l->nest.hp_binheap_node));
11761 + BUG_ON(!binheap_is_in_this_heap(&l->nest.hp_binheap_node,
11762 + &tsk_rt(owner)->hp_blocked_tasks));
11763 +
11764 + binheap_decrease(&l->nest.hp_binheap_node,
11765 + &tsk_rt(owner)->hp_blocked_tasks);
11766 + }
11767 +
11768 + new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
11769 +
11770 +
11771 + if(new_max_eff_prio != old_max_eff_prio) {
11772 + // new_max_eff_prio > old_max_eff_prio holds.
11773 + if ((effective_priority(owner) == old_max_eff_prio) ||
11774 + //(__edf_higher_prio(new_max_eff_prio, BASE, owner, EFFECTIVE))) {
11775 + (litmus->__compare(new_max_eff_prio, BASE, owner, EFFECTIVE))) {
11776 + TRACE_CUR("Propagating inheritance to holder of lock %d.\n",
11777 + l->ident);
11778 +
11779 + // beware: recursion
11780 + litmus->nested_increase_prio(owner, new_max_eff_prio,
11781 + &mutex->lock, irqflags); // unlocks mutex->lock
11782 + }
11783 + else {
11784 + TRACE_CUR("Lower priority than holder %s/%d. No propagation.\n",
11785 + owner->comm, owner->pid);
11786 + raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
11787 + unlock_fine_irqrestore(&mutex->lock, irqflags);
11788 + }
11789 + }
11790 + else {
11791 + TRACE_TASK(mutex->owner, "No change in maximum effective priority.\n");
11792 + raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
11793 + unlock_fine_irqrestore(&mutex->lock, irqflags);
11794 + }
11795 + }
11796 + else {
11797 + struct litmus_lock *still_blocked = tsk_rt(t)->blocked_lock;
11798 +
11799 + TRACE_TASK(t, "is not blocked on lock %d.\n", l->ident);
11800 + if(still_blocked) {
11801 + TRACE_TASK(t, "is still blocked on a lock though (lock %d).\n",
11802 + still_blocked->ident);
11803 + if(still_blocked->ops->propagate_increase_inheritance) {
11804 + /* due to relay-style nesting of spinlocks (acq. A, acq. B, free A, free B)
11805 + we know that task 't' has not released any locks behind us in this
11806 + chain. Propagation just needs to catch up with task 't'. */
11807 + still_blocked->ops->propagate_increase_inheritance(still_blocked,
11808 + t,
11809 + &mutex->lock,
11810 + irqflags);
11811 + }
11812 + else {
11813 + TRACE_TASK(t,
11814 + "Inheritor is blocked on lock (%p) that does not "
11815 + "support nesting!\n",
11816 + still_blocked);
11817 + unlock_fine_irqrestore(&mutex->lock, irqflags);
11818 + }
11819 + }
11820 + else {
11821 + unlock_fine_irqrestore(&mutex->lock, irqflags);
11822 + }
11823 + }
11824 +}
11825 +
11826 +
11827 +void rsm_mutex_propagate_decrease_inheritance(struct litmus_lock* l,
11828 + struct task_struct* t,
11829 + raw_spinlock_t* to_unlock,
11830 + unsigned long irqflags)
11831 +{
11832 + struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
11833 +
11834 + // relay-style locking
11835 + lock_fine(&mutex->lock);
11836 + unlock_fine(to_unlock);
11837 +
11838 + if(tsk_rt(t)->blocked_lock == l) { // prevent race on tsk_rt(t)->blocked
11839 + if(t == mutex->hp_waiter) {
11840 + struct task_struct *owner = mutex->owner;
11841 +
11842 + struct task_struct *old_max_eff_prio;
11843 + struct task_struct *new_max_eff_prio;
11844 +
11845 + raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
11846 +
11847 + old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
11848 +
11849 + binheap_delete(&l->nest.hp_binheap_node, &tsk_rt(owner)->hp_blocked_tasks);
11850 + mutex->hp_waiter = rsm_mutex_find_hp_waiter(mutex, NULL);
11851 + l->nest.hp_waiter_eff_prio = (mutex->hp_waiter) ?
11852 + effective_priority(mutex->hp_waiter) : NULL;
11853 + binheap_add(&l->nest.hp_binheap_node,
11854 + &tsk_rt(owner)->hp_blocked_tasks,
11855 + struct nested_info, hp_binheap_node);
11856 +
11857 + new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
11858 +
11859 + if((old_max_eff_prio != new_max_eff_prio) &&
11860 + (effective_priority(owner) == old_max_eff_prio))
11861 + {
11862 + // Need to set new effective_priority for owner
11863 +
11864 + struct task_struct *decreased_prio;
11865 +
11866 + TRACE_CUR("Propagating decreased inheritance to holder of lock %d.\n",
11867 + l->ident);
11868 +
11869 + //if(__edf_higher_prio(new_max_eff_prio, BASE, owner, BASE)) {
11870 + if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) {
11871 + TRACE_CUR("%s/%d has greater base priority than base priority of owner (%s/%d) of lock %d.\n",
11872 + (new_max_eff_prio) ? new_max_eff_prio->comm : "nil",
11873 + (new_max_eff_prio) ? new_max_eff_prio->pid : -1,
11874 + owner->comm,
11875 + owner->pid,
11876 + l->ident);
11877 +
11878 + decreased_prio = new_max_eff_prio;
11879 + }
11880 + else {
11881 + TRACE_CUR("%s/%d has lesser base priority than base priority of owner (%s/%d) of lock %d.\n",
11882 + (new_max_eff_prio) ? new_max_eff_prio->comm : "nil",
11883 + (new_max_eff_prio) ? new_max_eff_prio->pid : -1,
11884 + owner->comm,
11885 + owner->pid,
11886 + l->ident);
11887 +
11888 + decreased_prio = NULL;
11889 + }
11890 +
11891 + // beware: recursion
11892 + litmus->nested_decrease_prio(owner, decreased_prio, &mutex->lock, irqflags); // will unlock mutex->lock
11893 + }
11894 + else {
11895 + raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
11896 + unlock_fine_irqrestore(&mutex->lock, irqflags);
11897 + }
11898 + }
11899 + else {
11900 + TRACE_TASK(t, "is not hp_waiter. No propagation.\n");
11901 + unlock_fine_irqrestore(&mutex->lock, irqflags);
11902 + }
11903 + }
11904 + else {
11905 + struct litmus_lock *still_blocked = tsk_rt(t)->blocked_lock;
11906 +
11907 + TRACE_TASK(t, "is not blocked on lock %d.\n", l->ident);
11908 + if(still_blocked) {
11909 + TRACE_TASK(t, "is still blocked on a lock though (lock %d).\n",
11910 + still_blocked->ident);
11911 + if(still_blocked->ops->propagate_decrease_inheritance) {
11912 + /* due to linked nesting of spinlocks (acq. A, acq. B, free A, free B)
11913 + we know that task 't' has not released any locks behind us in this
11914 + chain. propagation just needs to catch up with task 't' */
11915 + still_blocked->ops->propagate_decrease_inheritance(still_blocked,
11916 + t,
11917 + &mutex->lock,
11918 + irqflags);
11919 + }
11920 + else {
11921 + TRACE_TASK(t, "Inheritor is blocked on lock (%p) that does not support nesting!\n",
11922 + still_blocked);
11923 + unlock_fine_irqrestore(&mutex->lock, irqflags);
11924 + }
11925 + }
11926 + else {
11927 + unlock_fine_irqrestore(&mutex->lock, irqflags);
11928 + }
11929 + }
11930 +}
11931 +
11932 +
11933 +int rsm_mutex_close(struct litmus_lock* l)
11934 +{
11935 + struct task_struct *t = current;
11936 + struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
11937 + unsigned long flags;
11938 +
11939 + int owner;
11940 +
11941 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
11942 + raw_spinlock_t *dgl_lock = litmus->get_dgl_spinlock(t);
11943 +#endif
11944 +
11945 + lock_global_irqsave(dgl_lock, flags);
11946 + lock_fine_irqsave(&mutex->lock, flags);
11947 +
11948 + owner = (mutex->owner == t);
11949 +
11950 + unlock_fine_irqrestore(&mutex->lock, flags);
11951 + unlock_global_irqrestore(dgl_lock, flags);
11952 +
11953 + if (owner)
11954 + rsm_mutex_unlock(l);
11955 +
11956 + return 0;
11957 +}
11958 +
11959 +void rsm_mutex_free(struct litmus_lock* lock)
11960 +{
11961 + kfree(rsm_mutex_from_lock(lock));
11962 +}
11963 +
11964 +struct litmus_lock* rsm_mutex_new(struct litmus_lock_ops* ops)
11965 +{
11966 + struct rsm_mutex* mutex;
11967 +
11968 + mutex = kmalloc(sizeof(*mutex), GFP_KERNEL);
11969 + if (!mutex)
11970 + return NULL;
11971 +
11972 + mutex->litmus_lock.ops = ops;
11973 + mutex->owner = NULL;
11974 + mutex->hp_waiter = NULL;
11975 + init_waitqueue_head(&mutex->wait);
11976 +
11977 +
11978 +#ifdef CONFIG_DEBUG_SPINLOCK
11979 + {
11980 + __raw_spin_lock_init(&mutex->lock,
11981 + ((struct litmus_lock*)mutex)->cheat_lockdep,
11982 + &((struct litmus_lock*)mutex)->key);
11983 + }
11984 +#else
11985 + raw_spin_lock_init(&mutex->lock);
11986 +#endif
11987 +
11988 + ((struct litmus_lock*)mutex)->nest.hp_waiter_ptr = &mutex->hp_waiter;
11989 +
11990 + return &mutex->litmus_lock;
11991 +}
11992 +
11993 diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
11994 index 480c62b..be14dbe 100644
11995 --- a/litmus/sched_cedf.c
11996 +++ b/litmus/sched_cedf.c
11997 @@ -29,7 +29,7 @@
11998 #include <linux/percpu.h>
11999 #include <linux/sched.h>
12000 #include <linux/slab.h>
12001 -
12002 +#include <linux/uaccess.h>
12003 #include <linux/module.h>
12004
12005 #include <litmus/litmus.h>
12006 @@ -42,6 +42,16 @@
12007 #include <litmus/clustered.h>
12008
12009 #include <litmus/bheap.h>
12010 +#include <litmus/binheap.h>
12011 +
12012 +#ifdef CONFIG_LITMUS_LOCKING
12013 +#include <litmus/kfmlp_lock.h>
12014 +#endif
12015 +
12016 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
12017 +#include <litmus/rsm_lock.h>
12018 +#include <litmus/ikglp_lock.h>
12019 +#endif
12020
12021 #ifdef CONFIG_SCHED_CPU_AFFINITY
12022 #include <litmus/affinity.h>
12023 @@ -49,7 +59,27 @@
12024
12025 /* to configure the cluster size */
12026 #include <litmus/litmus_proc.h>
12027 -#include <linux/uaccess.h>
12028 +
12029 +#ifdef CONFIG_SCHED_CPU_AFFINITY
12030 +#include <litmus/affinity.h>
12031 +#endif
12032 +
12033 +#ifdef CONFIG_LITMUS_SOFTIRQD
12034 +#include <litmus/litmus_softirq.h>
12035 +#endif
12036 +
12037 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
12038 +#include <linux/interrupt.h>
12039 +#include <litmus/trace.h>
12040 +#endif
12041 +
12042 +#ifdef CONFIG_LITMUS_NVIDIA
12043 +#include <litmus/nvidia_info.h>
12044 +#endif
12045 +
12046 +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
12047 +#include <litmus/gpu_affinity.h>
12048 +#endif
12049
12050 /* Reference configuration variable. Determines which cache level is used to
12051 * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that
12052 @@ -70,7 +100,7 @@ typedef struct {
12053 struct task_struct* linked; /* only RT tasks */
12054 struct task_struct* scheduled; /* only RT tasks */
12055 atomic_t will_schedule; /* prevent unneeded IPIs */
12056 - struct bheap_node* hn;
12057 + struct binheap_node hn;
12058 } cpu_entry_t;
12059
12060 /* one cpu_entry_t per CPU */
12061 @@ -83,6 +113,14 @@ DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries);
12062 #define test_will_schedule(cpu) \
12063 (atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule))
12064
12065 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
12066 +struct tasklet_head
12067 +{
12068 + struct tasklet_struct *head;
12069 + struct tasklet_struct **tail;
12070 +};
12071 +#endif
12072 +
12073 /*
12074 * In C-EDF there is a cedf domain _per_ cluster
12075 * The number of clusters is dynamically determined accordingly to the
12076 @@ -96,10 +134,17 @@ typedef struct clusterdomain {
12077 /* map of this cluster cpus */
12078 cpumask_var_t cpu_map;
12079 /* the cpus queue themselves according to priority in here */
12080 - struct bheap_node *heap_node;
12081 - struct bheap cpu_heap;
12082 + struct binheap_handle cpu_heap;
12083 /* lock for this cluster */
12084 #define cluster_lock domain.ready_lock
12085 +
12086 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
12087 + struct tasklet_head pending_tasklets;
12088 +#endif
12089 +
12090 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
12091 + raw_spinlock_t dgl_lock;
12092 +#endif
12093 } cedf_domain_t;
12094
12095 /* a cedf_domain per cluster; allocation is done at init/activation time */
12096 @@ -108,6 +153,22 @@ cedf_domain_t *cedf;
12097 #define remote_cluster(cpu) ((cedf_domain_t *) per_cpu(cedf_cpu_entries, cpu).cluster)
12098 #define task_cpu_cluster(task) remote_cluster(get_partition(task))
12099
12100 +/* total number of clusters */
12101 +static int num_clusters;
12102 +/* we do not support clusters of different sizes */
12103 +static unsigned int cluster_size;
12104 +
12105 +static int clusters_allocated = 0;
12106 +
12107 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
12108 +static raw_spinlock_t* cedf_get_dgl_spinlock(struct task_struct *t)
12109 +{
12110 + cedf_domain_t *cluster = task_cpu_cluster(t);
12111 + return(&cluster->dgl_lock);
12112 +}
12113 +#endif
12114 +
12115 +
12116 /* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling
12117 * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose
12118 * information during the initialization of the plugin (e.g., topology)
12119 @@ -115,11 +176,11 @@ cedf_domain_t *cedf;
12120 */
12121 #define VERBOSE_INIT
12122
12123 -static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
12124 +static int cpu_lower_prio(struct binheap_node *_a, struct binheap_node *_b)
12125 {
12126 - cpu_entry_t *a, *b;
12127 - a = _a->value;
12128 - b = _b->value;
12129 + cpu_entry_t *a = binheap_entry(_a, cpu_entry_t, hn);
12130 + cpu_entry_t *b = binheap_entry(_b, cpu_entry_t, hn);
12131 +
12132 /* Note that a and b are inverted: we want the lowest-priority CPU at
12133 * the top of the heap.
12134 */
12135 @@ -133,20 +194,17 @@ static void update_cpu_position(cpu_entry_t *entry)
12136 {
12137 cedf_domain_t *cluster = entry->cluster;
12138
12139 - if (likely(bheap_node_in_heap(entry->hn)))
12140 - bheap_delete(cpu_lower_prio,
12141 - &cluster->cpu_heap,
12142 - entry->hn);
12143 + if (likely(binheap_is_in_heap(&entry->hn))) {
12144 + binheap_delete(&entry->hn, &cluster->cpu_heap);
12145 + }
12146
12147 - bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn);
12148 + binheap_add(&entry->hn, &cluster->cpu_heap, cpu_entry_t, hn);
12149 }
12150
12151 /* caller must hold cedf lock */
12152 static cpu_entry_t* lowest_prio_cpu(cedf_domain_t *cluster)
12153 {
12154 - struct bheap_node* hn;
12155 - hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap);
12156 - return hn->value;
12157 + return binheap_top_entry(&cluster->cpu_heap, cpu_entry_t, hn);
12158 }
12159
12160
12161 @@ -208,7 +266,7 @@ static noinline void link_task_to_cpu(struct task_struct* linked,
12162 }
12163
12164 /* unlink - Make sure a task is not linked any longer to an entry
12165 - * where it was linked before. Must hold cedf_lock.
12166 + * where it was linked before. Must hold cluster_lock.
12167 */
12168 static noinline void unlink(struct task_struct* t)
12169 {
12170 @@ -244,7 +302,7 @@ static void preempt(cpu_entry_t *entry)
12171 }
12172
12173 /* requeue - Put an unlinked task into gsn-edf domain.
12174 - * Caller must hold cedf_lock.
12175 + * Caller must hold cluster_lock.
12176 */
12177 static noinline void requeue(struct task_struct* task)
12178 {
12179 @@ -339,13 +397,17 @@ static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
12180 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
12181 }
12182
12183 -/* caller holds cedf_lock */
12184 +/* caller holds cluster_lock */
12185 static noinline void job_completion(struct task_struct *t, int forced)
12186 {
12187 BUG_ON(!t);
12188
12189 sched_trace_task_completion(t, forced);
12190
12191 +#ifdef CONFIG_LITMUS_NVIDIA
12192 + atomic_set(&tsk_rt(t)->nv_int_count, 0);
12193 +#endif
12194 +
12195 TRACE_TASK(t, "job_completion().\n");
12196
12197 /* set flags */
12198 @@ -389,6 +451,314 @@ static void cedf_tick(struct task_struct* t)
12199 }
12200 }
12201
12202 +
12203 +
12204 +
12205 +
12206 +
12207 +
12208 +
12209 +
12210 +
12211 +
12212 +
12213 +
12214 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
12215 +
12216 +
12217 +static void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed)
12218 +{
12219 + if (!atomic_read(&tasklet->count)) {
12220 + if(tasklet->owner) {
12221 + sched_trace_tasklet_begin(tasklet->owner);
12222 + }
12223 +
12224 + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state))
12225 + {
12226 + BUG();
12227 + }
12228 + TRACE("%s: Invoking tasklet with owner pid = %d (flushed = %d).\n",
12229 + __FUNCTION__,
12230 + (tasklet->owner) ? tasklet->owner->pid : -1,
12231 + (tasklet->owner) ? 0 : 1);
12232 + tasklet->func(tasklet->data);
12233 + tasklet_unlock(tasklet);
12234 +
12235 + if(tasklet->owner) {
12236 + sched_trace_tasklet_end(tasklet->owner, flushed);
12237 + }
12238 + }
12239 + else {
12240 + BUG();
12241 + }
12242 +}
12243 +
12244 +
12245 +static void do_lit_tasklets(cedf_domain_t* cluster, struct task_struct* sched_task)
12246 +{
12247 + int work_to_do = 1;
12248 + struct tasklet_struct *tasklet = NULL;
12249 + unsigned long flags;
12250 +
12251 + while(work_to_do) {
12252 +
12253 + TS_NV_SCHED_BOTISR_START;
12254 +
12255 + raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
12256 +
12257 + if(cluster->pending_tasklets.head != NULL) {
12258 + // remove tasklet at head.
12259 + struct tasklet_struct *prev = NULL;
12260 + tasklet = cluster->pending_tasklets.head;
12261 +
12262 + // find a tasklet with prio to execute; skip ones where
12263 + // sched_task has a higher priority.
12264 + // We use the '!edf' test instead of swapping function arguments since
12265 + // both sched_task and owner could be NULL. In this case, we want to
12266 + // still execute the tasklet.
12267 + while(tasklet && !edf_higher_prio(tasklet->owner, sched_task)) {
12268 + prev = tasklet;
12269 + tasklet = tasklet->next;
12270 + }
12271 +
12272 + if(tasklet) { // found something to execute
12273 + // remove the tasklet from the queue
12274 + if(prev) {
12275 + prev->next = tasklet->next;
12276 + if(prev->next == NULL) {
12277 + TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
12278 + cluster->pending_tasklets.tail = &(prev->next);
12279 + }
12280 + }
12281 + else {
12282 + cluster->pending_tasklets.head = tasklet->next;
12283 + if(tasklet->next == NULL) {
12284 + TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
12285 + cluster->pending_tasklets.tail = &(cluster->pending_tasklets.head);
12286 + }
12287 + }
12288 + }
12289 + else {
12290 + TRACE("%s: No tasklets with eligible priority.\n", __FUNCTION__);
12291 + }
12292 + }
12293 + else {
12294 + TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__);
12295 + }
12296 +
12297 + raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
12298 +
12299 + if(tasklet) {
12300 + __do_lit_tasklet(tasklet, 0ul);
12301 + tasklet = NULL;
12302 + }
12303 + else {
12304 + work_to_do = 0;
12305 + }
12306 +
12307 + TS_NV_SCHED_BOTISR_END;
12308 + }
12309 +}
12310 +
12311 +static void __add_pai_tasklet(struct tasklet_struct* tasklet, cedf_domain_t* cluster)
12312 +{
12313 + struct tasklet_struct* step;
12314 +
12315 + tasklet->next = NULL; // make sure there are no old values floating around
12316 +
12317 + step = cluster->pending_tasklets.head;
12318 + if(step == NULL) {
12319 + TRACE("%s: tasklet queue empty. inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid);
12320 + // insert at tail.
12321 + *(cluster->pending_tasklets.tail) = tasklet;
12322 + cluster->pending_tasklets.tail = &(tasklet->next);
12323 + }
12324 + else if((*(cluster->pending_tasklets.tail) != NULL) &&
12325 + edf_higher_prio((*(cluster->pending_tasklets.tail))->owner, tasklet->owner)) {
12326 + // insert at tail.
12327 + TRACE("%s: tasklet belongs at end. inserting tasklet for %d at tail.\n", __FUNCTION__, tasklet->owner->pid);
12328 +
12329 + *(cluster->pending_tasklets.tail) = tasklet;
12330 + cluster->pending_tasklets.tail = &(tasklet->next);
12331 + }
12332 + else {
12333 +
12334 + // insert the tasklet somewhere in the middle.
12335 +
12336 + TRACE("%s: tasklet belongs somewhere in the middle.\n", __FUNCTION__);
12337 +
12338 + while(step->next && edf_higher_prio(step->next->owner, tasklet->owner)) {
12339 + step = step->next;
12340 + }
12341 +
12342 + // insert tasklet right before step->next.
12343 +
12344 + TRACE("%s: inserting tasklet for %d between %d and %d.\n", __FUNCTION__,
12345 + tasklet->owner->pid,
12346 + (step->owner) ?
12347 + step->owner->pid :
12348 + -1,
12349 + (step->next) ?
12350 + ((step->next->owner) ?
12351 + step->next->owner->pid :
12352 + -1) :
12353 + -1);
12354 +
12355 + tasklet->next = step->next;
12356 + step->next = tasklet;
12357 +
12358 + // patch up the head if needed.
12359 + if(cluster->pending_tasklets.head == step)
12360 + {
12361 + TRACE("%s: %d is the new tasklet queue head.\n", __FUNCTION__, tasklet->owner->pid);
12362 + cluster->pending_tasklets.head = tasklet;
12363 + }
12364 + }
12365 +}
12366 +
12367 +static void cedf_run_tasklets(struct task_struct* sched_task)
12368 +{
12369 + cedf_domain_t* cluster;
12370 +
12371 + preempt_disable();
12372 +
12373 + cluster = (is_realtime(sched_task)) ?
12374 + task_cpu_cluster(sched_task) :
12375 + remote_cluster(smp_processor_id());
12376 +
12377 + if(cluster && cluster->pending_tasklets.head != NULL) {
12378 + TRACE("%s: There are tasklets to process.\n", __FUNCTION__);
12379 + do_lit_tasklets(cluster, sched_task);
12380 + }
12381 +
12382 + preempt_enable_no_resched();
12383 +}
12384 +
12385 +
12386 +
12387 +static int cedf_enqueue_pai_tasklet(struct tasklet_struct* tasklet)
12388 +{
12389 +#if 0
12390 + cedf_domain_t *cluster = NULL;
12391 + cpu_entry_t *targetCPU = NULL;
12392 + int thisCPU;
12393 + int runLocal = 0;
12394 + int runNow = 0;
12395 + unsigned long flags;
12396 +
12397 + if(unlikely((tasklet->owner == NULL) || !is_realtime(tasklet->owner)))
12398 + {
12399 + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
12400 + return 0;
12401 + }
12402 +
12403 + cluster = task_cpu_cluster(tasklet->owner);
12404 +
12405 + raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
12406 +
12407 + thisCPU = smp_processor_id();
12408 +
12409 +#ifdef CONFIG_SCHED_CPU_AFFINITY
12410 + {
12411 + cpu_entry_t* affinity = NULL;
12412 +
12413 + // use this CPU if it is in our cluster and isn't running any RT work.
12414 + if(cpu_isset(thisCPU, *cluster->cpu_map) && (__get_cpu_var(cedf_cpu_entries).linked == NULL)) {
12415 + affinity = &(__get_cpu_var(cedf_cpu_entries));
12416 + }
12417 + else {
12418 + // this CPU is busy or shouldn't run tasklet in this cluster.
12419 + // look for available nearby CPUs.
12420 + // NOTE: Affinity towards owner and not this CPU. Is this right?
12421 + affinity =
12422 + cedf_get_nearest_available_cpu(cluster,
12423 + &per_cpu(cedf_cpu_entries, task_cpu(tasklet->owner)));
12424 + }
12425 +
12426 + targetCPU = affinity;
12427 + }
12428 +#endif
12429 +
12430 + if (targetCPU == NULL) {
12431 + targetCPU = lowest_prio_cpu(cluster);
12432 + }
12433 +
12434 + if (edf_higher_prio(tasklet->owner, targetCPU->linked)) {
12435 + if (thisCPU == targetCPU->cpu) {
12436 + TRACE("%s: Run tasklet locally (and now).\n", __FUNCTION__);
12437 + runLocal = 1;
12438 + runNow = 1;
12439 + }
12440 + else {
12441 + TRACE("%s: Run tasklet remotely (and now).\n", __FUNCTION__);
12442 + runLocal = 0;
12443 + runNow = 1;
12444 + }
12445 + }
12446 + else {
12447 + runLocal = 0;
12448 + runNow = 0;
12449 + }
12450 +
12451 + if(!runLocal) {
12452 + // enqueue the tasklet
12453 + __add_pai_tasklet(tasklet, cluster);
12454 + }
12455 +
12456 + raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
12457 +
12458 +
12459 + if (runLocal /*&& runNow */) { // runNow == 1 is implied
12460 + TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__);
12461 + __do_lit_tasklet(tasklet, 0ul);
12462 + }
12463 + else if (runNow /*&& !runLocal */) { // runLocal == 0 is implied
12464 + TRACE("%s: Triggering CPU %d to run tasklet.\n", __FUNCTION__, targetCPU->cpu);
12465 + preempt(targetCPU); // need to be protected by cluster_lock?
12466 + }
12467 + else {
12468 + TRACE("%s: Scheduling of tasklet was deferred.\n", __FUNCTION__);
12469 + }
12470 +#else
12471 + TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__);
12472 + __do_lit_tasklet(tasklet, 0ul);
12473 +#endif
12474 + return(1); // success
12475 +}
12476 +
12477 +static void cedf_change_prio_pai_tasklet(struct task_struct *old_prio,
12478 + struct task_struct *new_prio)
12479 +{
12480 + struct tasklet_struct* step;
12481 + unsigned long flags;
12482 + cedf_domain_t *cluster;
12483 + struct task_struct *probe;
12484 +
12485 + // identify the cluster by the assignment of these tasks. one should
12486 + // be non-NULL.
12487 + probe = (old_prio) ? old_prio : new_prio;
12488 +
12489 + if(probe) {
12490 + cluster = task_cpu_cluster(probe);
12491 +
12492 + if(cluster->pending_tasklets.head != NULL) {
12493 + raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
12494 + for(step = cluster->pending_tasklets.head; step != NULL; step = step->next) {
12495 + if(step->owner == old_prio) {
12496 + TRACE("%s: Found tasklet to change: %d\n", __FUNCTION__, step->owner->pid);
12497 + step->owner = new_prio;
12498 + }
12499 + }
12500 + raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
12501 + }
12502 + }
12503 + else {
12504 +		TRACE("%s: Both priorities were NULL\n", __FUNCTION__);
12505 + }
12506 +}
12507 +
12508 +#endif // PAI
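
For readers following the PAI path: cedf_enqueue_pai_tasklet() above boils down to a three-way routing decision for each incoming tasklet. A minimal sketch of just that decision, reduced to plain values (illustrative only, not code applied by the patch):

/* Sketch only: the routing decision made by cedf_enqueue_pai_tasklet(). */
enum pai_route { PAI_RUN_LOCAL, PAI_RUN_REMOTE, PAI_DEFER };

static enum pai_route route_pai_tasklet(int owner_beats_target,
                                        int this_cpu, int target_cpu)
{
	if (!owner_beats_target)
		return PAI_DEFER;       /* enqueue only; runs once priority allows    */
	if (this_cpu == target_cpu)
		return PAI_RUN_LOCAL;   /* __do_lit_tasklet() right here              */
	return PAI_RUN_REMOTE;      /* __add_pai_tasklet() + preempt(target CPU)  */
}
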
12509 +
12510 /* Getting schedule() right is a bit tricky. schedule() may not make any
12511 * assumptions on the state of the current task since it may be called for a
12512 * number of reasons. The reasons include a scheduler_tick() determined that it
12513 @@ -465,6 +835,19 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
12514 if (blocks)
12515 unlink(entry->scheduled);
12516
12517 +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
12518 + if(exists && is_realtime(entry->scheduled) && tsk_rt(entry->scheduled)->held_gpus) {
12519 + if(!blocks || tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) {
12520 + // don't track preemptions or locking protocol suspensions.
12521 + TRACE_TASK(entry->scheduled, "stopping GPU tracker.\n");
12522 + stop_gpu_tracker(entry->scheduled);
12523 + }
12524 + else if(blocks && !tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) {
12525 + TRACE_TASK(entry->scheduled, "GPU tracker remains on during suspension.\n");
12526 + }
12527 + }
12528 +#endif
12529 +
12530 /* Request a sys_exit_np() call if we would like to preempt but cannot.
12531 * We need to make sure to update the link structure anyway in case
12532 * that we are still linked. Multiple calls to request_exit_np() don't
12533 @@ -514,7 +897,7 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
12534 raw_spin_unlock(&cluster->cluster_lock);
12535
12536 #ifdef WANT_ALL_SCHED_EVENTS
12537 - TRACE("cedf_lock released, next=0x%p\n", next);
12538 + TRACE("cluster_lock released, next=0x%p\n", next);
12539
12540 if (next)
12541 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
12542 @@ -522,7 +905,6 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
12543 TRACE("becomes idle at %llu.\n", litmus_clock());
12544 #endif
12545
12546 -
12547 return next;
12548 }
12549
12550 @@ -548,7 +930,7 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running)
12551 cpu_entry_t* entry;
12552 cedf_domain_t* cluster;
12553
12554 - TRACE("gsn edf: task new %d\n", t->pid);
12555 + TRACE("c-edf: task new %d\n", t->pid);
12556
12557 /* the cluster doesn't change even if t is running */
12558 cluster = task_cpu_cluster(t);
12559 @@ -586,7 +968,7 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running)
12560 static void cedf_task_wake_up(struct task_struct *task)
12561 {
12562 unsigned long flags;
12563 - lt_t now;
12564 + //lt_t now;
12565 cedf_domain_t *cluster;
12566
12567 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
12568 @@ -594,6 +976,8 @@ static void cedf_task_wake_up(struct task_struct *task)
12569 cluster = task_cpu_cluster(task);
12570
12571 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
12572 +
12573 +#if 0 // sporadic task model
12574 /* We need to take suspensions because of semaphores into
12575 * account! If a job resumes after being suspended due to acquiring
12576 * a semaphore, it should never be treated as a new job release.
12577 @@ -615,7 +999,13 @@ static void cedf_task_wake_up(struct task_struct *task)
12578 }
12579 }
12580 }
12581 - cedf_job_arrival(task);
12582 +#else
12583 + set_rt_flags(task, RT_F_RUNNING); // periodic model
12584 +#endif
12585 +
12586 + if(tsk_rt(task)->linked_on == NO_CPU)
12587 + cedf_job_arrival(task);
12588 +
12589 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
12590 }
12591
12592 @@ -642,6 +1032,10 @@ static void cedf_task_exit(struct task_struct * t)
12593 unsigned long flags;
12594 cedf_domain_t *cluster = task_cpu_cluster(t);
12595
12596 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
12597 + cedf_change_prio_pai_tasklet(t, NULL);
12598 +#endif
12599 +
12600 /* unlink if necessary */
12601 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
12602 unlink(t);
12603 @@ -659,13 +1053,536 @@ static void cedf_task_exit(struct task_struct * t)
12604
12605 static long cedf_admit_task(struct task_struct* tsk)
12606 {
12607 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
12608 + INIT_BINHEAP_HANDLE(&tsk_rt(tsk)->hp_blocked_tasks,
12609 + edf_max_heap_base_priority_order);
12610 +#endif
12611 +
12612 return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
12613 }
12614
12615 -/* total number of cluster */
12616 -static int num_clusters;
12617 -/* we do not support cluster of different sizes */
12618 -static unsigned int cluster_size;
12619 +
12620 +
12621 +#ifdef CONFIG_LITMUS_LOCKING
12622 +
12623 +#include <litmus/fdso.h>
12624 +
12625 +
12626 +
12627 +/* called with IRQs off */
12628 +static void __increase_priority_inheritance(struct task_struct* t,
12629 + struct task_struct* prio_inh)
12630 +{
12631 + int linked_on;
12632 + int check_preempt = 0;
12633 +
12634 + cedf_domain_t* cluster = task_cpu_cluster(t);
12635 +
12636 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
12637 + /* this sanity check allows for weaker locking in protocols */
12638 + /* TODO (klitirqd): Skip this check if 't' is a proxy thread (???) */
12639 + if(__edf_higher_prio(prio_inh, BASE, t, EFFECTIVE)) {
12640 +#endif
12641 + TRACE_TASK(t, "inherits priority from %s/%d\n",
12642 + prio_inh->comm, prio_inh->pid);
12643 + tsk_rt(t)->inh_task = prio_inh;
12644 +
12645 + linked_on = tsk_rt(t)->linked_on;
12646 +
12647 + /* If it is scheduled, then we need to reorder the CPU heap. */
12648 + if (linked_on != NO_CPU) {
12649 + TRACE_TASK(t, "%s: linked on %d\n",
12650 + __FUNCTION__, linked_on);
12651 + /* Holder is scheduled; need to re-order CPUs.
12652 + * We can't use heap_decrease() here since
12653 + * the cpu_heap is ordered in reverse direction, so
12654 + * it is actually an increase. */
12655 + binheap_delete(&per_cpu(cedf_cpu_entries, linked_on).hn,
12656 + &cluster->cpu_heap);
12657 + binheap_add(&per_cpu(cedf_cpu_entries, linked_on).hn,
12658 + &cluster->cpu_heap, cpu_entry_t, hn);
12659 +
12660 + } else {
12661 + /* holder may be queued: first stop queue changes */
12662 + raw_spin_lock(&cluster->domain.release_lock);
12663 + if (is_queued(t)) {
12664 + TRACE_TASK(t, "%s: is queued\n",
12665 + __FUNCTION__);
12666 + /* We need to update the position of holder in some
12667 +			 * heap. Note that this could be a release heap if
12668 + * budget enforcement is used and this job overran. */
12669 + check_preempt =
12670 + !bheap_decrease(edf_ready_order, tsk_rt(t)->heap_node);
12671 + } else {
12672 + /* Nothing to do: if it is not queued and not linked
12673 + * then it is either sleeping or currently being moved
12674 + * by other code (e.g., a timer interrupt handler) that
12675 + * will use the correct priority when enqueuing the
12676 + * task. */
12677 + TRACE_TASK(t, "%s: is NOT queued => Done.\n",
12678 + __FUNCTION__);
12679 + }
12680 + raw_spin_unlock(&cluster->domain.release_lock);
12681 +
12682 + /* If holder was enqueued in a release heap, then the following
12683 + * preemption check is pointless, but we can't easily detect
12684 + * that case. If you want to fix this, then consider that
12685 + * simply adding a state flag requires O(n) time to update when
12686 + * releasing n tasks, which conflicts with the goal to have
12687 + * O(log n) merges. */
12688 + if (check_preempt) {
12689 + /* heap_decrease() hit the top level of the heap: make
12690 + * sure preemption checks get the right task, not the
12691 + * potentially stale cache. */
12692 + bheap_uncache_min(edf_ready_order,
12693 + &cluster->domain.ready_queue);
12694 + check_for_preemptions(cluster);
12695 + }
12696 + }
12697 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
12698 + }
12699 + else {
12700 + TRACE_TASK(t, "Spurious invalid priority increase. "
12701 + "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n"
12702 +			   "Occurrence is likely okay: probably due to (hopefully safe) concurrent priority updates.\n",
12703 + t->comm, t->pid,
12704 + effective_priority(t)->comm, effective_priority(t)->pid,
12705 + (prio_inh) ? prio_inh->comm : "nil",
12706 + (prio_inh) ? prio_inh->pid : -1);
12707 + WARN_ON(!prio_inh);
12708 + }
12709 +#endif
12710 +}
12711 +
12712 +/* called with IRQs off */
12713 +static void increase_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
12714 +{
12715 + cedf_domain_t* cluster = task_cpu_cluster(t);
12716 +
12717 + raw_spin_lock(&cluster->cluster_lock);
12718 +
12719 + __increase_priority_inheritance(t, prio_inh);
12720 +
12721 +#ifdef CONFIG_LITMUS_SOFTIRQD
12722 + if(tsk_rt(t)->cur_klitirqd != NULL)
12723 + {
12724 + TRACE_TASK(t, "%s/%d inherits a new priority!\n",
12725 + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
12726 +
12727 + __increase_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh);
12728 + }
12729 +#endif
12730 +
12731 + raw_spin_unlock(&cluster->cluster_lock);
12732 +
12733 +#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA)
12734 + if(tsk_rt(t)->held_gpus) {
12735 + int i;
12736 + for(i = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus));
12737 + i < NV_DEVICE_NUM;
12738 + i = find_next_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus), i+1)) {
12739 + pai_check_priority_increase(t, i);
12740 + }
12741 + }
12742 +#endif
12743 +}
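
Note that the held-GPU scan above passes sizeof(tsk_rt(t)->held_gpus), a byte count, where find_first_bit()/find_next_bit() expect a size in bits. A minimal sketch of the same walk using the kernel's for_each_set_bit() helper, assuming held_gpus is a single unsigned long bitmap and NV_DEVICE_NUM fits within it (sketch only, not applied by the patch):

#include <linux/bitops.h>

/* Sketch: re-check PAI priorities for every GPU bit set in held_gpus. */
static void pai_recheck_held_gpus(struct task_struct *t)
{
	int gpu;

	for_each_set_bit(gpu, &tsk_rt(t)->held_gpus, NV_DEVICE_NUM)
		pai_check_priority_increase(t, gpu);
}
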
12744 +
12745 +/* called with IRQs off */
12746 +static void __decrease_priority_inheritance(struct task_struct* t,
12747 + struct task_struct* prio_inh)
12748 +{
12749 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
12750 + if(__edf_higher_prio(t, EFFECTIVE, prio_inh, BASE)) {
12751 +#endif
12752 + /* A job only stops inheriting a priority when it releases a
12753 + * resource. Thus we can make the following assumption.*/
12754 + if(prio_inh)
12755 + TRACE_TASK(t, "EFFECTIVE priority decreased to %s/%d\n",
12756 + prio_inh->comm, prio_inh->pid);
12757 + else
12758 + TRACE_TASK(t, "base priority restored.\n");
12759 +
12760 + tsk_rt(t)->inh_task = prio_inh;
12761 +
12762 + if(tsk_rt(t)->scheduled_on != NO_CPU) {
12763 + TRACE_TASK(t, "is scheduled.\n");
12764 +
12765 + /* Check if rescheduling is necessary. We can't use heap_decrease()
12766 + * since the priority was effectively lowered. */
12767 + unlink(t);
12768 + cedf_job_arrival(t);
12769 + }
12770 + else {
12771 + cedf_domain_t* cluster = task_cpu_cluster(t);
12772 + /* task is queued */
12773 + raw_spin_lock(&cluster->domain.release_lock);
12774 + if (is_queued(t)) {
12775 + TRACE_TASK(t, "is queued.\n");
12776 +
12777 + /* decrease in priority, so we have to re-add to binomial heap */
12778 + unlink(t);
12779 + cedf_job_arrival(t);
12780 + }
12781 + else {
12782 + TRACE_TASK(t, "is not in scheduler. Probably on wait queue somewhere.\n");
12783 + }
12784 + raw_spin_unlock(&cluster->domain.release_lock);
12785 + }
12786 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
12787 + }
12788 + else {
12789 + TRACE_TASK(t, "Spurious invalid priority decrease. "
12790 + "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n"
12791 +				   "Occurrence is likely okay: probably due to (hopefully safe) concurrent priority updates.\n",
12792 + t->comm, t->pid,
12793 + effective_priority(t)->comm, effective_priority(t)->pid,
12794 + (prio_inh) ? prio_inh->comm : "nil",
12795 + (prio_inh) ? prio_inh->pid : -1);
12796 + }
12797 +#endif
12798 +}
12799 +
12800 +static void decrease_priority_inheritance(struct task_struct* t,
12801 + struct task_struct* prio_inh)
12802 +{
12803 + cedf_domain_t* cluster = task_cpu_cluster(t);
12804 +
12805 + raw_spin_lock(&cluster->cluster_lock);
12806 + __decrease_priority_inheritance(t, prio_inh);
12807 +
12808 +#ifdef CONFIG_LITMUS_SOFTIRQD
12809 + if(tsk_rt(t)->cur_klitirqd != NULL)
12810 + {
12811 + TRACE_TASK(t, "%s/%d decreases in priority!\n",
12812 + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
12813 +
12814 + __decrease_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh);
12815 + }
12816 +#endif
12817 +
12818 + raw_spin_unlock(&cluster->cluster_lock);
12819 +
12820 +#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA)
12821 + if(tsk_rt(t)->held_gpus) {
12822 + int i;
12823 + for(i = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus));
12824 + i < NV_DEVICE_NUM;
12825 + i = find_next_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus), i+1)) {
12826 + pai_check_priority_decrease(t, i);
12827 + }
12828 + }
12829 +#endif
12830 +}
12831 +
12832 +
12833 +
12834 +
12835 +
12836 +#ifdef CONFIG_LITMUS_SOFTIRQD
12837 +/* called with IRQs off */
12838 +static void increase_priority_inheritance_klitirqd(struct task_struct* klitirqd,
12839 + struct task_struct* old_owner,
12840 + struct task_struct* new_owner)
12841 +{
12842 + cedf_domain_t* cluster = task_cpu_cluster(klitirqd);
12843 +
12844 + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
12845 +
12846 + raw_spin_lock(&cluster->cluster_lock);
12847 +
12848 + if(old_owner != new_owner)
12849 + {
12850 + if(old_owner)
12851 + {
12852 + // unreachable?
12853 + tsk_rt(old_owner)->cur_klitirqd = NULL;
12854 + }
12855 +
12856 + TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n",
12857 + new_owner->comm, new_owner->pid);
12858 +
12859 + tsk_rt(new_owner)->cur_klitirqd = klitirqd;
12860 + }
12861 +
12862 + __decrease_priority_inheritance(klitirqd, NULL); // kludge to clear out cur prio.
12863 +
12864 + __increase_priority_inheritance(klitirqd,
12865 + (tsk_rt(new_owner)->inh_task == NULL) ?
12866 + new_owner :
12867 + tsk_rt(new_owner)->inh_task);
12868 +
12869 + raw_spin_unlock(&cluster->cluster_lock);
12870 +}
12871 +
12872 +
12873 +/* called with IRQs off */
12874 +static void decrease_priority_inheritance_klitirqd(struct task_struct* klitirqd,
12875 + struct task_struct* old_owner,
12876 + struct task_struct* new_owner)
12877 +{
12878 + cedf_domain_t* cluster = task_cpu_cluster(klitirqd);
12879 +
12880 + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
12881 +
12882 + raw_spin_lock(&cluster->cluster_lock);
12883 +
12884 + TRACE_TASK(klitirqd, "priority restored\n");
12885 +
12886 + __decrease_priority_inheritance(klitirqd, new_owner);
12887 +
12888 + tsk_rt(old_owner)->cur_klitirqd = NULL;
12889 +
12890 + raw_spin_unlock(&cluster->cluster_lock);
12891 +}
12892 +#endif // CONFIG_LITMUS_SOFTIRQD
12893 +
12894 +
12895 +
12896 +
12897 +
12898 +
12899 +
12900 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
12901 +
12902 +/* called with IRQs off */
12903 +/* preconditions:
12904 + (1) The 'hp_blocked_tasks_lock' of task 't' is held.
12905 + (2) The lock 'to_unlock' is held.
12906 + */
12907 +static void nested_increase_priority_inheritance(struct task_struct* t,
12908 + struct task_struct* prio_inh,
12909 + raw_spinlock_t *to_unlock,
12910 + unsigned long irqflags)
12911 +{
12912 + struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock;
12913 +
12914 +	if(tsk_rt(t)->inh_task != prio_inh) { // shield redundant calls.
12915 + increase_priority_inheritance(t, prio_inh); // increase our prio.
12916 + }
12917 +
12918 +	raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);  // unlock t's heap.
12919 +
12920 +
12921 + if(blocked_lock) {
12922 + if(blocked_lock->ops->propagate_increase_inheritance) {
12923 + TRACE_TASK(t, "Inheritor is blocked (...perhaps). Checking lock %d.\n",
12924 + blocked_lock->ident);
12925 +
12926 + // beware: recursion
12927 + blocked_lock->ops->propagate_increase_inheritance(blocked_lock,
12928 + t, to_unlock,
12929 + irqflags);
12930 + }
12931 + else {
12932 + TRACE_TASK(t, "Inheritor is blocked on lock (%d) that does not support nesting!\n",
12933 + blocked_lock->ident);
12934 + unlock_fine_irqrestore(to_unlock, irqflags);
12935 + }
12936 + }
12937 + else {
12938 + TRACE_TASK(t, "is not blocked. No propagation.\n");
12939 + unlock_fine_irqrestore(to_unlock, irqflags);
12940 + }
12941 +}
12942 +
12943 +/* called with IRQs off */
12944 +/* preconditions:
12945 + (1) The 'hp_blocked_tasks_lock' of task 't' is held.
12946 + (2) The lock 'to_unlock' is held.
12947 + */
12948 +static void nested_decrease_priority_inheritance(struct task_struct* t,
12949 + struct task_struct* prio_inh,
12950 + raw_spinlock_t *to_unlock,
12951 + unsigned long irqflags)
12952 +{
12953 + struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock;
12954 + decrease_priority_inheritance(t, prio_inh);
12955 +
12956 +	raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);  // unlock t's heap.
12957 +
12958 + if(blocked_lock) {
12959 + if(blocked_lock->ops->propagate_decrease_inheritance) {
12960 + TRACE_TASK(t, "Inheritor is blocked (...perhaps). Checking lock %d.\n",
12961 + blocked_lock->ident);
12962 +
12963 + // beware: recursion
12964 + blocked_lock->ops->propagate_decrease_inheritance(blocked_lock, t,
12965 + to_unlock,
12966 + irqflags);
12967 + }
12968 + else {
12969 + TRACE_TASK(t, "Inheritor is blocked on lock (%p) that does not support nesting!\n",
12970 + blocked_lock);
12971 + unlock_fine_irqrestore(to_unlock, irqflags);
12972 + }
12973 + }
12974 + else {
12975 + TRACE_TASK(t, "is not blocked. No propagation.\n");
12976 + unlock_fine_irqrestore(to_unlock, irqflags);
12977 + }
12978 +}
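
Both nested_*_priority_inheritance() helpers above follow the same hand-over-hand discipline: the caller's fine-grained lock ('to_unlock') is released only after the next lock in the blocking chain has been taken over, either here or further down the recursion. A generic illustration of that pattern, using a hypothetical chain_node type rather than anything from the patch:

#include <linux/spinlock.h>

/* Illustration only -- hypothetical type, not patch code. */
struct chain_node {
	spinlock_t lock;
	int prio;
	struct chain_node *next;   /* next holder we are transitively blocked on */
};

static void chain_propagate(struct chain_node *n, int new_prio)
{
	spin_lock(&n->lock);
	while (n) {
		struct chain_node *next = n->next;

		n->prio = new_prio;
		if (next)
			spin_lock(&next->lock);   /* grab the next link before... */
		spin_unlock(&n->lock);            /* ...letting go of the current one */
		n = next;
	}
}
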
12979 +
12980 +
12981 +/* ******************** RSM MUTEX ********************** */
12982 +
12983 +static struct litmus_lock_ops cedf_rsm_mutex_lock_ops = {
12984 + .lock = rsm_mutex_lock,
12985 + .unlock = rsm_mutex_unlock,
12986 + .close = rsm_mutex_close,
12987 + .deallocate = rsm_mutex_free,
12988 +
12989 + .propagate_increase_inheritance = rsm_mutex_propagate_increase_inheritance,
12990 + .propagate_decrease_inheritance = rsm_mutex_propagate_decrease_inheritance,
12991 +
12992 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
12993 + .dgl_lock = rsm_mutex_dgl_lock,
12994 + .is_owner = rsm_mutex_is_owner,
12995 + .enable_priority = rsm_mutex_enable_priority,
12996 +#endif
12997 +};
12998 +
12999 +static struct litmus_lock* cedf_new_rsm_mutex(void)
13000 +{
13001 + return rsm_mutex_new(&cedf_rsm_mutex_lock_ops);
13002 +}
13003 +
13004 +/* ******************** IKGLP ********************** */
13005 +
13006 +static struct litmus_lock_ops cedf_ikglp_lock_ops = {
13007 + .lock = ikglp_lock,
13008 + .unlock = ikglp_unlock,
13009 + .close = ikglp_close,
13010 + .deallocate = ikglp_free,
13011 +
13012 + // ikglp can only be an outer-most lock.
13013 + .propagate_increase_inheritance = NULL,
13014 + .propagate_decrease_inheritance = NULL,
13015 +};
13016 +
13017 +static struct litmus_lock* cedf_new_ikglp(void* __user arg)
13018 +{
13019 + // assumes clusters of uniform size.
13020 + return ikglp_new(cluster_size/num_clusters, &cedf_ikglp_lock_ops, arg);
13021 +}
13022 +
13023 +#endif /* CONFIG_LITMUS_NESTED_LOCKING */
13024 +
13025 +
13026 +
13027 +
13028 +/* ******************** KFMLP support ********************** */
13029 +
13030 +static struct litmus_lock_ops cedf_kfmlp_lock_ops = {
13031 + .lock = kfmlp_lock,
13032 + .unlock = kfmlp_unlock,
13033 + .close = kfmlp_close,
13034 + .deallocate = kfmlp_free,
13035 +
13036 + // kfmlp can only be an outer-most lock.
13037 + .propagate_increase_inheritance = NULL,
13038 + .propagate_decrease_inheritance = NULL,
13039 +};
13040 +
13041 +
13042 +static struct litmus_lock* cedf_new_kfmlp(void* __user arg)
13043 +{
13044 + return kfmlp_new(&cedf_kfmlp_lock_ops, arg);
13045 +}
13046 +
13047 +
13048 +/* **** lock constructor **** */
13049 +
13050 +static long cedf_allocate_lock(struct litmus_lock **lock, int type,
13051 + void* __user args)
13052 +{
13053 + int err;
13054 +
13055 + switch (type) {
13056 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
13057 + case RSM_MUTEX:
13058 + *lock = cedf_new_rsm_mutex();
13059 + break;
13060 +
13061 + case IKGLP_SEM:
13062 + *lock = cedf_new_ikglp(args);
13063 + break;
13064 +#endif
13065 + case KFMLP_SEM:
13066 + *lock = cedf_new_kfmlp(args);
13067 + break;
13068 +
13069 + default:
13070 + err = -ENXIO;
13071 + goto UNSUPPORTED_LOCK;
13072 + };
13073 +
13074 + if (*lock)
13075 + err = 0;
13076 + else
13077 + err = -ENOMEM;
13078 +
13079 +UNSUPPORTED_LOCK:
13080 + return err;
13081 +}
13082 +
13083 +#endif // CONFIG_LITMUS_LOCKING
13084 +
13085 +
13086 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
13087 +static struct affinity_observer_ops cedf_kfmlp_affinity_ops = {
13088 + .close = kfmlp_aff_obs_close,
13089 + .deallocate = kfmlp_aff_obs_free,
13090 +};
13091 +
13092 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
13093 +static struct affinity_observer_ops cedf_ikglp_affinity_ops = {
13094 + .close = ikglp_aff_obs_close,
13095 + .deallocate = ikglp_aff_obs_free,
13096 +};
13097 +#endif
13098 +
13099 +static long cedf_allocate_affinity_observer(struct affinity_observer **aff_obs,
13100 + int type,
13101 + void* __user args)
13102 +{
13103 + int err;
13104 +
13105 + switch (type) {
13106 +
13107 + case KFMLP_SIMPLE_GPU_AFF_OBS:
13108 + *aff_obs = kfmlp_simple_gpu_aff_obs_new(&cedf_kfmlp_affinity_ops, args);
13109 + break;
13110 +
13111 + case KFMLP_GPU_AFF_OBS:
13112 + *aff_obs = kfmlp_gpu_aff_obs_new(&cedf_kfmlp_affinity_ops, args);
13113 + break;
13114 +
13115 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
13116 + case IKGLP_SIMPLE_GPU_AFF_OBS:
13117 + *aff_obs = ikglp_simple_gpu_aff_obs_new(&cedf_ikglp_affinity_ops, args);
13118 + break;
13119 +
13120 + case IKGLP_GPU_AFF_OBS:
13121 + *aff_obs = ikglp_gpu_aff_obs_new(&cedf_ikglp_affinity_ops, args);
13122 + break;
13123 +#endif
13124 + default:
13125 + err = -ENXIO;
13126 + goto UNSUPPORTED_AFF_OBS;
13127 + };
13128 +
13129 + if (*aff_obs)
13130 + err = 0;
13131 + else
13132 + err = -ENOMEM;
13133 +
13134 +UNSUPPORTED_AFF_OBS:
13135 + return err;
13136 +}
13137 +#endif
13138 +
13139 +
13140 +
13141
13142 #ifdef VERBOSE_INIT
13143 static void print_cluster_topology(cpumask_var_t mask, int cpu)
13144 @@ -680,16 +1597,17 @@ static void print_cluster_topology(cpumask_var_t mask, int cpu)
13145 }
13146 #endif
13147
13148 -static int clusters_allocated = 0;
13149 -
13150 static void cleanup_cedf(void)
13151 {
13152 int i;
13153
13154 +#ifdef CONFIG_LITMUS_NVIDIA
13155 + shutdown_nvidia_info();
13156 +#endif
13157 +
13158 if (clusters_allocated) {
13159 for (i = 0; i < num_clusters; i++) {
13160 kfree(cedf[i].cpus);
13161 - kfree(cedf[i].heap_node);
13162 free_cpumask_var(cedf[i].cpu_map);
13163 }
13164
13165 @@ -749,12 +1667,16 @@ static long cedf_activate_plugin(void)
13166
13167 cedf[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t),
13168 GFP_ATOMIC);
13169 - cedf[i].heap_node = kmalloc(
13170 - cluster_size * sizeof(struct bheap_node),
13171 - GFP_ATOMIC);
13172 - bheap_init(&(cedf[i].cpu_heap));
13173 + INIT_BINHEAP_HANDLE(&(cedf[i].cpu_heap), cpu_lower_prio);
13174 edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs);
13175
13176 +
13177 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
13178 + cedf[i].pending_tasklets.head = NULL;
13179 + cedf[i].pending_tasklets.tail = &(cedf[i].pending_tasklets.head);
13180 +#endif
13181 +
13182 +
13183 if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC))
13184 return -ENOMEM;
13185 #ifdef CONFIG_RELEASE_MASTER
13186 @@ -765,6 +1687,10 @@ static long cedf_activate_plugin(void)
13187 /* cycle through cluster and add cpus to them */
13188 for (i = 0; i < num_clusters; i++) {
13189
13190 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
13191 + raw_spin_lock_init(&cedf[i].dgl_lock);
13192 +#endif
13193 +
13194 for_each_online_cpu(cpu) {
13195 /* check if the cpu is already in a cluster */
13196 for (j = 0; j < num_clusters; j++)
13197 @@ -795,8 +1721,8 @@ static long cedf_activate_plugin(void)
13198 atomic_set(&entry->will_schedule, 0);
13199 entry->cpu = ccpu;
13200 entry->cluster = &cedf[i];
13201 - entry->hn = &(cedf[i].heap_node[cpu_count]);
13202 - bheap_node_init(&entry->hn, entry);
13203 +
13204 + INIT_BINHEAP_NODE(&entry->hn);
13205
13206 cpu_count++;
13207
13208 @@ -813,6 +1739,40 @@ static long cedf_activate_plugin(void)
13209 }
13210 }
13211
13212 +#ifdef CONFIG_LITMUS_SOFTIRQD
13213 + {
13214 + /* distribute the daemons evenly across the clusters. */
13215 + int* affinity = kmalloc(NR_LITMUS_SOFTIRQD * sizeof(int), GFP_ATOMIC);
13216 + int num_daemons_per_cluster = NR_LITMUS_SOFTIRQD / num_clusters;
13217 + int left_over = NR_LITMUS_SOFTIRQD % num_clusters;
13218 +
13219 + int daemon = 0;
13220 + for(i = 0; i < num_clusters; ++i)
13221 + {
13222 + int num_on_this_cluster = num_daemons_per_cluster;
13223 + if(left_over)
13224 + {
13225 + ++num_on_this_cluster;
13226 + --left_over;
13227 + }
13228 +
13229 + for(j = 0; j < num_on_this_cluster; ++j)
13230 + {
13231 + // first CPU of this cluster
13232 + affinity[daemon++] = i*cluster_size;
13233 + }
13234 + }
13235 +
13236 + spawn_klitirqd(affinity);
13237 +
13238 + kfree(affinity);
13239 + }
13240 +#endif
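
The block above spreads the NR_LITMUS_SOFTIRQD interrupt-handling daemons as evenly as possible across the clusters, giving one extra daemon to each of the first NR_LITMUS_SOFTIRQD % num_clusters clusters and pinning every daemon to the first CPU of its cluster. The same arithmetic in isolation (a sketch; the parameter names merely mirror the locals above):

/* Sketch only: fill affinity[0..ndaemons-1] with the first CPU of the
 * cluster each daemon should be pinned to. */
static void distribute_daemons(int *affinity, int ndaemons,
                               int nclusters, int cpus_per_cluster)
{
	int per_cluster = ndaemons / nclusters;
	int left_over   = ndaemons % nclusters;
	int d = 0, c, j;

	for (c = 0; c < nclusters; ++c) {
		int here = per_cluster + (c < left_over ? 1 : 0);
		for (j = 0; j < here; ++j)
			affinity[d++] = c * cpus_per_cluster;  /* first CPU of cluster c */
	}
}
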
13241 +
13242 +#ifdef CONFIG_LITMUS_NVIDIA
13243 + init_nvidia_info();
13244 +#endif
13245 +
13246 free_cpumask_var(mask);
13247 clusters_allocated = 1;
13248 return 0;
13249 @@ -831,6 +1791,32 @@ static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = {
13250 .task_block = cedf_task_block,
13251 .admit_task = cedf_admit_task,
13252 .activate_plugin = cedf_activate_plugin,
13253 + .compare = edf_higher_prio,
13254 +#ifdef CONFIG_LITMUS_LOCKING
13255 + .allocate_lock = cedf_allocate_lock,
13256 + .increase_prio = increase_priority_inheritance,
13257 + .decrease_prio = decrease_priority_inheritance,
13258 +#endif
13259 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
13260 + .nested_increase_prio = nested_increase_priority_inheritance,
13261 + .nested_decrease_prio = nested_decrease_priority_inheritance,
13262 + .__compare = __edf_higher_prio,
13263 +#endif
13264 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
13265 + .get_dgl_spinlock = cedf_get_dgl_spinlock,
13266 +#endif
13267 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
13268 + .allocate_aff_obs = cedf_allocate_affinity_observer,
13269 +#endif
13270 +#ifdef CONFIG_LITMUS_SOFTIRQD
13271 + .increase_prio_klitirqd = increase_priority_inheritance_klitirqd,
13272 + .decrease_prio_klitirqd = decrease_priority_inheritance_klitirqd,
13273 +#endif
13274 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
13275 + .enqueue_pai_tasklet = cedf_enqueue_pai_tasklet,
13276 + .change_prio_pai_tasklet = cedf_change_prio_pai_tasklet,
13277 + .run_tasklets = cedf_run_tasklets,
13278 +#endif
13279 };
13280
13281 static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL;
13282 diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c
13283 index 6ed504f..8c48757 100644
13284 --- a/litmus/sched_gsn_edf.c
13285 +++ b/litmus/sched_gsn_edf.c
13286 @@ -12,23 +12,49 @@
13287 #include <linux/percpu.h>
13288 #include <linux/sched.h>
13289 #include <linux/slab.h>
13290 +#include <linux/uaccess.h>
13291 +#include <linux/module.h>
13292
13293 #include <litmus/litmus.h>
13294 #include <litmus/jobs.h>
13295 #include <litmus/sched_plugin.h>
13296 #include <litmus/edf_common.h>
13297 #include <litmus/sched_trace.h>
13298 -#include <litmus/trace.h>
13299
13300 #include <litmus/preempt.h>
13301
13302 #include <litmus/bheap.h>
13303 +#include <litmus/binheap.h>
13304 +
13305 +#ifdef CONFIG_LITMUS_LOCKING
13306 +#include <litmus/kfmlp_lock.h>
13307 +#endif
13308 +
13309 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
13310 +#include <litmus/rsm_lock.h>
13311 +#include <litmus/ikglp_lock.h>
13312 +#endif
13313
13314 #ifdef CONFIG_SCHED_CPU_AFFINITY
13315 #include <litmus/affinity.h>
13316 #endif
13317
13318 -#include <linux/module.h>
13319 +#ifdef CONFIG_LITMUS_SOFTIRQD
13320 +#include <litmus/litmus_softirq.h>
13321 +#endif
13322 +
13323 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
13324 +#include <linux/interrupt.h>
13325 +#include <litmus/trace.h>
13326 +#endif
13327 +
13328 +#ifdef CONFIG_LITMUS_NVIDIA
13329 +#include <litmus/nvidia_info.h>
13330 +#endif
13331 +
13332 +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
13333 +#include <litmus/gpu_affinity.h>
13334 +#endif
13335
13336 /* Overview of GSN-EDF operations.
13337 *
13338 @@ -103,52 +129,70 @@ typedef struct {
13339 int cpu;
13340 struct task_struct* linked; /* only RT tasks */
13341 struct task_struct* scheduled; /* only RT tasks */
13342 - struct bheap_node* hn;
13343 + struct binheap_node hn;
13344 } cpu_entry_t;
13345 DEFINE_PER_CPU(cpu_entry_t, gsnedf_cpu_entries);
13346
13347 cpu_entry_t* gsnedf_cpus[NR_CPUS];
13348
13349 /* the cpus queue themselves according to priority in here */
13350 -static struct bheap_node gsnedf_heap_node[NR_CPUS];
13351 -static struct bheap gsnedf_cpu_heap;
13352 +static struct binheap_handle gsnedf_cpu_heap;
13353
13354 static rt_domain_t gsnedf;
13355 #define gsnedf_lock (gsnedf.ready_lock)
13356
13357 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
13358 +static raw_spinlock_t dgl_lock;
13359 +
13360 +static raw_spinlock_t* gsnedf_get_dgl_spinlock(struct task_struct *t)
13361 +{
13362 + return(&dgl_lock);
13363 +}
13364 +#endif
13365 +
13366 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
13367 +struct tasklet_head
13368 +{
13369 + struct tasklet_struct *head;
13370 + struct tasklet_struct **tail;
13371 +};
13372 +
13373 +struct tasklet_head gsnedf_pending_tasklets;
13374 +#endif
13375 +
13376
13377 /* Uncomment this if you want to see all scheduling decisions in the
13378 * TRACE() log.
13379 #define WANT_ALL_SCHED_EVENTS
13380 */
13381
13382 -static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
13383 +static int cpu_lower_prio(struct binheap_node *_a, struct binheap_node *_b)
13384 {
13385 - cpu_entry_t *a, *b;
13386 - a = _a->value;
13387 - b = _b->value;
13388 + cpu_entry_t *a = binheap_entry(_a, cpu_entry_t, hn);
13389 + cpu_entry_t *b = binheap_entry(_b, cpu_entry_t, hn);
13390 +
13391 /* Note that a and b are inverted: we want the lowest-priority CPU at
13392 * the top of the heap.
13393 */
13394 return edf_higher_prio(b->linked, a->linked);
13395 }
13396
13397 +
13398 /* update_cpu_position - Move the cpu entry to the correct place to maintain
13399 * order in the cpu queue. Caller must hold gsnedf lock.
13400 */
13401 static void update_cpu_position(cpu_entry_t *entry)
13402 {
13403 - if (likely(bheap_node_in_heap(entry->hn)))
13404 - bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn);
13405 - bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn);
13406 + if (likely(binheap_is_in_heap(&entry->hn))) {
13407 + binheap_delete(&entry->hn, &gsnedf_cpu_heap);
13408 + }
13409 + binheap_add(&entry->hn, &gsnedf_cpu_heap, cpu_entry_t, hn);
13410 }
13411
13412 /* caller must hold gsnedf lock */
13413 static cpu_entry_t* lowest_prio_cpu(void)
13414 {
13415 - struct bheap_node* hn;
13416 - hn = bheap_peek(cpu_lower_prio, &gsnedf_cpu_heap);
13417 - return hn->value;
13418 + return binheap_top_entry(&gsnedf_cpu_heap, cpu_entry_t, hn);
13419 }
13420
13421
13422 @@ -337,6 +381,10 @@ static noinline void job_completion(struct task_struct *t, int forced)
13423
13424 sched_trace_task_completion(t, forced);
13425
13426 +#ifdef CONFIG_LITMUS_NVIDIA
13427 + atomic_set(&tsk_rt(t)->nv_int_count, 0);
13428 +#endif
13429 +
13430 TRACE_TASK(t, "job_completion().\n");
13431
13432 /* set flags */
13433 @@ -379,6 +427,318 @@ static void gsnedf_tick(struct task_struct* t)
13434 }
13435 }
13436
13437 +
13438 +
13439 +
13440 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
13441 +
13442 +
13443 +static void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed)
13444 +{
13445 + if (!atomic_read(&tasklet->count)) {
13446 + if(tasklet->owner) {
13447 + sched_trace_tasklet_begin(tasklet->owner);
13448 + }
13449 +
13450 + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state))
13451 + {
13452 + BUG();
13453 + }
13454 + TRACE("%s: Invoking tasklet with owner pid = %d (flushed = %d).\n",
13455 + __FUNCTION__,
13456 + (tasklet->owner) ? tasklet->owner->pid : -1,
13457 + (tasklet->owner) ? 0 : 1);
13458 + tasklet->func(tasklet->data);
13459 + tasklet_unlock(tasklet);
13460 +
13461 + if(tasklet->owner) {
13462 + sched_trace_tasklet_end(tasklet->owner, flushed);
13463 + }
13464 + }
13465 + else {
13466 + BUG();
13467 + }
13468 +}
13469 +
13470 +static void do_lit_tasklets(struct task_struct* sched_task)
13471 +{
13472 + int work_to_do = 1;
13473 + struct tasklet_struct *tasklet = NULL;
13474 + unsigned long flags;
13475 +
13476 + while(work_to_do) {
13477 +
13478 + TS_NV_SCHED_BOTISR_START;
13479 +
13480 + // execute one tasklet that has higher priority
13481 + raw_spin_lock_irqsave(&gsnedf_lock, flags);
13482 +
13483 + if(gsnedf_pending_tasklets.head != NULL) {
13484 + struct tasklet_struct *prev = NULL;
13485 + tasklet = gsnedf_pending_tasklets.head;
13486 +
13487 + while(tasklet && edf_higher_prio(sched_task, tasklet->owner)) {
13488 + prev = tasklet;
13489 + tasklet = tasklet->next;
13490 + }
13491 +
13492 + // remove the tasklet from the queue
13493 + if(prev) {
13494 + prev->next = tasklet->next;
13495 + if(prev->next == NULL) {
13496 + TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
13497 +				gsnedf_pending_tasklets.tail = &(prev->next);
13498 + }
13499 + }
13500 + else {
13501 + gsnedf_pending_tasklets.head = tasklet->next;
13502 + if(tasklet->next == NULL) {
13503 + TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
13504 + gsnedf_pending_tasklets.tail = &(gsnedf_pending_tasklets.head);
13505 + }
13506 + }
13507 + }
13508 + else {
13509 + TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__);
13510 + }
13511 +
13512 + raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
13513 +
13514 + if(tasklet) {
13515 + __do_lit_tasklet(tasklet, 0ul);
13516 + tasklet = NULL;
13517 + }
13518 + else {
13519 + work_to_do = 0;
13520 + }
13521 +
13522 + TS_NV_SCHED_BOTISR_END;
13523 + }
13524 +}
13525 +
13526 +//static void do_lit_tasklets(struct task_struct* sched_task)
13527 +//{
13528 +// int work_to_do = 1;
13529 +// struct tasklet_struct *tasklet = NULL;
13530 +// //struct tasklet_struct *step;
13531 +// unsigned long flags;
13532 +//
13533 +// while(work_to_do) {
13534 +//
13535 +// TS_NV_SCHED_BOTISR_START;
13536 +//
13537 +// // remove tasklet at head of list if it has higher priority.
13538 +// raw_spin_lock_irqsave(&gsnedf_lock, flags);
13539 +//
13540 +// if(gsnedf_pending_tasklets.head != NULL) {
13541 +// // remove tasklet at head.
13542 +// tasklet = gsnedf_pending_tasklets.head;
13543 +//
13544 +// if(edf_higher_prio(tasklet->owner, sched_task)) {
13545 +//
13546 +// if(NULL == tasklet->next) {
13547 +// // tasklet is at the head, list only has one element
13548 +// TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
13549 +// gsnedf_pending_tasklets.tail = &(gsnedf_pending_tasklets.head);
13550 +// }
13551 +//
13552 +// // remove the tasklet from the queue
13553 +// gsnedf_pending_tasklets.head = tasklet->next;
13554 +//
13555 +// TRACE("%s: Removed tasklet for %d from tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
13556 +// }
13557 +// else {
13558 +// TRACE("%s: Pending tasklet (%d) does not have priority to run on this CPU (%d).\n", __FUNCTION__, tasklet->owner->pid, smp_processor_id());
13559 +// tasklet = NULL;
13560 +// }
13561 +// }
13562 +// else {
13563 +// TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__);
13564 +// }
13565 +//
13566 +// raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
13567 +//
13568 +// TS_NV_SCHED_BOTISR_END;
13569 +//
13570 +// if(tasklet) {
13571 +// __do_lit_tasklet(tasklet, 0ul);
13572 +// tasklet = NULL;
13573 +// }
13574 +// else {
13575 +// work_to_do = 0;
13576 +// }
13577 +// }
13578 +//
13579 +// //TRACE("%s: exited.\n", __FUNCTION__);
13580 +//}
13581 +
13582 +static void __add_pai_tasklet(struct tasklet_struct* tasklet)
13583 +{
13584 + struct tasklet_struct* step;
13585 +
13586 + tasklet->next = NULL; // make sure there are no old values floating around
13587 +
13588 + step = gsnedf_pending_tasklets.head;
13589 + if(step == NULL) {
13590 + TRACE("%s: tasklet queue empty. inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid);
13591 + // insert at tail.
13592 + *(gsnedf_pending_tasklets.tail) = tasklet;
13593 + gsnedf_pending_tasklets.tail = &(tasklet->next);
13594 + }
13595 + else if((*(gsnedf_pending_tasklets.tail) != NULL) &&
13596 + edf_higher_prio((*(gsnedf_pending_tasklets.tail))->owner, tasklet->owner)) {
13597 + // insert at tail.
13598 + TRACE("%s: tasklet belongs at end. inserting tasklet for %d at tail.\n", __FUNCTION__, tasklet->owner->pid);
13599 +
13600 + *(gsnedf_pending_tasklets.tail) = tasklet;
13601 + gsnedf_pending_tasklets.tail = &(tasklet->next);
13602 + }
13603 + else {
13604 + // insert the tasklet somewhere in the middle.
13605 +
13606 + TRACE("%s: tasklet belongs somewhere in the middle.\n", __FUNCTION__);
13607 +
13608 + while(step->next && edf_higher_prio(step->next->owner, tasklet->owner)) {
13609 + step = step->next;
13610 + }
13611 +
13612 + // insert tasklet right before step->next.
13613 +
13614 + TRACE("%s: inserting tasklet for %d between %d and %d.\n", __FUNCTION__, tasklet->owner->pid, step->owner->pid, (step->next) ? step->next->owner->pid : -1);
13615 +
13616 + tasklet->next = step->next;
13617 + step->next = tasklet;
13618 +
13619 + // patch up the head if needed.
13620 + if(gsnedf_pending_tasklets.head == step)
13621 + {
13622 + TRACE("%s: %d is the new tasklet queue head.\n", __FUNCTION__, tasklet->owner->pid);
13623 + gsnedf_pending_tasklets.head = tasklet;
13624 + }
13625 + }
13626 +}
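
__add_pai_tasklet() keeps the pending list sorted by owner priority and maintains the usual tail-pointer invariant: 'tail' always addresses the 'next' field of the last element, or 'head' itself when the list is empty, so appends never need a list walk. A minimal sketch of just that invariant (illustrative; not patch code):

#include <linux/interrupt.h>

/* Sketch of the head/tail bookkeeping used by the pending-tasklet queue. */
struct tasklet_queue {
	struct tasklet_struct *head;
	struct tasklet_struct **tail;   /* &head when empty, &last->next otherwise */
};

static void tasklet_queue_init(struct tasklet_queue *q)
{
	q->head = NULL;
	q->tail = &q->head;
}

static void tasklet_queue_append(struct tasklet_queue *q,
                                 struct tasklet_struct *t)
{
	t->next = NULL;
	*q->tail = t;          /* link behind the current last element (or as head) */
	q->tail = &t->next;    /* t is now the last element */
}
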
13627 +
13628 +static void gsnedf_run_tasklets(struct task_struct* sched_task)
13629 +{
13630 + preempt_disable();
13631 +
13632 + if(gsnedf_pending_tasklets.head != NULL) {
13633 + TRACE("%s: There are tasklets to process.\n", __FUNCTION__);
13634 + do_lit_tasklets(sched_task);
13635 + }
13636 +
13637 + preempt_enable_no_resched();
13638 +}
13639 +
13640 +static int gsnedf_enqueue_pai_tasklet(struct tasklet_struct* tasklet)
13641 +{
13642 + cpu_entry_t *targetCPU = NULL;
13643 + int thisCPU;
13644 + int runLocal = 0;
13645 + int runNow = 0;
13646 + unsigned long flags;
13647 +
13648 + if(unlikely((tasklet->owner == NULL) || !is_realtime(tasklet->owner)))
13649 + {
13650 + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
13651 + return 0;
13652 + }
13653 +
13654 +
13655 + raw_spin_lock_irqsave(&gsnedf_lock, flags);
13656 +
13657 + thisCPU = smp_processor_id();
13658 +
13659 +#ifdef CONFIG_SCHED_CPU_AFFINITY
13660 + {
13661 + cpu_entry_t* affinity = NULL;
13662 +
13663 +		// use this CPU if it isn't the release master and isn't running any RT work.
13664 + if(
13665 +#ifdef CONFIG_RELEASE_MASTER
13666 + (thisCPU != gsnedf.release_master) &&
13667 +#endif
13668 + (__get_cpu_var(gsnedf_cpu_entries).linked == NULL)) {
13669 + affinity = &(__get_cpu_var(gsnedf_cpu_entries));
13670 + }
13671 + else {
13672 +			// this CPU is busy or shouldn't run the tasklet here.
13673 +			// look for available nearby CPUs.
13674 + // NOTE: Affinity towards owner and not this CPU. Is this right?
13675 + affinity =
13676 + gsnedf_get_nearest_available_cpu(
13677 + &per_cpu(gsnedf_cpu_entries, task_cpu(tasklet->owner)));
13678 + }
13679 +
13680 + targetCPU = affinity;
13681 + }
13682 +#endif
13683 +
13684 + if (targetCPU == NULL) {
13685 + targetCPU = lowest_prio_cpu();
13686 + }
13687 +
13688 + if (edf_higher_prio(tasklet->owner, targetCPU->linked)) {
13689 + if (thisCPU == targetCPU->cpu) {
13690 + TRACE("%s: Run tasklet locally (and now).\n", __FUNCTION__);
13691 + runLocal = 1;
13692 + runNow = 1;
13693 + }
13694 + else {
13695 + TRACE("%s: Run tasklet remotely (and now).\n", __FUNCTION__);
13696 + runLocal = 0;
13697 + runNow = 1;
13698 + }
13699 + }
13700 + else {
13701 + runLocal = 0;
13702 + runNow = 0;
13703 + }
13704 +
13705 + if(!runLocal) {
13706 + // enqueue the tasklet
13707 + __add_pai_tasklet(tasklet);
13708 + }
13709 +
13710 + raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
13711 +
13712 +
13713 + if (runLocal /*&& runNow */) { // runNow == 1 is implied
13714 + TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__);
13715 + __do_lit_tasklet(tasklet, 0ul);
13716 + }
13717 + else if (runNow /*&& !runLocal */) { // runLocal == 0 is implied
13718 + TRACE("%s: Triggering CPU %d to run tasklet.\n", __FUNCTION__, targetCPU->cpu);
13719 +		preempt(targetCPU);  // need to be protected by gsnedf_lock?
13720 + }
13721 + else {
13722 + TRACE("%s: Scheduling of tasklet was deferred.\n", __FUNCTION__);
13723 + }
13724 +
13725 + return(1); // success
13726 +}
13727 +
13728 +static void gsnedf_change_prio_pai_tasklet(struct task_struct *old_prio,
13729 + struct task_struct *new_prio)
13730 +{
13731 + struct tasklet_struct* step;
13732 + unsigned long flags;
13733 +
13734 + if(gsnedf_pending_tasklets.head != NULL) {
13735 + raw_spin_lock_irqsave(&gsnedf_lock, flags);
13736 + for(step = gsnedf_pending_tasklets.head; step != NULL; step = step->next) {
13737 + if(step->owner == old_prio) {
13738 + TRACE("%s: Found tasklet to change: %d\n", __FUNCTION__, step->owner->pid);
13739 + step->owner = new_prio;
13740 + }
13741 + }
13742 + raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
13743 + }
13744 +}
13745 +
13746 +#endif // end PAI
13747 +
13748 +
13749 /* Getting schedule() right is a bit tricky. schedule() may not make any
13750 * assumptions on the state of the current task since it may be called for a
13751 * number of reasons. The reasons include a scheduler_tick() determined that it
13752 @@ -437,21 +797,32 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev)
13753 TRACE_TASK(prev, "invoked gsnedf_schedule.\n");
13754 #endif
13755
13756 + /*
13757 if (exists)
13758 TRACE_TASK(prev,
13759 "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
13760 "state:%d sig:%d\n",
13761 blocks, out_of_time, np, sleep, preempt,
13762 prev->state, signal_pending(prev));
13763 + */
13764 +
13765 if (entry->linked && preempt)
13766 TRACE_TASK(prev, "will be preempted by %s/%d\n",
13767 entry->linked->comm, entry->linked->pid);
13768
13769 -
13770 /* If a task blocks we have no choice but to reschedule.
13771 */
13772 - if (blocks)
13773 + if (blocks) {
13774 unlink(entry->scheduled);
13775 + }
13776 +
13777 +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
13778 + if(exists && is_realtime(entry->scheduled) && tsk_rt(entry->scheduled)->held_gpus) {
13779 + if(!blocks || tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) {
13780 + stop_gpu_tracker(entry->scheduled);
13781 + }
13782 + }
13783 +#endif
13784
13785 /* Request a sys_exit_np() call if we would like to preempt but cannot.
13786 * We need to make sure to update the link structure anyway in case
13787 @@ -492,12 +863,15 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev)
13788 entry->scheduled->rt_param.scheduled_on = NO_CPU;
13789 TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
13790 }
13791 - } else
13792 + }
13793 + else
13794 + {
13795 /* Only override Linux scheduler if we have a real-time task
13796 * scheduled that needs to continue.
13797 */
13798 if (exists)
13799 next = prev;
13800 + }
13801
13802 sched_state_task_picked();
13803
13804 @@ -524,6 +898,7 @@ static void gsnedf_finish_switch(struct task_struct *prev)
13805 cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
13806
13807 entry->scheduled = is_realtime(current) ? current : NULL;
13808 +
13809 #ifdef WANT_ALL_SCHED_EVENTS
13810 TRACE_TASK(prev, "switched away from\n");
13811 #endif
13812 @@ -572,11 +947,14 @@ static void gsnedf_task_new(struct task_struct * t, int on_rq, int running)
13813 static void gsnedf_task_wake_up(struct task_struct *task)
13814 {
13815 unsigned long flags;
13816 - lt_t now;
13817 + //lt_t now;
13818
13819 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
13820
13821 raw_spin_lock_irqsave(&gsnedf_lock, flags);
13822 +
13823 +
13824 +#if 0 // sporadic task model
13825 /* We need to take suspensions because of semaphores into
13826 * account! If a job resumes after being suspended due to acquiring
13827 * a semaphore, it should never be treated as a new job release.
13828 @@ -598,19 +976,26 @@ static void gsnedf_task_wake_up(struct task_struct *task)
13829 }
13830 }
13831 }
13832 +#else // periodic task model
13833 + set_rt_flags(task, RT_F_RUNNING);
13834 +#endif
13835 +
13836 gsnedf_job_arrival(task);
13837 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
13838 }
13839
13840 static void gsnedf_task_block(struct task_struct *t)
13841 {
13842 + // TODO: is this called on preemption??
13843 unsigned long flags;
13844
13845 TRACE_TASK(t, "block at %llu\n", litmus_clock());
13846
13847 /* unlink if necessary */
13848 raw_spin_lock_irqsave(&gsnedf_lock, flags);
13849 +
13850 unlink(t);
13851 +
13852 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
13853
13854 BUG_ON(!is_realtime(t));
13855 @@ -621,6 +1006,10 @@ static void gsnedf_task_exit(struct task_struct * t)
13856 {
13857 unsigned long flags;
13858
13859 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
13860 + gsnedf_change_prio_pai_tasklet(t, NULL);
13861 +#endif
13862 +
13863 /* unlink if necessary */
13864 raw_spin_lock_irqsave(&gsnedf_lock, flags);
13865 unlink(t);
13866 @@ -637,101 +1026,423 @@ static void gsnedf_task_exit(struct task_struct * t)
13867
13868 static long gsnedf_admit_task(struct task_struct* tsk)
13869 {
13870 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
13871 + INIT_BINHEAP_HANDLE(&tsk_rt(tsk)->hp_blocked_tasks,
13872 + edf_max_heap_base_priority_order);
13873 +#endif
13874 +
13875 return 0;
13876 }
13877
13878 +
13879 +
13880 +
13881 +
13882 +
13883 #ifdef CONFIG_LITMUS_LOCKING
13884
13885 #include <litmus/fdso.h>
13886
13887 /* called with IRQs off */
13888 -static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
13889 +static void __increase_priority_inheritance(struct task_struct* t,
13890 + struct task_struct* prio_inh)
13891 {
13892 int linked_on;
13893 int check_preempt = 0;
13894
13895 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
13896 + /* this sanity check allows for weaker locking in protocols */
13897 + /* TODO (klitirqd): Skip this check if 't' is a proxy thread (???) */
13898 + if(__edf_higher_prio(prio_inh, BASE, t, EFFECTIVE)) {
13899 +#endif
13900 + TRACE_TASK(t, "inherits priority from %s/%d\n",
13901 + prio_inh->comm, prio_inh->pid);
13902 + tsk_rt(t)->inh_task = prio_inh;
13903 +
13904 + linked_on = tsk_rt(t)->linked_on;
13905 +
13906 + /* If it is scheduled, then we need to reorder the CPU heap. */
13907 + if (linked_on != NO_CPU) {
13908 + TRACE_TASK(t, "%s: linked on %d\n",
13909 + __FUNCTION__, linked_on);
13910 + /* Holder is scheduled; need to re-order CPUs.
13911 + * We can't use heap_decrease() here since
13912 + * the cpu_heap is ordered in reverse direction, so
13913 + * it is actually an increase. */
13914 + binheap_delete(&gsnedf_cpus[linked_on]->hn, &gsnedf_cpu_heap);
13915 + binheap_add(&gsnedf_cpus[linked_on]->hn,
13916 + &gsnedf_cpu_heap, cpu_entry_t, hn);
13917 + } else {
13918 + /* holder may be queued: first stop queue changes */
13919 + raw_spin_lock(&gsnedf.release_lock);
13920 + if (is_queued(t)) {
13921 + TRACE_TASK(t, "%s: is queued\n",
13922 + __FUNCTION__);
13923 + /* We need to update the position of holder in some
13924 +			 * heap. Note that this could be a release heap if
13925 + * budget enforcement is used and this job overran. */
13926 + check_preempt =
13927 + !bheap_decrease(edf_ready_order,
13928 + tsk_rt(t)->heap_node);
13929 + } else {
13930 + /* Nothing to do: if it is not queued and not linked
13931 + * then it is either sleeping or currently being moved
13932 + * by other code (e.g., a timer interrupt handler) that
13933 + * will use the correct priority when enqueuing the
13934 + * task. */
13935 + TRACE_TASK(t, "%s: is NOT queued => Done.\n",
13936 + __FUNCTION__);
13937 + }
13938 + raw_spin_unlock(&gsnedf.release_lock);
13939 +
13940 + /* If holder was enqueued in a release heap, then the following
13941 + * preemption check is pointless, but we can't easily detect
13942 + * that case. If you want to fix this, then consider that
13943 + * simply adding a state flag requires O(n) time to update when
13944 + * releasing n tasks, which conflicts with the goal to have
13945 + * O(log n) merges. */
13946 + if (check_preempt) {
13947 + /* heap_decrease() hit the top level of the heap: make
13948 + * sure preemption checks get the right task, not the
13949 + * potentially stale cache. */
13950 + bheap_uncache_min(edf_ready_order,
13951 + &gsnedf.ready_queue);
13952 + check_for_preemptions();
13953 + }
13954 + }
13955 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
13956 + }
13957 + else {
13958 + TRACE_TASK(t, "Spurious invalid priority increase. "
13959 + "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n"
13960 +			   "Occurrence is likely okay: probably due to (hopefully safe) concurrent priority updates.\n",
13961 + t->comm, t->pid,
13962 + effective_priority(t)->comm, effective_priority(t)->pid,
13963 + (prio_inh) ? prio_inh->comm : "nil",
13964 + (prio_inh) ? prio_inh->pid : -1);
13965 + WARN_ON(!prio_inh);
13966 + }
13967 +#endif
13968 +}
13969 +
13970 +/* called with IRQs off */
13971 +static void increase_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
13972 +{
13973 raw_spin_lock(&gsnedf_lock);
13974
13975 - TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid);
13976 - tsk_rt(t)->inh_task = prio_inh;
13977 -
13978 - linked_on = tsk_rt(t)->linked_on;
13979 -
13980 - /* If it is scheduled, then we need to reorder the CPU heap. */
13981 - if (linked_on != NO_CPU) {
13982 - TRACE_TASK(t, "%s: linked on %d\n",
13983 - __FUNCTION__, linked_on);
13984 - /* Holder is scheduled; need to re-order CPUs.
13985 - * We can't use heap_decrease() here since
13986 - * the cpu_heap is ordered in reverse direction, so
13987 - * it is actually an increase. */
13988 - bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap,
13989 - gsnedf_cpus[linked_on]->hn);
13990 - bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap,
13991 - gsnedf_cpus[linked_on]->hn);
13992 - } else {
13993 - /* holder may be queued: first stop queue changes */
13994 - raw_spin_lock(&gsnedf.release_lock);
13995 - if (is_queued(t)) {
13996 - TRACE_TASK(t, "%s: is queued\n",
13997 - __FUNCTION__);
13998 - /* We need to update the position of holder in some
13999 - * heap. Note that this could be a release heap if we
14000 - * budget enforcement is used and this job overran. */
14001 - check_preempt =
14002 - !bheap_decrease(edf_ready_order,
14003 - tsk_rt(t)->heap_node);
14004 - } else {
14005 - /* Nothing to do: if it is not queued and not linked
14006 - * then it is either sleeping or currently being moved
14007 - * by other code (e.g., a timer interrupt handler) that
14008 - * will use the correct priority when enqueuing the
14009 - * task. */
14010 - TRACE_TASK(t, "%s: is NOT queued => Done.\n",
14011 - __FUNCTION__);
14012 + __increase_priority_inheritance(t, prio_inh);
14013 +
14014 +#ifdef CONFIG_LITMUS_SOFTIRQD
14015 + if(tsk_rt(t)->cur_klitirqd != NULL)
14016 + {
14017 + TRACE_TASK(t, "%s/%d inherits a new priority!\n",
14018 + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
14019 +
14020 + __increase_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh);
14021 + }
14022 +#endif
14023 +
14024 + raw_spin_unlock(&gsnedf_lock);
14025 +
14026 +#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA)
14027 + if(tsk_rt(t)->held_gpus) {
14028 + int i;
14029 + for(i = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus));
14030 + i < NV_DEVICE_NUM;
14031 + i = find_next_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus), i+1)) {
14032 + pai_check_priority_increase(t, i);
14033 + }
14034 + }
14035 +#endif
14036 +}
14037 +
14038 +
14039 +/* called with IRQs off */
14040 +static void __decrease_priority_inheritance(struct task_struct* t,
14041 + struct task_struct* prio_inh)
14042 +{
14043 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
14044 + if(__edf_higher_prio(t, EFFECTIVE, prio_inh, BASE)) {
14045 +#endif
14046 + /* A job only stops inheriting a priority when it releases a
14047 + * resource. Thus we can make the following assumption.*/
14048 + if(prio_inh)
14049 + TRACE_TASK(t, "EFFECTIVE priority decreased to %s/%d\n",
14050 + prio_inh->comm, prio_inh->pid);
14051 + else
14052 + TRACE_TASK(t, "base priority restored.\n");
14053 +
14054 + tsk_rt(t)->inh_task = prio_inh;
14055 +
14056 + if(tsk_rt(t)->scheduled_on != NO_CPU) {
14057 + TRACE_TASK(t, "is scheduled.\n");
14058 +
14059 + /* Check if rescheduling is necessary. We can't use heap_decrease()
14060 + * since the priority was effectively lowered. */
14061 + unlink(t);
14062 + gsnedf_job_arrival(t);
14063 }
14064 - raw_spin_unlock(&gsnedf.release_lock);
14065 -
14066 - /* If holder was enqueued in a release heap, then the following
14067 - * preemption check is pointless, but we can't easily detect
14068 - * that case. If you want to fix this, then consider that
14069 - * simply adding a state flag requires O(n) time to update when
14070 - * releasing n tasks, which conflicts with the goal to have
14071 - * O(log n) merges. */
14072 - if (check_preempt) {
14073 - /* heap_decrease() hit the top level of the heap: make
14074 - * sure preemption checks get the right task, not the
14075 - * potentially stale cache. */
14076 - bheap_uncache_min(edf_ready_order,
14077 - &gsnedf.ready_queue);
14078 - check_for_preemptions();
14079 + else {
14080 + /* task is queued */
14081 + raw_spin_lock(&gsnedf.release_lock);
14082 + if (is_queued(t)) {
14083 + TRACE_TASK(t, "is queued.\n");
14084 +
14085 + /* decrease in priority, so we have to re-add to binomial heap */
14086 + unlink(t);
14087 + gsnedf_job_arrival(t);
14088 + }
14089 + else {
14090 + TRACE_TASK(t, "is not in scheduler. Probably on wait queue somewhere.\n");
14091 + }
14092 + raw_spin_unlock(&gsnedf.release_lock);
14093 }
14094 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
14095 + }
14096 + else {
14097 + TRACE_TASK(t, "Spurious invalid priority decrease. "
14098 + "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n"
14099 +			   "Occurrence is likely okay: probably due to (hopefully safe) concurrent priority updates.\n",
14100 + t->comm, t->pid,
14101 + effective_priority(t)->comm, effective_priority(t)->pid,
14102 + (prio_inh) ? prio_inh->comm : "nil",
14103 + (prio_inh) ? prio_inh->pid : -1);
14104 }
14105 +#endif
14106 +}
14107 +
14108 +static void decrease_priority_inheritance(struct task_struct* t,
14109 + struct task_struct* prio_inh)
14110 +{
14111 + raw_spin_lock(&gsnedf_lock);
14112 + __decrease_priority_inheritance(t, prio_inh);
14113 +
14114 +#ifdef CONFIG_LITMUS_SOFTIRQD
14115 + if(tsk_rt(t)->cur_klitirqd != NULL)
14116 + {
14117 + TRACE_TASK(t, "%s/%d decreases in priority!\n",
14118 + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
14119 +
14120 + __decrease_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh);
14121 + }
14122 +#endif
14123
14124 raw_spin_unlock(&gsnedf_lock);
14125 +
14126 +#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA)
14127 + if(tsk_rt(t)->held_gpus) {
14128 + int i;
14129 + for(i = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus));
14130 + i < NV_DEVICE_NUM;
14131 + i = find_next_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus), i+1)) {
14132 + pai_check_priority_decrease(t, i);
14133 + }
14134 + }
14135 +#endif
14136 }
14137
14138 +
14139 +#ifdef CONFIG_LITMUS_SOFTIRQD
14140 /* called with IRQs off */
14141 -static void clear_priority_inheritance(struct task_struct* t)
14142 +static void increase_priority_inheritance_klitirqd(struct task_struct* klitirqd,
14143 + struct task_struct* old_owner,
14144 + struct task_struct* new_owner)
14145 {
14146 + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
14147 +
14148 raw_spin_lock(&gsnedf_lock);
14149
14150 - /* A job only stops inheriting a priority when it releases a
14151 - * resource. Thus we can make the following assumption.*/
14152 - BUG_ON(tsk_rt(t)->scheduled_on == NO_CPU);
14153 + if(old_owner != new_owner)
14154 + {
14155 + if(old_owner)
14156 + {
14157 + // unreachable?
14158 + tsk_rt(old_owner)->cur_klitirqd = NULL;
14159 + }
14160
14161 - TRACE_TASK(t, "priority restored\n");
14162 - tsk_rt(t)->inh_task = NULL;
14163 + TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n",
14164 + new_owner->comm, new_owner->pid);
14165
14166 - /* Check if rescheduling is necessary. We can't use heap_decrease()
14167 - * since the priority was effectively lowered. */
14168 - unlink(t);
14169 - gsnedf_job_arrival(t);
14170 + tsk_rt(new_owner)->cur_klitirqd = klitirqd;
14171 + }
14172 +
14173 + __decrease_priority_inheritance(klitirqd, NULL); // kludge to clear out cur prio.
14174 +
14175 + __increase_priority_inheritance(klitirqd,
14176 + (tsk_rt(new_owner)->inh_task == NULL) ?
14177 + new_owner :
14178 + tsk_rt(new_owner)->inh_task);
14179
14180 raw_spin_unlock(&gsnedf_lock);
14181 }
14182
14183
14184 +/* called with IRQs off */
14185 +static void decrease_priority_inheritance_klitirqd(struct task_struct* klitirqd,
14186 + struct task_struct* old_owner,
14187 + struct task_struct* new_owner)
14188 +{
14189 + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
14190 +
14191 + raw_spin_lock(&gsnedf_lock);
14192 +
14193 + TRACE_TASK(klitirqd, "priority restored\n");
14194 +
14195 + __decrease_priority_inheritance(klitirqd, new_owner);
14196 +
14197 + tsk_rt(old_owner)->cur_klitirqd = NULL;
14198 +
14199 + raw_spin_unlock(&gsnedf_lock);
14200 +}
14201 +#endif
14202 +
14203 +
14204 +
14205 +
14206 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
14207 +
14208 +/* called with IRQs off */
14209 +/* preconditions:
14210 + (1) The 'hp_blocked_tasks_lock' of task 't' is held.
14211 + (2) The lock 'to_unlock' is held.
14212 + */
14213 +static void nested_increase_priority_inheritance(struct task_struct* t,
14214 + struct task_struct* prio_inh,
14215 + raw_spinlock_t *to_unlock,
14216 + unsigned long irqflags)
14217 +{
14218 + struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock;
14219 +
14221 +	if(tsk_rt(t)->inh_task != prio_inh) { // shield redundant calls.
14221 + increase_priority_inheritance(t, prio_inh); // increase our prio.
14222 + }
14223 +
14225 +	raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);  // unlock t's heap.
14225 +
14226 +
14227 + if(blocked_lock) {
14228 + if(blocked_lock->ops->propagate_increase_inheritance) {
14229 + TRACE_TASK(t, "Inheritor is blocked (...perhaps). Checking lock %d.\n",
14230 + blocked_lock->ident);
14231 +
14232 + // beware: recursion
14233 + blocked_lock->ops->propagate_increase_inheritance(blocked_lock,
14234 + t, to_unlock,
14235 + irqflags);
14236 + }
14237 + else {
14238 + TRACE_TASK(t, "Inheritor is blocked on lock (%d) that does not support nesting!\n",
14239 + blocked_lock->ident);
14240 + unlock_fine_irqrestore(to_unlock, irqflags);
14241 + }
14242 + }
14243 + else {
14244 + TRACE_TASK(t, "is not blocked. No propagation.\n");
14245 + unlock_fine_irqrestore(to_unlock, irqflags);
14246 + }
14247 +}
14248 +
14249 +/* called with IRQs off */
14250 +/* preconditions:
14251 + (1) The 'hp_blocked_tasks_lock' of task 't' is held.
14252 + (2) The lock 'to_unlock' is held.
14253 + */
14254 +static void nested_decrease_priority_inheritance(struct task_struct* t,
14255 + struct task_struct* prio_inh,
14256 + raw_spinlock_t *to_unlock,
14257 + unsigned long irqflags)
14258 +{
14259 + struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock;
14260 + decrease_priority_inheritance(t, prio_inh);
14261 +
14263 +	raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);  // unlock t's heap.
14263 +
14264 + if(blocked_lock) {
14265 + if(blocked_lock->ops->propagate_decrease_inheritance) {
14266 + TRACE_TASK(t, "Inheritor is blocked (...perhaps). Checking lock %d.\n",
14267 + blocked_lock->ident);
14268 +
14269 + // beware: recursion
14270 + blocked_lock->ops->propagate_decrease_inheritance(blocked_lock, t,
14271 + to_unlock,
14272 + irqflags);
14273 + }
14274 + else {
14275 + TRACE_TASK(t, "Inheritor is blocked on lock (%p) that does not support nesting!\n",
14276 + blocked_lock);
14277 + unlock_fine_irqrestore(to_unlock, irqflags);
14278 + }
14279 + }
14280 + else {
14281 + TRACE_TASK(t, "is not blocked. No propagation.\n");
14282 + unlock_fine_irqrestore(to_unlock, irqflags);
14283 + }
14284 +}
14285 +
14286 +
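The two nested_*_priority_inheritance() helpers above encode a hand-over-hand locking discipline: the caller holds t's hp_blocked_tasks_lock and the fine-grained lock 'to_unlock'; the helper applies the priority change, drops t's heap lock, and either hands responsibility for releasing 'to_unlock' to the blocked lock's propagate callback (which continues down the blocking chain) or releases it itself. A minimal user-space sketch of that walk, with hypothetical fake_task/fake_lock types standing in for task_struct and litmus_lock (a simplified model, not the kernel code):

#include <stdio.h>
#include <pthread.h>

struct fake_lock;                          /* stands in for struct litmus_lock   */

struct fake_task {                         /* stands in for struct task_struct   */
	const char *name;
	int eff_prio;                      /* smaller value = higher priority     */
	pthread_mutex_t heap_lock;         /* models hp_blocked_tasks_lock        */
	struct fake_lock *blocked_on;      /* NULL if the task is not blocked     */
};

struct fake_lock {
	pthread_mutex_t inner;             /* fine-grained lock handed down chain */
	struct fake_task *owner;
};

/* On entry the caller holds t->heap_lock and *to_unlock; both are released
 * before returning, mirroring the preconditions documented above. */
static void propagate_increase(struct fake_task *t, int new_prio,
                               pthread_mutex_t *to_unlock)
{
	if (new_prio < t->eff_prio)
		t->eff_prio = new_prio;                /* apply inheritance       */

	pthread_mutex_unlock(&t->heap_lock);           /* drop t's heap lock      */

	if (t->blocked_on) {
		struct fake_lock *l = t->blocked_on;

		pthread_mutex_lock(&l->inner);         /* take the next link...   */
		pthread_mutex_unlock(to_unlock);       /* ...then drop the previous */

		pthread_mutex_lock(&l->owner->heap_lock);
		propagate_increase(l->owner, new_prio, &l->inner);   /* recursion */
	} else {
		pthread_mutex_unlock(to_unlock);       /* end of the chain        */
	}
}

int main(void)
{
	static struct fake_task owner   = { "owner",   5, PTHREAD_MUTEX_INITIALIZER, NULL };
	static struct fake_lock L       = { PTHREAD_MUTEX_INITIALIZER, &owner };
	static struct fake_task blocked = { "blocked", 9, PTHREAD_MUTEX_INITIALIZER, &L };
	static pthread_mutex_t outer    = PTHREAD_MUTEX_INITIALIZER;

	/* caller's obligations: hold the target's heap lock and 'to_unlock' */
	pthread_mutex_lock(&blocked.heap_lock);
	pthread_mutex_lock(&outer);
	propagate_increase(&blocked, 1, &outer);

	printf("owner's effective priority: %d\n", owner.eff_prio);   /* prints 1 */
	return 0;
}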
14287 +/* ******************** RSM MUTEX ********************** */
14288 +
14289 +static struct litmus_lock_ops gsnedf_rsm_mutex_lock_ops = {
14290 + .lock = rsm_mutex_lock,
14291 + .unlock = rsm_mutex_unlock,
14292 + .close = rsm_mutex_close,
14293 + .deallocate = rsm_mutex_free,
14294 +
14295 + .propagate_increase_inheritance = rsm_mutex_propagate_increase_inheritance,
14296 + .propagate_decrease_inheritance = rsm_mutex_propagate_decrease_inheritance,
14297 +
14298 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
14299 + .dgl_lock = rsm_mutex_dgl_lock,
14300 + .is_owner = rsm_mutex_is_owner,
14301 + .enable_priority = rsm_mutex_enable_priority,
14302 +#endif
14303 +};
14304 +
14305 +static struct litmus_lock* gsnedf_new_rsm_mutex(void)
14306 +{
14307 + return rsm_mutex_new(&gsnedf_rsm_mutex_lock_ops);
14308 +}
14309 +
14310 +/* ******************** IKGLP ********************** */
14311 +
14312 +static struct litmus_lock_ops gsnedf_ikglp_lock_ops = {
14313 + .lock = ikglp_lock,
14314 + .unlock = ikglp_unlock,
14315 + .close = ikglp_close,
14316 + .deallocate = ikglp_free,
14317 +
14318 + // ikglp can only be an outer-most lock.
14319 + .propagate_increase_inheritance = NULL,
14320 + .propagate_decrease_inheritance = NULL,
14321 +};
14322 +
14323 +static struct litmus_lock* gsnedf_new_ikglp(void* __user arg)
14324 +{
14325 + return ikglp_new(num_online_cpus(), &gsnedf_ikglp_lock_ops, arg);
14326 +}
14327 +
14328 +#endif /* CONFIG_LITMUS_NESTED_LOCKING */
14329 +
14330 +
14331 +/* ******************** KFMLP support ********************** */
14332 +
14333 +static struct litmus_lock_ops gsnedf_kfmlp_lock_ops = {
14334 + .lock = kfmlp_lock,
14335 + .unlock = kfmlp_unlock,
14336 + .close = kfmlp_close,
14337 + .deallocate = kfmlp_free,
14338 +
14339 + // kfmlp can only be an outer-most lock.
14340 + .propagate_increase_inheritance = NULL,
14341 + .propagate_decrease_inheritance = NULL,
14342 +};
14343 +
14344 +
14345 +static struct litmus_lock* gsnedf_new_kfmlp(void* __user arg)
14346 +{
14347 + return kfmlp_new(&gsnedf_kfmlp_lock_ops, arg);
14348 +}
14349 +
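The constructors above simply bind protocol-generic implementations (rsm_mutex_*, ikglp_*, kfmlp_*) to a plugin-specific litmus_lock_ops table; a NULL propagate_* slot marks a protocol that can only be an outermost lock. A self-contained sketch of the same ops-table idiom, using illustrative names (demo_lock/demo_lock_ops are not the LITMUS^RT types):

#include <stdio.h>
#include <stdlib.h>

struct demo_lock;

struct demo_lock_ops {
	int  (*lock)(struct demo_lock *l);
	int  (*unlock)(struct demo_lock *l);
	/* NULL means "nesting/propagation not supported", as for KFMLP/IKGLP */
	void (*propagate_increase)(struct demo_lock *l, int new_prio);
};

struct demo_lock {
	const struct demo_lock_ops *ops;
	int locked;
};

static int demo_mutex_lock(struct demo_lock *l)   { l->locked = 1; return 0; }
static int demo_mutex_unlock(struct demo_lock *l) { l->locked = 0; return 0; }

static const struct demo_lock_ops demo_mutex_ops = {
	.lock = demo_mutex_lock,
	.unlock = demo_mutex_unlock,
	.propagate_increase = NULL,          /* outermost-only lock */
};

/* analogous to gsnedf_new_kfmlp(): allocate the lock and bind the ops table */
static struct demo_lock *demo_new_mutex(void)
{
	struct demo_lock *l = calloc(1, sizeof(*l));
	if (l)
		l->ops = &demo_mutex_ops;
	return l;
}

int main(void)
{
	struct demo_lock *l = demo_new_mutex();
	if (!l)
		return 1;
	l->ops->lock(l);
	printf("locked = %d\n", l->locked);
	l->ops->unlock(l);
	free(l);
	return 0;
}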
14350 /* ******************** FMLP support ********************** */
14351
14352 /* struct for semaphore with priority inheritance */
14353 @@ -797,7 +1508,7 @@ int gsnedf_fmlp_lock(struct litmus_lock* l)
14354 if (edf_higher_prio(t, sem->hp_waiter)) {
14355 sem->hp_waiter = t;
14356 if (edf_higher_prio(t, sem->owner))
14357 - set_priority_inheritance(sem->owner, sem->hp_waiter);
14358 + increase_priority_inheritance(sem->owner, sem->hp_waiter);
14359 }
14360
14361 TS_LOCK_SUSPEND;
14362 @@ -865,7 +1576,7 @@ int gsnedf_fmlp_unlock(struct litmus_lock* l)
14363 /* Well, if next is not the highest-priority waiter,
14364 * then it ought to inherit the highest-priority
14365 * waiter's priority. */
14366 - set_priority_inheritance(next, sem->hp_waiter);
14367 + increase_priority_inheritance(next, sem->hp_waiter);
14368 }
14369
14370 /* wake up next */
14371 @@ -876,7 +1587,7 @@ int gsnedf_fmlp_unlock(struct litmus_lock* l)
14372
14373 /* we lose the benefit of priority inheritance (if any) */
14374 if (tsk_rt(t)->inh_task)
14375 - clear_priority_inheritance(t);
14376 + decrease_priority_inheritance(t, NULL);
14377
14378 out:
14379 spin_unlock_irqrestore(&sem->wait.lock, flags);
14380 @@ -914,6 +1625,11 @@ static struct litmus_lock_ops gsnedf_fmlp_lock_ops = {
14381 .lock = gsnedf_fmlp_lock,
14382 .unlock = gsnedf_fmlp_unlock,
14383 .deallocate = gsnedf_fmlp_free,
14384 +
14385 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
14386 + .propagate_increase_inheritance = NULL,
14387 + .propagate_decrease_inheritance = NULL
14388 +#endif
14389 };
14390
14391 static struct litmus_lock* gsnedf_new_fmlp(void)
14392 @@ -932,47 +1648,121 @@ static struct litmus_lock* gsnedf_new_fmlp(void)
14393 return &sem->litmus_lock;
14394 }
14395
14396 -/* **** lock constructor **** */
14397 -
14398
14399 static long gsnedf_allocate_lock(struct litmus_lock **lock, int type,
14400 - void* __user unused)
14401 + void* __user args)
14402 {
14403 - int err = -ENXIO;
14404 + int err;
14405
14406 - /* GSN-EDF currently only supports the FMLP for global resources. */
14407 switch (type) {
14408
14409 case FMLP_SEM:
14410 /* Flexible Multiprocessor Locking Protocol */
14411 *lock = gsnedf_new_fmlp();
14412 - if (*lock)
14413 - err = 0;
14414 - else
14415 - err = -ENOMEM;
14416 + break;
14417 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
14418 + case RSM_MUTEX:
14419 + *lock = gsnedf_new_rsm_mutex();
14420 break;
14421
14422 + case IKGLP_SEM:
14423 + *lock = gsnedf_new_ikglp(args);
14424 + break;
14425 +#endif
14426 + case KFMLP_SEM:
14427 + *lock = gsnedf_new_kfmlp(args);
14428 + break;
14429 + default:
14430 + err = -ENXIO;
14431 + goto UNSUPPORTED_LOCK;
14432 };
14433
14434 + if (*lock)
14435 + err = 0;
14436 + else
14437 + err = -ENOMEM;
14438 +
14439 +UNSUPPORTED_LOCK:
14440 return err;
14441 }
14442
14443 +#endif // CONFIG_LITMUS_LOCKING
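In the reworked gsnedf_allocate_lock() above, -ENXIO now means "protocol not supported by this plugin", and -ENOMEM is reported only when a known protocol's constructor returns NULL. A compact standalone model of that dispatch (the type codes and names are illustrative, not the FDSO lock constants):

#include <errno.h>
#include <stdlib.h>

enum { DEMO_FMLP = 1, DEMO_KFMLP = 2 };        /* illustrative type codes   */

struct demo_lock { int type; };

static struct demo_lock *demo_ctor(int type)
{
	struct demo_lock *l = malloc(sizeof(*l));
	if (l)
		l->type = type;
	return l;
}

static long demo_allocate_lock(struct demo_lock **lock, int type)
{
	switch (type) {
	case DEMO_FMLP:
	case DEMO_KFMLP:
		*lock = demo_ctor(type);
		break;
	default:
		return -ENXIO;                 /* unsupported protocol      */
	}
	return (*lock) ? 0 : -ENOMEM;          /* constructor out of memory */
}

int main(void)
{
	struct demo_lock *l = NULL;
	long err = demo_allocate_lock(&l, DEMO_KFMLP);
	free(l);
	return (int)-err;                      /* 0 on success              */
}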
14444 +
14445 +
14446 +
14447 +
14448 +
14449 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
14450 +static struct affinity_observer_ops gsnedf_kfmlp_affinity_ops = {
14451 + .close = kfmlp_aff_obs_close,
14452 + .deallocate = kfmlp_aff_obs_free,
14453 +};
14454 +
14455 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
14456 +static struct affinity_observer_ops gsnedf_ikglp_affinity_ops = {
14457 + .close = ikglp_aff_obs_close,
14458 + .deallocate = ikglp_aff_obs_free,
14459 +};
14460 #endif
14461
14462 +static long gsnedf_allocate_affinity_observer(
14463 + struct affinity_observer **aff_obs,
14464 + int type,
14465 + void* __user args)
14466 +{
14467 + int err;
14468 +
14469 + switch (type) {
14470 +
14471 + case KFMLP_SIMPLE_GPU_AFF_OBS:
14472 + *aff_obs = kfmlp_simple_gpu_aff_obs_new(&gsnedf_kfmlp_affinity_ops, args);
14473 + break;
14474 +
14475 + case KFMLP_GPU_AFF_OBS:
14476 + *aff_obs = kfmlp_gpu_aff_obs_new(&gsnedf_kfmlp_affinity_ops, args);
14477 + break;
14478 +
14479 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
14480 + case IKGLP_SIMPLE_GPU_AFF_OBS:
14481 + *aff_obs = ikglp_simple_gpu_aff_obs_new(&gsnedf_ikglp_affinity_ops, args);
14482 + break;
14483 +
14484 + case IKGLP_GPU_AFF_OBS:
14485 + *aff_obs = ikglp_gpu_aff_obs_new(&gsnedf_ikglp_affinity_ops, args);
14486 + break;
14487 +#endif
14488 + default:
14489 + err = -ENXIO;
14490 + goto UNSUPPORTED_AFF_OBS;
14491 + };
14492 +
14493 + if (*aff_obs)
14494 + err = 0;
14495 + else
14496 + err = -ENOMEM;
14497 +
14498 +UNSUPPORTED_AFF_OBS:
14499 + return err;
14500 +}
14501 +#endif
14502 +
14503 +
14504 +
14505 +
14506
14507 static long gsnedf_activate_plugin(void)
14508 {
14509 int cpu;
14510 cpu_entry_t *entry;
14511
14512 - bheap_init(&gsnedf_cpu_heap);
14513 + INIT_BINHEAP_HANDLE(&gsnedf_cpu_heap, cpu_lower_prio);
14514 #ifdef CONFIG_RELEASE_MASTER
14515 gsnedf.release_master = atomic_read(&release_master_cpu);
14516 #endif
14517
14518 for_each_online_cpu(cpu) {
14519 entry = &per_cpu(gsnedf_cpu_entries, cpu);
14520 - bheap_node_init(&entry->hn, entry);
14521 + INIT_BINHEAP_NODE(&entry->hn);
14522 entry->linked = NULL;
14523 entry->scheduled = NULL;
14524 #ifdef CONFIG_RELEASE_MASTER
14525 @@ -986,6 +1776,20 @@ static long gsnedf_activate_plugin(void)
14526 }
14527 #endif
14528 }
14529 +
14530 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
14531 + gsnedf_pending_tasklets.head = NULL;
14532 + gsnedf_pending_tasklets.tail = &(gsnedf_pending_tasklets.head);
14533 +#endif
14534 +
14535 +#ifdef CONFIG_LITMUS_SOFTIRQD
14536 + spawn_klitirqd(NULL);
14537 +#endif
14538 +
14539 +#ifdef CONFIG_LITMUS_NVIDIA
14540 + init_nvidia_info();
14541 +#endif
14542 +
14543 return 0;
14544 }
14545
14546 @@ -1002,8 +1806,31 @@ static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = {
14547 .task_block = gsnedf_task_block,
14548 .admit_task = gsnedf_admit_task,
14549 .activate_plugin = gsnedf_activate_plugin,
14550 + .compare = edf_higher_prio,
14551 #ifdef CONFIG_LITMUS_LOCKING
14552 .allocate_lock = gsnedf_allocate_lock,
14553 + .increase_prio = increase_priority_inheritance,
14554 + .decrease_prio = decrease_priority_inheritance,
14555 +#endif
14556 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
14557 + .nested_increase_prio = nested_increase_priority_inheritance,
14558 + .nested_decrease_prio = nested_decrease_priority_inheritance,
14559 + .__compare = __edf_higher_prio,
14560 +#endif
14561 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
14562 + .get_dgl_spinlock = gsnedf_get_dgl_spinlock,
14563 +#endif
14564 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
14565 + .allocate_aff_obs = gsnedf_allocate_affinity_observer,
14566 +#endif
14567 +#ifdef CONFIG_LITMUS_SOFTIRQD
14568 + .increase_prio_klitirqd = increase_priority_inheritance_klitirqd,
14569 + .decrease_prio_klitirqd = decrease_priority_inheritance_klitirqd,
14570 +#endif
14571 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
14572 + .enqueue_pai_tasklet = gsnedf_enqueue_pai_tasklet,
14573 + .change_prio_pai_tasklet = gsnedf_change_prio_pai_tasklet,
14574 + .run_tasklets = gsnedf_run_tasklets,
14575 #endif
14576 };
14577
14578 @@ -1013,15 +1840,20 @@ static int __init init_gsn_edf(void)
14579 int cpu;
14580 cpu_entry_t *entry;
14581
14582 - bheap_init(&gsnedf_cpu_heap);
14583 + INIT_BINHEAP_HANDLE(&gsnedf_cpu_heap, cpu_lower_prio);
14584 /* initialize CPU state */
14585 - for (cpu = 0; cpu < NR_CPUS; cpu++) {
14586 + for (cpu = 0; cpu < NR_CPUS; ++cpu) {
14587 entry = &per_cpu(gsnedf_cpu_entries, cpu);
14588 gsnedf_cpus[cpu] = entry;
14589 entry->cpu = cpu;
14590 - entry->hn = &gsnedf_heap_node[cpu];
14591 - bheap_node_init(&entry->hn, entry);
14592 +
14593 + INIT_BINHEAP_NODE(&entry->hn);
14594 }
14595 +
14596 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
14597 + raw_spin_lock_init(&dgl_lock);
14598 +#endif
14599 +
14600 edf_domain_init(&gsnedf, NULL, gsnedf_release_jobs);
14601 return register_sched_plugin(&gsn_edf_plugin);
14602 }
14603 diff --git a/litmus/sched_litmus.c b/litmus/sched_litmus.c
14604 index 5a15ce9..9a6fe48 100644
14605 --- a/litmus/sched_litmus.c
14606 +++ b/litmus/sched_litmus.c
14607 @@ -103,7 +103,9 @@ litmus_schedule(struct rq *rq, struct task_struct *prev)
14608 }
14609 #ifdef __ARCH_WANT_UNLOCKED_CTXSW
14610 if (next->oncpu)
14611 + {
14612 TRACE_TASK(next, "waiting for !oncpu");
14613 + }
14614 while (next->oncpu) {
14615 cpu_relax();
14616 mb();
14617 diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
14618 index 00a1900..245e41c 100644
14619 --- a/litmus/sched_plugin.c
14620 +++ b/litmus/sched_plugin.c
14621 @@ -13,6 +13,10 @@
14622 #include <litmus/preempt.h>
14623 #include <litmus/jobs.h>
14624
14625 +#ifdef CONFIG_LITMUS_NVIDIA
14626 +#include <litmus/nvidia_info.h>
14627 +#endif
14628 +
14629 /*
14630 * Generic function to trigger preemption on either local or remote cpu
14631 * from scheduler plugins. The key feature is that this function is
14632 @@ -102,6 +106,9 @@ static long litmus_dummy_complete_job(void)
14633
14634 static long litmus_dummy_activate_plugin(void)
14635 {
14636 +#ifdef CONFIG_LITMUS_NVIDIA
14637 + shutdown_nvidia_info();
14638 +#endif
14639 return 0;
14640 }
14641
14642 @@ -110,14 +117,93 @@ static long litmus_dummy_deactivate_plugin(void)
14643 return 0;
14644 }
14645
14646 -#ifdef CONFIG_LITMUS_LOCKING
14647 +static int litmus_dummy_compare(struct task_struct* a, struct task_struct* b)
14648 +{
14649 + TRACE_CUR("WARNING: Dummy compare function called!\n");
14650 + return 0;
14651 +}
14652
14653 +#ifdef CONFIG_LITMUS_LOCKING
14654 static long litmus_dummy_allocate_lock(struct litmus_lock **lock, int type,
14655 void* __user config)
14656 {
14657 return -ENXIO;
14658 }
14659
14660 +static void litmus_dummy_increase_prio(struct task_struct* t, struct task_struct* prio_inh)
14661 +{
14662 +}
14663 +
14664 +static void litmus_dummy_decrease_prio(struct task_struct* t, struct task_struct* prio_inh)
14665 +{
14666 +}
14667 +#endif
14668 +
14669 +#ifdef CONFIG_LITMUS_SOFTIRQD
14671 +static void litmus_dummy_increase_prio_klitirqd(struct task_struct* klitirqd,
14671 + struct task_struct* old_owner,
14672 + struct task_struct* new_owner)
14673 +{
14674 +}
14675 +
14676 +static void litmus_dummy_decrease_prio_klitirqd(struct task_struct* klitirqd,
14678 +					struct task_struct* old_owner, struct task_struct* new_owner)
14678 +{
14679 +}
14680 +#endif
14681 +
14682 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
14683 +static int litmus_dummy_enqueue_pai_tasklet(struct tasklet_struct* t)
14684 +{
14685 + TRACE("%s: PAI Tasklet unsupported in this plugin!!!!!!\n", __FUNCTION__);
14686 + return(0); // failure.
14687 +}
14688 +
14689 +static void litmus_dummy_change_prio_pai_tasklet(struct task_struct *old_prio,
14690 + struct task_struct *new_prio)
14691 +{
14692 + TRACE("%s: PAI Tasklet unsupported in this plugin!!!!!!\n", __FUNCTION__);
14693 +}
14694 +
14695 +static void litmus_dummy_run_tasklets(struct task_struct* t)
14696 +{
14697 + //TRACE("%s: PAI Tasklet unsupported in this plugin!!!!!!\n", __FUNCTION__);
14698 +}
14699 +#endif
14700 +
14701 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
14702 +static void litmus_dummy_nested_increase_prio(struct task_struct* t, struct task_struct* prio_inh,
14703 + raw_spinlock_t *to_unlock, unsigned long irqflags)
14704 +{
14705 +}
14706 +
14707 +static void litmus_dummy_nested_decrease_prio(struct task_struct* t, struct task_struct* prio_inh,
14708 + raw_spinlock_t *to_unlock, unsigned long irqflags)
14709 +{
14710 +}
14711 +
14712 +static int litmus_dummy___compare(struct task_struct* a, comparison_mode_t a_mod,
14713 + struct task_struct* b, comparison_mode_t b_mode)
14714 +{
14715 + TRACE_CUR("WARNING: Dummy compare function called!\n");
14716 + return 0;
14717 +}
14718 +#endif
14719 +
14720 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
14721 +static raw_spinlock_t* litmus_dummy_get_dgl_spinlock(struct task_struct *t)
14722 +{
14723 + return NULL;
14724 +}
14725 +#endif
14726 +
14727 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
14728 +static long litmus_dummy_allocate_aff_obs(struct affinity_observer **aff_obs,
14729 + int type,
14730 + void* __user config)
14731 +{
14732 + return -ENXIO;
14733 +}
14734 #endif
14735
14736
14737 @@ -136,9 +222,33 @@ struct sched_plugin linux_sched_plugin = {
14738 .finish_switch = litmus_dummy_finish_switch,
14739 .activate_plugin = litmus_dummy_activate_plugin,
14740 .deactivate_plugin = litmus_dummy_deactivate_plugin,
14741 + .compare = litmus_dummy_compare,
14742 #ifdef CONFIG_LITMUS_LOCKING
14743 .allocate_lock = litmus_dummy_allocate_lock,
14744 + .increase_prio = litmus_dummy_increase_prio,
14745 + .decrease_prio = litmus_dummy_decrease_prio,
14746 +#endif
14747 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
14748 + .nested_increase_prio = litmus_dummy_nested_increase_prio,
14749 + .nested_decrease_prio = litmus_dummy_nested_decrease_prio,
14750 + .__compare = litmus_dummy___compare,
14751 +#endif
14752 +#ifdef CONFIG_LITMUS_SOFTIRQD
14753 + .increase_prio_klitirqd = litmus_dummy_increase_prio_klitirqd,
14754 + .decrease_prio_klitirqd = litmus_dummy_decrease_prio_klitirqd,
14755 +#endif
14756 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
14757 + .enqueue_pai_tasklet = litmus_dummy_enqueue_pai_tasklet,
14758 + .change_prio_pai_tasklet = litmus_dummy_change_prio_pai_tasklet,
14759 + .run_tasklets = litmus_dummy_run_tasklets,
14760 +#endif
14761 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
14762 + .get_dgl_spinlock = litmus_dummy_get_dgl_spinlock,
14763 #endif
14764 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
14765 + .allocate_aff_obs = litmus_dummy_allocate_aff_obs,
14766 +#endif
14767 +
14768 .admit_task = litmus_dummy_admit_task
14769 };
14770
14771 @@ -174,8 +284,31 @@ int register_sched_plugin(struct sched_plugin* plugin)
14772 CHECK(complete_job);
14773 CHECK(activate_plugin);
14774 CHECK(deactivate_plugin);
14775 + CHECK(compare);
14776 #ifdef CONFIG_LITMUS_LOCKING
14777 CHECK(allocate_lock);
14778 + CHECK(increase_prio);
14779 + CHECK(decrease_prio);
14780 +#endif
14781 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
14782 + CHECK(nested_increase_prio);
14783 + CHECK(nested_decrease_prio);
14784 + CHECK(__compare);
14785 +#endif
14786 +#ifdef CONFIG_LITMUS_SOFTIRQD
14787 + CHECK(increase_prio_klitirqd);
14788 + CHECK(decrease_prio_klitirqd);
14789 +#endif
14790 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
14791 + CHECK(enqueue_pai_tasklet);
14792 + CHECK(change_prio_pai_tasklet);
14793 + CHECK(run_tasklets);
14794 +#endif
14795 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
14796 + CHECK(get_dgl_spinlock);
14797 +#endif
14798 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
14799 + CHECK(allocate_aff_obs);
14800 #endif
14801 CHECK(admit_task);
14802
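register_sched_plugin() validates each callback with CHECK(); the macro itself is defined earlier in sched_plugin.c and is not visible in this hunk, but it presumably substitutes the matching litmus_dummy_* fallback when a plugin leaves a slot NULL. A self-contained sketch of that fill-in-defaults idiom (the struct and names are illustrative, not the LITMUS^RT definitions):

#include <stdio.h>

struct demo_plugin {
	void (*schedule)(void);
	void (*task_wake_up)(void);
};

static void demo_dummy_schedule(void)     { }
static void demo_dummy_task_wake_up(void) { }

/* Fill in a dummy implementation for any callback the plugin left NULL. */
#define CHECK(func) do {				\
	if (!plugin->func)				\
		plugin->func = demo_dummy_##func;	\
} while (0)

static void demo_register(struct demo_plugin *plugin)
{
	CHECK(schedule);
	CHECK(task_wake_up);
}

int main(void)
{
	struct demo_plugin p = { .schedule = NULL, .task_wake_up = NULL };
	demo_register(&p);
	printf("%d %d\n", p.schedule != NULL, p.task_wake_up != NULL);  /* 1 1 */
	return 0;
}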
14803 diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c
14804 index 5ef8d09..f7f5753 100644
14805 --- a/litmus/sched_task_trace.c
14806 +++ b/litmus/sched_task_trace.c
14807 @@ -7,6 +7,7 @@
14808 #include <linux/module.h>
14809 #include <linux/sched.h>
14810 #include <linux/percpu.h>
14811 +#include <linux/hardirq.h>
14812
14813 #include <litmus/ftdev.h>
14814 #include <litmus/litmus.h>
14815 @@ -16,13 +17,13 @@
14816 #include <litmus/ftdev.h>
14817
14818
14819 -#define NO_EVENTS (1 << CONFIG_SCHED_TASK_TRACE_SHIFT)
14820 +#define NUM_EVENTS (1 << (CONFIG_SCHED_TASK_TRACE_SHIFT+11))
14821
14822 #define now() litmus_clock()
14823
14824 struct local_buffer {
14825 - struct st_event_record record[NO_EVENTS];
14826 - char flag[NO_EVENTS];
14827 + struct st_event_record record[NUM_EVENTS];
14828 + char flag[NUM_EVENTS];
14829 struct ft_buffer ftbuf;
14830 };
14831
14832 @@ -41,7 +42,7 @@ static int __init init_sched_task_trace(void)
14833 int i, ok = 0, err;
14834 printk("Allocated %u sched_trace_xxx() events per CPU "
14835 "(buffer size: %d bytes)\n",
14836 - NO_EVENTS, (int) sizeof(struct local_buffer));
14837 + NUM_EVENTS, (int) sizeof(struct local_buffer));
14838
14839 err = ftdev_init(&st_dev, THIS_MODULE,
14840 num_online_cpus(), "sched_trace");
14841 @@ -50,7 +51,7 @@ static int __init init_sched_task_trace(void)
14842
14843 for (i = 0; i < st_dev.minor_cnt; i++) {
14844 buf = &per_cpu(st_event_buffer, i);
14845 - ok += init_ft_buffer(&buf->ftbuf, NO_EVENTS,
14846 + ok += init_ft_buffer(&buf->ftbuf, NUM_EVENTS,
14847 sizeof(struct st_event_record),
14848 buf->flag,
14849 buf->record);
14850 @@ -154,7 +155,8 @@ feather_callback void do_sched_trace_task_switch_to(unsigned long id,
14851 {
14852 struct task_struct *t = (struct task_struct*) _task;
14853 struct st_event_record* rec;
14854 - if (is_realtime(t)) {
14855 + //if (is_realtime(t)) /* comment out to trace EVERYTHING */
14856 + {
14857 rec = get_record(ST_SWITCH_TO, t);
14858 if (rec) {
14859 rec->data.switch_to.when = now();
14860 @@ -169,7 +171,8 @@ feather_callback void do_sched_trace_task_switch_away(unsigned long id,
14861 {
14862 struct task_struct *t = (struct task_struct*) _task;
14863 struct st_event_record* rec;
14864 - if (is_realtime(t)) {
14865 + //if (is_realtime(t)) /* comment out to trace EVERYTHING */
14866 + {
14867 rec = get_record(ST_SWITCH_AWAY, t);
14868 if (rec) {
14869 rec->data.switch_away.when = now();
14870 @@ -188,6 +191,9 @@ feather_callback void do_sched_trace_task_completion(unsigned long id,
14871 if (rec) {
14872 rec->data.completion.when = now();
14873 rec->data.completion.forced = forced;
14875 +#ifdef CONFIG_LITMUS_NVIDIA
14875 + rec->data.completion.nv_int_count = (u16)atomic_read(&tsk_rt(t)->nv_int_count);
14876 +#endif
14877 put_record(rec);
14878 }
14879 }
14880 @@ -239,3 +245,265 @@ feather_callback void do_sched_trace_action(unsigned long id,
14881 put_record(rec);
14882 }
14883 }
14884 +
14885 +
14886 +
14887 +
14888 +feather_callback void do_sched_trace_prediction_err(unsigned long id,
14889 + unsigned long _task,
14890 + unsigned long _distance,
14891 + unsigned long _rel_err)
14892 +{
14893 + struct task_struct *t = (struct task_struct*) _task;
14894 + struct st_event_record *rec = get_record(ST_PREDICTION_ERR, t);
14895 +
14896 + if (rec) {
14897 + gpu_migration_dist_t* distance = (gpu_migration_dist_t*) _distance;
14898 + fp_t* rel_err = (fp_t*) _rel_err;
14899 +
14900 + rec->data.prediction_err.distance = *distance;
14901 + rec->data.prediction_err.rel_err = rel_err->val;
14902 + put_record(rec);
14903 + }
14904 +}
14905 +
14906 +
14907 +feather_callback void do_sched_trace_migration(unsigned long id,
14908 + unsigned long _task,
14909 + unsigned long _mig_info)
14910 +{
14911 + struct task_struct *t = (struct task_struct*) _task;
14912 + struct st_event_record *rec = get_record(ST_MIGRATION, t);
14913 +
14914 + if (rec) {
14915 + struct migration_info* mig_info = (struct migration_info*) _mig_info;
14916 +
14917 + rec->hdr.extra = mig_info->distance;
14918 + rec->data.migration.observed = mig_info->observed;
14919 + rec->data.migration.estimated = mig_info->estimated;
14920 +
14921 + put_record(rec);
14922 + }
14923 +}
14924 +
14925 +
14926 +
14927 +
14928 +
14929 +
14930 +
14931 +
14932 +
14933 +feather_callback void do_sched_trace_tasklet_release(unsigned long id,
14934 + unsigned long _owner)
14935 +{
14936 + struct task_struct *t = (struct task_struct*) _owner;
14937 + struct st_event_record *rec = get_record(ST_TASKLET_RELEASE, t);
14938 +
14939 + if (rec) {
14940 + rec->data.tasklet_release.when = now();
14941 + put_record(rec);
14942 + }
14943 +}
14944 +
14945 +
14946 +feather_callback void do_sched_trace_tasklet_begin(unsigned long id,
14947 + unsigned long _owner)
14948 +{
14949 + struct task_struct *t = (struct task_struct*) _owner;
14950 + struct st_event_record *rec = get_record(ST_TASKLET_BEGIN, t);
14951 +
14952 + if (rec) {
14953 + rec->data.tasklet_begin.when = now();
14954 +
14955 + if(!in_interrupt())
14956 + rec->data.tasklet_begin.exe_pid = current->pid;
14957 + else
14958 + rec->data.tasklet_begin.exe_pid = 0;
14959 +
14960 + put_record(rec);
14961 + }
14962 +}
14963 +EXPORT_SYMBOL(do_sched_trace_tasklet_begin);
14964 +
14965 +
14966 +feather_callback void do_sched_trace_tasklet_end(unsigned long id,
14967 + unsigned long _owner,
14968 + unsigned long _flushed)
14969 +{
14970 + struct task_struct *t = (struct task_struct*) _owner;
14971 + struct st_event_record *rec = get_record(ST_TASKLET_END, t);
14972 +
14973 + if (rec) {
14974 + rec->data.tasklet_end.when = now();
14975 + rec->data.tasklet_end.flushed = _flushed;
14976 +
14977 + if(!in_interrupt())
14978 + rec->data.tasklet_end.exe_pid = current->pid;
14979 + else
14980 + rec->data.tasklet_end.exe_pid = 0;
14981 +
14982 + put_record(rec);
14983 + }
14984 +}
14985 +EXPORT_SYMBOL(do_sched_trace_tasklet_end);
14986 +
14987 +
14988 +feather_callback void do_sched_trace_work_release(unsigned long id,
14989 + unsigned long _owner)
14990 +{
14991 + struct task_struct *t = (struct task_struct*) _owner;
14992 + struct st_event_record *rec = get_record(ST_WORK_RELEASE, t);
14993 +
14994 + if (rec) {
14995 + rec->data.work_release.when = now();
14996 + put_record(rec);
14997 + }
14998 +}
14999 +
15000 +
15001 +feather_callback void do_sched_trace_work_begin(unsigned long id,
15002 + unsigned long _owner,
15003 + unsigned long _exe)
15004 +{
15005 + struct task_struct *t = (struct task_struct*) _owner;
15006 + struct st_event_record *rec = get_record(ST_WORK_BEGIN, t);
15007 +
15008 + if (rec) {
15009 + struct task_struct *exe = (struct task_struct*) _exe;
15010 + rec->data.work_begin.exe_pid = exe->pid;
15011 + rec->data.work_begin.when = now();
15012 + put_record(rec);
15013 + }
15014 +}
15015 +EXPORT_SYMBOL(do_sched_trace_work_begin);
15016 +
15017 +
15018 +feather_callback void do_sched_trace_work_end(unsigned long id,
15019 + unsigned long _owner,
15020 + unsigned long _exe,
15021 + unsigned long _flushed)
15022 +{
15023 + struct task_struct *t = (struct task_struct*) _owner;
15024 + struct st_event_record *rec = get_record(ST_WORK_END, t);
15025 +
15026 + if (rec) {
15027 + struct task_struct *exe = (struct task_struct*) _exe;
15028 + rec->data.work_end.exe_pid = exe->pid;
15029 + rec->data.work_end.flushed = _flushed;
15030 + rec->data.work_end.when = now();
15031 + put_record(rec);
15032 + }
15033 +}
15034 +EXPORT_SYMBOL(do_sched_trace_work_end);
15035 +
15036 +
15037 +feather_callback void do_sched_trace_eff_prio_change(unsigned long id,
15038 + unsigned long _task,
15039 + unsigned long _inh)
15040 +{
15041 + struct task_struct *t = (struct task_struct*) _task;
15042 + struct st_event_record *rec = get_record(ST_EFF_PRIO_CHANGE, t);
15043 +
15044 + if (rec) {
15045 + struct task_struct *inh = (struct task_struct*) _inh;
15046 + rec->data.effective_priority_change.when = now();
15047 + rec->data.effective_priority_change.inh_pid = (inh != NULL) ?
15048 + inh->pid :
15049 + 0xffff;
15050 +
15051 + put_record(rec);
15052 + }
15053 +}
15054 +
15055 +/* pray for no nesting of nv interrupts on same CPU... */
15056 +struct tracing_interrupt_map
15057 +{
15058 + int active;
15059 + int count;
15060 + unsigned long data[128]; // assume nesting less than 128...
15061 + unsigned long serial[128];
15062 +};
15063 +DEFINE_PER_CPU(struct tracing_interrupt_map, active_interrupt_tracing);
15064 +
15065 +
15066 +DEFINE_PER_CPU(u32, intCounter);
15067 +
15068 +feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id,
15069 + unsigned long _device)
15070 +{
15071 + struct st_event_record *rec;
15072 + u32 serialNum;
15073 +
15074 + {
15075 + u32* serial;
15076 + struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id());
15078 +		if(int_map->active == 0xcafebabe)  /* already tracing on this CPU: nested interrupt */
15078 + {
15079 + int_map->count++;
15080 + }
15081 + else
15082 + {
15083 + int_map->active = 0xcafebabe;
15084 + int_map->count = 1;
15085 + }
15086 + //int_map->data[int_map->count-1] = _device;
15087 +
15088 + serial = &per_cpu(intCounter, smp_processor_id());
15089 + *serial += num_online_cpus();
15090 + serialNum = *serial;
15091 + int_map->serial[int_map->count-1] = serialNum;
15092 + }
15093 +
15094 + rec = get_record(ST_NV_INTERRUPT_BEGIN, NULL);
15095 + if(rec) {
15096 + u32 device = _device;
15097 + rec->data.nv_interrupt_begin.when = now();
15098 + rec->data.nv_interrupt_begin.device = device;
15099 + rec->data.nv_interrupt_begin.serialNumber = serialNum;
15100 + put_record(rec);
15101 + }
15102 +}
15103 +EXPORT_SYMBOL(do_sched_trace_nv_interrupt_begin);
15104 +
15105 +/*
15106 +int is_interrupt_tracing_active(void)
15107 +{
15108 + struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id());
15109 + if(int_map->active == 0xcafebabe)
15110 + return 1;
15111 + return 0;
15112 +}
15113 +*/
15114 +
15115 +feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id, unsigned long _device)
15116 +{
15117 + struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id());
15118 + if(int_map->active == 0xcafebabe)
15119 + {
15120 + struct st_event_record *rec = get_record(ST_NV_INTERRUPT_END, NULL);
15121 +
15122 + int_map->count--;
15123 + if(int_map->count == 0)
15124 + int_map->active = 0;
15125 +
15126 + if(rec) {
15127 + u32 device = _device;
15128 + rec->data.nv_interrupt_end.when = now();
15129 + //rec->data.nv_interrupt_end.device = int_map->data[int_map->count];
15130 + rec->data.nv_interrupt_end.device = device;
15131 + rec->data.nv_interrupt_end.serialNumber = int_map->serial[int_map->count];
15132 + put_record(rec);
15133 + }
15134 + }
15135 +}
15136 +EXPORT_SYMBOL(do_sched_trace_nv_interrupt_end);
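The begin/end pair above tracks nested NVIDIA interrupts per CPU with a sentinel (0xcafebabe) plus a depth counter, and stamps each begin record with a serial number drawn from a per-CPU counter that advances in strides of num_online_cpus(). A small single-CPU, user-space model of that bookkeeping (hypothetical names, not the kernel structures; note that the stride only yields globally distinct serials if each CPU's counter starts from a different offset, which this hunk does not show):

#include <stdio.h>

#define TRACING_ACTIVE 0xcafebabe

/* Per-CPU bookkeeping, modelled here for a single CPU. */
static struct {
	unsigned int active;            /* sentinel: inside an NV interrupt?  */
	int count;                      /* current nesting depth              */
	unsigned long serial[128];
} int_map;

static unsigned long serial_counter;
static const int nr_cpus = 4;           /* stand-in for num_online_cpus()     */

static unsigned long nv_interrupt_begin(void)
{
	if (int_map.active == TRACING_ACTIVE) {
		int_map.count++;                    /* nested interrupt       */
	} else {
		int_map.active = TRACING_ACTIVE;    /* first level            */
		int_map.count = 1;
	}
	serial_counter += nr_cpus;                  /* stride, as in the patch */
	int_map.serial[int_map.count - 1] = serial_counter;
	return serial_counter;
}

static unsigned long nv_interrupt_end(void)
{
	unsigned long serial = 0;
	if (int_map.active == TRACING_ACTIVE) {
		int_map.count--;
		serial = int_map.serial[int_map.count];
		if (int_map.count == 0)
			int_map.active = 0;
	}
	return serial;
}

int main(void)
{
	printf("begin #1 -> %lu\n", nv_interrupt_begin());  /* 4          */
	printf("begin #2 -> %lu\n", nv_interrupt_begin());  /* 8 (nested) */
	printf("end   #2 -> %lu\n", nv_interrupt_end());    /* 8          */
	printf("end   #1 -> %lu\n", nv_interrupt_end());    /* 4          */
	return 0;
}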
15137 +
15138 +
15139 +
15140 +
15141 +
15142 +
15143 +
15144 +
15145 +
15146 diff --git a/litmus/sched_trace_external.c b/litmus/sched_trace_external.c
15147 new file mode 100644
15148 index 0000000..cf8e1d7
15149 --- /dev/null
15150 +++ b/litmus/sched_trace_external.c
15151 @@ -0,0 +1,64 @@
15152 +#include <linux/module.h>
15153 +
15154 +#include <litmus/trace.h>
15155 +#include <litmus/sched_trace.h>
15156 +#include <litmus/litmus.h>
15157 +
15158 +void __sched_trace_tasklet_begin_external(struct task_struct* t)
15159 +{
15160 + sched_trace_tasklet_begin(t);
15161 +}
15162 +EXPORT_SYMBOL(__sched_trace_tasklet_begin_external);
15163 +
15164 +void __sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed)
15165 +{
15166 + sched_trace_tasklet_end(t, flushed);
15167 +}
15168 +EXPORT_SYMBOL(__sched_trace_tasklet_end_external);
15169 +
15170 +
15171 +
15172 +void __sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e)
15173 +{
15174 + sched_trace_work_begin(t, e);
15175 +}
15176 +EXPORT_SYMBOL(__sched_trace_work_begin_external);
15177 +
15178 +void __sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f)
15179 +{
15180 + sched_trace_work_end(t, e, f);
15181 +}
15182 +EXPORT_SYMBOL(__sched_trace_work_end_external);
15183 +
15184 +
15185 +
15186 +void __sched_trace_nv_interrupt_begin_external(u32 device)
15187 +{
15188 + //unsigned long _device = device;
15189 + sched_trace_nv_interrupt_begin((unsigned long)device);
15190 +}
15191 +EXPORT_SYMBOL(__sched_trace_nv_interrupt_begin_external);
15192 +
15193 +void __sched_trace_nv_interrupt_end_external(u32 device)
15194 +{
15195 + //unsigned long _device = device;
15196 + sched_trace_nv_interrupt_end((unsigned long)device);
15197 +}
15198 +EXPORT_SYMBOL(__sched_trace_nv_interrupt_end_external);
15199 +
15200 +
15201 +#ifdef CONFIG_LITMUS_NVIDIA
15202 +
15203 +#define EXX_TS(evt) \
15204 +void __##evt(void) { evt; } \
15205 +EXPORT_SYMBOL(__##evt);
15206 +
15207 +EXX_TS(TS_NV_TOPISR_START)
15208 +EXX_TS(TS_NV_TOPISR_END)
15209 +EXX_TS(TS_NV_BOTISR_START)
15210 +EXX_TS(TS_NV_BOTISR_END)
15211 +EXX_TS(TS_NV_RELEASE_BOTISR_START)
15212 +EXX_TS(TS_NV_RELEASE_BOTISR_END)
15213 +
15214 +#endif
15215 +
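For reference, each EXX_TS() invocation above expands to an exported wrapper around the corresponding Feather-Trace timestamp macro; the first one, for instance, expands to roughly:

void __TS_NV_TOPISR_START(void) { TS_NV_TOPISR_START; }
EXPORT_SYMBOL(__TS_NV_TOPISR_START);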
15216 --
15217 1.7.9.5