Attachment 'gpusync-rtss12.patch'

   1 From a463f9a9e04385f0729f7435a0a6dff7d89b25de Mon Sep 17 00:00:00 2001
   2 From: Glenn Elliott <gelliott@cs.unc.edu>
   3 Date: Sat, 26 May 2012 17:29:58 -0400
   4 Subject: [PATCH] GPUSync patch for Litmus 2012.1.
   5 
   6 ---
   7  arch/x86/kernel/irq.c                 |    4 +
   8  arch/x86/kernel/syscall_table_32.S    |    1 +
   9  include/linux/completion.h            |    1 +
  10  include/linux/interrupt.h             |   10 +-
  11  include/linux/mutex.h                 |   10 +
  12  include/linux/semaphore.h             |    9 +
  13  include/linux/workqueue.h             |   18 +
  14  include/litmus/binheap.h              |  207 +++
  15  include/litmus/edf_common.h           |   12 +
  16  include/litmus/fdso.h                 |   14 +-
  17  include/litmus/fpmath.h               |  145 ++
  18  include/litmus/gpu_affinity.h         |   49 +
  19  include/litmus/ikglp_lock.h           |  160 ++
  20  include/litmus/kexclu_affinity.h      |   35 +
  21  include/litmus/kfmlp_lock.h           |   97 ++
  22  include/litmus/litmus.h               |    9 +-
  23  include/litmus/litmus_softirq.h       |  199 +++
  24  include/litmus/locking.h              |  142 +-
  25  include/litmus/nvidia_info.h          |   46 +
  26  include/litmus/preempt.h              |    2 +-
  27  include/litmus/rsm_lock.h             |   54 +
  28  include/litmus/rt_param.h             |  100 +-
  29  include/litmus/sched_plugin.h         |   76 +-
  30  include/litmus/sched_trace.h          |  218 ++-
  31  include/litmus/sched_trace_external.h |   78 +
  32  include/litmus/trace.h                |   34 +-
  33  include/litmus/unistd_32.h            |    5 +-
  34  include/litmus/unistd_64.h            |    9 +-
  35  kernel/lockdep.c                      |    7 +-
  36  kernel/mutex.c                        |  125 ++
  37  kernel/sched.c                        |   27 +
  38  kernel/semaphore.c                    |   13 +-
  39  kernel/softirq.c                      |  322 +++-
  40  kernel/workqueue.c                    |   71 +-
  41  litmus/Kconfig                        |  148 +-
  42  litmus/Makefile                       |   11 +-
  43  litmus/affinity.c                     |    2 +-
  44  litmus/binheap.c                      |  443 +++++
  45  litmus/edf_common.c                   |  147 +-
  46  litmus/fdso.c                         |   13 +
  47  litmus/gpu_affinity.c                 |  113 ++
  48  litmus/ikglp_lock.c                   | 2838 +++++++++++++++++++++++++++++++++
  49  litmus/jobs.c                         |   17 +-
  50  litmus/kexclu_affinity.c              |   92 ++
  51  litmus/kfmlp_lock.c                   | 1002 ++++++++++++
  52  litmus/litmus.c                       |  126 +-
  53  litmus/litmus_pai_softirq.c           |   64 +
  54  litmus/litmus_proc.c                  |   17 +
  55  litmus/litmus_softirq.c               | 1582 ++++++++++++++++++
  56  litmus/locking.c                      |  393 ++++-
  57  litmus/nvidia_info.c                  |  597 +++++++
  58  litmus/preempt.c                      |    5 +
  59  litmus/rsm_lock.c                     |  796 +++++++++
  60  litmus/sched_cedf.c                   | 1062 +++++++++++-
  61  litmus/sched_gsn_edf.c                | 1032 ++++++++++--
  62  litmus/sched_litmus.c                 |    2 +
  63  litmus/sched_plugin.c                 |  135 +-
  64  litmus/sched_task_trace.c             |  282 +++-
  65  litmus/sched_trace_external.c         |   64 +
  66  59 files changed, 13012 insertions(+), 280 deletions(-)
  67  create mode 100644 include/litmus/binheap.h
  68  create mode 100644 include/litmus/fpmath.h
  69  create mode 100644 include/litmus/gpu_affinity.h
  70  create mode 100644 include/litmus/ikglp_lock.h
  71  create mode 100644 include/litmus/kexclu_affinity.h
  72  create mode 100644 include/litmus/kfmlp_lock.h
  73  create mode 100644 include/litmus/litmus_softirq.h
  74  create mode 100644 include/litmus/nvidia_info.h
  75  create mode 100644 include/litmus/rsm_lock.h
  76  create mode 100644 include/litmus/sched_trace_external.h
  77  create mode 100644 litmus/binheap.c
  78  create mode 100644 litmus/gpu_affinity.c
  79  create mode 100644 litmus/ikglp_lock.c
  80  create mode 100644 litmus/kexclu_affinity.c
  81  create mode 100644 litmus/kfmlp_lock.c
  82  create mode 100644 litmus/litmus_pai_softirq.c
  83  create mode 100644 litmus/litmus_softirq.c
  84  create mode 100644 litmus/nvidia_info.c
  85  create mode 100644 litmus/rsm_lock.c
  86  create mode 100644 litmus/sched_trace_external.c
  87 
  88 diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
  89 index 6c0802e..680a5cb 100644
  90 --- a/arch/x86/kernel/irq.c
  91 +++ b/arch/x86/kernel/irq.c
  92 @@ -10,6 +10,10 @@
  93  #include <linux/ftrace.h>
  94  #include <linux/delay.h>
  95  
  96 +#ifdef CONFIG_LITMUS_NVIDIA
  97 +#include <litmus/sched_trace.h>
  98 +#endif
  99 +
 100  #include <asm/apic.h>
 101  #include <asm/io_apic.h>
 102  #include <asm/irq.h>
 103 diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
 104 index d012622..0cb4373 100644
 105 --- a/arch/x86/kernel/syscall_table_32.S
 106 +++ b/arch/x86/kernel/syscall_table_32.S
 107 @@ -358,3 +358,4 @@ ENTRY(sys_call_table)
 108  	.long sys_wait_for_ts_release
 109  	.long sys_release_ts		/* +10 */
 110  	.long sys_null_call
 111 +	.long sys_register_nv_device
 112 diff --git a/include/linux/completion.h b/include/linux/completion.h
 113 index 9d72727..cff405c 100644
 114 --- a/include/linux/completion.h
 115 +++ b/include/linux/completion.h
 116 @@ -76,6 +76,7 @@ static inline void init_completion(struct completion *x)
 117  	init_waitqueue_head(&x->wait);
 118  }
 119  
 120 +extern void __wait_for_completion_locked(struct completion *);
 121  extern void wait_for_completion(struct completion *);
 122  extern int wait_for_completion_interruptible(struct completion *x);
 123  extern int wait_for_completion_killable(struct completion *x);
 124 diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
 125 index f6efed0..57a7bc8 100644
 126 --- a/include/linux/interrupt.h
 127 +++ b/include/linux/interrupt.h
 128 @@ -445,6 +445,7 @@ static inline void __raise_softirq_irqoff(unsigned int nr)
 129  
 130  extern void raise_softirq_irqoff(unsigned int nr);
 131  extern void raise_softirq(unsigned int nr);
 132 +extern void wakeup_softirqd(void);
 133  
 134  /* This is the worklist that queues up per-cpu softirq work.
 135   *
 136 @@ -500,6 +501,10 @@ struct tasklet_struct
 137  	atomic_t count;
 138  	void (*func)(unsigned long);
 139  	unsigned long data;
 140 +
 141 +#if defined(CONFIG_LITMUS_SOFTIRQD) || defined(CONFIG_LITMUS_PAI_SOFTIRQD)
 142 +	struct task_struct *owner;
 143 +#endif
 144  };
 145  
 146  #define DECLARE_TASKLET(name, func, data) \
 147 @@ -537,6 +542,7 @@ static inline void tasklet_unlock_wait(struct tasklet_struct *t)
 148  #define tasklet_unlock(t) do { } while (0)
 149  #endif
 150  
 151 +extern void ___tasklet_schedule(struct tasklet_struct *t);
 152  extern void __tasklet_schedule(struct tasklet_struct *t);
 153  
 154  static inline void tasklet_schedule(struct tasklet_struct *t)
 155 @@ -545,6 +551,7 @@ static inline void tasklet_schedule(struct tasklet_struct *t)
 156  		__tasklet_schedule(t);
 157  }
 158  
 159 +extern void ___tasklet_hi_schedule(struct tasklet_struct *t);
 160  extern void __tasklet_hi_schedule(struct tasklet_struct *t);
 161  
 162  static inline void tasklet_hi_schedule(struct tasklet_struct *t)
 163 @@ -553,6 +560,7 @@ static inline void tasklet_hi_schedule(struct tasklet_struct *t)
 164  		__tasklet_hi_schedule(t);
 165  }
 166  
 167 +extern void ___tasklet_hi_schedule_first(struct tasklet_struct *t);
 168  extern void __tasklet_hi_schedule_first(struct tasklet_struct *t);
 169  
 170  /*
 171 @@ -582,7 +590,7 @@ static inline void tasklet_disable(struct tasklet_struct *t)
 172  }
 173  
 174  static inline void tasklet_enable(struct tasklet_struct *t)
 175 -{
 176 +{	
 177  	smp_mb__before_atomic_dec();
 178  	atomic_dec(&t->count);
 179  }
 180 diff --git a/include/linux/mutex.h b/include/linux/mutex.h
 181 index a940fe4..cb47deb 100644
 182 --- a/include/linux/mutex.h
 183 +++ b/include/linux/mutex.h
 184 @@ -126,6 +126,15 @@ static inline int mutex_is_locked(struct mutex *lock)
 185  	return atomic_read(&lock->count) != 1;
 186  }
 187  
 188 +/* return non-zero to abort.  only pre-side-effects may abort */
 189 +typedef int (*side_effect_t)(unsigned long);
 190 +extern void mutex_lock_sfx(struct mutex *lock,
 191 +						   side_effect_t pre, unsigned long pre_arg,
 192 +						   side_effect_t post, unsigned long post_arg);
 193 +extern void mutex_unlock_sfx(struct mutex *lock,
 194 +							 side_effect_t pre, unsigned long pre_arg,
 195 +							 side_effect_t post, unsigned long post_arg);
 196 +
 197  /*
 198   * See kernel/mutex.c for detailed documentation of these APIs.
 199   * Also see Documentation/mutex-design.txt.
 200 @@ -153,6 +162,7 @@ extern void mutex_lock(struct mutex *lock);
 201  extern int __must_check mutex_lock_interruptible(struct mutex *lock);
 202  extern int __must_check mutex_lock_killable(struct mutex *lock);
 203  
 204 +
 205  # define mutex_lock_nested(lock, subclass) mutex_lock(lock)
 206  # define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock)
 207  # define mutex_lock_killable_nested(lock, subclass) mutex_lock_killable(lock)
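
The mutex_lock_sfx()/mutex_unlock_sfx() declarations above only establish the call signature; their semantics are supplied by the kernel/mutex.c changes later in this patch. As a rough sketch of the intended calling pattern (the callback names and the counter they maintain are hypothetical, and it is assumed that passing NULL skips a side effect):

/* Hypothetical sketch: a pre-side-effect that may veto the lock and a
 * post-side-effect that runs once the mutex is held. */
static DEFINE_MUTEX(example_mutex);
static unsigned long nr_waiters;

static int note_contention(unsigned long arg)
{
	unsigned long *counter = (unsigned long *)arg;
	(*counter)++;
	return 0;	/* non-zero would abort the lock attempt */
}

static int note_acquired(unsigned long arg)
{
	unsigned long *counter = (unsigned long *)arg;
	(*counter)--;
	return 0;	/* per the comment above, only pre-side-effects may abort */
}

static void example_locked_update(void)
{
	mutex_lock_sfx(&example_mutex,
		       note_contention, (unsigned long)&nr_waiters,
		       note_acquired, (unsigned long)&nr_waiters);
	/* ... critical section ... */
	mutex_unlock_sfx(&example_mutex, NULL, 0, NULL, 0);
}
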
 208 diff --git a/include/linux/semaphore.h b/include/linux/semaphore.h
 209 index 39fa049..c83fc2b 100644
 210 --- a/include/linux/semaphore.h
 211 +++ b/include/linux/semaphore.h
 212 @@ -43,4 +43,13 @@ extern int __must_check down_trylock(struct semaphore *sem);
 213  extern int __must_check down_timeout(struct semaphore *sem, long jiffies);
 214  extern void up(struct semaphore *sem);
 215  
 216 +extern void __down(struct semaphore *sem);
 217 +extern void __up(struct semaphore *sem);
 218 +
 219 +struct semaphore_waiter {
 220 +	struct list_head list;
 221 +	struct task_struct *task;
 222 +	int up;
 223 +};
 224 +
 225  #endif /* __LINUX_SEMAPHORE_H */
 226 diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
 227 index f584aba..1ec2ec7 100644
 228 --- a/include/linux/workqueue.h
 229 +++ b/include/linux/workqueue.h
 230 @@ -83,6 +83,9 @@ struct work_struct {
 231  #ifdef CONFIG_LOCKDEP
 232  	struct lockdep_map lockdep_map;
 233  #endif
 234 +#ifdef CONFIG_LITMUS_SOFTIRQD
 235 +	struct task_struct *owner;
 236 +#endif
 237  };
 238  
 239  #define WORK_DATA_INIT()	ATOMIC_LONG_INIT(WORK_STRUCT_NO_CPU)
 240 @@ -115,11 +118,25 @@ struct execute_work {
 241  #define __WORK_INIT_LOCKDEP_MAP(n, k)
 242  #endif
 243  
 244 +#ifdef CONFIG_LITMUS_SOFTIRQD
 245 +#define __WORK_INIT_OWNER() \
 246 +	.owner = NULL,
 247 +
 248 +#define PREPARE_OWNER(_work, _owner) \
 249 +	do { \
 250 +		(_work)->owner = (_owner); \
 251 +	} while(0)
 252 +#else
 253 +#define __WORK_INIT_OWNER()
 254 +#define PREPARE_OWNER(_work, _owner)
 255 +#endif
 256 +
 257  #define __WORK_INITIALIZER(n, f) {				\
 258  	.data = WORK_DATA_STATIC_INIT(),			\
 259  	.entry	= { &(n).entry, &(n).entry },			\
 260  	.func = (f),						\
 261  	__WORK_INIT_LOCKDEP_MAP(#n, &(n))			\
 262 +	__WORK_INIT_OWNER() \
 263  	}
 264  
 265  #define __DELAYED_WORK_INITIALIZER(n, f) {			\
 266 @@ -357,6 +374,7 @@ extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 267  extern void flush_workqueue(struct workqueue_struct *wq);
 268  extern void flush_scheduled_work(void);
 269  
 270 +extern int __schedule_work(struct work_struct *work);
 271  extern int schedule_work(struct work_struct *work);
 272  extern int schedule_work_on(int cpu, struct work_struct *work);
 273  extern int schedule_delayed_work(struct delayed_work *work, unsigned long delay);
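
The new owner field and PREPARE_OWNER() let deferred work be attributed to the real-time task on whose behalf it runs. A brief hypothetical sketch of tagging a work item before queueing it (my_work_fn and the choice of queueing call are placeholders; litmus_schedule_work() from litmus/litmus_softirq.h would target a specific klitirqd instead):

/* Hypothetical sketch: attribute a work item to its owning RT task. */
static void my_work_fn(struct work_struct *w)
{
	/* deferred processing on behalf of the owner */
}

static void example_queue_owned_work(struct task_struct *owner)
{
	static struct work_struct my_work;

	INIT_WORK(&my_work, my_work_fn);
	PREPARE_OWNER(&my_work, owner);	/* compiles to nothing without CONFIG_LITMUS_SOFTIRQD */
	schedule_work(&my_work);
}
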
 274 diff --git a/include/litmus/binheap.h b/include/litmus/binheap.h
 275 new file mode 100644
 276 index 0000000..9e966e3
 277 --- /dev/null
 278 +++ b/include/litmus/binheap.h
 279 @@ -0,0 +1,207 @@
 280 +#ifndef LITMUS_BINARY_HEAP_H
 281 +#define LITMUS_BINARY_HEAP_H
 282 +
 283 +#include <linux/kernel.h>
 284 +
 285 +/**
 286 + * Simple binary heap with add, arbitrary delete, delete_root, and top
 287 + * operations.
 288 + *
 289 + * Style meant to conform with list.h.
 290 + *
 291 + * Motivation: Linux's prio_heap.h is of fixed size. Litmus's binomial
 292 + * heap may be overkill (and perhaps not general enough) for some applications.
 293 + *
 294 + * Note: In order to make node swaps fast, a node inserted with a data pointer
 295 + * may not always hold said data pointer. This is similar to the binomial heap
 296 + * implementation. This does make node deletion tricky since we have to
 297 + * (1) locate the node that holds the data pointer to delete, and (2) locate
 298 + * the node that was originally inserted with said data pointer. These have to be
 299 + * coalesced into a single node before removal (see usage of
 300 + * __binheap_safe_swap()). We have to track node references to accomplish this.
 301 + */
 302 +
 303 +struct binheap_node {
 304 +	void	*data;
 305 +	struct binheap_node *parent;
 306 +	struct binheap_node *left;
 307 +	struct binheap_node *right;
 308 +
 309 +	/* pointer to binheap_node that holds *data for which this binheap_node
 310 +	 * was originally inserted.  (*data "owns" this node)
 311 +	 */
 312 +	struct binheap_node *ref;
 313 +	struct binheap_node **ref_ptr;
 314 +};
 315 +
 316 +/**
 317 + * Signature of comparator function.  Assumed 'less-than' (min-heap).
 318 + * Pass in 'greater-than' for max-heap.
 319 + *
 320 + * TODO: Consider macro-based implementation that allows comparator to be
 321 + * inlined (similar to Linux red/black tree) for greater efficiency.
 322 + */
 323 +typedef int (*binheap_order_t)(struct binheap_node *a,
 324 +							   struct binheap_node *b);
 325 +
 326 +
 327 +struct binheap_handle {
 328 +	struct binheap_node *root;
 329 +
 330 +	/* pointer to node to take next inserted child */
 331 +	struct binheap_node *next;
 332 +
 333 +	/* pointer to last node in complete binary tree */
 334 +	struct binheap_node *last;
 335 +
 336 +	/* comparator function pointer */
 337 +	binheap_order_t compare;
 338 +};
 339 +
 340 +
 341 +#define BINHEAP_POISON	((void*)(0xdeadbeef))
 342 +
 343 +
 344 +/**
 345 + * binheap_entry - get the struct for this heap node.
 346 + *  Only valid when called upon heap nodes other than the root handle.
 347 + * @ptr:	the heap node.
 348 + * @type:	the type of struct pointed to by binheap_node::data.
 349 + * @member:	unused.
 350 + */
 351 +#define binheap_entry(ptr, type, member) \
 352 +((type *)((ptr)->data))
 353 +
 354 +/**
 355 + * binheap_node_container - get the struct that contains this node.
 356 + *  Only valid when called upon heap nodes other than the root handle.
 357 + * @ptr:	the heap node.
 358 + * @type:	the type of struct the node is embedded in.
 359 + * @member:	the name of the binheap_struct within the (type) struct.
 360 + */
 361 +#define binheap_node_container(ptr, type, member) \
 362 +container_of((ptr), type, member)
 363 +
 364 +/**
 365 + * binheap_top_entry - get the struct for the node at the top of the heap.
 366 + *  Only valid when called upon the heap handle node.
 367 + * @ptr:    the special heap-handle node.
 368 + * @type:   the type of the struct the head is embedded in.
 369 + * @member:	the name of the binheap_struct within the (type) struct.
 370 + */
 371 +#define binheap_top_entry(ptr, type, member) \
 372 +binheap_entry((ptr)->root, type, member)
 373 +
 374 +/**
 375 + * binheap_delete_root - remove the root element from the heap.
 376 + * @handle:	 handle to the heap.
 377 + * @type:    the type of the struct the head is embedded in.
 378 + * @member:	 the name of the binheap_struct within the (type) struct.
 379 + */
 380 +#define binheap_delete_root(handle, type, member) \
 381 +__binheap_delete_root((handle), &((type *)((handle)->root->data))->member)
 382 +
 383 +/**
 384 + * binheap_delete - remove an arbitrary element from the heap.
 385 + * @to_delete:  pointer to node to be removed.
 386 + * @handle:	 handle to the heap.
 387 + */
 388 +#define binheap_delete(to_delete, handle) \
 389 +__binheap_delete((to_delete), (handle))
 390 +
 391 +/**
 392 + * binheap_add - insert an element to the heap
 393 + * new_node: node to add.
 394 + * @handle:	 handle to the heap.
 395 + * @type:    the type of the struct the head is embedded in.
 396 + * @member:	 the name of the binheap_struct within the (type) struct.
 397 + */
 398 +#define binheap_add(new_node, handle, type, member) \
 399 +__binheap_add((new_node), (handle), container_of((new_node), type, member))
 400 +
 401 +/**
 402 + * binheap_decrease - re-eval the position of a node (based upon its
 403 + * original data pointer).
 404 + * @handle: handle to the heap.
 405 + * @orig_node: node that was associated with the data pointer
 406 + *             (whose value has changed) when said pointer was
 407 + *             added to the heap.
 408 + */
 409 +#define binheap_decrease(orig_node, handle) \
 410 +__binheap_decrease((orig_node), (handle))
 411 +
 412 +#define BINHEAP_NODE_INIT() { NULL, BINHEAP_POISON, NULL, NULL, NULL, NULL }
 413 +
 414 +#define BINHEAP_NODE(name) \
 415 +	struct binheap_node name = BINHEAP_NODE_INIT()
 416 +
 417 +
 418 +static inline void INIT_BINHEAP_NODE(struct binheap_node *n)
 419 +{
 420 +	n->data = NULL;
 421 +	n->parent = BINHEAP_POISON;
 422 +	n->left = NULL;
 423 +	n->right = NULL;
 424 +	n->ref = NULL;
 425 +	n->ref_ptr = NULL;
 426 +}
 427 +
 428 +static inline void INIT_BINHEAP_HANDLE(
 429 +	struct binheap_handle *handle,
 430 +	binheap_order_t compare)
 431 +{
 432 +	handle->root = NULL;
 433 +	handle->next = NULL;
 434 +	handle->last = NULL;
 435 +	handle->compare = compare;
 436 +}
 437 +
 438 +/* Returns true (1) if binheap is empty. */
 439 +static inline int binheap_empty(struct binheap_handle *handle)
 440 +{
 441 +	return(handle->root == NULL);
 442 +}
 443 +
 444 +/* Returns true (1) if binheap node is in a heap. */
 445 +static inline int binheap_is_in_heap(struct binheap_node *node)
 446 +{
 447 +	return (node->parent != BINHEAP_POISON);
 448 +}
 449 +
 450 +
 451 +int binheap_is_in_this_heap(struct binheap_node *node, struct binheap_handle* heap);
 452 +
 453 +
 454 +
 455 +void __binheap_add(struct binheap_node *new_node,
 456 +	struct binheap_handle *handle,
 457 +	void *data);
 458 +
 459 +
 460 +/**
 461 + * Removes the root node from the heap. The node is removed after coalescing
 462 + * the binheap_node with its original data pointer at the root of the tree.
 463 + *
 464 + * The 'last' node in the tree is then swapped up to the root and bubbled
 465 + * down.
 466 + */
 467 +void __binheap_delete_root(struct binheap_handle *handle,
 468 +	struct binheap_node *container);
 469 +
 470 +/**
 471 + * Delete an arbitrary node.  Bubble node to delete up to the root,
 472 + * and then delete the root.
 473 + */
 474 +void __binheap_delete(
 475 +	struct binheap_node *node_to_delete,
 476 +	struct binheap_handle *handle);
 477 +
 478 +/**
 479 + * Bubble up a node whose pointer has decreased in value.
 480 + */
 481 +void __binheap_decrease(struct binheap_node *orig_node,
 482 +						struct binheap_handle *handle);
 483 +
 484 +
 485 +#endif
 486 +
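
To make the API above concrete, here is a minimal usage sketch. The 'job' struct and its deadline-ordered comparator are invented for illustration; only the binheap calls come from the header.

/* Illustrative only: a min-heap of hypothetical 'job' structs keyed by deadline. */
struct job {
	int deadline;
	struct binheap_node heap_node;	/* node embedded in the payload struct */
};

/* 'less-than' comparator => min-heap ordered by deadline */
static int job_order(struct binheap_node *a, struct binheap_node *b)
{
	struct job *ja = binheap_entry(a, struct job, heap_node);
	struct job *jb = binheap_entry(b, struct job, heap_node);
	return ja->deadline < jb->deadline;
}

static void binheap_example(void)
{
	struct binheap_handle pending;
	struct job j1 = { .deadline = 10 };
	struct job j2 = { .deadline = 5 };
	struct job *earliest;

	INIT_BINHEAP_HANDLE(&pending, job_order);
	INIT_BINHEAP_NODE(&j1.heap_node);
	INIT_BINHEAP_NODE(&j2.heap_node);

	binheap_add(&j1.heap_node, &pending, struct job, heap_node);
	binheap_add(&j2.heap_node, &pending, struct job, heap_node);

	earliest = binheap_top_entry(&pending, struct job, heap_node);	/* j2 */
	binheap_delete_root(&pending, struct job, heap_node);

	binheap_delete(&j1.heap_node, &pending);	/* arbitrary removal */
	(void)earliest;
}
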
 487 diff --git a/include/litmus/edf_common.h b/include/litmus/edf_common.h
 488 index bbaf22e..63dff7e 100644
 489 --- a/include/litmus/edf_common.h
 490 +++ b/include/litmus/edf_common.h
 491 @@ -20,6 +20,18 @@ int edf_higher_prio(struct task_struct* first,
 492  
 493  int edf_ready_order(struct bheap_node* a, struct bheap_node* b);
 494  
 495 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
 496 +/* binheap_nodes must be embedded within 'struct litmus_lock' */
 497 +int edf_max_heap_order(struct binheap_node *a, struct binheap_node *b);
 498 +int edf_min_heap_order(struct binheap_node *a, struct binheap_node *b);
 499 +int edf_max_heap_base_priority_order(struct binheap_node *a, struct binheap_node *b);
 500 +int edf_min_heap_base_priority_order(struct binheap_node *a, struct binheap_node *b);
 501 +
 502 +int __edf_higher_prio(struct task_struct* first, comparison_mode_t first_mode,
 503 +					  struct task_struct* second, comparison_mode_t second_mode);
 504 +
 505 +#endif
 506 +
 507  int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t);
 508  
 509  #endif
 510 diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h
 511 index caf2a1e..1f5d3bd 100644
 512 --- a/include/litmus/fdso.h
 513 +++ b/include/litmus/fdso.h
 514 @@ -20,7 +20,16 @@ typedef enum  {
 515  	FMLP_SEM	= 0,
 516  	SRP_SEM		= 1,
 517  
 518 -	MAX_OBJ_TYPE	= 1
 519 +	RSM_MUTEX	= 2,
 520 +	IKGLP_SEM	= 3,
 521 +	KFMLP_SEM	= 4,
 522 +
 523 +	IKGLP_SIMPLE_GPU_AFF_OBS = 5,
 524 +	IKGLP_GPU_AFF_OBS = 6,
 525 +	KFMLP_SIMPLE_GPU_AFF_OBS = 7,
 526 +	KFMLP_GPU_AFF_OBS = 8,
 527 +
 528 +	MAX_OBJ_TYPE	= 8
 529  } obj_type_t;
 530  
 531  struct inode_obj_id {
 532 @@ -64,8 +73,11 @@ static inline void* od_lookup(int od, obj_type_t type)
 533  }
 534  
 535  #define lookup_fmlp_sem(od)((struct pi_semaphore*)  od_lookup(od, FMLP_SEM))
 536 +#define lookup_kfmlp_sem(od)((struct pi_semaphore*)  od_lookup(od, KFMLP_SEM))
 537  #define lookup_srp_sem(od) ((struct srp_semaphore*) od_lookup(od, SRP_SEM))
 538  #define lookup_ics(od)     ((struct ics*)           od_lookup(od, ICS_ID))
 539  
 540 +#define lookup_rsm_mutex(od)((struct litmus_lock*)  od_lookup(od, FMLP_SEM))
 541 +
 542  
 543  #endif
 544 diff --git a/include/litmus/fpmath.h b/include/litmus/fpmath.h
 545 new file mode 100644
 546 index 0000000..04d4bca
 547 --- /dev/null
 548 +++ b/include/litmus/fpmath.h
 549 @@ -0,0 +1,145 @@
 550 +#ifndef __FP_MATH_H__
 551 +#define __FP_MATH_H__
 552 +
 553 +#ifndef __KERNEL__
 554 +#include <stdint.h>
 555 +#define abs(x) (((x) < 0) ? -(x) : x)
 556 +#endif
 557 +
 558 +// Use 64-bit because we want to track things at the nanosecond scale.
 559 +// This can lead to very large numbers.
 560 +typedef int64_t fpbuf_t;
 561 +typedef struct
 562 +{
 563 +	fpbuf_t val;
 564 +} fp_t;
 565 +
 566 +#define FP_SHIFT 10
 567 +#define ROUND_BIT (FP_SHIFT - 1)
 568 +
 569 +#define _fp(x) ((fp_t) {x})
 570 +
 571 +#ifdef __KERNEL__
 572 +static const fp_t LITMUS_FP_ZERO = {.val = 0};
 573 +static const fp_t LITMUS_FP_ONE = {.val = (1 << FP_SHIFT)};
 574 +#endif
 575 +
 576 +static inline fp_t FP(fpbuf_t x)
 577 +{
 578 +	return _fp(((fpbuf_t) x) << FP_SHIFT);
 579 +}
 580 +
 581 +/* divide two integers to obtain a fixed point value  */
 582 +static inline fp_t _frac(fpbuf_t a, fpbuf_t b)
 583 +{
 584 +	return _fp(FP(a).val / (b));
 585 +}
 586 +
 587 +static inline fpbuf_t _point(fp_t x)
 588 +{
 589 +	return (x.val % (1 << FP_SHIFT));
 590 +
 591 +}
 592 +
 593 +#define fp2str(x) x.val
 594 +/*(x.val >> FP_SHIFT), (x.val % (1 << FP_SHIFT)) */
 595 +#define _FP_  "%ld/1024"
 596 +
 597 +static inline fpbuf_t _floor(fp_t x)
 598 +{
 599 +	return x.val >> FP_SHIFT;
 600 +}
 601 +
 602 +/* FIXME: negative rounding */
 603 +static inline fpbuf_t _round(fp_t x)
 604 +{
 605 +	return _floor(x) + ((x.val >> ROUND_BIT) & 1);
 606 +}
 607 +
 608 +/* multiply two fixed point values */
 609 +static inline fp_t _mul(fp_t a, fp_t b)
 610 +{
 611 +	return _fp((a.val * b.val) >> FP_SHIFT);
 612 +}
 613 +
 614 +static inline fp_t _div(fp_t a, fp_t b)
 615 +{
 616 +#if !defined(__KERNEL__) && !defined(unlikely)
 617 +#define unlikely(x) (x)
 618 +#define DO_UNDEF_UNLIKELY
 619 +#endif
 620 +	/* try not to overflow */
 621 +	if (unlikely(  a.val > (2l << ((sizeof(fpbuf_t)*8) - FP_SHIFT)) ))
 622 +		return _fp((a.val / b.val) << FP_SHIFT);
 623 +	else
 624 +		return _fp((a.val << FP_SHIFT) / b.val);
 625 +#ifdef DO_UNDEF_UNLIKELY
 626 +#undef unlikely
 627 +#undef DO_UNDEF_UNLIKELY
 628 +#endif
 629 +}
 630 +
 631 +static inline fp_t _add(fp_t a, fp_t b)
 632 +{
 633 +	return _fp(a.val + b.val);
 634 +}
 635 +
 636 +static inline fp_t _sub(fp_t a, fp_t b)
 637 +{
 638 +	return _fp(a.val - b.val);
 639 +}
 640 +
 641 +static inline fp_t _neg(fp_t x)
 642 +{
 643 +	return _fp(-x.val);
 644 +}
 645 +
 646 +static inline fp_t _abs(fp_t x)
 647 +{
 648 +	return _fp(abs(x.val));
 649 +}
 650 +
 651 +/* works the same as casting float/double to integer */
 652 +static inline fpbuf_t _fp_to_integer(fp_t x)
 653 +{
 654 +	return _floor(_abs(x)) * ((x.val > 0) ? 1 : -1);
 655 +}
 656 +
 657 +static inline fp_t _integer_to_fp(fpbuf_t x)
 658 +{
 659 +	return _frac(x,1);
 660 +}
 661 +
 662 +static inline int _leq(fp_t a, fp_t b)
 663 +{
 664 +	return a.val <= b.val;
 665 +}
 666 +
 667 +static inline int _geq(fp_t a, fp_t b)
 668 +{
 669 +	return a.val >= b.val;
 670 +}
 671 +
 672 +static inline int _lt(fp_t a, fp_t b)
 673 +{
 674 +	return a.val < b.val;
 675 +}
 676 +
 677 +static inline int _gt(fp_t a, fp_t b)
 678 +{
 679 +	return a.val > b.val;
 680 +}
 681 +
 682 +static inline int _eq(fp_t a, fp_t b)
 683 +{
 684 +	return a.val == b.val;
 685 +}
 686 +
 687 +static inline fp_t _max(fp_t a, fp_t b)
 688 +{
 689 +	if (a.val < b.val)
 690 +		return b;
 691 +	else
 692 +		return a;
 693 +}
 694 +#endif
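
With FP_SHIFT = 10, an fp_t stores a value in units of 1/1024. A brief worked example (purely illustrative):

/* Illustrative only: fixed-point arithmetic in units of 1/1024. */
static inline void fpmath_example(void)
{
	fp_t third = _frac(1, 3);           /* (1 << 10) / 3 = 341, i.e. 341/1024 ~ 0.333 */
	fp_t five  = _integer_to_fp(5);     /* 5120/1024 = 5.0 */
	fp_t prod  = _mul(five, third);     /* (5120 * 341) >> 10 = 1705, i.e. ~1.665 */
	fpbuf_t i  = _fp_to_integer(prod);  /* truncates toward zero: 1 */
	(void)i;
}
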
 695 diff --git a/include/litmus/gpu_affinity.h b/include/litmus/gpu_affinity.h
 696 new file mode 100644
 697 index 0000000..6b3fb8b
 698 --- /dev/null
 699 +++ b/include/litmus/gpu_affinity.h
 700 @@ -0,0 +1,49 @@
 701 +#ifndef LITMUS_GPU_AFFINITY_H
 702 +#define LITMUS_GPU_AFFINITY_H
 703 +
 704 +#include <litmus/rt_param.h>
 705 +#include <litmus/sched_plugin.h>
 706 +#include <litmus/litmus.h>
 707 +
 708 +void update_gpu_estimate(struct task_struct* t, lt_t observed);
 709 +gpu_migration_dist_t gpu_migration_distance(int a, int b);
 710 +
 711 +static inline void reset_gpu_tracker(struct task_struct* t)
 712 +{
 713 +	t->rt_param.accum_gpu_time = 0;
 714 +}
 715 +
 716 +static inline void start_gpu_tracker(struct task_struct* t)
 717 +{
 718 +	t->rt_param.gpu_time_stamp = litmus_clock();
 719 +}
 720 +
 721 +static inline void stop_gpu_tracker(struct task_struct* t)
 722 +{
 723 +	lt_t now = litmus_clock();
 724 +	t->rt_param.accum_gpu_time += (now - t->rt_param.gpu_time_stamp);
 725 +}
 726 +
 727 +static inline lt_t get_gpu_time(struct task_struct* t)
 728 +{
 729 +	return t->rt_param.accum_gpu_time;
 730 +}
 731 +
 732 +static inline lt_t get_gpu_estimate(struct task_struct* t, gpu_migration_dist_t dist)
 733 +{
 734 +	int i;
 735 +	fpbuf_t temp = _fp_to_integer(t->rt_param.gpu_migration_est[dist].est);
 736 +	lt_t val = (temp >= 0) ? temp : 0;  // never allow negative estimates...
 737 +
 738 +	WARN_ON(temp < 0);
 739 +
 740 +	// lower-bound a distant migration to be at least equal to the level
 741 +	// below it.
 742 +	for(i = dist-1; (val == 0) && (i >= MIG_LOCAL); --i) {
 743 +		val = _fp_to_integer(t->rt_param.gpu_migration_est[i].est);
 744 +	}
 745 +
 746 +	return ((val > 0) ? val : dist+1);
 747 +}
 748 +
 749 +#endif
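
A sketch of how the tracker and estimator above are intended to fit around one GPU access. The surrounding calls are placeholders, and exactly where update_gpu_estimate() is invoked is decided by the scheduler plugins later in the patch:

/* Sketch only: account GPU execution time and feed it back to the estimator. */
static void example_gpu_access(struct task_struct *t, int last_gpu, int new_gpu)
{
	gpu_migration_dist_t dist = gpu_migration_distance(last_gpu, new_gpu);
	lt_t predicted = get_gpu_estimate(t, dist);	/* e.g., for queue-length estimates */

	reset_gpu_tracker(t);
	start_gpu_tracker(t);
	/* ... issue GPU work and wait for completion ... */
	stop_gpu_tracker(t);

	update_gpu_estimate(t, get_gpu_time(t));	/* observed time updates the feedback estimate */
	(void)predicted;
}
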
 750 diff --git a/include/litmus/ikglp_lock.h b/include/litmus/ikglp_lock.h
 751 new file mode 100644
 752 index 0000000..af6f151
 753 --- /dev/null
 754 +++ b/include/litmus/ikglp_lock.h
 755 @@ -0,0 +1,160 @@
 756 +#ifndef LITMUS_IKGLP_H
 757 +#define LITMUS_IKGLP_H
 758 +
 759 +#include <litmus/litmus.h>
 760 +#include <litmus/binheap.h>
 761 +#include <litmus/locking.h>
 762 +
 763 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
 764 +#include <litmus/kexclu_affinity.h>
 765 +
 766 +struct ikglp_affinity;
 767 +#endif
 768 +
 769 +typedef struct ikglp_heap_node
 770 +{
 771 +	struct task_struct *task;
 772 +	struct binheap_node node;
 773 +} ikglp_heap_node_t;
 774 +
 775 +struct fifo_queue;
 776 +struct ikglp_wait_state;
 777 +
 778 +typedef struct ikglp_donee_heap_node
 779 +{
 780 +	struct task_struct *task;
 781 +	struct fifo_queue *fq;
 782 +	struct ikglp_wait_state *donor_info;  // cross-linked with ikglp_wait_state_t of donor
 783 +
 784 +	struct binheap_node node;
 785 +} ikglp_donee_heap_node_t;
 786 +
 787 +// Maintains the state of a request as it goes through the IKGLP
 788 +typedef struct ikglp_wait_state {
 789 +	struct task_struct *task;  // pointer back to the requesting task
 790 +
 791 +	// Data for while waiting in FIFO Queue
 792 +	wait_queue_t fq_node;
 793 +	ikglp_heap_node_t global_heap_node;
 794 +	ikglp_donee_heap_node_t donee_heap_node;
 795 +
 796 +	// Data for while waiting in PQ
 797 +	ikglp_heap_node_t pq_node;
 798 +
 799 +	// Data for while waiting as a donor
 800 +	ikglp_donee_heap_node_t *donee_info;  // cross-linked with donee's ikglp_donee_heap_node_t
 801 +	struct nested_info prio_donation;
 802 +	struct binheap_node node;
 803 +} ikglp_wait_state_t;
 804 +
 805 +/* struct for semaphore with priority inheritance */
 806 +struct fifo_queue
 807 +{
 808 +	wait_queue_head_t wait;
 809 +	struct task_struct* owner;
 810 +
 811 +	// used for bookkeeping
 812 +	ikglp_heap_node_t global_heap_node;
 813 +	ikglp_donee_heap_node_t donee_heap_node;
 814 +
 815 +	struct task_struct* hp_waiter;
 816 +	int count; /* number of waiters + holder */
 817 +
 818 +	struct nested_info nest;
 819 +};
 820 +
 821 +struct ikglp_semaphore
 822 +{
 823 +	struct litmus_lock litmus_lock;
 824 +
 825 +	raw_spinlock_t	lock;
 826 +	raw_spinlock_t	real_lock;
 827 +
 828 +	int nr_replicas; // AKA k
 829 +	int m;
 830 +
 831 +	int max_fifo_len; // max len of a fifo queue
 832 +	int nr_in_fifos;
 833 +
 834 +	struct binheap_handle top_m;  // min heap, base prio
 835 +	int top_m_size;  // number of nodes in top_m
 836 +
 837 +	struct binheap_handle not_top_m; // max heap, base prio
 838 +
 839 +	struct binheap_handle donees;	// min-heap, base prio
 840 +	struct fifo_queue *shortest_fifo_queue; // pointer to shortest fifo queue
 841 +
 842 +	/* data structures for holding requests */
 843 +	struct fifo_queue *fifo_queues; // array nr_replicas in length
 844 +	struct binheap_handle priority_queue;	// max-heap, base prio
 845 +	struct binheap_handle donors;	// max-heap, base prio
 846 +
 847 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
 848 +	struct ikglp_affinity *aff_obs;
 849 +#endif
 850 +};
 851 +
 852 +static inline struct ikglp_semaphore* ikglp_from_lock(struct litmus_lock* lock)
 853 +{
 854 +	return container_of(lock, struct ikglp_semaphore, litmus_lock);
 855 +}
 856 +
 857 +int ikglp_lock(struct litmus_lock* l);
 858 +int ikglp_unlock(struct litmus_lock* l);
 859 +int ikglp_close(struct litmus_lock* l);
 860 +void ikglp_free(struct litmus_lock* l);
 861 +struct litmus_lock* ikglp_new(int m, struct litmus_lock_ops*, void* __user arg);
 862 +
 863 +
 864 +
 865 +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
 866 +
 867 +struct ikglp_queue_info
 868 +{
 869 +	struct fifo_queue* q;
 870 +	lt_t estimated_len;
 871 +	int *nr_cur_users;
 872 +};
 873 +
 874 +struct ikglp_affinity_ops
 875 +{
 876 +	struct fifo_queue* (*advise_enqueue)(struct ikglp_affinity* aff, struct task_struct* t);	// select FIFO
 877 +	ikglp_wait_state_t* (*advise_steal)(struct ikglp_affinity* aff, struct fifo_queue* dst);	// select steal from FIFO
 878 +	ikglp_donee_heap_node_t* (*advise_donee_selection)(struct ikglp_affinity* aff, struct task_struct* t);	// select a donee
 879 +	ikglp_wait_state_t* (*advise_donor_to_fq)(struct ikglp_affinity* aff, struct fifo_queue* dst);	// select a donor to move to PQ
 880 +
 881 +	void (*notify_enqueue)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t);	// fifo enqueue
 882 +	void (*notify_dequeue)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t);	// fifo dequeue
 883 +	void (*notify_acquired)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t);	// replica acquired
 884 +	void (*notify_freed)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t);		// replica freed
 885 +	int (*replica_to_resource)(struct ikglp_affinity* aff, struct fifo_queue* fq);		// convert a replica # to a GPU (includes offsets and simult user folding)
 886 +};
 887 +
 888 +struct ikglp_affinity
 889 +{
 890 +	struct affinity_observer obs;
 891 +	struct ikglp_affinity_ops *ops;
 892 +	struct ikglp_queue_info *q_info;
 893 +	int *nr_cur_users_on_rsrc;
 894 +	int offset;
 895 +	int nr_simult;
 896 +	int nr_rsrc;
 897 +	int relax_max_fifo_len;
 898 +};
 899 +
 900 +static inline struct ikglp_affinity* ikglp_aff_obs_from_aff_obs(struct affinity_observer* aff_obs)
 901 +{
 902 +	return container_of(aff_obs, struct ikglp_affinity, obs);
 903 +}
 904 +
 905 +int ikglp_aff_obs_close(struct affinity_observer*);
 906 +void ikglp_aff_obs_free(struct affinity_observer*);
 907 +struct affinity_observer* ikglp_gpu_aff_obs_new(struct affinity_observer_ops*,
 908 +												void* __user arg);
 909 +struct affinity_observer* ikglp_simple_gpu_aff_obs_new(struct affinity_observer_ops*,
 910 +												void* __user arg);
 911 +#endif
 912 +
 913 +
 914 +
 915 +#endif
 916 diff --git a/include/litmus/kexclu_affinity.h b/include/litmus/kexclu_affinity.h
 917 new file mode 100644
 918 index 0000000..f6355de
 919 --- /dev/null
 920 +++ b/include/litmus/kexclu_affinity.h
 921 @@ -0,0 +1,35 @@
 922 +#ifndef LITMUS_AFF_OBS_H
 923 +#define LITMUS_AFF_OBS_H
 924 +
 925 +#include <litmus/locking.h>
 926 +
 927 +struct affinity_observer_ops;
 928 +
 929 +struct affinity_observer
 930 +{
 931 +	struct affinity_observer_ops* ops;
 932 +	int type;
 933 +	int ident;
 934 +
 935 +	struct litmus_lock* lock;  // the lock under observation
 936 +};
 937 +
 938 +typedef int (*aff_obs_open_t)(struct affinity_observer* aff_obs,
 939 +							  void* __user arg);
 940 +typedef int (*aff_obs_close_t)(struct affinity_observer* aff_obs);
 941 +typedef void (*aff_obs_free_t)(struct affinity_observer* aff_obs);
 942 +
 943 +struct affinity_observer_ops
 944 +{
 945 +	aff_obs_open_t open;
 946 +	aff_obs_close_t close;
 947 +	aff_obs_free_t deallocate;
 948 +};
 949 +
 950 +struct litmus_lock* get_lock_from_od(int od);
 951 +
 952 +void affinity_observer_new(struct affinity_observer* aff,
 953 +						   struct affinity_observer_ops* ops,
 954 +						   struct affinity_observer_args* args);
 955 +
 956 +#endif
 957 diff --git a/include/litmus/kfmlp_lock.h b/include/litmus/kfmlp_lock.h
 958 new file mode 100644
 959 index 0000000..5f0aae6
 960 --- /dev/null
 961 +++ b/include/litmus/kfmlp_lock.h
 962 @@ -0,0 +1,97 @@
 963 +#ifndef LITMUS_KFMLP_H
 964 +#define LITMUS_KFMLP_H
 965 +
 966 +#include <litmus/litmus.h>
 967 +#include <litmus/locking.h>
 968 +
 969 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
 970 +#include <litmus/kexclu_affinity.h>
 971 +
 972 +struct kfmlp_affinity;
 973 +#endif
 974 +
 975 +/* struct for semaphore with priority inheritance */
 976 +struct kfmlp_queue
 977 +{
 978 +	wait_queue_head_t wait;
 979 +	struct task_struct* owner;
 980 +	struct task_struct* hp_waiter;
 981 +	int count; /* number of waiters + holder */
 982 +};
 983 +
 984 +struct kfmlp_semaphore
 985 +{
 986 +	struct litmus_lock litmus_lock;
 987 +
 988 +	spinlock_t	lock;
 989 +
 990 +	int num_resources; /* aka k */
 991 +
 992 +	struct kfmlp_queue *queues; /* array */
 993 +	struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */
 994 +
 995 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
 996 +	struct kfmlp_affinity *aff_obs;
 997 +#endif
 998 +};
 999 +
1000 +static inline struct kfmlp_semaphore* kfmlp_from_lock(struct litmus_lock* lock)
1001 +{
1002 +	return container_of(lock, struct kfmlp_semaphore, litmus_lock);
1003 +}
1004 +
1005 +int kfmlp_lock(struct litmus_lock* l);
1006 +int kfmlp_unlock(struct litmus_lock* l);
1007 +int kfmlp_close(struct litmus_lock* l);
1008 +void kfmlp_free(struct litmus_lock* l);
1009 +struct litmus_lock* kfmlp_new(struct litmus_lock_ops*, void* __user arg);
1010 +
1011 +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
1012 +
1013 +struct kfmlp_queue_info
1014 +{
1015 +	struct kfmlp_queue* q;
1016 +	lt_t estimated_len;
1017 +	int *nr_cur_users;
1018 +};
1019 +
1020 +struct kfmlp_affinity_ops
1021 +{
1022 +	struct kfmlp_queue* (*advise_enqueue)(struct kfmlp_affinity* aff, struct task_struct* t);
1023 +	struct task_struct* (*advise_steal)(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from);
1024 +	void (*notify_enqueue)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t);
1025 +	void (*notify_dequeue)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t);
1026 +	void (*notify_acquired)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t);
1027 +	void (*notify_freed)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t);
1028 +	int (*replica_to_resource)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq);
1029 +};
1030 +
1031 +struct kfmlp_affinity
1032 +{
1033 +	struct affinity_observer obs;
1034 +	struct kfmlp_affinity_ops *ops;
1035 +	struct kfmlp_queue_info *q_info;
1036 +	int *nr_cur_users_on_rsrc;
1037 +	int offset;
1038 +	int nr_simult;
1039 +	int nr_rsrc;
1040 +};
1041 +
1042 +static inline struct kfmlp_affinity* kfmlp_aff_obs_from_aff_obs(struct affinity_observer* aff_obs)
1043 +{
1044 +	return container_of(aff_obs, struct kfmlp_affinity, obs);
1045 +}
1046 +
1047 +int kfmlp_aff_obs_close(struct affinity_observer*);
1048 +void kfmlp_aff_obs_free(struct affinity_observer*);
1049 +struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops*,
1050 +											void* __user arg);
1051 +struct affinity_observer* kfmlp_simple_gpu_aff_obs_new(struct affinity_observer_ops*,
1052 +												void* __user arg);
1053 +
1054 +
1055 +#endif
1056 +
1057 +#endif
1058 +
1059 +
1060 diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h
1061 index 0b071fd..71df378 100644
1062 --- a/include/litmus/litmus.h
1063 +++ b/include/litmus/litmus.h
1064 @@ -26,6 +26,7 @@ static inline int in_list(struct list_head* list)
1065  		);
1066  }
1067  
1068 +
1069  struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq);
1070  
1071  #define NO_CPU			0xffffffff
1072 @@ -53,12 +54,16 @@ void litmus_exit_task(struct task_struct *tsk);
1073  #define get_rt_phase(t)		(tsk_rt(t)->task_params.phase)
1074  #define get_partition(t) 	(tsk_rt(t)->task_params.cpu)
1075  #define get_deadline(t)		(tsk_rt(t)->job_params.deadline)
1076 +#define get_period(t)		(tsk_rt(t)->task_params.period)
1077  #define get_release(t)		(tsk_rt(t)->job_params.release)
1078  #define get_class(t)		(tsk_rt(t)->task_params.cls)
1079  
1080  #define is_priority_boosted(t)	(tsk_rt(t)->priority_boosted)
1081  #define get_boost_start(t)	(tsk_rt(t)->boost_start_time)
1082  
1083 +#define effective_priority(t) ((!(tsk_rt(t)->inh_task)) ? t : tsk_rt(t)->inh_task)
1084 +#define base_priority(t) (t)
1085 +
1086  inline static int budget_exhausted(struct task_struct* t)
1087  {
1088  	return get_exec_time(t) >= get_exec_cost(t);
1089 @@ -114,10 +119,12 @@ static inline lt_t litmus_clock(void)
1090  #define earlier_deadline(a, b) (lt_before(\
1091  	(a)->rt_param.job_params.deadline,\
1092  	(b)->rt_param.job_params.deadline))
1093 +#define shorter_period(a, b) (lt_before(\
1094 +	(a)->rt_param.task_params.period,\
1095 +	(b)->rt_param.task_params.period))
1096  #define earlier_release(a, b)  (lt_before(\
1097  	(a)->rt_param.job_params.release,\
1098  	(b)->rt_param.job_params.release))
1099 -
1100  void preempt_if_preemptable(struct task_struct* t, int on_cpu);
1101  
1102  #ifdef CONFIG_LITMUS_LOCKING
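
The effective_priority() macro resolves a task to the task whose parameters currently determine its priority: the task itself, unless it inherits from tsk_rt(t)->inh_task. A one-line sketch of how a comparison might compose these macros (the actual composition is done in litmus/edf_common.c later in the patch):

/* Sketch only: compare two tasks by their effective (possibly inherited) parameters. */
static inline int example_effprio_earlier_deadline(struct task_struct *a, struct task_struct *b)
{
	return earlier_deadline(effective_priority(a), effective_priority(b));
}
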
1103 diff --git a/include/litmus/litmus_softirq.h b/include/litmus/litmus_softirq.h
1104 new file mode 100644
1105 index 0000000..1eb5ea1
1106 --- /dev/null
1107 +++ b/include/litmus/litmus_softirq.h
1108 @@ -0,0 +1,199 @@
1109 +#ifndef __LITMUS_SOFTIRQ_H
1110 +#define __LITMUS_SOFTIRQ_H
1111 +
1112 +#include <linux/interrupt.h>
1113 +#include <linux/workqueue.h>
1114 +
1115 +/*
1116 +   Threaded tasklet handling for Litmus.  Tasklets
1117 +   are scheduled with the priority of the tasklet's
1118 +   owner---that is, the RT task on whose behalf the
1119 +   tasklet runs.
1120 +
1121 +   Tasklets are currently scheduled in FIFO order with
1122 +   NO priority inheritance for "blocked" tasklets.
1123 +
1124 +   klitirqd assumes the priority of the owner of the
1125 +   tasklet when the tasklet is next to execute.
1126 +
1127 +   Currently, hi-tasklets are scheduled before
1128 +   low-tasklets, regardless of priority of low-tasklets.
1129 +   And likewise, low-tasklets are scheduled before work
1130 +   queue objects.  This priority inversion probably needs
1131 +   to be fixed, though it is not an issue in our work with
1132 +   GPUs, as GPUs (and their associated klitirqds) are owned
1133 +   for exclusive time periods, so no inversions can
1134 +   occur.
1135 + */
1136 +
1137 +
1138 +
1139 +#define NR_LITMUS_SOFTIRQD CONFIG_NR_LITMUS_SOFTIRQD
1140 +
1141 +/* Spawns NR_LITMUS_SOFTIRQD klitirqd daemons.
1142 +   Actual launch of threads is deferred to kworker's
1143 +   workqueue, so daemons will likely not be immediately
1144 +   running when this function returns, though the required
1145 +   data will be initialized.
1146 +
1147 +   @affinity_set: an array expressing the processor affinity
1148 +    for each of the NR_LITMUS_SOFTIRQD daemons.  May be set
1149 +    to NULL for global scheduling.
1150 +
1151 +	- Examples -
1152 +	8-CPU system with two CPU clusters:
1153 +		affinity[] = {0, 0, 0, 0, 3, 3, 3, 3}
1154 +		NOTE: Daemons are not actually bound to the specified CPU, but rather
1155 +		to the cluster in which the CPU resides.
1156 +
1157 +	8-CPU system, partitioned:
1158 +		affinity[] = {0, 1, 2, 3, 4, 5, 6, 7}
1159 +
1160 +	FIXME: change array to a CPU topology or array of cpumasks
1161 +
1162 + */
1163 +void spawn_klitirqd(int* affinity);
1164 +
1165 +
1166 +/* Raises a flag to tell klitirqds to terminate.
1167 +   Termination is async, so some threads may be running
1168 +   after function return. */
1169 +void kill_klitirqd(void);
1170 +
1171 +
1172 +/* Returns 1 if all NR_LITMUS_SOFTIRQD klitirqds are ready
1173 +   to handle tasklets. 0, otherwise.*/
1174 +int klitirqd_is_ready(void);
1175 +
1176 +/* Returns 1 if no NR_LITMUS_SOFTIRQD klitirqds are ready
1177 +   to handle tasklets. 0, otherwise.*/
1178 +int klitirqd_is_dead(void);
1179 +
1180 +/* Flushes all pending work out to the OS for regular
1181 + * tasklet/work processing of the specified 'owner'
1182 + *
1183 + * PRECOND: klitirqd_thread must have a clear entry
1184 + * in the GPU registry, otherwise this call will become
1185 + * a no-op as work will loop back to the klitirqd_thread.
1186 + *
1187 + * Pass NULL for owner to flush ALL pending items.
1188 + */
1189 +void flush_pending(struct task_struct* klitirqd_thread,
1190 +				   struct task_struct* owner);
1191 +
1192 +struct task_struct* get_klitirqd(unsigned int k_id);
1193 +
1194 +
1195 +extern int __litmus_tasklet_schedule(
1196 +        struct tasklet_struct *t,
1197 +        unsigned int k_id);
1198 +
1199 +/* schedule a tasklet on klitirqd #k_id */
1200 +static inline int litmus_tasklet_schedule(
1201 +    struct tasklet_struct *t,
1202 +    unsigned int k_id)
1203 +{
1204 +	int ret = 0;
1205 +	if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
1206 +		ret = __litmus_tasklet_schedule(t, k_id);
1207 +	return(ret);
1208 +}
1209 +
1210 +/* for use by __tasklet_schedule() */
1211 +static inline int _litmus_tasklet_schedule(
1212 +    struct tasklet_struct *t,
1213 +    unsigned int k_id)
1214 +{
1215 +    return(__litmus_tasklet_schedule(t, k_id));
1216 +}
1217 +
1218 +
1219 +
1220 +
1221 +extern int __litmus_tasklet_hi_schedule(struct tasklet_struct *t,
1222 +                                         unsigned int k_id);
1223 +
1224 +/* schedule a hi tasklet on klitirqd #k_id */
1225 +static inline int litmus_tasklet_hi_schedule(struct tasklet_struct *t,
1226 +                                              unsigned int k_id)
1227 +{
1228 +	int ret = 0;
1229 +	if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
1230 +		ret = __litmus_tasklet_hi_schedule(t, k_id);
1231 +	return(ret);
1232 +}
1233 +
1234 +/* for use by __tasklet_hi_schedule() */
1235 +static inline int _litmus_tasklet_hi_schedule(struct tasklet_struct *t,
1236 +                                               unsigned int k_id)
1237 +{
1238 +    return(__litmus_tasklet_hi_schedule(t, k_id));
1239 +}
1240 +
1241 +
1242 +
1243 +
1244 +
1245 +extern int __litmus_tasklet_hi_schedule_first(
1246 +    struct tasklet_struct *t,
1247 +    unsigned int k_id);
1248 +
1249 +/* schedule a hi tasklet on klitirqd #k_id on next go-around */
1250 +/* PRECONDITION: Interrupts must be disabled. */
1251 +static inline int litmus_tasklet_hi_schedule_first(
1252 +    struct tasklet_struct *t,
1253 +    unsigned int k_id)
1254 +{
1255 +	int ret = 0;
1256 +	if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
1257 +		ret = __litmus_tasklet_hi_schedule_first(t, k_id);
1258 +	return(ret);
1259 +}
1260 +
1261 +/* for use by __tasklet_hi_schedule_first() */
1262 +static inline int _litmus_tasklet_hi_schedule_first(
1263 +    struct tasklet_struct *t,
1264 +    unsigned int k_id)
1265 +{
1266 +    return(__litmus_tasklet_hi_schedule_first(t, k_id));
1267 +}
1268 +
1269 +
1270 +
1271 +//////////////
1272 +
1273 +extern int __litmus_schedule_work(
1274 +	struct work_struct* w,
1275 +	unsigned int k_id);
1276 +
1277 +static inline int litmus_schedule_work(
1278 +	struct work_struct* w,
1279 +	unsigned int k_id)
1280 +{
1281 +	return(__litmus_schedule_work(w, k_id));
1282 +}
1283 +
1284 +
1285 +
1286 +///////////// mutex operations for client threads.
1287 +
1288 +void down_and_set_stat(struct task_struct* t,
1289 +					 enum klitirqd_sem_status to_set,
1290 +					 struct mutex* sem);
1291 +
1292 +void __down_and_reset_and_set_stat(struct task_struct* t,
1293 +				enum klitirqd_sem_status to_reset,
1294 +				enum klitirqd_sem_status to_set,
1295 +				struct mutex* sem);
1296 +
1297 +void up_and_set_stat(struct task_struct* t,
1298 +					enum klitirqd_sem_status to_set,
1299 +					struct mutex* sem);
1300 +
1301 +
1302 +
1303 +void release_klitirqd_lock(struct task_struct* t);
1304 +
1305 +int reacquire_klitirqd_lock(struct task_struct* t);
1306 +
1307 +#endif
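
Tying the comment's affinity examples to code: a hypothetical init-time sketch for an 8-CPU, two-cluster machine (it assumes CONFIG_NR_LITMUS_SOFTIRQD == 8), followed by the deferral call a driver shim might make once the daemons are up. The fallback path and the choice of daemon 0 are arbitrary:

/* Hypothetical sketch: spawn klitirqd daemons clustered as {0,0,0,0,3,3,3,3},
 * then defer a tasklet to daemon #0 once the daemons are ready. */
static struct tasklet_struct my_tasklet;	/* assumed initialized elsewhere, with ->owner set */

static void example_spawn_klitirqd(void)
{
	int affinity[NR_LITMUS_SOFTIRQD] = {0, 0, 0, 0, 3, 3, 3, 3};

	spawn_klitirqd(affinity);	/* asynchronous; daemons come up later */
}

static void example_defer_tasklet(void)
{
	if (klitirqd_is_ready())
		litmus_tasklet_schedule(&my_tasklet, 0);	/* queue on klitirqd #0 */
	else
		tasklet_schedule(&my_tasklet);			/* fall back to the stock softirq path */
}
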
1308 diff --git a/include/litmus/locking.h b/include/litmus/locking.h
1309 index 4d7b870..36647fe 100644
1310 --- a/include/litmus/locking.h
1311 +++ b/include/litmus/locking.h
1312 @@ -1,28 +1,160 @@
1313  #ifndef LITMUS_LOCKING_H
1314  #define LITMUS_LOCKING_H
1315  
1316 +#include <linux/list.h>
1317 +
1318  struct litmus_lock_ops;
1319  
1320 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
1321 +struct nested_info
1322 +{
1323 +	struct litmus_lock *lock;
1324 +	struct task_struct *hp_waiter_eff_prio;
1325 +	struct task_struct **hp_waiter_ptr;
1326 +    struct binheap_node hp_binheap_node;
1327 +};
1328 +
1329 +static inline struct task_struct* top_priority(struct binheap_handle* handle) {
1330 +	if(!binheap_empty(handle)) {
1331 +		return (struct task_struct*)(binheap_top_entry(handle, struct nested_info, hp_binheap_node)->hp_waiter_eff_prio);
1332 +	}
1333 +	return NULL;
1334 +}
1335 +
1336 +void print_hp_waiters(struct binheap_node* n, int depth);
1337 +#endif
1338 +
1339 +
1340  /* Generic base struct for LITMUS^RT userspace semaphores.
1341   * This structure should be embedded in protocol-specific semaphores.
1342   */
1343  struct litmus_lock {
1344  	struct litmus_lock_ops *ops;
1345  	int type;
1346 +
1347 +	int ident;
1348 +
1349 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
1350 +	struct nested_info nest;
1351 +//#ifdef CONFIG_DEBUG_SPINLOCK
1352 +	char cheat_lockdep[2];
1353 +	struct lock_class_key key;
1354 +//#endif
1355 +#endif
1356  };
1357  
1358 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
1359 +
1360 +#define MAX_DGL_SIZE CONFIG_LITMUS_MAX_DGL_SIZE
1361 +
1362 +typedef struct dgl_wait_state {
1363 +	struct task_struct *task;	/* task waiting on DGL */
1364 +	struct litmus_lock *locks[MAX_DGL_SIZE];	/* requested locks in DGL */
1365 +	int size;			/* size of the DGL */
1366 +	int nr_remaining;	/* nr locks remaining before DGL is complete */
1367 +	int last_primary;	/* index lock in locks[] that has active priority */
1368 +	wait_queue_t wq_nodes[MAX_DGL_SIZE];
1369 +} dgl_wait_state_t;
1370 +
1371 +void wake_or_wait_on_next_lock(dgl_wait_state_t *dgl_wait);
1372 +void select_next_lock(dgl_wait_state_t* dgl_wait /*, struct litmus_lock* prev_lock*/);
1373 +
1374 +void init_dgl_waitqueue_entry(wait_queue_t *wq_node, dgl_wait_state_t* dgl_wait);
1375 +int dgl_wake_up(wait_queue_t *wq_node, unsigned mode, int sync, void *key);
1376 +void __waitqueue_dgl_remove_first(wait_queue_head_t *wq, dgl_wait_state_t** dgl_wait, struct task_struct **task);
1377 +#endif
1378 +
1379 +typedef int (*lock_op_t)(struct litmus_lock *l);
1380 +typedef lock_op_t lock_close_t;
1381 +typedef lock_op_t lock_lock_t;
1382 +typedef lock_op_t lock_unlock_t;
1383 +
1384 +typedef int (*lock_open_t)(struct litmus_lock *l, void* __user arg);
1385 +typedef void (*lock_free_t)(struct litmus_lock *l);
1386 +
1387  struct litmus_lock_ops {
1388  	/* Current task tries to obtain / drop a reference to a lock.
1389  	 * Optional methods, allowed by default. */
1390 -	int (*open)(struct litmus_lock*, void* __user);
1391 -	int (*close)(struct litmus_lock*);
1392 +	lock_open_t open;
1393 +	lock_close_t close;
1394  
1395  	/* Current tries to lock/unlock this lock (mandatory methods). */
1396 -	int (*lock)(struct litmus_lock*);
1397 -	int (*unlock)(struct litmus_lock*);
1398 +	lock_lock_t lock;
1399 +	lock_unlock_t unlock;
1400  
1401  	/* The lock is no longer being referenced (mandatory method). */
1402 -	void (*deallocate)(struct litmus_lock*);
1403 +	lock_free_t deallocate;
1404 +
1405 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
1406 +	void (*propagate_increase_inheritance)(struct litmus_lock* l, struct task_struct* t, raw_spinlock_t* to_unlock, unsigned long irqflags);
1407 +	void (*propagate_decrease_inheritance)(struct litmus_lock* l, struct task_struct* t, raw_spinlock_t* to_unlock, unsigned long irqflags);
1408 +#endif
1409 +
1410 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
1411 +	raw_spinlock_t* (*get_dgl_spin_lock)(struct litmus_lock *l);
1412 +	int (*dgl_lock)(struct litmus_lock *l, dgl_wait_state_t* dgl_wait, wait_queue_t* wq_node);
1413 +	int (*is_owner)(struct litmus_lock *l, struct task_struct *t);
1414 +	void (*enable_priority)(struct litmus_lock *l, dgl_wait_state_t* dgl_wait);
1415 +#endif
1416  };
1417  
1418 +
1419 +/*
1420 + Nested inheritance can be achieved with fine-grain locking when there is
1421 + no need for DGL support, presuming locks are acquired in a partial order
1422 + (no cycles!).  However, DGLs allow locks to be acquired in any order.  This
1423 + makes nested inheritance very difficult to realize with fine-grain locks
1424 + (we don't yet know of a solution), so we use a big lock instead.
1425 +
1426 + Code contains both fine-grain and coarse-grain methods together, side-by-side.
1427 + Each lock operation *IS NOT* surrounded by ifdef/endif to help make code more
1428 + readable.  However, this leads to the odd situation where both code paths
1429 + appear together in code as if they were both active together.
1430 +
1431 + THIS IS NOT REALLY THE CASE!  ONLY ONE CODE PATH IS ACTUALLY ACTIVE!
1432 +
1433 + Example:
1434 +	lock_global_irqsave(coarseLock, flags);
1435 +	lock_fine_irqsave(fineLock, flags);
1436 +
1437 + Reality (coarse):
1438 +	lock_global_irqsave(coarseLock, flags);
1439 +	//lock_fine_irqsave(fineLock, flags);
1440 +
1441 + Reality (fine):
1442 +	//lock_global_irqsave(coarseLock, flags);
1443 +	lock_fine_irqsave(fineLock, flags);
1444 +
1445 + Be careful when you read code involving nested inheritance.
1446 + */
1447 +#if defined(CONFIG_LITMUS_DGL_SUPPORT)
1448 +/* DGL requires a big lock to implement nested inheritance */
1449 +#define lock_global_irqsave(lock, flags)		raw_spin_lock_irqsave((lock), (flags))
1450 +#define lock_global(lock)						raw_spin_lock((lock))
1451 +#define unlock_global_irqrestore(lock, flags)	raw_spin_unlock_irqrestore((lock), (flags))
1452 +#define unlock_global(lock)						raw_spin_unlock((lock))
1453 +
1454 +/* fine-grain lock operations are no-ops with DGL support */
1455 +#define lock_fine_irqsave(lock, flags)
1456 +#define lock_fine(lock)
1457 +#define unlock_fine_irqrestore(lock, flags)
1458 +#define unlock_fine(lock)
1459 +
1460 +#elif defined(CONFIG_LITMUS_NESTED_LOCKING)
1461 +/* Use fine-grain locking when DGLs are disabled. */
1462 +/* global lock operations are no-ops without DGL support */
1463 +#define lock_global_irqsave(lock, flags)
1464 +#define lock_global(lock)
1465 +#define unlock_global_irqrestore(lock, flags)
1466 +#define unlock_global(lock)
1467 +
1468 +#define lock_fine_irqsave(lock, flags)			raw_spin_lock_irqsave((lock), (flags))
1469 +#define lock_fine(lock)							raw_spin_lock((lock))
1470 +#define unlock_fine_irqrestore(lock, flags)		raw_spin_unlock_irqrestore((lock), (flags))
1471 +#define unlock_fine(lock)						raw_spin_unlock((lock))
1472 +
1473  #endif
1474 +
1475 +
1476 +#endif
1477 +
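
For orientation, a sketch of how a protocol might fill in the extended litmus_lock_ops table above. Every my_* function is a placeholder; the real tables appear in litmus/rsm_lock.c, litmus/kfmlp_lock.c, and litmus/ikglp_lock.c later in the patch:

/* Placeholder ops table; the my_* functions are assumed to be defined elsewhere. */
static struct litmus_lock_ops my_protocol_ops = {
	.open		= my_open,		/* lock_open_t, optional */
	.close		= my_close,		/* lock_close_t, optional */
	.lock		= my_lock,		/* lock_lock_t, mandatory */
	.unlock		= my_unlock,		/* lock_unlock_t, mandatory */
	.deallocate	= my_free,		/* lock_free_t, mandatory */
#ifdef CONFIG_LITMUS_NESTED_LOCKING
	.propagate_increase_inheritance = my_propagate_increase,
	.propagate_decrease_inheritance = my_propagate_decrease,
#endif
#ifdef CONFIG_LITMUS_DGL_SUPPORT
	.get_dgl_spin_lock	= my_get_dgl_spin_lock,
	.dgl_lock		= my_dgl_lock,
	.is_owner		= my_is_owner,
	.enable_priority	= my_enable_priority,
#endif
};
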
1478 diff --git a/include/litmus/nvidia_info.h b/include/litmus/nvidia_info.h
1479 new file mode 100644
1480 index 0000000..97c9577
1481 --- /dev/null
1482 +++ b/include/litmus/nvidia_info.h
1483 @@ -0,0 +1,46 @@
1484 +#ifndef __LITMUS_NVIDIA_H
1485 +#define __LITMUS_NVIDIA_H
1486 +
1487 +#include <linux/interrupt.h>
1488 +
1489 +
1490 +#include <litmus/litmus_softirq.h>
1491 +
1492 +
1493 +//#define NV_DEVICE_NUM NR_LITMUS_SOFTIRQD
1494 +#define NV_DEVICE_NUM CONFIG_NV_DEVICE_NUM
1495 +#define NV_MAX_SIMULT_USERS CONFIG_NV_MAX_SIMULT_USERS
1496 +
1497 +int init_nvidia_info(void);
1498 +void shutdown_nvidia_info(void);
1499 +
1500 +int is_nvidia_func(void* func_addr);
1501 +
1502 +void dump_nvidia_info(const struct tasklet_struct *t);
1503 +
1504 +
1505 +// Returns the Nvidia device # associated with provided tasklet and work_struct.
1506 +u32 get_tasklet_nv_device_num(const struct tasklet_struct *t);
1507 +u32 get_work_nv_device_num(const struct work_struct *t);
1508 +
1509 +
1510 +int init_nv_device_reg(void);
1511 +//int get_nv_device_id(struct task_struct* owner);
1512 +
1513 +
1514 +int reg_nv_device(int reg_device_id, int register_device, struct task_struct *t);
1515 +
1516 +struct task_struct* get_nv_max_device_owner(u32 target_device_id);
1517 +//int is_nv_device_owner(u32 target_device_id);
1518 +
1519 +void lock_nv_registry(u32 reg_device_id, unsigned long* flags);
1520 +void unlock_nv_registry(u32 reg_device_id, unsigned long* flags);
1521 +
1522 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1523 +void pai_check_priority_increase(struct task_struct *t, int reg_device_id);
1524 +void pai_check_priority_decrease(struct task_struct *t, int reg_device_id);
1525 +#endif
1526 +
1527 +//void increment_nv_int_count(u32 device);
1528 +
1529 +#endif
1530 diff --git a/include/litmus/preempt.h b/include/litmus/preempt.h
1531 index 380b886..8f3a9ca 100644
1532 --- a/include/litmus/preempt.h
1533 +++ b/include/litmus/preempt.h
1534 @@ -26,12 +26,12 @@ const char* sched_state_name(int s);
1535  				    (x), #x, __FUNCTION__);		\
1536  	} while (0);
1537  
1538 +//#define TRACE_SCHED_STATE_CHANGE(x, y, cpu) /* ignore */
1539  #define TRACE_SCHED_STATE_CHANGE(x, y, cpu)				\
1540  	TRACE_STATE("[P%d] 0x%x (%s) -> 0x%x (%s)\n",			\
1541  		    cpu,  (x), sched_state_name(x),			\
1542  		    (y), sched_state_name(y))
1543  
1544 -
1545  typedef enum scheduling_state {
1546  	TASK_SCHEDULED    = (1 << 0),  /* The currently scheduled task is the one that
1547  					* should be scheduled, and the processor does not
1548 diff --git a/include/litmus/rsm_lock.h b/include/litmus/rsm_lock.h
1549 new file mode 100644
1550 index 0000000..a151896
1551 --- /dev/null
1552 +++ b/include/litmus/rsm_lock.h
1553 @@ -0,0 +1,54 @@
1554 +#ifndef LITMUS_RSM_H
1555 +#define LITMUS_RSM_H
1556 +
1557 +#include <litmus/litmus.h>
1558 +#include <litmus/binheap.h>
1559 +#include <litmus/locking.h>
1560 +
1561 +/* struct for semaphore with priority inheritance */
1562 +struct rsm_mutex {
1563 +	struct litmus_lock litmus_lock;
1564 +
1565 +	/* current resource holder */
1566 +	struct task_struct *owner;
1567 +
1568 +	/* highest-priority waiter */
1569 +	struct task_struct *hp_waiter;
1570 +
1571 +	/* FIFO queue of waiting tasks -- for now.  time stamp in the future. */
1572 +	wait_queue_head_t	wait;
1573 +
1574 +	/* we do some nesting within spinlocks, so we can't use the normal
1575 +	 sleeplocks found in wait_queue_head_t. */
1576 +	raw_spinlock_t		lock;
1577 +};
1578 +
1579 +static inline struct rsm_mutex* rsm_mutex_from_lock(struct litmus_lock* lock)
1580 +{
1581 +	return container_of(lock, struct rsm_mutex, litmus_lock);
1582 +}
1583 +
1584 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
1585 +int rsm_mutex_is_owner(struct litmus_lock *l, struct task_struct *t);
1586 +int rsm_mutex_dgl_lock(struct litmus_lock *l, dgl_wait_state_t* dgl_wait, wait_queue_t* wq_node);
1587 +void rsm_mutex_enable_priority(struct litmus_lock *l, dgl_wait_state_t* dgl_wait);
1588 +#endif
1589 +
1590 +void rsm_mutex_propagate_increase_inheritance(struct litmus_lock* l,
1591 +											  struct task_struct* t,
1592 +											  raw_spinlock_t* to_unlock,
1593 +											  unsigned long irqflags);
1594 +
1595 +void rsm_mutex_propagate_decrease_inheritance(struct litmus_lock* l,
1596 +											  struct task_struct* t,
1597 +											  raw_spinlock_t* to_unlock,
1598 +											  unsigned long irqflags);
1599 +
1600 +int rsm_mutex_lock(struct litmus_lock* l);
1601 +int rsm_mutex_unlock(struct litmus_lock* l);
1602 +int rsm_mutex_close(struct litmus_lock* l);
1603 +void rsm_mutex_free(struct litmus_lock* l);
1604 +struct litmus_lock* rsm_mutex_new(struct litmus_lock_ops*);
1605 +
1606 +
1607 +#endif
1608 \ No newline at end of file
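/*
 * Illustrative sketch (not part of the patch): the shape of the rsm_mutex API
 * declared above.  In the patch itself, instances are created through a
 * plugin's allocate_lock callback and reached from user space via the FDSO
 * layer; 'some_ops' stands in for a plugin-supplied litmus_lock_ops table,
 * and it is assumed here that rsm_mutex_free() tears down the instance.
 */
#include <linux/errno.h>
#include <litmus/rsm_lock.h>

static int example_rsm_mutex_use(struct litmus_lock_ops *some_ops)
{
	struct litmus_lock *l = rsm_mutex_new(some_ops);
	struct rsm_mutex *mutex;
	int err;

	if (!l)
		return -ENOMEM;

	mutex = rsm_mutex_from_lock(l);	/* container_of() back-pointer */
	(void)mutex;

	err = rsm_mutex_lock(l);	/* may block; owner/hp_waiter maintained inside */
	if (!err)
		err = rsm_mutex_unlock(l);

	rsm_mutex_free(l);		/* assumed to release the instance */
	return err;
}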
1609 diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
1610 index d6d7991..0198884 100644
1611 --- a/include/litmus/rt_param.h
1612 +++ b/include/litmus/rt_param.h
1613 @@ -5,6 +5,8 @@
1614  #ifndef _LINUX_RT_PARAM_H_
1615  #define _LINUX_RT_PARAM_H_
1616  
1617 +#include <litmus/fpmath.h>
1618 +
1619  /* Litmus time type. */
1620  typedef unsigned long long lt_t;
1621  
1622 @@ -24,6 +26,7 @@ static inline int lt_after_eq(lt_t a, lt_t b)
1623  typedef enum {
1624  	RT_CLASS_HARD,
1625  	RT_CLASS_SOFT,
1626 +	RT_CLASS_SOFT_W_SLIP,
1627  	RT_CLASS_BEST_EFFORT
1628  } task_class_t;
1629  
1630 @@ -52,6 +55,19 @@ union np_flag {
1631  	} np;
1632  };
1633  
1634 +struct affinity_observer_args
1635 +{
1636 +	int lock_od;
1637 +};
1638 +
1639 +struct gpu_affinity_observer_args
1640 +{
1641 +	struct affinity_observer_args obs;
1642 +	int replica_to_gpu_offset;
1643 +	int nr_simult_users;
1644 +	int relaxed_rules;
1645 +};
1646 +
1647  /* The definition of the data that is shared between the kernel and real-time
1648   * tasks via a shared page (see litmus/ctrldev.c).
1649   *
1650 @@ -75,6 +91,9 @@ struct control_page {
1651  /* don't export internal data structures to user space (liblitmus) */
1652  #ifdef __KERNEL__
1653  
1654 +#include <litmus/binheap.h>
1655 +#include <linux/semaphore.h>
1656 +
1657  struct _rt_domain;
1658  struct bheap_node;
1659  struct release_heap;
1660 @@ -100,6 +119,31 @@ struct rt_job {
1661  
1662  struct pfair_param;
1663  
1664 +enum klitirqd_sem_status
1665 +{
1666 +	NEED_TO_REACQUIRE,
1667 +	REACQUIRING,
1668 +	NOT_HELD,
1669 +	HELD
1670 +};
1671 +
1672 +typedef enum gpu_migration_dist
1673 +{
1674 +	// TODO: Make this scale with NR_NVIDIA_GPUS
1675 +	MIG_LOCAL = 0,
1676 +	MIG_NEAR = 1,
1677 +	MIG_MED = 2,
1678 +	MIG_FAR = 3,	// 8 GPUs in a binary tree hierarchy
1679 +	MIG_NONE = 4,
1680 +
1681 +	MIG_LAST = MIG_NONE
1682 +} gpu_migration_dist_t;
1683 +
1684 +typedef struct feedback_est{
1685 +	fp_t est;
1686 +	fp_t accum_err;
1687 +} feedback_est_t;
1688 +
1689  /*	RT task parameters for scheduling extensions
1690   *	These parameters are inherited during clone and therefore must
1691   *	be explicitly set up before the task set is launched.
1692 @@ -114,6 +158,52 @@ struct rt_param {
1693  	/* is the task present? (true if it can be scheduled) */
1694  	unsigned int		present:1;
1695  
1696 +#ifdef CONFIG_LITMUS_SOFTIRQD
1697 +    /* proxy threads have minimum priority by default */
1698 +    unsigned int        is_proxy_thread:1;
1699 +
1700 +	/* pointer to klitirqd currently working on this
1701 +	   task_struct's behalf.  only set by the task pointed
1702 +	   to by klitirqd.
1703 +
1704 +	   ptr only valid if is_proxy_thread == 0
1705 +	 */
1706 +	struct task_struct* cur_klitirqd;
1707 +
1708 +	/* Used to implement mutual exclusion between job and
1709 +	 * klitirqd execution.  A job must always hold its
1710 +	 * klitirqd_sem to execute.  A klitirqd instance must
1711 +	 * hold the semaphore before executing on behalf of a
1712 +	 * job.
1713 +	 */
1714 +	struct mutex				klitirqd_sem;
1715 +
1716 +	/* status of held klitirqd_sem, even if the held klitirqd_sem is from
1717 +	   another task (only proxy threads do this though).
1718 +	 */
1719 +	atomic_t					klitirqd_sem_stat;
1720 +#endif
1721 +
1722 +#ifdef CONFIG_LITMUS_NVIDIA
1723 +	/* number of top-half interrupts handled on behalf of current job */
1724 +	atomic_t					nv_int_count;
1725 +	long unsigned int			held_gpus;  // bitmap of held GPUs.
1726 +
1727 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1728 +	fp_t	gpu_fb_param_a[MIG_LAST+1];
1729 +	fp_t	gpu_fb_param_b[MIG_LAST+1];
1730 +
1731 +	gpu_migration_dist_t	gpu_migration;
1732 +	int				last_gpu;
1733 +	feedback_est_t	gpu_migration_est[MIG_LAST+1]; // local, near, med, far
1734 +
1735 +	lt_t accum_gpu_time;
1736 +	lt_t gpu_time_stamp;
1737 +
1738 +	unsigned int suspend_gpu_tracker_on_block:1;
1739 +#endif
1740 +#endif
1741 +
1742  #ifdef CONFIG_LITMUS_LOCKING
1743  	/* Is the task being priority-boosted by a locking protocol? */
1744  	unsigned int		priority_boosted:1;
1745 @@ -133,7 +223,15 @@ struct rt_param {
1746  	 * could point to self if PI does not result in
1747  	 * an increased task priority.
1748  	 */
1749 -	 struct task_struct*	inh_task;
1750 +	struct task_struct*	inh_task;
1751 +
1752 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
1753 +	raw_spinlock_t			hp_blocked_tasks_lock;
1754 +	struct binheap_handle	hp_blocked_tasks;
1755 +
1756 +	/* pointer to lock upon which is currently blocked */
1757 +	struct litmus_lock* blocked_lock;
1758 +#endif
1759  
1760  #ifdef CONFIG_NP_SECTION
1761  	/* For the FMLP under PSN-EDF, it is required to make the task
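/*
 * Illustrative sketch (not part of the patch): one way to interpret the
 * gpu_migration_dist_t buckets added above for the 8-GPU binary-tree
 * hierarchy mentioned in the MIG_FAR comment -- GPUs 0..7 as leaves,
 * siblings at MIG_NEAR, the same quad at MIG_MED, opposite halves at
 * MIG_FAR.  The real classification used by the patch lives in
 * litmus/gpu_affinity.c; 'example_gpu_migration_dist' is hypothetical.
 */
static gpu_migration_dist_t example_gpu_migration_dist(int last_gpu, int next_gpu)
{
	if (last_gpu < 0)
		return MIG_NONE;	/* no prior GPU, so nothing migrates */
	if (last_gpu == next_gpu)
		return MIG_LOCAL;	/* same GPU: state is already resident */
	if ((last_gpu >> 1) == (next_gpu >> 1))
		return MIG_NEAR;	/* siblings under the same parent node */
	if ((last_gpu >> 2) == (next_gpu >> 2))
		return MIG_MED;		/* same quad of the tree */
	return MIG_FAR;			/* opposite halves of the 8-GPU tree */
}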
1762 diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h
1763 index 6e7cabd..24a6858 100644
1764 --- a/include/litmus/sched_plugin.h
1765 +++ b/include/litmus/sched_plugin.h
1766 @@ -11,6 +11,12 @@
1767  #include <litmus/locking.h>
1768  #endif
1769  
1770 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1771 +#include <litmus/kexclu_affinity.h>
1772 +#endif
1773 +
1774 +#include <linux/interrupt.h>
1775 +
1776  /************************ setup/tear down ********************/
1777  
1778  typedef long (*activate_plugin_t) (void);
1779 @@ -29,7 +35,6 @@ typedef struct task_struct* (*schedule_t)(struct task_struct * prev);
1780   */
1781  typedef void (*finish_switch_t)(struct task_struct *prev);
1782  
1783 -
1784  /********************* task state changes ********************/
1785  
1786  /* Called to setup a new real-time task.
1787 @@ -58,6 +63,47 @@ typedef void (*task_exit_t)    (struct task_struct *);
1788  typedef long (*allocate_lock_t) (struct litmus_lock **lock, int type,
1789  				 void* __user config);
1790  
1791 +struct affinity_observer;
1792 +typedef long (*allocate_affinity_observer_t) (
1793 +								struct affinity_observer **aff_obs, int type,
1794 +								void* __user config);
1795 +
1796 +typedef void (*increase_prio_t)(struct task_struct* t, struct task_struct* prio_inh);
1797 +typedef void (*decrease_prio_t)(struct task_struct* t, struct task_struct* prio_inh);
1798 +typedef void (*nested_increase_prio_t)(struct task_struct* t, struct task_struct* prio_inh,
1799 +									  raw_spinlock_t *to_unlock, unsigned long irqflags);
1800 +typedef void (*nested_decrease_prio_t)(struct task_struct* t, struct task_struct* prio_inh,
1801 +									  raw_spinlock_t *to_unlock, unsigned long irqflags);
1802 +
1803 +typedef void (*increase_prio_klitirq_t)(struct task_struct* klitirqd,
1804 +                                        struct task_struct* old_owner,
1805 +                                        struct task_struct* new_owner);
1806 +typedef void (*decrease_prio_klitirqd_t)(struct task_struct* klitirqd,
1807 +                                         struct task_struct* old_owner);
1808 +
1809 +
1810 +typedef int (*enqueue_pai_tasklet_t)(struct tasklet_struct* tasklet);
1811 +typedef void (*change_prio_pai_tasklet_t)(struct task_struct *old_prio,
1812 +										  struct task_struct *new_prio);
1813 +typedef void (*run_tasklets_t)(struct task_struct* next);
1814 +
1815 +typedef raw_spinlock_t* (*get_dgl_spinlock_t) (struct task_struct *t);
1816 +
1817 +
1818 +typedef int (*higher_prio_t)(struct task_struct* a, struct task_struct* b);
1819 +
1820 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
1821 +
1822 +typedef enum
1823 +{
1824 +	BASE,
1825 +	EFFECTIVE
1826 +} comparison_mode_t;
1827 +
1828 +typedef int (*__higher_prio_t)(struct task_struct* a, comparison_mode_t a_mod,
1829 +							  struct task_struct* b, comparison_mode_t b_mod);
1830 +#endif
1831 +
1832  
1833  /********************* sys call backends  ********************/
1834  /* This function causes the caller to sleep until the next release */
1835 @@ -88,14 +134,40 @@ struct sched_plugin {
1836  	/*	task state changes 	*/
1837  	admit_task_t		admit_task;
1838  
1839 -        task_new_t 		task_new;
1840 +    task_new_t			task_new;
1841  	task_wake_up_t		task_wake_up;
1842  	task_block_t		task_block;
1843  	task_exit_t 		task_exit;
1844  
1845 +	higher_prio_t		compare;
1846 +
1847  #ifdef CONFIG_LITMUS_LOCKING
1848  	/*	locking protocols	*/
1849  	allocate_lock_t		allocate_lock;
1850 +	increase_prio_t		increase_prio;
1851 +	decrease_prio_t		decrease_prio;
1852 +#endif
1853 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
1854 +	nested_increase_prio_t nested_increase_prio;
1855 +	nested_decrease_prio_t nested_decrease_prio;
1856 +	__higher_prio_t		__compare;
1857 +#endif
1858 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
1859 +	get_dgl_spinlock_t	get_dgl_spinlock;
1860 +#endif
1861 +
1862 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1863 +	allocate_affinity_observer_t allocate_aff_obs;
1864 +#endif
1865 +
1866 +#ifdef CONFIG_LITMUS_SOFTIRQD
1867 +    increase_prio_klitirq_t		increase_prio_klitirqd;
1868 +    decrease_prio_klitirqd_t	decrease_prio_klitirqd;
1869 +#endif
1870 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1871 +	enqueue_pai_tasklet_t		enqueue_pai_tasklet;
1872 +	change_prio_pai_tasklet_t	change_prio_pai_tasklet;
1873 +	run_tasklets_t				run_tasklets;
1874  #endif
1875  } __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
1876  
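/*
 * Illustrative sketch (not part of the patch): how a plugin might wire up a
 * few of the callbacks added to struct sched_plugin above.  The my_* symbols
 * are hypothetical stubs; the patch's real assignments are made in
 * litmus/sched_gsn_edf.c and litmus/sched_cedf.c.
 */
#include <litmus/sched_plugin.h>

static int my_compare(struct task_struct *a, struct task_struct *b)
{
	return 0;	/* stub: real plugins compare (effective) priorities */
}

static void my_increase_prio(struct task_struct *t, struct task_struct *prio_inh)
{
	/* stub: grant 't' the priority inherited from 'prio_inh' */
}

static void my_decrease_prio(struct task_struct *t, struct task_struct *prio_inh)
{
	/* stub: revoke or lower 't's inherited priority */
}

static struct sched_plugin my_plugin = {
	.plugin_name	= "MY-PLUGIN",
	.compare	= my_compare,
#ifdef CONFIG_LITMUS_LOCKING
	.increase_prio	= my_increase_prio,
	.decrease_prio	= my_decrease_prio,
#endif
};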
1877 diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h
1878 index 7ca34cb..b1b71f6 100644
1879 --- a/include/litmus/sched_trace.h
1880 +++ b/include/litmus/sched_trace.h
1881 @@ -10,13 +10,14 @@ struct st_trace_header {
1882  	u8	type;		/* Of what type is this record?  */
1883  	u8	cpu;		/* On which CPU was it recorded? */
1884  	u16	pid;		/* PID of the task.              */
1885 -	u32	job;		/* The job sequence number.      */
1886 -};
1887 +	u32 job:24;		/* The job sequence number.      */
1888 +	u8  extra;
1889 +} __attribute__((packed));
1890  
1891  #define ST_NAME_LEN 16
1892  struct st_name_data {
1893  	char	cmd[ST_NAME_LEN];/* The name of the executable of this process. */
1894 -};
1895 +} __attribute__((packed));
1896  
1897  struct st_param_data {		/* regular params */
1898  	u32	wcet;
1899 @@ -25,30 +26,29 @@ struct st_param_data {		/* regular params */
1900  	u8	partition;
1901  	u8	class;
1902  	u8	__unused[2];
1903 -};
1904 +} __attribute__((packed));
1905  
1906  struct st_release_data {	/* A job is was/is going to be released. */
1907  	u64	release;	/* What's the release time?              */
1908  	u64	deadline;	/* By when must it finish?		 */
1909 -};
1910 +} __attribute__((packed));
1911  
1912  struct st_assigned_data {	/* A job was asigned to a CPU. 		 */
1913  	u64	when;
1914  	u8	target;		/* Where should it execute?	         */
1915  	u8	__unused[7];
1916 -};
1917 +} __attribute__((packed));
1918  
1919  struct st_switch_to_data {	/* A process was switched to on a given CPU.   */
1920  	u64	when;		/* When did this occur?                        */
1921  	u32	exec_time;	/* Time the current job has executed.          */
1922  	u8	__unused[4];
1923 -
1924 -};
1925 +} __attribute__((packed));
1926  
1927  struct st_switch_away_data {	/* A process was switched away from on a given CPU. */
1928  	u64	when;
1929  	u64	exec_time;
1930 -};
1931 +} __attribute__((packed));
1932  
1933  struct st_completion_data {	/* A job completed. */
1934  	u64	when;
1935 @@ -56,35 +56,108 @@ struct st_completion_data {	/* A job completed. */
1936  				 * next task automatically; set to 0 otherwise.
1937  				 */
1938  	u8	__uflags:7;
1939 -	u8	__unused[7];
1940 -};
1941 +	u16 nv_int_count;
1942 +	u8	__unused[5];
1943 +} __attribute__((packed));
1944  
1945  struct st_block_data {		/* A task blocks. */
1946  	u64	when;
1947  	u64	__unused;
1948 -};
1949 +} __attribute__((packed));
1950  
1951  struct st_resume_data {		/* A task resumes. */
1952  	u64	when;
1953  	u64	__unused;
1954 -};
1955 +} __attribute__((packed));
1956  
1957  struct st_action_data {
1958  	u64	when;
1959  	u8	action;
1960  	u8	__unused[7];
1961 -};
1962 +} __attribute__((packed));
1963  
1964  struct st_sys_release_data {
1965  	u64	when;
1966  	u64	release;
1967 -};
1968 +} __attribute__((packed));
1969 +
1970 +
1971 +struct st_tasklet_release_data {
1972 +	u64 when;
1973 +	u64 __unused;
1974 +} __attribute__((packed));
1975 +
1976 +struct st_tasklet_begin_data {
1977 +	u64 when;
1978 +	u16 exe_pid;
1979 +	u8  __unused[6];
1980 +} __attribute__((packed));
1981 +
1982 +struct st_tasklet_end_data {
1983 +	u64 when;
1984 +	u16 exe_pid;
1985 +	u8	flushed;
1986 +	u8	__unused[5];
1987 +} __attribute__((packed));
1988 +
1989 +
1990 +struct st_work_release_data {
1991 +	u64 when;
1992 +	u64 __unused;
1993 +} __attribute__((packed));
1994 +
1995 +struct st_work_begin_data {
1996 +	u64 when;
1997 +	u16 exe_pid;
1998 +	u8	__unused[6];
1999 +} __attribute__((packed));
2000 +
2001 +struct st_work_end_data {
2002 +	u64 when;
2003 +	u16 exe_pid;
2004 +	u8	flushed;
2005 +	u8	__unused[5];
2006 +} __attribute__((packed));
2007 +
2008 +struct st_effective_priority_change_data {
2009 +	u64 when;
2010 +	u16 inh_pid;
2011 +	u8	__unused[6];
2012 +} __attribute__((packed));
2013 +
2014 +struct st_nv_interrupt_begin_data {
2015 +	u64 when;
2016 +	u32 device;
2017 +	u32 serialNumber;
2018 +} __attribute__((packed));
2019 +
2020 +struct st_nv_interrupt_end_data {
2021 +	u64 when;
2022 +	u32 device;
2023 +	u32 serialNumber;
2024 +} __attribute__((packed));
2025 +
2026 +struct st_prediction_err_data {
2027 +	u64 distance;
2028 +	u64 rel_err;
2029 +} __attribute__((packed));
2030 +
2031 +struct st_migration_data {
2032 +	u64 observed;
2033 +	u64 estimated;
2034 +} __attribute__((packed));
2035 +
2036 +struct migration_info {
2037 +	u64 observed;
2038 +	u64 estimated;
2039 +	u8 distance;
2040 +} __attribute__((packed));
2041  
2042  #define DATA(x) struct st_ ## x ## _data x;
2043  
2044  typedef enum {
2045 -        ST_NAME = 1,		/* Start at one, so that we can spot
2046 -				 * uninitialized records. */
2047 +    ST_NAME = 1, /* Start at one, so that we can spot
2048 +				  * uninitialized records. */
2049  	ST_PARAM,
2050  	ST_RELEASE,
2051  	ST_ASSIGNED,
2052 @@ -94,7 +167,19 @@ typedef enum {
2053  	ST_BLOCK,
2054  	ST_RESUME,
2055  	ST_ACTION,
2056 -	ST_SYS_RELEASE
2057 +	ST_SYS_RELEASE,
2058 +	ST_TASKLET_RELEASE,
2059 +	ST_TASKLET_BEGIN,
2060 +	ST_TASKLET_END,
2061 +	ST_WORK_RELEASE,
2062 +	ST_WORK_BEGIN,
2063 +	ST_WORK_END,
2064 +	ST_EFF_PRIO_CHANGE,
2065 +	ST_NV_INTERRUPT_BEGIN,
2066 +	ST_NV_INTERRUPT_END,
2067 +
2068 +	ST_PREDICTION_ERR,
2069 +	ST_MIGRATION,
2070  } st_event_record_type_t;
2071  
2072  struct st_event_record {
2073 @@ -113,8 +198,20 @@ struct st_event_record {
2074  		DATA(resume);
2075  		DATA(action);
2076  		DATA(sys_release);
2077 +		DATA(tasklet_release);
2078 +		DATA(tasklet_begin);
2079 +		DATA(tasklet_end);
2080 +		DATA(work_release);
2081 +		DATA(work_begin);
2082 +		DATA(work_end);
2083 +		DATA(effective_priority_change);
2084 +		DATA(nv_interrupt_begin);
2085 +		DATA(nv_interrupt_end);
2086 +
2087 +		DATA(prediction_err);
2088 +		DATA(migration);
2089  	} data;
2090 -};
2091 +} __attribute__((packed));
2092  
2093  #undef DATA
2094  
2095 @@ -129,6 +226,8 @@ struct st_event_record {
2096  	ft_event1(id, callback, task)
2097  #define SCHED_TRACE2(id, callback, task, xtra) \
2098  	ft_event2(id, callback, task, xtra)
2099 +#define SCHED_TRACE3(id, callback, task, xtra1, xtra2) \
2100 +	ft_event3(id, callback, task, xtra1, xtra2)
2101  
2102  /* provide prototypes; needed on sparc64 */
2103  #ifndef NO_TASK_TRACE_DECLS
2104 @@ -155,12 +254,58 @@ feather_callback void do_sched_trace_action(unsigned long id,
2105  feather_callback void do_sched_trace_sys_release(unsigned long id,
2106  						 lt_t* start);
2107  
2108 +
2109 +feather_callback void do_sched_trace_tasklet_release(unsigned long id,
2110 +												   struct task_struct* owner);
2111 +feather_callback void do_sched_trace_tasklet_begin(unsigned long id,
2112 +												  struct task_struct* owner);
2113 +feather_callback void do_sched_trace_tasklet_end(unsigned long id,
2114 +												 struct task_struct* owner,
2115 +												 unsigned long flushed);
2116 +
2117 +feather_callback void do_sched_trace_work_release(unsigned long id,
2118 +													 struct task_struct* owner);
2119 +feather_callback void do_sched_trace_work_begin(unsigned long id,
2120 +												struct task_struct* owner,
2121 +												struct task_struct* exe);
2122 +feather_callback void do_sched_trace_work_end(unsigned long id,
2123 +											  struct task_struct* owner,
2124 +											  struct task_struct* exe,
2125 +											  unsigned long flushed);
2126 +
2127 +feather_callback void do_sched_trace_eff_prio_change(unsigned long id,
2128 +											  struct task_struct* task,
2129 +											  struct task_struct* inh);
2130 +
2131 +feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id,
2132 +												u32 device);
2133 +feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id,
2134 +												unsigned long unused);
2135 +
2136 +feather_callback void do_sched_trace_prediction_err(unsigned long id,
2137 +													  struct task_struct* task,
2138 +													  gpu_migration_dist_t* distance,
2139 +													  fp_t* rel_err);
2140 +
2141 +
2142 +
2143 +
2144 +
2145 +feather_callback void do_sched_trace_migration(unsigned long id,
2146 +											  struct task_struct* task,
2147 +											  struct migration_info* mig_info);
2148 +
2149 +
2150 +/* returns true if we're tracing an interrupt on current CPU */
2151 +/* int is_interrupt_tracing_active(void); */
2152 +
2153  #endif
2154  
2155  #else
2156  
2157  #define SCHED_TRACE(id, callback, task)        /* no tracing */
2158  #define SCHED_TRACE2(id, callback, task, xtra) /* no tracing */
2159 +#define SCHED_TRACE3(id, callback, task, xtra1, xtra2)
2160  
2161  #endif
2162  
2163 @@ -193,6 +338,41 @@ feather_callback void do_sched_trace_sys_release(unsigned long id,
2164  	SCHED_TRACE(SCHED_TRACE_BASE_ID + 10, do_sched_trace_sys_release, when)
2165  
2166  
2167 +#define sched_trace_tasklet_release(t) \
2168 +	SCHED_TRACE(SCHED_TRACE_BASE_ID + 11, do_sched_trace_tasklet_release, t)
2169 +
2170 +#define sched_trace_tasklet_begin(t) \
2171 +	SCHED_TRACE(SCHED_TRACE_BASE_ID + 12, do_sched_trace_tasklet_begin, t)
2172 +
2173 +#define sched_trace_tasklet_end(t, flushed) \
2174 +	SCHED_TRACE2(SCHED_TRACE_BASE_ID + 13, do_sched_trace_tasklet_end, t, flushed)
2175 +
2176 +
2177 +#define sched_trace_work_release(t) \
2178 +	SCHED_TRACE(SCHED_TRACE_BASE_ID + 14, do_sched_trace_work_release, t)
2179 +
2180 +#define sched_trace_work_begin(t, e) \
2181 +	SCHED_TRACE2(SCHED_TRACE_BASE_ID + 15, do_sched_trace_work_begin, t, e)
2182 +
2183 +#define sched_trace_work_end(t, e, flushed) \
2184 +	SCHED_TRACE3(SCHED_TRACE_BASE_ID + 16, do_sched_trace_work_end, t, e, flushed)
2185 +
2186 +
2187 +#define sched_trace_eff_prio_change(t, inh) \
2188 +	SCHED_TRACE2(SCHED_TRACE_BASE_ID + 17, do_sched_trace_eff_prio_change, t, inh)
2189 +
2190 +
2191 +#define sched_trace_nv_interrupt_begin(d) \
2192 +	SCHED_TRACE(SCHED_TRACE_BASE_ID + 18, do_sched_trace_nv_interrupt_begin, d)
2193 +#define sched_trace_nv_interrupt_end(d) \
2194 +	SCHED_TRACE(SCHED_TRACE_BASE_ID + 19, do_sched_trace_nv_interrupt_end, d)
2195 +
2196 +#define sched_trace_prediction_err(t, dist, rel_err) \
2197 +	SCHED_TRACE3(SCHED_TRACE_BASE_ID + 20, do_sched_trace_prediction_err, t, dist, rel_err)
2198 +
2199 +#define sched_trace_migration(t, mig_info) \
2200 +	SCHED_TRACE2(SCHED_TRACE_BASE_ID + 21, do_sched_trace_migration, t, mig_info)
2201 +
2202  #define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */
2203  
2204  #endif /* __KERNEL__ */
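/*
 * Illustrative note (not part of the patch): with __attribute__((packed)),
 * the header change above carves an 8-bit 'extra' field out of what used to
 * be a 32-bit job counter, so st_trace_header stays 8 bytes and
 * st_event_record stays 24 bytes; job sequence numbers now wrap at 2^24.
 * A compile-time check of that size assumption could look like this:
 */
#include <linux/kernel.h>
#include <litmus/sched_trace.h>

static inline void example_check_sched_trace_sizes(void)
{
	BUILD_BUG_ON(sizeof(struct st_trace_header) != 8);
	BUILD_BUG_ON(sizeof(struct st_event_record) != 24);
}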
2205 diff --git a/include/litmus/sched_trace_external.h b/include/litmus/sched_trace_external.h
2206 new file mode 100644
2207 index 0000000..e70e45e
2208 --- /dev/null
2209 +++ b/include/litmus/sched_trace_external.h
2210 @@ -0,0 +1,78 @@
2211 +/*
2212 + * sched_trace_external.h -- record scheduler events to a byte stream for offline analysis.
2213 + */
2214 +#ifndef _LINUX_SCHED_TRACE_EXTERNAL_H_
2215 +#define _LINUX_SCHED_TRACE_EXTERNAL_H_
2216 +
2217 +
2218 +#ifdef CONFIG_SCHED_TASK_TRACE
2219 +extern void __sched_trace_tasklet_begin_external(struct task_struct* t);
2220 +static inline void sched_trace_tasklet_begin_external(struct task_struct* t)
2221 +{
2222 +	__sched_trace_tasklet_begin_external(t);
2223 +}
2224 +
2225 +extern void __sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed);
2226 +static inline void sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed)
2227 +{
2228 +	__sched_trace_tasklet_end_external(t, flushed);
2229 +}
2230 +
2231 +extern void __sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e);
2232 +static inline void sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e)
2233 +{
2234 +	__sched_trace_work_begin_external(t, e);
2235 +}
2236 +
2237 +extern void __sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f);
2238 +static inline void sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f)
2239 +{
2240 +	__sched_trace_work_end_external(t, e, f);
2241 +}
2242 +
2243 +#ifdef CONFIG_LITMUS_NVIDIA
2244 +extern void __sched_trace_nv_interrupt_begin_external(u32 device);
2245 +static inline void sched_trace_nv_interrupt_begin_external(u32 device)
2246 +{
2247 +	__sched_trace_nv_interrupt_begin_external(device);
2248 +}
2249 +
2250 +extern void __sched_trace_nv_interrupt_end_external(u32 device);
2251 +static inline void sched_trace_nv_interrupt_end_external(u32 device)
2252 +{
2253 +	__sched_trace_nv_interrupt_end_external(device);
2254 +}
2255 +#endif
2256 +
2257 +#else
2258 +
2259 +// no tracing.
2260 +static inline void sched_trace_tasklet_begin_external(struct task_struct* t){}
2261 +static inline void sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed){}
2262 +static inline void sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e){}
2263 +static inline void sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f){}
2264 +
2265 +#ifdef CONFIG_LITMUS_NVIDIA
2266 +static inline void sched_trace_nv_interrupt_begin_external(u32 device){}
2267 +static inline void sched_trace_nv_interrupt_end_external(u32 device){}
2268 +#endif
2269 +
2270 +#endif
2271 +
2272 +
2273 +#ifdef CONFIG_LITMUS_NVIDIA
2274 +
2275 +#define EX_TS(evt) \
2276 +extern void __##evt(void); \
2277 +static inline void EX_##evt(void) { __##evt(); }
2278 +
2279 +EX_TS(TS_NV_TOPISR_START)
2280 +EX_TS(TS_NV_TOPISR_END)
2281 +EX_TS(TS_NV_BOTISR_START)
2282 +EX_TS(TS_NV_BOTISR_END)
2283 +EX_TS(TS_NV_RELEASE_BOTISR_START)
2284 +EX_TS(TS_NV_RELEASE_BOTISR_END)
2285 +
2286 +#endif
2287 +
2288 +#endif
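/*
 * Illustrative note (not part of the patch): for reference, the EX_TS()
 * wrapper above expands as shown below for TS_NV_TOPISR_START.  It gives
 * callers outside the LITMUS^RT core an out-of-line timestamp symbol
 * (presumably defined in litmus/sched_trace_external.c) plus an inline
 * EX_-prefixed wrapper, so they can take Feather-Trace timestamps without
 * pulling in litmus/trace.h:
 *
 *   extern void __TS_NV_TOPISR_START(void);
 *   static inline void EX_TS_NV_TOPISR_START(void) { __TS_NV_TOPISR_START(); }
 */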
2289 diff --git a/include/litmus/trace.h b/include/litmus/trace.h
2290 index e809376..e078aee 100644
2291 --- a/include/litmus/trace.h
2292 +++ b/include/litmus/trace.h
2293 @@ -103,14 +103,46 @@ feather_callback void save_task_latency(unsigned long event, unsigned long when_
2294  #define TS_LOCK_START			TIMESTAMP(170)
2295  #define TS_LOCK_SUSPEND			TIMESTAMP(171)
2296  #define TS_LOCK_RESUME			TIMESTAMP(172)
2297 -#define TS_LOCK_END			TIMESTAMP(173)
2298 +#define TS_LOCK_END				TIMESTAMP(173)
2299 +
2300 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
2301 +#define TS_DGL_LOCK_START			TIMESTAMP(175)
2302 +#define TS_DGL_LOCK_SUSPEND			TIMESTAMP(176)
2303 +#define TS_DGL_LOCK_RESUME			TIMESTAMP(177)
2304 +#define TS_DGL_LOCK_END				TIMESTAMP(178)
2305 +#endif
2306  
2307  #define TS_UNLOCK_START			TIMESTAMP(180)
2308  #define TS_UNLOCK_END			TIMESTAMP(181)
2309  
2310 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
2311 +#define TS_DGL_UNLOCK_START			TIMESTAMP(185)
2312 +#define TS_DGL_UNLOCK_END			TIMESTAMP(186)
2313 +#endif
2314 +
2315  #define TS_SEND_RESCHED_START(c)	CTIMESTAMP(190, c)
2316  #define TS_SEND_RESCHED_END		DTIMESTAMP(191, TSK_UNKNOWN)
2317  
2318  #define TS_RELEASE_LATENCY(when)	LTIMESTAMP(208, &(when))
2319  
2320 +
2321 +#ifdef CONFIG_LITMUS_NVIDIA
2322 +
2323 +#define TS_NV_TOPISR_START		TIMESTAMP(200)
2324 +#define TS_NV_TOPISR_END		TIMESTAMP(201)
2325 +
2326 +#define TS_NV_BOTISR_START		TIMESTAMP(202)
2327 +#define TS_NV_BOTISR_END		TIMESTAMP(203)
2328 +
2329 +#define TS_NV_RELEASE_BOTISR_START	TIMESTAMP(204)
2330 +#define TS_NV_RELEASE_BOTISR_END	TIMESTAMP(205)
2331 +
2332 +#endif
2333 +
2334 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
2335 +#define TS_NV_SCHED_BOTISR_START	TIMESTAMP(206)
2336 +#define TS_NV_SCHED_BOTISR_END		TIMESTAMP(207)
2337 +#endif
2338 +
2339 +
2340  #endif /* !_SYS_TRACE_H_ */
2341 diff --git a/include/litmus/unistd_32.h b/include/litmus/unistd_32.h
2342 index 94264c2..4fa514c 100644
2343 --- a/include/litmus/unistd_32.h
2344 +++ b/include/litmus/unistd_32.h
2345 @@ -17,5 +17,8 @@
2346  #define __NR_wait_for_ts_release __LSC(9)
2347  #define __NR_release_ts		__LSC(10)
2348  #define __NR_null_call		__LSC(11)
2349 +#define __NR_litmus_dgl_lock	__LSC(12)
2350 +#define __NR_litmus_dgl_unlock	__LSC(13)
2351 +#define __NR_register_nv_device			__LSC(14)
2352  
2353 -#define NR_litmus_syscalls 12
2354 +#define NR_litmus_syscalls 15
2355 diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h
2356 index d5ced0d..f80dc45 100644
2357 --- a/include/litmus/unistd_64.h
2358 +++ b/include/litmus/unistd_64.h
2359 @@ -29,5 +29,12 @@ __SYSCALL(__NR_wait_for_ts_release, sys_wait_for_ts_release)
2360  __SYSCALL(__NR_release_ts, sys_release_ts)
2361  #define __NR_null_call				__LSC(11)
2362  __SYSCALL(__NR_null_call, sys_null_call)
2363 +#define __NR_litmus_dgl_lock		__LSC(12)
2364 +__SYSCALL(__NR_litmus_dgl_lock, sys_litmus_dgl_lock)
2365 +#define __NR_litmus_dgl_unlock		__LSC(13)
2366 +__SYSCALL(__NR_litmus_dgl_unlock, sys_litmus_dgl_unlock)
2367 +#define __NR_register_nv_device			__LSC(14)
2368 +__SYSCALL(__NR_register_nv_device, sys_register_nv_device)
2369  
2370 -#define NR_litmus_syscalls 12
2371 +
2372 +#define NR_litmus_syscalls 15
2373 diff --git a/kernel/lockdep.c b/kernel/lockdep.c
2374 index 298c927..2bdcdc3 100644
2375 --- a/kernel/lockdep.c
2376 +++ b/kernel/lockdep.c
2377 @@ -542,7 +542,7 @@ static void print_lock(struct held_lock *hlock)
2378  	print_ip_sym(hlock->acquire_ip);
2379  }
2380  
2381 -static void lockdep_print_held_locks(struct task_struct *curr)
2382 +void lockdep_print_held_locks(struct task_struct *curr)
2383  {
2384  	int i, depth = curr->lockdep_depth;
2385  
2386 @@ -558,6 +558,7 @@ static void lockdep_print_held_locks(struct task_struct *curr)
2387  		print_lock(curr->held_locks + i);
2388  	}
2389  }
2390 +EXPORT_SYMBOL(lockdep_print_held_locks);
2391  
2392  static void print_kernel_version(void)
2393  {
2394 @@ -583,6 +584,10 @@ static int static_obj(void *obj)
2395  		      end   = (unsigned long) &_end,
2396  		      addr  = (unsigned long) obj;
2397  
2398 +	// GLENN: treat all lock keys as static objects (lockdep workaround).
2399 +	return 1;
2400 +
2401 +
2402  	/*
2403  	 * static variable?
2404  	 */
2405 diff --git a/kernel/mutex.c b/kernel/mutex.c
2406 index d607ed5..2f363b9 100644
2407 --- a/kernel/mutex.c
2408 +++ b/kernel/mutex.c
2409 @@ -498,3 +498,128 @@ int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock)
2410  	return 1;
2411  }
2412  EXPORT_SYMBOL(atomic_dec_and_mutex_lock);
2413 +
2414 +
2415 +
2416 +
2417 +void mutex_lock_sfx(struct mutex *lock,
2418 +				   side_effect_t pre, unsigned long pre_arg,
2419 +				   side_effect_t post, unsigned long post_arg)
2420 +{
2421 +	long state = TASK_UNINTERRUPTIBLE;
2422 +
2423 +	struct task_struct *task = current;
2424 +	struct mutex_waiter waiter;
2425 +	unsigned long flags;
2426 +	
2427 +	preempt_disable();
2428 +	mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
2429 +
2430 +	spin_lock_mutex(&lock->wait_lock, flags);
2431 +	
2432 +	if(pre)
2433 +	{
2434 +		if(unlikely(pre(pre_arg)))
2435 +		{
2436 +			// this will confuse lockdep's CONFIG_PROVE_LOCKING...
2437 +			spin_unlock_mutex(&lock->wait_lock, flags);
2438 +			preempt_enable();
2439 +			return;
2440 +		}
2441 +	}
2442 +
2443 +	debug_mutex_lock_common(lock, &waiter);
2444 +	debug_mutex_add_waiter(lock, &waiter, task_thread_info(task));
2445 +	
2446 +	/* add waiting tasks to the end of the waitqueue (FIFO): */
2447 +	list_add_tail(&waiter.list, &lock->wait_list);
2448 +	waiter.task = task;
2449 +	
2450 +	if (atomic_xchg(&lock->count, -1) == 1)
2451 +		goto done;
2452 +	
2453 +	lock_contended(&lock->dep_map, _RET_IP_);
2454 +	
2455 +	for (;;) {
2456 +		/*
2457 +		 * Lets try to take the lock again - this is needed even if
2458 +		 * we get here for the first time (shortly after failing to
2459 +		 * acquire the lock), to make sure that we get a wakeup once
2460 +		 * it's unlocked. Later on, if we sleep, this is the
2461 +		 * operation that gives us the lock. We xchg it to -1, so
2462 +		 * that when we release the lock, we properly wake up the
2463 +		 * other waiters:
2464 +		 */
2465 +		if (atomic_xchg(&lock->count, -1) == 1)
2466 +			break;
2467 +		
2468 +		__set_task_state(task, state);
2469 +		
2470 +		/* didnt get the lock, go to sleep: */
2471 +		spin_unlock_mutex(&lock->wait_lock, flags);
2472 +		preempt_enable_no_resched();
2473 +		schedule();
2474 +		preempt_disable();
2475 +		spin_lock_mutex(&lock->wait_lock, flags);
2476 +	}
2477 +	
2478 +done:
2479 +	lock_acquired(&lock->dep_map, _RET_IP_);
2480 +	/* got the lock - rejoice! */
2481 +	mutex_remove_waiter(lock, &waiter, current_thread_info());
2482 +	mutex_set_owner(lock);
2483 +	
2484 +	/* set it to 0 if there are no waiters left: */
2485 +	if (likely(list_empty(&lock->wait_list)))
2486 +		atomic_set(&lock->count, 0);
2487 +	
2488 +	if(post)
2489 +		post(post_arg);	
2490 +	
2491 +	spin_unlock_mutex(&lock->wait_lock, flags);
2492 +	
2493 +	debug_mutex_free_waiter(&waiter);
2494 +	preempt_enable();
2495 +}
2496 +EXPORT_SYMBOL(mutex_lock_sfx);
2497 +
2498 +void mutex_unlock_sfx(struct mutex *lock,
2499 +					side_effect_t pre, unsigned long pre_arg,
2500 +					side_effect_t post, unsigned long post_arg)
2501 +{
2502 +	unsigned long flags;
2503 +	
2504 +	spin_lock_mutex(&lock->wait_lock, flags);
2505 +	
2506 +	if(pre)
2507 +		pre(pre_arg);
2508 +	
2509 +	//mutex_release(&lock->dep_map, nested, _RET_IP_);
2510 +	mutex_release(&lock->dep_map, 1, _RET_IP_);
2511 +	debug_mutex_unlock(lock);
2512 +	
2513 +	/*
2514 +	 * some architectures leave the lock unlocked in the fastpath failure
2515 +	 * case, others need to leave it locked. In the later case we have to
2516 +	 * unlock it here
2517 +	 */
2518 +	if (__mutex_slowpath_needs_to_unlock())
2519 +		atomic_set(&lock->count, 1);
2520 +	
2521 +	if (!list_empty(&lock->wait_list)) {
2522 +		/* get the first entry from the wait-list: */
2523 +		struct mutex_waiter *waiter =
2524 +		list_entry(lock->wait_list.next,
2525 +				   struct mutex_waiter, list);
2526 +		
2527 +		debug_mutex_wake_waiter(lock, waiter);
2528 +		
2529 +		wake_up_process(waiter->task);
2530 +	}
2531 +	
2532 +	if(post)
2533 +		post(post_arg);
2534 +	
2535 +	spin_unlock_mutex(&lock->wait_lock, flags);	
2536 +}
2537 +EXPORT_SYMBOL(mutex_unlock_sfx);
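/*
 * Illustrative sketch (not part of the patch): calling the side-effect mutex
 * primitives added above.  side_effect_t itself is typedef'd in the
 * include/linux/mutex.h hunk (not shown here); it is assumed below to take
 * an unsigned long and to abort the slow-path acquisition when the 'pre'
 * hook returns nonzero.  note_acquire/note_release are hypothetical
 * callbacks.
 */
#include <linux/mutex.h>

static int note_acquire(unsigned long arg)
{
	/* runs under lock->wait_lock before the caller queues/blocks */
	return 0;	/* 0: proceed with the acquisition */
}

static int note_release(unsigned long arg)
{
	/* runs under lock->wait_lock before the next waiter is woken */
	return 0;
}

static DEFINE_MUTEX(example_sfx_mutex);

static void example_sfx_use(void)
{
	mutex_lock_sfx(&example_sfx_mutex, note_acquire, 0, NULL, 0);
	/* ... critical section ... */
	mutex_unlock_sfx(&example_sfx_mutex, NULL, 0, note_release, 0);
}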
2538 diff --git a/kernel/sched.c b/kernel/sched.c
2539 index baaca61..f3d9a69 100644
2540 --- a/kernel/sched.c
2541 +++ b/kernel/sched.c
2542 @@ -83,6 +83,10 @@
2543  #include <litmus/sched_trace.h>
2544  #include <litmus/trace.h>
2545  
2546 +#ifdef CONFIG_LITMUS_SOFTIRQD
2547 +#include <litmus/litmus_softirq.h>
2548 +#endif
2549 +
2550  static void litmus_tick(struct rq*, struct task_struct*);
2551  
2552  #define CREATE_TRACE_POINTS
2553 @@ -4305,6 +4309,7 @@ pick_next_task(struct rq *rq)
2554  	BUG(); /* the idle class will always have a runnable task */
2555  }
2556  
2557 +
2558  /*
2559   * schedule() is the main scheduler function.
2560   */
2561 @@ -4323,6 +4328,10 @@ need_resched:
2562  	rcu_note_context_switch(cpu);
2563  	prev = rq->curr;
2564  
2565 +#ifdef CONFIG_LITMUS_SOFTIRQD
2566 +	release_klitirqd_lock(prev);
2567 +#endif
2568 +
2569  	/* LITMUS^RT: quickly re-evaluate the scheduling decision
2570  	 * if the previous one is no longer valid after CTX.
2571  	 */
2572 @@ -4411,13 +4420,24 @@ litmus_need_resched_nonpreemptible:
2573  		goto litmus_need_resched_nonpreemptible;
2574  
2575  	preempt_enable_no_resched();
2576 +
2577  	if (need_resched())
2578  		goto need_resched;
2579  
2580 +#ifdef CONFIG_LITMUS_SOFTIRQD
2581 +	reacquire_klitirqd_lock(prev);
2582 +#endif
2583 +
2584 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
2585 +	litmus->run_tasklets(prev);
2586 +#endif	
2587 +	
2588  	srp_ceiling_block();
2589  }
2590  EXPORT_SYMBOL(schedule);
2591  
2592 +
2593 +
2594  #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
2595  
2596  static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
2597 @@ -4561,6 +4581,7 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
2598  	}
2599  }
2600  
2601 +
2602  /**
2603   * __wake_up - wake up threads blocked on a waitqueue.
2604   * @q: the waitqueue
2605 @@ -4747,6 +4768,12 @@ void __sched wait_for_completion(struct completion *x)
2606  }
2607  EXPORT_SYMBOL(wait_for_completion);
2608  
2609 +void __sched __wait_for_completion_locked(struct completion *x)
2610 +{
2611 +	do_wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
2612 +}
2613 +EXPORT_SYMBOL(__wait_for_completion_locked);
2614 +
2615  /**
2616   * wait_for_completion_timeout: - waits for completion of a task (w/timeout)
2617   * @x:  holds the state of this particular completion
2618 diff --git a/kernel/semaphore.c b/kernel/semaphore.c
2619 index 94a62c0..c947a04 100644
2620 --- a/kernel/semaphore.c
2621 +++ b/kernel/semaphore.c
2622 @@ -33,11 +33,11 @@
2623  #include <linux/spinlock.h>
2624  #include <linux/ftrace.h>
2625  
2626 -static noinline void __down(struct semaphore *sem);
2627 +noinline void __down(struct semaphore *sem);
2628  static noinline int __down_interruptible(struct semaphore *sem);
2629  static noinline int __down_killable(struct semaphore *sem);
2630  static noinline int __down_timeout(struct semaphore *sem, long jiffies);
2631 -static noinline void __up(struct semaphore *sem);
2632 +noinline void __up(struct semaphore *sem);
2633  
2634  /**
2635   * down - acquire the semaphore
2636 @@ -190,11 +190,13 @@ EXPORT_SYMBOL(up);
2637  
2638  /* Functions for the contended case */
2639  
2640 +/*
2641  struct semaphore_waiter {
2642  	struct list_head list;
2643  	struct task_struct *task;
2644  	int up;
2645  };
2646 + */
2647  
2648  /*
2649   * Because this function is inlined, the 'state' parameter will be
2650 @@ -233,10 +235,12 @@ static inline int __sched __down_common(struct semaphore *sem, long state,
2651  	return -EINTR;
2652  }
2653  
2654 -static noinline void __sched __down(struct semaphore *sem)
2655 +noinline void __sched __down(struct semaphore *sem)
2656  {
2657  	__down_common(sem, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
2658  }
2659 +EXPORT_SYMBOL(__down);
2660 +
2661  
2662  static noinline int __sched __down_interruptible(struct semaphore *sem)
2663  {
2664 @@ -253,7 +257,7 @@ static noinline int __sched __down_timeout(struct semaphore *sem, long jiffies)
2665  	return __down_common(sem, TASK_UNINTERRUPTIBLE, jiffies);
2666  }
2667  
2668 -static noinline void __sched __up(struct semaphore *sem)
2669 +noinline void __sched __up(struct semaphore *sem)
2670  {
2671  	struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list,
2672  						struct semaphore_waiter, list);
2673 @@ -261,3 +265,4 @@ static noinline void __sched __up(struct semaphore *sem)
2674  	waiter->up = 1;
2675  	wake_up_process(waiter->task);
2676  }
2677 +EXPORT_SYMBOL(__up);
2678 \ No newline at end of file
2679 diff --git a/kernel/softirq.c b/kernel/softirq.c
2680 index fca82c3..5ce2716 100644
2681 --- a/kernel/softirq.c
2682 +++ b/kernel/softirq.c
2683 @@ -29,6 +29,15 @@
2684  #include <trace/events/irq.h>
2685  
2686  #include <asm/irq.h>
2687 +
2688 +#include <litmus/litmus.h>
2689 +#include <litmus/sched_trace.h>
2690 +
2691 +#ifdef CONFIG_LITMUS_NVIDIA
2692 +#include <litmus/nvidia_info.h>
2693 +#include <litmus/trace.h>
2694 +#endif
2695 +
2696  /*
2697     - No shared variables, all the data are CPU local.
2698     - If a softirq needs serialization, let it serialize itself
2699 @@ -67,7 +76,7 @@ char *softirq_to_name[NR_SOFTIRQS] = {
2700   * to the pending events, so lets the scheduler to balance
2701   * the softirq load for us.
2702   */
2703 -static void wakeup_softirqd(void)
2704 +void wakeup_softirqd(void)
2705  {
2706  	/* Interrupts are disabled: no need to stop preemption */
2707  	struct task_struct *tsk = __this_cpu_read(ksoftirqd);
2708 @@ -193,6 +202,7 @@ void local_bh_enable_ip(unsigned long ip)
2709  }
2710  EXPORT_SYMBOL(local_bh_enable_ip);
2711  
2712 +
2713  /*
2714   * We restart softirq processing MAX_SOFTIRQ_RESTART times,
2715   * and we fall back to softirqd after that.
2716 @@ -206,65 +216,65 @@ EXPORT_SYMBOL(local_bh_enable_ip);
2717  
2718  asmlinkage void __do_softirq(void)
2719  {
2720 -	struct softirq_action *h;
2721 -	__u32 pending;
2722 -	int max_restart = MAX_SOFTIRQ_RESTART;
2723 -	int cpu;
2724 +    struct softirq_action *h; 
2725 +    __u32 pending;
2726 +    int max_restart = MAX_SOFTIRQ_RESTART;
2727 +    int cpu;
2728  
2729 -	pending = local_softirq_pending();
2730 -	account_system_vtime(current);
2731 +    pending = local_softirq_pending();
2732 +    account_system_vtime(current);
2733  
2734 -	__local_bh_disable((unsigned long)__builtin_return_address(0),
2735 -				SOFTIRQ_OFFSET);
2736 -	lockdep_softirq_enter();
2737 +    __local_bh_disable((unsigned long)__builtin_return_address(0),
2738 +                SOFTIRQ_OFFSET);
2739 +    lockdep_softirq_enter();
2740  
2741 -	cpu = smp_processor_id();
2742 +    cpu = smp_processor_id();
2743  restart:
2744 -	/* Reset the pending bitmask before enabling irqs */
2745 -	set_softirq_pending(0);
2746 +    /* Reset the pending bitmask before enabling irqs */
2747 +    set_softirq_pending(0);
2748  
2749 -	local_irq_enable();
2750 +    local_irq_enable();
2751  
2752 -	h = softirq_vec;
2753 -
2754 -	do {
2755 -		if (pending & 1) {
2756 -			unsigned int vec_nr = h - softirq_vec;
2757 -			int prev_count = preempt_count();
2758 -
2759 -			kstat_incr_softirqs_this_cpu(vec_nr);
2760 -
2761 -			trace_softirq_entry(vec_nr);
2762 -			h->action(h);
2763 -			trace_softirq_exit(vec_nr);
2764 -			if (unlikely(prev_count != preempt_count())) {
2765 -				printk(KERN_ERR "huh, entered softirq %u %s %p"
2766 -				       "with preempt_count %08x,"
2767 -				       " exited with %08x?\n", vec_nr,
2768 -				       softirq_to_name[vec_nr], h->action,
2769 -				       prev_count, preempt_count());
2770 -				preempt_count() = prev_count;
2771 -			}
2772 +    h = softirq_vec;
2773  
2774 -			rcu_bh_qs(cpu);
2775 -		}
2776 -		h++;
2777 -		pending >>= 1;
2778 -	} while (pending);
2779 +    do {
2780 +        if (pending & 1) {
2781 +            unsigned int vec_nr = h - softirq_vec;
2782 +            int prev_count = preempt_count();
2783  
2784 -	local_irq_disable();
2785 +            kstat_incr_softirqs_this_cpu(vec_nr);
2786  
2787 -	pending = local_softirq_pending();
2788 -	if (pending && --max_restart)
2789 -		goto restart;
2790 +            trace_softirq_entry(vec_nr);
2791 +            h->action(h);
2792 +            trace_softirq_exit(vec_nr);
2793 +            if (unlikely(prev_count != preempt_count())) {
2794 +                printk(KERN_ERR "huh, entered softirq %u %s %p"
2795 +                       "with preempt_count %08x,"
2796 +                       " exited with %08x?\n", vec_nr,
2797 +                       softirq_to_name[vec_nr], h->action,
2798 +                       prev_count, preempt_count());
2799 +                preempt_count() = prev_count;
2800 +            }   
2801  
2802 -	if (pending)
2803 -		wakeup_softirqd();
2804 +            rcu_bh_qs(cpu);
2805 +        }   
2806 +        h++;
2807 +        pending >>= 1;
2808 +    } while (pending);
2809  
2810 -	lockdep_softirq_exit();
2811 +    local_irq_disable();
2812  
2813 -	account_system_vtime(current);
2814 -	__local_bh_enable(SOFTIRQ_OFFSET);
2815 +    pending = local_softirq_pending();
2816 +    if (pending && --max_restart)
2817 +        goto restart;
2818 +
2819 +    if (pending)
2820 +        wakeup_softirqd();
2821 +
2822 +    lockdep_softirq_exit();
2823 +
2824 +    account_system_vtime(current);
2825 +    __local_bh_enable(SOFTIRQ_OFFSET);
2826  }
2827  
2828  #ifndef __ARCH_HAS_DO_SOFTIRQ
2829 @@ -402,8 +412,99 @@ struct tasklet_head
2830  static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
2831  static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
2832  
2833 +#ifdef CONFIG_LITMUS_NVIDIA
2834 +static int __do_nv_now(struct tasklet_struct* tasklet)
2835 +{
2836 +	int success = 1;
2837 +
2838 +	if(tasklet_trylock(tasklet)) {
2839 +		if (!atomic_read(&tasklet->count)) {
2840 +			if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state)) {    
2841 +				BUG();
2842 +			}
2843 +			tasklet->func(tasklet->data);
2844 +			/* unlocked below */
2845 +		}
2846 +		else {
2847 +			success = 0;
2848 +		}
2849 +
2850 +		tasklet_unlock(tasklet);
2851 +	}
2852 +	else {
2853 +		success = 0;
2854 +	}
2855 +
2856 +	return success;
2857 +}
2858 +#endif
2859 +
2860 +
2861  void __tasklet_schedule(struct tasklet_struct *t)
2862  {
2863 +#ifdef CONFIG_LITMUS_NVIDIA
2864 +	if(is_nvidia_func(t->func))
2865 +	{
2866 +#if 0
2867 +		// do nvidia tasklets right away and return
2868 +		if(__do_nv_now(t))
2869 +			return;
2870 +#else
2871 +		u32 nvidia_device = get_tasklet_nv_device_num(t);	
2872 +		//		TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n",
2873 +		//			  __FUNCTION__, nvidia_device,litmus_clock());
2874 +
2875 +		unsigned long flags;
2876 +		struct task_struct* device_owner;
2877 +
2878 +		lock_nv_registry(nvidia_device, &flags);
2879 +
2880 +		device_owner = get_nv_max_device_owner(nvidia_device);
2881 +
2882 +		if(device_owner==NULL)
2883 +		{
2884 +			t->owner = NULL;
2885 +		}
2886 +		else
2887 +		{
2888 +			if(is_realtime(device_owner))
2889 +			{
2890 +				TRACE("%s: Handling NVIDIA tasklet for device %u at %llu\n",
2891 +					  __FUNCTION__, nvidia_device,litmus_clock());				
2892 +				TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n",
2893 +					  __FUNCTION__,device_owner->pid,nvidia_device);
2894 +
2895 +				t->owner = device_owner;
2896 +				sched_trace_tasklet_release(t->owner);
2897 +
2898 +				if(likely(_litmus_tasklet_schedule(t,nvidia_device)))
2899 +				{
2900 +					unlock_nv_registry(nvidia_device, &flags);
2901 +					return;
2902 +				}
2903 +				else
2904 +				{
2905 +					t->owner = NULL; /* fall through to normal scheduling */
2906 +				}
2907 +			}
2908 +			else
2909 +			{
2910 +				t->owner = NULL;
2911 +			}
2912 +		}
2913 +		unlock_nv_registry(nvidia_device, &flags);
2914 +#endif
2915 +	}
2916 +
2917 +#endif
2918 +
2919 +	___tasklet_schedule(t);
2920 +}
2921 +EXPORT_SYMBOL(__tasklet_schedule);
2922 +
2923 +
2924 +void ___tasklet_schedule(struct tasklet_struct *t)
2925 +{
2926  	unsigned long flags;
2927  
2928  	local_irq_save(flags);
2929 @@ -413,11 +514,65 @@ void __tasklet_schedule(struct tasklet_struct *t)
2930  	raise_softirq_irqoff(TASKLET_SOFTIRQ);
2931  	local_irq_restore(flags);
2932  }
2933 +EXPORT_SYMBOL(___tasklet_schedule);
2934  
2935 -EXPORT_SYMBOL(__tasklet_schedule);
2936  
2937  void __tasklet_hi_schedule(struct tasklet_struct *t)
2938  {
2939 +#ifdef CONFIG_LITMUS_NVIDIA
2940 +	if(is_nvidia_func(t->func))
2941 +	{	
2942 +		u32 nvidia_device = get_tasklet_nv_device_num(t);
2943 +		//		TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n",
2944 +		//			  __FUNCTION__, nvidia_device,litmus_clock());
2945 +
2946 +		unsigned long flags;
2947 +		struct task_struct* device_owner;
2948 +		
2949 +		lock_nv_registry(nvidia_device, &flags);
2950 +		
2951 +		device_owner = get_nv_max_device_owner(nvidia_device);
2952 +
2953 +		if(device_owner==NULL) 
2954 +		{
2955 +			t->owner = NULL;
2956 +		}
2957 +		else
2958 +		{
2959 +			if( is_realtime(device_owner))
2960 +			{
2961 +				TRACE("%s: Handling NVIDIA tasklet for device %u\tat %llu\n",
2962 +					  __FUNCTION__, nvidia_device,litmus_clock());				
2963 +				TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n",
2964 +					  __FUNCTION__,device_owner->pid,nvidia_device);
2965 +				
2966 +				t->owner = device_owner;
2967 +				sched_trace_tasklet_release(t->owner);
2968 +				if(likely(_litmus_tasklet_hi_schedule(t,nvidia_device)))
2969 +				{
2970 +					unlock_nv_registry(nvidia_device, &flags);
2971 +					return;
2972 +				}
2973 +				else
2974 +				{
2975 +					t->owner = NULL; /* fall through to normal scheduling */
2976 +				}
2977 +			}
2978 +			else
2979 +			{
2980 +				t->owner = NULL;
2981 +			}
2982 +		}
2983 +		unlock_nv_registry(nvidia_device, &flags);
2984 +	}
2985 +#endif
2986 +
2987 +	___tasklet_hi_schedule(t);
2988 +}
2989 +EXPORT_SYMBOL(__tasklet_hi_schedule);
2990 +
2991 +void ___tasklet_hi_schedule(struct tasklet_struct* t)
2992 +{
2993  	unsigned long flags;
2994  
2995  	local_irq_save(flags);
2996 @@ -427,19 +582,72 @@ void __tasklet_hi_schedule(struct tasklet_struct *t)
2997  	raise_softirq_irqoff(HI_SOFTIRQ);
2998  	local_irq_restore(flags);
2999  }
3000 -
3001 -EXPORT_SYMBOL(__tasklet_hi_schedule);
3002 +EXPORT_SYMBOL(___tasklet_hi_schedule);
3003  
3004  void __tasklet_hi_schedule_first(struct tasklet_struct *t)
3005  {
3006  	BUG_ON(!irqs_disabled());
3007 +#ifdef CONFIG_LITMUS_NVIDIA	
3008 +	if(is_nvidia_func(t->func))
3009 +	{	
3010 +		u32 nvidia_device = get_tasklet_nv_device_num(t);
3011 +		//		TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n",
3012 +		//			  __FUNCTION__, nvidia_device,litmus_clock());
3013 +		unsigned long flags;
3014 +		struct task_struct* device_owner;
3015 +		
3016 +		lock_nv_registry(nvidia_device, &flags);
3017 +
3018 +		device_owner = get_nv_max_device_owner(nvidia_device);
3019 +
3020 +		if(device_owner==NULL)
3021 +		{
3022 +			t->owner = NULL;
3023 +		}
3024 +		else
3025 +		{
3026 +			if(is_realtime(device_owner))
3027 +			{
3028 +				TRACE("%s: Handling NVIDIA tasklet for device %u at %llu\n",
3029 +					  __FUNCTION__, nvidia_device,litmus_clock());
3030 +				
3031 +				TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n",
3032 +					  __FUNCTION__,device_owner->pid,nvidia_device);
3033 +				
3034 +				t->owner = device_owner;
3035 +				sched_trace_tasklet_release(t->owner);
3036 +				if(likely(_litmus_tasklet_hi_schedule_first(t,nvidia_device)))
3037 +				{
3038 +					unlock_nv_registry(nvidia_device, &flags);
3039 +					return;
3040 +				}
3041 +				else
3042 +				{
3043 +					t->owner = NULL; /* fall through to normal scheduling */
3044 +				}
3045 +			}
3046 +			else
3047 +			{
3048 +				t->owner = NULL;
3049 +			}
3050 +		}
3051 +		unlock_nv_registry(nvidia_device, &flags);
3052 +	}
3053 +#endif
3054 +
3055 +	___tasklet_hi_schedule_first(t);
3056 +}
3057 +EXPORT_SYMBOL(__tasklet_hi_schedule_first);
3058 +
3059 +void ___tasklet_hi_schedule_first(struct tasklet_struct* t)
3060 +{
3061 +	BUG_ON(!irqs_disabled());
3062  
3063  	t->next = __this_cpu_read(tasklet_hi_vec.head);
3064  	__this_cpu_write(tasklet_hi_vec.head, t);
3065  	__raise_softirq_irqoff(HI_SOFTIRQ);
3066  }
3067 -
3068 -EXPORT_SYMBOL(__tasklet_hi_schedule_first);
3069 +EXPORT_SYMBOL(___tasklet_hi_schedule_first);
3070  
3071  static void tasklet_action(struct softirq_action *a)
3072  {
3073 @@ -495,6 +703,7 @@ static void tasklet_hi_action(struct softirq_action *a)
3074  			if (!atomic_read(&t->count)) {
3075  				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
3076  					BUG();
3077 +
3078  				t->func(t->data);
3079  				tasklet_unlock(t);
3080  				continue;
3081 @@ -518,8 +727,13 @@ void tasklet_init(struct tasklet_struct *t,
3082  	t->next = NULL;
3083  	t->state = 0;
3084  	atomic_set(&t->count, 0);
3085 +
3086  	t->func = func;
3087  	t->data = data;
3088 +
3089 +#ifdef CONFIG_LITMUS_SOFTIRQD
3090 +	t->owner = NULL;
3091 +#endif
3092  }
3093  
3094  EXPORT_SYMBOL(tasklet_init);
3095 @@ -534,6 +748,7 @@ void tasklet_kill(struct tasklet_struct *t)
3096  			yield();
3097  		} while (test_bit(TASKLET_STATE_SCHED, &t->state));
3098  	}
3099 +
3100  	tasklet_unlock_wait(t);
3101  	clear_bit(TASKLET_STATE_SCHED, &t->state);
3102  }
3103 @@ -808,6 +1023,7 @@ void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
3104  	for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
3105  		if (*i == t) {
3106  			*i = t->next;
3107 +
3108  			/* If this was the tail element, move the tail ptr */
3109  			if (*i == NULL)
3110  				per_cpu(tasklet_vec, cpu).tail = i;
3111 diff --git a/kernel/workqueue.c b/kernel/workqueue.c
3112 index 0400553..6b59d59 100644
3113 --- a/kernel/workqueue.c
3114 +++ b/kernel/workqueue.c
3115 @@ -44,6 +44,13 @@
3116  
3117  #include "workqueue_sched.h"
3118  
3119 +#ifdef CONFIG_LITMUS_NVIDIA
3120 +#include <litmus/litmus.h>
3121 +#include <litmus/sched_trace.h>
3122 +#include <litmus/nvidia_info.h>
3123 +#endif
3124 +
3125 +
3126  enum {
3127  	/* global_cwq flags */
3128  	GCWQ_MANAGE_WORKERS	= 1 << 0,	/* need to manage workers */
3129 @@ -1047,9 +1054,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
3130  		work_flags |= WORK_STRUCT_DELAYED;
3131  		worklist = &cwq->delayed_works;
3132  	}
3133 -
3134  	insert_work(cwq, work, worklist, work_flags);
3135 -
3136  	spin_unlock_irqrestore(&gcwq->lock, flags);
3137  }
3138  
3139 @@ -2687,10 +2692,70 @@ EXPORT_SYMBOL(cancel_delayed_work_sync);
3140   */
3141  int schedule_work(struct work_struct *work)
3142  {
3143 -	return queue_work(system_wq, work);
3144 +#if 0
3145 +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD)
3146 +	if(is_nvidia_func(work->func))
3147 +	{
3148 +		u32 nvidiaDevice = get_work_nv_device_num(work);
3149 +		
3150 +		//1) Ask Litmus which task owns GPU <nvidiaDevice>. (API to be defined.)
3151 +		unsigned long flags;
3152 +		struct task_struct* device_owner;
3153 +		
3154 +		lock_nv_registry(nvidiaDevice, &flags);
3155 +		
3156 +		device_owner = get_nv_max_device_owner(nvidiaDevice);
3157 +		
3158 +		//2) If there is an owner, set work->owner to the owner's task struct.
3159 +		if(device_owner==NULL) 
3160 +		{
3161 +			work->owner = NULL;
3162 +			//TRACE("%s: the owner task of NVIDIA Device %u is NULL\n",__FUNCTION__,nvidiaDevice);
3163 +		}
3164 +		else
3165 +		{
3166 +			if( is_realtime(device_owner))
3167 +			{
3168 +				TRACE("%s: Handling NVIDIA work for device\t%u\tat\t%llu\n",
3169 +					  __FUNCTION__, nvidiaDevice,litmus_clock());
3170 +				TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n",
3171 +					  __FUNCTION__,
3172 +					  device_owner->pid,
3173 +					  nvidiaDevice);
3174 +				
3175 +				//3) Call litmus_schedule_work() and return (don't execute the rest
3176 +				//	of schedule_work()).
3177 +				work->owner = device_owner;
3178 +				sched_trace_work_release(work->owner);
3179 +				if(likely(litmus_schedule_work(work, nvidiaDevice)))
3180 +				{
3181 +					unlock_nv_registry(nvidiaDevice, &flags);
3182 +					return 1;
3183 +				}
3184 +				else
3185 +				{
3186 +					work->owner = NULL; /* fall through to normal work scheduling */
3187 +				}
3188 +			}
3189 +			else
3190 +			{
3191 +				work->owner = NULL;
3192 +			}
3193 +		}
3194 +		unlock_nv_registry(nvidiaDevice, &flags);
3195 +	}
3196 +#endif
3197 +#endif
3198 +	return(__schedule_work(work));
3199  }
3200  EXPORT_SYMBOL(schedule_work);
3201  
3202 +int __schedule_work(struct work_struct* work)
3203 +{
3204 +	return queue_work(system_wq, work);
3205 +}
3206 +EXPORT_SYMBOL(__schedule_work);
3207 +
3208  /*
3209   * schedule_work_on - put work task on a specific cpu
3210   * @cpu: cpu to put the work task on
3211 diff --git a/litmus/Kconfig b/litmus/Kconfig
3212 index 94b48e1..8c156e4 100644
3213 --- a/litmus/Kconfig
3214 +++ b/litmus/Kconfig
3215 @@ -60,6 +60,42 @@ config LITMUS_LOCKING
3216  	  Say Yes if you want to include locking protocols such as the FMLP and
3217  	  Baker's SRP.
3218  
3219 +config LITMUS_AFFINITY_LOCKING
3220 +	bool "Enable affinity infrastructure in k-exclusion locking protocols."
3221 +	depends on LITMUS_LOCKING
3222 +	default n
3223 +	help
3224 +	  Enable affinity tracking infrastructure in k-exclusion locking protocols.
3225 +	  This only enables the *infrastructure*, not the actual affinity algorithms.
3226 +
3227 +	  If unsure, say No.
3228 +
3229 +config LITMUS_NESTED_LOCKING
3230 +	bool "Support for nested inheritance in locking protocols"
3231 +	depends on LITMUS_LOCKING
3232 +	default n
3233 +	help
3234 +	  Enable nested priority inheritance.
3235 +
3236 +config LITMUS_DGL_SUPPORT
3237 +	bool "Support for dynamic group locks"
3238 +	depends on LITMUS_NESTED_LOCKING
3239 +	default n
3240 +	help
3241 +	  Enable dynamic group lock support.
3242 +
3243 +config LITMUS_MAX_DGL_SIZE
3244 +	int "Maximum size of a dynamic group lock."
3245 +	depends on LITMUS_DGL_SUPPORT
3246 +	range 1 128
3247 +	default "10"
3248 +	help
3249 +		Dynamic group lock data structures are allocated on the process
3250 +		stack when a group is requested. We set a maximum number of
3251 +		locks in a dynamic group lock to avoid dynamic allocation.
3252 +
3253 +		TODO: Batch DGL requests exceeding LITMUS_MAX_DGL_SIZE.
3254 +
3255  endmenu
3256  
3257  menu "Performance Enhancements"
3258 @@ -121,7 +157,7 @@ config SCHED_TASK_TRACE
3259  config SCHED_TASK_TRACE_SHIFT
3260         int "Buffer size for sched_trace_xxx() events"
3261         depends on SCHED_TASK_TRACE
3262 -       range 8 13
3263 +       range 8 15
3264         default 9
3265         help
3266  
3267 @@ -215,4 +251,114 @@ config PREEMPT_STATE_TRACE
3268  
3269  endmenu
3270  
3271 +menu "Interrupt Handling"
3272 +
3273 +choice 
3274 +	prompt "Scheduling of interrupt bottom-halves in Litmus."
3275 +	default LITMUS_SOFTIRQD_NONE
3276 +	depends on LITMUS_LOCKING && !LITMUS_THREAD_ALL_SOFTIRQ
3277 +	help
3278 +		Schedule tasklets with known priorities in Litmus.
3279 +
3280 +config LITMUS_SOFTIRQD_NONE
3281 +	bool "No tasklet scheduling in Litmus."
3282 +	help
3283 +	  Don't schedule tasklets in Litmus.  Default.
3284 +
3285 +config LITMUS_SOFTIRQD
3286 +	bool "Spawn klitirqd interrupt handling threads."
3287 +	help
3288 +	  Create klitirqd interrupt handling threads.  Work must be
3289 +	  specifically dispatched to these workers.  (Softirqs for
3290 +	  Litmus tasks are not magically redirected to klitirqd.)
3291 +
3292 +	  G-EDF/RM, C-EDF/RM ONLY for now!
3293 +
3294 +
3295 +config LITMUS_PAI_SOFTIRQD
3296 +	bool "Defer tasklets to context switch points."
3297 +	help
3298 +	  Only execute scheduled tasklet bottom halves at
3299 +	  scheduling points.  Avoids context-switch overhead
3300 +	  at the cost of non-preemptive durations of bottom-half
3301 +	  processing.
3302 +		 
3303 +	  G-EDF/RM, C-EDF/RM ONLY for now!	 
3304 +		 
3305 +endchoice	   
3306 +	   
3307 +
3308 +config NR_LITMUS_SOFTIRQD
3309 +	   int "Number of klitirqd threads."
3310 +	   depends on LITMUS_SOFTIRQD
3311 +	   range 1 4096
3312 +	   default "1"
3313 +	   help
3314 +	     Should be <= the number of CPUs in your system.
3315 +
3316 +config LITMUS_NVIDIA
3317 +	  bool "Litmus handling of NVIDIA interrupts."
3318 +	  default n
3319 +	  help
3320 +	    Direct tasklets from NVIDIA devices to Litmus's klitirqd
3321 +		or PAI interrupt handling routines.
3322 +
3323 +		If unsure, say No.
3324 +
3325 +config LITMUS_AFFINITY_AWARE_GPU_ASSINGMENT
3326 +	  bool "Enable affinity-aware heuristics to improve GPU assignment."
3327 +	  depends on LITMUS_NVIDIA && LITMUS_AFFINITY_LOCKING
3328 +	  default n
3329 +	  help
3330 +	    Enable several heuristics to improve the assignment
3331 +		of GPUs to real-time tasks to reduce the overheads
3332 +		of memory migrations.
3333 +
3334 +		If unsure, say No.
3335 +
3336 +config NV_DEVICE_NUM
3337 +	int "Number of NVIDIA GPUs."
3338 +	depends on LITMUS_SOFTIRQD || LITMUS_PAI_SOFTIRQD
3339 +	range 1 4096
3340 +	default "1"
3341 +	help
3342 +	  Should be <= the number of CPUs and <= the number of
3343 +	  GPUs in your system.
3344 +
3345 +config NV_MAX_SIMULT_USERS
3346 +	int "Maximum number of threads sharing a GPU simultaneously"
3347 +	depends on LITMUS_SOFTIRQD || LITMUS_PAI_SOFTIRQD
3348 +	range 1 3
3349 +	default "2"
3350 +	help
3351 +	  Should equal the number of copy engines plus execution engines
3352 +	  of the GPUs in your system.
3353 +
3354 +	  Scientific/Professional GPUs = 3  (ex. M2070, Quadro 6000?)
3355 +	  Consumer Fermi/Kepler GPUs   = 2  (GTX-4xx thru -6xx)
3356 +	  Older                        = 1  (ex. GTX-2xx)
3357 +
3358 +choice
3359 +	  prompt "CUDA/Driver Version Support"
3360 +	  default CUDA_4_0
3361 +	  depends on LITMUS_NVIDIA
3362 +	  help
3363 +	    Select the version of CUDA/driver to support.
3364 +
3365 +config CUDA_4_0
3366 +	  bool "CUDA 4.0"
3367 +	  depends on LITMUS_NVIDIA
3368 +	  help
3369 +	    Support CUDA 4.0 RC2 (dev. driver version: x86_64-270.40)
3370 +
3371 +config CUDA_3_2
3372 +	  bool "CUDA 3.2"
3373 +	  depends on LITMUS_NVIDIA
3374 +	  help
3375 +	    Support CUDA 3.2 (dev. driver version: x86_64-260.24)
3376 +
3377 +endchoice
3378 +
3379 +endmenu
3380 +
3381  endmenu
3382 diff --git a/litmus/Makefile b/litmus/Makefile
3383 index 7338180..080cbf6 100644
3384 --- a/litmus/Makefile
3385 +++ b/litmus/Makefile
3386 @@ -15,9 +15,11 @@ obj-y     = sched_plugin.o litmus.o \
3387  	    locking.o \
3388  	    srp.o \
3389  	    bheap.o \
3390 +	    binheap.o \
3391  	    ctrldev.o \
3392  	    sched_gsn_edf.o \
3393 -	    sched_psn_edf.o
3394 +	    sched_psn_edf.o \
3395 +	    kfmlp_lock.o
3396  
3397  obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
3398  obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o
3399 @@ -27,3 +29,10 @@ obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
3400  obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o
3401  obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o
3402  obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o
3403 +
3404 +obj-$(CONFIG_LITMUS_NESTED_LOCKING) += rsm_lock.o ikglp_lock.o
3405 +obj-$(CONFIG_LITMUS_SOFTIRQD) += litmus_softirq.o
3406 +obj-$(CONFIG_LITMUS_PAI_SOFTIRQD) += litmus_pai_softirq.o
3407 +obj-$(CONFIG_LITMUS_NVIDIA) += nvidia_info.o sched_trace_external.o
3408 +
3409 +obj-$(CONFIG_LITMUS_AFFINITY_LOCKING) += kexclu_affinity.o gpu_affinity.o
3410 diff --git a/litmus/affinity.c b/litmus/affinity.c
3411 index 3fa6dd7..cd93249 100644
3412 --- a/litmus/affinity.c
3413 +++ b/litmus/affinity.c
3414 @@ -26,7 +26,7 @@ void init_topology(void) {
3415  					cpumask_weight((struct cpumask *)&neigh_info[cpu].neighbors[i]);
3416  			}
3417  			printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n",
3418 -							cpu, neigh_info[cpu].size[i], i, 
3419 +							cpu, neigh_info[cpu].size[i], i,
3420  							*cpumask_bits(neigh_info[cpu].neighbors[i]));
3421  		}
3422  
3423 diff --git a/litmus/binheap.c b/litmus/binheap.c
3424 new file mode 100644
3425 index 0000000..8d42403
3426 --- /dev/null
3427 +++ b/litmus/binheap.c
3428 @@ -0,0 +1,443 @@
3429 +#include <litmus/binheap.h>
3430 +
3431 +//extern void dump_node_data(struct binheap_node* parent, struct binheap_node* child);
3432 +//extern void dump_node_data2(struct binheap_handle *handle, struct binheap_node* bad_node);
3433 +
3434 +int binheap_is_in_this_heap(struct binheap_node *node,
3435 +	struct binheap_handle* heap)
3436 +{
3437 +	if(!binheap_is_in_heap(node)) {
3438 +		return 0;
3439 +	}
3440 +
3441 +	while(node->parent != NULL) {
3442 +		node = node->parent;
3443 +	}
3444 +
3445 +	return (node == heap->root);
3446 +}
3447 +
3448 +/* Update the node reference pointers.  Same logic as Litmus binomial heap. */
3449 +static void __update_ref(struct binheap_node *parent,
3450 +	struct binheap_node *child)
3451 +{
3452 +	*(parent->ref_ptr) = child;
3453 +	*(child->ref_ptr) = parent;
3454 +
3455 +	swap(parent->ref_ptr, child->ref_ptr);
3456 +}
3457 +
3458 +/* Swaps data between two nodes. */
3459 +static void __binheap_swap(struct binheap_node *parent,
3460 +	struct binheap_node *child)
3461 +{
3462 +//	if(parent == BINHEAP_POISON || child == BINHEAP_POISON) {
3463 +//		dump_node_data(parent, child);
3464 +//		BUG();
3465 +//	}
3466 +
3467 +	swap(parent->data, child->data);
3468 +	__update_ref(parent, child);
3469 +}
3470 +
3471 +
3472 +/* Swaps memory and data between two nodes.  The actual nodes are swapped,
3473 + * not just their data.  Needed when we delete nodes from the heap.
3474 + */
3475 +static void __binheap_swap_safe(struct binheap_handle *handle,
3476 +	struct binheap_node *a,
3477 +	struct binheap_node *b)
3478 +{
3479 +	swap(a->data, b->data);
3480 +	__update_ref(a, b);
3481 +
3482 +	if((a->parent != NULL) && (a->parent == b->parent)) {
3483 +		/* special case: shared parent */
3484 +		swap(a->parent->left, a->parent->right);
3485 +	}
3486 +	else {
3487 +		/* Update pointers to swap parents. */
3488 +
3489 +		if(a->parent) {
3490 +			if(a == a->parent->left) {
3491 +				a->parent->left = b;
3492 +			}
3493 +			else {
3494 +				a->parent->right = b;
3495 +			}
3496 +		}
3497 +
3498 +		if(b->parent) {
3499 +			if(b == b->parent->left) {
3500 +				b->parent->left = a;
3501 +			}
3502 +			else {
3503 +				b->parent->right = a;
3504 +			}
3505 +		}
3506 +
3507 +		swap(a->parent, b->parent);
3508 +	}
3509 +
3510 +	/* swap children */
3511 +
3512 +	if(a->left) {
3513 +		a->left->parent = b;
3514 +
3515 +		if(a->right) {
3516 +			a->right->parent = b;
3517 +		}
3518 +	}
3519 +
3520 +	if(b->left) {
3521 +		b->left->parent = a;
3522 +
3523 +		if(b->right) {
3524 +			b->right->parent = a;
3525 +		}
3526 +	}
3527 +
3528 +	swap(a->left, b->left);
3529 +	swap(a->right, b->right);
3530 +
3531 +
3532 +	/* update next/last/root pointers */
3533 +
3534 +	if(a == handle->next) {
3535 +		handle->next = b;
3536 +	}
3537 +	else if(b == handle->next) {
3538 +		handle->next = a;
3539 +	}
3540 +
3541 +	if(a == handle->last) {
3542 +		handle->last = b;
3543 +	}
3544 +	else if(b == handle->last) {
3545 +		handle->last = a;
3546 +	}
3547 +
3548 +	if(a == handle->root) {
3549 +		handle->root = b;
3550 +	}
3551 +	else if(b == handle->root) {
3552 +		handle->root = a;
3553 +	}
3554 +}
3555 +
3556 +
3557 +/**
3558 + * Update the pointer to the last node in the complete binary tree.
3559 + * Called internally after the root node has been deleted.
3560 + */
3561 +static void __binheap_update_last(struct binheap_handle *handle)
3562 +{
3563 +	struct binheap_node *temp = handle->last;
3564 +
3565 +	/* find a "bend" in the tree. */
3566 +	while(temp->parent && (temp == temp->parent->left)) {
3567 +		temp = temp->parent;
3568 +	}
3569 +
3570 +	/* step over to sibling if we're not at root */
3571 +	if(temp->parent != NULL) {
3572 +		temp = temp->parent->left;
3573 +	}
3574 +
3575 +	/* now travel right as far as possible. */
3576 +	while(temp->right != NULL) {
3577 +		temp = temp->right;
3578 +	}
3579 +
3580 +	/* take one step to the left if we're not at the bottom-most level. */
3581 +	if(temp->left != NULL) {
3582 +		temp = temp->left;
3583 +	}
3584 +
3585 +	//BUG_ON(!(temp->left == NULL && temp->right == NULL));
3586 +
3587 +	handle->last = temp;
3588 +}
3589 +
3590 +/**
3591 + * Update the pointer to the node that will take the next inserted node.
3592 + * Called internally after a node has been inserted.
3593 + */
3594 +static void __binheap_update_next(struct binheap_handle *handle)
3595 +{
3596 +	struct binheap_node *temp = handle->next;
3597 +
3598 +	/* find a "bend" in the tree. */
3599 +	while(temp->parent && (temp == temp->parent->right)) {
3600 +		temp = temp->parent;
3601 +	}
3602 +
3603 +	/* step over to sibling if we're not at root */
3604 +	if(temp->parent != NULL) {
3605 +		temp = temp->parent->right;
3606 +	}
3607 +
3608 +	/* now travel left as far as possible. */
3609 +	while(temp->left != NULL) {
3610 +		temp = temp->left;
3611 +	}
3612 +
3613 +	handle->next = temp;
3614 +}
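A worked example of the two update walks above (illustrative note, not part of the patch):

    /*
     * Complete tree, nodes labelled in insertion order:
     *
     *             1
     *           /   \
     *          2     3
     *         / \   /
     *        4   5 6
     *
     * __binheap_update_next(): after node 7 is inserted as node 3's right
     * child, the walk climbs from node 3 while it is a right child (reaching
     * the root), skips the sibling step at the root, and descends along left
     * children to node 4 -- the parent of the next insertion (node 8).
     *
     * __binheap_update_last(): after the root of the 6-node tree is deleted,
     * the walk climbs from node 6 while it is a left child (to node 3), steps
     * over to the sibling subtree rooted at node 2, and travels right to
     * node 5, which becomes the new 'last'.
     */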
3615 +
3616 +
3617 +
3618 +/* bubble node up towards root */
3619 +static void __binheap_bubble_up(
3620 +	struct binheap_handle *handle,
3621 +	struct binheap_node *node)
3622 +{
3623 +	//BUG_ON(!binheap_is_in_heap(node));
3624 +//	if(!binheap_is_in_heap(node))
3625 +//	{
3626 +//		dump_node_data2(handle, node);
3627 +//		BUG();
3628 +//	}
3629 +
3630 +	while((node->parent != NULL) &&
3631 +		  ((node->data == BINHEAP_POISON) /* let BINHEAP_POISON data bubble to the top */ ||
3632 +		   handle->compare(node, node->parent))) {
3633 +			  __binheap_swap(node->parent, node);
3634 +			  node = node->parent;
3635 +
3636 +//			  if(!binheap_is_in_heap(node))
3637 +//			  {
3638 +//				  dump_node_data2(handle, node);
3639 +//				  BUG();
3640 +//			  }
3641 +	}
3642 +}
3643 +
3644 +
3645 +/* bubble node down, swapping with min-child */
3646 +static void __binheap_bubble_down(struct binheap_handle *handle)
3647 +{
3648 +	struct binheap_node *node = handle->root;
3649 +
3650 +	while(node->left != NULL) {
3651 +		if(node->right && handle->compare(node->right, node->left)) {
3652 +			if(handle->compare(node->right, node)) {
3653 +				__binheap_swap(node, node->right);
3654 +				node = node->right;
3655 +			}
3656 +			else {
3657 +				break;
3658 +			}
3659 +		}
3660 +		else {
3661 +			if(handle->compare(node->left, node)) {
3662 +				__binheap_swap(node, node->left);
3663 +				node = node->left;
3664 +			}
3665 +			else {
3666 +				break;
3667 +			}
3668 +		}
3669 +	}
3670 +}
3671 +
3672 +
3673 +
3674 +void __binheap_add(struct binheap_node *new_node,
3675 +	struct binheap_handle *handle,
3676 +	void *data)
3677 +{
3678 +//	if(binheap_is_in_heap(new_node))
3679 +//	{
3680 +//		dump_node_data2(handle, new_node);
3681 +//		BUG();
3682 +//	}
3683 +
3684 +	new_node->data = data;
3685 +	new_node->ref = new_node;
3686 +	new_node->ref_ptr = &(new_node->ref);
3687 +
3688 +	if(!binheap_empty(handle)) {
3689 +		/* insert left side first */
3690 +		if(handle->next->left == NULL) {
3691 +			handle->next->left = new_node;
3692 +			new_node->parent = handle->next;
3693 +			new_node->left = NULL;
3694 +			new_node->right = NULL;
3695 +
3696 +			handle->last = new_node;
3697 +
3698 +			__binheap_bubble_up(handle, new_node);
3699 +		}
3700 +		else {
3701 +			/* left occupied. insert right. */
3702 +			handle->next->right = new_node;
3703 +			new_node->parent = handle->next;
3704 +			new_node->left = NULL;
3705 +			new_node->right = NULL;
3706 +
3707 +			handle->last = new_node;
3708 +
3709 +			__binheap_update_next(handle);
3710 +			__binheap_bubble_up(handle, new_node);
3711 +		}
3712 +	}
3713 +	else {
3714 +		/* first node in heap */
3715 +
3716 +		new_node->parent = NULL;
3717 +		new_node->left = NULL;
3718 +		new_node->right = NULL;
3719 +
3720 +		handle->root = new_node;
3721 +		handle->next = new_node;
3722 +		handle->last = new_node;
3723 +	}
3724 +}
3725 +
3726 +
3727 +
3728 +/**
3729 + * Removes the root node from the heap. The node is removed after coalescing
3730 + * the binheap_node with its original data pointer at the root of the tree.
3731 + *
3732 + * The 'last' node in the tree is then swapped up to the root and bubbled
3733 + * down.
3734 + */
3735 +void __binheap_delete_root(struct binheap_handle *handle,
3736 +	struct binheap_node *container)
3737 +{
3738 +	struct binheap_node *root = handle->root;
3739 +
3740 +//	if(!binheap_is_in_heap(container))
3741 +//	{
3742 +//		dump_node_data2(handle, container);
3743 +//		BUG();
3744 +//	}
3745 +
3746 +	if(root != container) {
3747 +		/* coalesce */
3748 +		__binheap_swap_safe(handle, root, container);
3749 +		root = container;
3750 +	}
3751 +
3752 +	if(handle->last != root) {
3753 +		/* swap 'last' node up to root and bubble it down. */
3754 +
3755 +		struct binheap_node *to_move = handle->last;
3756 +
3757 +		if(to_move->parent != root) {
3758 +			handle->next = to_move->parent;
3759 +
3760 +			if(handle->next->right == to_move) {
3761 +				/* disconnect from parent */
3762 +				to_move->parent->right = NULL;
3763 +				handle->last = handle->next->left;
3764 +			}
3765 +			else {
3766 +				/* find new 'last' before we disconnect */
3767 +				__binheap_update_last(handle);
3768 +
3769 +				/* disconnect from parent */
3770 +				to_move->parent->left = NULL;
3771 +			}
3772 +		}
3773 +		else {
3774 +			/* 'last' is direct child of root */
3775 +
3776 +			handle->next = to_move;
3777 +
3778 +			if(to_move == to_move->parent->right) {
3779 +				to_move->parent->right = NULL;
3780 +				handle->last = to_move->parent->left;
3781 +			}
3782 +			else {
3783 +				to_move->parent->left = NULL;
3784 +				handle->last = to_move;
3785 +			}
3786 +		}
3787 +		to_move->parent = NULL;
3788 +
3789 +		/* reconnect as root.  We can't just swap data ptrs since root node
3790 +		 * may be freed after this function returns.
3791 +		 */
3792 +		to_move->left = root->left;
3793 +		to_move->right = root->right;
3794 +		if(to_move->left != NULL) {
3795 +			to_move->left->parent = to_move;
3796 +		}
3797 +		if(to_move->right != NULL) {
3798 +			to_move->right->parent = to_move;
3799 +		}
3800 +
3801 +		handle->root = to_move;
3802 +
3803 +		/* bubble down */
3804 +		__binheap_bubble_down(handle);
3805 +	}
3806 +	else {
3807 +		/* removing last node in tree */
3808 +		handle->root = NULL;
3809 +		handle->next = NULL;
3810 +		handle->last = NULL;
3811 +	}
3812 +
3813 +	/* mark as removed */
3814 +	container->parent = BINHEAP_POISON;
3815 +}
3816 +
3817 +
3818 +/**
3819 + * Delete an arbitrary node.  Bubble node to delete up to the root,
3820 + * and then delete the root.
3821 + */
3822 +void __binheap_delete(struct binheap_node *node_to_delete,
3823 +	struct binheap_handle *handle)
3824 +{
3825 +	struct binheap_node *target = node_to_delete->ref;
3826 +	void *temp_data = target->data;
3827 +
3828 +//	if(!binheap_is_in_heap(node_to_delete))
3829 +//	{
3830 +//		dump_node_data2(handle, node_to_delete);
3831 +//		BUG();
3832 +//	}
3833 +//
3834 +//	if(!binheap_is_in_heap(target))
3835 +//	{
3836 +//		dump_node_data2(handle, target);
3837 +//		BUG();
3838 +//	}
3839 +
3840 +	/* temporarily set data to BINHEAP_POISON so the node bubbles up to the root. */
3841 +	target->data = BINHEAP_POISON;
3842 +
3843 +	__binheap_bubble_up(handle, target);
3844 +	__binheap_delete_root(handle, node_to_delete);
3845 +
3846 +	node_to_delete->data = temp_data;  /* restore node data pointer */
3847 +	//node_to_delete->parent = BINHEAP_POISON; /* poison the node */
3848 +}
3849 +
3850 +/**
3851 + * Bubble up a node whose key has changed such that it should move toward the root.
3852 + */
3853 +void __binheap_decrease(struct binheap_node *orig_node,
3854 +	struct binheap_handle *handle)
3855 +{
3856 +	struct binheap_node *target = orig_node->ref;
3857 +
3858 +//	if(!binheap_is_in_heap(orig_node))
3859 +//	{
3860 +//		dump_node_data2(handle, orig_node);
3861 +//		BUG();
3862 +//	}
3863 +//
3864 +//	if(!binheap_is_in_heap(target))
3865 +//	{
3866 +//		dump_node_data2(handle, target);
3867 +//		BUG();
3868 +//	}
3869 +//
3870 +	__binheap_bubble_up(handle, target);
3871 +}
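A minimal usage sketch of the heap interface implemented above, using the wrapper macros seen elsewhere in this patch (INIT_BINHEAP_NODE(), binheap_add(), binheap_delete_root(), binheap_decrease()); demo_elem, demo_less() and the INIT_BINHEAP_HANDLE() initializer name are assumptions, not verbatim from include/litmus/binheap.h:

    #include <litmus/binheap.h>

    struct demo_elem {
    	int key;
    	struct binheap_node node;
    };

    /* handle->compare(): return nonzero when 'a' should sit above 'b'. */
    static int demo_less(struct binheap_node *a, struct binheap_node *b)
    {
    	return binheap_entry(a, struct demo_elem, node)->key <
    	       binheap_entry(b, struct demo_elem, node)->key;
    }

    static void demo_binheap_use(void)
    {
    	struct binheap_handle heap;
    	struct demo_elem e1 = { .key = 3 }, e2 = { .key = 1 };

    	INIT_BINHEAP_HANDLE(&heap, demo_less);	/* assumed initializer name */

    	INIT_BINHEAP_NODE(&e1.node);
    	INIT_BINHEAP_NODE(&e2.node);
    	binheap_add(&e1.node, &heap, struct demo_elem, node);
    	binheap_add(&e2.node, &heap, struct demo_elem, node);

    	/* e2 (key 1) is now at the root; remove it. */
    	binheap_delete_root(&heap, struct demo_elem, node);

    	/* lower e1's key and let it bubble toward the root. */
    	e1.key = 0;
    	binheap_decrease(&e1.node, &heap);
    }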
3872 diff --git a/litmus/edf_common.c b/litmus/edf_common.c
3873 index 9b44dc2..b346bdd 100644
3874 --- a/litmus/edf_common.c
3875 +++ b/litmus/edf_common.c
3876 @@ -12,40 +12,61 @@
3877  #include <litmus/sched_plugin.h>
3878  #include <litmus/sched_trace.h>
3879  
3880 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
3881 +#include <litmus/locking.h>
3882 +#endif
3883 +
3884  #include <litmus/edf_common.h>
3885  
3886 +
3887 +
3888  /* edf_higher_prio -  returns true if first has a higher EDF priority
3889   *                    than second. Deadline ties are broken by PID.
3890   *
3891   * both first and second may be NULL
3892   */
3893 -int edf_higher_prio(struct task_struct* first,
3894 -		    struct task_struct* second)
3895 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
3896 +int __edf_higher_prio(
3897 +	struct task_struct* first, comparison_mode_t first_mode,
3898 +	struct task_struct* second, comparison_mode_t second_mode)
3899 +#else
3900 +int edf_higher_prio(struct task_struct* first, struct task_struct* second)
3901 +#endif
3902  {
3903  	struct task_struct *first_task = first;
3904  	struct task_struct *second_task = second;
3905  
3906  	/* There is no point in comparing a task to itself. */
3907  	if (first && first == second) {
3908 -		TRACE_TASK(first,
3909 -			   "WARNING: pointless edf priority comparison.\n");
3910 +		TRACE_CUR("WARNING: pointless edf priority comparison: %s/%d\n", first->comm, first->pid);
3911 +		WARN_ON(1);
3912  		return 0;
3913  	}
3914  
3915  
3916  	/* check for NULL tasks */
3917 -	if (!first || !second)
3918 +	if (!first || !second) {
3919  		return first && !second;
3920 +	}
3921  
3922  #ifdef CONFIG_LITMUS_LOCKING
3923 -
3924 -	/* Check for inherited priorities. Change task
3925 +	/* Check for EFFECTIVE priorities. Change task
3926  	 * used for comparison in such a case.
3927  	 */
3928 -	if (unlikely(first->rt_param.inh_task))
3929 +	if (unlikely(first->rt_param.inh_task)
3930 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
3931 +		&& (first_mode == EFFECTIVE)
3932 +#endif
3933 +		) {
3934  		first_task = first->rt_param.inh_task;
3935 -	if (unlikely(second->rt_param.inh_task))
3936 +	}
3937 +	if (unlikely(second->rt_param.inh_task)
3938 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
3939 +		&& (second_mode == EFFECTIVE)
3940 +#endif
3941 +		) {
3942  		second_task = second->rt_param.inh_task;
3943 +	}
3944  
3945  	/* Check for priority boosting. Tie-break by start of boosting.
3946  	 */
3947 @@ -53,37 +74,109 @@ int edf_higher_prio(struct task_struct* first,
3948  		/* first_task is boosted, how about second_task? */
3949  		if (!is_priority_boosted(second_task) ||
3950  		    lt_before(get_boost_start(first_task),
3951 -			      get_boost_start(second_task)))
3952 +					  get_boost_start(second_task))) {
3953  			return 1;
3954 -		else
3955 +		}
3956 +		else {
3957  			return 0;
3958 -	} else if (unlikely(is_priority_boosted(second_task)))
3959 +		}
3960 +	}
3961 +	else if (unlikely(is_priority_boosted(second_task))) {
3962  		/* second_task is boosted, first is not*/
3963  		return 0;
3964 +	}
3965  
3966  #endif
3967  
3968 +//	// rate-monotonic for testing
3969 +//	if (!is_realtime(second_task)) {
3970 +//		return true;
3971 +//	}
3972 +//
3973 +//	if (shorter_period(first_task, second_task)) {
3974 +//		return true;
3975 +//	}
3976 +//
3977 +//	if (get_period(first_task) == get_period(second_task)) {
3978 +//		if (first_task->pid < second_task->pid) {
3979 +//			return true;
3980 +//		}
3981 +//		else if (first_task->pid == second_task->pid) {
3982 +//			return !second->rt_param.inh_task;
3983 +//		}
3984 +//	}
3985 +
3986 +	if (!is_realtime(second_task)) {
3987 +		return true;
3988 +	}
3989 +
3990 +	if (earlier_deadline(first_task, second_task)) {
3991 +		return true;
3992 +	}
3993 +	if (get_deadline(first_task) == get_deadline(second_task)) {
3994 +
3995 +		if (shorter_period(first_task, second_task)) {
3996 +			return true;
3997 +		}
3998 +		if (get_rt_period(first_task) == get_rt_period(second_task)) {
3999 +			if (first_task->pid < second_task->pid) {
4000 +				return true;
4001 +			}
4002 +			if (first_task->pid == second_task->pid) {
4003 +#ifdef CONFIG_LITMUS_SOFTIRQD
4004 +				if (first_task->rt_param.is_proxy_thread <
4005 +					second_task->rt_param.is_proxy_thread) {
4006 +					return true;
4007 +				}
4008 +				if(first_task->rt_param.is_proxy_thread == second_task->rt_param.is_proxy_thread) {
4009 +					return !second->rt_param.inh_task;
4010 +				}
4011 +#else
4012 +				return !second->rt_param.inh_task;
4013 +#endif
4014 +			}
4015 +
4016 +		}
4017 +	}
4018 +
4019 +	return false;
4020 +}
4021 +
4022 +
4023 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
4024 +int edf_higher_prio(struct task_struct* first, struct task_struct* second)
4025 +{
4026 +	return __edf_higher_prio(first, EFFECTIVE, second, EFFECTIVE);
4027 +}
4028 +
4029 +int edf_max_heap_order(struct binheap_node *a, struct binheap_node *b)
4030 +{
4031 +	struct nested_info *l_a = (struct nested_info *)binheap_entry(a, struct nested_info, hp_binheap_node);
4032 +	struct nested_info *l_b = (struct nested_info *)binheap_entry(b, struct nested_info, hp_binheap_node);
4033  
4034 -	return !is_realtime(second_task)  ||
4035 +	return __edf_higher_prio(l_a->hp_waiter_eff_prio, EFFECTIVE, l_b->hp_waiter_eff_prio, EFFECTIVE);
4036 +}
4037  
4038 -		/* is the deadline of the first task earlier?
4039 -		 * Then it has higher priority.
4040 -		 */
4041 -		earlier_deadline(first_task, second_task) ||
4042 +int edf_min_heap_order(struct binheap_node *a, struct binheap_node *b)
4043 +{
4044 +	return edf_max_heap_order(b, a);  // swap comparison
4045 +}
4046  
4047 -		/* Do we have a deadline tie?
4048 -		 * Then break by PID.
4049 -		 */
4050 -		(get_deadline(first_task) == get_deadline(second_task) &&
4051 -	        (first_task->pid < second_task->pid ||
4052 +int edf_max_heap_base_priority_order(struct binheap_node *a, struct binheap_node *b)
4053 +{
4054 +	struct nested_info *l_a = (struct nested_info *)binheap_entry(a, struct nested_info, hp_binheap_node);
4055 +	struct nested_info *l_b = (struct nested_info *)binheap_entry(b, struct nested_info, hp_binheap_node);
4056  
4057 -		/* If the PIDs are the same then the task with the inherited
4058 -		 * priority wins.
4059 -		 */
4060 -		(first_task->pid == second_task->pid &&
4061 -		 !second->rt_param.inh_task)));
4062 +	return __edf_higher_prio(l_a->hp_waiter_eff_prio, BASE, l_b->hp_waiter_eff_prio, BASE);
4063  }
4064  
4065 +int edf_min_heap_base_priority_order(struct binheap_node *a, struct binheap_node *b)
4066 +{
4067 +	return edf_max_heap_base_priority_order(b, a);  // swap comparison
4068 +}
4069 +#endif
4070 +
4071 +
4072  int edf_ready_order(struct bheap_node* a, struct bheap_node* b)
4073  {
4074  	return edf_higher_prio(bheap2task(a), bheap2task(b));
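The BASE/EFFECTIVE distinction introduced in this hunk is the core of the change: EFFECTIVE comparisons follow inh_task (inherited priority), while BASE comparisons deliberately ignore it, which the IKGLP donor and top-m heaps later rely on. A condensed sketch of that selection step (it mirrors the inh_task handling in __edf_higher_prio(); it is not a verbatim excerpt):

    /* Pick the task whose parameters actually enter the comparison. */
    static struct task_struct *prio_source_sketch(struct task_struct *t,
    					       comparison_mode_t mode)
    {
    	if (mode == EFFECTIVE && t->rt_param.inh_task)
    		return t->rt_param.inh_task;	/* honor inheritance */
    	return t;				/* BASE: ignore inh_task */
    }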
4075 diff --git a/litmus/fdso.c b/litmus/fdso.c
4076 index aa7b384..18fc61b 100644
4077 --- a/litmus/fdso.c
4078 +++ b/litmus/fdso.c
4079 @@ -20,9 +20,22 @@
4080  
4081  extern struct fdso_ops generic_lock_ops;
4082  
4083 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
4084 +extern struct fdso_ops generic_affinity_ops;
4085 +#endif
4086 +
4087  static const struct fdso_ops* fdso_ops[] = {
4088  	&generic_lock_ops, /* FMLP_SEM */
4089  	&generic_lock_ops, /* SRP_SEM */
4090 +	&generic_lock_ops, /* RSM_MUTEX */
4091 +	&generic_lock_ops, /* IKGLP_SEM */
4092 +	&generic_lock_ops, /* KFMLP_SEM */
4093 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
4094 +	&generic_affinity_ops, /* IKGLP_SIMPLE_GPU_AFF_OBS */
4095 +	&generic_affinity_ops, /* IKGLP_GPU_AFF_OBS */
4096 +	&generic_affinity_ops, /* KFMLP_SIMPLE_GPU_AFF_OBS */
4097 +	&generic_affinity_ops, /* KFMLP_GPU_AFF_OBS */
4098 +#endif
4099  };
4100  
4101  static int fdso_create(void** obj_ref, obj_type_t type, void* __user config)
4102 diff --git a/litmus/gpu_affinity.c b/litmus/gpu_affinity.c
4103 new file mode 100644
4104 index 0000000..9762be1
4105 --- /dev/null
4106 +++ b/litmus/gpu_affinity.c
4107 @@ -0,0 +1,113 @@
4108 +
4109 +#ifdef CONFIG_LITMUS_NVIDIA
4110 +
4111 +#include <linux/sched.h>
4112 +#include <litmus/litmus.h>
4113 +#include <litmus/gpu_affinity.h>
4114 +
4115 +#include <litmus/sched_trace.h>
4116 +
4117 +#define OBSERVATION_CAP ((lt_t)2e9) /* keep integral so %llu tracing is well-defined */
4118 +
4119 +static fp_t update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed)
4120 +{
4121 +	fp_t relative_err;
4122 +	fp_t err, new;
4123 +	fp_t actual = _integer_to_fp(observed);
4124 +
4125 +	err = _sub(actual, fb->est);
4126 +	new = _add(_mul(a, err), _mul(b, fb->accum_err));
4127 +
4128 +	relative_err = _div(err, actual);
4129 +
4130 +	fb->est = new;
4131 +	fb->accum_err = _add(fb->accum_err, err);
4132 +
4133 +	return relative_err;
4134 +}
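Written out, the update rule implemented above is a simple PI-style feedback law, with a and b the per-migration-distance gains (gpu_fb_param_a/b) supplied by the task:

    err        = observed - est
    est        := a * err + b * accum_err
    accum_err  := accum_err + err
    return err / observed    /* relative error, later fed to sched_trace_prediction_err() */

Note that the estimate is replaced by the controller output rather than incremented; the steady-state value is carried by the integral term accum_err.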
4135 +
4136 +void update_gpu_estimate(struct task_struct *t, lt_t observed)
4137 +{
4138 +	feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]);
4139 +
4140 +	BUG_ON(tsk_rt(t)->gpu_migration > MIG_LAST);
4141 +
4142 +	if(unlikely(fb->est.val == 0)) {
4143 +		// kludge-- cap observed values to prevent whacky estimations.
4144 +		// whacky stuff happens during the first few jobs.
4145 +		if(unlikely(observed > OBSERVATION_CAP)) {
4146 +			TRACE_TASK(t, "Crazy observation was capped: %llu -> %llu\n",
4147 +					   observed, OBSERVATION_CAP);
4148 +			observed = OBSERVATION_CAP;
4149 +		}
4150 +
4151 +		// take the first observation as our estimate
4152 +		// (initial value of 0 was bogus anyhow)
4153 +		fb->est = _integer_to_fp(observed);
4154 +		fb->accum_err = _div(fb->est, _integer_to_fp(2));  // ...seems to work.
4155 +	}
4156 +	else {
4157 +		fp_t rel_err = update_estimate(fb,
4158 +									   tsk_rt(t)->gpu_fb_param_a[tsk_rt(t)->gpu_migration],
4159 +									   tsk_rt(t)->gpu_fb_param_b[tsk_rt(t)->gpu_migration],
4160 +									   observed);
4161 +
4162 +		if(unlikely(_fp_to_integer(fb->est) <= 0)) {
4163 +			TRACE_TASK(t, "Invalid estimate. Patching.\n");
4164 +			fb->est = _integer_to_fp(observed);
4165 +			fb->accum_err = _div(fb->est, _integer_to_fp(2));  // ...seems to work.
4166 +		}
4167 +		else {
4168 +//			struct migration_info mig_info;
4169 +
4170 +			sched_trace_prediction_err(t,
4171 +									   &(tsk_rt(t)->gpu_migration),
4172 +									   &rel_err);
4173 +
4174 +//			mig_info.observed = observed;
4175 +//			mig_info.estimated = get_gpu_estimate(t, tsk_rt(t)->gpu_migration);
4176 +//			mig_info.distance = tsk_rt(t)->gpu_migration;
4177 +//
4178 +//			sched_trace_migration(t, &mig_info);
4179 +		}
4180 +	}
4181 +
4182 +	TRACE_TASK(t, "GPU est update after (dist = %d, obs = %llu): %d.%d\n",
4183 +			   tsk_rt(t)->gpu_migration,
4184 +			   observed,
4185 +			   _fp_to_integer(fb->est),
4186 +			   _point(fb->est));
4187 +}
4188 +
4189 +gpu_migration_dist_t gpu_migration_distance(int a, int b)
4190 +{
4191 +	// GPUs organized in a binary hierarchy, no more than 2^MIG_FAR GPUs
4192 +	int i;
4193 +	int dist;
4194 +
4195 +	if(likely(a >= 0 && b >= 0)) {
4196 +		for(i = 0; i <= MIG_FAR; ++i) {
4197 +			if(a>>i == b>>i) {
4198 +				dist = i;
4199 +				goto out;
4200 +			}
4201 +		}
4202 +		dist = MIG_NONE; // hopefully never reached.
4203 +		TRACE_CUR("WARNING: GPU distance too far! %d -> %d\n", a, b);
4204 +	}
4205 +	else {
4206 +		dist = MIG_NONE;
4207 +	}
4208 +
4209 +out:
4210 +	TRACE_CUR("Distance %d -> %d is %d\n",
4211 +			  a, b, dist);
4212 +
4213 +	return dist;
4214 +}
4215 +
4216 +
4217 +
4218 +
4219 +#endif
4220 +
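A quick worked example of gpu_migration_distance() above: the indices are right-shifted until their remaining high-order bits agree, so the returned distance is the height of the smallest subtree of the (assumed binary) GPU hierarchy containing both GPUs. Assuming MIG_FAR >= 3:

    /*
     * gpu_migration_distance(2, 2) == 0          (match at shift 0)
     * gpu_migration_distance(2, 3) == 1          (2>>1 == 3>>1 == 1)
     * gpu_migration_distance(2, 4) == 3          (first match at shift 3: 0 == 0)
     * gpu_migration_distance(2, -1) == MIG_NONE  (no previous GPU known)
     */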
4221 diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c
4222 new file mode 100644
4223 index 0000000..83b708a
4224 --- /dev/null
4225 +++ b/litmus/ikglp_lock.c
4226 @@ -0,0 +1,2838 @@
4227 +#include <linux/slab.h>
4228 +#include <linux/uaccess.h>
4229 +
4230 +#include <litmus/trace.h>
4231 +#include <litmus/sched_plugin.h>
4232 +#include <litmus/fdso.h>
4233 +
4234 +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
4235 +#include <litmus/gpu_affinity.h>
4236 +#include <litmus/nvidia_info.h>
4237 +#endif
4238 +
4239 +#include <litmus/ikglp_lock.h>
4240 +
4241 +// big signed value.
4242 +#define IKGLP_INVAL_DISTANCE 0x7FFFFFFF
4243 +
4244 +int ikglp_max_heap_base_priority_order(struct binheap_node *a,
4245 +										   struct binheap_node *b)
4246 +{
4247 +	ikglp_heap_node_t *d_a = binheap_entry(a, ikglp_heap_node_t, node);
4248 +	ikglp_heap_node_t *d_b = binheap_entry(b, ikglp_heap_node_t, node);
4249 +
4250 +	BUG_ON(!d_a);
4251 +	BUG_ON(!d_b);
4252 +
4253 +	return litmus->__compare(d_a->task, BASE, d_b->task, BASE);
4254 +}
4255 +
4256 +int ikglp_min_heap_base_priority_order(struct binheap_node *a,
4257 +										   struct binheap_node *b)
4258 +{
4259 +	ikglp_heap_node_t *d_a = binheap_entry(a, ikglp_heap_node_t, node);
4260 +	ikglp_heap_node_t *d_b = binheap_entry(b, ikglp_heap_node_t, node);
4261 +
4262 +	return litmus->__compare(d_b->task, BASE, d_a->task, BASE);
4263 +}
4264 +
4265 +int ikglp_donor_max_heap_base_priority_order(struct binheap_node *a,
4266 +												 struct binheap_node *b)
4267 +{
4268 +	ikglp_wait_state_t *d_a = binheap_entry(a, ikglp_wait_state_t, node);
4269 +	ikglp_wait_state_t *d_b = binheap_entry(b, ikglp_wait_state_t, node);
4270 +
4271 +	return litmus->__compare(d_a->task, BASE, d_b->task, BASE);
4272 +}
4273 +
4274 +
4275 +int ikglp_min_heap_donee_order(struct binheap_node *a,
4276 +								   struct binheap_node *b)
4277 +{
4278 +	struct task_struct *prio_a, *prio_b;
4279 +
4280 +	ikglp_donee_heap_node_t *d_a =
4281 +		binheap_entry(a, ikglp_donee_heap_node_t, node);
4282 +	ikglp_donee_heap_node_t *d_b =
4283 +		binheap_entry(b, ikglp_donee_heap_node_t, node);
4284 +
4285 +	if(!d_a->donor_info) {
4286 +		prio_a = d_a->task;
4287 +	}
4288 +	else {
4289 +		prio_a = d_a->donor_info->task;
4290 +		BUG_ON(d_a->task != d_a->donor_info->donee_info->task);
4291 +	}
4292 +
4293 +	if(!d_b->donor_info) {
4294 +		prio_b = d_b->task;
4295 +	}
4296 +	else {
4297 +		prio_b = d_b->donor_info->task;
4298 +		BUG_ON(d_b->task != d_b->donor_info->donee_info->task);
4299 +	}
4300 +
4301 +	// note reversed order
4302 +	return litmus->__compare(prio_b, BASE, prio_a, BASE);
4303 +}
4304 +
4305 +
4306 +
4307 +static inline int ikglp_get_idx(struct ikglp_semaphore *sem,
4308 +								struct fifo_queue *queue)
4309 +{
4310 +	return (queue - &sem->fifo_queues[0]);
4311 +}
4312 +
4313 +static inline struct fifo_queue* ikglp_get_queue(struct ikglp_semaphore *sem,
4314 +												 struct task_struct *holder)
4315 +{
4316 +	int i;
4317 +	for(i = 0; i < sem->nr_replicas; ++i)
4318 +		if(sem->fifo_queues[i].owner == holder)
4319 +			return(&sem->fifo_queues[i]);
4320 +	return(NULL);
4321 +}
4322 +
4323 +
4324 +
4325 +static struct task_struct* ikglp_find_hp_waiter(struct fifo_queue *kqueue,
4326 +												struct task_struct *skip)
4327 +{
4328 +	struct list_head *pos;
4329 +	struct task_struct *queued, *found = NULL;
4330 +
4331 +	list_for_each(pos, &kqueue->wait.task_list) {
4332 +		queued  = (struct task_struct*) list_entry(pos,
4333 +											wait_queue_t, task_list)->private;
4334 +
4335 +		/* Compare task prios, find high prio task. */
4336 +		if(queued != skip && litmus->compare(queued, found))
4337 +			found = queued;
4338 +	}
4339 +	return found;
4340 +}
4341 +
4342 +static struct fifo_queue* ikglp_find_shortest(struct ikglp_semaphore *sem,
4343 +											  struct fifo_queue *search_start)
4344 +{
4345 +	// we start our search at search_start instead of at the beginning of the
4346 +	// queue list to load-balance across all resources.
4347 +	struct fifo_queue* step = search_start;
4348 +	struct fifo_queue* shortest = sem->shortest_fifo_queue;
4349 +
4350 +	do {
4351 +		step = (step+1 != &sem->fifo_queues[sem->nr_replicas]) ?
4352 +		step+1 : &sem->fifo_queues[0];
4353 +
4354 +		if(step->count < shortest->count) {
4355 +			shortest = step;
4356 +			if(step->count == 0)
4357 +				break; /* can't get any shorter */
4358 +		}
4359 +
4360 +	}while(step != search_start);
4361 +
4362 +	return(shortest);
4363 +}
4364 +
4365 +static inline struct task_struct* ikglp_mth_highest(struct ikglp_semaphore *sem)
4366 +{
4367 +	return binheap_top_entry(&sem->top_m, ikglp_heap_node_t, node)->task;
4368 +}
4369 +
4370 +
4371 +
4372 +#if 0
4373 +static void print_global_list(struct binheap_node* n, int depth)
4374 +{
4375 +	ikglp_heap_node_t *global_heap_node;
4376 +	char padding[81] = "                                                                                ";
4377 +
4378 +	if(n == NULL) {
4379 +		TRACE_CUR("+-> %p\n", NULL);
4380 +		return;
4381 +	}
4382 +
4383 +	global_heap_node = binheap_entry(n, ikglp_heap_node_t, node);
4384 +
4385 +	if(depth*2 <= 80)
4386 +		padding[depth*2] = '\0';
4387 +
4388 +	TRACE_CUR("%s+-> %s/%d\n",
4389 +			  padding,
4390 +			  global_heap_node->task->comm,
4391 +			  global_heap_node->task->pid);
4392 +
4393 +    if(n->left) print_global_list(n->left, depth+1);
4394 +    if(n->right) print_global_list(n->right, depth+1);
4395 +}
4396 +
4397 +static void print_donees(struct ikglp_semaphore *sem, struct binheap_node *n, int depth)
4398 +{
4399 +	ikglp_donee_heap_node_t *donee_node;
4400 +	char padding[81] = "                                                                                ";
4401 +	struct task_struct* donor = NULL;
4402 +
4403 +	if(n == NULL) {
4404 +		TRACE_CUR("+-> %p\n", NULL);
4405 +		return;
4406 +	}
4407 +
4408 +	donee_node = binheap_entry(n, ikglp_donee_heap_node_t, node);
4409 +
4410 +	if(depth*2 <= 80)
4411 +		padding[depth*2] = '\0';
4412 +
4413 +	if(donee_node->donor_info) {
4414 +		donor = donee_node->donor_info->task;
4415 +	}
4416 +
4417 +	TRACE_CUR("%s+-> %s/%d (d: %s/%d) (fq: %d)\n",
4418 +			  padding,
4419 +			  donee_node->task->comm,
4420 +			  donee_node->task->pid,
4421 +			  (donor) ? donor->comm : "nil",
4422 +			  (donor) ? donor->pid : -1,
4423 +			  ikglp_get_idx(sem, donee_node->fq));
4424 +
4425 +    if(n->left) print_donees(sem, n->left, depth+1);
4426 +    if(n->right) print_donees(sem, n->right, depth+1);
4427 +}
4428 +
4429 +static void print_donors(struct binheap_node *n, int depth)
4430 +{
4431 +	ikglp_wait_state_t *donor_node;
4432 +	char padding[81] = "                                                                                ";
4433 +
4434 +	if(n == NULL) {
4435 +		TRACE_CUR("+-> %p\n", NULL);
4436 +		return;
4437 +	}
4438 +
4439 +	donor_node = binheap_entry(n, ikglp_wait_state_t, node);
4440 +
4441 +	if(depth*2 <= 80)
4442 +		padding[depth*2] = '\0';
4443 +
4444 +
4445 +	TRACE_CUR("%s+-> %s/%d (donee: %s/%d)\n",
4446 +			  padding,
4447 +			  donor_node->task->comm,
4448 +			  donor_node->task->pid,
4449 +			  donor_node->donee_info->task->comm,
4450 +			  donor_node->donee_info->task->pid);
4451 +
4452 +    if(n->left) print_donors(n->left, depth+1);
4453 +    if(n->right) print_donors(n->right, depth+1);
4454 +}
4455 +#endif
4456 +
4457 +static void ikglp_add_global_list(struct ikglp_semaphore *sem,
4458 +								  struct task_struct *t,
4459 +								  ikglp_heap_node_t *node)
4460 +{
4461 +
4462 +
4463 +	node->task = t;
4464 +	INIT_BINHEAP_NODE(&node->node);
4465 +
4466 +	if(sem->top_m_size < sem->m) {
4467 +		TRACE_CUR("Trivially adding %s/%d to top-m global list.\n",
4468 +				  t->comm, t->pid);
4469 +//		TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size);
4470 +//		print_global_list(sem->top_m.root, 1);
4471 +
4472 +		binheap_add(&node->node, &sem->top_m, ikglp_heap_node_t, node);
4473 +		++(sem->top_m_size);
4474 +
4475 +//		TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size);
4476 +//		print_global_list(sem->top_m.root, 1);
4477 +	}
4478 +	else if(litmus->__compare(t, BASE, ikglp_mth_highest(sem), BASE)) {
4479 +		ikglp_heap_node_t *evicted =
4480 +			binheap_top_entry(&sem->top_m, ikglp_heap_node_t, node);
4481 +
4482 +		TRACE_CUR("Adding %s/%d to top-m and evicting %s/%d.\n",
4483 +				  t->comm, t->pid,
4484 +				  evicted->task->comm, evicted->task->pid);
4485 +
4486 +//		TRACE_CUR("Not-Top-M Before:\n");
4487 +//		print_global_list(sem->not_top_m.root, 1);
4488 +//		TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size);
4489 +//		print_global_list(sem->top_m.root, 1);
4490 +
4491 +
4492 +		binheap_delete_root(&sem->top_m, ikglp_heap_node_t, node);
4493 +		INIT_BINHEAP_NODE(&evicted->node);
4494 +		binheap_add(&evicted->node, &sem->not_top_m, ikglp_heap_node_t, node);
4495 +
4496 +		binheap_add(&node->node, &sem->top_m, ikglp_heap_node_t, node);
4497 +
4498 +//		TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size);
4499 +//		print_global_list(sem->top_m.root, 1);
4500 +//		TRACE_CUR("Not-Top-M After:\n");
4501 +//		print_global_list(sem->not_top_m.root, 1);
4502 +	}
4503 +	else {
4504 +		TRACE_CUR("Trivially adding %s/%d to not-top-m global list.\n",
4505 +				  t->comm, t->pid);
4506 +//		TRACE_CUR("Not-Top-M Before:\n");
4507 +//		print_global_list(sem->not_top_m.root, 1);
4508 +
4509 +		binheap_add(&node->node, &sem->not_top_m, ikglp_heap_node_t, node);
4510 +
4511 +//		TRACE_CUR("Not-Top-M After:\n");
4512 +//		print_global_list(sem->not_top_m.root, 1);
4513 +	}
4514 +}
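The invariant maintained by ikglp_add_global_list() (and restored by ikglp_del_global_list() below) can be summarized as follows; this is an editorial summary of the code above, not text from the patch:

    /*
     * top_m      -- the (at most m) highest base-priority requests; its root
     *               is the m-th highest (see ikglp_mth_highest()) and thus
     *               the eviction candidate.
     * not_top_m  -- all remaining requests; its root is the first candidate
     *               for promotion when a top-m entry leaves.
     *
     * An insertion therefore (a) goes straight into top_m while it has fewer
     * than m entries, (b) evicts the current m-th highest into not_top_m when
     * the newcomer beats it, or (c) lands in not_top_m otherwise.
     */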
4515 +
4516 +
4517 +static void ikglp_del_global_list(struct ikglp_semaphore *sem,
4518 +								  struct task_struct *t,
4519 +								  ikglp_heap_node_t *node)
4520 +{
4521 +	BUG_ON(!binheap_is_in_heap(&node->node));
4522 +
4523 +	TRACE_CUR("Removing %s/%d from global list.\n", t->comm, t->pid);
4524 +
4525 +	if(binheap_is_in_this_heap(&node->node, &sem->top_m)) {
4526 +		TRACE_CUR("%s/%d is in top-m\n", t->comm, t->pid);
4527 +
4528 +//		TRACE_CUR("Not-Top-M Before:\n");
4529 +//		print_global_list(sem->not_top_m.root, 1);
4530 +//		TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size);
4531 +//		print_global_list(sem->top_m.root, 1);
4532 +
4533 +
4534 +		binheap_delete(&node->node, &sem->top_m);
4535 +
4536 +		if(!binheap_empty(&sem->not_top_m)) {
4537 +			ikglp_heap_node_t *promoted =
4538 +				binheap_top_entry(&sem->not_top_m, ikglp_heap_node_t, node);
4539 +
4540 +			TRACE_CUR("Promoting %s/%d to top-m\n",
4541 +					  promoted->task->comm, promoted->task->pid);
4542 +
4543 +			binheap_delete_root(&sem->not_top_m, ikglp_heap_node_t, node);
4544 +			INIT_BINHEAP_NODE(&promoted->node);
4545 +
4546 +			binheap_add(&promoted->node, &sem->top_m, ikglp_heap_node_t, node);
4547 +		}
4548 +		else {
4549 +			TRACE_CUR("No one to promote to top-m.\n");
4550 +			--(sem->top_m_size);
4551 +		}
4552 +
4553 +//		TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size);
4554 +//		print_global_list(sem->top_m.root, 1);
4555 +//		TRACE_CUR("Not-Top-M After:\n");
4556 +//		print_global_list(sem->not_top_m.root, 1);
4557 +	}
4558 +	else {
4559 +		TRACE_CUR("%s/%d is in not-top-m\n", t->comm, t->pid);
4560 +//		TRACE_CUR("Not-Top-M Before:\n");
4561 +//		print_global_list(sem->not_top_m.root, 1);
4562 +
4563 +		binheap_delete(&node->node, &sem->not_top_m);
4564 +
4565 +//		TRACE_CUR("Not-Top-M After:\n");
4566 +//		print_global_list(sem->not_top_m.root, 1);
4567 +	}
4568 +}
4569 +
4570 +
4571 +static void ikglp_add_donees(struct ikglp_semaphore *sem,
4572 +							 struct fifo_queue *fq,
4573 +							 struct task_struct *t,
4574 +							 ikglp_donee_heap_node_t* node)
4575 +{
4576 +//	TRACE_CUR("Adding %s/%d to donee list.\n", t->comm, t->pid);
4577 +//	TRACE_CUR("donees Before:\n");
4578 +//	print_donees(sem, sem->donees.root, 1);
4579 +
4580 +	node->task = t;
4581 +	node->donor_info = NULL;
4582 +	node->fq = fq;
4583 +	INIT_BINHEAP_NODE(&node->node);
4584 +
4585 +	binheap_add(&node->node, &sem->donees, ikglp_donee_heap_node_t, node);
4586 +
4587 +//	TRACE_CUR("donees After:\n");
4588 +//	print_donees(sem, sem->donees.root, 1);
4589 +}
4590 +
4591 +
4592 +static void ikglp_refresh_owners_prio_increase(struct task_struct *t,
4593 +											   struct fifo_queue *fq,
4594 +											   struct ikglp_semaphore *sem,
4595 +											   unsigned long flags)
4596 +{
4597 +	// priority of 't' has increased (note: 't' might already be hp_waiter).
4598 +	if ((t == fq->hp_waiter) || litmus->compare(t, fq->hp_waiter)) {
4599 +		struct task_struct *old_max_eff_prio;
4600 +		struct task_struct *new_max_eff_prio;
4601 +		struct task_struct *new_prio = NULL;
4602 +		struct task_struct *owner = fq->owner;
4603 +
4604 +		if(fq->hp_waiter)
4605 +			TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n",
4606 +					   fq->hp_waiter->comm, fq->hp_waiter->pid);
4607 +		else
4608 +			TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n");
4609 +
4610 +		if(owner)
4611 +		{
4612 +			raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
4613 +
4614 +//			TRACE_TASK(owner, "Heap Before:\n");
4615 +//			print_hp_waiters(tsk_rt(owner)->hp_blocked_tasks.root, 0);
4616 +
4617 +			old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
4618 +
4619 +			fq->hp_waiter = t;
4620 +			fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter);
4621 +
4622 +			binheap_decrease(&fq->nest.hp_binheap_node,
4623 +							 &tsk_rt(owner)->hp_blocked_tasks);
4624 +
4625 +//			TRACE_TASK(owner, "Heap After:\n");
4626 +//			print_hp_waiters(tsk_rt(owner)->hp_blocked_tasks.root, 0);
4627 +
4628 +			new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
4629 +
4630 +			if(new_max_eff_prio != old_max_eff_prio) {
4631 +				TRACE_TASK(t, "is new hp_waiter.\n");
4632 +
4633 +				if ((effective_priority(owner) == old_max_eff_prio) ||
4634 +					(litmus->__compare(new_max_eff_prio, BASE,
4635 +									   owner, EFFECTIVE))){
4636 +					new_prio = new_max_eff_prio;
4637 +				}
4638 +			}
4639 +			else {
4640 +				TRACE_TASK(t, "no change in max_eff_prio of heap.\n");
4641 +			}
4642 +
4643 +			if(new_prio) {
4644 +				// set new inheritance and propagate
4645 +				TRACE_TASK(t, "Effective priority changed for owner %s/%d to %s/%d\n",
4646 +						   owner->comm, owner->pid,
4647 +						   new_prio->comm, new_prio->pid);
4648 +				litmus->nested_increase_prio(owner, new_prio, &sem->lock,
4649 +											 flags);  // unlocks lock.
4650 +			}
4651 +			else {
4652 +				TRACE_TASK(t, "No change in effective priority (is %s/%d).  Propagation halted.\n",
4653 +						   new_max_eff_prio->comm, new_max_eff_prio->pid);
4654 +				raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
4655 +				unlock_fine_irqrestore(&sem->lock, flags);
4656 +			}
4657 +		}
4658 +		else {
4659 +			fq->hp_waiter = t;
4660 +			fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter);
4661 +
4662 +			TRACE_TASK(t, "no owner.\n");
4663 +			unlock_fine_irqrestore(&sem->lock, flags);
4664 +		}
4665 +	}
4666 +	else {
4667 +		TRACE_TASK(t, "hp_waiter is unaffected.\n");
4668 +		unlock_fine_irqrestore(&sem->lock, flags);
4669 +	}
4670 +}
4671 +
4672 +// the priority of hp_waiter has decreased
4673 +static void ikglp_refresh_owners_prio_decrease(struct fifo_queue *fq,
4674 +											   struct ikglp_semaphore *sem,
4675 +											   unsigned long flags)
4676 +{
4677 +	struct task_struct *owner = fq->owner;
4678 +
4679 +	struct task_struct *old_max_eff_prio;
4680 +	struct task_struct *new_max_eff_prio;
4681 +
4682 +	if(!owner) {
4683 +		TRACE_CUR("No owner.  Returning.\n");
4684 +		unlock_fine_irqrestore(&sem->lock, flags);
4685 +		return;
4686 +	}
4687 +
4688 +	TRACE_CUR("ikglp_refresh_owners_prio_decrease\n");
4689 +
4690 +	raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
4691 +
4692 +	old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
4693 +
4694 +	binheap_delete(&fq->nest.hp_binheap_node, &tsk_rt(owner)->hp_blocked_tasks);
4695 +	fq->nest.hp_waiter_eff_prio = fq->hp_waiter;
4696 +	binheap_add(&fq->nest.hp_binheap_node, &tsk_rt(owner)->hp_blocked_tasks,
4697 +				struct nested_info, hp_binheap_node);
4698 +
4699 +	new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
4700 +
4701 +	if((old_max_eff_prio != new_max_eff_prio) &&
4702 +	   (effective_priority(owner) == old_max_eff_prio))
4703 +	{
4704 +		// Need to set new effective_priority for owner
4705 +		struct task_struct *decreased_prio;
4706 +
4707 +		TRACE_CUR("Propagating decreased inheritance to holder of fq %d.\n",
4708 +				  ikglp_get_idx(sem, fq));
4709 +
4710 +		if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) {
4711 +			TRACE_CUR("%s/%d has greater base priority than base priority of owner (%s/%d) of fq %d.\n",
4712 +					  (new_max_eff_prio) ? new_max_eff_prio->comm : "nil",
4713 +					  (new_max_eff_prio) ? new_max_eff_prio->pid : -1,
4714 +					  owner->comm,
4715 +					  owner->pid,
4716 +					  ikglp_get_idx(sem, fq));
4717 +
4718 +			decreased_prio = new_max_eff_prio;
4719 +		}
4720 +		else {
4721 +			TRACE_CUR("%s/%d has lesser base priority than base priority of owner (%s/%d) of fq %d.\n",
4722 +					  (new_max_eff_prio) ? new_max_eff_prio->comm : "nil",
4723 +					  (new_max_eff_prio) ? new_max_eff_prio->pid : -1,
4724 +					  owner->comm,
4725 +					  owner->pid,
4726 +					  ikglp_get_idx(sem, fq));
4727 +
4728 +			decreased_prio = NULL;
4729 +		}
4730 +
4731 +		// beware: recursion
4732 +		litmus->nested_decrease_prio(owner, decreased_prio, &sem->lock, flags);	// will unlock mutex->lock
4733 +	}
4734 +	else {
4735 +		TRACE_TASK(owner, "No need to propagate priority decrease forward.\n");
4736 +		raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
4737 +		unlock_fine_irqrestore(&sem->lock, flags);
4738 +	}
4739 +}
4740 +
4741 +
4742 +static void ikglp_remove_donation_from_owner(struct binheap_node *n,
4743 +											 struct fifo_queue *fq,
4744 +											 struct ikglp_semaphore *sem,
4745 +											 unsigned long flags)
4746 +{
4747 +	struct task_struct *owner = fq->owner;
4748 +
4749 +	struct task_struct *old_max_eff_prio;
4750 +	struct task_struct *new_max_eff_prio;
4751 +
4752 +	BUG_ON(!owner);
4753 +
4754 +	raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
4755 +
4756 +	old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
4757 +
4758 +	binheap_delete(n, &tsk_rt(owner)->hp_blocked_tasks);
4759 +
4760 +	new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
4761 +
4762 +	if((old_max_eff_prio != new_max_eff_prio) &&
4763 +	   (effective_priority(owner) == old_max_eff_prio))
4764 +	{
4765 +		// Need to set new effective_priority for owner
4766 +		struct task_struct *decreased_prio;
4767 +
4768 +		TRACE_CUR("Propagating decreased inheritance to holder of fq %d.\n",
4769 +				  ikglp_get_idx(sem, fq));
4770 +
4771 +		if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) {
4772 +			TRACE_CUR("has greater base priority than base priority of owner of fq %d.\n",
4773 +					  ikglp_get_idx(sem, fq));
4774 +			decreased_prio = new_max_eff_prio;
4775 +		}
4776 +		else {
4777 +			TRACE_CUR("has lesser base priority than base priority of owner of fq %d.\n",
4778 +					  ikglp_get_idx(sem, fq));
4779 +			decreased_prio = NULL;
4780 +		}
4781 +
4782 +		// beware: recursion
4783 +		litmus->nested_decrease_prio(owner, decreased_prio, &sem->lock, flags);	// will unlock mutex->lock
4784 +	}
4785 +	else {
4786 +		TRACE_TASK(owner, "No need to propagate priority decrease forward.\n");
4787 +		raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
4788 +		unlock_fine_irqrestore(&sem->lock, flags);
4789 +	}
4790 +}
4791 +
4792 +static void ikglp_remove_donation_from_fq_waiter(struct task_struct *t,
4793 +												 struct binheap_node *n)
4794 +{
4795 +	struct task_struct *old_max_eff_prio;
4796 +	struct task_struct *new_max_eff_prio;
4797 +
4798 +	raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock);
4799 +
4800 +	old_max_eff_prio = top_priority(&tsk_rt(t)->hp_blocked_tasks);
4801 +
4802 +	binheap_delete(n, &tsk_rt(t)->hp_blocked_tasks);
4803 +
4804 +	new_max_eff_prio = top_priority(&tsk_rt(t)->hp_blocked_tasks);
4805 +
4806 +	if((old_max_eff_prio != new_max_eff_prio) &&
4807 +	   (effective_priority(t) == old_max_eff_prio))
4808 +	{
4809 +		// Need to set new effective_priority for owner
4810 +		struct task_struct *decreased_prio;
4811 +
4812 +		if(litmus->__compare(new_max_eff_prio, BASE, t, BASE)) {
4813 +			decreased_prio = new_max_eff_prio;
4814 +		}
4815 +		else {
4816 +			decreased_prio = NULL;
4817 +		}
4818 +
4819 +		tsk_rt(t)->inh_task = decreased_prio;
4820 +	}
4821 +
4822 +	raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);
4823 +}
4824 +
4825 +static void ikglp_get_immediate(struct task_struct* t,
4826 +								struct fifo_queue *fq,
4827 +								struct ikglp_semaphore *sem,
4828 +								unsigned long flags)
4829 +{
4830 +	// resource available now
4831 +	TRACE_CUR("queue %d: acquired immediately\n", ikglp_get_idx(sem, fq));
4832 +
4833 +	fq->owner = t;
4834 +
4835 +	raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock);
4836 +	binheap_add(&fq->nest.hp_binheap_node, &tsk_rt(t)->hp_blocked_tasks,
4837 +				struct nested_info, hp_binheap_node);
4838 +	raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);
4839 +
4840 +	++(fq->count);
4841 +
4842 +	ikglp_add_global_list(sem, t, &fq->global_heap_node);
4843 +	ikglp_add_donees(sem, fq, t, &fq->donee_heap_node);
4844 +
4845 +	sem->shortest_fifo_queue = ikglp_find_shortest(sem, sem->shortest_fifo_queue);
4846 +
4847 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
4848 +	if(sem->aff_obs) {
4849 +		sem->aff_obs->ops->notify_enqueue(sem->aff_obs, fq, t);
4850 +		sem->aff_obs->ops->notify_acquired(sem->aff_obs, fq, t);
4851 +	}
4852 +#endif
4853 +
4854 +	unlock_fine_irqrestore(&sem->lock, flags);
4855 +}
4856 +
4857 +
4858 +
4859 +
4860 +
4861 +static void __ikglp_enqueue_on_fq(struct ikglp_semaphore *sem,
4862 +								  struct fifo_queue* fq,
4863 +								  struct task_struct* t,
4864 +								  wait_queue_t *wait,
4865 +								  ikglp_heap_node_t *global_heap_node,
4866 +								  ikglp_donee_heap_node_t *donee_heap_node)
4867 +{
4868 +	/* resource is not free => must suspend and wait */
4869 +	TRACE_TASK(t, "Enqueuing on fq %d.\n",
4870 +			   ikglp_get_idx(sem, fq));
4871 +
4872 +	init_waitqueue_entry(wait, t);
4873 +
4874 +	__add_wait_queue_tail_exclusive(&fq->wait, wait);
4875 +
4876 +	++(fq->count);
4877 +	++(sem->nr_in_fifos);
4878 +
4879 +	// update global list.
4880 +	if(likely(global_heap_node)) {
4881 +		if(binheap_is_in_heap(&global_heap_node->node)) {
4882 +			WARN_ON(1);
4883 +			ikglp_del_global_list(sem, t, global_heap_node);
4884 +		}
4885 +		ikglp_add_global_list(sem, t, global_heap_node);
4886 +	}
4887 +	// update donor eligibility list.
4888 +	if(likely(donee_heap_node)) {
4889 +//		if(binheap_is_in_heap(&donee_heap_node->node)) {
4890 +//			WARN_ON(1);
4891 +//		}
4892 +		ikglp_add_donees(sem, fq, t, donee_heap_node);
4893 +	}
4894 +
4895 +	if(sem->shortest_fifo_queue == fq) {
4896 +		sem->shortest_fifo_queue = ikglp_find_shortest(sem, fq);
4897 +	}
4898 +
4899 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
4900 +	if(sem->aff_obs) {
4901 +		sem->aff_obs->ops->notify_enqueue(sem->aff_obs, fq, t);
4902 +	}
4903 +#endif
4904 +
4905 +	TRACE_TASK(t, "shortest queue is now %d\n", ikglp_get_idx(sem, fq));
4906 +}
4907 +
4908 +
4909 +static void ikglp_enqueue_on_fq(
4910 +								struct ikglp_semaphore *sem,
4911 +								struct fifo_queue *fq,
4912 +								ikglp_wait_state_t *wait,
4913 +								unsigned long flags)
4914 +{
4915 +	/* resource is not free => must suspend and wait */
4916 +	TRACE_TASK(wait->task, "queue %d: Resource is not free => must suspend and wait.\n",
4917 +			   ikglp_get_idx(sem, fq));
4918 +
4919 +	INIT_BINHEAP_NODE(&wait->global_heap_node.node);
4920 +	INIT_BINHEAP_NODE(&wait->donee_heap_node.node);
4921 +
4922 +	__ikglp_enqueue_on_fq(sem, fq, wait->task, &wait->fq_node,
4923 +						  &wait->global_heap_node, &wait->donee_heap_node);
4924 +
4925 +	ikglp_refresh_owners_prio_increase(wait->task, fq, sem, flags);  // unlocks sem->lock
4926 +}
4927 +
4928 +
4929 +static void __ikglp_enqueue_on_pq(struct ikglp_semaphore *sem,
4930 +								  ikglp_wait_state_t *wait)
4931 +{
4932 +	TRACE_TASK(wait->task, "goes to PQ.\n");
4933 +
4934 +	wait->pq_node.task = wait->task; // copy over task (little redundant...)
4935 +
4936 +	binheap_add(&wait->pq_node.node, &sem->priority_queue,
4937 +				ikglp_heap_node_t, node);
4938 +}
4939 +
4940 +static void ikglp_enqueue_on_pq(struct ikglp_semaphore *sem,
4941 +								ikglp_wait_state_t *wait)
4942 +{
4943 +	INIT_BINHEAP_NODE(&wait->global_heap_node.node);
4944 +	INIT_BINHEAP_NODE(&wait->donee_heap_node.node);
4945 +	INIT_BINHEAP_NODE(&wait->pq_node.node);
4946 +
4947 +	__ikglp_enqueue_on_pq(sem, wait);
4948 +}
4949 +
4950 +static void ikglp_enqueue_on_donor(struct ikglp_semaphore *sem,
4951 +								   ikglp_wait_state_t* wait,
4952 +								   unsigned long flags)
4953 +{
4954 +	struct task_struct *t = wait->task;
4955 +	ikglp_donee_heap_node_t *donee_node = NULL;
4956 +	struct task_struct *donee;
4957 +
4958 +	struct task_struct *old_max_eff_prio;
4959 +	struct task_struct *new_max_eff_prio;
4960 +	struct task_struct *new_prio = NULL;
4961 +
4962 +	INIT_BINHEAP_NODE(&wait->global_heap_node.node);
4963 +	INIT_BINHEAP_NODE(&wait->donee_heap_node.node);
4964 +	INIT_BINHEAP_NODE(&wait->pq_node.node);
4965 +	INIT_BINHEAP_NODE(&wait->node);
4966 +
4967 +//	TRACE_CUR("Adding %s/%d as donor.\n", t->comm, t->pid);
4968 +//	TRACE_CUR("donors Before:\n");
4969 +//	print_donors(sem->donors.root, 1);
4970 +
4971 +	// Add donor to the global list.
4972 +	ikglp_add_global_list(sem, t, &wait->global_heap_node);
4973 +
4974 +	// Select a donee
4975 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
4976 +	donee_node = (sem->aff_obs) ?
4977 +		sem->aff_obs->ops->advise_donee_selection(sem->aff_obs, t) :
4978 +		binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node);
4979 +#else
4980 +	donee_node = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node);
4981 +#endif
4982 +
4983 +	donee = donee_node->task;
4984 +
4985 +	TRACE_TASK(t, "Donee selected: %s/%d\n", donee->comm, donee->pid);
4986 +
4987 +	TRACE_CUR("Temporarily removing %s/%d from donee list.\n",
4988 +			  donee->comm, donee->pid);
4989 +//	TRACE_CUR("donees Before:\n");
4990 +//	print_donees(sem, sem->donees.root, 1);
4991 +
4992 +	//binheap_delete_root(&sem->donees, ikglp_donee_heap_node_t, node);  // will re-add it shortly
4993 +	binheap_delete(&donee_node->node, &sem->donees);
4994 +
4995 +//	TRACE_CUR("donees After:\n");
4996 +//	print_donees(sem, sem->donees.root, 1);
4997 +
4998 +
4999 +	wait->donee_info = donee_node;
5000 +
5001 +	// Add t to donor heap.
5002 +	binheap_add(&wait->node, &sem->donors, ikglp_wait_state_t, node);
5003 +
5004 +	// Now adjust the donee's priority.
5005 +
5006 +	// Lock the donee's inheritance heap.
5007 +	raw_spin_lock(&tsk_rt(donee)->hp_blocked_tasks_lock);
5008 +
5009 +	old_max_eff_prio = top_priority(&tsk_rt(donee)->hp_blocked_tasks);
5010 +
5011 +	if(donee_node->donor_info) {
5012 +		// Steal donation relation.  Evict old donor to PQ.
5013 +
5014 +		// Remove old donor from donor heap
5015 +		ikglp_wait_state_t *old_wait = donee_node->donor_info;
5016 +		struct task_struct *old_donor = old_wait->task;
5017 +
5018 +		TRACE_TASK(t, "Donee (%s/%d) had donor %s/%d.  Moving old donor to PQ.\n",
5019 +				   donee->comm, donee->pid, old_donor->comm, old_donor->pid);
5020 +
5021 +		binheap_delete(&old_wait->node, &sem->donors);
5022 +
5023 +		// Remove donation from donee's inheritance heap.
5024 +		binheap_delete(&old_wait->prio_donation.hp_binheap_node,
5025 +					   &tsk_rt(donee)->hp_blocked_tasks);
5026 +		// WARNING: have not updated inh_prio!
5027 +
5028 +		// Add old donor to PQ.
5029 +		__ikglp_enqueue_on_pq(sem, old_wait);
5030 +
5031 +		// Remove old donor from the global heap.
5032 +		ikglp_del_global_list(sem, old_donor, &old_wait->global_heap_node);
5033 +	}
5034 +
5035 +	// Add back donee's node to the donees heap with increased prio
5036 +	donee_node->donor_info = wait;
5037 +	INIT_BINHEAP_NODE(&donee_node->node);
5038 +
5039 +
5040 +	TRACE_CUR("Adding %s/%d back to donee list.\n", donee->comm, donee->pid);
5041 +//	TRACE_CUR("donees Before:\n");
5042 +//	print_donees(sem, sem->donees.root, 1);
5043 +
5044 +	binheap_add(&donee_node->node, &sem->donees, ikglp_donee_heap_node_t, node);
5045 +
5046 +//	TRACE_CUR("donees After:\n");
5047 +//	print_donees(sem, sem->donees.root, 1);
5048 +
5049 +	// Add an inheritance/donation to the donee's inheritance heap.
5050 +	wait->prio_donation.lock = (struct litmus_lock*)sem;
5051 +	wait->prio_donation.hp_waiter_eff_prio = t;
5052 +	wait->prio_donation.hp_waiter_ptr = NULL;
5053 +	INIT_BINHEAP_NODE(&wait->prio_donation.hp_binheap_node);
5054 +
5055 +	binheap_add(&wait->prio_donation.hp_binheap_node,
5056 +				&tsk_rt(donee)->hp_blocked_tasks,
5057 +				struct nested_info, hp_binheap_node);
5058 +
5059 +	new_max_eff_prio = top_priority(&tsk_rt(donee)->hp_blocked_tasks);
5060 +
5061 +	if(new_max_eff_prio != old_max_eff_prio) {
5062 +		if ((effective_priority(donee) == old_max_eff_prio) ||
5063 +			(litmus->__compare(new_max_eff_prio, BASE, donee, EFFECTIVE))){
5064 +			TRACE_TASK(t, "Donation increases %s/%d's effective priority\n",
5065 +					   donee->comm, donee->pid);
5066 +			new_prio = new_max_eff_prio;
5067 +		}
5068 +//		else {
5069 +//			// should be bug.  donor would not be in top-m.
5070 +//			TRACE_TASK(t, "Donation is not greater than base prio of %s/%d?\n", donee->comm, donee->pid);
5071 +//			WARN_ON(1);
5072 +//		}
5073 +//	}
5074 +//	else {
5075 +//		// should be bug.  donor would not be in top-m.
5076 +//		TRACE_TASK(t, "No change in %s/%d's inheritance heap?\n", donee->comm, donee->pid);
5077 +//		WARN_ON(1);
5078 +	}
5079 +
5080 +	if(new_prio) {
5081 +		struct fifo_queue *donee_fq = donee_node->fq;
5082 +
5083 +		if(donee != donee_fq->owner) {
5084 +			TRACE_TASK(t, "%s/%d is not the owner. Propagating priority to owner %s/%d.\n",
5085 +					   donee->comm, donee->pid,
5086 +					   donee_fq->owner->comm, donee_fq->owner->pid);
5087 +
5088 +			raw_spin_unlock(&tsk_rt(donee)->hp_blocked_tasks_lock);
5089 +			ikglp_refresh_owners_prio_increase(donee, donee_fq, sem, flags);  // unlocks sem->lock
5090 +		}
5091 +		else {
5092 +			TRACE_TASK(t, "%s/%d is the owner. Propagating priority immediately.\n",
5093 +					   donee->comm, donee->pid);
5094 +			litmus->nested_increase_prio(donee, new_prio, &sem->lock, flags);  // unlocks sem->lock and donee's heap lock
5095 +		}
5096 +	}
5097 +	else {
5098 +		TRACE_TASK(t, "No change in effective priority (it is %s/%d).  BUG?\n",
5099 +				   new_max_eff_prio->comm, new_max_eff_prio->pid);
5100 +		raw_spin_unlock(&tsk_rt(donee)->hp_blocked_tasks_lock);
5101 +		unlock_fine_irqrestore(&sem->lock, flags);
5102 +	}
5103 +
5104 +
5105 +//	TRACE_CUR("donors After:\n");
5106 +//	print_donors(sem->donors.root, 1);
5107 +}
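
The donation path above installs the new donor in the donee's hp_blocked_tasks heap and only propagates further when the top of that heap actually changes and beats the donee's current effective priority. The toy model below is not part of the patch: an array stands in for the binheap, and plain integers stand in for LITMUS priority comparisons.

#include <stdio.h>

/* Toy model of the propagation check above: a task's effective priority is
 * the max of its base priority and everything in its inheritance heap.  A
 * donation only propagates when it raises that max. */
static int effective_prio(int base, const int *inh, int n)
{
	int best = base, i;
	for (i = 0; i < n; ++i)
		if (inh[i] > best)
			best = inh[i];
	return best;
}

int main(void)
{
	int inh[3] = { 4, 9 };	/* existing inheritances */
	int n = 2, base = 6;
	int before, after;

	before = effective_prio(base, inh, n);
	inh[n++] = 12;		/* new donation arrives */
	after = effective_prio(base, inh, n);

	if (after != before)
		printf("donation raises effective prio %d -> %d; propagate\n",
		       before, after);
	else
		printf("no change; nothing to propagate\n");
	return 0;
}
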
5108 +
5109 +int ikglp_lock(struct litmus_lock* l)
5110 +{
5111 +	struct task_struct* t = current;
5112 +	struct ikglp_semaphore *sem = ikglp_from_lock(l);
5113 +	unsigned long flags = 0, real_flags;
5114 +	struct fifo_queue *fq = NULL;
5115 +	int replica = -EINVAL;
5116 +
5117 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
5118 +	raw_spinlock_t *dgl_lock;
5119 +#endif
5120 +
5121 +	ikglp_wait_state_t wait;
5122 +
5123 +	if (!is_realtime(t))
5124 +		return -EPERM;
5125 +
5126 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
5127 +	dgl_lock = litmus->get_dgl_spinlock(t);
5128 +#endif
5129 +
5130 +	raw_spin_lock_irqsave(&sem->real_lock, real_flags);
5131 +
5132 +	lock_global_irqsave(dgl_lock, flags);
5133 +	lock_fine_irqsave(&sem->lock, flags);
5134 +
5135 +	if(sem->nr_in_fifos < sem->m) {
5136 +		// enqueue somewhere
5137 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
5138 +		fq = (sem->aff_obs) ?
5139 +			sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t) :
5140 +			sem->shortest_fifo_queue;
5141 +#else
5142 +		fq = sem->shortest_fifo_queue;
5143 +#endif
5144 +		if(fq->count == 0) {
5145 +			// take available resource
5146 +			replica = ikglp_get_idx(sem, fq);
5147 +
5148 +			ikglp_get_immediate(t, fq, sem, flags);  // unlocks sem->lock
5149 +
5150 +			unlock_global_irqrestore(dgl_lock, flags);
5151 +			raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
5152 +			goto acquired;
5153 +		}
5154 +		else {
5155 +			wait.task = t;   // THIS IS CRITICALLY IMPORTANT!!!
5156 +
5157 +			tsk_rt(t)->blocked_lock = (struct litmus_lock*)sem;  // record where we are blocked
5158 +			mb();
5159 +
5160 +			/* FIXME: interruptible would be nice some day */
5161 +			set_task_state(t, TASK_UNINTERRUPTIBLE);
5162 +
5163 +			ikglp_enqueue_on_fq(sem, fq, &wait, flags);  // unlocks sem->lock
5164 +		}
5165 +	}
5166 +	else {
5167 +		// donor!
5168 +		wait.task = t;   // THIS IS CRITICALLY IMPORTANT!!!
5169 +
5170 +		tsk_rt(t)->blocked_lock = (struct litmus_lock*)sem;  // record where we are blocked
5171 +		mb();
5172 +
5173 +		/* FIXME: interruptible would be nice some day */
5174 +		set_task_state(t, TASK_UNINTERRUPTIBLE);
5175 +
5176 +		if(litmus->__compare(ikglp_mth_highest(sem), BASE, t, BASE)) {
5177 +			// enqueue on PQ
5178 +			ikglp_enqueue_on_pq(sem, &wait);
5179 +			unlock_fine_irqrestore(&sem->lock, flags);
5180 +		}
5181 +		else {
5182 +			// enqueue as donor
5183 +			ikglp_enqueue_on_donor(sem, &wait, flags);	 // unlocks sem->lock
5184 +		}
5185 +	}
5186 +
5187 +	unlock_global_irqrestore(dgl_lock, flags);
5188 +	raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
5189 +
5190 +	TS_LOCK_SUSPEND;
5191 +
5192 +	schedule();
5193 +
5194 +	TS_LOCK_RESUME;
5195 +
5196 +	fq = ikglp_get_queue(sem, t);
5197 +	BUG_ON(!fq);
5198 +
5199 +	replica = ikglp_get_idx(sem, fq);
5200 +
5201 +acquired:
5202 +	TRACE_CUR("Acquired lock %d, queue %d\n",
5203 +			  l->ident, replica);
5204 +
5205 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
5206 +	if(sem->aff_obs) {
5207 +		return sem->aff_obs->ops->replica_to_resource(sem->aff_obs, fq);
5208 +	}
5209 +#endif
5210 +
5211 +	return replica;
5212 +}
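
ikglp_lock() above routes an arriving request three ways: into a FIFO queue while fewer than m requests occupy FIFO slots, onto the PQ if the request does not out-prioritize the m-th highest known request, and into the donors heap otherwise. Below is a minimal user-space sketch of that routing; the integer priorities stand in for litmus->__compare(), and all names are illustrative only.

#include <stdio.h>

enum ikglp_dest { DEST_FIFO, DEST_PQ, DEST_DONOR };

/* Sketch of the routing above; "prio" integers stand in for the plugin's
 * __compare() on base priorities (higher value = higher priority), and
 * mth_prio is the priority of the m-th highest queued/holding request. */
static enum ikglp_dest route_request(int nr_in_fifos, int m,
				     int prio, int mth_prio)
{
	if (nr_in_fifos < m)
		return DEST_FIFO;	/* some FIFO still has room for a top-m request */
	if (mth_prio >= prio)
		return DEST_PQ;		/* not among the top m -> priority queue */
	return DEST_DONOR;		/* outranks the m-th highest -> become a donor */
}

int main(void)
{
	printf("%d\n", route_request(3, 4, 10, 7));	/* 0: FIFO */
	printf("%d\n", route_request(4, 4,  5, 7));	/* 1: PQ */
	printf("%d\n", route_request(4, 4, 10, 7));	/* 2: donor */
	return 0;
}
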
5213 +
5214 +//int ikglp_lock(struct litmus_lock* l)
5215 +//{
5216 +//	struct task_struct* t = current;
5217 +//	struct ikglp_semaphore *sem = ikglp_from_lock(l);
5218 +//	unsigned long flags = 0, real_flags;
5219 +//	struct fifo_queue *fq = NULL;
5220 +//	int replica = -EINVAL;
5221 +//
5222 +//#ifdef CONFIG_LITMUS_DGL_SUPPORT
5223 +//	raw_spinlock_t *dgl_lock;
5224 +//#endif
5225 +//
5226 +//	ikglp_wait_state_t wait;
5227 +//
5228 +//	if (!is_realtime(t))
5229 +//		return -EPERM;
5230 +//
5231 +//#ifdef CONFIG_LITMUS_DGL_SUPPORT
5232 +//	dgl_lock = litmus->get_dgl_spinlock(t);
5233 +//#endif
5234 +//
5235 +//	raw_spin_lock_irqsave(&sem->real_lock, real_flags);
5236 +//
5237 +//	lock_global_irqsave(dgl_lock, flags);
5238 +//	lock_fine_irqsave(&sem->lock, flags);
5239 +//
5240 +//
5241 +//#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
5242 +//	fq = (sem->aff_obs) ?
5243 +//		sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t) :
5244 +//		sem->shortest_fifo_queue;
5245 +//#else
5246 +//	fq = sem->shortest_fifo_queue;
5247 +//#endif
5248 +//
5249 +//	if(fq->count == 0) {
5250 +//		// take available resource
5251 +//		replica = ikglp_get_idx(sem, fq);
5252 +//
5253 +//		ikglp_get_immediate(t, fq, sem, flags);  // unlocks sem->lock
5254 +//
5255 +//		unlock_global_irqrestore(dgl_lock, flags);
5256 +//		raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
5257 +//	}
5258 +//	else
5259 +//	{
5260 +//		// we have to suspend.
5261 +//
5262 +//		wait.task = t;   // THIS IS CRITICALLY IMPORTANT!!!
5263 +//
5264 +//		tsk_rt(t)->blocked_lock = (struct litmus_lock*)sem;  // record where we are blocked
5265 +//		mb();
5266 +//
5267 +//		/* FIXME: interruptible would be nice some day */
5268 +//		set_task_state(t, TASK_UNINTERRUPTIBLE);
5269 +//
5270 +//		if(fq->count < sem->max_fifo_len) {
5271 +//			// enqueue on fq
5272 +//			ikglp_enqueue_on_fq(sem, fq, &wait, flags);  // unlocks sem->lock
5273 +//		}
5274 +//		else {
5275 +//
5276 +//			TRACE_CUR("IKGLP fifo queues are full (at least they better be).\n");
5277 +//
5278 +//			// no room in fifos.  Go to PQ or donors.
5279 +//
5280 +//			if(litmus->__compare(ikglp_mth_highest(sem), BASE, t, BASE)) {
5281 +//				// enqueue on PQ
5282 +//				ikglp_enqueue_on_pq(sem, &wait);
5283 +//				unlock_fine_irqrestore(&sem->lock, flags);
5284 +//			}
5285 +//			else {
5286 +//				// enqueue as donor
5287 +//				ikglp_enqueue_on_donor(sem, &wait, flags);	 // unlocks sem->lock
5288 +//			}
5289 +//		}
5290 +//
5291 +//		unlock_global_irqrestore(dgl_lock, flags);
5292 +//		raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
5293 +//
5294 +//		TS_LOCK_SUSPEND;
5295 +//
5296 +//		schedule();
5297 +//
5298 +//		TS_LOCK_RESUME;
5299 +//
5300 +//		fq = ikglp_get_queue(sem, t);
5301 +//		BUG_ON(!fq);
5302 +//
5303 +//		replica = ikglp_get_idx(sem, fq);
5304 +//	}
5305 +//
5306 +//	TRACE_CUR("Acquired lock %d, queue %d\n",
5307 +//			  l->ident, replica);
5308 +//
5309 +//#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
5310 +//	if(sem->aff_obs) {
5311 +//		return sem->aff_obs->ops->replica_to_resource(sem->aff_obs, fq);
5312 +//	}
5313 +//#endif
5314 +//
5315 +//	return replica;
5316 +//}
5317 +
5318 +static void ikglp_move_donor_to_fq(struct ikglp_semaphore *sem,
5319 +								   struct fifo_queue *fq,
5320 +								   ikglp_wait_state_t *donor_info)
5321 +{
5322 +	struct task_struct *t = donor_info->task;
5323 +
5324 +	TRACE_CUR("Donor %s/%d being moved to fq %d\n",
5325 +			  t->comm,
5326 +			  t->pid,
5327 +			  ikglp_get_idx(sem, fq));
5328 +
5329 +	binheap_delete(&donor_info->node, &sem->donors);
5330 +
5331 +	__ikglp_enqueue_on_fq(sem, fq, t,
5332 +						  &donor_info->fq_node,
5333 +						  NULL, // already in global_list, so pass null to prevent adding 2nd time.
5334 +						  &donor_info->donee_heap_node);
5335 +
5336 +	// warning:
5337 +	// ikglp_update_owners_prio(t, fq, sem, flags) has not been called.
5338 +}
5339 +
5340 +static void ikglp_move_pq_to_fq(struct ikglp_semaphore *sem,
5341 +								struct fifo_queue *fq,
5342 +								ikglp_wait_state_t *wait)
5343 +{
5344 +	struct task_struct *t = wait->task;
5345 +
5346 +	TRACE_CUR("PQ request %s/%d being moved to fq %d\n",
5347 +			  t->comm,
5348 +			  t->pid,
5349 +			  ikglp_get_idx(sem, fq));
5350 +
5351 +	binheap_delete(&wait->pq_node.node, &sem->priority_queue);
5352 +
5353 +	__ikglp_enqueue_on_fq(sem, fq, t,
5354 +						  &wait->fq_node,
5355 +						  &wait->global_heap_node,
5356 +						  &wait->donee_heap_node);
5357 +	// warning:
5358 +	// ikglp_update_owners_prio(t, fq, sem, flags) has not been called.
5359 +}
5360 +
5361 +static ikglp_wait_state_t* ikglp_find_hp_waiter_to_steal(
5362 +	struct ikglp_semaphore* sem)
5363 +{
5364 +	/* must hold sem->lock */
5365 +
5366 +	struct fifo_queue *fq = NULL;
5367 +	struct list_head	*pos;
5368 +	struct task_struct 	*queued;
5369 +	int i;
5370 +
5371 +	for(i = 0; i < sem->nr_replicas; ++i) {
5372 +		if( (sem->fifo_queues[i].count > 1) &&
5373 +		   (!fq || litmus->compare(sem->fifo_queues[i].hp_waiter, fq->hp_waiter)) ) {
5374 +
5375 +			TRACE_CUR("hp_waiter on fq %d (%s/%d) has higher prio than hp_waiter on fq %d (%s/%d)\n",
5376 +					  ikglp_get_idx(sem, &sem->fifo_queues[i]),
5377 +					  sem->fifo_queues[i].hp_waiter->comm,
5378 +					  sem->fifo_queues[i].hp_waiter->pid,
5379 +					  (fq) ? ikglp_get_idx(sem, fq) : -1,
5380 +					  (fq) ? ((fq->hp_waiter) ? fq->hp_waiter->comm : "nil") : "nilXX",
5381 +					  (fq) ? ((fq->hp_waiter) ? fq->hp_waiter->pid : -1) : -2);
5382 +
5383 +			fq = &sem->fifo_queues[i];
5384 +
5385 +			WARN_ON(!(fq->hp_waiter));
5386 +		}
5387 +	}
5388 +
5389 +	if(fq) {
5390 +		struct task_struct *max_hp = fq->hp_waiter;
5391 +		ikglp_wait_state_t* ret = NULL;
5392 +
5393 +		TRACE_CUR("Searching for %s/%d on fq %d\n",
5394 +				  max_hp->comm,
5395 +				  max_hp->pid,
5396 +				  ikglp_get_idx(sem, fq));
5397 +
5398 +		BUG_ON(!max_hp);
5399 +
5400 +		list_for_each(pos, &fq->wait.task_list) {
5401 +			wait_queue_t *wait = list_entry(pos, wait_queue_t, task_list);
5402 +
5403 +			queued  = (struct task_struct*) wait->private;
5404 +
5405 +			TRACE_CUR("fq %d entry: %s/%d\n",
5406 +					  ikglp_get_idx(sem, fq),
5407 +					  queued->comm,
5408 +					  queued->pid);
5409 +
5410 +			/* Compare task prios, find high prio task. */
5411 +			if (queued == max_hp) {
5412 +				TRACE_CUR("Found it!\n");
5413 +				ret = container_of(wait, ikglp_wait_state_t, fq_node);
5414 +			}
5415 +		}
5416 +
5417 +		WARN_ON(!ret);
5418 +		return ret;
5419 +	}
5420 +
5421 +	return(NULL);
5422 +}
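
ikglp_find_hp_waiter_to_steal() above scans every replica, skips queues whose only occupant is the owner (count <= 1), and keeps the queue whose hp_waiter wins the priority comparison. A compact sketch of that scan, with integer priorities in place of litmus->compare(); the struct and function names below are made up for illustration.

#include <stdio.h>

/* Sketch of the scan above: only queues that still hold a waiter besides the
 * owner (count > 1) are candidates, and the queue whose highest-priority
 * waiter is best wins. */
struct victim_q { int count; int hp_waiter_prio; };

static int pick_victim_queue(const struct victim_q *qs, int n)
{
	int best = -1, i;
	for (i = 0; i < n; ++i)
		if (qs[i].count > 1 &&
		    (best < 0 || qs[i].hp_waiter_prio > qs[best].hp_waiter_prio))
			best = i;
	return best;	/* index of the queue to steal from, or -1 */
}

int main(void)
{
	struct victim_q qs[3] = { { 1, 0 }, { 3, 7 }, { 2, 9 } };
	printf("steal from queue %d\n", pick_victim_queue(qs, 3));	/* 2 */
	return 0;
}
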
5423 +
5424 +static void ikglp_steal_to_fq(struct ikglp_semaphore *sem,
5425 +							  struct fifo_queue *fq,
5426 +							  ikglp_wait_state_t *fq_wait)
5427 +{
5428 +	struct task_struct *t = fq_wait->task;
5429 +	struct fifo_queue *fq_steal = fq_wait->donee_heap_node.fq;
5430 +
5431 +	TRACE_CUR("FQ request %s/%d being moved to fq %d\n",
5432 +			  t->comm,
5433 +			  t->pid,
5434 +			  ikglp_get_idx(sem, fq));
5435 +
5436 +	fq_wait->donee_heap_node.fq = fq;  // just to be safe
5437 +
5438 +
5439 +	__remove_wait_queue(&fq_steal->wait, &fq_wait->fq_node);
5440 +	--(fq_steal->count);
5441 +
5442 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
5443 +	if(sem->aff_obs) {
5444 +		sem->aff_obs->ops->notify_dequeue(sem->aff_obs, fq_steal, t);
5445 +	}
5446 +#endif
5447 +
5448 +	if(t == fq_steal->hp_waiter) {
5449 +		fq_steal->hp_waiter = ikglp_find_hp_waiter(fq_steal, NULL);
5450 +		TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n",
5451 +				   ikglp_get_idx(sem, fq_steal),
5452 +				   (fq_steal->hp_waiter) ? fq_steal->hp_waiter->comm : "nil",
5453 +				   (fq_steal->hp_waiter) ? fq_steal->hp_waiter->pid : -1);
5454 +	}
5455 +
5456 +
5457 +	// Update shortest.
5458 +	if(fq_steal->count < sem->shortest_fifo_queue->count) {
5459 +		sem->shortest_fifo_queue = fq_steal;
5460 +	}
5461 +
5462 +	__ikglp_enqueue_on_fq(sem, fq, t,
5463 +						  &fq_wait->fq_node,
5464 +						  NULL,
5465 +						  NULL);
5466 +
5467 +	// warning: We have not checked the priority inheritance of fq's owner yet.
5468 +}
5469 +
5470 +
5471 +static void ikglp_migrate_fq_to_owner_heap_nodes(struct ikglp_semaphore *sem,
5472 +												 struct fifo_queue *fq,
5473 +												 ikglp_wait_state_t *old_wait)
5474 +{
5475 +	struct task_struct *t = old_wait->task;
5476 +
5477 +	BUG_ON(old_wait->donee_heap_node.fq != fq);
5478 +
5479 +	TRACE_TASK(t, "Migrating wait_state to memory of queue %d.\n",
5480 +			   ikglp_get_idx(sem, fq));
5481 +
5482 +	// need to migrate global_heap_node and donee_heap_node off of the stack
5483 +	// to the nodes allocated for the owner of this fq.
5484 +
5485 +	// TODO: Enhance binheap() to perform this operation in place.
5486 +
5487 +	ikglp_del_global_list(sem, t, &old_wait->global_heap_node); // remove
5488 +	fq->global_heap_node = old_wait->global_heap_node;			// copy
5489 +	ikglp_add_global_list(sem, t, &fq->global_heap_node);		// re-add
5490 +
5491 +	binheap_delete(&old_wait->donee_heap_node.node, &sem->donees);  // remove
5492 +	fq->donee_heap_node = old_wait->donee_heap_node;  // copy
5493 +
5494 +	if(fq->donee_heap_node.donor_info) {
5495 +		// let donor know that our location has changed
5496 +		BUG_ON(fq->donee_heap_node.donor_info->donee_info->task != t);	// validate cross-link
5497 +		fq->donee_heap_node.donor_info->donee_info = &fq->donee_heap_node;
5498 +	}
5499 +	INIT_BINHEAP_NODE(&fq->donee_heap_node.node);
5500 +	binheap_add(&fq->donee_heap_node.node, &sem->donees,
5501 +				ikglp_donee_heap_node_t, node);  // re-add
5502 +}
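
The migration helper above exists because a waiter's ikglp_wait_state_t lives on its own stack; once the task becomes owner and returns from ikglp_lock(), that storage disappears, so the heap nodes are copied into the fq's own storage and the donor's cross-link is re-aimed at the copy. A minimal stand-alone model of the copy-and-relink step follows; the struct names are hypothetical, not the patch's.

#include <stdio.h>

struct donor_info;

/* Minimal model of the migration above: copy the stack-resident node into
 * queue-owned storage, then fix the donor's back-pointer. */
struct donee_node { int task_id; struct donor_info *donor; };
struct donor_info { struct donee_node *donee; };

struct fq_storage { struct donee_node donee_node; };

static void migrate(struct fq_storage *fq, struct donee_node *on_stack)
{
	fq->donee_node = *on_stack;			/* copy node contents */
	if (fq->donee_node.donor)			/* fix the back-pointer */
		fq->donee_node.donor->donee = &fq->donee_node;
}

int main(void)
{
	struct donor_info donor;
	struct donee_node stack_node = { 42, &donor };
	struct fq_storage fq;

	donor.donee = &stack_node;
	migrate(&fq, &stack_node);
	printf("donor now references fq-owned node of task %d\n",
	       donor.donee->task_id);
	return 0;
}
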
5503 +
5504 +int ikglp_unlock(struct litmus_lock* l)
5505 +{
5506 +	struct ikglp_semaphore *sem = ikglp_from_lock(l);
5507 +	struct task_struct *t = current;
5508 +	struct task_struct *donee = NULL;
5509 +	struct task_struct *next = NULL;
5510 +	struct task_struct *new_on_fq = NULL;
5511 +	struct fifo_queue *fq_of_new_on_fq = NULL;
5512 +
5513 +	ikglp_wait_state_t *other_donor_info = NULL;
5514 +	struct fifo_queue *to_steal = NULL;
5515 +	int need_steal_prio_reeval = 0;
5516 +	struct fifo_queue *fq;
5517 +
5518 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
5519 +	raw_spinlock_t *dgl_lock;
5520 +#endif
5521 +
5522 +	unsigned long flags = 0, real_flags;
5523 +
5524 +	int err = 0;
5525 +
5526 +	fq = ikglp_get_queue(sem, t);  // returns NULL if 't' is not owner.
5527 +
5528 +	if (!fq) {
5529 +		err = -EINVAL;
5530 +		goto out;
5531 +	}
5532 +
5533 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
5534 +	dgl_lock = litmus->get_dgl_spinlock(t);
5535 +#endif
5536 +	raw_spin_lock_irqsave(&sem->real_lock, real_flags);
5537 +
5538 +	lock_global_irqsave(dgl_lock, flags);  // TODO: Push this deeper
5539 +	lock_fine_irqsave(&sem->lock, flags);
5540 +
5541 +	TRACE_TASK(t, "Freeing replica %d.\n", ikglp_get_idx(sem, fq));
5542 +
5543 +
5544 +	// Remove 't' from the heaps, but data in nodes will still be good.
5545 +	ikglp_del_global_list(sem, t, &fq->global_heap_node);
5546 +	binheap_delete(&fq->donee_heap_node.node, &sem->donees);
5547 +
5548 +	fq->owner = NULL;  // no longer owned!!
5549 +	--(fq->count);
5550 +	if(fq->count < sem->shortest_fifo_queue->count) {
5551 +		sem->shortest_fifo_queue = fq;
5552 +	}
5553 +	--(sem->nr_in_fifos);
5554 +
5555 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
5556 +	if(sem->aff_obs) {
5557 +		sem->aff_obs->ops->notify_dequeue(sem->aff_obs, fq, t);
5558 +		sem->aff_obs->ops->notify_freed(sem->aff_obs, fq, t);
5559 +	}
5560 +#endif
5561 +
5562 +	// Move the next request into the FQ and update heaps as needed.
5563 +	// We defer re-evaluation of priorities to later in the function.
5564 +	if(fq->donee_heap_node.donor_info) {  // move my donor to FQ
5565 +		ikglp_wait_state_t *donor_info = fq->donee_heap_node.donor_info;
5566 +
5567 +		new_on_fq = donor_info->task;
5568 +
5569 +		// donor moved to FQ
5570 +		donee = t;
5571 +
5572 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
5573 +		if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) {
5574 +			fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq);
5575 +			if(fq_of_new_on_fq->count == 0) {
5576 +				// ignore it?
5577 +//				fq_of_new_on_fq = fq;
5578 +			}
5579 +		}
5580 +		else {
5581 +			fq_of_new_on_fq = fq;
5582 +		}
5583 +#else
5584 +		fq_of_new_on_fq = fq;
5585 +#endif
5586 +
5587 +		TRACE_TASK(t, "Moving MY donor (%s/%d) to fq %d (non-aff wanted fq %d).\n",
5588 +				   new_on_fq->comm, new_on_fq->pid,
5589 +				   ikglp_get_idx(sem, fq_of_new_on_fq),
5590 +				   ikglp_get_idx(sem, fq));
5591 +
5592 +
5593 +		ikglp_move_donor_to_fq(sem, fq_of_new_on_fq, donor_info);
5594 +	}
5595 +	else if(!binheap_empty(&sem->donors)) {  // I have no donor of my own, so move
5596 +											 // some other donor to the FQ.
5597 +		// Select a donor
5598 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
5599 +		other_donor_info = (sem->aff_obs) ?
5600 +			sem->aff_obs->ops->advise_donor_to_fq(sem->aff_obs, fq) :
5601 +			binheap_top_entry(&sem->donors, ikglp_wait_state_t, node);
5602 +#else
5603 +		other_donor_info = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node);
5604 +#endif
5605 +
5606 +		new_on_fq = other_donor_info->task;
5607 +		donee = other_donor_info->donee_info->task;
5608 +
5609 +		// update the donee's heap position.
5610 +		other_donor_info->donee_info->donor_info = NULL;  // clear the cross-link
5611 +		binheap_decrease(&other_donor_info->donee_info->node, &sem->donees);
5612 +
5613 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
5614 +		if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) {
5615 +			fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq);
5616 +			if(fq_of_new_on_fq->count == 0) {
5617 +				// ignore it?
5618 +//				fq_of_new_on_fq = fq;
5619 +			}
5620 +		}
5621 +		else {
5622 +			fq_of_new_on_fq = fq;
5623 +		}
5624 +#else
5625 +		fq_of_new_on_fq = fq;
5626 +#endif
5627 +
5628 +		TRACE_TASK(t, "Moving a donor (%s/%d) to fq %d (non-aff wanted fq %d).\n",
5629 +				   new_on_fq->comm, new_on_fq->pid,
5630 +				   ikglp_get_idx(sem, fq_of_new_on_fq),
5631 +				   ikglp_get_idx(sem, fq));
5632 +
5633 +		ikglp_move_donor_to_fq(sem, fq_of_new_on_fq, other_donor_info);
5634 +	}
5635 +	else if(!binheap_empty(&sem->priority_queue)) {  // No donors, so move PQ
5636 +		ikglp_heap_node_t *pq_node = binheap_top_entry(&sem->priority_queue,
5637 +													   ikglp_heap_node_t, node);
5638 +		ikglp_wait_state_t *pq_wait = container_of(pq_node, ikglp_wait_state_t,
5639 +												   pq_node);
5640 +
5641 +		new_on_fq = pq_wait->task;
5642 +
5643 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
5644 +		if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) {
5645 +			fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq);
5646 +			if(fq_of_new_on_fq->count == 0) {
5647 +				// ignore it?
5648 +//				fq_of_new_on_fq = fq;
5649 +			}
5650 +		}
5651 +		else {
5652 +			fq_of_new_on_fq = fq;
5653 +		}
5654 +#else
5655 +		fq_of_new_on_fq = fq;
5656 +#endif
5657 +
5658 +		TRACE_TASK(t, "Moving a pq waiter (%s/%d) to fq %d (non-aff wanted fq %d).\n",
5659 +				   new_on_fq->comm, new_on_fq->pid,
5660 +				   ikglp_get_idx(sem, fq_of_new_on_fq),
5661 +				   ikglp_get_idx(sem, fq));
5662 +
5663 +		ikglp_move_pq_to_fq(sem, fq_of_new_on_fq, pq_wait);
5664 +	}
5665 +	else if(fq->count == 0) {  // No PQ and this queue is empty, so steal.
5666 +		ikglp_wait_state_t *fq_wait;
5667 +
5668 +		TRACE_TASK(t, "Looking to steal a request for fq %d...\n",
5669 +				   ikglp_get_idx(sem, fq));
5670 +
5671 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
5672 +		fq_wait = (sem->aff_obs) ?
5673 +			sem->aff_obs->ops->advise_steal(sem->aff_obs, fq) :
5674 +			ikglp_find_hp_waiter_to_steal(sem);
5675 +#else
5676 +		fq_wait = ikglp_find_hp_waiter_to_steal(sem);
5677 +#endif
5678 +
5679 +		if(fq_wait) {
5680 +			to_steal = fq_wait->donee_heap_node.fq;
5681 +
5682 +			new_on_fq = fq_wait->task;
5683 +			fq_of_new_on_fq = fq;
5684 +			need_steal_prio_reeval = (new_on_fq == to_steal->hp_waiter);
5685 +
5686 +			TRACE_TASK(t, "Found %s/%d of fq %d to steal for fq %d...\n",
5687 +					   new_on_fq->comm, new_on_fq->pid,
5688 +					   ikglp_get_idx(sem, to_steal),
5689 +					   ikglp_get_idx(sem, fq));
5690 +
5691 +			ikglp_steal_to_fq(sem, fq, fq_wait);
5692 +		}
5693 +		else {
5694 +			TRACE_TASK(t, "Found nothing to steal for fq %d.\n",
5695 +					   ikglp_get_idx(sem, fq));
5696 +		}
5697 +	}
5698 +	else { // move no one
5699 +	}
5700 +
5701 +	// 't' must drop all priority and clean up data structures before hand-off.
5702 +
5703 +	// DROP ALL INHERITANCE.  IKGLP MUST BE OUTER-MOST
5704 +	raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock);
5705 +	{
5706 +		int count = 0;
5707 +		while(!binheap_empty(&tsk_rt(t)->hp_blocked_tasks)) {
5708 +			binheap_delete_root(&tsk_rt(t)->hp_blocked_tasks,
5709 +								struct nested_info, hp_binheap_node);
5710 +			++count;
5711 +		}
5712 +		litmus->decrease_prio(t, NULL);
5713 +		WARN_ON(count > 2); // should not be greater than 2: only the local fq inheritance and a donation are possible.
5714 +	}
5715 +	raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);
5716 +
5717 +
5718 +
5719 +	// Now patch up other priorities.
5720 +	//
5721 +	// At most one of the following:
5722 +	//   if(donee && donee != t), decrease prio, propagate to owner, or onward
5723 +	//   if(to_steal), update owner's prio (hp_waiter has already been set)
5724 +	//
5725 +
5726 +	BUG_ON((other_donor_info != NULL) && (to_steal != NULL));
5727 +
5728 +	if(other_donor_info) {
5729 +		struct fifo_queue *other_fq = other_donor_info->donee_info->fq;
5730 +
5731 +		BUG_ON(!donee);
5732 +		BUG_ON(donee == t);
5733 +
5734 +		TRACE_TASK(t, "Terminating donation relation of donor %s/%d to donee %s/%d!\n",
5735 +				   other_donor_info->task->comm, other_donor_info->task->pid,
5736 +				   donee->comm, donee->pid);
5737 +
5738 +		// need to terminate donation relation.
5739 +		if(donee == other_fq->owner) {
5740 +			TRACE_TASK(t, "Donee %s/%d is the owner of fq %d.\n",
5741 +					   donee->comm, donee->pid,
5742 +					   ikglp_get_idx(sem, other_fq));
5743 +
5744 +			ikglp_remove_donation_from_owner(&other_donor_info->prio_donation.hp_binheap_node, other_fq, sem, flags);
5745 +			lock_fine_irqsave(&sem->lock, flags);  // there should be no contention!!!!
5746 +		}
5747 +		else {
5748 +			TRACE_TASK(t, "Donee %s/%d is blocked in fq %d.\n",
5749 +					   donee->comm, donee->pid,
5750 +					   ikglp_get_idx(sem, other_fq));
5751 +
5752 +			ikglp_remove_donation_from_fq_waiter(donee, &other_donor_info->prio_donation.hp_binheap_node);
5753 +			if(donee == other_fq->hp_waiter) {
5754 +				TRACE_TASK(t, "Donee %s/%d was an hp_waiter of fq %d. Rechecking hp_waiter.\n",
5755 +						   donee->comm, donee->pid,
5756 +						   ikglp_get_idx(sem, other_fq));
5757 +
5758 +				other_fq->hp_waiter = ikglp_find_hp_waiter(other_fq, NULL);
5759 +				TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n",
5760 +						   ikglp_get_idx(sem, other_fq),
5761 +						   (other_fq->hp_waiter) ? other_fq->hp_waiter->comm : "nil",
5762 +						   (other_fq->hp_waiter) ? other_fq->hp_waiter->pid : -1);
5763 +
5764 +				ikglp_refresh_owners_prio_decrease(other_fq, sem, flags); // unlocks sem->lock.  reacquire it.
5765 +				lock_fine_irqsave(&sem->lock, flags);  // there should be no contention!!!!
5766 +			}
5767 +		}
5768 +	}
5769 +	else if(to_steal) {
5770 +		TRACE_TASK(t, "Rechecking priority inheritance of fq %d, triggered by stealing.\n",
5771 +				   ikglp_get_idx(sem, to_steal));
5772 +
5773 +		if(need_steal_prio_reeval) {
5774 +			ikglp_refresh_owners_prio_decrease(to_steal, sem, flags); // unlocks sem->lock.  reacquire it.
5775 +			lock_fine_irqsave(&sem->lock, flags);  // there should be no contention!!!!
5776 +		}
5777 +	}
5778 +
5779 +	// check for new HP waiter.
5780 +	if(new_on_fq) {
5781 +		if(fq == fq_of_new_on_fq) {
5782 +			// fq->owner is null, so just update the hp_waiter without locking.
5783 +			if(new_on_fq == fq->hp_waiter) {
5784 +				TRACE_TASK(t, "new_on_fq is already hp_waiter (%s/%d).\n",
5785 +						   fq->hp_waiter->comm, fq->hp_waiter->pid);
5786 +				fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter);  // set this just to be sure...
5787 +			}
5788 +			else if(litmus->compare(new_on_fq, fq->hp_waiter)) {
5789 +				if(fq->hp_waiter)
5790 +					TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n",
5791 +							   fq->hp_waiter->comm, fq->hp_waiter->pid);
5792 +				else
5793 +					TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n");
5794 +
5795 +				fq->hp_waiter = new_on_fq;
5796 +				fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter);
5797 +
5798 +				TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n",
5799 +						   ikglp_get_idx(sem, fq),
5800 +						   (fq->hp_waiter) ? fq->hp_waiter->comm : "nil",
5801 +						   (fq->hp_waiter) ? fq->hp_waiter->pid : -1);
5802 +			}
5803 +		}
5804 +		else {
5805 +			ikglp_refresh_owners_prio_increase(new_on_fq, fq_of_new_on_fq, sem, flags); // unlocks sem->lock.  reacquire it.
5806 +			lock_fine_irqsave(&sem->lock, flags);  // there should be no contention!!!!
5807 +		}
5808 +	}
5809 +
5810 +wake_kludge:
5811 +	if(waitqueue_active(&fq->wait))
5812 +	{
5813 +		wait_queue_t *wait = list_entry(fq->wait.task_list.next, wait_queue_t, task_list);
5814 +		ikglp_wait_state_t *fq_wait = container_of(wait, ikglp_wait_state_t, fq_node);
5815 +		next = (struct task_struct*) wait->private;
5816 +
5817 +		__remove_wait_queue(&fq->wait, wait);
5818 +
5819 +		TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n",
5820 +				  ikglp_get_idx(sem, fq),
5821 +				  next->comm, next->pid);
5822 +
5823 +		// migrate wait-state to fifo-memory.
5824 +		ikglp_migrate_fq_to_owner_heap_nodes(sem, fq, fq_wait);
5825 +
5826 +		/* next becomes the resource holder */
5827 +		fq->owner = next;
5828 +		tsk_rt(next)->blocked_lock = NULL;
5829 +
5830 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
5831 +		if(sem->aff_obs) {
5832 +			sem->aff_obs->ops->notify_acquired(sem->aff_obs, fq, next);
5833 +		}
5834 +#endif
5835 +
5836 +		/* determine new hp_waiter if necessary */
5837 +		if (next == fq->hp_waiter) {
5838 +
5839 +			TRACE_TASK(next, "was highest-prio waiter\n");
5840 +			/* next has the highest priority --- it doesn't need to
5841 +			 * inherit.  However, we need to make sure that the
5842 +			 * next-highest priority in the queue is reflected in
5843 +			 * hp_waiter. */
5844 +			fq->hp_waiter = ikglp_find_hp_waiter(fq, NULL);
5845 +			TRACE_TASK(next, "New hp_waiter for fq %d is %s/%d!\n",
5846 +					   ikglp_get_idx(sem, fq),
5847 +					   (fq->hp_waiter) ? fq->hp_waiter->comm : "nil",
5848 +					   (fq->hp_waiter) ? fq->hp_waiter->pid : -1);
5849 +
5850 +			fq->nest.hp_waiter_eff_prio = (fq->hp_waiter) ?
5851 +								effective_priority(fq->hp_waiter) : NULL;
5852 +
5853 +			if (fq->hp_waiter)
5854 +				TRACE_TASK(fq->hp_waiter, "is new highest-prio waiter\n");
5855 +			else
5856 +				TRACE("no further waiters\n");
5857 +
5858 +			raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock);
5859 +
5860 +//			TRACE_TASK(next, "Heap Before:\n");
5861 +//			print_hp_waiters(tsk_rt(next)->hp_blocked_tasks.root, 0);
5862 +
5863 +			binheap_add(&fq->nest.hp_binheap_node,
5864 +						&tsk_rt(next)->hp_blocked_tasks,
5865 +						struct nested_info,
5866 +						hp_binheap_node);
5867 +
5868 +//			TRACE_TASK(next, "Heap After:\n");
5869 +//			print_hp_waiters(tsk_rt(next)->hp_blocked_tasks.root, 0);
5870 +
5871 +			raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock);
5872 +		}
5873 +		else {
5874 +			/* Well, if 'next' is not the highest-priority waiter,
5875 +			 * then it (probably) ought to inherit the highest-priority
5876 +			 * waiter's priority. */
5877 +			TRACE_TASK(next, "is not hp_waiter of replica %d. hp_waiter is %s/%d\n",
5878 +					   ikglp_get_idx(sem, fq),
5879 +					   (fq->hp_waiter) ? fq->hp_waiter->comm : "nil",
5880 +					   (fq->hp_waiter) ? fq->hp_waiter->pid : -1);
5881 +
5882 +			raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock);
5883 +
5884 +			binheap_add(&fq->nest.hp_binheap_node,
5885 +						&tsk_rt(next)->hp_blocked_tasks,
5886 +						struct nested_info,
5887 +						hp_binheap_node);
5888 +
5889 +			/* It is possible that 'next' *should* be the hp_waiter, but isn't
5890 +		     * because that update hasn't yet executed (update operation is
5891 +			 * probably blocked on mutex->lock). So only inherit if the top of
5892 +			 * 'next's top heap node is indeed the effective prio. of hp_waiter.
5893 +			 * (We use fq->hp_waiter_eff_prio instead of effective_priority(hp_waiter)
5894 +			 * since the effective priority of hp_waiter can change (and the
5895 +			 * update has not made it to this lock).)
5896 +			 */
5897 +			if(likely(top_priority(&tsk_rt(next)->hp_blocked_tasks) ==
5898 +												fq->nest.hp_waiter_eff_prio))
5899 +			{
5900 +				if(fq->nest.hp_waiter_eff_prio)
5901 +					litmus->increase_prio(next, fq->nest.hp_waiter_eff_prio);
5902 +				else
5903 +					WARN_ON(1);
5904 +			}
5905 +
5906 +			raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock);
5907 +		}
5908 +
5909 +
5910 +		// wake up the new resource holder!
5911 +		wake_up_process(next);
5912 +	}
5913 +	if(fq_of_new_on_fq && fq_of_new_on_fq != fq && fq_of_new_on_fq->count == 1) {
5914 +		// The task we promoted went to an empty FQ. (Why didn't stealing pick this up?)
5915 +		// Wake up the new guy too.
5916 +
5917 +		BUG_ON(fq_of_new_on_fq->owner != NULL);
5918 +
5919 +		fq = fq_of_new_on_fq;
5920 +		fq_of_new_on_fq = NULL;
5921 +		goto wake_kludge;
5922 +	}
5923 +
5924 +	unlock_fine_irqrestore(&sem->lock, flags);
5925 +	unlock_global_irqrestore(dgl_lock, flags);
5926 +
5927 +	raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
5928 +
5929 +out:
5930 +	return err;
5931 +}
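
ikglp_unlock() above backfills the freed FIFO slot in a fixed order: the releasing owner's own donor first, then the highest donor overall, then the head of the priority queue, and finally a steal from another FIFO if this queue would otherwise sit idle. The if/else-if chain reduces to the small decision function sketched below, where booleans stand in for the heap-emptiness tests; this is an illustration, not the patch's code.

#include <stdio.h>

enum refill { FROM_MY_DONOR, FROM_ANY_DONOR, FROM_PQ, FROM_STEAL, NOBODY };

/* Order in which the freed FIFO slot is backfilled above. */
static enum refill pick_replacement(int have_my_donor, int donors_nonempty,
				    int pq_nonempty, int fq_now_empty)
{
	if (have_my_donor)
		return FROM_MY_DONOR;
	if (donors_nonempty)
		return FROM_ANY_DONOR;
	if (pq_nonempty)
		return FROM_PQ;
	if (fq_now_empty)
		return FROM_STEAL;	/* keep the replica from idling */
	return NOBODY;
}

int main(void)
{
	printf("%d\n", pick_replacement(1, 1, 1, 0));	/* 0: my own donor */
	printf("%d\n", pick_replacement(0, 0, 1, 1));	/* 2: head of the PQ */
	printf("%d\n", pick_replacement(0, 0, 0, 1));	/* 3: steal from another fq */
	return 0;
}
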
5932 +
5933 +
5934 +
5935 +int ikglp_close(struct litmus_lock* l)
5936 +{
5937 +	struct task_struct *t = current;
5938 +	struct ikglp_semaphore *sem = ikglp_from_lock(l);
5939 +	unsigned long flags;
5940 +
5941 +	int owner = 0;
5942 +	int i;
5943 +
5944 +	raw_spin_lock_irqsave(&sem->real_lock, flags);
5945 +
5946 +	for(i = 0; i < sem->nr_replicas; ++i) {
5947 +		if(sem->fifo_queues[i].owner == t) {
5948 +			owner = 1;
5949 +			break;
5950 +		}
5951 +	}
5952 +
5953 +	raw_spin_unlock_irqrestore(&sem->real_lock, flags);
5954 +
5955 +	if (owner)
5956 +		ikglp_unlock(l);
5957 +
5958 +	return 0;
5959 +}
5960 +
5961 +void ikglp_free(struct litmus_lock* l)
5962 +{
5963 +	struct ikglp_semaphore *sem = ikglp_from_lock(l);
5964 +
5965 +	kfree(sem->fifo_queues);
5966 +	kfree(sem);
5967 +}
5968 +
5969 +
5970 +
5971 +struct litmus_lock* ikglp_new(int m,
5972 +							  struct litmus_lock_ops* ops,
5973 +							  void* __user arg)
5974 +{
5975 +	struct ikglp_semaphore* sem;
5976 +	int nr_replicas = 0;
5977 +	int i;
5978 +
5979 +	if(!access_ok(VERIFY_READ, arg, sizeof(nr_replicas)))
5980 +	{
5981 +		return(NULL);
5982 +	}
5983 +	if(__copy_from_user(&nr_replicas, arg, sizeof(nr_replicas)))
5984 +	{
5985 +		return(NULL);
5986 +	}
5987 +	if(nr_replicas < 1)
5988 +	{
5989 +		return(NULL);
5990 +	}
5991 +
5992 +	sem = kmalloc(sizeof(*sem), GFP_KERNEL);
5993 +	if(!sem)
5994 +	{
5995 +		return NULL;
5996 +	}
5997 +
5998 +	sem->fifo_queues = kmalloc(sizeof(struct fifo_queue)*nr_replicas, GFP_KERNEL);
5999 +	if(!sem->fifo_queues)
6000 +	{
6001 +		kfree(sem);
6002 +		return NULL;
6003 +	}
6004 +
6005 +	sem->litmus_lock.ops = ops;
6006 +
6007 +#ifdef CONFIG_DEBUG_SPINLOCK
6008 +	{
6009 +		__raw_spin_lock_init(&sem->lock, ((struct litmus_lock*)sem)->cheat_lockdep, &((struct litmus_lock*)sem)->key);
6010 +	}
6011 +#else
6012 +	raw_spin_lock_init(&sem->lock);
6013 +#endif
6014 +
6015 +	raw_spin_lock_init(&sem->real_lock);
6016 +
6017 +	sem->nr_replicas = nr_replicas;
6018 +	sem->m = m;
6019 +	sem->max_fifo_len = (sem->m/nr_replicas) + ((sem->m%nr_replicas) != 0);
6020 +	sem->nr_in_fifos = 0;
6021 +
6022 +	TRACE("New IKGLP Sem: m = %d, k = %d, max fifo_len = %d\n",
6023 +		  sem->m,
6024 +		  sem->nr_replicas,
6025 +		  sem->max_fifo_len);
6026 +
6027 +	for(i = 0; i < nr_replicas; ++i)
6028 +	{
6029 +		struct fifo_queue* q = &(sem->fifo_queues[i]);
6030 +
6031 +		q->owner = NULL;
6032 +		q->hp_waiter = NULL;
6033 +		init_waitqueue_head(&q->wait);
6034 +		q->count = 0;
6035 +
6036 +		q->global_heap_node.task = NULL;
6037 +		INIT_BINHEAP_NODE(&q->global_heap_node.node);
6038 +
6039 +		q->donee_heap_node.task = NULL;
6040 +		q->donee_heap_node.donor_info = NULL;
6041 +		q->donee_heap_node.fq = NULL;
6042 +		INIT_BINHEAP_NODE(&q->donee_heap_node.node);
6043 +
6044 +		q->nest.lock = (struct litmus_lock*)sem;
6045 +		q->nest.hp_waiter_eff_prio = NULL;
6046 +		q->nest.hp_waiter_ptr = &q->hp_waiter;
6047 +		INIT_BINHEAP_NODE(&q->nest.hp_binheap_node);
6048 +	}
6049 +
6050 +	sem->shortest_fifo_queue = &sem->fifo_queues[0];
6051 +
6052 +	sem->top_m_size = 0;
6053 +
6054 +	// init heaps
6055 +	INIT_BINHEAP_HANDLE(&sem->top_m, ikglp_min_heap_base_priority_order);
6056 +	INIT_BINHEAP_HANDLE(&sem->not_top_m, ikglp_max_heap_base_priority_order);
6057 +	INIT_BINHEAP_HANDLE(&sem->donees, ikglp_min_heap_donee_order);
6058 +	INIT_BINHEAP_HANDLE(&sem->priority_queue, ikglp_max_heap_base_priority_order);
6059 +	INIT_BINHEAP_HANDLE(&sem->donors, ikglp_donor_max_heap_base_priority_order);
6060 +
6061 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
6062 +	sem->aff_obs = NULL;
6063 +#endif
6064 +
6065 +	return &sem->litmus_lock;
6066 +}
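
ikglp_new() above caps each FIFO at ceil(m/k) entries using the integer idiom (m/k) + ((m%k) != 0). A tiny stand-alone check of that arithmetic with made-up values of m and k:

#include <stdio.h>

/* Same ceiling-division idiom used for sem->max_fifo_len above. */
static int max_fifo_len(int m, int nr_replicas)
{
	return (m / nr_replicas) + ((m % nr_replicas) != 0);
}

int main(void)
{
	printf("%d\n", max_fifo_len(12, 5));	/* ceil(12/5) = 3 */
	printf("%d\n", max_fifo_len(12, 4));	/* ceil(12/4) = 3 */
	printf("%d\n", max_fifo_len(12, 6));	/* ceil(12/6) = 2 */
	return 0;
}
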
6067 +
6068 +
6069 +
6070 +
6071 +
6072 +
6073 +
6074 +
6075 +
6076 +
6077 +
6078 +
6079 +
6080 +
6081 +
6082 +
6083 +
6084 +
6085 +
6086 +
6087 +
6088 +
6089 +
6090 +
6091 +
6092 +
6093 +
6094 +
6095 +
6096 +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
6097 +
6098 +static inline int __replica_to_gpu(struct ikglp_affinity* aff, int replica)
6099 +{
6100 +	int gpu = replica % aff->nr_rsrc;
6101 +	return gpu;
6102 +}
6103 +
6104 +static inline int replica_to_gpu(struct ikglp_affinity* aff, int replica)
6105 +{
6106 +	int gpu = __replica_to_gpu(aff, replica) + aff->offset;
6107 +	return gpu;
6108 +}
6109 +
6110 +static inline int gpu_to_base_replica(struct ikglp_affinity* aff, int gpu)
6111 +{
6112 +	int replica = gpu - aff->offset;
6113 +	return replica;
6114 +}
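
The three inline helpers above map FIFO replicas onto GPUs when each GPU admits nr_simult concurrent users: replica indices wrap onto GPUs modulo nr_rsrc and are shifted by the configured offset. The sketch below reproduces that arithmetic in user space; the numbers chosen (2 GPUs at system indices 4 and 5, 6 replicas) are purely illustrative.

#include <stdio.h>

/* Mirrors the helpers above: replicas wrap onto GPUs modulo nr_rsrc,
 * shifted by offset. */
struct aff_map { int nr_rsrc; int offset; };

static int replica_to_gpu(const struct aff_map *a, int replica)
{
	return (replica % a->nr_rsrc) + a->offset;
}

static int gpu_to_base_replica(const struct aff_map *a, int gpu)
{
	return gpu - a->offset;
}

int main(void)
{
	struct aff_map a = { 2, 4 };	/* 2 GPUs, visible as system GPUs 4 and 5 */
	int r;

	for (r = 0; r < 6; ++r)		/* 6 replicas -> 3 simultaneous users per GPU */
		printf("replica %d -> gpu %d\n", r, replica_to_gpu(&a, r));
	printf("gpu 5 -> base replica %d\n", gpu_to_base_replica(&a, 5));
	return 0;
}
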
6115 +
6116 +
6117 +int ikglp_aff_obs_close(struct affinity_observer* obs)
6118 +{
6119 +	return 0;
6120 +}
6121 +
6122 +void ikglp_aff_obs_free(struct affinity_observer* obs)
6123 +{
6124 +	struct ikglp_affinity *ikglp_aff = ikglp_aff_obs_from_aff_obs(obs);
6125 +	kfree(ikglp_aff->nr_cur_users_on_rsrc);
6126 +	kfree(ikglp_aff->q_info);
6127 +	kfree(ikglp_aff);
6128 +}
6129 +
6130 +static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* ops,
6131 +												   struct ikglp_affinity_ops* ikglp_ops,
6132 +												   void* __user args)
6133 +{
6134 +	struct ikglp_affinity* ikglp_aff;
6135 +	struct gpu_affinity_observer_args aff_args;
6136 +	struct ikglp_semaphore* sem;
6137 +	int i;
6138 +	unsigned long flags;
6139 +
6140 +	if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) {
6141 +		return(NULL);
6142 +	}
6143 +	if(__copy_from_user(&aff_args, args, sizeof(aff_args))) {
6144 +		return(NULL);
6145 +	}
6146 +
6147 +	sem = (struct ikglp_semaphore*) get_lock_from_od(aff_args.obs.lock_od);
6148 +
6149 +	if(sem->litmus_lock.type != IKGLP_SEM) {
6150 +		TRACE_CUR("Lock type not supported.  Type = %d\n", sem->litmus_lock.type);
6151 +		return(NULL);
6152 +	}
6153 +
6154 +	if((aff_args.nr_simult_users <= 0) ||
6155 +	   (sem->nr_replicas%aff_args.nr_simult_users != 0)) {
6156 +		TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users "
6157 +				  "(%d) per replica.  #replicas should be evenly divisible "
6158 +				  "by #simult_users.\n",
6159 +				  sem->litmus_lock.ident,
6160 +				  sem->nr_replicas,
6161 +				  aff_args.nr_simult_users);
6162 +		return(NULL);
6163 +	}
6164 +
6165 +	if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) {
6166 +		TRACE_CUR("System does not support #simult_users > %d. %d requested.\n",
6167 +				  NV_MAX_SIMULT_USERS, aff_args.nr_simult_users);
6168 +//		return(NULL);
6169 +	}
6170 +
6171 +	ikglp_aff = kmalloc(sizeof(*ikglp_aff), GFP_KERNEL);
6172 +	if(!ikglp_aff) {
6173 +		return(NULL);
6174 +	}
6175 +
6176 +	ikglp_aff->q_info = kmalloc(sizeof(struct ikglp_queue_info)*sem->nr_replicas, GFP_KERNEL);
6177 +	if(!ikglp_aff->q_info) {
6178 +		kfree(ikglp_aff);
6179 +		return(NULL);
6180 +	}
6181 +
6182 +	ikglp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL);
6183 +	if(!ikglp_aff->nr_cur_users_on_rsrc) {
6184 +		kfree(ikglp_aff->q_info);
6185 +		kfree(ikglp_aff);
6186 +		return(NULL);
6187 +	}
6188 +
6189 +	affinity_observer_new(&ikglp_aff->obs, ops, &aff_args.obs);
6190 +
6191 +	ikglp_aff->ops = ikglp_ops;
6192 +	ikglp_aff->offset = aff_args.replica_to_gpu_offset;
6193 +	ikglp_aff->nr_simult = aff_args.nr_simult_users;
6194 +	ikglp_aff->nr_rsrc = sem->nr_replicas / ikglp_aff->nr_simult;
6195 +	ikglp_aff->relax_max_fifo_len = (aff_args.relaxed_rules) ? 1 : 0;
6196 +
6197 +	TRACE_CUR("GPU affinity_observer: offset = %d, nr_simult = %d, "
6198 +			  "nr_rsrc = %d, relaxed_fifo_len = %d\n",
6199 +			  ikglp_aff->offset, ikglp_aff->nr_simult, ikglp_aff->nr_rsrc,
6200 +			  ikglp_aff->relax_max_fifo_len);
6201 +
6202 +	memset(ikglp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc));
6203 +
6204 +	for(i = 0; i < sem->nr_replicas; ++i) {
6205 +		ikglp_aff->q_info[i].q = &sem->fifo_queues[i];
6206 +		ikglp_aff->q_info[i].estimated_len = 0;
6207 +
6208 +		// multiple q_info's will point to the same resource (aka GPU) if
6209 +		// aff_args.nr_simult_users > 1
6210 +		ikglp_aff->q_info[i].nr_cur_users = &ikglp_aff->nr_cur_users_on_rsrc[__replica_to_gpu(ikglp_aff,i)];
6211 +	}
6212 +
6213 +	// attach observer to the lock
6214 +	raw_spin_lock_irqsave(&sem->real_lock, flags);
6215 +	sem->aff_obs = ikglp_aff;
6216 +	raw_spin_unlock_irqrestore(&sem->real_lock, flags);
6217 +
6218 +	return &ikglp_aff->obs;
6219 +}
6220 +
6221 +
6222 +
6223 +
6224 +static int gpu_replica_to_resource(struct ikglp_affinity* aff,
6225 +								   struct fifo_queue* fq) {
6226 +	struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
6227 +	return(replica_to_gpu(aff, ikglp_get_idx(sem, fq)));
6228 +}
6229 +
6230 +
6231 +// Smart IKGLP Affinity
6232 +
6233 +//static inline struct ikglp_queue_info* ikglp_aff_find_shortest(struct ikglp_affinity* aff)
6234 +//{
6235 +//	struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
6236 +//	struct ikglp_queue_info *shortest = &aff->q_info[0];
6237 +//	int i;
6238 +//
6239 +//	for(i = 1; i < sem->nr_replicas; ++i) {
6240 +//		if(aff->q_info[i].estimated_len < shortest->estimated_len) {
6241 +//			shortest = &aff->q_info[i];
6242 +//		}
6243 +//	}
6244 +//
6245 +//	return(shortest);
6246 +//}
6247 +
6248 +struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t)
6249 +{
6250 +	// advise_enqueue must be smart so as not to break IKGLP rules:
6251 +	//  * No queue can be greater than ceil(m/k) in length.  We may return
6252 +	//    such a queue, but IKGLP will be smart enough to send requests
6253 +	//    to donors or PQ.
6253 +	//    to donors or PQ.
6254 +	//  * Cannot let a queue idle if there exist waiting PQ/donors
6255 +	//      -- needed to guarantee parallel progress of waiters.
6256 +	//
6257 +	// We may be able to relax some of these constraints, but this will have to
6258 +	// be carefully evaluated.
6259 +	//
6260 +	// Heuristic strategy: Find the shortest queue that is not full.
6261 +
6262 +	struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
6263 +	lt_t min_len;
6264 +	int min_nr_users;
6265 +	struct ikglp_queue_info *shortest;
6266 +	struct fifo_queue *to_enqueue;
6267 +	int i;
6268 +	int affinity_gpu;
6269 +
6270 +	int max_fifo_len = (aff->relax_max_fifo_len) ?
6271 +		sem->m : sem->max_fifo_len;
6272 +
6273 +	// simply pick the shortest queue if we have no affinity, or if we have
6274 +	// affinity with the shortest
6275 +	if(unlikely(tsk_rt(t)->last_gpu < 0)) {
6276 +		affinity_gpu = aff->offset;  // first gpu
6277 +		TRACE_CUR("no affinity\n");
6278 +	}
6279 +	else {
6280 +		affinity_gpu = tsk_rt(t)->last_gpu;
6281 +	}
6282 +
6283 +	// all things being equal, let's start with the queue with which we have
6284 +	// affinity.  this helps us maintain affinity even when we don't have
6285 +	// an estimate for local-affinity execution time (i.e., 2nd time on GPU)
6286 +	shortest = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)];
6287 +
6288 +	//	if(shortest == aff->shortest_queue) {
6289 +	//		TRACE_CUR("special case: have affinity with shortest queue\n");
6290 +	//		goto out;
6291 +	//	}
6292 +
6293 +	min_len = shortest->estimated_len + get_gpu_estimate(t, MIG_LOCAL);
6294 +	min_nr_users = *(shortest->nr_cur_users);
6295 +
6296 +	TRACE_CUR("cs is %llu on queue %d (count = %d): est len = %llu\n",
6297 +			  get_gpu_estimate(t, MIG_LOCAL),
6298 +			  ikglp_get_idx(sem, shortest->q),
6299 +			  shortest->q->count,
6300 +			  min_len);
6301 +
6302 +	for(i = 0; i < sem->nr_replicas; ++i) {
6303 +		if(&aff->q_info[i] != shortest) {
6304 +			if(aff->q_info[i].q->count < max_fifo_len) {
6305 +
6306 +				lt_t est_len =
6307 +					aff->q_info[i].estimated_len +
6308 +					get_gpu_estimate(t,
6309 +								gpu_migration_distance(tsk_rt(t)->last_gpu,
6310 +													replica_to_gpu(aff, i)));
6311 +
6312 +		// queue is smaller, or they're equal and the other has a smaller number
6313 +		// of total users.
6314 +		//
6315 +		// tie-break on the smallest number of simult users.  this only kicks in
6316 +		// when there is more than one empty queue.
6317 +				if((shortest->q->count >= max_fifo_len) ||		/* 'shortest' is full and i-th queue is not */
6318 +				   (est_len < min_len) ||						/* i-th queue has shortest length */
6319 +				   ((est_len == min_len) &&						/* equal lengths, but one has fewer over-all users */
6320 +					(*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
6321 +
6322 +					shortest = &aff->q_info[i];
6323 +					min_len = est_len;
6324 +					min_nr_users = *(aff->q_info[i].nr_cur_users);
6325 +				}
6326 +
6327 +				TRACE_CUR("cs is %llu on queue %d (count = %d): est len = %llu\n",
6328 +						  get_gpu_estimate(t,
6329 +								gpu_migration_distance(tsk_rt(t)->last_gpu,
6330 +													   replica_to_gpu(aff, i))),
6331 +						  ikglp_get_idx(sem, aff->q_info[i].q),
6332 +						  aff->q_info[i].q->count,
6333 +						  est_len);
6334 +			}
6335 +			else {
6336 +				TRACE_CUR("queue %d is too long.  ineligible for enqueue.\n",
6337 +						  ikglp_get_idx(sem, aff->q_info[i].q));
6338 +			}
6339 +		}
6340 +	}
6341 +
6342 +	if(shortest->q->count >= max_fifo_len) {
6343 +		TRACE_CUR("selected fq %d is too long, but returning it anyway.\n",
6344 +				  ikglp_get_idx(sem, shortest->q));
6345 +	}
6346 +
6347 +	to_enqueue = shortest->q;
6348 +	TRACE_CUR("enqueue on fq %d (count = %d) (non-aff wanted fq %d)\n",
6349 +			  ikglp_get_idx(sem, to_enqueue),
6350 +			  to_enqueue->count,
6351 +			  ikglp_get_idx(sem, sem->shortest_fifo_queue));
6352 +
6353 +	return to_enqueue;
6354 +
6355 +	//return(sem->shortest_fifo_queue);
6356 +}
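
gpu_ikglp_advise_enqueue() above scores each queue by its estimated length plus the caller's expected critical-section length at the corresponding migration distance, keeps the shortest non-full queue, and breaks ties in favor of the GPU with fewer current users. The keep-or-replace test can be sketched on plain integers; the values and names below are hypothetical.

#include <stdio.h>

/* Sketch of the keep-or-replace test above: prefer a non-full queue with the
 * smallest estimated length; on a tie, prefer the GPU with fewer current
 * users. */
struct cand { long long est_len; int count; int nr_users; };

static int better(const struct cand *c, const struct cand *best, int max_len)
{
	if (c->count >= max_len)
		return 0;			/* ineligible: queue is full */
	if (best->count >= max_len)
		return 1;			/* current best is full, take anything open */
	if (c->est_len < best->est_len)
		return 1;
	return c->est_len == best->est_len && c->nr_users < best->nr_users;
}

int main(void)
{
	struct cand best = { 900, 2, 3 };
	struct cand c    = { 900, 1, 1 };

	printf("replace best? %d\n", better(&c, &best, 3));	/* 1: tie-break on users */
	return 0;
}
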
6357 +
6358 +
6359 +
6360 +
6361 +static ikglp_wait_state_t* pick_steal(struct ikglp_affinity* aff,
6362 +									  int dest_gpu,
6363 +									  struct fifo_queue* fq)
6364 +{
6365 +	struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
6366 +	ikglp_wait_state_t *wait = NULL;
6367 +	int max_improvement = -(MIG_NONE+1);
6368 +	int replica = ikglp_get_idx(sem, fq);
6369 +
6370 +	if(waitqueue_active(&fq->wait)) {
6371 +		int this_gpu = replica_to_gpu(aff, replica);
6372 +		struct list_head *pos;
6373 +
6374 +		list_for_each(pos, &fq->wait.task_list) {
6375 +			wait_queue_t *fq_wait = list_entry(pos, wait_queue_t, task_list);
6376 +			ikglp_wait_state_t *tmp_wait = container_of(fq_wait, ikglp_wait_state_t, fq_node);
6377 +
6378 +			int tmp_improvement =
6379 +				gpu_migration_distance(this_gpu, tsk_rt(tmp_wait->task)->last_gpu) -
6380 +				gpu_migration_distance(dest_gpu, tsk_rt(tmp_wait->task)->last_gpu);
6381 +
6382 +			if(tmp_improvement > max_improvement) {
6383 +				wait = tmp_wait;
6384 +				max_improvement = tmp_improvement;
6385 +
6386 +				if(max_improvement >= (MIG_NONE-1)) {
6387 +					goto out;
6388 +				}
6389 +			}
6390 +		}
6391 +
6392 +		BUG_ON(!wait);
6393 +	}
6394 +	else {
6395 +		TRACE_CUR("fq %d is empty!\n", replica);
6396 +	}
6397 +
6398 +out:
6399 +
6400 +	TRACE_CUR("Candidate victim from fq %d is %s/%d.  aff improvement = %d.\n",
6401 +			  replica,
6402 +			  (wait) ? wait->task->comm : "nil",
6403 +			  (wait) ? wait->task->pid  : -1,
6404 +			  max_improvement);
6405 +
6406 +	return wait;
6407 +}
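
pick_steal() above scores each waiting request by how much its migration cost would shrink if it moved: improvement = distance(its current queue's GPU, its last GPU) minus distance(the destination GPU, its last GPU). A worked example with a stand-in distance function follows; the real gpu_migration_distance() classifies hops, not simple index arithmetic.

#include <stdio.h>
#include <stdlib.h>

/* Stand-in for gpu_migration_distance(): plain index distance here. */
static int dist(int a, int b)
{
	return abs(a - b);
}

int main(void)
{
	int this_gpu = 0, dest_gpu = 2, last_gpu = 3;
	int improvement = dist(this_gpu, last_gpu) - dist(dest_gpu, last_gpu);

	/* The task last ran on GPU 3; moving its queued request from GPU 0's
	 * queue to GPU 2's queue cuts its migration cost, so improvement > 0. */
	printf("improvement = %d\n", improvement);	/* 3 - 1 = 2 */
	return 0;
}
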
6408 +
6409 +
6410 +ikglp_wait_state_t* gpu_ikglp_advise_steal(struct ikglp_affinity* aff,
6411 +										   struct fifo_queue* dst)
6412 +{
6413 +	// Heuristic strategy: Find the task with the greatest improvement in affinity.
6414 +	//
6415 +	struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
6416 +	ikglp_wait_state_t *to_steal_state = NULL;
6417 +//	ikglp_wait_state_t *default_to_steal_state = ikglp_find_hp_waiter_to_steal(sem);
6418 +	int max_improvement = -(MIG_NONE+1);
6419 +	int replica, i;
6420 +	int dest_gpu;
6421 +
6422 +	replica = ikglp_get_idx(sem, dst);
6423 +	dest_gpu = replica_to_gpu(aff, replica);
6424 +
6425 +	for(i = 0; i < sem->nr_replicas; ++i) {
6426 +		ikglp_wait_state_t *tmp_to_steal_state =
6427 +			pick_steal(aff, dest_gpu, &sem->fifo_queues[i]);
6428 +
6429 +		if(tmp_to_steal_state) {
6430 +			int tmp_improvement =
6431 +				gpu_migration_distance(replica_to_gpu(aff, i), tsk_rt(tmp_to_steal_state->task)->last_gpu) -
6432 +				gpu_migration_distance(dest_gpu, tsk_rt(tmp_to_steal_state->task)->last_gpu);
6433 +
6434 +			if(tmp_improvement > max_improvement) {
6435 +				to_steal_state = tmp_to_steal_state;
6436 +				max_improvement = tmp_improvement;
6437 +
6438 +				if(max_improvement >= (MIG_NONE-1)) {
6439 +					goto out;
6440 +				}
6441 +			}
6442 +		}
6443 +	}
6444 +
6445 +out:
6446 +	if(!to_steal_state) {
6447 +		TRACE_CUR("Could not find anyone to steal.\n");
6448 +	}
6449 +	else {
6450 +		TRACE_CUR("Selected victim %s/%d on fq %d (GPU %d) for fq %d (GPU %d): improvement = %d\n",
6451 +				  to_steal_state->task->comm, to_steal_state->task->pid,
6452 +				  ikglp_get_idx(sem, to_steal_state->donee_heap_node.fq),
6453 +				  replica_to_gpu(aff, ikglp_get_idx(sem, to_steal_state->donee_heap_node.fq)),
6454 +				  ikglp_get_idx(sem, dst),
6455 +				  dest_gpu,
6456 +				  max_improvement);
6457 +
6458 +//		TRACE_CUR("Non-aff wanted to select victim %s/%d on fq %d (GPU %d) for fq %d (GPU %d): improvement = %d\n",
6459 +//				  default_to_steal_state->task->comm, default_to_steal_state->task->pid,
6460 +//				  ikglp_get_idx(sem, default_to_steal_state->donee_heap_node.fq),
6461 +//				  replica_to_gpu(aff, ikglp_get_idx(sem, default_to_steal_state->donee_heap_node.fq)),
6462 +//				  ikglp_get_idx(sem, dst),
6463 +//				  replica_to_gpu(aff, ikglp_get_idx(sem, dst)),
6464 +//
6465 +//				  gpu_migration_distance(
6466 +//					  replica_to_gpu(aff, ikglp_get_idx(sem, default_to_steal_state->donee_heap_node.fq)),
6467 +//					  tsk_rt(default_to_steal_state->task)->last_gpu) -
6468 +//				  gpu_migration_distance(dest_gpu, tsk_rt(default_to_steal_state->task)->last_gpu));
6469 +	}
6470 +
6471 +	return(to_steal_state);
6472 +}
6473 +
6474 +
6475 +static inline int has_donor(wait_queue_t* fq_wait)
6476 +{
6477 +	ikglp_wait_state_t *wait = container_of(fq_wait, ikglp_wait_state_t, fq_node);
6478 +	return(wait->donee_heap_node.donor_info != NULL);
6479 +}
6480 +
6481 +static ikglp_donee_heap_node_t* pick_donee(struct ikglp_affinity* aff,
6482 +					  struct fifo_queue* fq,
6483 +					  int* dist_from_head)
6484 +{
6485 +	struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
6486 +	struct task_struct *donee;
6487 +	ikglp_donee_heap_node_t *donee_node;
6488 +	struct task_struct *mth_highest = ikglp_mth_highest(sem);
6489 +
6490 +//	lt_t now = litmus_clock();
6491 +//
6492 +//	TRACE_CUR("fq %d: mth_highest: %s/%d, deadline = %d: (donor) = ??? ",
6493 +//			  ikglp_get_idx(sem, fq),
6494 +//			  mth_highest->comm, mth_highest->pid,
6495 +//			  (int)get_deadline(mth_highest) - now);
6496 +
6497 +	if(fq->owner &&
6498 +	   fq->donee_heap_node.donor_info == NULL &&
6499 +	   mth_highest != fq->owner &&
6500 +	   litmus->__compare(mth_highest, BASE, fq->owner, BASE)) {
6501 +		donee = fq->owner;
6502 +		donee_node = &(fq->donee_heap_node);
6503 +		*dist_from_head = 0;
6504 +
6505 +		BUG_ON(donee != donee_node->task);
6506 +
6507 +		TRACE_CUR("picked owner of fq %d as donee\n",
6508 +				  ikglp_get_idx(sem, fq));
6509 +
6510 +		goto out;
6511 +	}
6512 +	else if(waitqueue_active(&fq->wait)) {
6513 +		struct list_head	*pos;
6514 +
6515 +
6516 +//		TRACE_CUR("fq %d: owner: %s/%d, deadline = %d: (donor) = %s/%d "
6517 +//				  "(mth_highest != fq->owner) = %d "
6518 +//				  "(mth_highest > fq->owner) = %d\n",
6519 +//				  ikglp_get_idx(sem, fq),
6520 +//				  (fq->owner) ? fq->owner->comm : "nil",
6521 +//				  (fq->owner) ? fq->owner->pid : -1,
6522 +//				  (fq->owner) ? (int)get_deadline(fq->owner) - now : -999,
6523 +//				  (fq->donee_heap_node.donor_info) ? fq->donee_heap_node.donor_info->task->comm : "nil",
6524 +//				  (fq->donee_heap_node.donor_info) ? fq->donee_heap_node.donor_info->task->pid : -1,
6525 +//				  (mth_highest != fq->owner),
6526 +//				  (litmus->__compare(mth_highest, BASE, fq->owner, BASE)));
6527 +
6528 +
6529 +		*dist_from_head = 1;
6530 +
6531 +		// iterating from the start of the queue is nice since this means
6532 +		// the donee will be closer to obtaining a resource.
6533 +		list_for_each(pos, &fq->wait.task_list) {
6534 +			wait_queue_t *fq_wait = list_entry(pos, wait_queue_t, task_list);
6535 +			ikglp_wait_state_t *wait = container_of(fq_wait, ikglp_wait_state_t, fq_node);
6536 +
6537 +//			TRACE_CUR("fq %d: waiter %d: %s/%d, deadline = %d (donor) = %s/%d "
6538 +//					  "(mth_highest != wait->task) = %d "
6539 +//					  "(mth_highest > wait->task) = %d\n",
6540 +//					  ikglp_get_idx(sem, fq),
6541 +//					  dist_from_head,
6542 +//					  wait->task->comm, wait->task->pid,
6543 +//					  (int)get_deadline(wait->task) - now,
6544 +//					  (wait->donee_heap_node.donor_info) ? wait->donee_heap_node.donor_info->task->comm : "nil",
6545 +//					  (wait->donee_heap_node.donor_info) ? wait->donee_heap_node.donor_info->task->pid : -1,
6546 +//					  (mth_highest != wait->task),
6547 +//					  (litmus->__compare(mth_highest, BASE, wait->task, BASE)));
6548 +
6549 +
6550 +			if(!has_donor(fq_wait) &&
6551 +			   mth_highest != wait->task &&
6552 +			   litmus->__compare(mth_highest, BASE, wait->task, BASE)) {
6553 +				donee = (struct task_struct*) fq_wait->private;
6554 +				donee_node = &wait->donee_heap_node;
6555 +
6556 +				BUG_ON(donee != donee_node->task);
6557 +
6558 +				TRACE_CUR("picked waiter in fq %d as donee\n",
6559 +						  ikglp_get_idx(sem, fq));
6560 +
6561 +				goto out;
6562 +			}
6563 +			++(*dist_from_head);
6564 +		}
6565 +	}
6566 +
6567 +	donee = NULL;
6568 +	donee_node = NULL;
6569 +	//*dist_from_head = sem->max_fifo_len + 1;
6570 +	*dist_from_head = IKGLP_INVAL_DISTANCE;
6571 +
6572 +	TRACE_CUR("Found no one to be donee in fq %d!\n", ikglp_get_idx(sem, fq));
6573 +
6574 +out:
6575 +
6576 +	TRACE_CUR("Candidate donee for fq %d is %s/%d (dist_from_head = %d)\n",
6577 +			  ikglp_get_idx(sem, fq),
6578 +			  (donee) ? (donee)->comm : "nil",
6579 +			  (donee) ? (donee)->pid  : -1,
6580 +			  *dist_from_head);
6581 +
6582 +	return donee_node;
6583 +}
6584 +
6585 +ikglp_donee_heap_node_t* gpu_ikglp_advise_donee_selection(
6586 +											struct ikglp_affinity* aff,
6587 +											struct task_struct* donor)
6588 +{
6589 +	// Heuristic strategy: Find the highest-priority donee that is waiting on
6590 +	// a queue closest to our affinity.  (1) The donee CANNOT already have a
6591 +	// donor (exception: donee is the lowest-prio task in the donee heap).
6592 +	// (2) Requests in 'top_m' heap are ineligible.
6593 +	//
6594 +	// Further strategy: amongst eligible donees waiting for the same GPU, pick
6595 +	// the one closest to the head of the FIFO queue (including owners).
6596 +	//
6597 +	struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
6598 +	ikglp_donee_heap_node_t *donee_node;
6599 +	gpu_migration_dist_t distance;
6600 +	int start, i, j;
6601 +
6602 +	ikglp_donee_heap_node_t *default_donee;
6603 +	ikglp_wait_state_t *default_donee_donor_info;
6604 +
6605 +	if(tsk_rt(donor)->last_gpu < 0) {
6606 +		// no affinity.  just return the min prio, like standard IKGLP
6607 +		// TODO: Find something closer to the head of the queue??
6608 +		donee_node = binheap_top_entry(&sem->donees,
6609 +									   ikglp_donee_heap_node_t,
6610 +									   node);
6611 +		goto out;
6612 +	}
6613 +
6614 +
6615 +	// Temporarily break any donation relation of the default donee (the lowest-
6616 +	// prio task in the FIFO queues) to make it eligible for selection below.
6617 +	//
6618 +	// NOTE: The original donor relation *must* be restored, even if we select
6619 +	// the default donee through affinity-aware selection, before returning
6620 +	// from this function so we don't screw up our heap ordering.
6621 +	// The standard IKGLP algorithm will steal the donor relationship if needed.
6622 +	default_donee = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node);
6623 +	default_donee_donor_info = default_donee->donor_info;  // back-up donor relation
6624 +	default_donee->donor_info = NULL;  // temporarily break any donor relation.
6625 +
6626 +	// initialize our search
6627 +	donee_node = NULL;
6628 +	distance = MIG_NONE;
6629 +
6630 +	// TODO: The below search logic may work well for locating nodes to steal
6631 +	// when an FQ goes idle.  Validate this code and apply it to stealing.
6632 +
6633 +	// begin search with affinity GPU.
6634 +	start = gpu_to_base_replica(aff, tsk_rt(donor)->last_gpu);
6635 +	i = start;
6636 +	do {  // "for each gpu" / "for each aff->nr_rsrc"
6637 +		gpu_migration_dist_t temp_distance = gpu_migration_distance(start, i);
6638 +
6639 +		// only interested in queues that will improve our distance
6640 +		if(temp_distance < distance || donee_node == NULL) {
6641 +			int dist_from_head = IKGLP_INVAL_DISTANCE;
6642 +
6643 +			TRACE_CUR("searching for a donee on GPU %d\n", i);
6644 +
6645 +			// visit each queue and pick a donee.  bail as soon as we find
6646 +			// one for this class.
6647 +
6648 +			for(j = 0; j < aff->nr_simult; ++j) {
6649 +				int temp_dist_from_head;
6650 +				ikglp_donee_heap_node_t *temp_donee_node;
6651 +				struct fifo_queue *fq;
6652 +
6653 +				fq = &(sem->fifo_queues[i + j*aff->nr_rsrc]);
6654 +				temp_donee_node = pick_donee(aff, fq, &temp_dist_from_head);
6655 +
6656 +				if(temp_dist_from_head < dist_from_head)
6657 +				{
6658 +					// we check all the FQs for this GPU to spread priorities
6659 +					// out across the queues.  does this decrease jitter?
6660 +					donee_node = temp_donee_node;
6661 +					dist_from_head = temp_dist_from_head;
6662 +				}
6663 +			}
6664 +
6665 +			if(dist_from_head != IKGLP_INVAL_DISTANCE) {
6666 +				TRACE_CUR("found donee %s/%d and is the %d-th waiter.\n",
6667 +						  donee_node->task->comm, donee_node->task->pid,
6668 +						  dist_from_head);
6669 +			}
6670 +			else {
6671 +				TRACE_CUR("found no eligible donees from GPU %d\n", i);
6672 +			}
6673 +		}
6674 +		else {
6675 +			TRACE_CUR("skipping GPU %d (distance = %d, best donor "
6676 +					  "distance = %d)\n", i, temp_distance, distance);
6677 +		}
6678 +
6679 +		i = (i+1 < aff->nr_rsrc) ? i+1 : 0;  // increment with wrap-around
6680 +	} while (i != start);
6681 +
6682 +
6683 +	// restore old donor info state.
6684 +	default_donee->donor_info = default_donee_donor_info;
6685 +
6686 +	if(!donee_node) {
6687 +		donee_node = default_donee;
6688 +
6689 +		TRACE_CUR("Could not find a donee. We have to steal one.\n");
6690 +		WARN_ON(default_donee->donor_info == NULL);
6691 +	}
6692 +
6693 +out:
6694 +
6695 +	TRACE_CUR("Selected donee %s/%d on fq %d (GPU %d) for %s/%d with affinity for GPU %d\n",
6696 +			  donee_node->task->comm, donee_node->task->pid,
6697 +			  ikglp_get_idx(sem, donee_node->fq),
6698 +			  replica_to_gpu(aff, ikglp_get_idx(sem, donee_node->fq)),
6699 +			  donor->comm, donor->pid, tsk_rt(donor)->last_gpu);
6700 +
6701 +	return(donee_node);
6702 +}
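
gpu_ikglp_advise_donee_selection() above walks the GPUs in a ring that starts at the donor's last-used GPU and wraps around via i = (i+1 < aff->nr_rsrc) ? i+1 : 0, inspecting a GPU only when it could improve the migration distance. The wrap-around walk in isolation, with illustrative values:

#include <stdio.h>

int main(void)
{
	int nr_rsrc = 4;	/* number of GPUs (illustrative) */
	int start = 2;		/* donor's last-used GPU */
	int i = start;

	/* Same wrap-around walk as above: visit every GPU exactly once,
	 * beginning with the GPU the donor has affinity for. */
	do {
		printf("visit GPU %d\n", i);	/* prints 2, 3, 0, 1 */
		i = (i + 1 < nr_rsrc) ? i + 1 : 0;
	} while (i != start);

	return 0;
}
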
6703 +
6704 +
6705 +
6706 +static void __find_closest_donor(int target_gpu,
6707 +								 struct binheap_node* donor_node,
6708 +								 ikglp_wait_state_t** cur_closest,
6709 +								 int* cur_dist)
6710 +{
6711 +	ikglp_wait_state_t *this_donor =
6712 +		binheap_entry(donor_node, ikglp_wait_state_t, node);
6713 +
6714 +	int this_dist =
6715 +		gpu_migration_distance(target_gpu, tsk_rt(this_donor->task)->last_gpu);
6716 +
6717 +//	TRACE_CUR("%s/%d: dist from target = %d\n",
6718 +//			  this_donor->task->comm,
6719 +//			  this_donor->task->pid,
6720 +//			  this_dist);
6721 +
6722 +	if(this_dist < *cur_dist) {
6723 +		// take this donor
6724 +		*cur_dist = this_dist;
6725 +		*cur_closest = this_donor;
6726 +	}
6727 +	else if(this_dist == *cur_dist) {
6728 +		// priority tie-break.  Even though this is a pre-order traversal,
6729 +		// the donors form a heap, not a binary search tree, so we still
6730 +		// need an explicit priority comparison.
6731 +		if(!(*cur_closest) ||
6732 +		   litmus->compare(this_donor->task, (*cur_closest)->task)) {
6733 +			*cur_dist = this_dist;
6734 +			*cur_closest = this_donor;
6735 +		}
6736 +	}
6737 +
6738 +    if(donor_node->left) __find_closest_donor(target_gpu, donor_node->left, cur_closest, cur_dist);
6739 +    if(donor_node->right) __find_closest_donor(target_gpu, donor_node->right, cur_closest, cur_dist);
6740 +}
6741 +
6742 +ikglp_wait_state_t* gpu_ikglp_advise_donor_to_fq(struct ikglp_affinity* aff, struct fifo_queue* fq)
6743 +{
6744 +	// Heuristic strategy: find the donor with the closest affinity to fq.
6745 +	// Tie-break on priority.
6746 +
6747 +	// We need to iterate over all the donors to do this.  Unfortunately,
6748 +	// our donors are organized in a heap.  We'll visit each node with a
6749 +	// recursive call.  This is relatively safe since there are only sem->m
6750 +	// donors, at most, so we won't recurse deeply enough to worry about
6751 +	// our stack (even with 128 CPUs, the nesting depth is at most 7).
6752 +
6753 +	struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
6754 +	ikglp_wait_state_t *donor = NULL;
6755 +	int distance = MIG_NONE;
6756 +	int gpu = replica_to_gpu(aff, ikglp_get_idx(sem, fq));
6757 +	ikglp_wait_state_t* default_donor = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node);
6758 +
6759 +	__find_closest_donor(gpu, sem->donors.root, &donor, &distance);
6760 +
6761 +	TRACE_CUR("Selected donor %s/%d (distance = %d) to move to fq %d "
6762 +			  "(non-aff wanted %s/%d). differs = %d\n",
6763 +			  donor->task->comm, donor->task->pid,
6764 +			  distance,
6765 +			  ikglp_get_idx(sem, fq),
6766 +			  default_donor->task->comm, default_donor->task->pid,
6767 +			  (donor->task != default_donor->task)
6768 +			  );
6769 +
6770 +	return(donor);
6771 +}
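     +
     +/* Illustrative walk-through of the heuristic above (example values only,
     + * not taken from the patch): suppose fq maps to GPU 1 and the donor heap
     + * holds tasks whose last_gpu values are 0, 1, and 3.  __find_closest_donor()
     + * visits every heap node and keeps the waiter with the smallest
     + * gpu_migration_distance(1, last_gpu) -- here the last_gpu == 1 task -- and
     + * only falls back to litmus->compare() priority order to break distance
     + * ties.  The actual distance classes depend on the platform topology
     + * encoded by gpu_migration_distance(). */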
6772 +
6773 +
6774 +
6775 +void gpu_ikglp_notify_enqueue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
6776 +{
6777 +	struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
6778 +	int replica = ikglp_get_idx(sem, fq);
6779 +	int gpu = replica_to_gpu(aff, replica);
6780 +	struct ikglp_queue_info *info = &aff->q_info[replica];
6781 +	lt_t est_time;
6782 +	lt_t est_len_before;
6783 +
6784 +	if(current == t) {
6785 +		tsk_rt(t)->suspend_gpu_tracker_on_block = 1;
6786 +	}
6787 +
6788 +	est_len_before = info->estimated_len;
6789 +	est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
6790 +	info->estimated_len += est_time;
6791 +
6792 +	TRACE_CUR("fq %d: q_len (%llu) + est_cs (%llu) = %llu\n",
6793 +			  ikglp_get_idx(sem, info->q),
6794 +			  est_len_before, est_time,
6795 +			  info->estimated_len);
6796 +
6797 +	//	if(aff->shortest_queue == info) {
6798 +	//		// we may no longer be the shortest
6799 +	//		aff->shortest_queue = ikglp_aff_find_shortest(aff);
6800 +	//
6801 +	//		TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
6802 +	//				  ikglp_get_idx(sem, aff->shortest_queue->q),
6803 +	//				  aff->shortest_queue->q->count,
6804 +	//				  aff->shortest_queue->estimated_len);
6805 +	//	}
6806 +}
6807 +
6808 +void gpu_ikglp_notify_dequeue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
6809 +{
6810 +	struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
6811 +	int replica = ikglp_get_idx(sem, fq);
6812 +	int gpu = replica_to_gpu(aff, replica);
6813 +	struct ikglp_queue_info *info = &aff->q_info[replica];
6814 +	lt_t est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
6815 +
6816 +	if(est_time > info->estimated_len) {
6817 +		WARN_ON(1);
6818 +		info->estimated_len = 0;
6819 +	}
6820 +	else {
6821 +		info->estimated_len -= est_time;
6822 +	}
6823 +
6824 +	TRACE_CUR("fq %d est len is now %llu\n",
6825 +			  ikglp_get_idx(sem, info->q),
6826 +			  info->estimated_len);
6827 +
6828 +	// check to see if we're the shortest queue now.
6829 +	//	if((aff->shortest_queue != info) &&
6830 +	//	   (aff->shortest_queue->estimated_len > info->estimated_len)) {
6831 +	//
6832 +	//		aff->shortest_queue = info;
6833 +	//
6834 +	//		TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
6835 +	//				  ikglp_get_idx(sem, info->q),
6836 +	//				  info->q->count,
6837 +	//				  info->estimated_len);
6838 +	//	}
6839 +}
6840 +
6841 +void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff,
6842 +							   struct fifo_queue* fq,
6843 +							   struct task_struct* t)
6844 +{
6845 +	struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
6846 +	int replica = ikglp_get_idx(sem, fq);
6847 +	int gpu = replica_to_gpu(aff, replica);
6848 +
6849 +	tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu);  // record the type of migration
6850 +
6851 +	TRACE_CUR("%s/%d acquired gpu %d (prev = %d).  migration type = %d\n",
6852 +			  t->comm, t->pid, gpu, tsk_rt(t)->last_gpu, tsk_rt(t)->gpu_migration);
6853 +
6854 +	// count the number of resource holders
6855 +	++(*(aff->q_info[replica].nr_cur_users));
6856 +
6857 +	reg_nv_device(gpu, 1, t);  // register
6858 +
6859 +	tsk_rt(t)->suspend_gpu_tracker_on_block = 0;
6860 +	reset_gpu_tracker(t);
6861 +	start_gpu_tracker(t);
6862 +}
6863 +
6864 +void gpu_ikglp_notify_freed(struct ikglp_affinity* aff,
6865 +							struct fifo_queue* fq,
6866 +							struct task_struct* t)
6867 +{
6868 +	struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
6869 +	int replica = ikglp_get_idx(sem, fq);
6870 +	int gpu = replica_to_gpu(aff, replica);
6871 +	lt_t est_time;
6872 +
6873 +	stop_gpu_tracker(t);  // stop the tracker before we do anything else.
6874 +
6875 +	est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
6876 +
6877 +	// count the number of resource holders
6878 +	--(*(aff->q_info[replica].nr_cur_users));
6879 +
6880 +	reg_nv_device(gpu, 0, t);	// unregister
6881 +
6882 +	// update estimates
6883 +	update_gpu_estimate(t, get_gpu_time(t));
6884 +
6885 +	TRACE_CUR("%s/%d freed gpu %d (prev = %d).  mig type = %d.  actual time was %llu.  "
6886 +			  "estimated was %llu.  diff is %d\n",
6887 +			  t->comm, t->pid, gpu, tsk_rt(t)->last_gpu,
6888 +			  tsk_rt(t)->gpu_migration,
6889 +			  get_gpu_time(t),
6890 +			  est_time,
6891 +			  (long long)get_gpu_time(t) - (long long)est_time);
6892 +
6893 +	tsk_rt(t)->last_gpu = gpu;
6894 +}
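     +
     +/* Note: the measured hold time from the GPU tracker is fed back through
     + * update_gpu_estimate() to refine this task's per-migration-distance
     + * execution-time estimate; the estimator itself is defined elsewhere in
     + * the patch, so its exact form is assumed rather than shown here. */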
6895 +
6896 +struct ikglp_affinity_ops gpu_ikglp_affinity =
6897 +{
6898 +	.advise_enqueue = gpu_ikglp_advise_enqueue,
6899 +	.advise_steal = gpu_ikglp_advise_steal,
6900 +	.advise_donee_selection = gpu_ikglp_advise_donee_selection,
6901 +	.advise_donor_to_fq = gpu_ikglp_advise_donor_to_fq,
6902 +
6903 +	.notify_enqueue = gpu_ikglp_notify_enqueue,
6904 +	.notify_dequeue = gpu_ikglp_notify_dequeue,
6905 +	.notify_acquired = gpu_ikglp_notify_acquired,
6906 +	.notify_freed = gpu_ikglp_notify_freed,
6907 +
6908 +	.replica_to_resource = gpu_replica_to_resource,
6909 +};
6910 +
6911 +struct affinity_observer* ikglp_gpu_aff_obs_new(struct affinity_observer_ops* ops,
6912 +												void* __user args)
6913 +{
6914 +	return ikglp_aff_obs_new(ops, &gpu_ikglp_affinity, args);
6915 +}
6916 +
6917 +
6918 +
6919 +
6920 +
6921 +
6922 +
6923 +
6924 +// Simple IKGLP Affinity (standard IKGLP with auto-gpu registration)
6925 +
6926 +struct fifo_queue* simple_gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t)
6927 +{
6928 +	struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
6929 +	int min_count;
6930 +	int min_nr_users;
6931 +	struct ikglp_queue_info *shortest;
6932 +	struct fifo_queue *to_enqueue;
6933 +	int i;
6934 +
6935 +	//	TRACE_CUR("Simple GPU ikglp advise_enqueue invoked\n");
6936 +
6937 +	shortest = &aff->q_info[0];
6938 +	min_count = shortest->q->count;
6939 +	min_nr_users = *(shortest->nr_cur_users);
6940 +
6941 +	TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
6942 +			  ikglp_get_idx(sem, shortest->q),
6943 +			  shortest->q->count,
6944 +			  min_nr_users);
6945 +
6946 +	for(i = 1; i < sem->nr_replicas; ++i) {
6947 +		int len = aff->q_info[i].q->count;
6948 +
6949 +		// pick this queue if it is shorter, or if the lengths are equal
6950 +		// and it has fewer total users.
6951 +		//
6952 +		// tie-breaking on the smallest number of simultaneous users only
6953 +		// kicks in when there is more than one empty queue.
6954 +		if((len < min_count) ||
6955 +		   ((len == min_count) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
6956 +			shortest = &aff->q_info[i];
6957 +			min_count = shortest->q->count;
6958 +			min_nr_users = *(aff->q_info[i].nr_cur_users);
6959 +		}
6960 +
6961 +		TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
6962 +				  ikglp_get_idx(sem, aff->q_info[i].q),
6963 +				  aff->q_info[i].q->count,
6964 +				  *(aff->q_info[i].nr_cur_users));
6965 +	}
6966 +
6967 +	to_enqueue = shortest->q;
6968 +	TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n",
6969 +			  ikglp_get_idx(sem, to_enqueue),
6970 +			  ikglp_get_idx(sem, sem->shortest_fifo_queue));
6971 +
6972 +	return to_enqueue;
6973 +}
6974 +
6975 +ikglp_wait_state_t* simple_gpu_ikglp_advise_steal(struct ikglp_affinity* aff,
6976 +												  struct fifo_queue* dst)
6977 +{
6978 +	struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
6979 +	//	TRACE_CUR("Simple GPU ikglp advise_steal invoked\n");
6980 +	return ikglp_find_hp_waiter_to_steal(sem);
6981 +}
6982 +
6983 +ikglp_donee_heap_node_t* simple_gpu_ikglp_advise_donee_selection(struct ikglp_affinity* aff, struct task_struct* donor)
6984 +{
6985 +	struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
6986 +	ikglp_donee_heap_node_t *donee = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node);
6987 +	return(donee);
6988 +}
6989 +
6990 +ikglp_wait_state_t* simple_gpu_ikglp_advise_donor_to_fq(struct ikglp_affinity* aff, struct fifo_queue* fq)
6991 +{
6992 +	struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
6993 +	ikglp_wait_state_t* donor = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node);
6994 +	return(donor);
6995 +}
6996 +
6997 +void simple_gpu_ikglp_notify_enqueue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
6998 +{
6999 +	//	TRACE_CUR("Simple GPU ikglp notify_enqueue invoked\n");
7000 +}
7001 +
7002 +void simple_gpu_ikglp_notify_dequeue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
7003 +{
7004 +	//	TRACE_CUR("Simple GPU ikglp notify_dequeue invoked\n");
7005 +}
7006 +
7007 +void simple_gpu_ikglp_notify_acquired(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
7008 +{
7009 +	struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
7010 +	int replica = ikglp_get_idx(sem, fq);
7011 +	int gpu = replica_to_gpu(aff, replica);
7012 +
7013 +	//	TRACE_CUR("Simple GPU ikglp notify_acquired invoked\n");
7014 +
7015 +	// count the number of resource holders
7016 +	++(*(aff->q_info[replica].nr_cur_users));
7017 +
7018 +	reg_nv_device(gpu, 1, t);  // register
7019 +}
7020 +
7021 +void simple_gpu_ikglp_notify_freed(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
7022 +{
7023 +	struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
7024 +	int replica = ikglp_get_idx(sem, fq);
7025 +	int gpu = replica_to_gpu(aff, replica);
7026 +
7027 +	//	TRACE_CUR("Simple GPU ikglp notify_freed invoked\n");
7028 +	// count the number of resource holders
7029 +	--(*(aff->q_info[replica].nr_cur_users));
7030 +
7031 +	reg_nv_device(gpu, 0, t);	// unregister
7032 +}
7033 +
7034 +struct ikglp_affinity_ops simple_gpu_ikglp_affinity =
7035 +{
7036 +	.advise_enqueue = simple_gpu_ikglp_advise_enqueue,
7037 +	.advise_steal = simple_gpu_ikglp_advise_steal,
7038 +	.advise_donee_selection = simple_gpu_ikglp_advise_donee_selection,
7039 +	.advise_donor_to_fq = simple_gpu_ikglp_advise_donor_to_fq,
7040 +
7041 +	.notify_enqueue = simple_gpu_ikglp_notify_enqueue,
7042 +	.notify_dequeue = simple_gpu_ikglp_notify_dequeue,
7043 +	.notify_acquired = simple_gpu_ikglp_notify_acquired,
7044 +	.notify_freed = simple_gpu_ikglp_notify_freed,
7045 +
7046 +	.replica_to_resource = gpu_replica_to_resource,
7047 +};
7048 +
7049 +struct affinity_observer* ikglp_simple_gpu_aff_obs_new(struct affinity_observer_ops* ops,
7050 +													   void* __user args)
7051 +{
7052 +	return ikglp_aff_obs_new(ops, &simple_gpu_ikglp_affinity, args);
7053 +}
7054 +
7055 +#endif
7056 +
7057 +
7058 +
7059 +
7060 +
7061 +
7062 +
7063 +
7064 +
7065 diff --git a/litmus/jobs.c b/litmus/jobs.c
7066 index 36e3146..1d97462 100644
7067 --- a/litmus/jobs.c
7068 +++ b/litmus/jobs.c
7069 @@ -10,8 +10,21 @@ void prepare_for_next_period(struct task_struct *t)
7070  {
7071  	BUG_ON(!t);
7072  	/* prepare next release */
7073 -	t->rt_param.job_params.release   = t->rt_param.job_params.deadline;
7074 -	t->rt_param.job_params.deadline += get_rt_period(t);
7075 +
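     +	/* Worked example (illustrative numbers only): for a SOFT_W_SLIP task
     +	 * with a 10ms period whose deadline passed 3ms ago, the release point
     +	 * below slips forward to "now" and the next deadline becomes
     +	 * now + 10ms, instead of releasing in the past with an already-missed
     +	 * deadline as in the default branch. */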
7076 +	if(tsk_rt(t)->task_params.cls == RT_CLASS_SOFT_W_SLIP) {
7077 +		/* allow the release point to slip if we've passed our deadline. */
7078 +		lt_t now = litmus_clock();
7079 +		t->rt_param.job_params.release =
7080 +			(t->rt_param.job_params.deadline < now) ?
7081 +				now : t->rt_param.job_params.deadline;
7082 +		t->rt_param.job_params.deadline =
7083 +			t->rt_param.job_params.release + get_rt_period(t);
7084 +	}
7085 +	else {
7086 +		t->rt_param.job_params.release   = t->rt_param.job_params.deadline;
7087 +		t->rt_param.job_params.deadline += get_rt_period(t);
7088 +	}
7089 +
7090  	t->rt_param.job_params.exec_time = 0;
7091  	/* update job sequence number */
7092  	t->rt_param.job_params.job_no++;
7093 diff --git a/litmus/kexclu_affinity.c b/litmus/kexclu_affinity.c
7094 new file mode 100644
7095 index 0000000..5ef5e54
7096 --- /dev/null
7097 +++ b/litmus/kexclu_affinity.c
7098 @@ -0,0 +1,92 @@
7099 +#include <litmus/fdso.h>
7100 +#include <litmus/sched_plugin.h>
7101 +#include <litmus/trace.h>
7102 +#include <litmus/litmus.h>
7103 +#include <litmus/locking.h>
7104 +
7105 +#include <litmus/kexclu_affinity.h>
7106 +
7107 +static int create_generic_aff_obs(void** obj_ref, obj_type_t type, void* __user arg);
7108 +static int open_generic_aff_obs(struct od_table_entry* entry, void* __user arg);
7109 +static int close_generic_aff_obs(struct od_table_entry* entry);
7110 +static void destroy_generic_aff_obs(obj_type_t type, void* sem);
7111 +
7112 +struct fdso_ops generic_affinity_ops = {
7113 +	.create  = create_generic_aff_obs,
7114 +	.open    = open_generic_aff_obs,
7115 +	.close   = close_generic_aff_obs,
7116 +	.destroy = destroy_generic_aff_obs
7117 +};
7118 +
7119 +static atomic_t aff_obs_id_gen = ATOMIC_INIT(0);
7120 +
7121 +static inline bool is_affinity_observer(struct od_table_entry *entry)
7122 +{
7123 +	return (entry->class == &generic_affinity_ops);
7124 +}
7125 +
7126 +static inline struct affinity_observer* get_affinity_observer(struct od_table_entry* entry)
7127 +{
7128 +	BUG_ON(!is_affinity_observer(entry));
7129 +	return (struct affinity_observer*) entry->obj->obj;
7130 +}
7131 +
7132 +static int create_generic_aff_obs(void** obj_ref, obj_type_t type, void* __user arg)
7133 +{
7134 +	struct affinity_observer* aff_obs;
7135 +	int err;
7136 +
7137 +	err = litmus->allocate_aff_obs(&aff_obs, type, arg);
7138 +	if (err == 0) {
7139 +		BUG_ON(!aff_obs->lock);
7140 +		aff_obs->type = type;
7141 +		*obj_ref = aff_obs;
7142 +    }
7143 +	return err;
7144 +}
7145 +
7146 +static int open_generic_aff_obs(struct od_table_entry* entry, void* __user arg)
7147 +{
7148 +	struct affinity_observer* aff_obs = get_affinity_observer(entry);
7149 +	if (aff_obs->ops->open)
7150 +		return aff_obs->ops->open(aff_obs, arg);
7151 +	else
7152 +		return 0; /* default: any task can open it */
7153 +}
7154 +
7155 +static int close_generic_aff_obs(struct od_table_entry* entry)
7156 +{
7157 +	struct affinity_observer* aff_obs = get_affinity_observer(entry);
7158 +	if (aff_obs->ops->close)
7159 +		return aff_obs->ops->close(aff_obs);
7160 +	else
7161 +		return 0; /* default: closing succeeds */
7162 +}
7163 +
7164 +static void destroy_generic_aff_obs(obj_type_t type, void* obj)
7165 +{
7166 +	struct affinity_observer* aff_obs = (struct affinity_observer*) obj;
7167 +	aff_obs->ops->deallocate(aff_obs);
7168 +}
7169 +
7170 +
7171 +struct litmus_lock* get_lock_from_od(int od)
7172 +{
7173 +	extern struct fdso_ops generic_lock_ops;
7174 +
7175 +	struct od_table_entry *entry = get_entry_for_od(od);
7176 +
7177 +	if(entry && entry->class == &generic_lock_ops) {
7178 +		return (struct litmus_lock*) entry->obj->obj;
7179 +	}
7180 +	return NULL;
7181 +}
7182 +
7183 +void affinity_observer_new(struct affinity_observer* aff,
7184 +						   struct affinity_observer_ops* ops,
7185 +						   struct affinity_observer_args* args)
7186 +{
7187 +	aff->ops = ops;
7188 +	aff->lock = get_lock_from_od(args->lock_od);
7189 +	aff->ident = atomic_inc_return(&aff_obs_id_gen);
7190 +}
7191 \ No newline at end of file
7192 diff --git a/litmus/kfmlp_lock.c b/litmus/kfmlp_lock.c
7193 new file mode 100644
7194 index 0000000..bff857e
7195 --- /dev/null
7196 +++ b/litmus/kfmlp_lock.c
7197 @@ -0,0 +1,1002 @@
7198 +#include <linux/slab.h>
7199 +#include <linux/uaccess.h>
7200 +
7201 +#include <litmus/trace.h>
7202 +#include <litmus/sched_plugin.h>
7203 +#include <litmus/fdso.h>
7204 +
7205 +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
7206 +#include <litmus/gpu_affinity.h>
7207 +#include <litmus/nvidia_info.h>
7208 +#endif
7209 +
7210 +#include <litmus/kfmlp_lock.h>
7211 +
7212 +static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem,
7213 +								struct kfmlp_queue* queue)
7214 +{
7215 +	return (queue - &sem->queues[0]);
7216 +}
7217 +
7218 +static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem,
7219 +												  struct task_struct* holder)
7220 +{
7221 +	int i;
7222 +	for(i = 0; i < sem->num_resources; ++i)
7223 +		if(sem->queues[i].owner == holder)
7224 +			return(&sem->queues[i]);
7225 +	return(NULL);
7226 +}
7227 +
7228 +/* caller is responsible for locking */
7229 +static struct task_struct* kfmlp_find_hp_waiter(struct kfmlp_queue *kqueue,
7230 +												struct task_struct *skip)
7231 +{
7232 +	struct list_head	*pos;
7233 +	struct task_struct 	*queued, *found = NULL;
7234 +
7235 +	list_for_each(pos, &kqueue->wait.task_list) {
7236 +		queued  = (struct task_struct*) list_entry(pos, wait_queue_t,
7237 +												   task_list)->private;
7238 +
7239 +		/* Compare task prios, find high prio task. */
7240 +		//if (queued != skip && edf_higher_prio(queued, found))
7241 +		if (queued != skip && litmus->compare(queued, found))
7242 +			found = queued;
7243 +	}
7244 +	return found;
7245 +}
7246 +
7247 +static inline struct kfmlp_queue* kfmlp_find_shortest(struct kfmlp_semaphore* sem,
7248 +													  struct kfmlp_queue* search_start)
7249 +{
7250 +	// we start our search at search_start instead of at the beginning of the
7251 +	// queue list to load-balance across all resources.
7252 +	struct kfmlp_queue* step = search_start;
7253 +	struct kfmlp_queue* shortest = sem->shortest_queue;
7254 +
7255 +	do
7256 +	{
7257 +		step = (step+1 != &sem->queues[sem->num_resources]) ?
7258 +		step+1 : &sem->queues[0];
7259 +
7260 +		if(step->count < shortest->count)
7261 +		{
7262 +			shortest = step;
7263 +			if(step->count == 0)
7264 +				break; /* can't get any shorter */
7265 +		}
7266 +
7267 +	}while(step != search_start);
7268 +
7269 +	return(shortest);
7270 +}
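     +
     +/* Example: with four queues and search_start == &sem->queues[2], the loop
     + * above examines queues 3, 0, 1, and finally 2, stopping early if it finds
     + * an empty queue; starting just past search_start spreads successive
     + * enqueues across replicas instead of always favoring queue 0. */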
7271 +
7272 +
7273 +static struct task_struct* kfmlp_select_hp_steal(struct kfmlp_semaphore* sem,
7274 +												 wait_queue_t** to_steal,
7275 +												 struct kfmlp_queue** to_steal_from)
7276 +{
7277 +	/* must hold sem->lock */
7278 +
7279 +	int i;
7280 +
7281 +	*to_steal = NULL;
7282 +	*to_steal_from = NULL;
7283 +
7284 +	for(i = 0; i < sem->num_resources; ++i)
7285 +	{
7286 +		if( (sem->queues[i].count > 1) &&
7287 +		   ((*to_steal_from == NULL) ||
7288 +			//(edf_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) )
7289 +			(litmus->compare(sem->queues[i].hp_waiter, (*to_steal_from)->hp_waiter))) )
7290 +		{
7291 +			*to_steal_from = &sem->queues[i];
7292 +		}
7293 +	}
7294 +
7295 +	if(*to_steal_from)
7296 +	{
7297 +		struct list_head *pos;
7298 +		struct task_struct *target = (*to_steal_from)->hp_waiter;
7299 +
7300 +		TRACE_CUR("want to steal hp_waiter (%s/%d) from queue %d\n",
7301 +				  target->comm,
7302 +				  target->pid,
7303 +				  kfmlp_get_idx(sem, *to_steal_from));
7304 +
7305 +		list_for_each(pos, &(*to_steal_from)->wait.task_list)
7306 +		{
7307 +			wait_queue_t *node = list_entry(pos, wait_queue_t, task_list);
7308 +			struct task_struct *queued = (struct task_struct*) node->private;
7309 +			/* Compare task prios, find high prio task. */
7310 +			if (queued == target)
7311 +			{
7312 +				*to_steal = node;
7313 +
7314 +				TRACE_CUR("steal: selected %s/%d from queue %d\n",
7315 +						  queued->comm, queued->pid,
7316 +						  kfmlp_get_idx(sem, *to_steal_from));
7317 +
7318 +				return queued;
7319 +			}
7320 +		}
7321 +
7322 +		TRACE_CUR("Could not find %s/%d in queue %d!!!  THIS IS A BUG!\n",
7323 +				  target->comm,
7324 +				  target->pid,
7325 +				  kfmlp_get_idx(sem, *to_steal_from));
7326 +	}
7327 +
7328 +	return NULL;
7329 +}
7330 +
7331 +static void kfmlp_steal_node(struct kfmlp_semaphore *sem,
7332 +							 struct kfmlp_queue *dst,
7333 +							 wait_queue_t *wait,
7334 +							 struct kfmlp_queue *src)
7335 +{
7336 +	struct task_struct* t = (struct task_struct*) wait->private;
7337 +
7338 +	__remove_wait_queue(&src->wait, wait);
7339 +	--(src->count);
7340 +
7341 +	if(t == src->hp_waiter) {
7342 +		src->hp_waiter = kfmlp_find_hp_waiter(src, NULL);
7343 +
7344 +		TRACE_CUR("queue %d: %s/%d is new hp_waiter\n",
7345 +				  kfmlp_get_idx(sem, src),
7346 +				  (src->hp_waiter) ? src->hp_waiter->comm : "nil",
7347 +				  (src->hp_waiter) ? src->hp_waiter->pid : -1);
7348 +
7349 +		if(src->owner && tsk_rt(src->owner)->inh_task == t) {
7350 +			litmus->decrease_prio(src->owner, src->hp_waiter);
7351 +		}
7352 +	}
7353 +
7354 +	if(sem->shortest_queue->count > src->count) {
7355 +		sem->shortest_queue = src;
7356 +		TRACE_CUR("queue %d is the shortest\n", kfmlp_get_idx(sem, sem->shortest_queue));
7357 +	}
7358 +
7359 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
7360 +	if(sem->aff_obs) {
7361 +		sem->aff_obs->ops->notify_dequeue(sem->aff_obs, src, t);
7362 +	}
7363 +#endif
7364 +
7365 +	init_waitqueue_entry(wait, t);
7366 +	__add_wait_queue_tail_exclusive(&dst->wait, wait);
7367 +	++(dst->count);
7368 +
7369 +	if(litmus->compare(t, dst->hp_waiter)) {
7370 +		dst->hp_waiter = t;
7371 +
7372 +		TRACE_CUR("queue %d: %s/%d is new hp_waiter\n",
7373 +				  kfmlp_get_idx(sem, dst),
7374 +				  t->comm, t->pid);
7375 +
7376 +		if(dst->owner && litmus->compare(t, dst->owner))
7377 +		{
7378 +			litmus->increase_prio(dst->owner, t);
7379 +		}
7380 +	}
7381 +
7382 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
7383 +	if(sem->aff_obs) {
7384 +		sem->aff_obs->ops->notify_enqueue(sem->aff_obs, dst, t);
7385 +	}
7386 +#endif
7387 +}
7388 +
7389 +
7390 +int kfmlp_lock(struct litmus_lock* l)
7391 +{
7392 +	struct task_struct* t = current;
7393 +	struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
7394 +	struct kfmlp_queue* my_queue = NULL;
7395 +	wait_queue_t wait;
7396 +	unsigned long flags;
7397 +
7398 +	if (!is_realtime(t))
7399 +		return -EPERM;
7400 +
7401 +	spin_lock_irqsave(&sem->lock, flags);
7402 +
7403 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
7404 +	if(sem->aff_obs) {
7405 +		my_queue = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t);
7406 +	}
7407 +	if(!my_queue) {
7408 +		my_queue = sem->shortest_queue;
7409 +	}
7410 +#else
7411 +	my_queue = sem->shortest_queue;
7412 +#endif
7413 +
7414 +	if (my_queue->owner) {
7415 +		/* resource is not free => must suspend and wait */
7416 +		TRACE_CUR("queue %d: Resource is not free => must suspend and wait. (queue size = %d)\n",
7417 +				  kfmlp_get_idx(sem, my_queue),
7418 +				  my_queue->count);
7419 +
7420 +		init_waitqueue_entry(&wait, t);
7421 +
7422 +		/* FIXME: interruptible would be nice some day */
7423 +		set_task_state(t, TASK_UNINTERRUPTIBLE);
7424 +
7425 +		__add_wait_queue_tail_exclusive(&my_queue->wait, &wait);
7426 +
7427 +		TRACE_CUR("queue %d: hp_waiter is currently %s/%d\n",
7428 +				  kfmlp_get_idx(sem, my_queue),
7429 +				  (my_queue->hp_waiter) ? my_queue->hp_waiter->comm : "nil",
7430 +				  (my_queue->hp_waiter) ? my_queue->hp_waiter->pid : -1);
7431 +
7432 +		/* check if we need to activate priority inheritance */
7433 +		//if (edf_higher_prio(t, my_queue->hp_waiter))
7434 +		if (litmus->compare(t, my_queue->hp_waiter)) {
7435 +			my_queue->hp_waiter = t;
7436 +			TRACE_CUR("queue %d: %s/%d is new hp_waiter\n",
7437 +					  kfmlp_get_idx(sem, my_queue),
7438 +					  t->comm, t->pid);
7439 +
7440 +			//if (edf_higher_prio(t, my_queue->owner))
7441 +			if (litmus->compare(t, my_queue->owner)) {
7442 +				litmus->increase_prio(my_queue->owner, my_queue->hp_waiter);
7443 +			}
7444 +		}
7445 +
7446 +		++(my_queue->count);
7447 +
7448 +		if(my_queue == sem->shortest_queue) {
7449 +			sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
7450 +			TRACE_CUR("queue %d is the shortest\n",
7451 +					  kfmlp_get_idx(sem, sem->shortest_queue));
7452 +		}
7453 +
7454 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
7455 +		if(sem->aff_obs) {
7456 +			sem->aff_obs->ops->notify_enqueue(sem->aff_obs, my_queue, t);
7457 +		}
7458 +#endif
7459 +
7460 +		/* release lock before sleeping */
7461 +		spin_unlock_irqrestore(&sem->lock, flags);
7462 +
7463 +		/* We depend on the FIFO order.  Thus, we don't need to recheck
7464 +		 * when we wake up; we are guaranteed to have the lock since
7465 +		 * there is only one wake up per release (or steal).
7466 +		 */
7467 +		schedule();
7468 +
7469 +
7470 +		if(my_queue->owner == t) {
7471 +			TRACE_CUR("queue %d: acquired through waiting\n",
7472 +					  kfmlp_get_idx(sem, my_queue));
7473 +		}
7474 +		else {
7475 +			/* this case may happen if our wait entry was stolen
7476 +			 * between queues.  Record where we went. */
7477 +			my_queue = kfmlp_get_queue(sem, t);
7478 +
7479 +			BUG_ON(!my_queue);
7480 +			TRACE_CUR("queue %d: acquired through stealing\n",
7481 +					  kfmlp_get_idx(sem, my_queue));
7482 +		}
7483 +	}
7484 +	else {
7485 +		TRACE_CUR("queue %d: acquired immediately\n",
7486 +				  kfmlp_get_idx(sem, my_queue));
7487 +
7488 +		my_queue->owner = t;
7489 +
7490 +		++(my_queue->count);
7491 +
7492 +		if(my_queue == sem->shortest_queue) {
7493 +			sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
7494 +			TRACE_CUR("queue %d is the shortest\n",
7495 +					  kfmlp_get_idx(sem, sem->shortest_queue));
7496 +		}
7497 +
7498 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
7499 +		if(sem->aff_obs) {
7500 +			sem->aff_obs->ops->notify_enqueue(sem->aff_obs, my_queue, t);
7501 +			sem->aff_obs->ops->notify_acquired(sem->aff_obs, my_queue, t);
7502 +		}
7503 +#endif
7504 +
7505 +		spin_unlock_irqrestore(&sem->lock, flags);
7506 +	}
7507 +
7508 +
7509 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
7510 +	if(sem->aff_obs) {
7511 +		return sem->aff_obs->ops->replica_to_resource(sem->aff_obs, my_queue);
7512 +	}
7513 +#endif
7514 +	return kfmlp_get_idx(sem, my_queue);
7515 +}
7516 +
7517 +
7518 +int kfmlp_unlock(struct litmus_lock* l)
7519 +{
7520 +	struct task_struct *t = current, *next;
7521 +	struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
7522 +	struct kfmlp_queue *my_queue, *to_steal_from;
7523 +	unsigned long flags;
7524 +	int err = 0;
7525 +
7526 +	my_queue = kfmlp_get_queue(sem, t);
7527 +
7528 +	if (!my_queue) {
7529 +		err = -EINVAL;
7530 +		goto out;
7531 +	}
7532 +
7533 +	spin_lock_irqsave(&sem->lock, flags);
7534 +
7535 +	TRACE_CUR("queue %d: unlocking\n", kfmlp_get_idx(sem, my_queue));
7536 +
7537 +	my_queue->owner = NULL;  // clear ownership
7538 +	--(my_queue->count);
7539 +
7540 +	if(my_queue->count < sem->shortest_queue->count)
7541 +	{
7542 +		sem->shortest_queue = my_queue;
7543 +		TRACE_CUR("queue %d is the shortest\n",
7544 +				  kfmlp_get_idx(sem, sem->shortest_queue));
7545 +	}
7546 +
7547 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
7548 +	if(sem->aff_obs) {
7549 +		sem->aff_obs->ops->notify_dequeue(sem->aff_obs, my_queue, t);
7550 +		sem->aff_obs->ops->notify_freed(sem->aff_obs, my_queue, t);
7551 +	}
7552 +#endif
7553 +
7554 +	/* we lose the benefit of priority inheritance (if any) */
7555 +	if (tsk_rt(t)->inh_task)
7556 +		litmus->decrease_prio(t, NULL);
7557 +
7558 +
7559 +	/* check if there are jobs waiting for this resource */
7560 +RETRY:
7561 +	next = __waitqueue_remove_first(&my_queue->wait);
7562 +	if (next) {
7563 +		/* next becomes the resource holder */
7564 +		my_queue->owner = next;
7565 +
7566 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
7567 +		if(sem->aff_obs) {
7568 +			sem->aff_obs->ops->notify_acquired(sem->aff_obs, my_queue, next);
7569 +		}
7570 +#endif
7571 +
7572 +		TRACE_CUR("queue %d: lock ownership passed to %s/%d\n",
7573 +				  kfmlp_get_idx(sem, my_queue), next->comm, next->pid);
7574 +
7575 +		/* determine new hp_waiter if necessary */
7576 +		if (next == my_queue->hp_waiter) {
7577 +			TRACE_TASK(next, "was highest-prio waiter\n");
7578 +			my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next);
7579 +			if (my_queue->hp_waiter)
7580 +				TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue));
7581 +			else
7582 +				TRACE("queue %d: no further waiters\n", kfmlp_get_idx(sem, my_queue));
7583 +		} else {
7584 +			/* Well, if next is not the highest-priority waiter,
7585 +			 * then it ought to inherit the highest-priority
7586 +			 * waiter's priority. */
7587 +			litmus->increase_prio(next, my_queue->hp_waiter);
7588 +		}
7589 +
7590 +		/* wake up next */
7591 +		wake_up_process(next);
7592 +	}
7593 +	else {
7594 +		// TODO: put this stealing logic before we attempt to release
7595 +		// our resource.  (simplifies the code and gets rid of the ugly goto RETRY.)
7596 +		wait_queue_t *wait;
7597 +
7598 +		TRACE_CUR("queue %d: looking to steal someone...\n",
7599 +				  kfmlp_get_idx(sem, my_queue));
7600 +
7601 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
7602 +		next = (sem->aff_obs) ?
7603 +			sem->aff_obs->ops->advise_steal(sem->aff_obs, &wait, &to_steal_from) :
7604 +			kfmlp_select_hp_steal(sem, &wait, &to_steal_from);
7605 +#else
7606 +		next = kfmlp_select_hp_steal(sem, &wait, &to_steal_from);
7607 +#endif
7608 +
7609 +		if(next) {
7610 +			TRACE_CUR("queue %d: stealing %s/%d from queue %d\n",
7611 +					  kfmlp_get_idx(sem, my_queue),
7612 +					  next->comm, next->pid,
7613 +					  kfmlp_get_idx(sem, to_steal_from));
7614 +
7615 +			kfmlp_steal_node(sem, my_queue, wait, to_steal_from);
7616 +
7617 +			goto RETRY;  // will succeed this time.
7618 +		}
7619 +		else {
7620 +			TRACE_CUR("queue %d: no one to steal.\n",
7621 +					  kfmlp_get_idx(sem, my_queue));
7622 +		}
7623 +	}
7624 +
7625 +	spin_unlock_irqrestore(&sem->lock, flags);
7626 +
7627 +out:
7628 +	return err;
7629 +}
7630 +
7631 +int kfmlp_close(struct litmus_lock* l)
7632 +{
7633 +	struct task_struct *t = current;
7634 +	struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
7635 +	struct kfmlp_queue *my_queue;
7636 +	unsigned long flags;
7637 +
7638 +	int owner;
7639 +
7640 +	spin_lock_irqsave(&sem->lock, flags);
7641 +
7642 +	my_queue = kfmlp_get_queue(sem, t);
7643 +	owner = (my_queue) ? (my_queue->owner == t) : 0;
7644 +
7645 +	spin_unlock_irqrestore(&sem->lock, flags);
7646 +
7647 +	if (owner)
7648 +		kfmlp_unlock(l);
7649 +
7650 +	return 0;
7651 +}
7652 +
7653 +void kfmlp_free(struct litmus_lock* l)
7654 +{
7655 +	struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
7656 +	kfree(sem->queues);
7657 +	kfree(sem);
7658 +}
7659 +
7660 +
7661 +
7662 +struct litmus_lock* kfmlp_new(struct litmus_lock_ops* ops, void* __user args)
7663 +{
7664 +	struct kfmlp_semaphore* sem;
7665 +	int num_resources = 0;
7666 +	int i;
7667 +
7668 +	if(!access_ok(VERIFY_READ, args, sizeof(num_resources)))
7669 +	{
7670 +		return(NULL);
7671 +	}
7672 +	if(__copy_from_user(&num_resources, args, sizeof(num_resources)))
7673 +	{
7674 +		return(NULL);
7675 +	}
7676 +	if(num_resources < 1)
7677 +	{
7678 +		return(NULL);
7679 +	}
7680 +
7681 +	sem = kmalloc(sizeof(*sem), GFP_KERNEL);
7682 +	if(!sem)
7683 +	{
7684 +		return(NULL);
7685 +	}
7686 +
7687 +	sem->queues = kmalloc(sizeof(struct kfmlp_queue)*num_resources, GFP_KERNEL);
7688 +	if(!sem->queues)
7689 +	{
7690 +		kfree(sem);
7691 +		return(NULL);
7692 +	}
7693 +
7694 +	sem->litmus_lock.ops = ops;
7695 +	spin_lock_init(&sem->lock);
7696 +	sem->num_resources = num_resources;
7697 +
7698 +	for(i = 0; i < num_resources; ++i)
7699 +	{
7700 +		sem->queues[i].owner = NULL;
7701 +		sem->queues[i].hp_waiter = NULL;
7702 +		init_waitqueue_head(&sem->queues[i].wait);
7703 +		sem->queues[i].count = 0;
7704 +	}
7705 +
7706 +	sem->shortest_queue = &sem->queues[0];
7707 +
7708 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
7709 +	sem->aff_obs = NULL;
7710 +#endif
7711 +
7712 +	return &sem->litmus_lock;
7713 +}
7714 +
7715 +
7716 +
7717 +
7718 +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
7719 +
7720 +static inline int __replica_to_gpu(struct kfmlp_affinity* aff, int replica)
7721 +{
7722 +	int gpu = replica % aff->nr_rsrc;
7723 +	return gpu;
7724 +}
7725 +
7726 +static inline int replica_to_gpu(struct kfmlp_affinity* aff, int replica)
7727 +{
7728 +	int gpu = __replica_to_gpu(aff, replica) + aff->offset;
7729 +	return gpu;
7730 +}
7731 +
7732 +static inline int gpu_to_base_replica(struct kfmlp_affinity* aff, int gpu)
7733 +{
7734 +	int replica = gpu - aff->offset;
7735 +	return replica;
7736 +}
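     +
     +/* Worked example of the replica<->GPU mapping above (illustrative numbers
     + * only): with 8 replicas and nr_simult_users == 2, nr_rsrc == 4 GPUs.
     + * Replica 5 maps to GPU (5 % 4) + offset == 1 + offset, and that GPU maps
     + * back to base replica 1; replicas r and r + nr_rsrc therefore share the
     + * same physical GPU. */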
7737 +
7738 +
7739 +int kfmlp_aff_obs_close(struct affinity_observer* obs)
7740 +{
7741 +	return 0;
7742 +}
7743 +
7744 +void kfmlp_aff_obs_free(struct affinity_observer* obs)
7745 +{
7746 +	struct kfmlp_affinity *kfmlp_aff = kfmlp_aff_obs_from_aff_obs(obs);
7747 +	kfree(kfmlp_aff->nr_cur_users_on_rsrc);
7748 +	kfree(kfmlp_aff->q_info);
7749 +	kfree(kfmlp_aff);
7750 +}
7751 +
7752 +static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* ops,
7753 +												   struct kfmlp_affinity_ops* kfmlp_ops,
7754 +												   void* __user args)
7755 +{
7756 +	struct kfmlp_affinity* kfmlp_aff;
7757 +	struct gpu_affinity_observer_args aff_args;
7758 +	struct kfmlp_semaphore* sem;
7759 +	int i;
7760 +	unsigned long flags;
7761 +
7762 +	if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) {
7763 +		return(NULL);
7764 +	}
7765 +	if(__copy_from_user(&aff_args, args, sizeof(aff_args))) {
7766 +		return(NULL);
7767 +	}
7768 +
7769 +	sem = (struct kfmlp_semaphore*) get_lock_from_od(aff_args.obs.lock_od);
7770 +
7771 +	if(sem->litmus_lock.type != KFMLP_SEM) {
7772 +		TRACE_CUR("Lock type not supported.  Type = %d\n", sem->litmus_lock.type);
7773 +		return(NULL);
7774 +	}
7775 +
7776 +	if((aff_args.nr_simult_users <= 0) ||
7777 +	   (sem->num_resources%aff_args.nr_simult_users != 0)) {
7778 +		TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users "
7779 +				  "(%d) per replica.  #replicas should be evenly divisible "
7780 +				  "by #simult_users.\n",
7781 +				  sem->litmus_lock.ident,
7782 +				  sem->num_resources,
7783 +				  aff_args.nr_simult_users);
7784 +		return(NULL);
7785 +	}
7786 +
7787 +	if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) {
7788 +		TRACE_CUR("System does not support #simult_users > %d. %d requested.\n",
7789 +				  NV_MAX_SIMULT_USERS, aff_args.nr_simult_users);
7790 +//		return(NULL);
7791 +	}
7792 +
7793 +	kfmlp_aff = kmalloc(sizeof(*kfmlp_aff), GFP_KERNEL);
7794 +	if(!kfmlp_aff) {
7795 +		return(NULL);
7796 +	}
7797 +
7798 +	kfmlp_aff->q_info = kmalloc(sizeof(struct kfmlp_queue_info)*sem->num_resources, GFP_KERNEL);
7799 +	if(!kfmlp_aff->q_info) {
7800 +		kfree(kfmlp_aff);
7801 +		return(NULL);
7802 +	}
7803 +
7804 +	kfmlp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->num_resources / aff_args.nr_simult_users), GFP_KERNEL);
7805 +	if(!kfmlp_aff->nr_cur_users_on_rsrc) {
7806 +		kfree(kfmlp_aff->q_info);
7807 +		kfree(kfmlp_aff);
7808 +		return(NULL);
7809 +	}
7810 +
7811 +	affinity_observer_new(&kfmlp_aff->obs, ops, &aff_args.obs);
7812 +
7813 +	kfmlp_aff->ops = kfmlp_ops;
7814 +	kfmlp_aff->offset = aff_args.replica_to_gpu_offset;
7815 +	kfmlp_aff->nr_simult = aff_args.nr_simult_users;
7816 +	kfmlp_aff->nr_rsrc = sem->num_resources / kfmlp_aff->nr_simult;
7817 +
7818 +	memset(kfmlp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(kfmlp_aff->nr_rsrc));
7819 +
7820 +	for(i = 0; i < sem->num_resources; ++i) {
7821 +		kfmlp_aff->q_info[i].q = &sem->queues[i];
7822 +		kfmlp_aff->q_info[i].estimated_len = 0;
7823 +
7824 +		// multiple q_info's will point to the same resource (aka GPU) if
7825 +		// aff_args.nr_simult_users > 1
7826 +		kfmlp_aff->q_info[i].nr_cur_users = &kfmlp_aff->nr_cur_users_on_rsrc[__replica_to_gpu(kfmlp_aff,i)];
7827 +	}
7828 +
7829 +	// attach observer to the lock
7830 +	spin_lock_irqsave(&sem->lock, flags);
7831 +	sem->aff_obs = kfmlp_aff;
7832 +	spin_unlock_irqrestore(&sem->lock, flags);
7833 +
7834 +	return &kfmlp_aff->obs;
7835 +}
7836 +
7837 +
7838 +
7839 +
7840 +static int gpu_replica_to_resource(struct kfmlp_affinity* aff,
7841 +								   struct kfmlp_queue* fq) {
7842 +	struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
7843 +	return(replica_to_gpu(aff, kfmlp_get_idx(sem, fq)));
7844 +}
7845 +
7846 +
7847 +// Smart KFMLP Affinity
7848 +
7849 +//static inline struct kfmlp_queue_info* kfmlp_aff_find_shortest(struct kfmlp_affinity* aff)
7850 +//{
7851 +//	struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
7852 +//	struct kfmlp_queue_info *shortest = &aff->q_info[0];
7853 +//	int i;
7854 +//
7855 +//	for(i = 1; i < sem->num_resources; ++i) {
7856 +//		if(aff->q_info[i].estimated_len < shortest->estimated_len) {
7857 +//			shortest = &aff->q_info[i];
7858 +//		}
7859 +//	}
7860 +//
7861 +//	return(shortest);
7862 +//}
7863 +
7864 +struct kfmlp_queue* gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t)
7865 +{
7866 +	struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
7867 +	lt_t min_len;
7868 +	int min_nr_users;
7869 +	struct kfmlp_queue_info *shortest;
7870 +	struct kfmlp_queue *to_enqueue;
7871 +	int i;
7872 +	int affinity_gpu;
7873 +
7874 +	// simply pick the shortest queue if we have no affinity, or if we
7875 +	// happen to have affinity with the shortest
7876 +	if(unlikely(tsk_rt(t)->last_gpu < 0)) {
7877 +		affinity_gpu = aff->offset;  // first gpu
7878 +		TRACE_CUR("no affinity\n");
7879 +	}
7880 +	else {
7881 +		affinity_gpu = tsk_rt(t)->last_gpu;
7882 +	}
7883 +
7884 +	// all things being equal, let's start with the queue with which we have
7885 +	// affinity.  this helps us maintain affinity even when we don't have
7886 +	// an estimate for local-affinity execution time (i.e., 2nd time on GPU)
7887 +	shortest = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)];
7888 +
7889 +//	if(shortest == aff->shortest_queue) {
7890 +//		TRACE_CUR("special case: have affinity with shortest queue\n");
7891 +//		goto out;
7892 +//	}
7893 +
7894 +	min_len = shortest->estimated_len + get_gpu_estimate(t, MIG_LOCAL);
7895 +	min_nr_users = *(shortest->nr_cur_users);
7896 +
7897 +	TRACE_CUR("cs is %llu on queue %d: est len = %llu\n",
7898 +			  get_gpu_estimate(t, MIG_LOCAL),
7899 +			  kfmlp_get_idx(sem, shortest->q),
7900 +			  min_len);
7901 +
7902 +	for(i = 0; i < sem->num_resources; ++i) {
7903 +		if(&aff->q_info[i] != shortest) {
7904 +
7905 +			lt_t est_len =
7906 +				aff->q_info[i].estimated_len +
7907 +				get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i)));
7908 +
7909 +			// pick this queue if its estimated length is shorter, or if the
7910 +			// estimates are equal and it has fewer total users.
7911 +			//
7912 +			// tie-breaking on the smallest number of simultaneous users only
7913 +			// kicks in when there is more than one empty queue.
7914 +			if((est_len < min_len) ||
7915 +			   ((est_len == min_len) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
7916 +				shortest = &aff->q_info[i];
7917 +				min_len = est_len;
7918 +				min_nr_users = *(aff->q_info[i].nr_cur_users);
7919 +			}
7920 +
7921 +			TRACE_CUR("cs is %llu on queue %d: est len = %llu\n",
7922 +					  get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i))),
7923 +					  kfmlp_get_idx(sem, aff->q_info[i].q),
7924 +					  est_len);
7925 +		}
7926 +	}
7927 +
7928 +	to_enqueue = shortest->q;
7929 +	TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n",
7930 +			  kfmlp_get_idx(sem, to_enqueue),
7931 +			  kfmlp_get_idx(sem, sem->shortest_queue));
7932 +
7933 +	return to_enqueue;
7934 +}
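     +
     +/* Illustrative cost comparison for the advice above (made-up numbers):
     + * if replica 0 is the task's last GPU with a 30ms estimated backlog and a
     + * 10ms MIG_LOCAL estimate, while replica 1 is remote with a 25ms backlog
     + * and a 20ms migration-adjusted estimate, the task enqueues on replica 0
     + * (30 + 10 = 40 < 45).  Equal estimates fall back to the replica with the
     + * fewest current holders. */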
7935 +
7936 +struct task_struct* gpu_kfmlp_advise_steal(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from)
7937 +{
7938 +	struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
7939 +
7940 +	// For now, just steal highest priority waiter
7941 +	// TODO: Implement affinity-aware stealing.
7942 +
7943 +	return kfmlp_select_hp_steal(sem, to_steal, to_steal_from);
7944 +}
7945 +
7946 +
7947 +void gpu_kfmlp_notify_enqueue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
7948 +{
7949 +	struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
7950 +	int replica = kfmlp_get_idx(sem, fq);
7951 +	int gpu = replica_to_gpu(aff, replica);
7952 +	struct kfmlp_queue_info *info = &aff->q_info[replica];
7953 +	lt_t est_time;
7954 +	lt_t est_len_before;
7955 +
7956 +	if(current == t) {
7957 +		tsk_rt(t)->suspend_gpu_tracker_on_block = 1;
7958 +	}
7959 +
7960 +	est_len_before = info->estimated_len;
7961 +	est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
7962 +	info->estimated_len += est_time;
7963 +
7964 +	TRACE_CUR("fq %d: q_len (%llu) + est_cs (%llu) = %llu\n",
7965 +			  kfmlp_get_idx(sem, info->q),
7966 +			  est_len_before, est_time,
7967 +			  info->estimated_len);
7968 +
7969 +//	if(aff->shortest_queue == info) {
7970 +//		// we may no longer be the shortest
7971 +//		aff->shortest_queue = kfmlp_aff_find_shortest(aff);
7972 +//
7973 +//		TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
7974 +//				  kfmlp_get_idx(sem, aff->shortest_queue->q),
7975 +//				  aff->shortest_queue->q->count,
7976 +//				  aff->shortest_queue->estimated_len);
7977 +//	}
7978 +}
7979 +
7980 +void gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
7981 +{
7982 +	struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
7983 +	int replica = kfmlp_get_idx(sem, fq);
7984 +	int gpu = replica_to_gpu(aff, replica);
7985 +	struct kfmlp_queue_info *info = &aff->q_info[replica];
7986 +	lt_t est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
7987 +
7988 +	if(est_time > info->estimated_len) {
7989 +		WARN_ON(1);
7990 +		info->estimated_len = 0;
7991 +	}
7992 +	else {
7993 +		info->estimated_len -= est_time;
7994 +	}
7995 +
7996 +	TRACE_CUR("fq %d est len is now %llu\n",
7997 +			  kfmlp_get_idx(sem, info->q),
7998 +			  info->estimated_len);
7999 +
8000 +	// check to see if we're the shortest queue now.
8001 +//	if((aff->shortest_queue != info) &&
8002 +//	   (aff->shortest_queue->estimated_len > info->estimated_len)) {
8003 +//
8004 +//		aff->shortest_queue = info;
8005 +//
8006 +//		TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
8007 +//				  kfmlp_get_idx(sem, info->q),
8008 +//				  info->q->count,
8009 +//				  info->estimated_len);
8010 +//	}
8011 +}
8012 +
8013 +void gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
8014 +{
8015 +	struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
8016 +	int replica = kfmlp_get_idx(sem, fq);
8017 +	int gpu = replica_to_gpu(aff, replica);
8018 +
8019 +	tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu);  // record the type of migration
8020 +
8021 +	TRACE_CUR("%s/%d acquired gpu %d.  migration type = %d\n",
8022 +			  t->comm, t->pid, gpu, tsk_rt(t)->gpu_migration);
8023 +
8024 +	// count the number of resource holders
8025 +	++(*(aff->q_info[replica].nr_cur_users));
8026 +
8027 +	reg_nv_device(gpu, 1, t);  // register
8028 +
8029 +	tsk_rt(t)->suspend_gpu_tracker_on_block = 0;
8030 +	reset_gpu_tracker(t);
8031 +	start_gpu_tracker(t);
8032 +}
8033 +
8034 +void gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
8035 +{
8036 +	struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
8037 +	int replica = kfmlp_get_idx(sem, fq);
8038 +	int gpu = replica_to_gpu(aff, replica);
8039 +	lt_t est_time;
8040 +
8041 +	stop_gpu_tracker(t);  // stop the tracker before we do anything else.
8042 +
8043 +	est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
8044 +
8045 +	tsk_rt(t)->last_gpu = gpu;
8046 +
8047 +	// count the number of resource holders
8048 +	--(*(aff->q_info[replica].nr_cur_users));
8049 +
8050 +	reg_nv_device(gpu, 0, t);	// unregister
8051 +
8052 +	// update estimates
8053 +	update_gpu_estimate(t, get_gpu_time(t));
8054 +
8055 +	TRACE_CUR("%s/%d freed gpu %d.  actual time was %llu.  estimated was %llu.  diff is %d\n",
8056 +			  t->comm, t->pid, gpu,
8057 +			  get_gpu_time(t),
8058 +			  est_time,
8059 +			  (long long)get_gpu_time(t) - (long long)est_time);
8060 +}
8061 +
8062 +struct kfmlp_affinity_ops gpu_kfmlp_affinity =
8063 +{
8064 +	.advise_enqueue = gpu_kfmlp_advise_enqueue,
8065 +	.advise_steal = gpu_kfmlp_advise_steal,
8066 +	.notify_enqueue = gpu_kfmlp_notify_enqueue,
8067 +	.notify_dequeue = gpu_kfmlp_notify_dequeue,
8068 +	.notify_acquired = gpu_kfmlp_notify_acquired,
8069 +	.notify_freed = gpu_kfmlp_notify_freed,
8070 +	.replica_to_resource = gpu_replica_to_resource,
8071 +};
8072 +
8073 +struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops* ops,
8074 +											void* __user args)
8075 +{
8076 +	return kfmlp_aff_obs_new(ops, &gpu_kfmlp_affinity, args);
8077 +}
8078 +
8079 +
8080 +
8081 +
8082 +
8083 +
8084 +
8085 +
8086 +// Simple KFMLP Affinity (standard KFMLP with auto-gpu registration)
8087 +
8088 +struct kfmlp_queue* simple_gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t)
8089 +{
8090 +	struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
8091 +	int min_count;
8092 +	int min_nr_users;
8093 +	struct kfmlp_queue_info *shortest;
8094 +	struct kfmlp_queue *to_enqueue;
8095 +	int i;
8096 +
8097 +//	TRACE_CUR("Simple GPU KFMLP advise_enqueue invoked\n");
8098 +
8099 +	shortest = &aff->q_info[0];
8100 +	min_count = shortest->q->count;
8101 +	min_nr_users = *(shortest->nr_cur_users);
8102 +
8103 +	TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
8104 +			  kfmlp_get_idx(sem, shortest->q),
8105 +			  shortest->q->count,
8106 +			  min_nr_users);
8107 +
8108 +	for(i = 1; i < sem->num_resources; ++i) {
8109 +		int len = aff->q_info[i].q->count;
8110 +
8111 +		// pick this queue if it is shorter, or if the lengths are equal
8112 +		// and it has fewer total users.
8113 +		//
8114 +		// tie-breaking on the smallest number of simultaneous users only
8115 +		// kicks in when there is more than one empty queue.
8116 +		if((len < min_count) ||
8117 +		   ((len == min_count) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
8118 +			shortest = &aff->q_info[i];
8119 +			min_count = shortest->q->count;
8120 +			min_nr_users = *(aff->q_info[i].nr_cur_users);
8121 +		}
8122 +
8123 +		TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
8124 +				  kfmlp_get_idx(sem, aff->q_info[i].q),
8125 +				  aff->q_info[i].q->count,
8126 +				  *(aff->q_info[i].nr_cur_users));
8127 +	}
8128 +
8129 +	to_enqueue = shortest->q;
8130 +	TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n",
8131 +			  kfmlp_get_idx(sem, to_enqueue),
8132 +			  kfmlp_get_idx(sem, sem->shortest_queue));
8133 +
8134 +	return to_enqueue;
8135 +}
8136 +
8137 +struct task_struct* simple_gpu_kfmlp_advise_steal(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from)
8138 +{
8139 +	struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
8140 +//	TRACE_CUR("Simple GPU KFMLP advise_steal invoked\n");
8141 +	return kfmlp_select_hp_steal(sem, to_steal, to_steal_from);
8142 +}
8143 +
8144 +void simple_gpu_kfmlp_notify_enqueue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
8145 +{
8146 +//	TRACE_CUR("Simple GPU KFMLP notify_enqueue invoked\n");
8147 +}
8148 +
8149 +void simple_gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
8150 +{
8151 +//	TRACE_CUR("Simple GPU KFMLP notify_dequeue invoked\n");
8152 +}
8153 +
8154 +void simple_gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
8155 +{
8156 +	struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
8157 +	int replica = kfmlp_get_idx(sem, fq);
8158 +	int gpu = replica_to_gpu(aff, replica);
8159 +
8160 +//	TRACE_CUR("Simple GPU KFMLP notify_acquired invoked\n");
8161 +
8162 +	// count the number of resource holders
8163 +	++(*(aff->q_info[replica].nr_cur_users));
8164 +
8165 +	reg_nv_device(gpu, 1, t);  // register
8166 +}
8167 +
8168 +void simple_gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
8169 +{
8170 +	struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
8171 +	int replica = kfmlp_get_idx(sem, fq);
8172 +	int gpu = replica_to_gpu(aff, replica);
8173 +
8174 +//	TRACE_CUR("Simple GPU KFMLP notify_freed invoked\n");
8175 +	// count the number of resource holders
8176 +	--(*(aff->q_info[replica].nr_cur_users));
8177 +
8178 +	reg_nv_device(gpu, 0, t);	// unregister
8179 +}
8180 +
8181 +struct kfmlp_affinity_ops simple_gpu_kfmlp_affinity =
8182 +{
8183 +	.advise_enqueue = simple_gpu_kfmlp_advise_enqueue,
8184 +	.advise_steal = simple_gpu_kfmlp_advise_steal,
8185 +	.notify_enqueue = simple_gpu_kfmlp_notify_enqueue,
8186 +	.notify_dequeue = simple_gpu_kfmlp_notify_dequeue,
8187 +	.notify_acquired = simple_gpu_kfmlp_notify_acquired,
8188 +	.notify_freed = simple_gpu_kfmlp_notify_freed,
8189 +	.replica_to_resource = gpu_replica_to_resource,
8190 +};
8191 +
8192 +struct affinity_observer* kfmlp_simple_gpu_aff_obs_new(struct affinity_observer_ops* ops,
8193 +												void* __user args)
8194 +{
8195 +	return kfmlp_aff_obs_new(ops, &simple_gpu_kfmlp_affinity, args);
8196 +}
8197 +
8198 +#endif
8199 +
8200 diff --git a/litmus/litmus.c b/litmus/litmus.c
8201 index 3013901..d1f836c 100644
8202 --- a/litmus/litmus.c
8203 +++ b/litmus/litmus.c
8204 @@ -21,6 +21,10 @@
8205  #include <litmus/affinity.h>
8206  #endif
8207  
8208 +#ifdef CONFIG_LITMUS_NVIDIA
8209 +#include <litmus/nvidia_info.h>
8210 +#endif
8211 +
8212  /* Number of RT tasks that exist in the system */
8213  atomic_t rt_task_count 		= ATOMIC_INIT(0);
8214  static DEFINE_RAW_SPINLOCK(task_transition_lock);
8215 @@ -51,6 +55,28 @@ void bheap_node_free(struct bheap_node* hn)
8216  struct release_heap* release_heap_alloc(int gfp_flags);
8217  void release_heap_free(struct release_heap* rh);
8218  
8219 +#ifdef CONFIG_LITMUS_NVIDIA
8220 +/*
8221 + * sys_register_nv_device
8222 + * @nv_device_id: the Nvidia device id that the task wants to register
8223 + * @reg_action: set to '1' to register the specified device, zero to unregister.
8224 + * Syscall to register the task's designated Nvidia device in the NV_DEVICE_REG array.
8225 + * Returns EFAULT if nv_device_id is out of range,
8226 + *	   0 on success.
8227 + */
8228 +asmlinkage long sys_register_nv_device(int nv_device_id, int reg_action)
8229 +{
8230 +	/* register the device to caller (aka 'current') */
8231 +	return(reg_nv_device(nv_device_id, reg_action, current));
8232 +}
8233 +#else
8234 +asmlinkage long sys_register_nv_device(int nv_device_id, int reg_action)
8235 +{
8236 +	return(-EINVAL);
8237 +}
8238 +#endif
8239 +
8240 +
8241  /*
8242   * sys_set_task_rt_param
8243   * @pid: Pid of the task which scheduling parameters must be changed
8244 @@ -269,6 +295,7 @@ asmlinkage long sys_query_job_no(unsigned int __user *job)
8245  	return retval;
8246  }
8247  
8248 +
8249  /* sys_null_call() is only used for determining raw system call
8250   * overheads (kernel entry, kernel exit). It has no useful side effects.
8251   * If ts is non-NULL, then the current Feather-Trace time is recorded.
8252 @@ -286,12 +313,42 @@ asmlinkage long sys_null_call(cycles_t __user *ts)
8253  	return ret;
8254  }
8255  
8256 +
8257 +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
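     +/* The a/b pairs below are fixed-point coefficients: _frac(7550, 10000)
     + * encodes the ratio 7550/10000, i.e. roughly 0.755.  One pair is stored per
     + * array slot, presumably one slot per migration-distance class; how a and b
     + * are combined is defined by the feedback estimator elsewhere in the patch
     + * and is not shown in this hunk. */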
8258 +void init_gpu_affinity_state(struct task_struct* p)
8259 +{
8260 +	// under-damped
8261 +	//p->rt_param.gpu_fb_param_a = _frac(14008, 10000);
8262 +	//p->rt_param.gpu_fb_param_b = _frac(16024, 10000);
8263 +
8264 +	// empirical
8265 +	p->rt_param.gpu_fb_param_a[0] = _frac(7550, 10000);
8266 +	p->rt_param.gpu_fb_param_b[0] = _frac(45800, 10000);
8267 +
8268 +	p->rt_param.gpu_fb_param_a[1] = _frac(8600, 10000);
8269 +	p->rt_param.gpu_fb_param_b[1] = _frac(40000, 10000);
8270 +
8271 +	p->rt_param.gpu_fb_param_a[2] = _frac(6890, 10000);
8272 +	p->rt_param.gpu_fb_param_b[2] = _frac(40000, 10000);
8273 +
8274 +	p->rt_param.gpu_fb_param_a[3] = _frac(7580, 10000);
8275 +	p->rt_param.gpu_fb_param_b[3] = _frac(34590, 10000);
8276 +
8277 +	p->rt_param.gpu_migration = MIG_NONE;
8278 +	p->rt_param.last_gpu = -1;
8279 +}
8280 +#endif
8281 +
8282  /* p is a real-time task. Re-init its state as a best-effort task. */
8283  static void reinit_litmus_state(struct task_struct* p, int restore)
8284  {
8285  	struct rt_task  user_config = {};
8286  	void*  ctrl_page     = NULL;
8287  
8288 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
8289 +	binheap_order_t	prio_order = NULL;
8290 +#endif
8291 +
8292  	if (restore) {
8293  		/* Safe user-space provided configuration data.
8294  		 * and allocated page. */
8295 @@ -299,11 +356,38 @@ static void reinit_litmus_state(struct task_struct* p, int restore)
8296  		ctrl_page   = p->rt_param.ctrl_page;
8297  	}
8298  
8299 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
8300 +	prio_order = p->rt_param.hp_blocked_tasks.compare;
8301 +#endif
8302 +
8303  	/* We probably should not be inheriting any task's priority
8304  	 * at this point in time.
8305  	 */
8306  	WARN_ON(p->rt_param.inh_task);
8307  
8308 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
8309 +	WARN_ON(p->rt_param.blocked_lock);
8310 +    WARN_ON(!binheap_empty(&p->rt_param.hp_blocked_tasks));
8311 +#endif
8312 +
8313 +#ifdef CONFIG_LITMUS_SOFTIRQD
8314 +	/* We probably should not have any tasklets executing for
8315 +     * us at this time.
8316 +	 */
8317 +    WARN_ON(p->rt_param.cur_klitirqd);
8318 +	WARN_ON(atomic_read(&p->rt_param.klitirqd_sem_stat) == HELD);
8319 +
8320 +	if(p->rt_param.cur_klitirqd)
8321 +		flush_pending(p->rt_param.cur_klitirqd, p);
8322 +
8323 +	if(atomic_read(&p->rt_param.klitirqd_sem_stat) == HELD)
8324 +		up_and_set_stat(p, NOT_HELD, &p->rt_param.klitirqd_sem);
8325 +#endif
8326 +
8327 +#ifdef CONFIG_LITMUS_NVIDIA
8328 +	WARN_ON(p->rt_param.held_gpus != 0);
8329 +#endif
8330 +
8331  	/* Cleanup everything else. */
8332  	memset(&p->rt_param, 0, sizeof(p->rt_param));
8333  
8334 @@ -312,6 +396,15 @@ static void reinit_litmus_state(struct task_struct* p, int restore)
8335  		p->rt_param.task_params = user_config;
8336  		p->rt_param.ctrl_page   = ctrl_page;
8337  	}
8338 +
8339 +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
8340 +	init_gpu_affinity_state(p);
8341 +#endif
8342 +
8343 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
8344 +	INIT_BINHEAP_HANDLE(&p->rt_param.hp_blocked_tasks, prio_order);
8345 +	raw_spin_lock_init(&p->rt_param.hp_blocked_tasks_lock);
8346 +#endif
8347  }
8348  
8349  long litmus_admit_task(struct task_struct* tsk)
8350 @@ -358,6 +451,26 @@ long litmus_admit_task(struct task_struct* tsk)
8351  		bheap_node_init(&tsk_rt(tsk)->heap_node, tsk);
8352  	}
8353  
8354 +
8355 +#ifdef CONFIG_LITMUS_NVIDIA
8356 +	atomic_set(&tsk_rt(tsk)->nv_int_count, 0);
8357 +#endif
8358 +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
8359 +	init_gpu_affinity_state(tsk);
8360 +#endif
8361 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
8362 +	tsk_rt(tsk)->blocked_lock = NULL;
8363 +	raw_spin_lock_init(&tsk_rt(tsk)->hp_blocked_tasks_lock);
8364 +	//INIT_BINHEAP_HANDLE(&tsk_rt(tsk)->hp_blocked_tasks, prio_order);  // done by scheduler
8365 +#endif
8366 +#ifdef CONFIG_LITMUS_SOFTIRQD
8367 +	/* proxy thread off by default */
8368 +	tsk_rt(tsk)->is_proxy_thread = 0;
8369 +	tsk_rt(tsk)->cur_klitirqd = NULL;
8370 +	mutex_init(&tsk_rt(tsk)->klitirqd_sem);
8371 +	atomic_set(&tsk_rt(tsk)->klitirqd_sem_stat, NOT_HELD);
8372 +#endif
8373 +
8374  	retval = litmus->admit_task(tsk);
8375  
8376  	if (!retval) {
8377 @@ -403,7 +516,7 @@ static void synch_on_plugin_switch(void* info)
8378   */
8379  int switch_sched_plugin(struct sched_plugin* plugin)
8380  {
8381 -	unsigned long flags;
8382 +	//unsigned long flags;
8383  	int ret = 0;
8384  
8385  	BUG_ON(!plugin);
8386 @@ -417,8 +530,15 @@ int switch_sched_plugin(struct sched_plugin* plugin)
8387  	while (atomic_read(&cannot_use_plugin) < num_online_cpus())
8388  		cpu_relax();
8389  
8390 +#ifdef CONFIG_LITMUS_SOFTIRQD
8391 +	if(!klitirqd_is_dead())
8392 +	{
8393 +		kill_klitirqd();
8394 +	}
8395 +#endif
8396 +
8397  	/* stop task transitions */
8398 -	raw_spin_lock_irqsave(&task_transition_lock, flags);
8399 +	//raw_spin_lock_irqsave(&task_transition_lock, flags);
8400  
8401  	/* don't switch if there are active real-time tasks */
8402  	if (atomic_read(&rt_task_count) == 0) {
8403 @@ -436,7 +556,7 @@ int switch_sched_plugin(struct sched_plugin* plugin)
8404  	} else
8405  		ret = -EBUSY;
8406  out:
8407 -	raw_spin_unlock_irqrestore(&task_transition_lock, flags);
8408 +	//raw_spin_unlock_irqrestore(&task_transition_lock, flags);
8409  	atomic_set(&cannot_use_plugin, 0);
8410  	return ret;
8411  }
8412 diff --git a/litmus/litmus_pai_softirq.c b/litmus/litmus_pai_softirq.c
8413 new file mode 100644
8414 index 0000000..300571a
8415 --- /dev/null
8416 +++ b/litmus/litmus_pai_softirq.c
8417 @@ -0,0 +1,64 @@
8418 +#include <linux/interrupt.h>
8419 +#include <linux/percpu.h>
8420 +#include <linux/cpu.h>
8421 +#include <linux/kthread.h>
8422 +#include <linux/ftrace.h>
8423 +#include <linux/smp.h>
8424 +#include <linux/slab.h>
8425 +#include <linux/mutex.h>
8426 +
8427 +#include <linux/sched.h>
8428 +#include <linux/cpuset.h>
8429 +
8430 +#include <litmus/litmus.h>
8431 +#include <litmus/sched_trace.h>
8432 +#include <litmus/jobs.h>
8433 +#include <litmus/sched_plugin.h>
8434 +#include <litmus/litmus_softirq.h>
8435 +
8436 +
8437 +
8438 +int __litmus_tasklet_schedule(struct tasklet_struct *t, unsigned int k_id)
8439 +{
8440 +	int ret = 0; /* assume failure */
8441 +    if(unlikely((t->owner == NULL) || !is_realtime(t->owner)))
8442 +    {
8443 +        TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
8444 +        BUG();
8445 +    }
8446 +
8447 +    ret = litmus->enqueue_pai_tasklet(t);
8448 +
8449 +	return(ret);
8450 +}
8451 +
8452 +EXPORT_SYMBOL(__litmus_tasklet_schedule);
8453 +
8454 +
8455 +
8456 +// failure causes default Linux handling.
8457 +int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, unsigned int k_id)
8458 +{
8459 +	int ret = 0; /* assume failure */
8460 +	return(ret);
8461 +}
8462 +EXPORT_SYMBOL(__litmus_tasklet_hi_schedule);
8463 +
8464 +
8465 +// failure causes default Linux handling.
8466 +int __litmus_tasklet_hi_schedule_first(struct tasklet_struct *t, unsigned int k_id)
8467 +{
8468 +	int ret = 0; /* assume failure */
8469 +	return(ret);
8470 +}
8471 +EXPORT_SYMBOL(__litmus_tasklet_hi_schedule_first);
8472 +
8473 +
8474 +// failure causes default Linux handling.
8475 +int __litmus_schedule_work(struct work_struct *w, unsigned int k_id)
8476 +{
8477 +	int ret = 0; /* assume failure */
8478 +	return(ret);
8479 +}
8480 +EXPORT_SYMBOL(__litmus_schedule_work);
8481 +
8482 diff --git a/litmus/litmus_proc.c b/litmus/litmus_proc.c
8483 index 4bf725a..9ab7e01 100644
8484 --- a/litmus/litmus_proc.c
8485 +++ b/litmus/litmus_proc.c
8486 @@ -20,11 +20,18 @@ static struct proc_dir_entry *litmus_dir = NULL,
8487  #ifdef CONFIG_RELEASE_MASTER
8488  	*release_master_file = NULL,
8489  #endif
8490 +#ifdef CONFIG_LITMUS_SOFTIRQD
8491 +	*klitirqd_file = NULL,
8492 +#endif
8493  	*plugs_file = NULL;
8494  
8495  /* in litmus/sync.c */
8496  int count_tasks_waiting_for_release(void);
8497  
8498 +extern int proc_read_klitirqd_stats(char *page, char **start,
8499 +									off_t off, int count,
8500 +									int *eof, void *data);
8501 +
8502  static int proc_read_stats(char *page, char **start,
8503  			   off_t off, int count,
8504  			   int *eof, void *data)
8505 @@ -161,6 +168,12 @@ int __init init_litmus_proc(void)
8506  	release_master_file->write_proc  = proc_write_release_master;
8507  #endif
8508  
8509 +#ifdef CONFIG_LITMUS_SOFTIRQD
8510 +	klitirqd_file =
8511 +		create_proc_read_entry("klitirqd_stats", 0444, litmus_dir,
8512 +							   proc_read_klitirqd_stats, NULL);
8513 +#endif
8514 +
8515  	stat_file = create_proc_read_entry("stats", 0444, litmus_dir,
8516  					   proc_read_stats, NULL);
8517  
8518 @@ -187,6 +200,10 @@ void exit_litmus_proc(void)
8519  		remove_proc_entry("stats", litmus_dir);
8520  	if (curr_file)
8521  		remove_proc_entry("active_plugin", litmus_dir);
8522 +#ifdef CONFIG_LITMUS_SOFTIRQD
8523 +	if (klitirqd_file)
8524 +		remove_proc_entry("klitirqd_stats", litmus_dir);
8525 +#endif
8526  #ifdef CONFIG_RELEASE_MASTER
8527  	if (release_master_file)
8528  		remove_proc_entry("release_master", litmus_dir);
8529 diff --git a/litmus/litmus_softirq.c b/litmus/litmus_softirq.c
8530 new file mode 100644
8531 index 0000000..9f7d9da
8532 --- /dev/null
8533 +++ b/litmus/litmus_softirq.c
8534 @@ -0,0 +1,1582 @@
8535 +#include <linux/interrupt.h>
8536 +#include <linux/percpu.h>
8537 +#include <linux/cpu.h>
8538 +#include <linux/kthread.h>
8539 +#include <linux/ftrace.h>
8540 +#include <linux/smp.h>
8541 +#include <linux/slab.h>
8542 +#include <linux/mutex.h>
8543 +
8544 +#include <linux/sched.h>
8545 +#include <linux/cpuset.h>
8546 +
8547 +#include <litmus/litmus.h>
8548 +#include <litmus/sched_trace.h>
8549 +#include <litmus/jobs.h>
8550 +#include <litmus/sched_plugin.h>
8551 +#include <litmus/litmus_softirq.h>
8552 +
8553 +/* TODO: Remove unneeded mb() and other barriers. */
8554 +
8555 +
8556 +/* counts number of daemons ready to handle litmus irqs. */
8557 +static atomic_t num_ready_klitirqds = ATOMIC_INIT(0);
8558 +
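+/* Bits of klitirqd_info::pending.  One bit per queue type; queues are
+ * serviced in priority order: HI tasklets, then LOW tasklets, then work
+ * objects (see __reeval_prio() and do_litirq() below). */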
8559 +enum pending_flags
8560 +{
8561 +    LIT_TASKLET_LOW = 0x1,
8562 +    LIT_TASKLET_HI  = LIT_TASKLET_LOW<<1,
8563 +	LIT_WORK = LIT_TASKLET_HI<<1
8564 +};
8565 +
8566 +/* only support tasklet processing for now. */
8567 +struct tasklet_head
8568 +{
8569 +	struct tasklet_struct *head;
8570 +	struct tasklet_struct **tail;
8571 +};
8572 +
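+/* Per-daemon state: the daemon thread itself, the task whose priority it
+ * currently inherits (current_owner), and the queues of deferred tasklets
+ * and work objects, all protected by 'lock'. */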
8573 +struct klitirqd_info
8574 +{
8575 +	struct task_struct*		klitirqd;
8576 +    struct task_struct*     current_owner;
8577 +    int						terminating;
8578 +
8579 +
8580 +	raw_spinlock_t			lock;
8581 +
8582 +	u32						pending;
8583 +	atomic_t				num_hi_pending;
8584 +	atomic_t				num_low_pending;
8585 +	atomic_t				num_work_pending;
8586 +
8587 +	/* in order of priority */
8588 +	struct tasklet_head     pending_tasklets_hi;
8589 +	struct tasklet_head		pending_tasklets;
8590 +	struct list_head		worklist;
8591 +};
8592 +
8593 +/* one list for each klitirqd */
8594 +static struct klitirqd_info klitirqds[NR_LITMUS_SOFTIRQD];
8595 +
8596 +
8597 +
8598 +
8599 +
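+/* /proc/litmus/klitirqd_stats: reports, for each daemon, its thread, its
+ * current owner, and the number of pending HI/LOW tasklets and work objects. */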
8600 +int proc_read_klitirqd_stats(char *page, char **start,
8601 +							 off_t off, int count,
8602 +							 int *eof, void *data)
8603 +{
8604 +	int len = snprintf(page, PAGE_SIZE,
8605 +				"num ready klitirqds: %d\n\n",
8606 +				atomic_read(&num_ready_klitirqds));
8607 +
8608 +	if(klitirqd_is_ready())
8609 +	{
8610 +		int i;
8611 +		for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
8612 +		{
8613 +			len +=
8614 +				snprintf(page + len - 1, PAGE_SIZE - (len - 1), /* -1 to strip off \0 */
8615 +						 "klitirqd_th%d: %s/%d\n"
8616 +						 "\tcurrent_owner: %s/%d\n"
8617 +						 "\tpending: %x\n"
8618 +						 "\tnum hi: %d\n"
8619 +						 "\tnum low: %d\n"
8620 +						 "\tnum work: %d\n\n",
8621 +						 i,
8622 +						 klitirqds[i].klitirqd->comm, klitirqds[i].klitirqd->pid,
8623 +						 (klitirqds[i].current_owner != NULL) ?
8624 +						 	klitirqds[i].current_owner->comm : "(null)",
8625 +						 (klitirqds[i].current_owner != NULL) ?
8626 +							klitirqds[i].current_owner->pid : 0,
8627 +						 klitirqds[i].pending,
8628 +						 atomic_read(&klitirqds[i].num_hi_pending),
8629 +						 atomic_read(&klitirqds[i].num_low_pending),
8630 +						 atomic_read(&klitirqds[i].num_work_pending));
8631 +		}
8632 +	}
8633 +
8634 +	return(len);
8635 +}
8636 +
8637 +
8638 +
8639 +
8640 +
8641 +#if 0
8642 +static atomic_t dump_id = ATOMIC_INIT(0);
8643 +
8644 +static void __dump_state(struct klitirqd_info* which, const char* caller)
8645 +{
8646 +	struct tasklet_struct* list;
8647 +
8648 +	int id = atomic_inc_return(&dump_id);
8649 +
8650 +	//if(in_interrupt())
8651 +	{
8652 +		if(which->current_owner)
8653 +		{
8654 +			TRACE("(id: %d  caller: %s)\n"
8655 +				"klitirqd: %s/%d\n"
8656 +				"current owner: %s/%d\n"
8657 +				"pending: %x\n",
8658 +				id, caller,
8659 +				which->klitirqd->comm, which->klitirqd->pid,
8660 +				which->current_owner->comm, which->current_owner->pid,
8661 +				which->pending);
8662 +		}
8663 +		else
8664 +		{
8665 +			TRACE("(id: %d  caller: %s)\n"
8666 +				"klitirqd: %s/%d\n"
8667 +				"current owner: %p\n"
8668 +				"pending: %x\n",
8669 +				id, caller,
8670 +				which->klitirqd->comm, which->klitirqd->pid,
8671 +				NULL,
8672 +				which->pending);
8673 +		}
8674 +
8675 +		list = which->pending_tasklets.head;
8676 +		while(list)
8677 +		{
8678 +			struct tasklet_struct *t = list;
8679 +			list = list->next; /* advance */
8680 +			if(t->owner)
8681 +				TRACE("(id: %d  caller: %s) Tasklet: %x, Owner = %s/%d\n", id, caller, t, t->owner->comm, t->owner->pid);
8682 +			else
8683 +				TRACE("(id: %d  caller: %s) Tasklet: %x, Owner = %p\n", id, caller, t, NULL);
8684 +		}
8685 +	}
8686 +}
8687 +
8688 +static void dump_state(struct klitirqd_info* which, const char* caller)
8689 +{
8690 +	unsigned long flags;
8691 +
8692 +	raw_spin_lock_irqsave(&which->lock, flags);
8693 +    __dump_state(which, caller);
8694 +    raw_spin_unlock_irqrestore(&which->lock, flags);
8695 +}
8696 +#endif
8697 +
8698 +
8699 +/* forward declarations */
8700 +static void ___litmus_tasklet_schedule(struct tasklet_struct *t,
8701 +									   struct klitirqd_info *which,
8702 +									   int wakeup);
8703 +static void ___litmus_tasklet_hi_schedule(struct tasklet_struct *t,
8704 +										  struct klitirqd_info *which,
8705 +										  int wakeup);
8706 +static void ___litmus_schedule_work(struct work_struct *w,
8707 +									struct klitirqd_info *which,
8708 +									int wakeup);
8709 +
8710 +
8711 +
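+/* Map a klitirqd thread back to its index in klitirqds[].  Must only be
+ * called with an actual daemon thread (otherwise BUG). */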
8712 +inline unsigned int klitirqd_id(struct task_struct* tsk)
8713 +{
8714 +    int i;
8715 +    for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
8716 +    {
8717 +        if(klitirqds[i].klitirqd == tsk)
8718 +        {
8719 +            return i;
8720 +        }
8721 +    }
8722 +
8723 +    BUG();
8724 +
8725 +    return 0;
8726 +}
8727 +
8728 +
8729 +inline static u32 litirq_pending_hi_irqoff(struct klitirqd_info* which)
8730 +{
8731 +    return (which->pending & LIT_TASKLET_HI);
8732 +}
8733 +
8734 +inline static u32 litirq_pending_low_irqoff(struct klitirqd_info* which)
8735 +{
8736 +    return (which->pending & LIT_TASKLET_LOW);
8737 +}
8738 +
8739 +inline static u32 litirq_pending_work_irqoff(struct klitirqd_info* which)
8740 +{
8741 +	return (which->pending & LIT_WORK);
8742 +}
8743 +
8744 +inline static u32 litirq_pending_irqoff(struct klitirqd_info* which)
8745 +{
8746 +    return(which->pending);
8747 +}
8748 +
8749 +
8750 +inline static u32 litirq_pending(struct klitirqd_info* which)
8751 +{
8752 +    unsigned long flags;
8753 +    u32 pending;
8754 +
8755 +    raw_spin_lock_irqsave(&which->lock, flags);
8756 +    pending = litirq_pending_irqoff(which);
8757 +    raw_spin_unlock_irqrestore(&which->lock, flags);
8758 +
8759 +    return pending;
8760 +};
8761 +
8762 +inline static u32 litirq_pending_with_owner(struct klitirqd_info* which, struct task_struct* owner)
8763 +{
8764 +	unsigned long flags;
8765 +	u32 pending;
8766 +
8767 +	raw_spin_lock_irqsave(&which->lock, flags);
8768 +	pending = litirq_pending_irqoff(which);
8769 +	if(pending)
8770 +	{
8771 +		if(which->current_owner != owner)
8772 +		{
8773 +			pending = 0;  // owner switch!
8774 +		}
8775 +	}
8776 +	raw_spin_unlock_irqrestore(&which->lock, flags);
8777 +
8778 +	return pending;
8779 +}
8780 +
8781 +
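+/* Snapshot, under which->lock, the pending mask together with the current
+ * owner and a pointer to that owner's klitirqd_sem so the caller gets a
+ * consistent view.  Returns 0 if nothing is pending. */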
8782 +inline static u32 litirq_pending_and_sem_and_owner(struct klitirqd_info* which,
8783 +				struct mutex** sem,
8784 +				struct task_struct** t)
8785 +{
8786 +	unsigned long flags;
8787 +	u32 pending;
8788 +
8789 +	/* init values */
8790 +	*sem = NULL;
8791 +	*t = NULL;
8792 +
8793 +	raw_spin_lock_irqsave(&which->lock, flags);
8794 +
8795 +	pending = litirq_pending_irqoff(which);
8796 +	if(pending)
8797 +	{
8798 +		if(which->current_owner != NULL)
8799 +		{
8800 +			*t = which->current_owner;
8801 +			*sem = &tsk_rt(which->current_owner)->klitirqd_sem;
8802 +		}
8803 +		else
8804 +		{
8805 +			BUG();
8806 +		}
8807 +	}
8808 +	raw_spin_unlock_irqrestore(&which->lock, flags);
8809 +
8810 +	if(likely(*sem))
8811 +	{
8812 +		return pending;
8813 +	}
8814 +	else
8815 +	{
8816 +		return 0;
8817 +	}
8818 +}
8819 +
8820 +/* returns true if the next piece of work to do is from a different owner.
8821 + */
8822 +static int tasklet_ownership_change(
8823 +				struct klitirqd_info* which,
8824 +				enum pending_flags taskletQ)
8825 +{
8826 +	/* this function doesn't have to look at work objects since they have
8827 +	   priority below tasklets. */
8828 +
8829 +    unsigned long flags;
8830 +    int ret = 0;
8831 +
8832 +    raw_spin_lock_irqsave(&which->lock, flags);
8833 +
8834 +	switch(taskletQ)
8835 +	{
8836 +	case LIT_TASKLET_HI:
8837 +		if(litirq_pending_hi_irqoff(which))
8838 +		{
8839 +			ret = (which->pending_tasklets_hi.head->owner !=
8840 +						which->current_owner);
8841 +		}
8842 +		break;
8843 +	case LIT_TASKLET_LOW:
8844 +		if(litirq_pending_low_irqoff(which))
8845 +		{
8846 +			ret = (which->pending_tasklets.head->owner !=
8847 +						which->current_owner);
8848 +		}
8849 +		break;
8850 +	default:
8851 +		break;
8852 +	}
8853 +
8854 +    raw_spin_unlock_irqrestore(&which->lock, flags);
8855 +
8856 +    TRACE_TASK(which->klitirqd, "ownership change needed: %d\n", ret);
8857 +
8858 +    return ret;
8859 +}
8860 +
8861 +
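+/* Recompute which task the daemon should inherit its priority from, based on
+ * the owner of the highest-priority pending item, and tell the plugin to
+ * raise or revert the inheritance if the owner changed.  Caller must hold
+ * which->lock. */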
8862 +static void __reeval_prio(struct klitirqd_info* which)
8863 +{
8864 +    struct task_struct* next_owner = NULL;
8865 +	struct task_struct* klitirqd = which->klitirqd;
8866 +
8867 +	/* Check in prio-order */
8868 +	u32 pending = litirq_pending_irqoff(which);
8869 +
8870 +	//__dump_state(which, "__reeval_prio: before");
8871 +
8872 +	if(pending)
8873 +	{
8874 +		if(pending & LIT_TASKLET_HI)
8875 +		{
8876 +			next_owner = which->pending_tasklets_hi.head->owner;
8877 +		}
8878 +		else if(pending & LIT_TASKLET_LOW)
8879 +		{
8880 +			next_owner = which->pending_tasklets.head->owner;
8881 +		}
8882 +		else if(pending & LIT_WORK)
8883 +		{
8884 +			struct work_struct* work =
8885 +				list_first_entry(&which->worklist, struct work_struct, entry);
8886 +			next_owner = work->owner;
8887 +		}
8888 +	}
8889 +
8890 +	if(next_owner != which->current_owner)
8891 +	{
8892 +		struct task_struct* old_owner = which->current_owner;
8893 +
8894 +		/* bind the next owner. */
8895 +		which->current_owner = next_owner;
8896 +		mb();
8897 +
8898 +        if(next_owner != NULL)
8899 +        {
8900 +			if(!in_interrupt())
8901 +			{
8902 +				TRACE_CUR("%s: Ownership change: %s/%d to %s/%d\n", __FUNCTION__,
8903 +						((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->comm,
8904 +						((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->pid,
8905 +						next_owner->comm, next_owner->pid);
8906 +			}
8907 +			else
8908 +			{
8909 +				TRACE("%s: Ownership change: %s/%d to %s/%d\n", __FUNCTION__,
8910 +					((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->comm,
8911 +					((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->pid,
8912 +					next_owner->comm, next_owner->pid);
8913 +			}
8914 +
8915 +			litmus->increase_prio_inheritance_klitirqd(klitirqd, old_owner, next_owner);
8916 +        }
8917 +        else
8918 +        {
8919 +			if(likely(!in_interrupt()))
8920 +			{
8921 +				TRACE_CUR("%s: Ownership change: %s/%d to NULL (reverting)\n",
8922 +						__FUNCTION__, klitirqd->comm, klitirqd->pid);
8923 +			}
8924 +			else
8925 +			{
8926 +				// is this a bug?
8927 +				TRACE("%s: Ownership change: %s/%d to NULL (reverting)\n",
8928 +					__FUNCTION__, klitirqd->comm, klitirqd->pid);
8929 +			}
8930 +
8931 +			BUG_ON(pending != 0);
8932 +			litmus->decrease_prio_inheritance_klitirqd(klitirqd, old_owner, NULL);
8933 +        }
8934 +    }
8935 +
8936 +	//__dump_state(which, "__reeval_prio: after");
8937 +}
8938 +
8939 +static void reeval_prio(struct klitirqd_info* which)
8940 +{
8941 +    unsigned long flags;
8942 +
8943 +    raw_spin_lock_irqsave(&which->lock, flags);
8944 +    __reeval_prio(which);
8945 +    raw_spin_unlock_irqrestore(&which->lock, flags);
8946 +}
8947 +
8948 +
8949 +static void wakeup_litirqd_locked(struct klitirqd_info* which)
8950 +{
8951 +	/* Interrupts are disabled: no need to stop preemption */
8952 +	if (which && which->klitirqd)
8953 +	{
8954 +        __reeval_prio(which); /* configure the proper priority */
8955 +
8956 +		if(which->klitirqd->state != TASK_RUNNING)
8957 +		{
8958 +        	TRACE("%s: Waking up klitirqd: %s/%d\n", __FUNCTION__,
8959 +			  	which->klitirqd->comm, which->klitirqd->pid);
8960 +
8961 +			wake_up_process(which->klitirqd);
8962 +		}
8963 +    }
8964 +}
8965 +
8966 +
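+/* Drain one tasklet queue: steal the entire list under which->lock, then run
+ * each tasklet that belongs to current_owner; tasklets that cannot run here
+ * (different owner, disabled, or already running) are put back on the queue. */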
8967 +static void do_lit_tasklet(struct klitirqd_info* which,
8968 +						   struct tasklet_head* pending_tasklets)
8969 +{
8970 +    unsigned long flags;
8971 +	struct tasklet_struct *list;
8972 +	atomic_t* count;
8973 +
8974 +    raw_spin_lock_irqsave(&which->lock, flags);
8975 +
8976 +	//__dump_state(which, "do_lit_tasklet: before steal");
8977 +
8978 +	/* copy out the tasklets for our private use. */
8979 +	list = pending_tasklets->head;
8980 +	pending_tasklets->head = NULL;
8981 +	pending_tasklets->tail = &pending_tasklets->head;
8982 +
8983 +	/* remove pending flag */
8984 +	which->pending &= (pending_tasklets == &which->pending_tasklets) ?
8985 +		~LIT_TASKLET_LOW :
8986 +		~LIT_TASKLET_HI;
8987 +
8988 +	count = (pending_tasklets == &which->pending_tasklets) ?
8989 +		&which->num_low_pending:
8990 +		&which->num_hi_pending;
8991 +
8992 +	//__dump_state(which, "do_lit_tasklet: after steal");
8993 +
8994 +    raw_spin_unlock_irqrestore(&which->lock, flags);
8995 +
8996 +
8997 +    while(list)
8998 +    {
8999 +        struct tasklet_struct *t = list;
9000 +
9001 +        /* advance, lest we forget */
9002 +		list = list->next;
9003 +
9004 +        /* execute tasklet if it has my priority and is free */
9005 +		if ((t->owner == which->current_owner) && tasklet_trylock(t)) {
9006 +			if (!atomic_read(&t->count)) {
9007 +
9008 +				sched_trace_tasklet_begin(t->owner);
9009 +
9010 +				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
9011 +                {
9012 +					BUG();
9013 +                }
9014 +                TRACE_CUR("%s: Invoking tasklet.\n", __FUNCTION__);
9015 +				t->func(t->data);
9016 +				tasklet_unlock(t);
9017 +
9018 +				atomic_dec(count);
9019 +
9020 +				sched_trace_tasklet_end(t->owner, 0ul);
9021 +
9022 +				continue;  /* process more tasklets */
9023 +			}
9024 +			tasklet_unlock(t);
9025 +		}
9026 +
9027 +        TRACE_CUR("%s: Could not invoke tasklet.  Requeuing.\n", __FUNCTION__);
9028 +
9029 +		/* couldn't process tasklet.  put it back at the end of the queue. */
9030 +		if(pending_tasklets == &which->pending_tasklets)
9031 +			___litmus_tasklet_schedule(t, which, 0);
9032 +		else
9033 +			___litmus_tasklet_hi_schedule(t, which, 0);
9034 +    }
9035 +}
9036 +
9037 +
9038 +// returns 1 if priorities need to be changed to continue processing
9039 +// pending tasklets.
9040 +static int do_litirq(struct klitirqd_info* which)
9041 +{
9042 +    u32 pending;
9043 +    int resched = 0;
9044 +
9045 +    if(in_interrupt())
9046 +    {
9047 +        TRACE("%s: exiting early: in interrupt context!\n", __FUNCTION__);
9048 +        return(0);
9049 +    }
9050 +
9051 +	if(which->klitirqd != current)
9052 +	{
9053 +        TRACE_CUR("%s: exiting early: thread/info mismatch! Running %s/%d but given %s/%d.\n",
9054 +				  __FUNCTION__, current->comm, current->pid,
9055 +				  which->klitirqd->comm, which->klitirqd->pid);
9056 +        return(0);
9057 +	}
9058 +
9059 +    if(!is_realtime(current))
9060 +    {
9061 +        TRACE_CUR("%s: exiting early: klitirqd is not real-time. Sched Policy = %d\n",
9062 +				  __FUNCTION__, current->policy);
9063 +        return(0);
9064 +    }
9065 +
9066 +
9067 +    /* We only handle tasklets & work objects, no need for RCU triggers? */
9068 +
9069 +    pending = litirq_pending(which);
9070 +    if(pending)
9071 +    {
9072 +        /* extract the work to do and do it! */
9073 +        if(pending & LIT_TASKLET_HI)
9074 +        {
9075 +            TRACE_CUR("%s: Invoking HI tasklets.\n", __FUNCTION__);
9076 +            do_lit_tasklet(which, &which->pending_tasklets_hi);
9077 +            resched = tasklet_ownership_change(which, LIT_TASKLET_HI);
9078 +
9079 +            if(resched)
9080 +            {
9081 +                TRACE_CUR("%s: HI tasklets of another owner remain. "
9082 +						  "Skipping any LOW tasklets.\n", __FUNCTION__);
9083 +            }
9084 +        }
9085 +
9086 +        if(!resched && (pending & LIT_TASKLET_LOW))
9087 +        {
9088 +            TRACE_CUR("%s: Invoking LOW tasklets.\n", __FUNCTION__);
9089 +			do_lit_tasklet(which, &which->pending_tasklets);
9090 +			resched = tasklet_ownership_change(which, LIT_TASKLET_LOW);
9091 +
9092 +            if(resched)
9093 +            {
9094 +                TRACE_CUR("%s: LOW tasklets of another owner remain. "
9095 +						  "Skipping any work objects.\n", __FUNCTION__);
9096 +            }
9097 +        }
9098 +    }
9099 +
9100 +	return(resched);
9101 +}
9102 +
9103 +
9104 +static void do_work(struct klitirqd_info* which)
9105 +{
9106 +	unsigned long flags;
9107 +	work_func_t f;
9108 +	struct work_struct* work;
9109 +
9110 +	// only execute one work-queue item to yield to tasklets.
9111 +	// ...is this a good idea, or should we just batch them?
9112 +	raw_spin_lock_irqsave(&which->lock, flags);
9113 +
9114 +	if(!litirq_pending_work_irqoff(which))
9115 +	{
9116 +		raw_spin_unlock_irqrestore(&which->lock, flags);
9117 +		goto no_work;
9118 +	}
9119 +
9120 +	work = list_first_entry(&which->worklist, struct work_struct, entry);
9121 +	list_del_init(&work->entry);
9122 +
9123 +	if(list_empty(&which->worklist))
9124 +	{
9125 +		which->pending &= ~LIT_WORK;
9126 +	}
9127 +
9128 +	raw_spin_unlock_irqrestore(&which->lock, flags);
9129 +
9130 +
9131 +
9132 +	/* safe to read current_owner outside of lock since only this thread
9133 +	 may write to the pointer. */
9134 +	if(work->owner == which->current_owner)
9135 +	{
9136 +		TRACE_CUR("%s: Invoking work object.\n", __FUNCTION__);
9137 +		// do the work!
9138 +		work_clear_pending(work);
9139 +		f = work->func;
9140 +		f(work);  /* can't touch 'work' after this point,
9141 +				   the user may have freed it. */
9142 +
9143 +		atomic_dec(&which->num_work_pending);
9144 +	}
9145 +	else
9146 +	{
9147 +		TRACE_CUR("%s: Could not invoke work object.  Requeuing.\n",
9148 +				  __FUNCTION__);
9149 +		___litmus_schedule_work(work, which, 0);
9150 +	}
9151 +
9152 +no_work:
9153 +	return;
9154 +}
9155 +
9156 +
9157 +static int set_litmus_daemon_sched(void)
9158 +{
9159 +    /* set up a daemon job that will never complete.
9160 +       it should only ever run on behalf of another
9161 +       real-time task.
9162 +
9163 +       TODO: Transition to a new job whenever a
9164 +       new tasklet is handled */
9165 +
9166 +    int ret = 0;
9167 +
9168 +	struct rt_task tp = {
9169 +		.exec_cost = 0,
9170 +		.period = 1000000000, /* dummy 1 second period */
9171 +		.phase = 0,
9172 +		.cpu = task_cpu(current),
9173 +		.budget_policy = NO_ENFORCEMENT,
9174 +		.cls = RT_CLASS_BEST_EFFORT
9175 +	};
9176 +
9177 +	struct sched_param param = { .sched_priority = 0};
9178 +
9179 +
9180 +	/* set task params, mark as proxy thread, and init other data */
9181 +	tsk_rt(current)->task_params = tp;
9182 +	tsk_rt(current)->is_proxy_thread = 1;
9183 +	tsk_rt(current)->cur_klitirqd = NULL;
9184 +	mutex_init(&tsk_rt(current)->klitirqd_sem);
9185 +	atomic_set(&tsk_rt(current)->klitirqd_sem_stat, NOT_HELD);
9186 +
9187 +	/* inform the OS we're SCHED_LITMUS --
9188 +	   sched_setscheduler_nocheck() calls litmus_admit_task(). */
9189 +	sched_setscheduler_nocheck(current, SCHED_LITMUS, &param);
9190 +
9191 +    return ret;
9192 +}
9193 +
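+/* The daemon may only execute deferred work on behalf of its current owner
+ * while holding that owner's klitirqd_sem.  enter_execution_phase() blocks
+ * until the semaphore is available (typically once the owner itself
+ * suspends); exit_execution_phase() releases it again. */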
9194 +static void enter_execution_phase(struct klitirqd_info* which,
9195 +								  struct mutex* sem,
9196 +								  struct task_struct* t)
9197 +{
9198 +	TRACE_CUR("%s: Trying to enter execution phase. "
9199 +			  "Acquiring semaphore of %s/%d\n", __FUNCTION__,
9200 +			  t->comm, t->pid);
9201 +	down_and_set_stat(current, HELD, sem);
9202 +	TRACE_CUR("%s: Execution phase entered! "
9203 +			  "Acquired semaphore of %s/%d\n", __FUNCTION__,
9204 +			  t->comm, t->pid);
9205 +}
9206 +
9207 +static void exit_execution_phase(struct klitirqd_info* which,
9208 +								 struct mutex* sem,
9209 +								 struct task_struct* t)
9210 +{
9211 +	TRACE_CUR("%s: Exiting execution phase. "
9212 +			  "Releasing semaphore of %s/%d\n", __FUNCTION__,
9213 +			  t->comm, t->pid);
9214 +	if(atomic_read(&tsk_rt(current)->klitirqd_sem_stat) == HELD)
9215 +	{
9216 +		up_and_set_stat(current, NOT_HELD, sem);
9217 +		TRACE_CUR("%s: Execution phase exited! "
9218 +				  "Released semaphore of %s/%d\n", __FUNCTION__,
9219 +				  t->comm, t->pid);
9220 +	}
9221 +	else
9222 +	{
9223 +		TRACE_CUR("%s: COULDN'T RELEASE SEMAPHORE BECAUSE ONE IS NOT HELD!\n", __FUNCTION__);
9224 +	}
9225 +}
9226 +
9227 +/* main loop for klitsoftirqd */
9228 +static int run_klitirqd(void* unused)
9229 +{
9230 +	struct klitirqd_info* which = &klitirqds[klitirqd_id(current)];
9231 +	struct mutex* sem;
9232 +	struct task_struct* owner;
9233 +
9234 +    int rt_status = set_litmus_daemon_sched();
9235 +
9236 +    if(rt_status != 0)
9237 +    {
9238 +        TRACE_CUR("%s: Failed to transition to rt-task.\n", __FUNCTION__);
9239 +        goto rt_failed;
9240 +    }
9241 +
9242 +	atomic_inc(&num_ready_klitirqds);
9243 +
9244 +	set_current_state(TASK_INTERRUPTIBLE);
9245 +
9246 +	while (!kthread_should_stop())
9247 +	{
9248 +		preempt_disable();
9249 +		if (!litirq_pending(which))
9250 +		{
9251 +            /* sleep for work */
9252 +            TRACE_CUR("%s: No more tasklets or work objects. Going to sleep.\n",
9253 +					  __FUNCTION__);
9254 +			preempt_enable_no_resched();
9255 +            schedule();
9256 +
9257 +			if(kthread_should_stop()) /* bail out */
9258 +			{
9259 +				TRACE_CUR("%s:%d: Signaled to terminate.\n", __FUNCTION__, __LINE__);
9260 +				continue;
9261 +			}
9262 +
9263 +			preempt_disable();
9264 +		}
9265 +
9266 +		__set_current_state(TASK_RUNNING);
9267 +
9268 +		while (litirq_pending_and_sem_and_owner(which, &sem, &owner))
9269 +		{
9270 +			int needs_resched = 0;
9271 +
9272 +			preempt_enable_no_resched();
9273 +
9274 +			BUG_ON(sem == NULL);
9275 +
9276 +			// wait to enter execution phase; wait for 'current_owner' to block.
9277 +			enter_execution_phase(which, sem, owner);
9278 +
9279 +			if(kthread_should_stop())
9280 +			{
9281 +				TRACE_CUR("%s:%d: Signaled to terminate.\n", __FUNCTION__, __LINE__);
9282 +				break;
9283 +			}
9284 +
9285 +			preempt_disable();
9286 +
9287 +			/* Double check that there's still pending work and the owner hasn't
9288 +			 * changed. Pending items may have been flushed while we were sleeping.
9289 +			 */
9290 +			if(litirq_pending_with_owner(which, owner))
9291 +			{
9292 +				TRACE_CUR("%s: Executing tasklets and/or work objects.\n",
9293 +						  __FUNCTION__);
9294 +
9295 +				needs_resched = do_litirq(which);
9296 +
9297 +				preempt_enable_no_resched();
9298 +
9299 +				// work objects are preemptible.
9300 +				if(!needs_resched)
9301 +				{
9302 +					do_work(which);
9303 +				}
9304 +
9305 +				// exit execution phase.
9306 +				exit_execution_phase(which, sem, owner);
9307 +
9308 +				TRACE_CUR("%s: Setting up next priority.\n", __FUNCTION__);
9309 +				reeval_prio(which); /* check if we need to change priority here */
9310 +			}
9311 +			else
9312 +			{
9313 +				TRACE_CUR("%s: Pending work was flushed!  Prev owner was %s/%d\n",
9314 +								__FUNCTION__,
9315 +								owner->comm, owner->pid);
9316 +				preempt_enable_no_resched();
9317 +
9318 +				// exit execution phase.
9319 +				exit_execution_phase(which, sem, owner);
9320 +			}
9321 +
9322 +			cond_resched();
9323 +			preempt_disable();
9324 +		}
9325 +		preempt_enable();
9326 +		set_current_state(TASK_INTERRUPTIBLE);
9327 +	}
9328 +	__set_current_state(TASK_RUNNING);
9329 +
9330 +	atomic_dec(&num_ready_klitirqds);
9331 +
9332 +rt_failed:
9333 +    litmus_exit_task(current);
9334 +
9335 +	return rt_status;
9336 +}
9337 +
9338 +
9339 +struct klitirqd_launch_data
9340 +{
9341 +	int* cpu_affinity;
9342 +	struct work_struct work;
9343 +};
9344 +
9345 +/* executed by a kworker from workqueues */
9346 +static void launch_klitirqd(struct work_struct *work)
9347 +{
9348 +    int i;
9349 +
9350 +	struct klitirqd_launch_data* launch_data =
9351 +		container_of(work, struct klitirqd_launch_data, work);
9352 +
9353 +    TRACE("%s: Creating %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD);
9354 +
9355 +    /* create the daemon threads */
9356 +    for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
9357 +    {
9358 +		if(launch_data->cpu_affinity)
9359 +		{
9360 +			klitirqds[i].klitirqd =
9361 +				kthread_create(
9362 +				   run_klitirqd,
9363 +				   /* treat the affinity as a pointer, we'll cast it back later */
9364 +				   (void*)(long long)launch_data->cpu_affinity[i],
9365 +				   "klitirqd_th%d/%d",
9366 +				   i,
9367 +				   launch_data->cpu_affinity[i]);
9368 +
9369 +			/* litmus will put us in the right cluster. */
9370 +			kthread_bind(klitirqds[i].klitirqd, launch_data->cpu_affinity[i]);
9371 +		}
9372 +		else
9373 +		{
9374 +			klitirqds[i].klitirqd =
9375 +				kthread_create(
9376 +				   run_klitirqd,
9377 +				   /* treat the affinity as a pointer, we'll cast it back later */
9378 +				   (void*)(long long)(-1),
9379 +				   "klitirqd_th%d",
9380 +				   i);
9381 +		}
9382 +    }
9383 +
9384 +    TRACE("%s: Launching %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD);
9385 +
9386 +    /* unleash the daemons */
9387 +    for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
9388 +    {
9389 +        wake_up_process(klitirqds[i].klitirqd);
9390 +    }
9391 +
9392 +	if(launch_data->cpu_affinity)
9393 +		kfree(launch_data->cpu_affinity);
9394 +	kfree(launch_data);
9395 +}
9396 +
9397 +
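+/* Set up the per-daemon state and defer the actual kthread creation to a
+ * workqueue callback, since this may be called from an atomic context (e.g.,
+ * during a plugin switch). */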
9398 +void spawn_klitirqd(int* affinity)
9399 +{
9400 +    int i;
9401 +    struct klitirqd_launch_data* delayed_launch;
9402 +
9403 +	if(atomic_read(&num_ready_klitirqds) != 0)
9404 +	{
9405 +		TRACE("%s: At least one klitirqd is already running! Need to call kill_klitirqd()?\n", __FUNCTION__);
9406 +		return;
9407 +	}
9408 +
9409 +    /* init the tasklet & work queues */
9410 +    for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
9411 +    {
9412 +		klitirqds[i].terminating = 0;
9413 +		klitirqds[i].pending = 0;
9414 +
9415 +		klitirqds[i].num_hi_pending.counter = 0;
9416 +		klitirqds[i].num_low_pending.counter = 0;
9417 +		klitirqds[i].num_work_pending.counter = 0;
9418 +
9419 +        klitirqds[i].pending_tasklets_hi.head = NULL;
9420 +        klitirqds[i].pending_tasklets_hi.tail = &klitirqds[i].pending_tasklets_hi.head;
9421 +
9422 +        klitirqds[i].pending_tasklets.head = NULL;
9423 +        klitirqds[i].pending_tasklets.tail = &klitirqds[i].pending_tasklets.head;
9424 +
9425 +		INIT_LIST_HEAD(&klitirqds[i].worklist);
9426 +
9427 +		raw_spin_lock_init(&klitirqds[i].lock);
9428 +    }
9429 +
9430 +    /* wait to flush the initializations to memory since other threads
9431 +       will access it. */
9432 +    mb();
9433 +
9434 +    /* tell a work queue to launch the threads.  we can't make scheduling
9435 +       calls since we're in an atomic state. */
9436 +    TRACE("%s: Setting callback up to launch klitirqds\n", __FUNCTION__);
9437 +	delayed_launch = kmalloc(sizeof(struct klitirqd_launch_data), GFP_ATOMIC);
9438 +	if(affinity)
9439 +	{
9440 +		delayed_launch->cpu_affinity =
9441 +			kmalloc(sizeof(int)*NR_LITMUS_SOFTIRQD, GFP_ATOMIC);
9442 +
9443 +		memcpy(delayed_launch->cpu_affinity, affinity,
9444 +			sizeof(int)*NR_LITMUS_SOFTIRQD);
9445 +	}
9446 +	else
9447 +	{
9448 +		delayed_launch->cpu_affinity = NULL;
9449 +	}
9450 +    INIT_WORK(&delayed_launch->work, launch_klitirqd);
9451 +    schedule_work(&delayed_launch->work);
9452 +}
9453 +
9454 +
9455 +void kill_klitirqd(void)
9456 +{
9457 +	if(!klitirqd_is_dead())
9458 +	{
9459 +    	int i;
9460 +
9461 +    	TRACE("%s: Killing %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD);
9462 +
9463 +    	for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
9464 +    	{
9465 +			if(klitirqds[i].terminating != 1)
9466 +			{
9467 +				klitirqds[i].terminating = 1;
9468 +				mb(); /* just to be sure? */
9469 +				flush_pending(klitirqds[i].klitirqd, NULL);
9470 +
9471 +				/* signal termination */
9472 +       			kthread_stop(klitirqds[i].klitirqd);
9473 +			}
9474 +    	}
9475 +	}
9476 +}
9477 +
9478 +
9479 +int klitirqd_is_ready(void)
9480 +{
9481 +	return(atomic_read(&num_ready_klitirqds) == NR_LITMUS_SOFTIRQD);
9482 +}
9483 +
9484 +int klitirqd_is_dead(void)
9485 +{
9486 +	return(atomic_read(&num_ready_klitirqds) == 0);
9487 +}
9488 +
9489 +
9490 +struct task_struct* get_klitirqd(unsigned int k_id)
9491 +{
9492 +	return(klitirqds[k_id].klitirqd);
9493 +}
9494 +
9495 +
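+/* Hand every pending item that belongs to 'owner' (or every item, if owner
+ * is NULL) back to Linux's regular tasklet/workqueue processing -- e.g. when
+ * the owner exits or the daemon terminates -- and recompute the daemon's
+ * inherited priority.  Items of other owners are simply re-queued. */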
9496 +void flush_pending(struct task_struct* klitirqd_thread,
9497 +				   struct task_struct* owner)
9498 +{
9499 +	unsigned int k_id = klitirqd_id(klitirqd_thread);
9500 +	struct klitirqd_info *which = &klitirqds[k_id];
9501 +
9502 +	unsigned long flags;
9503 +	struct tasklet_struct *list;
9504 +
9505 +	u32 work_flushed = 0;
9506 +
9507 +	raw_spin_lock_irqsave(&which->lock, flags);
9508 +
9509 +	//__dump_state(which, "flush_pending: before");
9510 +
9511 +	// flush hi tasklets.
9512 +	if(litirq_pending_hi_irqoff(which))
9513 +	{
9514 +		which->pending &= ~LIT_TASKLET_HI;
9515 +
9516 +		list = which->pending_tasklets_hi.head;
9517 +		which->pending_tasklets_hi.head = NULL;
9518 +		which->pending_tasklets_hi.tail = &which->pending_tasklets_hi.head;
9519 +
9520 +		TRACE("%s: Handing HI tasklets back to Linux.\n", __FUNCTION__);
9521 +
9522 +		while(list)
9523 +		{
9524 +			struct tasklet_struct *t = list;
9525 +			list = list->next;
9526 +
9527 +			if(likely((t->owner == owner) || (owner == NULL)))
9528 +			{
9529 +				if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)))
9530 +				{
9531 +					BUG();
9532 +				}
9533 +
9534 +				work_flushed |= LIT_TASKLET_HI;
9535 +
9536 +				t->owner = NULL;
9537 +
9538 +				// re-mark as scheduled before handing back to Linux; the bit
9539 +				// was just cleared above, so this should always succeed.
9539 +				if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
9540 +				{
9541 +					atomic_dec(&which->num_hi_pending);
9542 +					___tasklet_hi_schedule(t);
9543 +				}
9544 +				else
9545 +				{
9546 +					TRACE("%s: dropped hi tasklet??\n", __FUNCTION__);
9547 +					BUG();
9548 +				}
9549 +			}
9550 +			else
9551 +			{
9552 +				TRACE("%s: Could not flush a HI tasklet.\n", __FUNCTION__);
9553 +				// put back on queue.
9554 +				___litmus_tasklet_hi_schedule(t, which, 0);
9555 +			}
9556 +		}
9557 +	}
9558 +
9559 +	// flush low tasklets.
9560 +	if(litirq_pending_low_irqoff(which))
9561 +	{
9562 +		which->pending &= ~LIT_TASKLET_LOW;
9563 +
9564 +		list = which->pending_tasklets.head;
9565 +		which->pending_tasklets.head = NULL;
9566 +		which->pending_tasklets.tail = &which->pending_tasklets.head;
9567 +
9568 +		TRACE("%s: Handing LOW tasklets back to Linux.\n", __FUNCTION__);
9569 +
9570 +		while(list)
9571 +		{
9572 +			struct tasklet_struct *t = list;
9573 +			list = list->next;
9574 +
9575 +			if(likely((t->owner == owner) || (owner == NULL)))
9576 +			{
9577 +				if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)))
9578 +				{
9579 +					BUG();
9580 +				}
9581 +
9582 +				work_flushed |= LIT_TASKLET_LOW;
9583 +
9584 +				t->owner = NULL;
9585 +				sched_trace_tasklet_end(owner, 1ul);
9586 +
9587 +				if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
9588 +				{
9589 +					atomic_dec(&which->num_low_pending);
9590 +					___tasklet_schedule(t);
9591 +				}
9592 +				else
9593 +				{
9594 +					TRACE("%s: dropped tasklet??\n", __FUNCTION__);
9595 +					BUG();
9596 +				}
9597 +			}
9598 +			else
9599 +			{
9600 +				TRACE("%s: Could not flush a LOW tasklet.\n", __FUNCTION__);
9601 +				// put back on queue
9602 +				___litmus_tasklet_schedule(t, which, 0);
9603 +			}
9604 +		}
9605 +	}
9606 +
9607 +	// flush work objects
9608 +	if(litirq_pending_work_irqoff(which))
9609 +	{
9610 +		which->pending &= ~LIT_WORK;
9611 +
9612 +		TRACE("%s: Handing work objects back to Linux.\n", __FUNCTION__);
9613 +
9614 +		while(!list_empty(&which->worklist))
9615 +		{
9616 +			struct work_struct* work =
9617 +				list_first_entry(&which->worklist, struct work_struct, entry);
9618 +			list_del_init(&work->entry);
9619 +
9620 +			if(likely((work->owner == owner) || (owner == NULL)))
9621 +			{
9622 +				work_flushed |= LIT_WORK;
9623 +				atomic_dec(&which->num_work_pending);
9624 +
9625 +				work->owner = NULL;
9626 +				sched_trace_work_end(owner, current, 1ul);
9627 +				__schedule_work(work);
9628 +			}
9629 +			else
9630 +			{
9631 +				TRACE("%s: Could not flush a work object.\n", __FUNCTION__);
9632 +				// put back on queue
9633 +				___litmus_schedule_work(work, which, 0);
9634 +			}
9635 +		}
9636 +	}
9637 +
9638 +	//__dump_state(which, "flush_pending: after (before reeval prio)");
9639 +
9640 +
9641 +	mb(); /* commit changes to pending flags */
9642 +
9643 +	/* reset the scheduling priority */
9644 +	if(work_flushed)
9645 +	{
9646 +		__reeval_prio(which);
9647 +
9648 +		/* Try to offload flushed tasklets to Linux's ksoftirqd. */
9649 +		if(work_flushed & (LIT_TASKLET_LOW | LIT_TASKLET_HI))
9650 +		{
9651 +			wakeup_softirqd();
9652 +		}
9653 +	}
9654 +	else
9655 +	{
9656 +		TRACE_CUR("%s: no work flushed, so __reeval_prio() skipped\n", __FUNCTION__);
9657 +	}
9658 +
9659 +	raw_spin_unlock_irqrestore(&which->lock, flags);
9660 +}
9661 +
9662 +
9663 +
9664 +
9665 +static void ___litmus_tasklet_schedule(struct tasklet_struct *t,
9666 +									   struct klitirqd_info *which,
9667 +									   int wakeup)
9668 +{
9669 +	unsigned long flags;
9670 +	u32 old_pending;
9671 +
9672 +	t->next = NULL;
9673 +
9674 +    raw_spin_lock_irqsave(&which->lock, flags);
9675 +
9676 +	//__dump_state(which, "___litmus_tasklet_schedule: before queuing");
9677 +
9678 +    *(which->pending_tasklets.tail) = t;
9679 +    which->pending_tasklets.tail = &t->next;
9680 +
9681 +	old_pending = which->pending;
9682 +	which->pending |= LIT_TASKLET_LOW;
9683 +
9684 +	atomic_inc(&which->num_low_pending);
9685 +
9686 +	mb();
9687 +
9688 +	if(!old_pending && wakeup)
9689 +	{
9690 +		wakeup_litirqd_locked(which); /* wake up the klitirqd */
9691 +	}
9692 +
9693 +	//__dump_state(which, "___litmus_tasklet_schedule: after queuing");
9694 +
9695 +    raw_spin_unlock_irqrestore(&which->lock, flags);
9696 +}
9697 +
9698 +int __litmus_tasklet_schedule(struct tasklet_struct *t, unsigned int k_id)
9699 +{
9700 +	int ret = 0; /* assume failure */
9701 +    if(unlikely((t->owner == NULL) || !is_realtime(t->owner)))
9702 +    {
9703 +        TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
9704 +        BUG();
9705 +    }
9706 +
9707 +    if(unlikely(k_id >= NR_LITMUS_SOFTIRQD))
9708 +    {
9709 +        TRACE("%s: No klitirqd_th%d!\n", __FUNCTION__, k_id);
9710 +        BUG();
9711 +    }
9712 +
9713 +	if(likely(!klitirqds[k_id].terminating))
9714 +	{
9715 +		/* Can't accept tasklets while we're processing a workqueue
9716 +		   because they're handled by the same thread. This case is
9717 +		   very RARE.
9718 +
9719 +		   TODO: Use a separate thread for work objects!!!!!!
9720 +         */
9721 +		if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0))
9722 +		{
9723 +			ret = 1;
9724 +			___litmus_tasklet_schedule(t, &klitirqds[k_id], 1);
9725 +		}
9726 +		else
9727 +		{
9728 +			TRACE("%s: rejected tasklet because of pending work.\n",
9729 +						__FUNCTION__);
9730 +		}
9731 +	}
9732 +	return(ret);
9733 +}
9734 +
9735 +EXPORT_SYMBOL(__litmus_tasklet_schedule);
9736 +
9737 +
9738 +static void ___litmus_tasklet_hi_schedule(struct tasklet_struct *t,
9739 +									   struct klitirqd_info *which,
9740 +									   int wakeup)
9741 +{
9742 +	unsigned long flags;
9743 +	u32 old_pending;
9744 +
9745 +	t->next = NULL;
9746 +
9747 +    raw_spin_lock_irqsave(&which->lock, flags);
9748 +
9749 +    *(which->pending_tasklets_hi.tail) = t;
9750 +    which->pending_tasklets_hi.tail = &t->next;
9751 +
9752 +	old_pending = which->pending;
9753 +	which->pending |= LIT_TASKLET_HI;
9754 +
9755 +	atomic_inc(&which->num_hi_pending);
9756 +
9757 +	mb();
9758 +
9759 +	if(!old_pending && wakeup)
9760 +	{
9761 +		wakeup_litirqd_locked(which); /* wake up the klitirqd */
9762 +	}
9763 +
9764 +    raw_spin_unlock_irqrestore(&which->lock, flags);
9765 +}
9766 +
9767 +int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, unsigned int k_id)
9768 +{
9769 +	int ret = 0; /* assume failure */
9770 +    if(unlikely((t->owner == NULL) || !is_realtime(t->owner)))
9771 +    {
9772 +        TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
9773 +        BUG();
9774 +    }
9775 +
9776 +    if(unlikely(k_id >= NR_LITMUS_SOFTIRQD))
9777 +    {
9778 +        TRACE("%s: No klitirqd_th%d!\n", __FUNCTION__, k_id);
9779 +        BUG();
9780 +    }
9781 +
9782 +    if(unlikely(!klitirqd_is_ready()))
9783 +    {
9784 +        TRACE("%s: klitirqd_th%u is not ready!\n", __FUNCTION__, k_id);
9785 +        BUG();
9786 +    }
9787 +
9788 +	if(likely(!klitirqds[k_id].terminating))
9789 +	{
9790 +		if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0))
9791 +		{
9792 +			ret = 1;
9793 +			___litmus_tasklet_hi_schedule(t, &klitirqds[k_id], 1);
9794 +		}
9795 +		else
9796 +		{
9797 +			TRACE("%s: rejected tasklet because of pending work.\n",
9798 +						__FUNCTION__);
9799 +		}
9800 +	}
9801 +	return(ret);
9802 +}
9803 +
9804 +EXPORT_SYMBOL(__litmus_tasklet_hi_schedule);
9805 +
9806 +
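+/* Like __litmus_tasklet_hi_schedule(), but inserts at the head of the HI
+ * queue.  The caller must have interrupts disabled (see the BUG_ON below),
+ * which is why a plain raw_spin_lock() suffices here. */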
9807 +int __litmus_tasklet_hi_schedule_first(struct tasklet_struct *t, unsigned int k_id)
9808 +{
9809 +	int ret = 0; /* assume failure */
9810 +	u32 old_pending;
9811 +
9812 +	BUG_ON(!irqs_disabled());
9813 +
9814 +    if(unlikely((t->owner == NULL) || !is_realtime(t->owner)))
9815 +    {
9816 +        TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
9817 +        BUG();
9818 +    }
9819 +
9820 +    if(unlikely(k_id >= NR_LITMUS_SOFTIRQD))
9821 +    {
9822 +        TRACE("%s: No klitirqd_th%u!\n", __FUNCTION__, k_id);
9823 +        BUG();
9824 +    }
9825 +
9826 +    if(unlikely(!klitirqd_is_ready()))
9827 +    {
9828 +        TRACE("%s: klitirqd_th%u is not ready!\n", __FUNCTION__, k_id);
9829 +        BUG();
9830 +    }
9831 +
9832 +	if(likely(!klitirqds[k_id].terminating))
9833 +	{
9834 +    	raw_spin_lock(&klitirqds[k_id].lock);
9835 +
9836 +		if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0))
9837 +		{
9838 +			ret = 1;  // success!
9839 +
9840 +			t->next = klitirqds[k_id].pending_tasklets_hi.head;
9841 +    		klitirqds[k_id].pending_tasklets_hi.head = t;
9842 +
9843 +			old_pending = klitirqds[k_id].pending;
9844 +			klitirqds[k_id].pending |= LIT_TASKLET_HI;
9845 +
9846 +			atomic_inc(&klitirqds[k_id].num_hi_pending);
9847 +
9848 +			mb();
9849 +
9850 +			if(!old_pending)
9851 +    			wakeup_litirqd_locked(&klitirqds[k_id]); /* wake up the klitirqd */
9852 +		}
9853 +		else
9854 +		{
9855 +			TRACE("%s: rejected tasklet because of pending work.\n",
9856 +					__FUNCTION__);
9857 +		}
9858 +
9859 +    	raw_spin_unlock(&klitirqds[k_id].lock);
9860 +	}
9861 +	return(ret);
9862 +}
9863 +
9864 +EXPORT_SYMBOL(__litmus_tasklet_hi_schedule_first);
9865 +
9866 +
9867 +
9868 +static void ___litmus_schedule_work(struct work_struct *w,
9869 +									struct klitirqd_info *which,
9870 +									int wakeup)
9871 +{
9872 +	unsigned long flags;
9873 +	u32 old_pending;
9874 +
9875 +	raw_spin_lock_irqsave(&which->lock, flags);
9876 +
9877 +	work_pending(w);
9878 +	list_add_tail(&w->entry, &which->worklist);
9879 +
9880 +	old_pending = which->pending;
9881 +	which->pending |= LIT_WORK;
9882 +
9883 +	atomic_inc(&which->num_work_pending);
9884 +
9885 +	mb();
9886 +
9887 +	if(!old_pending && wakeup)
9888 +	{
9889 +		wakeup_litirqd_locked(which); /* wakeup the klitirqd */
9890 +	}
9891 +
9892 +	raw_spin_unlock_irqrestore(&which->lock, flags);
9893 +}
9894 +
9895 +int __litmus_schedule_work(struct work_struct *w, unsigned int k_id)
9896 +{
9897 +	int ret = 1; /* assume success */
9898 +	if(unlikely(w->owner == NULL) || !is_realtime(w->owner))
9899 +	{
9900 +		TRACE("%s: No owner associated with this work object!\n", __FUNCTION__);
9901 +		BUG();
9902 +	}
9903 +
9904 +	if(unlikely(k_id >= NR_LITMUS_SOFTIRQD))
9905 +	{
9906 +		TRACE("%s: No klitirqd_th%u!\n", __FUNCTION__, k_id);
9907 +		BUG();
9908 +	}
9909 +
9910 +    if(unlikely(!klitirqd_is_ready()))
9911 +    {
9912 +        TRACE("%s: klitirqd_th%u is not ready!\n", __FUNCTION__, k_id);
9913 +        BUG();
9914 +    }
9915 +
9916 +	if(likely(!klitirqds[k_id].terminating))
9917 +		___litmus_schedule_work(w, &klitirqds[k_id], 1);
9918 +	else
9919 +		ret = 0;
9920 +	return(ret);
9921 +}
9922 +EXPORT_SYMBOL(__litmus_schedule_work);
9923 +
9924 +
9925 +static int set_klitirqd_sem_status(unsigned long stat)
9926 +{
9927 +	TRACE_CUR("SETTING STATUS FROM %d TO %d\n",
9928 +					atomic_read(&tsk_rt(current)->klitirqd_sem_stat),
9929 +					stat);
9930 +	atomic_set(&tsk_rt(current)->klitirqd_sem_stat, stat);
9931 +	//mb();
9932 +
9933 +	return(0);
9934 +}
9935 +
9936 +static int set_klitirqd_sem_status_if_not_held(unsigned long stat)
9937 +{
9938 +	if(atomic_read(&tsk_rt(current)->klitirqd_sem_stat) != HELD)
9939 +	{
9940 +		return(set_klitirqd_sem_status(stat));
9941 +	}
9942 +	return(-1);
9943 +}
9944 +
9945 +
9946 +void __down_and_reset_and_set_stat(struct task_struct* t,
9947 +					   enum klitirqd_sem_status to_reset,
9948 +					   enum klitirqd_sem_status to_set,
9949 +					   struct mutex* sem)
9950 +{
9951 +#if 0
9952 +	struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem);
9953 +	struct task_struct* task = container_of(param, struct task_struct, rt_param);
9954 +
9955 +	TRACE_CUR("%s: entered.  Locking semaphore of %s/%d\n",
9956 +					__FUNCTION__, task->comm, task->pid);
9957 +#endif
9958 +
9959 +	mutex_lock_sfx(sem,
9960 +				   set_klitirqd_sem_status_if_not_held, to_reset,
9961 +				   set_klitirqd_sem_status, to_set);
9962 +#if 0
9963 +	TRACE_CUR("%s: exiting.  Have semaphore of %s/%d\n",
9964 +					__FUNCTION__, task->comm, task->pid);
9965 +#endif
9966 +}
9967 +
9968 +void down_and_set_stat(struct task_struct* t,
9969 +					   enum klitirqd_sem_status to_set,
9970 +					   struct mutex* sem)
9971 +{
9972 +#if 0
9973 +	struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem);
9974 +	struct task_struct* task = container_of(param, struct task_struct, rt_param);
9975 +
9976 +	TRACE_CUR("%s: entered.  Locking semaphore of %s/%d\n",
9977 +					__FUNCTION__, task->comm, task->pid);
9978 +#endif
9979 +
9980 +	mutex_lock_sfx(sem,
9981 +				   NULL, 0,
9982 +				   set_klitirqd_sem_status, to_set);
9983 +
9984 +#if 0
9985 +	TRACE_CUR("%s: exiting.  Have semaphore of %s/%d\n",
9986 +					__FUNCTION__, task->comm, task->pid);
9987 +#endif
9988 +}
9989 +
9990 +
9991 +void up_and_set_stat(struct task_struct* t,
9992 +					 enum klitirqd_sem_status to_set,
9993 +					 struct mutex* sem)
9994 +{
9995 +#if 0
9996 +	struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem);
9997 +	struct task_struct* task = container_of(param, struct task_struct, rt_param);
9998 +
9999 +	TRACE_CUR("%s: entered.  Unlocking semaphore of %s/%d\n",
10000 +					__FUNCTION__,
10001 +					task->comm, task->pid);
10002 +#endif
10003 +
10004 +	mutex_unlock_sfx(sem, NULL, 0,
10005 +					 set_klitirqd_sem_status, to_set);
10006 +
10007 +#if 0
10008 +	TRACE_CUR("%s: exiting.  Unlocked semaphore of %s/%d\n",
10009 +					__FUNCTION__,
10010 +					task->comm, task->pid);
10011 +#endif
10012 +}
10013 +
10014 +
10015 +
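+/* Called when a real-time task blocks while holding its klitirqd_sem: give
+ * the semaphore up and mark it NEED_TO_REACQUIRE so that a later
+ * reacquire_klitirqd_lock() call takes it back.  For a proxy (klitirqd)
+ * thread, the semaphore released is that of its current owner. */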
10016 +void release_klitirqd_lock(struct task_struct* t)
10017 +{
10018 +	if(is_realtime(t) && (atomic_read(&tsk_rt(t)->klitirqd_sem_stat) == HELD))
10019 +	{
10020 +		struct mutex* sem;
10021 +		struct task_struct* owner = t;
10022 +
10023 +		if(t->state == TASK_RUNNING)
10024 +		{
10025 +			TRACE_TASK(t, "NOT giving up klitirqd_sem because we're not blocked!\n");
10026 +			return;
10027 +		}
10028 +
10029 +		if(likely(!tsk_rt(t)->is_proxy_thread))
10030 +		{
10031 +			sem = &tsk_rt(t)->klitirqd_sem;
10032 +		}
10033 +		else
10034 +		{
10035 +			unsigned int k_id = klitirqd_id(t);
10036 +			owner = klitirqds[k_id].current_owner;
10037 +
10038 +			BUG_ON(t != klitirqds[k_id].klitirqd);
10039 +
10040 +			if(likely(owner))
10041 +			{
10042 +				sem = &tsk_rt(owner)->klitirqd_sem;
10043 +			}
10044 +			else
10045 +			{
10046 +				BUG();
10047 +
10048 +				// We had the rug pulled out from under us.  Abort attempt
10049 +				// to reacquire the lock since our client no longer needs us.
10050 +				TRACE_CUR("HUH?!  How did this happen?\n");
10051 +				atomic_set(&tsk_rt(t)->klitirqd_sem_stat, NOT_HELD);
10052 +				return;
10053 +			}
10054 +		}
10055 +
10056 +		//TRACE_CUR("Releasing semaphore of %s/%d...\n", owner->comm, owner->pid);
10057 +		up_and_set_stat(t, NEED_TO_REACQUIRE, sem);
10058 +		//TRACE_CUR("Semaphore of %s/%d released!\n", owner->comm, owner->pid);
10059 +	}
10060 +	/*
10061 +	else if(is_realtime(t))
10062 +	{
10063 +		TRACE_CUR("%s: Nothing to do.  Stat = %d\n", __FUNCTION__, tsk_rt(t)->klitirqd_sem_stat);
10064 +	}
10065 +	*/
10066 +}
10067 +
10068 +int reacquire_klitirqd_lock(struct task_struct* t)
10069 +{
10070 +	int ret = 0;
10071 +
10072 +	if(is_realtime(t) && (atomic_read(&tsk_rt(t)->klitirqd_sem_stat) == NEED_TO_REACQUIRE))
10073 +	{
10074 +		struct mutex* sem;
10075 +		struct task_struct* owner = t;
10076 +
10077 +		if(likely(!tsk_rt(t)->is_proxy_thread))
10078 +		{
10079 +			sem = &tsk_rt(t)->klitirqd_sem;
10080 +		}
10081 +		else
10082 +		{
10083 +			unsigned int k_id = klitirqd_id(t);
10084 +			//struct task_struct* owner = klitirqds[k_id].current_owner;
10085 +			owner = klitirqds[k_id].current_owner;
10086 +
10087 +			BUG_ON(t != klitirqds[k_id].klitirqd);
10088 +
10089 +			if(likely(owner))
10090 +			{
10091 +				sem = &tsk_rt(owner)->klitirqd_sem;
10092 +			}
10093 +			else
10094 +			{
10095 +				// We had the rug pulled out from under us.  Abort attempt
10096 +				// to reacquire the lock since our client no longer needs us.
10097 +				TRACE_CUR("No longer needs to reacquire klitirqd_sem!\n");
10098 +				atomic_set(&tsk_rt(t)->klitirqd_sem_stat, NOT_HELD);
10099 +				return(0);
10100 +			}
10101 +		}
10102 +
10103 +		//TRACE_CUR("Trying to reacquire semaphore of %s/%d\n", owner->comm, owner->pid);
10104 +		__down_and_reset_and_set_stat(t, REACQUIRING, HELD, sem);
10105 +		//TRACE_CUR("Reacquired semaphore %s/%d\n", owner->comm, owner->pid);
10106 +	}
10107 +	/*
10108 +	else if(is_realtime(t))
10109 +	{
10110 +		TRACE_CUR("%s: Nothing to do.  Stat = %d\n", __FUNCTION__, tsk_rt(t)->klitirqd_sem_stat);
10111 +	}
10112 +	*/
10113 +
10114 +	return(ret);
10115 +}
10116 +
10117 diff --git a/litmus/locking.c b/litmus/locking.c
10118 index 0c1aa6a..718a5a3 100644
10119 --- a/litmus/locking.c
10120 +++ b/litmus/locking.c
10121 @@ -4,6 +4,15 @@
10122  
10123  #include <litmus/sched_plugin.h>
10124  #include <litmus/trace.h>
10125 +#include <litmus/litmus.h>
10126 +
10127 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
10128 +#include <linux/uaccess.h>
10129 +#endif
10130 +
10131 +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
10132 +#include <litmus/gpu_affinity.h>
10133 +#endif
10134  
10135  static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg);
10136  static int open_generic_lock(struct od_table_entry* entry, void* __user arg);
10137 @@ -17,6 +26,9 @@ struct fdso_ops generic_lock_ops = {
10138  	.destroy = destroy_generic_lock
10139  };
10140  
10141 +static atomic_t lock_id_gen = ATOMIC_INIT(0);
10142 +
10143 +
10144  static inline bool is_lock(struct od_table_entry* entry)
10145  {
10146  	return entry->class == &generic_lock_ops;
10147 @@ -34,8 +46,21 @@ static  int create_generic_lock(void** obj_ref, obj_type_t type, void* __user ar
10148  	int err;
10149  
10150  	err = litmus->allocate_lock(&lock, type, arg);
10151 -	if (err == 0)
10152 +	if (err == 0) {
10153 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
10154 +		lock->nest.lock = lock;
10155 +		lock->nest.hp_waiter_eff_prio = NULL;
10156 +
10157 +		INIT_BINHEAP_NODE(&lock->nest.hp_binheap_node);
10158 +		if(!lock->nest.hp_waiter_ptr) {
10159 +			TRACE_CUR("BEWARE: hp_waiter_ptr should probably not be NULL in "
10160 +					  "most uses. (exception: IKGLP donors)\n");
10161 +		}
10162 +#endif
10163 +		lock->type = type;
10164 +		lock->ident = atomic_inc_return(&lock_id_gen);
10165  		*obj_ref = lock;
10166 +    }
10167  	return err;
10168  }
10169  
10170 @@ -74,7 +99,8 @@ asmlinkage long sys_litmus_lock(int lock_od)
10171  	entry = get_entry_for_od(lock_od);
10172  	if (entry && is_lock(entry)) {
10173  		l = get_lock(entry);
10174 -		TRACE_CUR("attempts to lock 0x%p\n", l);
10175 +		//TRACE_CUR("attempts to lock 0x%p\n", l);
10176 +		TRACE_CUR("attempts to lock %d\n", l->ident);
10177  		err = l->ops->lock(l);
10178  	}
10179  
10180 @@ -96,7 +122,8 @@ asmlinkage long sys_litmus_unlock(int lock_od)
10181  	entry = get_entry_for_od(lock_od);
10182  	if (entry && is_lock(entry)) {
10183  		l = get_lock(entry);
10184 -		TRACE_CUR("attempts to unlock 0x%p\n", l);
10185 +		//TRACE_CUR("attempts to unlock 0x%p\n", l);
10186 +		TRACE_CUR("attempts to unlock %d\n", l->ident);
10187  		err = l->ops->unlock(l);
10188  	}
10189  
10190 @@ -121,8 +148,366 @@ struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq)
10191  	return(t);
10192  }
10193  
10194 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
10195 +
10196 +void print_hp_waiters(struct binheap_node* n, int depth)
10197 +{
10198 +	struct litmus_lock *l;
10199 +	struct nested_info *nest;
10200 +	char padding[81] = "                                                                                ";
10201 +	struct task_struct *hp = NULL;
10202 +	struct task_struct *hp_eff = NULL;
10203 +	struct task_struct *node_prio = NULL;
10204 +
10205 +
10206 +	if(n == NULL) {
10207 +		TRACE("+-> %p\n", NULL);
10208 +		return;
10209 +	}
10210 +
10211 +	nest = binheap_entry(n, struct nested_info, hp_binheap_node);
10212 +	l = nest->lock;
10213 +
10214 +	if(depth*2 <= 80)
10215 +		padding[depth*2] = '\0';
10216 +
10217 +	if(nest->hp_waiter_ptr && *(nest->hp_waiter_ptr)) {
10218 +		hp = *(nest->hp_waiter_ptr);
10219 +
10220 +		if(tsk_rt(hp)->inh_task) {
10221 +			hp_eff = tsk_rt(hp)->inh_task;
10222 +		}
10223 +	}
10224 +
10225 +	node_prio = nest->hp_waiter_eff_prio;
10226 +
10227 +	TRACE("%s+-> %s/%d [waiter = %s/%d] [waiter's inh = %s/%d] (lock = %d)\n",
10228 +		  padding,
10229 +		  (node_prio) ? node_prio->comm : "nil",
10230 +		  (node_prio) ? node_prio->pid : -1,
10231 +		  (hp) ? hp->comm : "nil",
10232 +		  (hp) ? hp->pid : -1,
10233 +		  (hp_eff) ? hp_eff->comm : "nil",
10234 +		  (hp_eff) ? hp_eff->pid : -1,
10235 +		  l->ident);
10236 +
10237 +    if(n->left) print_hp_waiters(n->left, depth+1);
10238 +    if(n->right) print_hp_waiters(n->right, depth+1);
10239 +}
10240 +#endif
10241 +
10242 +
10243 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
10244 +
10245 +void select_next_lock(dgl_wait_state_t* dgl_wait /*, struct litmus_lock* prev_lock*/)
10246 +{
10247 +	/*
10248 +	 We pick the next lock in reverse order. This causes inheritance propagation
10249 +	 from locks received earlier to flow in the same direction as regular nested
10250 +	 locking. This might make fine-grain DGL easier in the future.
10251 +	 */
10252 +
10253 +	BUG_ON(tsk_rt(dgl_wait->task)->blocked_lock);
10254 +
10255 +	//WARN_ON(dgl_wait->locks[dgl_wait->last_primary] != prev_lock);
10256 +
10257 +	// note reverse order
10258 +	for(dgl_wait->last_primary = dgl_wait->last_primary - 1;
10259 +		dgl_wait->last_primary >= 0;
10260 +		--(dgl_wait->last_primary)){
10261 +		if(!dgl_wait->locks[dgl_wait->last_primary]->ops->is_owner(
10262 +				dgl_wait->locks[dgl_wait->last_primary], dgl_wait->task)) {
10263 +
10264 +			tsk_rt(dgl_wait->task)->blocked_lock =
10265 +					dgl_wait->locks[dgl_wait->last_primary];
10266 +			mb();
10267 +
10268 +			TRACE_CUR("New blocked lock is %d\n",
10269 +					  dgl_wait->locks[dgl_wait->last_primary]->ident);
10270 +
10271 +			break;
10272 +		}
10273 +	}
10274 +}
10275 +
10276 +int dgl_wake_up(wait_queue_t *wq_node, unsigned mode, int sync, void *key)
10277 +{
10278 +	// should never be called.
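      +	// It is installed as the wake function of DGL wait-queue entries only so
      +	// that __waitqueue_dgl_remove_first() and rsm_mutex_find_hp_waiter() can
      +	// tell DGL waiters apart from ordinary waiters; DGL waiters are woken
      +	// explicitly via wake_up_process() instead.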
10279 +	BUG();
10280 +	return 1;
10281 +}
10282 +
10283 +void __waitqueue_dgl_remove_first(wait_queue_head_t *wq,
10284 +								  dgl_wait_state_t** dgl_wait,
10285 +								  struct task_struct **task)
10286 +{
10287 +	wait_queue_t *q;
10288 +
10289 +	*dgl_wait = NULL;
10290 +	*task = NULL;
10291 +
10292 +	if (waitqueue_active(wq)) {
10293 +		q = list_entry(wq->task_list.next,
10294 +					   wait_queue_t, task_list);
10295 +
10296 +		if(q->func == dgl_wake_up) {
10297 +			*dgl_wait = (dgl_wait_state_t*) q->private;
10298 +		}
10299 +		else {
10300 +			*task = (struct task_struct*) q->private;
10301 +		}
10302 +
10303 +		__remove_wait_queue(wq, q);
10304 +	}
10305 +}
10306 +
10307 +void init_dgl_waitqueue_entry(wait_queue_t *wq_node, dgl_wait_state_t* dgl_wait)
10308 +{
10309 +	init_waitqueue_entry(wq_node, dgl_wait->task);
10310 +	wq_node->private = dgl_wait;
10311 +	wq_node->func = dgl_wake_up;
10312 +}
10313 +
10314 +
10315 +static long do_litmus_dgl_lock(dgl_wait_state_t *dgl_wait)
10316 +{
10317 +	int i;
10318 +	unsigned long irqflags; //, dummyflags;
10319 +	raw_spinlock_t *dgl_lock = litmus->get_dgl_spinlock(dgl_wait->task);
10320 +
10321 +	BUG_ON(dgl_wait->task != current);
10322 +
10323 +	raw_spin_lock_irqsave(dgl_lock, irqflags);
10324 +
10325 +
10326 +	dgl_wait->nr_remaining = dgl_wait->size;
10327 +
10328 +	TRACE_CUR("Locking DGL with size %d\n", dgl_wait->size);
10329 +
10330 +	// try to acquire each lock.  enqueue (non-blocking) if it is unavailable.
10331 +	for(i = 0; i < dgl_wait->size; ++i) {
10332 +		struct litmus_lock *l = dgl_wait->locks[i];
10333 +
10334 +		// dgl_lock() must set task state to TASK_UNINTERRUPTIBLE if task blocks.
10335 +
10336 +		if(l->ops->dgl_lock(l, dgl_wait, &dgl_wait->wq_nodes[i])) {
10337 +			--(dgl_wait->nr_remaining);
10338 +			TRACE_CUR("Acquired lock %d immediately.\n", l->ident);
10339 +		}
10340 +	}
10341 +
10342 +	if(dgl_wait->nr_remaining == 0) {
10343 +		// acquired entire group immediately
10344 +		TRACE_CUR("Acquired all locks in DGL immediately!\n");
10345 +	}
10346 +	else {
10347 +
10348 +		TRACE_CUR("As many as %d locks in DGL are pending. Suspending.\n",
10349 +				  dgl_wait->nr_remaining);
10350 +
10351 +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
10352 +		// KLUDGE: don't count this suspension as time in the gpu
10353 +		// critical section
10354 +		if(tsk_rt(dgl_wait->task)->held_gpus) {
10355 +			tsk_rt(dgl_wait->task)->suspend_gpu_tracker_on_block = 1;
10356 +		}
10357 +#endif
10358 +
10359 +		// note reverse order.  see comments in select_next_lock for reason.
10360 +		for(i = dgl_wait->size - 1; i >= 0; --i) {
10361 +			struct litmus_lock *l = dgl_wait->locks[i];
10362 +			if(!l->ops->is_owner(l, dgl_wait->task)) {  // double-check to be thread safe
10363 +
10364 +				TRACE_CUR("Activating priority inheritance on lock %d\n",
10365 +						  l->ident);
10366 +
10367 +				TS_DGL_LOCK_SUSPEND;
10368 +
10369 +				l->ops->enable_priority(l, dgl_wait);
10370 +				dgl_wait->last_primary = i;
10371 +
10372 +				TRACE_CUR("Suspending for lock %d\n", l->ident);
10373 +
10374 +				raw_spin_unlock_irqrestore(dgl_lock, irqflags);  // free dgl_lock before suspending
10375 +
10376 +				schedule();  // suspend!!!
10377 +
10378 +				TS_DGL_LOCK_RESUME;
10379 +
10380 +				TRACE_CUR("Woken up from DGL suspension.\n");
10381 +
10382 +				goto all_acquired;  // we should hold all locks when we wake up.
10383 +			}
10384 +		}
10385 +
10386 +		TRACE_CUR("Didn't have to suspend after all, but calling schedule() anyway.\n");
10387 +		//BUG();
10388 +	}
10389 +
10390 +	raw_spin_unlock_irqrestore(dgl_lock, irqflags);
10391 +
10392 +all_acquired:
10393 +
10394 +	// SANITY CHECK FOR TESTING (disabled)
10395 +//	for(i = 0; i < dgl_wait->size; ++i) {
10396 +//		struct litmus_lock *l = dgl_wait->locks[i];
10397 +//		BUG_ON(!l->ops->is_owner(l, dgl_wait->task));
10398 +//	}
10399 +
10400 +	TRACE_CUR("Acquired entire DGL\n");
10401 +
10402 +	return 0;
10403 +}
10404 +
10405 +static int supports_dgl(struct litmus_lock *l)
10406 +{
10407 +	struct litmus_lock_ops* ops = l->ops;
10408 +
10409 +	return (ops->dgl_lock			&&
10410 +			ops->is_owner			&&
10411 +			ops->enable_priority);
10412 +}
10413 +
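      +/*
      + * sys_litmus_dgl_lock: acquire every lock in a dynamic group lock (DGL).
      + * usr_dgl_ods points to a userspace array of dgl_size object descriptors,
      + * each naming an already-opened lock whose ops provide dgl_lock, is_owner,
      + * and enable_priority.  The caller must be a real-time task; the request is
      + * serialized against other DGL operations by the plugin's DGL spinlock.
      + */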
10414 +asmlinkage long sys_litmus_dgl_lock(void* __user usr_dgl_ods, int dgl_size)
10415 +{
10416 +	struct task_struct *t = current;
10417 +	long err = -EINVAL;
10418 +	int dgl_ods[MAX_DGL_SIZE];
10419 +	int i;
10420 +
10421 +	dgl_wait_state_t dgl_wait_state;  // lives on the stack until all resources in DGL are held.
10422 +
10423 +	if(dgl_size > MAX_DGL_SIZE || dgl_size < 1)
10424 +		goto out;
10425 +
10426 +	if(!access_ok(VERIFY_READ, usr_dgl_ods, dgl_size*(sizeof(int))))
10427 +		goto out;
10428 +
10429 +	if(__copy_from_user(&dgl_ods, usr_dgl_ods, dgl_size*(sizeof(int))))
10430 +		goto out;
10431 +
10432 +	if (!is_realtime(t)) {
10433 +		err = -EPERM;
10434 +		goto out;
10435 +	}
10436 +
10437 +	for(i = 0; i < dgl_size; ++i) {
10438 +		struct od_table_entry *entry = get_entry_for_od(dgl_ods[i]);
10439 +		if(entry && is_lock(entry)) {
10440 +			dgl_wait_state.locks[i] = get_lock(entry);
10441 +			if(!supports_dgl(dgl_wait_state.locks[i])) {
10442 +				TRACE_CUR("Lock %d does not support all required DGL operations.\n",
10443 +						  dgl_wait_state.locks[i]->ident);
10444 +				goto out;
10445 +			}
10446 +		}
10447 +		else {
10448 +			TRACE_CUR("Invalid lock identifier\n");
10449 +			goto out;
10450 +		}
10451 +	}
10452 +
10453 +	dgl_wait_state.task = t;
10454 +	dgl_wait_state.size = dgl_size;
10455 +
10456 +	TS_DGL_LOCK_START;
10457 +	err = do_litmus_dgl_lock(&dgl_wait_state);
10458 +
10459 +	/* Note: task may have been suspended or preempted in between!  Take
10460 +	 * this into account when computing overheads. */
10461 +	TS_DGL_LOCK_END;
10462 +
10463 +out:
10464 +	return err;
10465 +}
10466 +
10467 +static long do_litmus_dgl_unlock(struct litmus_lock* dgl_locks[], int dgl_size)
10468 +{
10469 +	int i;
10470 +	long err = 0;
10471 +
10472 +	TRACE_CUR("Unlocking a DGL of size %d\n", dgl_size);
10473 +
10474 +	for(i = dgl_size - 1; i >= 0; --i) {  // unlock in reverse order
10475 +
10476 +		struct litmus_lock *l = dgl_locks[i];
10477 +		long tmp_err;
10478 +
10479 +		TRACE_CUR("Unlocking lock %d of DGL.\n", l->ident);
10480 +
10481 +		tmp_err = l->ops->unlock(l);
10482 +
10483 +		if(tmp_err) {
10484 +			TRACE_CUR("There was an error unlocking %d: %d.\n", l->ident, tmp_err);
10485 +			err = tmp_err;
10486 +		}
10487 +	}
10488 +
10489 +	TRACE_CUR("DGL unlocked. err = %d\n", err);
10490 +
10491 +	return err;
10492 +}
10493 +
10494 +asmlinkage long sys_litmus_dgl_unlock(void* __user usr_dgl_ods, int dgl_size)
10495 +{
10496 +	long err = -EINVAL;
10497 +	int dgl_ods[MAX_DGL_SIZE];
10498 +	struct od_table_entry* entry;
10499 +	int i;
10500 +
10501 +	struct litmus_lock* dgl_locks[MAX_DGL_SIZE];
10502 +
10503 +	if(dgl_size > MAX_DGL_SIZE || dgl_size < 1)
10504 +		goto out;
10505 +
10506 +	if(!access_ok(VERIFY_READ, usr_dgl_ods, dgl_size*(sizeof(int))))
10507 +		goto out;
10508 +
10509 +	if(__copy_from_user(&dgl_ods, usr_dgl_ods, dgl_size*(sizeof(int))))
10510 +		goto out;
10511 +
10512 +	for(i = 0; i < dgl_size; ++i) {
10513 +		entry = get_entry_for_od(dgl_ods[i]);
10514 +		if(entry && is_lock(entry)) {
10515 +			dgl_locks[i] = get_lock(entry);
10516 +			if(!supports_dgl(dgl_locks[i])) {
10517 +				TRACE_CUR("Lock %d does not support all required DGL operations.\n",
10518 +						  dgl_locks[i]->ident);
10519 +				goto out;
10520 +			}
10521 +		}
10522 +		else {
10523 +			TRACE_CUR("Invalid lock identifier\n");
10524 +			goto out;
10525 +		}
10526 +	}
10527 +
10528 +	TS_DGL_UNLOCK_START;
10529 +	err = do_litmus_dgl_unlock(dgl_locks, dgl_size);
10530 +
10531 +	/* Note: task may have been suspended or preempted in between!  Take
10532 +	 * this into account when computing overheads. */
10533 +	TS_DGL_UNLOCK_END;
10534 +
10535 +out:
10536 +	return err;
10537 +}
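      +/*
      + * Illustrative usage sketch (wrapper names are hypothetical, not part of
      + * this patch): a task that has opened two lock object descriptors od_a and
      + * od_b could request both at once with
      + *
      + *     int ods[2] = { od_a, od_b };
      + *     litmus_dgl_lock(ods, 2);    // wraps sys_litmus_dgl_lock(ods, 2)
      + *     ...critical section holding both resources...
      + *     litmus_dgl_unlock(ods, 2);  // wraps sys_litmus_dgl_unlock(ods, 2)
      + */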
10538 +
10539 +#else  // CONFIG_LITMUS_DGL_SUPPORT
10540 +
10541 +asmlinkage long sys_litmus_dgl_lock(void* __user usr_dgl_ods, int dgl_size)
10542 +{
10543 +	return -ENOSYS;
10544 +}
10545 +
10546 +asmlinkage long sys_litmus_dgl_unlock(void* __user usr_dgl_ods, int dgl_size)
10547 +{
10548 +	return -ENOSYS;
10549 +}
10550 +
10551 +#endif
10552  
10553 -#else
10554 +#else  // CONFIG_LITMUS_LOCKING
10555  
10556  struct fdso_ops generic_lock_ops = {};
10557  
10558 diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c
10559 new file mode 100644
10560 index 0000000..4b86a50
10561 --- /dev/null
10562 +++ b/litmus/nvidia_info.c
10563 @@ -0,0 +1,597 @@
10564 +#include <linux/module.h>
10565 +#include <linux/semaphore.h>
10566 +#include <linux/pci.h>
10567 +
10568 +#include <litmus/sched_trace.h>
10569 +#include <litmus/nvidia_info.h>
10570 +#include <litmus/litmus.h>
10571 +
10572 +#include <litmus/sched_plugin.h>
10573 +
10574 +#include <litmus/binheap.h>
10575 +
10576 +typedef unsigned char      NvV8;  /* "void": enumerated or multiple fields   */
10577 +typedef unsigned short     NvV16; /* "void": enumerated or multiple fields   */
10578 +typedef unsigned char      NvU8;  /* 0 to 255                                */
10579 +typedef unsigned short     NvU16; /* 0 to 65535                              */
10580 +typedef signed char        NvS8;  /* -128 to 127                             */
10581 +typedef signed short       NvS16; /* -32768 to 32767                         */
10582 +typedef float              NvF32; /* IEEE Single Precision (S1E8M23)         */
10583 +typedef double             NvF64; /* IEEE Double Precision (S1E11M52)        */
10584 +typedef unsigned int       NvV32; /* "void": enumerated or multiple fields   */
10585 +typedef unsigned int       NvU32; /* 0 to 4294967295                         */
10586 +typedef unsigned long long NvU64; /* 0 to 18446744073709551615          */
10587 +typedef union
10588 +{
10589 +    volatile NvV8 Reg008[1];
10590 +    volatile NvV16 Reg016[1];
10591 +    volatile NvV32 Reg032[1];
10592 +} litmus_nv_hwreg_t, * litmus_nv_phwreg_t;
10593 +
10594 +typedef struct
10595 +{
10596 +    NvU64 address;
10597 +    NvU64 size;
10598 +    NvU32 offset;
10599 +    NvU32 *map;
10600 +    litmus_nv_phwreg_t map_u;
10601 +} litmus_nv_aperture_t;
10602 +
10603 +typedef struct
10604 +{
10605 +    void  *priv;                    /* private data */
10606 +    void  *os_state;                /* os-specific device state */
10607 +
10608 +    int    rmInitialized;
10609 +    int    flags;
10610 +
10611 +    /* PCI config info */
10612 +    NvU32 domain;
10613 +    NvU16 bus;
10614 +    NvU16 slot;
10615 +    NvU16 vendor_id;
10616 +    NvU16 device_id;
10617 +    NvU16 subsystem_id;
10618 +    NvU32 gpu_id;
10619 +    void *handle;
10620 +
10621 +    NvU32 pci_cfg_space[16];
10622 +
10623 +    /* physical characteristics */
10624 +    litmus_nv_aperture_t bars[3];
10625 +    litmus_nv_aperture_t *regs;
10626 +    litmus_nv_aperture_t *fb, ud;
10627 +    litmus_nv_aperture_t agp;
10628 +
10629 +    NvU32  interrupt_line;
10630 +
10631 +    NvU32 agp_config;
10632 +    NvU32 agp_status;
10633 +
10634 +    NvU32 primary_vga;
10635 +
10636 +    NvU32 sim_env;
10637 +
10638 +    NvU32 rc_timer_enabled;
10639 +
10640 +    /* list of events allocated for this device */
10641 +    void *event_list;
10642 +
10643 +    void *kern_mappings;
10644 +
10645 +} litmus_nv_state_t;
10646 +
10647 +typedef struct work_struct litmus_nv_task_t;
10648 +
10649 +typedef struct litmus_nv_work_s {
10650 +    litmus_nv_task_t task;
10651 +    void *data;
10652 +} litmus_nv_work_t;
10653 +
10654 +typedef struct litmus_nv_linux_state_s {
10655 +    litmus_nv_state_t nv_state;
10656 +    atomic_t usage_count;
10657 +
10658 +    struct pci_dev *dev;
10659 +    void *agp_bridge;
10660 +    void *alloc_queue;
10661 +
10662 +    void *timer_sp;
10663 +    void *isr_sp;
10664 +    void *pci_cfgchk_sp;
10665 +    void *isr_bh_sp;
10666 +
10667 +#ifdef CONFIG_CUDA_4_0
10668 +	char registry_keys[512];
10669 +#endif
10670 +
10671 +    /* keep track of any pending bottom halves */
10672 +    struct tasklet_struct tasklet;
10673 +    litmus_nv_work_t work;
10674 +
10675 +    /* get a timer callback every second */
10676 +    struct timer_list rc_timer;
10677 +
10678 +    /* lock for linux-specific data, not used by core rm */
10679 +    struct semaphore ldata_lock;
10680 +
10681 +    /* lock for linux-specific alloc queue */
10682 +    struct semaphore at_lock;
10683 +
10684 +#if 0
10685 +#if defined(NV_USER_MAP)
10686 +    /* list of user mappings */
10687 +    struct nv_usermap_s *usermap_list;
10688 +
10689 +    /* lock for VMware-specific mapping list */
10690 +    struct semaphore mt_lock;
10691 +#endif /* defined(NV_USER_MAP) */
10692 +#if defined(NV_PM_SUPPORT_OLD_STYLE_APM)
10693 +	void *apm_nv_dev;
10694 +#endif
10695 +#endif
10696 +
10697 +    NvU32 device_num;
10698 +    struct litmus_nv_linux_state_s *next;
10699 +} litmus_nv_linux_state_t;
10700 +
10701 +void dump_nvidia_info(const struct tasklet_struct *t)
10702 +{
10703 +	litmus_nv_state_t* nvstate = NULL;
10704 +	litmus_nv_linux_state_t* linuxstate =  NULL;
10705 +	struct pci_dev* pci = NULL;
10706 +
10707 +	nvstate = (litmus_nv_state_t*)(t->data);
10708 +
10709 +	if(nvstate)
10710 +	{
10711 +		TRACE("NV State:\n"
10712 +			  "\ttasklet ptr = %p\n"
10713 +			  "\tstate ptr = %p\n"
10714 +			  "\tprivate data ptr = %p\n"
10715 +			  "\tos state ptr = %p\n"
10716 +			  "\tdomain = %u\n"
10717 +			  "\tbus = %u\n"
10718 +			  "\tslot = %u\n"
10719 +			  "\tvendor_id = %u\n"
10720 +			  "\tdevice_id = %u\n"
10721 +			  "\tsubsystem_id = %u\n"
10722 +			  "\tgpu_id = %u\n"
10723 +			  "\tinterrupt_line = %u\n",
10724 +			  t,
10725 +			  nvstate,
10726 +			  nvstate->priv,
10727 +			  nvstate->os_state,
10728 +			  nvstate->domain,
10729 +			  nvstate->bus,
10730 +			  nvstate->slot,
10731 +			  nvstate->vendor_id,
10732 +			  nvstate->device_id,
10733 +			  nvstate->subsystem_id,
10734 +			  nvstate->gpu_id,
10735 +			  nvstate->interrupt_line);
10736 +
10737 +		linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state);
10738 +	}
10739 +	else
10740 +	{
10741 +		TRACE("INVALID NVSTATE????\n");
10742 +	}
10743 +
10744 +	if(linuxstate)
10745 +	{
10746 +		int ls_offset = (void*)(&(linuxstate->device_num)) - (void*)(linuxstate);
10747 +		int ns_offset_raw = (void*)(&(linuxstate->device_num)) - (void*)(&(linuxstate->nv_state));
10748 +		int ns_offset_desired = (void*)(&(linuxstate->device_num)) - (void*)(nvstate);
10749 +
10750 +
10751 +		TRACE("LINUX NV State:\n"
10752 +			  "\tlinux nv state ptr: %p\n"
10753 +			  "\taddress of tasklet: %p\n"
10754 +			  "\taddress of work: %p\n"
10755 +			  "\tusage_count: %d\n"
10756 +			  "\tdevice_num: %u\n"
10757 +			  "\ttasklet addr == this tasklet: %d\n"
10758 +			  "\tpci: %p\n",
10759 +			  linuxstate,
10760 +			  &(linuxstate->tasklet),
10761 +			  &(linuxstate->work),
10762 +			  atomic_read(&(linuxstate->usage_count)),
10763 +			  linuxstate->device_num,
10764 +			  (t == &(linuxstate->tasklet)),
10765 +			  linuxstate->dev);
10766 +
10767 +		pci = linuxstate->dev;
10768 +
10769 +		TRACE("Offsets:\n"
10770 +			  "\tOffset from LinuxState: %d, %x\n"
10771 +			  "\tOffset from NVState: %d, %x\n"
10772 +			  "\tOffset from parameter: %d, %x\n"
10773 +			  "\tdevice_num: %u\n",
10774 +			  ls_offset, ls_offset,
10775 +			  ns_offset_raw, ns_offset_raw,
10776 +			  ns_offset_desired, ns_offset_desired,
10777 +			  *((u32*)((void*)nvstate + ns_offset_desired)));
10778 +	}
10779 +	else
10780 +	{
10781 +		TRACE("INVALID LINUXNVSTATE?????\n");
10782 +	}
10783 +
10784 +#if 0
10785 +	if(pci)
10786 +	{
10787 +		TRACE("PCI DEV Info:\n"
10788 +			  "pci device ptr: %p\n"
10789 +			  "\tdevfn = %d\n"
10790 +			  "\tvendor = %d\n"
10791 +			  "\tdevice = %d\n"
10792 +			  "\tsubsystem_vendor = %d\n"
10793 +			  "\tsubsystem_device = %d\n"
10794 +			  "\tslot # = %d\n",
10795 +			  pci,
10796 +			  pci->devfn,
10797 +			  pci->vendor,
10798 +			  pci->device,
10799 +			  pci->subsystem_vendor,
10800 +			  pci->subsystem_device,
10801 +			  pci->slot->number);
10802 +	}
10803 +	else
10804 +	{
10805 +		TRACE("INVALID PCIDEV PTR?????\n");
10806 +	}
10807 +#endif
10808 +}
10809 +
10810 +static struct module* nvidia_mod = NULL;
10811 +int init_nvidia_info(void)
10812 +{
10813 +	mutex_lock(&module_mutex);
10814 +	nvidia_mod = find_module("nvidia");
10815 +	mutex_unlock(&module_mutex);
10816 +	if(nvidia_mod != NULL)
10817 +	{
10818 +		TRACE("%s : Found NVIDIA module. Core Code: %p to %p\n", __FUNCTION__,
10819 +			  (void*)(nvidia_mod->module_core),
10820 +			  (void*)(nvidia_mod->module_core) + nvidia_mod->core_size);
10821 +		init_nv_device_reg();
10822 +		return(0);
10823 +	}
10824 +	else
10825 +	{
10826 +		TRACE("%s : Could not find NVIDIA module!  Loaded?\n", __FUNCTION__);
10827 +		return(-1);
10828 +	}
10829 +}
10830 +
10831 +void shutdown_nvidia_info(void)
10832 +{
10833 +	nvidia_mod = NULL;
10834 +	mb();
10835 +}
10836 +
10837 +/* works with pointers to static data inside the module too. */
10838 +int is_nvidia_func(void* func_addr)
10839 +{
10840 +	int ret = 0;
10841 +	if(nvidia_mod)
10842 +	{
10843 +		ret = within_module_core((long unsigned int)func_addr, nvidia_mod);
10844 +		/*
10845 +		if(ret)
10846 +		{
10847 +			TRACE("%s : %p is in NVIDIA module: %d\n",
10848 +			  	__FUNCTION__, func_addr, ret);
10849 +		}*/
10850 +	}
10851 +
10852 +	return(ret);
10853 +}
10854 +
10855 +u32 get_tasklet_nv_device_num(const struct tasklet_struct *t)
10856 +{
10857 +	// life is too short to use hard-coded offsets.  update this later.
10858 +	litmus_nv_state_t* nvstate = (litmus_nv_state_t*)(t->data);
10859 +	litmus_nv_linux_state_t* linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state);
10860 +
10861 +	BUG_ON(linuxstate->device_num >= NV_DEVICE_NUM);
10862 +
10863 +	return(linuxstate->device_num);
10864 +
10865 +	//int DEVICE_NUM_OFFSET = (void*)(&(linuxstate->device_num)) - (void*)(nvstate);
10866 +
10867 +#if 0
10868 +	// offset determined through observed behavior of the NV driver.
10869 +	//const int DEVICE_NUM_OFFSET = 0x480;  // CUDA 4.0 RC1
10870 +	//const int DEVICE_NUM_OFFSET = 0x510;  // CUDA 4.0 RC2
10871 +
10872 +	void* state = (void*)(t->data);
10873 +	void* device_num_ptr = state + DEVICE_NUM_OFFSET;
10874 +
10875 +	//dump_nvidia_info(t);
10876 +	return(*((u32*)device_num_ptr));
10877 +#endif
10878 +}
10879 +
10880 +u32 get_work_nv_device_num(const struct work_struct *t)
10881 +{
10882 +	// offset determined through observed behavior of the NV driver.
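      +	// (struct litmus_nv_work_s places the work_struct first, so the pointer
      +	//  stored immediately after it -- its 'data' field -- leads to the device number.)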
10883 +	const int DEVICE_NUM_OFFSET = sizeof(struct work_struct);
10884 +	void* state = (void*)(t);
10885 +	void** device_num_ptr = state + DEVICE_NUM_OFFSET;
10886 +	return(*((u32*)(*device_num_ptr)));
10887 +}
10888 +
10889 +
10890 +typedef struct {
10891 +	raw_spinlock_t	lock;
10892 +	int	nr_owners;
10893 +	struct task_struct* max_prio_owner;
10894 +	struct task_struct*	owners[NV_MAX_SIMULT_USERS];
10895 +}nv_device_registry_t;
10896 +
10897 +static nv_device_registry_t NV_DEVICE_REG[NV_DEVICE_NUM];
10898 +
10899 +int init_nv_device_reg(void)
10900 +{
10901 +	int i;
10902 +
10903 +	memset(NV_DEVICE_REG, 0, sizeof(NV_DEVICE_REG));
10904 +
10905 +	for(i = 0; i < NV_DEVICE_NUM; ++i)
10906 +	{
10907 +		raw_spin_lock_init(&NV_DEVICE_REG[i].lock);
10908 +	}
10909 +
10910 +	return(1);
10911 +}
10912 +
10913 +/* used to get the nv_device_id for a given owner.
10914 + (a return of -1 means the associated device id could not be found) */
10915 +/*
10916 +int get_nv_device_id(struct task_struct* owner)
10917 +{
10918 +	int i;
10919 +	if(!owner)
10920 +	{
10921 +		return(-1);
10922 +	}
10923 +	for(i = 0; i < NV_DEVICE_NUM; ++i)
10924 +	{
10925 +		if(NV_DEVICE_REG[i].device_owner == owner)
10926 +			return(i);
10927 +	}
10928 +	return(-1);
10929 +}
10930 +*/
10931 +
10932 +static struct task_struct* find_hp_owner(nv_device_registry_t *reg, struct task_struct *skip) {
10933 +	int i;
10934 +	struct task_struct *found = NULL;
10935 +	for(i = 0; i < reg->nr_owners; ++i) {
10936 +		if(reg->owners[i] && reg->owners[i] != skip && litmus->compare(reg->owners[i], found)) {
10937 +			found = reg->owners[i];
10938 +		}
10939 +	}
10940 +	return found;
10941 +}
10942 +
10943 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
10944 +void pai_check_priority_increase(struct task_struct *t, int reg_device_id)
10945 +{
10946 +	unsigned long flags;
10947 +	nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id];
10948 +
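      +	// unlocked fast-path check; the test is repeated below once the
      +	// registry lock is actually held.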
10949 +	if(reg->max_prio_owner != t) {
10950 +
10951 +		raw_spin_lock_irqsave(&reg->lock, flags);
10952 +
10953 +		if(reg->max_prio_owner != t) {
10954 +			if(litmus->compare(t, reg->max_prio_owner)) {
10955 +				litmus->change_prio_pai_tasklet(reg->max_prio_owner, t);
10956 +				reg->max_prio_owner = t;
10957 +			}
10958 +		}
10959 +
10960 +		raw_spin_unlock_irqrestore(&reg->lock, flags);
10961 +	}
10962 +}
10963 +
10964 +
10965 +void pai_check_priority_decrease(struct task_struct *t, int reg_device_id)
10966 +{
10967 +	unsigned long flags;
10968 +	nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id];
10969 +
10970 +	if(reg->max_prio_owner == t) {
10971 +
10972 +		raw_spin_lock_irqsave(&reg->lock, flags);
10973 +
10974 +		if(reg->max_prio_owner == t) {
10975 +			reg->max_prio_owner = find_hp_owner(reg, NULL);
10976 +			if(reg->max_prio_owner != t) {
10977 +				litmus->change_prio_pai_tasklet(t, reg->max_prio_owner);
10978 +			}
10979 +		}
10980 +
10981 +		raw_spin_unlock_irqrestore(&reg->lock, flags);
10982 +	}
10983 +}
10984 +#endif
10985 +
10986 +static int __reg_nv_device(int reg_device_id, struct task_struct *t)
10987 +{
10988 +	int ret = 0;
10989 +	int i;
10990 +	struct task_struct *old_max = NULL;
10991 +	unsigned long flags;
10992 +	nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id];
10993 +
10994 +    if(test_bit(reg_device_id, &tsk_rt(t)->held_gpus)) {
10995 +		// TODO: check if task is already registered.
10996 +		return ret;  // assume already registered.
10997 +	}
10998 +
10999 +
11000 +	raw_spin_lock_irqsave(&reg->lock, flags);
11001 +
11002 +	if(reg->nr_owners < NV_MAX_SIMULT_USERS) {
11003 +		TRACE_TASK(t, "registers GPU %d\n", reg_device_id);
11004 +		for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) {
11005 +			if(reg->owners[i] == NULL) {
11006 +				reg->owners[i] = t;
11007 +
11008 +				//if(edf_higher_prio(t, reg->max_prio_owner)) {
11009 +				if(litmus->compare(t, reg->max_prio_owner)) {
11010 +					old_max = reg->max_prio_owner;
11011 +					reg->max_prio_owner = t;
11012 +
11013 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
11014 +					litmus->change_prio_pai_tasklet(old_max, t);
11015 +#endif
11016 +				}
11017 +
11018 +#ifdef CONFIG_LITMUS_SOFTIRQD
11019 +				down_and_set_stat(t, HELD, &tsk_rt(t)->klitirqd_sem);
11020 +#endif
11021 +				++(reg->nr_owners);
11022 +
11023 +				break;
11024 +			}
11025 +		}
11026 +	}
11027 +	else
11028 +	{
11029 +		TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id);
11030 +		//ret = -EBUSY;
11031 +	}
11032 +
11033 +	raw_spin_unlock_irqrestore(&reg->lock, flags);
11034 +
11035 +	__set_bit(reg_device_id, &tsk_rt(t)->held_gpus);
11036 +
11037 +	return(ret);
11038 +}
11039 +
11040 +static int __clear_reg_nv_device(int de_reg_device_id, struct task_struct *t)
11041 +{
11042 +	int ret = 0;
11043 +	int i;
11044 +	unsigned long flags;
11045 +	nv_device_registry_t *reg = &NV_DEVICE_REG[de_reg_device_id];
11046 +
11047 +#ifdef CONFIG_LITMUS_SOFTIRQD
11048 +    struct task_struct* klitirqd_th = get_klitirqd(de_reg_device_id);
11049 +#endif
11050 +
11051 +	if(!test_bit(de_reg_device_id, &tsk_rt(t)->held_gpus)) {
11052 +		return ret;
11053 +	}
11054 +
11055 +	raw_spin_lock_irqsave(&reg->lock, flags);
11056 +
11057 +	TRACE_TASK(t, "unregisters GPU %d\n", de_reg_device_id);
11058 +
11059 +	for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) {
11060 +		if(reg->owners[i] == t) {
11061 +#ifdef CONFIG_LITMUS_SOFTIRQD
11062 +			flush_pending(klitirqd_th, t);
11063 +#endif
11064 +			if(reg->max_prio_owner == t) {
11065 +				reg->max_prio_owner = find_hp_owner(reg, t);
11066 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
11067 +				litmus->change_prio_pai_tasklet(t, reg->max_prio_owner);
11068 +#endif
11069 +			}
11070 +
11071 +#ifdef CONFIG_LITMUS_SOFTIRQD
11072 +			up_and_set_stat(t, NOT_HELD, &tsk_rt(t)->klitirqd_sem);
11073 +#endif
11074 +
11075 +			reg->owners[i] = NULL;
11076 +			--(reg->nr_owners);
11077 +
11078 +			break;
11079 +		}
11080 +	}
11081 +
11082 +	raw_spin_unlock_irqrestore(&reg->lock, flags);
11083 +
11084 +	__clear_bit(de_reg_device_id, &tsk_rt(t)->held_gpus);
11085 +
11086 +	return(ret);
11087 +}
11088 +
11089 +
11090 +int reg_nv_device(int reg_device_id, int reg_action, struct task_struct *t)
11091 +{
11092 +	int ret;
11093 +
11094 +	if((reg_device_id < NV_DEVICE_NUM) && (reg_device_id >= 0))
11095 +	{
11096 +		if(reg_action)
11097 +			ret = __reg_nv_device(reg_device_id, t);
11098 +		else
11099 +			ret = __clear_reg_nv_device(reg_device_id, t);
11100 +	}
11101 +	else
11102 +	{
11103 +		ret = -ENODEV;
11104 +	}
11105 +
11106 +	return(ret);
11107 +}
11108 +
11109 +/* used to get the highest-priority owner of nv_device_id. */
11110 +struct task_struct* get_nv_max_device_owner(u32 target_device_id)
11111 +{
11112 +	struct task_struct *owner = NULL;
11113 +	BUG_ON(target_device_id >= NV_DEVICE_NUM);
11114 +	owner = NV_DEVICE_REG[target_device_id].max_prio_owner;
11115 +	return(owner);
11116 +}
11117 +
11118 +void lock_nv_registry(u32 target_device_id, unsigned long* flags)
11119 +{
11120 +	BUG_ON(target_device_id >= NV_DEVICE_NUM);
11121 +
11122 +	if(in_interrupt())
11123 +		TRACE("Locking registry for %d.\n", target_device_id);
11124 +	else
11125 +		TRACE_CUR("Locking registry for %d.\n", target_device_id);
11126 +
11127 +	raw_spin_lock_irqsave(&NV_DEVICE_REG[target_device_id].lock, *flags);
11128 +}
11129 +
11130 +void unlock_nv_registry(u32 target_device_id, unsigned long* flags)
11131 +{
11132 +	BUG_ON(target_device_id >= NV_DEVICE_NUM);
11133 +
11134 +	if(in_interrupt())
11135 +		TRACE("Unlocking registry for %d.\n", target_device_id);
11136 +	else
11137 +		TRACE_CUR("Unlocking registry for %d.\n", target_device_id);
11138 +
11139 +	raw_spin_unlock_irqrestore(&NV_DEVICE_REG[target_device_id].lock, *flags);
11140 +}
11141 +
11142 +
11143 +//void increment_nv_int_count(u32 device)
11144 +//{
11145 +//	unsigned long flags;
11146 +//	struct task_struct* owner;
11147 +//
11148 +//	lock_nv_registry(device, &flags);
11149 +//
11150 +//	owner = NV_DEVICE_REG[device].device_owner;
11151 +//	if(owner)
11152 +//	{
11153 +//		atomic_inc(&tsk_rt(owner)->nv_int_count);
11154 +//	}
11155 +//
11156 +//	unlock_nv_registry(device, &flags);
11157 +//}
11158 +//EXPORT_SYMBOL(increment_nv_int_count);
11159 +
11160 +
11161 diff --git a/litmus/preempt.c b/litmus/preempt.c
11162 index 5704d0b..28368d5 100644
11163 --- a/litmus/preempt.c
11164 +++ b/litmus/preempt.c
11165 @@ -30,6 +30,7 @@ void sched_state_will_schedule(struct task_struct* tsk)
11166  		/* Litmus tasks should never be subject to a remote
11167  		 * set_tsk_need_resched(). */
11168  		BUG_ON(is_realtime(tsk));
11169 +
11170  #ifdef CONFIG_PREEMPT_STATE_TRACE
11171  	TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n",
11172  		   __builtin_return_address(0));
11173 @@ -45,13 +46,17 @@ void sched_state_ipi(void)
11174  		/* Cause scheduler to be invoked.
11175  		 * This will cause a transition to WILL_SCHEDULE. */
11176  		set_tsk_need_resched(current);
11177 +		/*
11178  		TRACE_STATE("IPI -> set_tsk_need_resched(%s/%d)\n",
11179  			    current->comm, current->pid);
11180 +		*/
11181  	} else {
11182  		/* ignore */
11183 +		/*
11184  		TRACE_STATE("ignoring IPI in state %x (%s)\n",
11185  			    get_sched_state(),
11186  			    sched_state_name(get_sched_state()));
11187 +		*/
11188  	}
11189  }
11190  
11191 diff --git a/litmus/rsm_lock.c b/litmus/rsm_lock.c
11192 new file mode 100644
11193 index 0000000..75ed87c
11194 --- /dev/null
11195 +++ b/litmus/rsm_lock.c
11196 @@ -0,0 +1,796 @@
11197 +#include <linux/slab.h>
11198 +#include <linux/uaccess.h>
11199 +
11200 +#include <litmus/trace.h>
11201 +#include <litmus/sched_plugin.h>
11202 +#include <litmus/rsm_lock.h>
11203 +
11204 +//#include <litmus/edf_common.h>
11205 +
11206 +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
11207 +#include <litmus/gpu_affinity.h>
11208 +#endif
11209 +
11210 +
11211 +/* caller is responsible for locking */
11212 +static struct task_struct* rsm_mutex_find_hp_waiter(struct rsm_mutex *mutex,
11213 +                                             struct task_struct* skip)
11214 +{
11215 +    wait_queue_t        *q;
11216 +    struct list_head    *pos;
11217 +    struct task_struct  *queued = NULL, *found = NULL;
11218 +
11219 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
11220 +    dgl_wait_state_t    *dgl_wait = NULL;
11221 +#endif
11222 +
11223 +    list_for_each(pos, &mutex->wait.task_list) {
11224 +        q = list_entry(pos, wait_queue_t, task_list);
11225 +
11226 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
11227 +        if(q->func == dgl_wake_up) {
11228 +            dgl_wait = (dgl_wait_state_t*) q->private;
11229 +            if(tsk_rt(dgl_wait->task)->blocked_lock == &mutex->litmus_lock) {
11230 +                queued = dgl_wait->task;
11231 +            }
11232 +            else {
11233 +                queued = NULL;  // skip it.
11234 +            }
11235 +        }
11236 +        else {
11237 +            queued = (struct task_struct*) q->private;
11238 +        }
11239 +#else
11240 +        queued = (struct task_struct*) q->private;
11241 +#endif
11242 +
11243 +        /* Compare task prios, find high prio task. */
11244 +        //if (queued && queued != skip && edf_higher_prio(queued, found)) {
11245 +		if (queued && queued != skip && litmus->compare(queued, found)) {
11246 +            found = queued;
11247 +        }
11248 +    }
11249 +    return found;
11250 +}
11251 +
11252 +
11253 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
11254 +
11255 +int rsm_mutex_is_owner(struct litmus_lock *l, struct task_struct *t)
11256 +{
11257 +	struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
11258 +	return(mutex->owner == t);
11259 +}
11260 +
11261 +// return 1 if resource was immediately acquired.
11262 +// Assumes mutex->lock is held.
11263 +// Must set task state to TASK_UNINTERRUPTIBLE if task blocks.
11264 +int rsm_mutex_dgl_lock(struct litmus_lock *l, dgl_wait_state_t* dgl_wait,
11265 +					   wait_queue_t* wq_node)
11266 +{
11267 +	struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
11268 +	struct task_struct *t = dgl_wait->task;
11269 +
11270 +	int acquired_immediately = 0;
11271 +
11272 +	BUG_ON(t != current);
11273 +
11274 +	if (mutex->owner) {
11275 +		TRACE_TASK(t, "Enqueuing on lock %d.\n", l->ident);
11276 +
11277 +		init_dgl_waitqueue_entry(wq_node, dgl_wait);
11278 +
11279 +		set_task_state(t, TASK_UNINTERRUPTIBLE);
11280 +		__add_wait_queue_tail_exclusive(&mutex->wait, wq_node);
11281 +	} else {
11282 +		TRACE_TASK(t, "Acquired lock %d with no blocking.\n", l->ident);
11283 +
11284 +		/* it's ours now */
11285 +		mutex->owner = t;
11286 +
11287 +		raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock);
11288 +		binheap_add(&l->nest.hp_binheap_node, &tsk_rt(t)->hp_blocked_tasks,
11289 +					struct nested_info, hp_binheap_node);
11290 +		raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);
11291 +
11292 +		acquired_immediately = 1;
11293 +	}
11294 +
11295 +	return acquired_immediately;
11296 +}
11297 +
11298 +void rsm_mutex_enable_priority(struct litmus_lock *l,
11299 +							   dgl_wait_state_t* dgl_wait)
11300 +{
11301 +	struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
11302 +	struct task_struct *t = dgl_wait->task;
11303 +	struct task_struct *owner = mutex->owner;
11304 +	unsigned long flags = 0;  // these are unused under DGL coarse-grain locking
11305 +
11306 +	BUG_ON(owner == t);
11307 +
11308 +	tsk_rt(t)->blocked_lock = l;
11309 +	mb();
11310 +
11311 +	//if (edf_higher_prio(t, mutex->hp_waiter)) {
11312 +	if (litmus->compare(t, mutex->hp_waiter)) {
11313 +
11314 +		struct task_struct *old_max_eff_prio;
11315 +		struct task_struct *new_max_eff_prio;
11316 +		struct task_struct *new_prio = NULL;
11317 +
11318 +		if(mutex->hp_waiter)
11319 +			TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n",
11320 +					   mutex->hp_waiter->comm, mutex->hp_waiter->pid);
11321 +		else
11322 +			TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n");
11323 +
11324 +		raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
11325 +
11326 +		old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
11327 +		mutex->hp_waiter = t;
11328 +		l->nest.hp_waiter_eff_prio = effective_priority(mutex->hp_waiter);
11329 +		binheap_decrease(&l->nest.hp_binheap_node,
11330 +						 &tsk_rt(owner)->hp_blocked_tasks);
11331 +		new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
11332 +
11333 +		if(new_max_eff_prio != old_max_eff_prio) {
11334 +			TRACE_TASK(t, "is new hp_waiter.\n");
11335 +
11336 +			if ((effective_priority(owner) == old_max_eff_prio) ||
11337 +				//(__edf_higher_prio(new_max_eff_prio, BASE, owner, EFFECTIVE))){
11338 +				(litmus->__compare(new_max_eff_prio, BASE, owner, EFFECTIVE))){
11339 +				new_prio = new_max_eff_prio;
11340 +			}
11341 +		}
11342 +		else {
11343 +			TRACE_TASK(t, "no change in max_eff_prio of heap.\n");
11344 +		}
11345 +
11346 +		if(new_prio) {
11347 +			litmus->nested_increase_prio(owner, new_prio,
11348 +										 &mutex->lock, flags);  // unlocks lock.
11349 +		}
11350 +		else {
11351 +			raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
11352 +			unlock_fine_irqrestore(&mutex->lock, flags);
11353 +		}
11354 +	}
11355 +	else {
11356 +		TRACE_TASK(t, "no change in hp_waiter.\n");
11357 +		unlock_fine_irqrestore(&mutex->lock, flags);
11358 +	}
11359 +}
11360 +
11361 +static void select_next_lock_if_primary(struct litmus_lock *l,
11362 +										dgl_wait_state_t *dgl_wait)
11363 +{
11364 +	if(tsk_rt(dgl_wait->task)->blocked_lock == l) {
11365 +		TRACE_CUR("Lock %d in DGL was primary for %s/%d.\n",
11366 +				  l->ident, dgl_wait->task->comm, dgl_wait->task->pid);
11367 +		tsk_rt(dgl_wait->task)->blocked_lock = NULL;
11368 +		mb();
11369 +		select_next_lock(dgl_wait /*, l*/);  // pick the next lock to be blocked on
11370 +	}
11371 +	else {
11372 +		TRACE_CUR("Got lock early! Lock %d in DGL was NOT primary for %s/%d.\n",
11373 +				  l->ident, dgl_wait->task->comm, dgl_wait->task->pid);
11374 +	}
11375 +}
11376 +#endif
11377 +
11378 +
11379 +
11380 +
11381 +int rsm_mutex_lock(struct litmus_lock* l)
11382 +{
11383 +	struct task_struct *t = current;
11384 +	struct task_struct *owner;
11385 +	struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
11386 +	wait_queue_t wait;
11387 +	unsigned long flags;
11388 +
11389 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
11390 +	raw_spinlock_t *dgl_lock;
11391 +#endif
11392 +
11393 +	if (!is_realtime(t))
11394 +		return -EPERM;
11395 +
11396 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
11397 +	dgl_lock = litmus->get_dgl_spinlock(t);
11398 +#endif
11399 +
11400 +	lock_global_irqsave(dgl_lock, flags);
11401 +	lock_fine_irqsave(&mutex->lock, flags);
11402 +
11403 +	if (mutex->owner) {
11404 +		TRACE_TASK(t, "Blocking on lock %d.\n", l->ident);
11405 +
11406 +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
11407 +		// KLUDGE: don't count this suspension as time in the gpu
11408 +		// critical section
11409 +		if(tsk_rt(t)->held_gpus) {
11410 +			tsk_rt(t)->suspend_gpu_tracker_on_block = 1;
11411 +		}
11412 +#endif
11413 +
11414 +		/* resource is not free => must suspend and wait */
11415 +
11416 +		owner = mutex->owner;
11417 +
11418 +		init_waitqueue_entry(&wait, t);
11419 +
11420 +		tsk_rt(t)->blocked_lock = l;  /* record where we are blocked */
11421 +		mb();  // needed?
11422 +
11423 +		/* FIXME: interruptible would be nice some day */
11424 +		set_task_state(t, TASK_UNINTERRUPTIBLE);
11425 +
11426 +		__add_wait_queue_tail_exclusive(&mutex->wait, &wait);
11427 +
11428 +		/* check if we need to activate priority inheritance */
11429 +		//if (edf_higher_prio(t, mutex->hp_waiter)) {
11430 +		if (litmus->compare(t, mutex->hp_waiter)) {
11431 +
11432 +			struct task_struct *old_max_eff_prio;
11433 +			struct task_struct *new_max_eff_prio;
11434 +			struct task_struct *new_prio = NULL;
11435 +
11436 +			if(mutex->hp_waiter)
11437 +				TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n",
11438 +						   mutex->hp_waiter->comm, mutex->hp_waiter->pid);
11439 +			else
11440 +				TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n");
11441 +
11442 +			raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
11443 +
11444 +			old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
11445 +			mutex->hp_waiter = t;
11446 +			l->nest.hp_waiter_eff_prio = effective_priority(mutex->hp_waiter);
11447 +			binheap_decrease(&l->nest.hp_binheap_node,
11448 +							 &tsk_rt(owner)->hp_blocked_tasks);
11449 +			new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
11450 +
11451 +			if(new_max_eff_prio != old_max_eff_prio) {
11452 +				TRACE_TASK(t, "is new hp_waiter.\n");
11453 +
11454 +				if ((effective_priority(owner) == old_max_eff_prio) ||
11455 +					//(__edf_higher_prio(new_max_eff_prio, BASE, owner, EFFECTIVE))){
11456 +					(litmus->__compare(new_max_eff_prio, BASE, owner, EFFECTIVE))){
11457 +					new_prio = new_max_eff_prio;
11458 +				}
11459 +			}
11460 +			else {
11461 +				TRACE_TASK(t, "no change in max_eff_prio of heap.\n");
11462 +			}
11463 +
11464 +			if(new_prio) {
11465 +				litmus->nested_increase_prio(owner, new_prio, &mutex->lock,
11466 +											 flags);  // unlocks lock.
11467 +			}
11468 +			else {
11469 +				raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
11470 +				unlock_fine_irqrestore(&mutex->lock, flags);
11471 +			}
11472 +		}
11473 +		else {
11474 +			TRACE_TASK(t, "no change in hp_waiter.\n");
11475 +
11476 +			unlock_fine_irqrestore(&mutex->lock, flags);
11477 +		}
11478 +
11479 +		unlock_global_irqrestore(dgl_lock, flags);
11480 +
11481 +		TS_LOCK_SUSPEND;
11482 +
11483 +		/* We depend on the FIFO order.  Thus, we don't need to recheck
11484 +		 * when we wake up; we are guaranteed to have the lock since
11485 +		 * there is only one wake up per release.
11486 +		 */
11487 +
11488 +		schedule();
11489 +
11490 +		TS_LOCK_RESUME;
11491 +
11492 +		/* Since we hold the lock, no other task will change
11493 +		 * ->owner. We can thus check it without acquiring the spin
11494 +		 * lock. */
11495 +		BUG_ON(mutex->owner != t);
11496 +
11497 +		TRACE_TASK(t, "Acquired lock %d.\n", l->ident);
11498 +
11499 +	} else {
11500 +		TRACE_TASK(t, "Acquired lock %d with no blocking.\n", l->ident);
11501 +
11502 +		/* it's ours now */
11503 +		mutex->owner = t;
11504 +
11505 +		raw_spin_lock(&tsk_rt(mutex->owner)->hp_blocked_tasks_lock);
11506 +		binheap_add(&l->nest.hp_binheap_node, &tsk_rt(t)->hp_blocked_tasks,
11507 +					struct nested_info, hp_binheap_node);
11508 +		raw_spin_unlock(&tsk_rt(mutex->owner)->hp_blocked_tasks_lock);
11509 +
11510 +
11511 +		unlock_fine_irqrestore(&mutex->lock, flags);
11512 +		unlock_global_irqrestore(dgl_lock, flags);
11513 +	}
11514 +
11515 +	return 0;
11516 +}
11517 +
11518 +
11519 +
11520 +int rsm_mutex_unlock(struct litmus_lock* l)
11521 +{
11522 +	struct task_struct *t = current, *next = NULL;
11523 +	struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
11524 +	unsigned long flags;
11525 +
11526 +	struct task_struct *old_max_eff_prio;
11527 +
11528 +	int wake_up_task = 1;
11529 +
11530 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
11531 +	dgl_wait_state_t *dgl_wait = NULL;
11532 +	raw_spinlock_t *dgl_lock = litmus->get_dgl_spinlock(t);
11533 +#endif
11534 +
11535 +	int err = 0;
11536 +
11537 +	if (mutex->owner != t) {
11538 +		err = -EINVAL;
11539 +		return err;
11540 +	}
11541 +
11542 +	lock_global_irqsave(dgl_lock, flags);
11543 +	lock_fine_irqsave(&mutex->lock, flags);
11544 +
11545 +	raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock);
11546 +
11547 +	TRACE_TASK(t, "Freeing lock %d\n", l->ident);
11548 +
11549 +	old_max_eff_prio = top_priority(&tsk_rt(t)->hp_blocked_tasks);
11550 +	binheap_delete(&l->nest.hp_binheap_node, &tsk_rt(t)->hp_blocked_tasks);
11551 +
11552 +	if(tsk_rt(t)->inh_task){
11553 +		struct task_struct *new_max_eff_prio =
11554 +			top_priority(&tsk_rt(t)->hp_blocked_tasks);
11555 +
11556 +		if((new_max_eff_prio == NULL) ||
11557 +		      /* there was a change in eff prio */
11558 +		   (  (new_max_eff_prio != old_max_eff_prio) &&
11559 +			/* and owner had the old eff prio */
11560 +			  (effective_priority(t) == old_max_eff_prio))  )
11561 +		{
11562 +			// old_max_eff_prio > new_max_eff_prio
11563 +
11564 +			//if(__edf_higher_prio(new_max_eff_prio, BASE, t, EFFECTIVE)) {
11565 +			if(litmus->__compare(new_max_eff_prio, BASE, t, EFFECTIVE)) {
11566 +				TRACE_TASK(t, "new_max_eff_prio > task's eff_prio-- new_max_eff_prio: %s/%d   task: %s/%d [%s/%d]\n",
11567 +						   new_max_eff_prio->comm, new_max_eff_prio->pid,
11568 +						   t->comm, t->pid, tsk_rt(t)->inh_task->comm,
11569 +						   tsk_rt(t)->inh_task->pid);
11570 +				WARN_ON(1);
11571 +			}
11572 +
11573 +			litmus->decrease_prio(t, new_max_eff_prio);
11574 +		}
11575 +	}
11576 +
11577 +	if(binheap_empty(&tsk_rt(t)->hp_blocked_tasks) &&
11578 +	   tsk_rt(t)->inh_task != NULL)
11579 +	{
11580 +		WARN_ON(tsk_rt(t)->inh_task != NULL);
11581 +		TRACE_TASK(t, "No more locks are held, but eff_prio = %s/%d\n",
11582 +				   tsk_rt(t)->inh_task->comm, tsk_rt(t)->inh_task->pid);
11583 +	}
11584 +
11585 +	raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);
11586 +
11587 +
11588 +	/* check if there are jobs waiting for this resource */
11589 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
11590 +	__waitqueue_dgl_remove_first(&mutex->wait, &dgl_wait, &next);
11591 +	if(dgl_wait) {
11592 +		next = dgl_wait->task;
11593 +		//select_next_lock_if_primary(l, dgl_wait);
11594 +	}
11595 +#else
11596 +	next = __waitqueue_remove_first(&mutex->wait);
11597 +#endif
11598 +	if (next) {
11599 +		/* next becomes the resource holder */
11600 +		mutex->owner = next;
11601 +		TRACE_CUR("lock ownership passed to %s/%d\n", next->comm, next->pid);
11602 +
11603 +		/* determine new hp_waiter if necessary */
11604 +		if (next == mutex->hp_waiter) {
11605 +
11606 +			TRACE_TASK(next, "was highest-prio waiter\n");
11607 +			/* next has the highest priority --- it doesn't need to
11608 +			 * inherit.  However, we need to make sure that the
11609 +			 * next-highest priority in the queue is reflected in
11610 +			 * hp_waiter. */
11611 +			mutex->hp_waiter = rsm_mutex_find_hp_waiter(mutex, next);
11612 +			l->nest.hp_waiter_eff_prio = (mutex->hp_waiter) ?
11613 +				effective_priority(mutex->hp_waiter) :
11614 +				NULL;
11615 +
11616 +			if (mutex->hp_waiter)
11617 +				TRACE_TASK(mutex->hp_waiter, "is new highest-prio waiter\n");
11618 +			else
11619 +				TRACE("no further waiters\n");
11620 +
11621 +			raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock);
11622 +
11623 +			binheap_add(&l->nest.hp_binheap_node,
11624 +						&tsk_rt(next)->hp_blocked_tasks,
11625 +						struct nested_info, hp_binheap_node);
11626 +
11627 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
11628 +			if(dgl_wait) {
11629 +				select_next_lock_if_primary(l, dgl_wait);
11630 +				//wake_up_task = atomic_dec_and_test(&dgl_wait->nr_remaining);
11631 +				--(dgl_wait->nr_remaining);
11632 +				wake_up_task = (dgl_wait->nr_remaining == 0);
11633 +			}
11634 +#endif
11635 +			raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock);
11636 +		}
11637 +		else {
11638 +			/* Well, if 'next' is not the highest-priority waiter,
11639 +			 * then it (probably) ought to inherit the highest-priority
11640 +			 * waiter's priority. */
11641 +			TRACE_TASK(next, "is not hp_waiter of lock %d.\n", l->ident);
11642 +
11643 +			raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock);
11644 +
11645 +			binheap_add(&l->nest.hp_binheap_node,
11646 +						&tsk_rt(next)->hp_blocked_tasks,
11647 +						struct nested_info, hp_binheap_node);
11648 +
11649 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
11650 +			if(dgl_wait) {
11651 +				select_next_lock_if_primary(l, dgl_wait);
11652 +				--(dgl_wait->nr_remaining);
11653 +				wake_up_task = (dgl_wait->nr_remaining == 0);
11654 +			}
11655 +#endif
11656 +
11657 +			/* It is possible that 'next' *should* be the hp_waiter, but isn't
11658 +			 * because that update hasn't yet executed (the update operation is
11659 +			 * probably blocked on mutex->lock). So only inherit if the top of
11660 +			 * 'next's heap is indeed the effective prio. of hp_waiter.
11661 +			 * (We use l->hp_waiter_eff_prio instead of effective_priority(hp_waiter)
11662 +			 * since the effective priority of hp_waiter can change (and the
11663 +			 * update has not made it to this lock).)
11664 +			 */
11665 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
11666 +			if((l->nest.hp_waiter_eff_prio != NULL) &&
11667 +			   (top_priority(&tsk_rt(next)->hp_blocked_tasks) ==
11668 +													l->nest.hp_waiter_eff_prio))
11669 +			{
11670 +				if(dgl_wait && tsk_rt(next)->blocked_lock) {
11671 +					BUG_ON(wake_up_task);
11672 +					//if(__edf_higher_prio(l->nest.hp_waiter_eff_prio, BASE, next, EFFECTIVE)) {
11673 +					if(litmus->__compare(l->nest.hp_waiter_eff_prio, BASE, next, EFFECTIVE)) {
11674 +						litmus->nested_increase_prio(next,
11675 +							l->nest.hp_waiter_eff_prio, &mutex->lock, flags);  // unlocks lock && hp_blocked_tasks_lock.
11676 +						goto out;  // all spinlocks are released.  bail out now.
11677 +					}
11678 +				}
11679 +				else {
11680 +					litmus->increase_prio(next, l->nest.hp_waiter_eff_prio);
11681 +				}
11682 +			}
11683 +
11684 +			raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock);
11685 +#else
11686 +			if(likely(top_priority(&tsk_rt(next)->hp_blocked_tasks) ==
11687 +													l->nest.hp_waiter_eff_prio))
11688 +			{
11689 +				litmus->increase_prio(next, l->nest.hp_waiter_eff_prio);
11690 +			}
11691 +			raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock);
11692 +#endif
11693 +		}
11694 +
11695 +		if(wake_up_task) {
11696 +			TRACE_TASK(next, "waking up since it is no longer blocked.\n");
11697 +
11698 +			tsk_rt(next)->blocked_lock = NULL;
11699 +			mb();
11700 +
11701 +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
11702 +			// re-enable tracking
11703 +			if(tsk_rt(next)->held_gpus) {
11704 +				tsk_rt(next)->suspend_gpu_tracker_on_block = 0;
11705 +			}
11706 +#endif
11707 +
11708 +			wake_up_process(next);
11709 +		}
11710 +		else {
11711 +			TRACE_TASK(next, "is still blocked.\n");
11712 +		}
11713 +	}
11714 +	else {
11715 +		/* becomes available */
11716 +		mutex->owner = NULL;
11717 +	}
11718 +
11719 +	unlock_fine_irqrestore(&mutex->lock, flags);
11720 +
11721 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
11722 +out:
11723 +#endif
11724 +	unlock_global_irqrestore(dgl_lock, flags);
11725 +
11726 +	return err;
11727 +}
11728 +
11729 +
11730 +void rsm_mutex_propagate_increase_inheritance(struct litmus_lock* l,
11731 +											struct task_struct* t,
11732 +											raw_spinlock_t* to_unlock,
11733 +											unsigned long irqflags)
11734 +{
11735 +	struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
11736 +
11737 +	// relay-style locking
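      +	// (take this mutex's fine-grained lock before releasing the previous one,
      +	//  so a concurrent release cannot slip in between steps of the propagation walk.)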
11738 +	lock_fine(&mutex->lock);
11739 +	unlock_fine(to_unlock);
11740 +
11741 +	if(tsk_rt(t)->blocked_lock == l) {  // prevent race on tsk_rt(t)->blocked
11742 +		struct task_struct *owner = mutex->owner;
11743 +
11744 +		struct task_struct *old_max_eff_prio;
11745 +		struct task_struct *new_max_eff_prio;
11746 +
11747 +		raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
11748 +
11749 +		old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
11750 +
11751 +		//if((t != mutex->hp_waiter) && edf_higher_prio(t, mutex->hp_waiter)) {
11752 +		if((t != mutex->hp_waiter) && litmus->compare(t, mutex->hp_waiter)) {
11753 +			TRACE_TASK(t, "is new highest-prio waiter by propagation.\n");
11754 +			mutex->hp_waiter = t;
11755 +		}
11756 +		if(t == mutex->hp_waiter) {
11757 +			// reflect the decreased priority in the heap node.
11758 +			l->nest.hp_waiter_eff_prio = effective_priority(mutex->hp_waiter);
11759 +
11760 +			BUG_ON(!binheap_is_in_heap(&l->nest.hp_binheap_node));
11761 +			BUG_ON(!binheap_is_in_this_heap(&l->nest.hp_binheap_node,
11762 +											&tsk_rt(owner)->hp_blocked_tasks));
11763 +
11764 +			binheap_decrease(&l->nest.hp_binheap_node,
11765 +							 &tsk_rt(owner)->hp_blocked_tasks);
11766 +		}
11767 +
11768 +		new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
11769 +
11770 +
11771 +		if(new_max_eff_prio != old_max_eff_prio) {
11772 +			// new_max_eff_prio > old_max_eff_prio holds.
11773 +			if ((effective_priority(owner) == old_max_eff_prio) ||
11774 +				//(__edf_higher_prio(new_max_eff_prio, BASE, owner, EFFECTIVE))) {
11775 +				(litmus->__compare(new_max_eff_prio, BASE, owner, EFFECTIVE))) {
11776 +				TRACE_CUR("Propagating inheritance to holder of lock %d.\n",
11777 +						  l->ident);
11778 +
11779 +				// beware: recursion
11780 +				litmus->nested_increase_prio(owner, new_max_eff_prio,
11781 +											 &mutex->lock, irqflags);  // unlocks mutex->lock
11782 +			}
11783 +			else {
11784 +				TRACE_CUR("Lower priority than holder %s/%d.  No propagation.\n",
11785 +						  owner->comm, owner->pid);
11786 +				raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
11787 +				unlock_fine_irqrestore(&mutex->lock, irqflags);
11788 +			}
11789 +		}
11790 +		else {
11791 +			TRACE_TASK(mutex->owner, "No change in maximum effective priority.\n");
11792 +			raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
11793 +			unlock_fine_irqrestore(&mutex->lock, irqflags);
11794 +		}
11795 +	}
11796 +	else {
11797 +		struct litmus_lock *still_blocked = tsk_rt(t)->blocked_lock;
11798 +
11799 +		TRACE_TASK(t, "is not blocked on lock %d.\n", l->ident);
11800 +		if(still_blocked) {
11801 +			TRACE_TASK(t, "is still blocked on a lock though (lock %d).\n",
11802 +					   still_blocked->ident);
11803 +			if(still_blocked->ops->propagate_increase_inheritance) {
11804 +				/* due to relay-style nesting of spinlocks (acq. A, acq. B, free A, free B)
11805 +				 we know that task 't' has not released any locks behind us in this
11806 +				 chain.  Propagation just needs to catch up with task 't'. */
11807 +				still_blocked->ops->propagate_increase_inheritance(still_blocked,
11808 +																   t,
11809 +																   &mutex->lock,
11810 +																   irqflags);
11811 +			}
11812 +			else {
11813 +				TRACE_TASK(t,
11814 +						   "Inheritor is blocked on lock (%p) that does not "
11815 +						   "support nesting!\n",
11816 +						   still_blocked);
11817 +				unlock_fine_irqrestore(&mutex->lock, irqflags);
11818 +			}
11819 +		}
11820 +		else {
11821 +			unlock_fine_irqrestore(&mutex->lock, irqflags);
11822 +		}
11823 +	}
11824 +}
11825 +
11826 +
11827 +void rsm_mutex_propagate_decrease_inheritance(struct litmus_lock* l,
11828 +											 struct task_struct* t,
11829 +											 raw_spinlock_t* to_unlock,
11830 +											 unsigned long irqflags)
11831 +{
11832 +	struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
11833 +
11834 +	// relay-style locking
11835 +	lock_fine(&mutex->lock);
11836 +	unlock_fine(to_unlock);
11837 +
11838 +	if(tsk_rt(t)->blocked_lock == l) {  // prevent race on tsk_rt(t)->blocked
11839 +		if(t == mutex->hp_waiter) {
11840 +			struct task_struct *owner = mutex->owner;
11841 +
11842 +			struct task_struct *old_max_eff_prio;
11843 +			struct task_struct *new_max_eff_prio;
11844 +
11845 +			raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
11846 +
11847 +			old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
11848 +
11849 +			binheap_delete(&l->nest.hp_binheap_node, &tsk_rt(owner)->hp_blocked_tasks);
11850 +			mutex->hp_waiter = rsm_mutex_find_hp_waiter(mutex, NULL);
11851 +			l->nest.hp_waiter_eff_prio = (mutex->hp_waiter) ?
11852 +				effective_priority(mutex->hp_waiter) : NULL;
11853 +			binheap_add(&l->nest.hp_binheap_node,
11854 +						&tsk_rt(owner)->hp_blocked_tasks,
11855 +						struct nested_info, hp_binheap_node);
11856 +
11857 +			new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
11858 +
11859 +			if((old_max_eff_prio != new_max_eff_prio) &&
11860 +			   (effective_priority(owner) == old_max_eff_prio))
11861 +			{
11862 +				// Need to set new effective_priority for owner
11863 +
11864 +				struct task_struct *decreased_prio;
11865 +
11866 +				TRACE_CUR("Propagating decreased inheritance to holder of lock %d.\n",
11867 +						  l->ident);
11868 +
11869 +				//if(__edf_higher_prio(new_max_eff_prio, BASE, owner, BASE)) {
11870 +				if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) {
11871 +					TRACE_CUR("%s/%d has greater base priority than base priority of owner (%s/%d) of lock %d.\n",
11872 +							  (new_max_eff_prio) ? new_max_eff_prio->comm : "nil",
11873 +							  (new_max_eff_prio) ? new_max_eff_prio->pid : -1,
11874 +							  owner->comm,
11875 +							  owner->pid,
11876 +							  l->ident);
11877 +
11878 +					decreased_prio = new_max_eff_prio;
11879 +				}
11880 +				else {
11881 +					TRACE_CUR("%s/%d has lesser base priority than base priority of owner (%s/%d) of lock %d.\n",
11882 +							  (new_max_eff_prio) ? new_max_eff_prio->comm : "nil",
11883 +							  (new_max_eff_prio) ? new_max_eff_prio->pid : -1,
11884 +							  owner->comm,
11885 +							  owner->pid,
11886 +							  l->ident);
11887 +
11888 +					decreased_prio = NULL;
11889 +				}
11890 +
11891 +				// beware: recursion
11892 +				litmus->nested_decrease_prio(owner, decreased_prio, &mutex->lock, irqflags);	// will unlock mutex->lock
11893 +			}
11894 +			else {
11895 +				raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
11896 +				unlock_fine_irqrestore(&mutex->lock, irqflags);
11897 +			}
11898 +		}
11899 +		else {
11900 +			TRACE_TASK(t, "is not hp_waiter.  No propagation.\n");
11901 +			unlock_fine_irqrestore(&mutex->lock, irqflags);
11902 +		}
11903 +	}
11904 +	else {
11905 +		struct litmus_lock *still_blocked = tsk_rt(t)->blocked_lock;
11906 +
11907 +		TRACE_TASK(t, "is not blocked on lock %d.\n", l->ident);
11908 +		if(still_blocked) {
11909 +			TRACE_TASK(t, "is still blocked on a lock though (lock %d).\n",
11910 +					   still_blocked->ident);
11911 +			if(still_blocked->ops->propagate_decrease_inheritance) {
11912 +				/* due to linked nesting of spinlocks (acq. A, acq. B, free A, free B)
11913 +				 we know that task 't' has not released any locks behind us in this
11914 +				 chain.  Propagation just needs to catch up with task 't'. */
11915 +				still_blocked->ops->propagate_decrease_inheritance(still_blocked,
11916 +																   t,
11917 +																   &mutex->lock,
11918 +																   irqflags);
11919 +			}
11920 +			else {
11921 +				TRACE_TASK(t, "Inheritor is blocked on lock (%p) that does not support nesting!\n",
11922 +						   still_blocked);
11923 +				unlock_fine_irqrestore(&mutex->lock, irqflags);
11924 +			}
11925 +		}
11926 +		else {
11927 +			unlock_fine_irqrestore(&mutex->lock, irqflags);
11928 +		}
11929 +	}
11930 +}
11931 +
11932 +
11933 +int rsm_mutex_close(struct litmus_lock* l)
11934 +{
11935 +	struct task_struct *t = current;
11936 +	struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
11937 +	unsigned long flags;
11938 +
11939 +	int owner;
11940 +
11941 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
11942 +	raw_spinlock_t *dgl_lock = litmus->get_dgl_spinlock(t);
11943 +#endif
11944 +
11945 +	lock_global_irqsave(dgl_lock, flags);
11946 +	lock_fine_irqsave(&mutex->lock, flags);
11947 +
11948 +	owner = (mutex->owner == t);
11949 +
11950 +	unlock_fine_irqrestore(&mutex->lock, flags);
11951 +	unlock_global_irqrestore(dgl_lock, flags);
11952 +
11953 +	if (owner)
11954 +		rsm_mutex_unlock(l);
11955 +
11956 +	return 0;
11957 +}
11958 +
11959 +void rsm_mutex_free(struct litmus_lock* lock)
11960 +{
11961 +	kfree(rsm_mutex_from_lock(lock));
11962 +}
11963 +
11964 +struct litmus_lock* rsm_mutex_new(struct litmus_lock_ops* ops)
11965 +{
11966 +	struct rsm_mutex* mutex;
11967 +
11968 +	mutex = kmalloc(sizeof(*mutex), GFP_KERNEL);
11969 +	if (!mutex)
11970 +		return NULL;
11971 +
11972 +	mutex->litmus_lock.ops = ops;
11973 +	mutex->owner   = NULL;
11974 +	mutex->hp_waiter = NULL;
11975 +	init_waitqueue_head(&mutex->wait);
11976 +
11977 +
11978 +#ifdef CONFIG_DEBUG_SPINLOCK
11979 +	{
11980 +		__raw_spin_lock_init(&mutex->lock,
11981 +							 ((struct litmus_lock*)mutex)->cheat_lockdep,
11982 +							 &((struct litmus_lock*)mutex)->key);
11983 +	}
11984 +#else
11985 +	raw_spin_lock_init(&mutex->lock);
11986 +#endif
11987 +
11988 +	((struct litmus_lock*)mutex)->nest.hp_waiter_ptr = &mutex->hp_waiter;
11989 +
11990 +	return &mutex->litmus_lock;
11991 +}
11992 +
11993 diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
11994 index 480c62b..be14dbe 100644
11995 --- a/litmus/sched_cedf.c
11996 +++ b/litmus/sched_cedf.c
11997 @@ -29,7 +29,7 @@
11998  #include <linux/percpu.h>
11999  #include <linux/sched.h>
12000  #include <linux/slab.h>
12001 -
12002 +#include <linux/uaccess.h>
12003  #include <linux/module.h>
12004  
12005  #include <litmus/litmus.h>
12006 @@ -42,6 +42,16 @@
12007  #include <litmus/clustered.h>
12008  
12009  #include <litmus/bheap.h>
12010 +#include <litmus/binheap.h>
12011 +
12012 +#ifdef CONFIG_LITMUS_LOCKING
12013 +#include <litmus/kfmlp_lock.h>
12014 +#endif
12015 +
12016 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
12017 +#include <litmus/rsm_lock.h>
12018 +#include <litmus/ikglp_lock.h>
12019 +#endif
12020  
12021  #ifdef CONFIG_SCHED_CPU_AFFINITY
12022  #include <litmus/affinity.h>
12023 @@ -49,7 +59,27 @@
12024  
12025  /* to configure the cluster size */
12026  #include <litmus/litmus_proc.h>
12027 -#include <linux/uaccess.h>
12028 +
12029 +#ifdef CONFIG_SCHED_CPU_AFFINITY
12030 +#include <litmus/affinity.h>
12031 +#endif
12032 +
12033 +#ifdef CONFIG_LITMUS_SOFTIRQD
12034 +#include <litmus/litmus_softirq.h>
12035 +#endif
12036 +
12037 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
12038 +#include <linux/interrupt.h>
12039 +#include <litmus/trace.h>
12040 +#endif
12041 +
12042 +#ifdef CONFIG_LITMUS_NVIDIA
12043 +#include <litmus/nvidia_info.h>
12044 +#endif
12045 +
12046 +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
12047 +#include <litmus/gpu_affinity.h>
12048 +#endif
12049  
12050  /* Reference configuration variable. Determines which cache level is used to
12051   * group CPUs into clusters.  GLOBAL_CLUSTER, which is the default, means that
12052 @@ -70,7 +100,7 @@ typedef struct  {
12053  	struct task_struct*	linked;		/* only RT tasks */
12054  	struct task_struct*	scheduled;	/* only RT tasks */
12055  	atomic_t		will_schedule;	/* prevent unneeded IPIs */
12056 -	struct bheap_node*	hn;
12057 +	struct binheap_node hn;
12058  } cpu_entry_t;
12059  
12060  /* one cpu_entry_t per CPU */
12061 @@ -83,6 +113,14 @@ DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries);
12062  #define test_will_schedule(cpu) \
12063  	(atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule))
12064  
12065 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
12066 +struct tasklet_head
12067 +{
12068 +	struct tasklet_struct *head;
12069 +	struct tasklet_struct **tail;
12070 +};
12071 +#endif
12072 +
12073  /*
12074   * In C-EDF there is a cedf domain _per_ cluster
12075   * The number of clusters is dynamically determined accordingly to the
12076 @@ -96,10 +134,17 @@ typedef struct clusterdomain {
12077  	/* map of this cluster cpus */
12078  	cpumask_var_t	cpu_map;
12079  	/* the cpus queue themselves according to priority in here */
12080 -	struct bheap_node *heap_node;
12081 -	struct bheap      cpu_heap;
12082 +	struct binheap_handle cpu_heap;
12083  	/* lock for this cluster */
12084  #define cluster_lock domain.ready_lock
12085 +
12086 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
12087 +	struct tasklet_head pending_tasklets;
12088 +#endif
12089 +
12090 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
12091 +	raw_spinlock_t dgl_lock;
12092 +#endif
12093  } cedf_domain_t;
12094  
12095  /* a cedf_domain per cluster; allocation is done at init/activation time */
12096 @@ -108,6 +153,22 @@ cedf_domain_t *cedf;
12097  #define remote_cluster(cpu)	((cedf_domain_t *) per_cpu(cedf_cpu_entries, cpu).cluster)
12098  #define task_cpu_cluster(task)	remote_cluster(get_partition(task))
12099  
12100 +/* total number of clusters */
12101 +static int num_clusters;
12102 +/* we do not support clusters of different sizes */
12103 +static unsigned int cluster_size;
12104 +
12105 +static int clusters_allocated = 0;
12106 +
12107 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
12108 +static raw_spinlock_t* cedf_get_dgl_spinlock(struct task_struct *t)
12109 +{
12110 +	cedf_domain_t *cluster = task_cpu_cluster(t);
12111 +	return(&cluster->dgl_lock);
12112 +}
12113 +#endif
12114 +
12115 +
12116  /* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling
12117   * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose
12118   * information during the initialization of the plugin (e.g., topology)
12119 @@ -115,11 +176,11 @@ cedf_domain_t *cedf;
12120   */
12121  #define VERBOSE_INIT
12122  
12123 -static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
12124 +static int cpu_lower_prio(struct binheap_node *_a, struct binheap_node *_b)
12125  {
12126 -	cpu_entry_t *a, *b;
12127 -	a = _a->value;
12128 -	b = _b->value;
12129 +	cpu_entry_t *a = binheap_entry(_a, cpu_entry_t, hn);
12130 +	cpu_entry_t *b = binheap_entry(_b, cpu_entry_t, hn);
12131 +
12132  	/* Note that a and b are inverted: we want the lowest-priority CPU at
12133  	 * the top of the heap.
12134  	 */
12135 @@ -133,20 +194,17 @@ static void update_cpu_position(cpu_entry_t *entry)
12136  {
12137  	cedf_domain_t *cluster = entry->cluster;
12138  
12139 -	if (likely(bheap_node_in_heap(entry->hn)))
12140 -		bheap_delete(cpu_lower_prio,
12141 -				&cluster->cpu_heap,
12142 -				entry->hn);
12143 +	if (likely(binheap_is_in_heap(&entry->hn))) {
12144 +		binheap_delete(&entry->hn, &cluster->cpu_heap);
12145 +	}
12146  
12147 -	bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn);
12148 +	binheap_add(&entry->hn, &cluster->cpu_heap, cpu_entry_t, hn);
12149  }
12150  
12151  /* caller must hold cedf lock */
12152  static cpu_entry_t* lowest_prio_cpu(cedf_domain_t *cluster)
12153  {
12154 -	struct bheap_node* hn;
12155 -	hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap);
12156 -	return hn->value;
12157 +	return binheap_top_entry(&cluster->cpu_heap, cpu_entry_t, hn);
12158  }
12159  
12160  
12161 @@ -208,7 +266,7 @@ static noinline void link_task_to_cpu(struct task_struct* linked,
12162  }
12163  
12164  /* unlink - Make sure a task is not linked any longer to an entry
12165 - *          where it was linked before. Must hold cedf_lock.
12166 + *          where it was linked before. Must hold cluster_lock.
12167   */
12168  static noinline void unlink(struct task_struct* t)
12169  {
12170 @@ -244,7 +302,7 @@ static void preempt(cpu_entry_t *entry)
12171  }
12172  
12173  /* requeue - Put an unlinked task into gsn-edf domain.
12174 - *           Caller must hold cedf_lock.
12175 + *           Caller must hold cluster_lock.
12176   */
12177  static noinline void requeue(struct task_struct* task)
12178  {
12179 @@ -339,13 +397,17 @@ static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
12180  	raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
12181  }
12182  
12183 -/* caller holds cedf_lock */
12184 +/* caller holds cluster_lock */
12185  static noinline void job_completion(struct task_struct *t, int forced)
12186  {
12187  	BUG_ON(!t);
12188  
12189  	sched_trace_task_completion(t, forced);
12190  
12191 +#ifdef CONFIG_LITMUS_NVIDIA
12192 +	atomic_set(&tsk_rt(t)->nv_int_count, 0);
12193 +#endif
12194 +
12195  	TRACE_TASK(t, "job_completion().\n");
12196  
12197  	/* set flags */
12198 @@ -389,6 +451,314 @@ static void cedf_tick(struct task_struct* t)
12199  	}
12200  }
12201  
12202 +
12203 +
12204 +
12205 +
12206 +
12207 +
12208 +
12209 +
12210 +
12211 +
12212 +
12213 +
12214 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
12215 +
12216 +
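      +/* Run a single tasklet on behalf of its owning real-time task, emitting
      + * sched_trace tasklet begin/end events around the handler.  The tasklet
      + * must still be enabled and marked TASKLET_STATE_SCHED; otherwise BUG. */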
12217 +static void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed)
12218 +{
12219 +	if (!atomic_read(&tasklet->count)) {
12220 +		if(tasklet->owner) {
12221 +			sched_trace_tasklet_begin(tasklet->owner);
12222 +		}
12223 +
12224 +		if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state))
12225 +		{
12226 +			BUG();
12227 +		}
12228 +		TRACE("%s: Invoking tasklet with owner pid = %d (flushed = %d).\n",
12229 +			  __FUNCTION__,
12230 +			  (tasklet->owner) ? tasklet->owner->pid : -1,
12231 +			  (tasklet->owner) ? 0 : 1);
12232 +		tasklet->func(tasklet->data);
12233 +		tasklet_unlock(tasklet);
12234 +
12235 +		if(tasklet->owner) {
12236 +			sched_trace_tasklet_end(tasklet->owner, flushed);
12237 +		}
12238 +	}
12239 +	else {
12240 +		BUG();
12241 +	}
12242 +}
12243 +
12244 +
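      +/* Drain this cluster's PAI tasklet queue: repeatedly dequeue the first
      + * pending tasklet whose owner outranks 'sched_task' (under cluster_lock)
      + * and run it with the lock dropped, until no eligible tasklet remains. */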
12245 +static void do_lit_tasklets(cedf_domain_t* cluster, struct task_struct* sched_task)
12246 +{
12247 +	int work_to_do = 1;
12248 +	struct tasklet_struct *tasklet = NULL;
12249 +	unsigned long flags;
12250 +
12251 +	while(work_to_do) {
12252 +
12253 +		TS_NV_SCHED_BOTISR_START;
12254 +
12255 +		raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
12256 +
12257 +		if(cluster->pending_tasklets.head != NULL) {
12258 +			// find and remove the first eligible tasklet (the queue is in priority order).
12259 +			struct tasklet_struct *prev = NULL;
12260 +			tasklet = cluster->pending_tasklets.head;
12261 +
12262 +			// find a tasklet with sufficient priority to execute; skip ones where
12263 +			// sched_task has a higher priority.
12264 +			// We use the '!edf' test instead of swapping function arguments since
12265 +			// both sched_task and owner could be NULL.  In this case, we still
12266 +			// want to execute the tasklet.
12267 +			while(tasklet && !edf_higher_prio(tasklet->owner, sched_task)) {
12268 +				prev = tasklet;
12269 +				tasklet = tasklet->next;
12270 +			}
12271 +
12272 +			if(tasklet) {  // found something to execute
12273 +				// remove the tasklet from the queue
12274 +				if(prev) {
12275 +					prev->next = tasklet->next;
12276 +					if(prev->next == NULL) {
12277 +						TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
12278 +						cluster->pending_tasklets.tail = &(prev->next);
12279 +					}
12280 +				}
12281 +				else {
12282 +					cluster->pending_tasklets.head = tasklet->next;
12283 +					if(tasklet->next == NULL) {
12284 +						TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
12285 +						cluster->pending_tasklets.tail = &(cluster->pending_tasklets.head);
12286 +					}
12287 +				}
12288 +			}
12289 +			else {
12290 +				TRACE("%s: No tasklets with eligible priority.\n", __FUNCTION__);
12291 +			}
12292 +		}
12293 +		else {
12294 +			TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__);
12295 +		}
12296 +
12297 +		raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
12298 +
12299 +		if(tasklet) {
12300 +			__do_lit_tasklet(tasklet, 0ul);
12301 +			tasklet = NULL;
12302 +		}
12303 +		else {
12304 +			work_to_do = 0;
12305 +		}
12306 +
12307 +		TS_NV_SCHED_BOTISR_END;
12308 +	}
12309 +}
12310 +
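      +/* Insert 'tasklet' into the cluster's pending queue, which is kept sorted
      + * by the owner's EDF priority (highest first).  The caller is expected to
      + * hold cluster_lock. */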
12311 +static void __add_pai_tasklet(struct tasklet_struct* tasklet, cedf_domain_t* cluster)
12312 +{
12313 +	struct tasklet_struct* step;
12314 +
12315 +	tasklet->next = NULL;  // make sure there are no old values floating around
12316 +
12317 +	step = cluster->pending_tasklets.head;
12318 +	if(step == NULL) {
12319 +		TRACE("%s: tasklet queue empty.  inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid);
12320 +		// insert at tail.
12321 +		*(cluster->pending_tasklets.tail) = tasklet;
12322 +		cluster->pending_tasklets.tail = &(tasklet->next);
12323 +	}
12324 +	else if((*(cluster->pending_tasklets.tail) != NULL) &&
12325 +			edf_higher_prio((*(cluster->pending_tasklets.tail))->owner, tasklet->owner)) {
12326 +		// insert at tail.
12327 +		TRACE("%s: tasklet belongs at end.  inserting tasklet for %d at tail.\n", __FUNCTION__, tasklet->owner->pid);
12328 +
12329 +		*(cluster->pending_tasklets.tail) = tasklet;
12330 +		cluster->pending_tasklets.tail = &(tasklet->next);
12331 +	}
12332 +	else {
12333 +
12334 +		// insert the tasklet somewhere in the middle.
12335 +
12336 +        TRACE("%s: tasklet belongs somewhere in the middle.\n", __FUNCTION__);
12337 +
12338 +		while(step->next && edf_higher_prio(step->next->owner, tasklet->owner)) {
12339 +			step = step->next;
12340 +		}
12341 +
12342 +		// insert tasklet right before step->next.
12343 +
12344 +		TRACE("%s: inserting tasklet for %d between %d and %d.\n", __FUNCTION__,
12345 +			  tasklet->owner->pid,
12346 +			  (step->owner) ?
12347 +			  step->owner->pid :
12348 +			  -1,
12349 +			  (step->next) ?
12350 +			  ((step->next->owner) ?
12351 +			   step->next->owner->pid :
12352 +			   -1) :
12353 +			  -1);
12354 +
12355 +		tasklet->next = step->next;
12356 +		step->next = tasklet;
12357 +
12358 +		// patch up the head if needed.
12359 +		if(cluster->pending_tasklets.head == step)
12360 +		{
12361 +			TRACE("%s: %d is the new tasklet queue head.\n", __FUNCTION__, tasklet->owner->pid);
12362 +			cluster->pending_tasklets.head = tasklet;
12363 +		}
12364 +	}
12365 +}
12366 +
12367 +static void cedf_run_tasklets(struct task_struct* sched_task)
12368 +{
12369 +	cedf_domain_t* cluster;
12370 +
12371 +	preempt_disable();
12372 +
12373 +	cluster = (is_realtime(sched_task)) ?
12374 +		task_cpu_cluster(sched_task) :
12375 +		remote_cluster(smp_processor_id());
12376 +
12377 +	if(cluster && cluster->pending_tasklets.head != NULL) {
12378 +		TRACE("%s: There are tasklets to process.\n", __FUNCTION__);
12379 +		do_lit_tasklets(cluster, sched_task);
12380 +	}
12381 +
12382 +	preempt_enable_no_resched();
12383 +}
12384 +
12385 +
12386 +
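      +/* Plugin hook for PAI tasklet handling.  The cluster-aware dispatch logic
      + * below is compiled out (#if 0); as written, the tasklet simply executes
      + * immediately on the CPU that received the interrupt. */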
12387 +static int cedf_enqueue_pai_tasklet(struct tasklet_struct* tasklet)
12388 +{
12389 +#if 0
12390 +	cedf_domain_t *cluster = NULL;
12391 +	cpu_entry_t *targetCPU = NULL;
12392 +	int thisCPU;
12393 +	int runLocal = 0;
12394 +	int runNow = 0;
12395 +	unsigned long flags;
12396 +
12397 +    if(unlikely((tasklet->owner == NULL) || !is_realtime(tasklet->owner)))
12398 +    {
12399 +        TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
12400 +		return 0;
12401 +    }
12402 +
12403 +	cluster = task_cpu_cluster(tasklet->owner);
12404 +
12405 +	raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
12406 +
12407 +	thisCPU = smp_processor_id();
12408 +
12409 +#ifdef CONFIG_SCHED_CPU_AFFINITY
12410 +	{
12411 +		cpu_entry_t* affinity = NULL;
12412 +
12413 +		// use this CPU if it is in our cluster and isn't running any RT work.
12414 +		if(cpu_isset(thisCPU, *cluster->cpu_map) && (__get_cpu_var(cedf_cpu_entries).linked == NULL)) {
12415 +			affinity = &(__get_cpu_var(cedf_cpu_entries));
12416 +		}
12417 +		else {
12418 +			// this CPU is busy or shouldn't run tasklets in this cluster;
12419 +			// look for an available nearby CPU.
12420 +			// NOTE: Affinity towards owner and not this CPU.  Is this right?
12421 +			affinity =
12422 +				cedf_get_nearest_available_cpu(cluster,
12423 +								&per_cpu(cedf_cpu_entries, task_cpu(tasklet->owner)));
12424 +		}
12425 +
12426 +		targetCPU = affinity;
12427 +	}
12428 +#endif
12429 +
12430 +	if (targetCPU == NULL) {
12431 +		targetCPU = lowest_prio_cpu(cluster);
12432 +	}
12433 +
12434 +	if (edf_higher_prio(tasklet->owner, targetCPU->linked)) {
12435 +		if (thisCPU == targetCPU->cpu) {
12436 +			TRACE("%s: Run tasklet locally (and now).\n", __FUNCTION__);
12437 +			runLocal = 1;
12438 +			runNow = 1;
12439 +		}
12440 +		else {
12441 +			TRACE("%s: Run tasklet remotely (and now).\n", __FUNCTION__);
12442 +			runLocal = 0;
12443 +			runNow = 1;
12444 +		}
12445 +	}
12446 +	else {
12447 +		runLocal = 0;
12448 +		runNow = 0;
12449 +	}
12450 +
12451 +	if(!runLocal) {
12452 +		// enqueue the tasklet
12453 +		__add_pai_tasklet(tasklet, cluster);
12454 +	}
12455 +
12456 +	raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
12457 +
12458 +
12459 +	if (runLocal /*&& runNow */) {  // runNow == 1 is implied
12460 +		TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__);
12461 +		__do_lit_tasklet(tasklet, 0ul);
12462 +	}
12463 +	else if (runNow /*&& !runLocal */) {  // runLocal == 0 is implied
12464 +		TRACE("%s: Triggering CPU %d to run tasklet.\n", __FUNCTION__, targetCPU->cpu);
12465 +		preempt(targetCPU);  // need to be protected by cluster_lock?
12466 +	}
12467 +	else {
12468 +		TRACE("%s: Scheduling of tasklet was deferred.\n", __FUNCTION__);
12469 +	}
12470 +#else
12471 +	TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__);
12472 +	__do_lit_tasklet(tasklet, 0ul);
12473 +#endif
12474 +	return(1); // success
12475 +}
12476 +
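      +/* Re-tag pending tasklets whose recorded owner is 'old_prio' so that
      + * future priority comparisons use 'new_prio'; called with new_prio == NULL
      + * when the owning task exits. */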
12477 +static void cedf_change_prio_pai_tasklet(struct task_struct *old_prio,
12478 +										 struct task_struct *new_prio)
12479 +{
12480 +	struct tasklet_struct* step;
12481 +	unsigned long flags;
12482 +	cedf_domain_t *cluster;
12483 +	struct task_struct *probe;
12484 +
12485 +	// identify the cluster by the assignment of these tasks.  one should
12486 +	// be non-NULL.
12487 +	probe = (old_prio) ? old_prio : new_prio;
12488 +
12489 +	if(probe) {
12490 +		cluster = task_cpu_cluster(probe);
12491 +
12492 +		if(cluster->pending_tasklets.head != NULL) {
12493 +			raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
12494 +			for(step = cluster->pending_tasklets.head; step != NULL; step = step->next) {
12495 +				if(step->owner == old_prio) {
12496 +					TRACE("%s: Found tasklet to change: %d\n", __FUNCTION__, step->owner->pid);
12497 +					step->owner = new_prio;
12498 +				}
12499 +			}
12500 +			raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
12501 +		}
12502 +	}
12503 +	else {
12504 +		TRACE("%s: Both priorities were NULL\n", __FUNCTION__);
12505 +	}
12506 +}
12507 +
12508 +#endif  // PAI
12509 +
12510  /* Getting schedule() right is a bit tricky. schedule() may not make any
12511   * assumptions on the state of the current task since it may be called for a
12512   * number of reasons. The reasons include a scheduler_tick() determined that it
12513 @@ -465,6 +835,19 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
12514  	if (blocks)
12515  		unlink(entry->scheduled);
12516  
12517 +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
12518 +	if(exists && is_realtime(entry->scheduled) && tsk_rt(entry->scheduled)->held_gpus) {
12519 +		if(!blocks || tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) {
12520 +			// don't track preemptions or locking protocol suspensions.
12521 +			TRACE_TASK(entry->scheduled, "stopping GPU tracker.\n");
12522 +			stop_gpu_tracker(entry->scheduled);
12523 +		}
12524 +		else if(blocks && !tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) {
12525 +			TRACE_TASK(entry->scheduled, "GPU tracker remains on during suspension.\n");
12526 +		}
12527 +	}
12528 +#endif
12529 +
12530  	/* Request a sys_exit_np() call if we would like to preempt but cannot.
12531  	 * We need to make sure to update the link structure anyway in case
12532  	 * that we are still linked. Multiple calls to request_exit_np() don't
12533 @@ -514,7 +897,7 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
12534  	raw_spin_unlock(&cluster->cluster_lock);
12535  
12536  #ifdef WANT_ALL_SCHED_EVENTS
12537 -	TRACE("cedf_lock released, next=0x%p\n", next);
12538 +	TRACE("cluster_lock released, next=0x%p\n", next);
12539  
12540  	if (next)
12541  		TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
12542 @@ -522,7 +905,6 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
12543  		TRACE("becomes idle at %llu.\n", litmus_clock());
12544  #endif
12545  
12546 -
12547  	return next;
12548  }
12549  
12550 @@ -548,7 +930,7 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running)
12551  	cpu_entry_t* 		entry;
12552  	cedf_domain_t*		cluster;
12553  
12554 -	TRACE("gsn edf: task new %d\n", t->pid);
12555 +	TRACE("c-edf: task new %d\n", t->pid);
12556  
12557  	/* the cluster doesn't change even if t is running */
12558  	cluster = task_cpu_cluster(t);
12559 @@ -586,7 +968,7 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running)
12560  static void cedf_task_wake_up(struct task_struct *task)
12561  {
12562  	unsigned long flags;
12563 -	lt_t now;
12564 +	//lt_t now;
12565  	cedf_domain_t *cluster;
12566  
12567  	TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
12568 @@ -594,6 +976,8 @@ static void cedf_task_wake_up(struct task_struct *task)
12569  	cluster = task_cpu_cluster(task);
12570  
12571  	raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
12572 +
12573 +#if 0 // sporadic task model
12574  	/* We need to take suspensions because of semaphores into
12575  	 * account! If a job resumes after being suspended due to acquiring
12576  	 * a semaphore, it should never be treated as a new job release.
12577 @@ -615,7 +999,13 @@ static void cedf_task_wake_up(struct task_struct *task)
12578  			}
12579  		}
12580  	}
12581 -	cedf_job_arrival(task);
12582 +#else
12583 +	set_rt_flags(task, RT_F_RUNNING);  // periodic model
12584 +#endif
12585 +
12586 +	if(tsk_rt(task)->linked_on == NO_CPU)
12587 +		cedf_job_arrival(task);
12588 +
12589  	raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
12590  }
12591  
12592 @@ -642,6 +1032,10 @@ static void cedf_task_exit(struct task_struct * t)
12593  	unsigned long flags;
12594  	cedf_domain_t *cluster = task_cpu_cluster(t);
12595  
12596 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
12597 +	cedf_change_prio_pai_tasklet(t, NULL);
12598 +#endif
12599 +
12600  	/* unlink if necessary */
12601  	raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
12602  	unlink(t);
12603 @@ -659,13 +1053,536 @@ static void cedf_task_exit(struct task_struct * t)
12604  
12605  static long cedf_admit_task(struct task_struct* tsk)
12606  {
12607 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
12608 +	INIT_BINHEAP_HANDLE(&tsk_rt(tsk)->hp_blocked_tasks,
12609 +						edf_max_heap_base_priority_order);
12610 +#endif
12611 +
12612  	return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
12613  }
12614  
12615 -/* total number of cluster */
12616 -static int num_clusters;
12617 -/* we do not support cluster of different sizes */
12618 -static unsigned int cluster_size;
12619 +
12620 +
12621 +#ifdef CONFIG_LITMUS_LOCKING
12622 +
12623 +#include <litmus/fdso.h>
12624 +
12625 +
12626 +
12627 +/* called with IRQs off */
12628 +static void __increase_priority_inheritance(struct task_struct* t,
12629 +										    struct task_struct* prio_inh)
12630 +{
12631 +	int linked_on;
12632 +	int check_preempt = 0;
12633 +
12634 +	cedf_domain_t* cluster = task_cpu_cluster(t);
12635 +
12636 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
12637 +	/* this sanity check allows for weaker locking in protocols */
12638 +	/* TODO (klitirqd): Skip this check if 't' is a proxy thread (???) */
12639 +	if(__edf_higher_prio(prio_inh, BASE, t, EFFECTIVE)) {
12640 +#endif
12641 +		TRACE_TASK(t, "inherits priority from %s/%d\n",
12642 +				   prio_inh->comm, prio_inh->pid);
12643 +		tsk_rt(t)->inh_task = prio_inh;
12644 +
12645 +		linked_on  = tsk_rt(t)->linked_on;
12646 +
12647 +		/* If it is scheduled, then we need to reorder the CPU heap. */
12648 +		if (linked_on != NO_CPU) {
12649 +			TRACE_TASK(t, "%s: linked on %d\n",
12650 +					   __FUNCTION__, linked_on);
12651 +			/* Holder is scheduled; need to re-order CPUs.
12652 +			 * We can't use heap_decrease() here since
12653 +			 * the cpu_heap is ordered in reverse direction, so
12654 +			 * it is actually an increase. */
12655 +			binheap_delete(&per_cpu(cedf_cpu_entries, linked_on).hn,
12656 +						   &cluster->cpu_heap);
12657 +			binheap_add(&per_cpu(cedf_cpu_entries, linked_on).hn,
12658 +						&cluster->cpu_heap, cpu_entry_t, hn);
12659 +
12660 +		} else {
12661 +			/* holder may be queued: first stop queue changes */
12662 +			raw_spin_lock(&cluster->domain.release_lock);
12663 +			if (is_queued(t)) {
12664 +				TRACE_TASK(t, "%s: is queued\n",
12665 +						   __FUNCTION__);
12666 +				/* We need to update the position of holder in some
12667 +				 * heap. Note that this could be a release heap if
12668 +				 * budget enforcement is used and this job overran. */
12669 +				check_preempt =
12670 +					!bheap_decrease(edf_ready_order, tsk_rt(t)->heap_node);
12671 +			} else {
12672 +				/* Nothing to do: if it is not queued and not linked
12673 +				 * then it is either sleeping or currently being moved
12674 +				 * by other code (e.g., a timer interrupt handler) that
12675 +				 * will use the correct priority when enqueuing the
12676 +				 * task. */
12677 +				TRACE_TASK(t, "%s: is NOT queued => Done.\n",
12678 +						   __FUNCTION__);
12679 +			}
12680 +			raw_spin_unlock(&cluster->domain.release_lock);
12681 +
12682 +			/* If holder was enqueued in a release heap, then the following
12683 +			 * preemption check is pointless, but we can't easily detect
12684 +			 * that case. If you want to fix this, then consider that
12685 +			 * simply adding a state flag requires O(n) time to update when
12686 +			 * releasing n tasks, which conflicts with the goal to have
12687 +			 * O(log n) merges. */
12688 +			if (check_preempt) {
12689 +				/* heap_decrease() hit the top level of the heap: make
12690 +				 * sure preemption checks get the right task, not the
12691 +				 * potentially stale cache. */
12692 +				bheap_uncache_min(edf_ready_order,
12693 +								  &cluster->domain.ready_queue);
12694 +				check_for_preemptions(cluster);
12695 +			}
12696 +		}
12697 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
12698 +	}
12699 +	else {
12700 +		TRACE_TASK(t, "Spurious invalid priority increase. "
12701 +				   "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n"
12702 +				   "Occurrence is likely okay: probably due to (hopefully safe) concurrent priority updates.\n",
12703 +				   t->comm, t->pid,
12704 +				   effective_priority(t)->comm, effective_priority(t)->pid,
12705 +				   (prio_inh) ? prio_inh->comm : "nil",
12706 +				   (prio_inh) ? prio_inh->pid : -1);
12707 +		WARN_ON(!prio_inh);
12708 +	}
12709 +#endif
12710 +}
12711 +
12712 +/* called with IRQs off */
12713 +static void increase_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
12714 +{
12715 +	cedf_domain_t* cluster = task_cpu_cluster(t);
12716 +
12717 +	raw_spin_lock(&cluster->cluster_lock);
12718 +
12719 +	__increase_priority_inheritance(t, prio_inh);
12720 +
12721 +#ifdef CONFIG_LITMUS_SOFTIRQD
12722 +	if(tsk_rt(t)->cur_klitirqd != NULL)
12723 +	{
12724 +		TRACE_TASK(t, "%s/%d inherits a new priority!\n",
12725 +				   tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
12726 +
12727 +		__increase_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh);
12728 +	}
12729 +#endif
12730 +
12731 +	raw_spin_unlock(&cluster->cluster_lock);
12732 +
12733 +#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA)
12734 +	if(tsk_rt(t)->held_gpus) {
12735 +		int i;
12736 +		for(i = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus));
12737 +			i < NV_DEVICE_NUM;
12738 +			i = find_next_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus), i+1)) {
12739 +			pai_check_priority_increase(t, i);
12740 +		}
12741 +	}
12742 +#endif
12743 +}
12744 +
12745 +/* called with IRQs off */
12746 +static void __decrease_priority_inheritance(struct task_struct* t,
12747 +											struct task_struct* prio_inh)
12748 +{
12749 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
12750 +	if(__edf_higher_prio(t, EFFECTIVE, prio_inh, BASE)) {
12751 +#endif
12752 +		/* A job only stops inheriting a priority when it releases a
12753 +		 * resource. Thus we can make the following assumption.*/
12754 +		if(prio_inh)
12755 +			TRACE_TASK(t, "EFFECTIVE priority decreased to %s/%d\n",
12756 +					   prio_inh->comm, prio_inh->pid);
12757 +		else
12758 +			TRACE_TASK(t, "base priority restored.\n");
12759 +
12760 +		tsk_rt(t)->inh_task = prio_inh;
12761 +
12762 +		if(tsk_rt(t)->scheduled_on != NO_CPU) {
12763 +			TRACE_TASK(t, "is scheduled.\n");
12764 +
12765 +			/* Check if rescheduling is necessary. We can't use heap_decrease()
12766 +			 * since the priority was effectively lowered. */
12767 +			unlink(t);
12768 +			cedf_job_arrival(t);
12769 +		}
12770 +		else {
12771 +			cedf_domain_t* cluster = task_cpu_cluster(t);
12772 +			/* task is queued */
12773 +			raw_spin_lock(&cluster->domain.release_lock);
12774 +			if (is_queued(t)) {
12775 +				TRACE_TASK(t, "is queued.\n");
12776 +
12777 +				/* decrease in priority, so we have to re-add to binomial heap */
12778 +				unlink(t);
12779 +				cedf_job_arrival(t);
12780 +			}
12781 +			else {
12782 +				TRACE_TASK(t, "is not in scheduler. Probably on wait queue somewhere.\n");
12783 +			}
12784 +			raw_spin_unlock(&cluster->domain.release_lock);
12785 +		}
12786 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
12787 +	}
12788 +	else {
12789 +		TRACE_TASK(t, "Spurious invalid priority decrease. "
12790 +				   "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n"
12791 +				   "Occurrence is likely okay: probably due to (hopefully safe) concurrent priority updates.\n",
12792 +				   t->comm, t->pid,
12793 +				   effective_priority(t)->comm, effective_priority(t)->pid,
12794 +				   (prio_inh) ? prio_inh->comm : "nil",
12795 +				   (prio_inh) ? prio_inh->pid : -1);
12796 +	}
12797 +#endif
12798 +}
12799 +
12800 +static void decrease_priority_inheritance(struct task_struct* t,
12801 +										struct task_struct* prio_inh)
12802 +{
12803 +	cedf_domain_t* cluster = task_cpu_cluster(t);
12804 +
12805 +	raw_spin_lock(&cluster->cluster_lock);
12806 +	__decrease_priority_inheritance(t, prio_inh);
12807 +
12808 +#ifdef CONFIG_LITMUS_SOFTIRQD
12809 +	if(tsk_rt(t)->cur_klitirqd != NULL)
12810 +	{
12811 +		TRACE_TASK(t, "%s/%d decreases in priority!\n",
12812 +				   tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
12813 +
12814 +		__decrease_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh);
12815 +	}
12816 +#endif
12817 +
12818 +	raw_spin_unlock(&cluster->cluster_lock);
12819 +
12820 +#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA)
12821 +	if(tsk_rt(t)->held_gpus) {
12822 +		int i;
12823 +		for(i = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus));
12824 +			i < NV_DEVICE_NUM;
12825 +			i = find_next_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus), i+1)) {
12826 +			pai_check_priority_decrease(t, i);
12827 +		}
12828 +	}
12829 +#endif
12830 +}
12831 +
12832 +
12833 +
12834 +
12835 +
12836 +#ifdef CONFIG_LITMUS_SOFTIRQD
12837 +/* called with IRQs off */
12838 +static void increase_priority_inheritance_klitirqd(struct task_struct* klitirqd,
12839 +											  struct task_struct* old_owner,
12840 +											  struct task_struct* new_owner)
12841 +{
12842 +	cedf_domain_t* cluster = task_cpu_cluster(klitirqd);
12843 +
12844 +	BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
12845 +
12846 +	raw_spin_lock(&cluster->cluster_lock);
12847 +
12848 +	if(old_owner != new_owner)
12849 +	{
12850 +		if(old_owner)
12851 +		{
12852 +			// unreachable?
12853 +			tsk_rt(old_owner)->cur_klitirqd = NULL;
12854 +		}
12855 +
12856 +		TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n",
12857 +				   new_owner->comm, new_owner->pid);
12858 +
12859 +		tsk_rt(new_owner)->cur_klitirqd = klitirqd;
12860 +	}
12861 +
12862 +	__decrease_priority_inheritance(klitirqd, NULL);  // kludge to clear out cur prio.
12863 +
12864 +	__increase_priority_inheritance(klitirqd,
12865 +			(tsk_rt(new_owner)->inh_task == NULL) ?
12866 +				new_owner :
12867 +				tsk_rt(new_owner)->inh_task);
12868 +
12869 +	raw_spin_unlock(&cluster->cluster_lock);
12870 +}
12871 +
12872 +
12873 +/* called with IRQs off */
12874 +static void decrease_priority_inheritance_klitirqd(struct task_struct* klitirqd,
12875 +												   struct task_struct* old_owner,
12876 +												   struct task_struct* new_owner)
12877 +{
12878 +	cedf_domain_t* cluster = task_cpu_cluster(klitirqd);
12879 +
12880 +	BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
12881 +
12882 +	raw_spin_lock(&cluster->cluster_lock);
12883 +
12884 +    TRACE_TASK(klitirqd, "priority restored\n");
12885 +
12886 +	__decrease_priority_inheritance(klitirqd, new_owner);
12887 +
12888 +	tsk_rt(old_owner)->cur_klitirqd = NULL;
12889 +
12890 +	raw_spin_unlock(&cluster->cluster_lock);
12891 +}
12892 +#endif // CONFIG_LITMUS_SOFTIRQD
12893 +
12894 +
12895 +
12896 +
12897 +
12898 +
12899 +
12900 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
12901 +
12902 +/* called with IRQs off */
12903 +/* preconditions:
12904 + (1) The 'hp_blocked_tasks_lock' of task 't' is held.
12905 + (2) The lock 'to_unlock' is held.
12906 + */
12907 +static void nested_increase_priority_inheritance(struct task_struct* t,
12908 +												 struct task_struct* prio_inh,
12909 +												 raw_spinlock_t *to_unlock,
12910 +												 unsigned long irqflags)
12911 +{
12912 +	struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock;
12913 +
12914 +	if(tsk_rt(t)->inh_task != prio_inh) { 		// shield redundant calls.
12915 +		increase_priority_inheritance(t, prio_inh);  // increase our prio.
12916 +	}
12917 +
12918 +	raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);  // unlock t's heap.
12919 +
12920 +
12921 +	if(blocked_lock) {
12922 +		if(blocked_lock->ops->propagate_increase_inheritance) {
12923 +			TRACE_TASK(t, "Inheritor is blocked (...perhaps).  Checking lock %d.\n",
12924 +					   blocked_lock->ident);
12925 +
12926 +			// beware: recursion
12927 +			blocked_lock->ops->propagate_increase_inheritance(blocked_lock,
12928 +															  t, to_unlock,
12929 +															  irqflags);
12930 +		}
12931 +		else {
12932 +			TRACE_TASK(t, "Inheritor is blocked on lock (%d) that does not support nesting!\n",
12933 +					   blocked_lock->ident);
12934 +			unlock_fine_irqrestore(to_unlock, irqflags);
12935 +		}
12936 +	}
12937 +	else {
12938 +		TRACE_TASK(t, "is not blocked.  No propagation.\n");
12939 +		unlock_fine_irqrestore(to_unlock, irqflags);
12940 +	}
12941 +}
12942 +
12943 +/* called with IRQs off */
12944 +/* preconditions:
12945 + (1) The 'hp_blocked_tasks_lock' of task 't' is held.
12946 + (2) The lock 'to_unlock' is held.
12947 + */
12948 +static void nested_decrease_priority_inheritance(struct task_struct* t,
12949 +												 struct task_struct* prio_inh,
12950 +												 raw_spinlock_t *to_unlock,
12951 +												 unsigned long irqflags)
12952 +{
12953 +	struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock;
12954 +	decrease_priority_inheritance(t, prio_inh);
12955 +
12956 +	raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);  // unlock t's heap.
12957 +
12958 +	if(blocked_lock) {
12959 +		if(blocked_lock->ops->propagate_decrease_inheritance) {
12960 +			TRACE_TASK(t, "Inheritor is blocked (...perhaps).  Checking lock %d.\n",
12961 +					   blocked_lock->ident);
12962 +
12963 +			// beware: recursion
12964 +			blocked_lock->ops->propagate_decrease_inheritance(blocked_lock, t,
12965 +															  to_unlock,
12966 +															  irqflags);
12967 +		}
12968 +		else {
12969 +			TRACE_TASK(t, "Inheritor is blocked on lock (%p) that does not support nesting!\n",
12970 +					   blocked_lock);
12971 +			unlock_fine_irqrestore(to_unlock, irqflags);
12972 +		}
12973 +	}
12974 +	else {
12975 +		TRACE_TASK(t, "is not blocked.  No propagation.\n");
12976 +		unlock_fine_irqrestore(to_unlock, irqflags);
12977 +	}
12978 +}
12979 +
12980 +
12981 +/* ******************** RSM MUTEX ********************** */
12982 +
12983 +static struct litmus_lock_ops cedf_rsm_mutex_lock_ops = {
12984 +	.lock   = rsm_mutex_lock,
12985 +	.unlock = rsm_mutex_unlock,
12986 +	.close  = rsm_mutex_close,
12987 +	.deallocate = rsm_mutex_free,
12988 +
12989 +	.propagate_increase_inheritance = rsm_mutex_propagate_increase_inheritance,
12990 +	.propagate_decrease_inheritance = rsm_mutex_propagate_decrease_inheritance,
12991 +
12992 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
12993 +	.dgl_lock = rsm_mutex_dgl_lock,
12994 +	.is_owner = rsm_mutex_is_owner,
12995 +	.enable_priority = rsm_mutex_enable_priority,
12996 +#endif
12997 +};
12998 +
12999 +static struct litmus_lock* cedf_new_rsm_mutex(void)
13000 +{
13001 +	return rsm_mutex_new(&cedf_rsm_mutex_lock_ops);
13002 +}
13003 +
13004 +/* ******************** IKGLP ********************** */
13005 +
13006 +static struct litmus_lock_ops cedf_ikglp_lock_ops = {
13007 +	.lock   = ikglp_lock,
13008 +	.unlock = ikglp_unlock,
13009 +	.close  = ikglp_close,
13010 +	.deallocate = ikglp_free,
13011 +
13012 +	// ikglp can only be an outer-most lock.
13013 +	.propagate_increase_inheritance = NULL,
13014 +	.propagate_decrease_inheritance = NULL,
13015 +};
13016 +
13017 +static struct litmus_lock* cedf_new_ikglp(void* __user arg)
13018 +{
13019 +	// assumes clusters of uniform size.
13020 +	return ikglp_new(cluster_size/num_clusters, &cedf_ikglp_lock_ops, arg);
13021 +}
13022 +
13023 +#endif  /* CONFIG_LITMUS_NESTED_LOCKING */
13024 +
13025 +
13026 +
13027 +
13028 +/* ******************** KFMLP support ********************** */
13029 +
13030 +static struct litmus_lock_ops cedf_kfmlp_lock_ops = {
13031 +	.lock   = kfmlp_lock,
13032 +	.unlock = kfmlp_unlock,
13033 +	.close  = kfmlp_close,
13034 +	.deallocate = kfmlp_free,
13035 +
13036 +	// kfmlp can only be an outer-most lock.
13037 +	.propagate_increase_inheritance = NULL,
13038 +	.propagate_decrease_inheritance = NULL,
13039 +};
13040 +
13041 +
13042 +static struct litmus_lock* cedf_new_kfmlp(void* __user arg)
13043 +{
13044 +	return kfmlp_new(&cedf_kfmlp_lock_ops, arg);
13045 +}
13046 +
13047 +
13048 +/* **** lock constructor **** */
13049 +
13050 +static long cedf_allocate_lock(struct litmus_lock **lock, int type,
13051 +								 void* __user args)
13052 +{
13053 +	int err;
13054 +
13055 +	switch (type) {
13056 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
13057 +		case RSM_MUTEX:
13058 +			*lock = cedf_new_rsm_mutex();
13059 +			break;
13060 +
13061 +		case IKGLP_SEM:
13062 +			*lock = cedf_new_ikglp(args);
13063 +			break;
13064 +#endif
13065 +		case KFMLP_SEM:
13066 +			*lock = cedf_new_kfmlp(args);
13067 +			break;
13068 +
13069 +		default:
13070 +			err = -ENXIO;
13071 +			goto UNSUPPORTED_LOCK;
13072 +	};
13073 +
13074 +	if (*lock)
13075 +		err = 0;
13076 +	else
13077 +		err = -ENOMEM;
13078 +
13079 +UNSUPPORTED_LOCK:
13080 +	return err;
13081 +}
13082 +
13083 +#endif  // CONFIG_LITMUS_LOCKING
13084 +
13085 +
13086 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
13087 +static struct affinity_observer_ops cedf_kfmlp_affinity_ops = {
13088 +	.close = kfmlp_aff_obs_close,
13089 +	.deallocate = kfmlp_aff_obs_free,
13090 +};
13091 +
13092 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
13093 +static struct affinity_observer_ops cedf_ikglp_affinity_ops = {
13094 +	.close = ikglp_aff_obs_close,
13095 +	.deallocate = ikglp_aff_obs_free,
13096 +};
13097 +#endif
13098 +
13099 +static long cedf_allocate_affinity_observer(struct affinity_observer **aff_obs,
13100 +											int type,
13101 +											void* __user args)
13102 +{
13103 +	int err;
13104 +
13105 +	switch (type) {
13106 +
13107 +		case KFMLP_SIMPLE_GPU_AFF_OBS:
13108 +			*aff_obs = kfmlp_simple_gpu_aff_obs_new(&cedf_kfmlp_affinity_ops, args);
13109 +			break;
13110 +
13111 +		case KFMLP_GPU_AFF_OBS:
13112 +			*aff_obs = kfmlp_gpu_aff_obs_new(&cedf_kfmlp_affinity_ops, args);
13113 +			break;
13114 +
13115 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
13116 +		case IKGLP_SIMPLE_GPU_AFF_OBS:
13117 +			*aff_obs = ikglp_simple_gpu_aff_obs_new(&cedf_ikglp_affinity_ops, args);
13118 +			break;
13119 +
13120 +		case IKGLP_GPU_AFF_OBS:
13121 +			*aff_obs = ikglp_gpu_aff_obs_new(&cedf_ikglp_affinity_ops, args);
13122 +			break;
13123 +#endif
13124 +		default:
13125 +			err = -ENXIO;
13126 +			goto UNSUPPORTED_AFF_OBS;
13127 +	};
13128 +
13129 +	if (*aff_obs)
13130 +		err = 0;
13131 +	else
13132 +		err = -ENOMEM;
13133 +
13134 +UNSUPPORTED_AFF_OBS:
13135 +	return err;
13136 +}
13137 +#endif
13138 +
13139 +
13140 +
13141  
13142  #ifdef VERBOSE_INIT
13143  static void print_cluster_topology(cpumask_var_t mask, int cpu)
13144 @@ -680,16 +1597,17 @@ static void print_cluster_topology(cpumask_var_t mask, int cpu)
13145  }
13146  #endif
13147  
13148 -static int clusters_allocated = 0;
13149 -
13150  static void cleanup_cedf(void)
13151  {
13152  	int i;
13153  
13154 +#ifdef CONFIG_LITMUS_NVIDIA
13155 +	shutdown_nvidia_info();
13156 +#endif
13157 +
13158  	if (clusters_allocated) {
13159  		for (i = 0; i < num_clusters; i++) {
13160  			kfree(cedf[i].cpus);
13161 -			kfree(cedf[i].heap_node);
13162  			free_cpumask_var(cedf[i].cpu_map);
13163  		}
13164  
13165 @@ -749,12 +1667,16 @@ static long cedf_activate_plugin(void)
13166  
13167  		cedf[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t),
13168  				GFP_ATOMIC);
13169 -		cedf[i].heap_node = kmalloc(
13170 -				cluster_size * sizeof(struct bheap_node),
13171 -				GFP_ATOMIC);
13172 -		bheap_init(&(cedf[i].cpu_heap));
13173 +		INIT_BINHEAP_HANDLE(&(cedf[i].cpu_heap), cpu_lower_prio);
13174  		edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs);
13175  
13176 +
13177 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
13178 +		cedf[i].pending_tasklets.head = NULL;
13179 +		cedf[i].pending_tasklets.tail = &(cedf[i].pending_tasklets.head);
13180 +#endif
13181 +
13182 +
13183  		if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC))
13184  			return -ENOMEM;
13185  #ifdef CONFIG_RELEASE_MASTER
13186 @@ -765,6 +1687,10 @@ static long cedf_activate_plugin(void)
13187  	/* cycle through cluster and add cpus to them */
13188  	for (i = 0; i < num_clusters; i++) {
13189  
13190 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
13191 +		raw_spin_lock_init(&cedf[i].dgl_lock);
13192 +#endif
13193 +
13194  		for_each_online_cpu(cpu) {
13195  			/* check if the cpu is already in a cluster */
13196  			for (j = 0; j < num_clusters; j++)
13197 @@ -795,8 +1721,8 @@ static long cedf_activate_plugin(void)
13198  				atomic_set(&entry->will_schedule, 0);
13199  				entry->cpu = ccpu;
13200  				entry->cluster = &cedf[i];
13201 -				entry->hn = &(cedf[i].heap_node[cpu_count]);
13202 -				bheap_node_init(&entry->hn, entry);
13203 +
13204 +				INIT_BINHEAP_NODE(&entry->hn);
13205  
13206  				cpu_count++;
13207  
13208 @@ -813,6 +1739,40 @@ static long cedf_activate_plugin(void)
13209  		}
13210  	}
13211  
13212 +#ifdef CONFIG_LITMUS_SOFTIRQD
13213 +	{
13214 +		/* distribute the daemons evenly across the clusters. */
13215 +		int* affinity = kmalloc(NR_LITMUS_SOFTIRQD * sizeof(int), GFP_ATOMIC);
13216 +		int num_daemons_per_cluster = NR_LITMUS_SOFTIRQD / num_clusters;
13217 +		int left_over = NR_LITMUS_SOFTIRQD % num_clusters;
13218 +
13219 +		int daemon = 0;
13220 +		for(i = 0; i < num_clusters; ++i)
13221 +		{
13222 +			int num_on_this_cluster = num_daemons_per_cluster;
13223 +			if(left_over)
13224 +			{
13225 +				++num_on_this_cluster;
13226 +				--left_over;
13227 +			}
13228 +
13229 +			for(j = 0; j < num_on_this_cluster; ++j)
13230 +			{
13231 +				// first CPU of this cluster
13232 +				affinity[daemon++] = i*cluster_size;
13233 +			}
13234 +		}
13235 +
13236 +		spawn_klitirqd(affinity);
13237 +
13238 +		kfree(affinity);
13239 +	}
13240 +#endif
13241 +
13242 +#ifdef CONFIG_LITMUS_NVIDIA
13243 +	init_nvidia_info();
13244 +#endif
13245 +
13246  	free_cpumask_var(mask);
13247  	clusters_allocated = 1;
13248  	return 0;
13249 @@ -831,6 +1791,32 @@ static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = {
13250  	.task_block		= cedf_task_block,
13251  	.admit_task		= cedf_admit_task,
13252  	.activate_plugin	= cedf_activate_plugin,
13253 +	.compare		= edf_higher_prio,
13254 +#ifdef CONFIG_LITMUS_LOCKING
13255 +	.allocate_lock		= cedf_allocate_lock,
13256 +	.increase_prio		= increase_priority_inheritance,
13257 +	.decrease_prio		= decrease_priority_inheritance,
13258 +#endif
13259 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
13260 +	.nested_increase_prio		= nested_increase_priority_inheritance,
13261 +	.nested_decrease_prio		= nested_decrease_priority_inheritance,
13262 +	.__compare					= __edf_higher_prio,
13263 +#endif
13264 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
13265 +	.get_dgl_spinlock = cedf_get_dgl_spinlock,
13266 +#endif
13267 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
13268 +	.allocate_aff_obs = cedf_allocate_affinity_observer,
13269 +#endif
13270 +#ifdef CONFIG_LITMUS_SOFTIRQD
13271 +	.increase_prio_klitirqd = increase_priority_inheritance_klitirqd,
13272 +	.decrease_prio_klitirqd = decrease_priority_inheritance_klitirqd,
13273 +#endif
13274 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
13275 +	.enqueue_pai_tasklet = cedf_enqueue_pai_tasklet,
13276 +	.change_prio_pai_tasklet = cedf_change_prio_pai_tasklet,
13277 +	.run_tasklets = cedf_run_tasklets,
13278 +#endif
13279  };
13280  
13281  static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL;
13282 diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c
13283 index 6ed504f..8c48757 100644
13284 --- a/litmus/sched_gsn_edf.c
13285 +++ b/litmus/sched_gsn_edf.c
13286 @@ -12,23 +12,49 @@
13287  #include <linux/percpu.h>
13288  #include <linux/sched.h>
13289  #include <linux/slab.h>
13290 +#include <linux/uaccess.h>
13291 +#include <linux/module.h>
13292  
13293  #include <litmus/litmus.h>
13294  #include <litmus/jobs.h>
13295  #include <litmus/sched_plugin.h>
13296  #include <litmus/edf_common.h>
13297  #include <litmus/sched_trace.h>
13298 -#include <litmus/trace.h>
13299  
13300  #include <litmus/preempt.h>
13301  
13302  #include <litmus/bheap.h>
13303 +#include <litmus/binheap.h>
13304 +
13305 +#ifdef CONFIG_LITMUS_LOCKING
13306 +#include <litmus/kfmlp_lock.h>
13307 +#endif
13308 +
13309 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
13310 +#include <litmus/rsm_lock.h>
13311 +#include <litmus/ikglp_lock.h>
13312 +#endif
13313  
13314  #ifdef CONFIG_SCHED_CPU_AFFINITY
13315  #include <litmus/affinity.h>
13316  #endif
13317  
13318 -#include <linux/module.h>
13319 +#ifdef CONFIG_LITMUS_SOFTIRQD
13320 +#include <litmus/litmus_softirq.h>
13321 +#endif
13322 +
13323 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
13324 +#include <linux/interrupt.h>
13325 +#include <litmus/trace.h>
13326 +#endif
13327 +
13328 +#ifdef CONFIG_LITMUS_NVIDIA
13329 +#include <litmus/nvidia_info.h>
13330 +#endif
13331 +
13332 +#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
13333 +#include <litmus/gpu_affinity.h>
13334 +#endif
13335  
13336  /* Overview of GSN-EDF operations.
13337   *
13338 @@ -103,52 +129,70 @@ typedef struct  {
13339  	int 			cpu;
13340  	struct task_struct*	linked;		/* only RT tasks */
13341  	struct task_struct*	scheduled;	/* only RT tasks */
13342 -	struct bheap_node*	hn;
13343 +	struct binheap_node hn;
13344  } cpu_entry_t;
13345  DEFINE_PER_CPU(cpu_entry_t, gsnedf_cpu_entries);
13346  
13347  cpu_entry_t* gsnedf_cpus[NR_CPUS];
13348  
13349  /* the cpus queue themselves according to priority in here */
13350 -static struct bheap_node gsnedf_heap_node[NR_CPUS];
13351 -static struct bheap      gsnedf_cpu_heap;
13352 +static struct binheap_handle gsnedf_cpu_heap;
13353  
13354  static rt_domain_t gsnedf;
13355  #define gsnedf_lock (gsnedf.ready_lock)
13356  
13357 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
13358 +static raw_spinlock_t dgl_lock;
13359 +
13360 +static raw_spinlock_t* gsnedf_get_dgl_spinlock(struct task_struct *t)
13361 +{
13362 +	return(&dgl_lock);
13363 +}
13364 +#endif
13365 +
13366 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
13367 +struct tasklet_head
13368 +{
13369 +	struct tasklet_struct *head;
13370 +	struct tasklet_struct **tail;
13371 +};
13372 +
13373 +struct tasklet_head gsnedf_pending_tasklets;
13374 +#endif
13375 +
13376  
13377  /* Uncomment this if you want to see all scheduling decisions in the
13378   * TRACE() log.
13379  #define WANT_ALL_SCHED_EVENTS
13380   */
13381  
13382 -static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
13383 +static int cpu_lower_prio(struct binheap_node *_a, struct binheap_node *_b)
13384  {
13385 -	cpu_entry_t *a, *b;
13386 -	a = _a->value;
13387 -	b = _b->value;
13388 +	cpu_entry_t *a = binheap_entry(_a, cpu_entry_t, hn);
13389 +	cpu_entry_t *b = binheap_entry(_b, cpu_entry_t, hn);
13390 +
13391  	/* Note that a and b are inverted: we want the lowest-priority CPU at
13392  	 * the top of the heap.
13393  	 */
13394  	return edf_higher_prio(b->linked, a->linked);
13395  }
13396  
13397 +
13398  /* update_cpu_position - Move the cpu entry to the correct place to maintain
13399   *                       order in the cpu queue. Caller must hold gsnedf lock.
13400   */
13401  static void update_cpu_position(cpu_entry_t *entry)
13402  {
13403 -	if (likely(bheap_node_in_heap(entry->hn)))
13404 -		bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn);
13405 -	bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn);
13406 +	if (likely(binheap_is_in_heap(&entry->hn))) {
13407 +		binheap_delete(&entry->hn, &gsnedf_cpu_heap);
13408 +	}
13409 +	binheap_add(&entry->hn, &gsnedf_cpu_heap, cpu_entry_t, hn);
13410  }
13411  
13412  /* caller must hold gsnedf lock */
13413  static cpu_entry_t* lowest_prio_cpu(void)
13414  {
13415 -	struct bheap_node* hn;
13416 -	hn = bheap_peek(cpu_lower_prio, &gsnedf_cpu_heap);
13417 -	return hn->value;
13418 +	return binheap_top_entry(&gsnedf_cpu_heap, cpu_entry_t, hn);
13419  }
13420  
13421  
13422 @@ -337,6 +381,10 @@ static noinline void job_completion(struct task_struct *t, int forced)
13423  
13424  	sched_trace_task_completion(t, forced);
13425  
13426 +#ifdef CONFIG_LITMUS_NVIDIA
13427 +	atomic_set(&tsk_rt(t)->nv_int_count, 0);
13428 +#endif
13429 +
13430  	TRACE_TASK(t, "job_completion().\n");
13431  
13432  	/* set flags */
13433 @@ -379,6 +427,318 @@ static void gsnedf_tick(struct task_struct* t)
13434  	}
13435  }
13436  
13437 +
13438 +
13439 +
13440 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
13441 +
13442 +
13443 +static void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed)
13444 +{
13445 +	if (!atomic_read(&tasklet->count)) {
13446 +		if(tasklet->owner) {
13447 +			sched_trace_tasklet_begin(tasklet->owner);
13448 +		}
13449 +
13450 +		if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state))
13451 +		{
13452 +			BUG();
13453 +		}
13454 +		TRACE("%s: Invoking tasklet with owner pid = %d (flushed = %d).\n",
13455 +			  __FUNCTION__,
13456 +			  (tasklet->owner) ? tasklet->owner->pid : -1,
13457 +			  (tasklet->owner) ? 0 : 1);
13458 +		tasklet->func(tasklet->data);
13459 +		tasklet_unlock(tasklet);
13460 +
13461 +		if(tasklet->owner) {
13462 +			sched_trace_tasklet_end(tasklet->owner, flushed);
13463 +		}
13464 +	}
13465 +	else {
13466 +		BUG();
13467 +	}
13468 +}
13469 +
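      +/* Drain the global PAI tasklet queue: dequeue the first pending tasklet
      + * whose owner is not outranked by 'sched_task' (under gsnedf_lock) and run
      + * it with the lock dropped, repeating until no eligible tasklet remains. */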
13470 +static void do_lit_tasklets(struct task_struct* sched_task)
13471 +{
13472 +	int work_to_do = 1;
13473 +	struct tasklet_struct *tasklet = NULL;
13474 +	unsigned long flags;
13475 +
13476 +	while(work_to_do) {
13477 +
13478 +		TS_NV_SCHED_BOTISR_START;
13479 +
13480 +		// execute one tasklet that has higher priority
13481 +		raw_spin_lock_irqsave(&gsnedf_lock, flags);
13482 +
13483 +		if(gsnedf_pending_tasklets.head != NULL) {
13484 +			struct tasklet_struct *prev = NULL;
13485 +			tasklet = gsnedf_pending_tasklets.head;
13486 +
13487 +			while(tasklet && edf_higher_prio(sched_task, tasklet->owner)) {
13488 +				prev = tasklet;
13489 +				tasklet = tasklet->next;
13490 +			}
13491 +
13492 +			// remove the tasklet from the queue (if an eligible one was found)
13493 +			if(tasklet && prev) {
13494 +				prev->next = tasklet->next;
13495 +				if(prev->next == NULL) {
13496 +					TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
13497 +					gsnedf_pending_tasklets.tail = &(prev->next);
13498 +				}
13499 +			}
13500 +			else if(tasklet) {
13501 +				gsnedf_pending_tasklets.head = tasklet->next;
13502 +				if(tasklet->next == NULL) {
13503 +					TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
13504 +					gsnedf_pending_tasklets.tail = &(gsnedf_pending_tasklets.head);
13505 +				}
13506 +			}
13507 +		}
13508 +		else {
13509 +			TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__);
13510 +		}
13511 +
13512 +		raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
13513 +
13514 +		if(tasklet) {
13515 +			__do_lit_tasklet(tasklet, 0ul);
13516 +			tasklet = NULL;
13517 +		}
13518 +		else {
13519 +			work_to_do = 0;
13520 +		}
13521 +
13522 +		TS_NV_SCHED_BOTISR_END;
13523 +	}
13524 +}
13525 +
13526 +//static void do_lit_tasklets(struct task_struct* sched_task)
13527 +//{
13528 +//	int work_to_do = 1;
13529 +//	struct tasklet_struct *tasklet = NULL;
13530 +//	//struct tasklet_struct *step;
13531 +//	unsigned long flags;
13532 +//
13533 +//	while(work_to_do) {
13534 +//
13535 +//		TS_NV_SCHED_BOTISR_START;
13536 +//
13537 +//		// remove tasklet at head of list if it has higher priority.
13538 +//		raw_spin_lock_irqsave(&gsnedf_lock, flags);
13539 +//
13540 +//		if(gsnedf_pending_tasklets.head != NULL) {
13541 +//			// remove tasklet at head.
13542 +//			tasklet = gsnedf_pending_tasklets.head;
13543 +//
13544 +//			if(edf_higher_prio(tasklet->owner, sched_task)) {
13545 +//
13546 +//				if(NULL == tasklet->next) {
13547 +//					// tasklet is at the head, list only has one element
13548 +//					TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
13549 +//					gsnedf_pending_tasklets.tail = &(gsnedf_pending_tasklets.head);
13550 +//				}
13551 +//
13552 +//				// remove the tasklet from the queue
13553 +//				gsnedf_pending_tasklets.head = tasklet->next;
13554 +//
13555 +//				TRACE("%s: Removed tasklet for %d from tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
13556 +//			}
13557 +//			else {
13558 +//				TRACE("%s: Pending tasklet (%d) does not have priority to run on this CPU (%d).\n", __FUNCTION__, tasklet->owner->pid, smp_processor_id());
13559 +//				tasklet = NULL;
13560 +//			}
13561 +//		}
13562 +//		else {
13563 +//			TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__);
13564 +//		}
13565 +//
13566 +//		raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
13567 +//
13568 +//		TS_NV_SCHED_BOTISR_END;
13569 +//
13570 +//		if(tasklet) {
13571 +//			__do_lit_tasklet(tasklet, 0ul);
13572 +//			tasklet = NULL;
13573 +//		}
13574 +//		else {
13575 +//			work_to_do = 0;
13576 +//		}
13577 +//	}
13578 +//
13579 +//	//TRACE("%s: exited.\n", __FUNCTION__);
13580 +//}
13581 +
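      +/* Insert 'tasklet' into the global pending queue, kept sorted by the
      + * owner's EDF priority (highest first).  The caller is expected to hold
      + * gsnedf_lock. */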
13582 +static void __add_pai_tasklet(struct tasklet_struct* tasklet)
13583 +{
13584 +	struct tasklet_struct* step;
13585 +
13586 +	tasklet->next = NULL;  // make sure there are no old values floating around
13587 +
13588 +	step = gsnedf_pending_tasklets.head;
13589 +	if(step == NULL) {
13590 +		TRACE("%s: tasklet queue empty.  inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid);
13591 +		// insert at tail.
13592 +		*(gsnedf_pending_tasklets.tail) = tasklet;
13593 +		gsnedf_pending_tasklets.tail = &(tasklet->next);
13594 +	}
13595 +	else if((*(gsnedf_pending_tasklets.tail) != NULL) &&
13596 +			edf_higher_prio((*(gsnedf_pending_tasklets.tail))->owner, tasklet->owner)) {
13597 +		// insert at tail.
13598 +		TRACE("%s: tasklet belongs at end.  inserting tasklet for %d at tail.\n", __FUNCTION__, tasklet->owner->pid);
13599 +
13600 +		*(gsnedf_pending_tasklets.tail) = tasklet;
13601 +		gsnedf_pending_tasklets.tail = &(tasklet->next);
13602 +	}
13603 +	else {
13604 +		// insert the tasklet somewhere in the middle.
13605 +
13606 +        TRACE("%s: tasklet belongs somewhere in the middle.\n", __FUNCTION__);
13607 +
13608 +		while(step->next && edf_higher_prio(step->next->owner, tasklet->owner)) {
13609 +			step = step->next;
13610 +		}
13611 +
13612 +		// insert tasklet right before step->next.
13613 +
13614 +		TRACE("%s: inserting tasklet for %d between %d and %d.\n", __FUNCTION__, tasklet->owner->pid, step->owner->pid, (step->next) ? step->next->owner->pid : -1);
13615 +
13616 +		tasklet->next = step->next;
13617 +		step->next = tasklet;
13618 +
13619 +		// patch up the head if needed.
13620 +		if(gsnedf_pending_tasklets.head == step)
13621 +		{
13622 +			TRACE("%s: %d is the new tasklet queue head.\n", __FUNCTION__, tasklet->owner->pid);
13623 +			gsnedf_pending_tasklets.head = tasklet;
13624 +		}
13625 +	}
13626 +}
13627 +
13628 +static void gsnedf_run_tasklets(struct task_struct* sched_task)
13629 +{
13630 +	preempt_disable();
13631 +
13632 +	if(gsnedf_pending_tasklets.head != NULL) {
13633 +		TRACE("%s: There are tasklets to process.\n", __FUNCTION__);
13634 +		do_lit_tasklets(sched_task);
13635 +	}
13636 +
13637 +	preempt_enable_no_resched();
13638 +}
13639 +
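      +/* Plugin hook for PAI tasklet handling: run the tasklet immediately on this
      + * CPU, queue it and kick a remote CPU, or defer it, depending on the owner's
      + * priority relative to the target CPU's currently linked task. */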
13640 +static int gsnedf_enqueue_pai_tasklet(struct tasklet_struct* tasklet)
13641 +{
13642 +	cpu_entry_t *targetCPU = NULL;
13643 +	int thisCPU;
13644 +	int runLocal = 0;
13645 +	int runNow = 0;
13646 +	unsigned long flags;
13647 +
13648 +    if(unlikely((tasklet->owner == NULL) || !is_realtime(tasklet->owner)))
13649 +    {
13650 +        TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
13651 +		return 0;
13652 +    }
13653 +
13654 +
13655 +	raw_spin_lock_irqsave(&gsnedf_lock, flags);
13656 +
13657 +	thisCPU = smp_processor_id();
13658 +
13659 +#ifdef CONFIG_SCHED_CPU_AFFINITY
13660 +	{
13661 +		cpu_entry_t* affinity = NULL;
13662 +
13663 +		// use this CPU if it is in our cluster and isn't running any RT work.
13664 +		if(
13665 +#ifdef CONFIG_RELEASE_MASTER
13666 +		   (thisCPU != gsnedf.release_master) &&
13667 +#endif
13668 +		   (__get_cpu_var(gsnedf_cpu_entries).linked == NULL)) {
13669 +			affinity = &(__get_cpu_var(gsnedf_cpu_entries));
13670 +		}
13671 +		else {
13672 +			// this CPU is busy or shouldn't run tasklet in this cluster.
13673 +			// look for available nearby CPUs.
13674 +			// NOTE: Affinity towards owner and not this CPU.  Is this right?
13675 +			affinity =
13676 +				gsnedf_get_nearest_available_cpu(
13677 +					&per_cpu(gsnedf_cpu_entries, task_cpu(tasklet->owner)));
13678 +		}
13679 +
13680 +		targetCPU = affinity;
13681 +	}
13682 +#endif
13683 +
13684 +	if (targetCPU == NULL) {
13685 +		targetCPU = lowest_prio_cpu();
13686 +	}
13687 +
13688 +	if (edf_higher_prio(tasklet->owner, targetCPU->linked)) {
13689 +		if (thisCPU == targetCPU->cpu) {
13690 +			TRACE("%s: Run tasklet locally (and now).\n", __FUNCTION__);
13691 +			runLocal = 1;
13692 +			runNow = 1;
13693 +		}
13694 +		else {
13695 +			TRACE("%s: Run tasklet remotely (and now).\n", __FUNCTION__);
13696 +			runLocal = 0;
13697 +			runNow = 1;
13698 +		}
13699 +	}
13700 +	else {
13701 +		runLocal = 0;
13702 +		runNow = 0;
13703 +	}
13704 +
13705 +	if(!runLocal) {
13706 +		// enqueue the tasklet
13707 +		__add_pai_tasklet(tasklet);
13708 +	}
13709 +
13710 +	raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
13711 +
13712 +
13713 +	if (runLocal /*&& runNow */) {  // runNow == 1 is implied
13714 +		TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__);
13715 +		__do_lit_tasklet(tasklet, 0ul);
13716 +	}
13717 +	else if (runNow /*&& !runLocal */) {  // runLocal == 0 is implied
13718 +		TRACE("%s: Triggering CPU %d to run tasklet.\n", __FUNCTION__, targetCPU->cpu);
13719 +		preempt(targetCPU);  // need to be protected by gsnedf_lock?
13720 +	}
13721 +	else {
13722 +		TRACE("%s: Scheduling of tasklet was deferred.\n", __FUNCTION__);
13723 +	}
13724 +
13725 +	return(1); // success
13726 +}
13727 +
13728 +static void gsnedf_change_prio_pai_tasklet(struct task_struct *old_prio,
13729 +										   struct task_struct *new_prio)
13730 +{
13731 +	struct tasklet_struct* step;
13732 +	unsigned long flags;
13733 +
13734 +	if(gsnedf_pending_tasklets.head != NULL) {
13735 +		raw_spin_lock_irqsave(&gsnedf_lock, flags);
13736 +		for(step = gsnedf_pending_tasklets.head; step != NULL; step = step->next) {
13737 +			if(step->owner == old_prio) {
13738 +				TRACE("%s: Found tasklet to change: %d\n", __FUNCTION__, step->owner->pid);
13739 +				step->owner = new_prio;
13740 +			}
13741 +		}
13742 +		raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
13743 +	}
13744 +}
13745 +
13746 +#endif  // end PAI
13747 +
13748 +
13749  /* Getting schedule() right is a bit tricky. schedule() may not make any
13750   * assumptions on the state of the current task since it may be called for a
13751   * number of reasons. The reasons include a scheduler_tick() determined that it
13752 @@ -437,21 +797,32 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev)
13753  	TRACE_TASK(prev, "invoked gsnedf_schedule.\n");
13754  #endif
13755  
13756 +	/*
13757  	if (exists)
13758  		TRACE_TASK(prev,
13759  			   "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
13760  			   "state:%d sig:%d\n",
13761  			   blocks, out_of_time, np, sleep, preempt,
13762  			   prev->state, signal_pending(prev));
13763 +	 */
13764 +
13765  	if (entry->linked && preempt)
13766  		TRACE_TASK(prev, "will be preempted by %s/%d\n",
13767  			   entry->linked->comm, entry->linked->pid);
13768  
13769 -
13770  	/* If a task blocks we have no choice but to reschedule.
13771  	 */
13772 -	if (blocks)
13773 +	if (blocks) {
13774  		unlink(entry->scheduled);
13775 +	}
13776 +
13777 +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
13778 +	if(exists && is_realtime(entry->scheduled) && tsk_rt(entry->scheduled)->held_gpus) {
13779 +		if(!blocks || tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) {
13780 +			stop_gpu_tracker(entry->scheduled);
13781 +		}
13782 +	}
13783 +#endif
13784  
13785  	/* Request a sys_exit_np() call if we would like to preempt but cannot.
13786  	 * We need to make sure to update the link structure anyway in case
13787 @@ -492,12 +863,15 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev)
13788  			entry->scheduled->rt_param.scheduled_on = NO_CPU;
13789  			TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
13790  		}
13791 -	} else
13792 +	}
13793 +	else
13794 +	{
13795  		/* Only override Linux scheduler if we have a real-time task
13796  		 * scheduled that needs to continue.
13797  		 */
13798  		if (exists)
13799  			next = prev;
13800 +	}
13801  
13802  	sched_state_task_picked();
13803  
13804 @@ -524,6 +898,7 @@ static void gsnedf_finish_switch(struct task_struct *prev)
13805  	cpu_entry_t* 	entry = &__get_cpu_var(gsnedf_cpu_entries);
13806  
13807  	entry->scheduled = is_realtime(current) ? current : NULL;
13808 +
13809  #ifdef WANT_ALL_SCHED_EVENTS
13810  	TRACE_TASK(prev, "switched away from\n");
13811  #endif
13812 @@ -572,11 +947,14 @@ static void gsnedf_task_new(struct task_struct * t, int on_rq, int running)
13813  static void gsnedf_task_wake_up(struct task_struct *task)
13814  {
13815  	unsigned long flags;
13816 -	lt_t now;
13817 +	//lt_t now;
13818  
13819  	TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
13820  
13821  	raw_spin_lock_irqsave(&gsnedf_lock, flags);
13822 +
13823 +
13824 +#if 0  // sporadic task model
13825  	/* We need to take suspensions because of semaphores into
13826  	 * account! If a job resumes after being suspended due to acquiring
13827  	 * a semaphore, it should never be treated as a new job release.
13828 @@ -598,19 +976,26 @@ static void gsnedf_task_wake_up(struct task_struct *task)
13829  			}
13830  		}
13831  	}
13832 +#else  // periodic task model
13833 +	set_rt_flags(task, RT_F_RUNNING);
13834 +#endif
13835 +
13836  	gsnedf_job_arrival(task);
13837  	raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
13838  }
13839  
13840  static void gsnedf_task_block(struct task_struct *t)
13841  {
13842 +	// TODO: is this called on preemption??
13843  	unsigned long flags;
13844  
13845  	TRACE_TASK(t, "block at %llu\n", litmus_clock());
13846  
13847  	/* unlink if necessary */
13848  	raw_spin_lock_irqsave(&gsnedf_lock, flags);
13849 +
13850  	unlink(t);
13851 +
13852  	raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
13853  
13854  	BUG_ON(!is_realtime(t));
13855 @@ -621,6 +1006,10 @@ static void gsnedf_task_exit(struct task_struct * t)
13856  {
13857  	unsigned long flags;
13858  
13859 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
13860 +	gsnedf_change_prio_pai_tasklet(t, NULL);
13861 +#endif
13862 +
13863  	/* unlink if necessary */
13864  	raw_spin_lock_irqsave(&gsnedf_lock, flags);
13865  	unlink(t);
13866 @@ -637,101 +1026,423 @@ static void gsnedf_task_exit(struct task_struct * t)
13867  
13868  static long gsnedf_admit_task(struct task_struct* tsk)
13869  {
13870 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
13871 +	INIT_BINHEAP_HANDLE(&tsk_rt(tsk)->hp_blocked_tasks,
13872 +						edf_max_heap_base_priority_order);
13873 +#endif
13874 +
13875  	return 0;
13876  }
13877  
13878 +
13879 +
13880 +
13881 +
13882 +
13883  #ifdef CONFIG_LITMUS_LOCKING
13884  
13885  #include <litmus/fdso.h>
13886  
13887  /* called with IRQs off */
13888 -static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
13889 +static void __increase_priority_inheritance(struct task_struct* t,
13890 +										    struct task_struct* prio_inh)
13891  {
13892  	int linked_on;
13893  	int check_preempt = 0;
13894  
13895 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
13896 +	/* this sanity check allows for weaker locking in protocols */
13897 +	/* TODO (klitirqd): Skip this check if 't' is a proxy thread (???) */
13898 +	if(__edf_higher_prio(prio_inh, BASE, t, EFFECTIVE)) {
13899 +#endif
13900 +		TRACE_TASK(t, "inherits priority from %s/%d\n",
13901 +				   prio_inh->comm, prio_inh->pid);
13902 +		tsk_rt(t)->inh_task = prio_inh;
13903 +
13904 +		linked_on  = tsk_rt(t)->linked_on;
13905 +
13906 +		/* If it is scheduled, then we need to reorder the CPU heap. */
13907 +		if (linked_on != NO_CPU) {
13908 +			TRACE_TASK(t, "%s: linked  on %d\n",
13909 +				   __FUNCTION__, linked_on);
13910 +			/* Holder is scheduled; need to re-order CPUs.
13911 +			 * We can't use heap_decrease() here since
13912 +			 * the cpu_heap is ordered in reverse direction, so
13913 +			 * it is actually an increase. */
13914 +			binheap_delete(&gsnedf_cpus[linked_on]->hn, &gsnedf_cpu_heap);
13915 +			binheap_add(&gsnedf_cpus[linked_on]->hn,
13916 +					&gsnedf_cpu_heap, cpu_entry_t, hn);
13917 +		} else {
13918 +			/* holder may be queued: first stop queue changes */
13919 +			raw_spin_lock(&gsnedf.release_lock);
13920 +			if (is_queued(t)) {
13921 +				TRACE_TASK(t, "%s: is queued\n",
13922 +					   __FUNCTION__);
13923 +				/* We need to update the position of holder in some
13924 +				 * heap. Note that this could be a release heap if we
13925 +				 * budget enforcement is used and this job overran. */
13926 +				check_preempt =
13927 +					!bheap_decrease(edf_ready_order,
13928 +							   tsk_rt(t)->heap_node);
13929 +			} else {
13930 +				/* Nothing to do: if it is not queued and not linked
13931 +				 * then it is either sleeping or currently being moved
13932 +				 * by other code (e.g., a timer interrupt handler) that
13933 +				 * will use the correct priority when enqueuing the
13934 +				 * task. */
13935 +				TRACE_TASK(t, "%s: is NOT queued => Done.\n",
13936 +					   __FUNCTION__);
13937 +			}
13938 +			raw_spin_unlock(&gsnedf.release_lock);
13939 +
13940 +			/* If holder was enqueued in a release heap, then the following
13941 +			 * preemption check is pointless, but we can't easily detect
13942 +			 * that case. If you want to fix this, then consider that
13943 +			 * simply adding a state flag requires O(n) time to update when
13944 +			 * releasing n tasks, which conflicts with the goal to have
13945 +			 * O(log n) merges. */
13946 +			if (check_preempt) {
13947 +				/* heap_decrease() hit the top level of the heap: make
13948 +				 * sure preemption checks get the right task, not the
13949 +				 * potentially stale cache. */
13950 +				bheap_uncache_min(edf_ready_order,
13951 +						 &gsnedf.ready_queue);
13952 +				check_for_preemptions();
13953 +			}
13954 +		}
13955 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
13956 +	}
13957 +	else {
13958 +		TRACE_TASK(t, "Spurious invalid priority increase. "
13959 +				      "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n"
13960 +					  "Occurrence is likely okay: probably due to (hopefully safe) concurrent priority updates.\n",
13961 +				   t->comm, t->pid,
13962 +				   effective_priority(t)->comm, effective_priority(t)->pid,
13963 +				   (prio_inh) ? prio_inh->comm : "nil",
13964 +				   (prio_inh) ? prio_inh->pid : -1);
13965 +		WARN_ON(!prio_inh);
13966 +	}
13967 +#endif
13968 +}
13969 +
13970 +/* called with IRQs off */
13971 +static void increase_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
13972 +{
13973  	raw_spin_lock(&gsnedf_lock);
13974  
13975 -	TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid);
13976 -	tsk_rt(t)->inh_task = prio_inh;
13977 -
13978 -	linked_on  = tsk_rt(t)->linked_on;
13979 -
13980 -	/* If it is scheduled, then we need to reorder the CPU heap. */
13981 -	if (linked_on != NO_CPU) {
13982 -		TRACE_TASK(t, "%s: linked  on %d\n",
13983 -			   __FUNCTION__, linked_on);
13984 -		/* Holder is scheduled; need to re-order CPUs.
13985 -		 * We can't use heap_decrease() here since
13986 -		 * the cpu_heap is ordered in reverse direction, so
13987 -		 * it is actually an increase. */
13988 -		bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap,
13989 -			    gsnedf_cpus[linked_on]->hn);
13990 -		bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap,
13991 -			    gsnedf_cpus[linked_on]->hn);
13992 -	} else {
13993 -		/* holder may be queued: first stop queue changes */
13994 -		raw_spin_lock(&gsnedf.release_lock);
13995 -		if (is_queued(t)) {
13996 -			TRACE_TASK(t, "%s: is queued\n",
13997 -				   __FUNCTION__);
13998 -			/* We need to update the position of holder in some
13999 -			 * heap. Note that this could be a release heap if we
14000 -			 * budget enforcement is used and this job overran. */
14001 -			check_preempt =
14002 -				!bheap_decrease(edf_ready_order,
14003 -					       tsk_rt(t)->heap_node);
14004 -		} else {
14005 -			/* Nothing to do: if it is not queued and not linked
14006 -			 * then it is either sleeping or currently being moved
14007 -			 * by other code (e.g., a timer interrupt handler) that
14008 -			 * will use the correct priority when enqueuing the
14009 -			 * task. */
14010 -			TRACE_TASK(t, "%s: is NOT queued => Done.\n",
14011 -				   __FUNCTION__);
14012 +	__increase_priority_inheritance(t, prio_inh);
14013 +
14014 +#ifdef CONFIG_LITMUS_SOFTIRQD
14015 +	if(tsk_rt(t)->cur_klitirqd != NULL)
14016 +	{
14017 +		TRACE_TASK(t, "%s/%d inherits a new priority!\n",
14018 +				tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
14019 +
14020 +		__increase_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh);
14021 +	}
14022 +#endif
14023 +
14024 +	raw_spin_unlock(&gsnedf_lock);
14025 +
14026 +#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA)
14027 +	if(tsk_rt(t)->held_gpus) {
14028 +		int i;
14029 +		for(i = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus));
14030 +			i < NV_DEVICE_NUM;
14031 +			i = find_next_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus), i+1)) {
14032 +			pai_check_priority_increase(t, i);
14033 +		}
14034 +	}
14035 +#endif
14036 +}
14037 +
14038 +
14039 +/* called with IRQs off */
14040 +static void __decrease_priority_inheritance(struct task_struct* t,
14041 +											struct task_struct* prio_inh)
14042 +{
14043 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
14044 +	if(__edf_higher_prio(t, EFFECTIVE, prio_inh, BASE)) {
14045 +#endif
14046 +		/* A job only stops inheriting a priority when it releases a
14047 +		 * resource. Thus we can make the following assumption.*/
14048 +		if(prio_inh)
14049 +			TRACE_TASK(t, "EFFECTIVE priority decreased to %s/%d\n",
14050 +					   prio_inh->comm, prio_inh->pid);
14051 +		else
14052 +			TRACE_TASK(t, "base priority restored.\n");
14053 +
14054 +		tsk_rt(t)->inh_task = prio_inh;
14055 +
14056 +		if(tsk_rt(t)->scheduled_on != NO_CPU) {
14057 +			TRACE_TASK(t, "is scheduled.\n");
14058 +
14059 +			/* Check if rescheduling is necessary. We can't use heap_decrease()
14060 +			 * since the priority was effectively lowered. */
14061 +			unlink(t);
14062 +			gsnedf_job_arrival(t);
14063  		}
14064 -		raw_spin_unlock(&gsnedf.release_lock);
14065 -
14066 -		/* If holder was enqueued in a release heap, then the following
14067 -		 * preemption check is pointless, but we can't easily detect
14068 -		 * that case. If you want to fix this, then consider that
14069 -		 * simply adding a state flag requires O(n) time to update when
14070 -		 * releasing n tasks, which conflicts with the goal to have
14071 -		 * O(log n) merges. */
14072 -		if (check_preempt) {
14073 -			/* heap_decrease() hit the top level of the heap: make
14074 -			 * sure preemption checks get the right task, not the
14075 -			 * potentially stale cache. */
14076 -			bheap_uncache_min(edf_ready_order,
14077 -					 &gsnedf.ready_queue);
14078 -			check_for_preemptions();
14079 +		else {
14080 +			/* task is queued */
14081 +			raw_spin_lock(&gsnedf.release_lock);
14082 +			if (is_queued(t)) {
14083 +				TRACE_TASK(t, "is queued.\n");
14084 +
14085 +				/* decrease in priority, so we have to re-add to binomial heap */
14086 +				unlink(t);
14087 +				gsnedf_job_arrival(t);
14088 +			}
14089 +			else {
14090 +				TRACE_TASK(t, "is not in scheduler. Probably on wait queue somewhere.\n");
14091 +			}
14092 +			raw_spin_unlock(&gsnedf.release_lock);
14093  		}
14094 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
14095 +	}
14096 +	else {
14097 +		TRACE_TASK(t, "Spurious invalid priority decrease. "
14098 +				   "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n"
14099 +				   "Occurrence is likely okay: probably due to (hopefully safe) concurrent priority updates.\n",
14100 +				   t->comm, t->pid,
14101 +				   effective_priority(t)->comm, effective_priority(t)->pid,
14102 +				   (prio_inh) ? prio_inh->comm : "nil",
14103 +				   (prio_inh) ? prio_inh->pid : -1);
14104  	}
14105 +#endif
14106 +}
14107 +
14108 +static void decrease_priority_inheritance(struct task_struct* t,
14109 +										  struct task_struct* prio_inh)
14110 +{
14111 +	raw_spin_lock(&gsnedf_lock);
14112 +	__decrease_priority_inheritance(t, prio_inh);
14113 +
14114 +#ifdef CONFIG_LITMUS_SOFTIRQD
14115 +	if(tsk_rt(t)->cur_klitirqd != NULL)
14116 +	{
14117 +		TRACE_TASK(t, "%s/%d decreases in priority!\n",
14118 +				   tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
14119 +
14120 +		__decrease_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh);
14121 +	}
14122 +#endif
14123  
14124  	raw_spin_unlock(&gsnedf_lock);
14125 +
14126 +#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA)
14127 +	if(tsk_rt(t)->held_gpus) {
14128 +		int i;
14129 +		for(i = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus));
14130 +			i < NV_DEVICE_NUM;
14131 +			i = find_next_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus), i+1)) {
14132 +			pai_check_priority_decrease(t, i);
14133 +		}
14134 +	}
14135 +#endif
14136  }
14137  
14138 +
14139 +#ifdef CONFIG_LITMUS_SOFTIRQD
14140  /* called with IRQs off */
14141 -static void clear_priority_inheritance(struct task_struct* t)
14142 +static void increase_priority_inheritance_klitirqd(struct task_struct* klitirqd,
14143 +											  struct task_struct* old_owner,
14144 +											  struct task_struct* new_owner)
14145  {
14146 +	BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
14147 +
14148  	raw_spin_lock(&gsnedf_lock);
14149  
14150 -	/* A job only stops inheriting a priority when it releases a
14151 -	 * resource. Thus we can make the following assumption.*/
14152 -	BUG_ON(tsk_rt(t)->scheduled_on == NO_CPU);
14153 +	if(old_owner != new_owner)
14154 +	{
14155 +		if(old_owner)
14156 +		{
14157 +			// unreachable?
14158 +			tsk_rt(old_owner)->cur_klitirqd = NULL;
14159 +		}
14160  
14161 -	TRACE_TASK(t, "priority restored\n");
14162 -	tsk_rt(t)->inh_task = NULL;
14163 +		TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n",
14164 +				   new_owner->comm, new_owner->pid);
14165  
14166 -	/* Check if rescheduling is necessary. We can't use heap_decrease()
14167 -	 * since the priority was effectively lowered. */
14168 -	unlink(t);
14169 -	gsnedf_job_arrival(t);
14170 +		tsk_rt(new_owner)->cur_klitirqd = klitirqd;
14171 +	}
14172 +
14173 +	__decrease_priority_inheritance(klitirqd, NULL);  // kludge to clear out cur prio.
14174 +
14175 +	__increase_priority_inheritance(klitirqd,
14176 +			(tsk_rt(new_owner)->inh_task == NULL) ?
14177 +				new_owner :
14178 +				tsk_rt(new_owner)->inh_task);
14179  
14180  	raw_spin_unlock(&gsnedf_lock);
14181  }
14182  
14183  
14184 +/* called with IRQs off */
14185 +static void decrease_priority_inheritance_klitirqd(struct task_struct* klitirqd,
14186 +												   struct task_struct* old_owner,
14187 +												   struct task_struct* new_owner)
14188 +{
14189 +	BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
14190 +
14191 +	raw_spin_lock(&gsnedf_lock);
14192 +
14193 +    TRACE_TASK(klitirqd, "priority restored\n");
14194 +
14195 +	__decrease_priority_inheritance(klitirqd, new_owner);
14196 +
14197 +	tsk_rt(old_owner)->cur_klitirqd = NULL;
14198 +
14199 +	raw_spin_unlock(&gsnedf_lock);
14200 +}
14201 +#endif
14202 +
14203 +
14204 +
14205 +
14206 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
14207 +
14208 +/* called with IRQs off */
14209 +/* preconditions:
14210 + (1) The 'hp_blocked_tasks_lock' of task 't' is held.
14211 + (2) The lock 'to_unlock' is held.
14212 + */
14213 +static void nested_increase_priority_inheritance(struct task_struct* t,
14214 +												 struct task_struct* prio_inh,
14215 +												 raw_spinlock_t *to_unlock,
14216 +												 unsigned long irqflags)
14217 +{
14218 +	struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock;
14219 +
14220 +	if(tsk_rt(t)->inh_task != prio_inh) { 		// shield redundant calls.
14221 +		increase_priority_inheritance(t, prio_inh);  // increase our prio.
14222 +	}
14223 +
14224 +	raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);  // unlock t's heap.
14225 +
14226 +
14227 +	if(blocked_lock) {
14228 +		if(blocked_lock->ops->propagate_increase_inheritance) {
14229 +			TRACE_TASK(t, "Inheritor is blocked (...perhaps).  Checking lock %d.\n",
14230 +					   blocked_lock->ident);
14231 +
14232 +			// beware: recursion
14233 +			blocked_lock->ops->propagate_increase_inheritance(blocked_lock,
14234 +															  t, to_unlock,
14235 +															  irqflags);
14236 +		}
14237 +		else {
14238 +			TRACE_TASK(t, "Inheritor is blocked on lock (%d) that does not support nesting!\n",
14239 +					   blocked_lock->ident);
14240 +			unlock_fine_irqrestore(to_unlock, irqflags);
14241 +		}
14242 +	}
14243 +	else {
14244 +		TRACE_TASK(t, "is not blocked.  No propagation.\n");
14245 +		unlock_fine_irqrestore(to_unlock, irqflags);
14246 +	}
14247 +}
14248 +
14249 +/* called with IRQs off */
14250 +/* preconditions:
14251 + (1) The 'hp_blocked_tasks_lock' of task 't' is held.
14252 + (2) The lock 'to_unlock' is held.
14253 + */
14254 +static void nested_decrease_priority_inheritance(struct task_struct* t,
14255 +												 struct task_struct* prio_inh,
14256 +												 raw_spinlock_t *to_unlock,
14257 +												 unsigned long irqflags)
14258 +{
14259 +	struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock;
14260 +	decrease_priority_inheritance(t, prio_inh);
14261 +
14262 +	raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);  // unlock t's heap.
14263 +
14264 +	if(blocked_lock) {
14265 +		if(blocked_lock->ops->propagate_decrease_inheritance) {
14266 +			TRACE_TASK(t, "Inheritor is blocked (...perhaps).  Checking lock %d.\n",
14267 +					   blocked_lock->ident);
14268 +
14269 +			// beware: recursion
14270 +			blocked_lock->ops->propagate_decrease_inheritance(blocked_lock, t,
14271 +															  to_unlock,
14272 +															  irqflags);
14273 +		}
14274 +		else {
14275 +			TRACE_TASK(t, "Inheritor is blocked on lock (%p) that does not support nesting!\n",
14276 +					   blocked_lock);
14277 +			unlock_fine_irqrestore(to_unlock, irqflags);
14278 +		}
14279 +	}
14280 +	else {
14281 +		TRACE_TASK(t, "is not blocked.  No propagation.\n");
14282 +		unlock_fine_irqrestore(to_unlock, irqflags);
14283 +	}
14284 +}
14285 +
14286 +
14287 +/* ******************** RSM MUTEX ********************** */
14288 +
14289 +static struct litmus_lock_ops gsnedf_rsm_mutex_lock_ops = {
14290 +	.lock   = rsm_mutex_lock,
14291 +	.unlock = rsm_mutex_unlock,
14292 +	.close  = rsm_mutex_close,
14293 +	.deallocate = rsm_mutex_free,
14294 +
14295 +	.propagate_increase_inheritance = rsm_mutex_propagate_increase_inheritance,
14296 +	.propagate_decrease_inheritance = rsm_mutex_propagate_decrease_inheritance,
14297 +
14298 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
14299 +	.dgl_lock = rsm_mutex_dgl_lock,
14300 +	.is_owner = rsm_mutex_is_owner,
14301 +	.enable_priority = rsm_mutex_enable_priority,
14302 +#endif
14303 +};
14304 +
14305 +static struct litmus_lock* gsnedf_new_rsm_mutex(void)
14306 +{
14307 +	return rsm_mutex_new(&gsnedf_rsm_mutex_lock_ops);
14308 +}
14309 +
14310 +/* ******************** IKGLP ********************** */
14311 +
14312 +static struct litmus_lock_ops gsnedf_ikglp_lock_ops = {
14313 +	.lock   = ikglp_lock,
14314 +	.unlock = ikglp_unlock,
14315 +	.close  = ikglp_close,
14316 +	.deallocate = ikglp_free,
14317 +
14318 +	// ikglp can only be an outer-most lock.
14319 +	.propagate_increase_inheritance = NULL,
14320 +	.propagate_decrease_inheritance = NULL,
14321 +};
14322 +
14323 +static struct litmus_lock* gsnedf_new_ikglp(void* __user arg)
14324 +{
14325 +	return ikglp_new(num_online_cpus(), &gsnedf_ikglp_lock_ops, arg);
14326 +}
14327 +
14328 +#endif  /* CONFIG_LITMUS_NESTED_LOCKING */
14329 +
14330 +
14331 +/* ******************** KFMLP support ********************** */
14332 +
14333 +static struct litmus_lock_ops gsnedf_kfmlp_lock_ops = {
14334 +	.lock   = kfmlp_lock,
14335 +	.unlock = kfmlp_unlock,
14336 +	.close  = kfmlp_close,
14337 +	.deallocate = kfmlp_free,
14338 +
14339 +	// kfmlp can only be an outer-most lock.
14340 +	.propagate_increase_inheritance = NULL,
14341 +	.propagate_decrease_inheritance = NULL,
14342 +};
14343 +
14344 +
14345 +static struct litmus_lock* gsnedf_new_kfmlp(void* __user arg)
14346 +{
14347 +	return kfmlp_new(&gsnedf_kfmlp_lock_ops, arg);
14348 +}
14349 +
14350  /* ******************** FMLP support ********************** */
14351  
14352  /* struct for semaphore with priority inheritance */
14353 @@ -797,7 +1508,7 @@ int gsnedf_fmlp_lock(struct litmus_lock* l)
14354  		if (edf_higher_prio(t, sem->hp_waiter)) {
14355  			sem->hp_waiter = t;
14356  			if (edf_higher_prio(t, sem->owner))
14357 -				set_priority_inheritance(sem->owner, sem->hp_waiter);
14358 +				increase_priority_inheritance(sem->owner, sem->hp_waiter);
14359  		}
14360  
14361  		TS_LOCK_SUSPEND;
14362 @@ -865,7 +1576,7 @@ int gsnedf_fmlp_unlock(struct litmus_lock* l)
14363  			/* Well, if next is not the highest-priority waiter,
14364  			 * then it ought to inherit the highest-priority
14365  			 * waiter's priority. */
14366 -			set_priority_inheritance(next, sem->hp_waiter);
14367 +			increase_priority_inheritance(next, sem->hp_waiter);
14368  		}
14369  
14370  		/* wake up next */
14371 @@ -876,7 +1587,7 @@ int gsnedf_fmlp_unlock(struct litmus_lock* l)
14372  
14373  	/* we lose the benefit of priority inheritance (if any) */
14374  	if (tsk_rt(t)->inh_task)
14375 -		clear_priority_inheritance(t);
14376 +		decrease_priority_inheritance(t, NULL);
14377  
14378  out:
14379  	spin_unlock_irqrestore(&sem->wait.lock, flags);
14380 @@ -914,6 +1625,11 @@ static struct litmus_lock_ops gsnedf_fmlp_lock_ops = {
14381  	.lock   = gsnedf_fmlp_lock,
14382  	.unlock = gsnedf_fmlp_unlock,
14383  	.deallocate = gsnedf_fmlp_free,
14384 +
14385 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
14386 +	.propagate_increase_inheritance = NULL,
14387 +	.propagate_decrease_inheritance = NULL
14388 +#endif
14389  };
14390  
14391  static struct litmus_lock* gsnedf_new_fmlp(void)
14392 @@ -932,47 +1648,121 @@ static struct litmus_lock* gsnedf_new_fmlp(void)
14393  	return &sem->litmus_lock;
14394  }
14395  
14396 -/* **** lock constructor **** */
14397 -
14398  
14399  static long gsnedf_allocate_lock(struct litmus_lock **lock, int type,
14400 -				 void* __user unused)
14401 +				 void* __user args)
14402  {
14403 -	int err = -ENXIO;
14404 +	int err;
14405  
14406 -	/* GSN-EDF currently only supports the FMLP for global resources. */
14407  	switch (type) {
14408  
14409  	case FMLP_SEM:
14410  		/* Flexible Multiprocessor Locking Protocol */
14411  		*lock = gsnedf_new_fmlp();
14412 -		if (*lock)
14413 -			err = 0;
14414 -		else
14415 -			err = -ENOMEM;
14416 +		break;
14417 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
14418 +    case RSM_MUTEX:
14419 +		*lock = gsnedf_new_rsm_mutex();
14420  		break;
14421  
14422 +	case IKGLP_SEM:
14423 +		*lock = gsnedf_new_ikglp(args);
14424 +		break;
14425 +#endif
14426 +	case KFMLP_SEM:
14427 +		*lock = gsnedf_new_kfmlp(args);
14428 +		break;
14429 +	default:
14430 +		err = -ENXIO;
14431 +		goto UNSUPPORTED_LOCK;
14432  	};
14433  
14434 +	if (*lock)
14435 +		err = 0;
14436 +	else
14437 +		err = -ENOMEM;
14438 +
14439 +UNSUPPORTED_LOCK:
14440  	return err;
14441  }
14442  
14443 +#endif  // CONFIG_LITMUS_LOCKING
14444 +
14445 +
14446 +
14447 +
14448 +
14449 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
14450 +static struct affinity_observer_ops gsnedf_kfmlp_affinity_ops = {
14451 +	.close = kfmlp_aff_obs_close,
14452 +	.deallocate = kfmlp_aff_obs_free,
14453 +};
14454 +
14455 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
14456 +static struct affinity_observer_ops gsnedf_ikglp_affinity_ops = {
14457 +	.close = ikglp_aff_obs_close,
14458 +	.deallocate = ikglp_aff_obs_free,
14459 +};
14460  #endif
14461  
14462 +static long gsnedf_allocate_affinity_observer(
14463 +								struct affinity_observer **aff_obs,
14464 +								int type,
14465 +								void* __user args)
14466 +{
14467 +	int err;
14468 +
14469 +	switch (type) {
14470 +
14471 +		case KFMLP_SIMPLE_GPU_AFF_OBS:
14472 +			*aff_obs = kfmlp_simple_gpu_aff_obs_new(&gsnedf_kfmlp_affinity_ops, args);
14473 +			break;
14474 +
14475 +		case KFMLP_GPU_AFF_OBS:
14476 +			*aff_obs = kfmlp_gpu_aff_obs_new(&gsnedf_kfmlp_affinity_ops, args);
14477 +			break;
14478 +
14479 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
14480 +		case IKGLP_SIMPLE_GPU_AFF_OBS:
14481 +			*aff_obs = ikglp_simple_gpu_aff_obs_new(&gsnedf_ikglp_affinity_ops, args);
14482 +			break;
14483 +
14484 +		case IKGLP_GPU_AFF_OBS:
14485 +			*aff_obs = ikglp_gpu_aff_obs_new(&gsnedf_ikglp_affinity_ops, args);
14486 +			break;
14487 +#endif
14488 +		default:
14489 +			err = -ENXIO;
14490 +			goto UNSUPPORTED_AFF_OBS;
14491 +	};
14492 +
14493 +	if (*aff_obs)
14494 +		err = 0;
14495 +	else
14496 +		err = -ENOMEM;
14497 +
14498 +UNSUPPORTED_AFF_OBS:
14499 +	return err;
14500 +}
14501 +#endif
14502 +
14503 +
14504 +
14505 +
14506  
14507  static long gsnedf_activate_plugin(void)
14508  {
14509  	int cpu;
14510  	cpu_entry_t *entry;
14511  
14512 -	bheap_init(&gsnedf_cpu_heap);
14513 +	INIT_BINHEAP_HANDLE(&gsnedf_cpu_heap, cpu_lower_prio);
14514  #ifdef CONFIG_RELEASE_MASTER
14515  	gsnedf.release_master = atomic_read(&release_master_cpu);
14516  #endif
14517  
14518  	for_each_online_cpu(cpu) {
14519  		entry = &per_cpu(gsnedf_cpu_entries, cpu);
14520 -		bheap_node_init(&entry->hn, entry);
14521 +		INIT_BINHEAP_NODE(&entry->hn);
14522  		entry->linked    = NULL;
14523  		entry->scheduled = NULL;
14524  #ifdef CONFIG_RELEASE_MASTER
14525 @@ -986,6 +1776,20 @@ static long gsnedf_activate_plugin(void)
14526  		}
14527  #endif
14528  	}
14529 +
14530 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
14531 +	gsnedf_pending_tasklets.head = NULL;
14532 +	gsnedf_pending_tasklets.tail = &(gsnedf_pending_tasklets.head);
14533 +#endif
14534 +
14535 +#ifdef CONFIG_LITMUS_SOFTIRQD
14536 +    spawn_klitirqd(NULL);
14537 +#endif
14538 +
14539 +#ifdef CONFIG_LITMUS_NVIDIA
14540 +	init_nvidia_info();
14541 +#endif
14542 +
14543  	return 0;
14544  }
14545  
14546 @@ -1002,8 +1806,31 @@ static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = {
14547  	.task_block		= gsnedf_task_block,
14548  	.admit_task		= gsnedf_admit_task,
14549  	.activate_plugin	= gsnedf_activate_plugin,
14550 +	.compare		= edf_higher_prio,
14551  #ifdef CONFIG_LITMUS_LOCKING
14552  	.allocate_lock		= gsnedf_allocate_lock,
14553 +	.increase_prio		= increase_priority_inheritance,
14554 +	.decrease_prio		= decrease_priority_inheritance,
14555 +#endif
14556 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
14557 +	.nested_increase_prio		= nested_increase_priority_inheritance,
14558 +	.nested_decrease_prio		= nested_decrease_priority_inheritance,
14559 +	.__compare					= __edf_higher_prio,
14560 +#endif
14561 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
14562 +	.get_dgl_spinlock = gsnedf_get_dgl_spinlock,
14563 +#endif
14564 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
14565 +	.allocate_aff_obs = gsnedf_allocate_affinity_observer,
14566 +#endif
14567 +#ifdef CONFIG_LITMUS_SOFTIRQD
14568 +	.increase_prio_klitirqd = increase_priority_inheritance_klitirqd,
14569 +	.decrease_prio_klitirqd = decrease_priority_inheritance_klitirqd,
14570 +#endif
14571 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
14572 +	.enqueue_pai_tasklet = gsnedf_enqueue_pai_tasklet,
14573 +	.change_prio_pai_tasklet = gsnedf_change_prio_pai_tasklet,
14574 +	.run_tasklets = gsnedf_run_tasklets,
14575  #endif
14576  };
14577  
14578 @@ -1013,15 +1840,20 @@ static int __init init_gsn_edf(void)
14579  	int cpu;
14580  	cpu_entry_t *entry;
14581  
14582 -	bheap_init(&gsnedf_cpu_heap);
14583 +	INIT_BINHEAP_HANDLE(&gsnedf_cpu_heap, cpu_lower_prio);
14584  	/* initialize CPU state */
14585 -	for (cpu = 0; cpu < NR_CPUS; cpu++)  {
14586 +	for (cpu = 0; cpu < NR_CPUS; ++cpu)  {
14587  		entry = &per_cpu(gsnedf_cpu_entries, cpu);
14588  		gsnedf_cpus[cpu] = entry;
14589  		entry->cpu 	 = cpu;
14590 -		entry->hn        = &gsnedf_heap_node[cpu];
14591 -		bheap_node_init(&entry->hn, entry);
14592 +
14593 +		INIT_BINHEAP_NODE(&entry->hn);
14594  	}
14595 +
14596 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
14597 +	raw_spin_lock_init(&dgl_lock);
14598 +#endif
14599 +
14600  	edf_domain_init(&gsnedf, NULL, gsnedf_release_jobs);
14601  	return register_sched_plugin(&gsn_edf_plugin);
14602  }
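
Editorial aside (not part of the patch): the gsnedf_pending_tasklets list manipulated by __add_pai_tasklet() above is a priority-ordered singly-linked list whose tail member holds the address of the last node's next pointer (it is reset to &head in gsnedf_activate_plugin()), and all updates to it are serialized under gsnedf_lock. The minimal userspace sketch below illustrates that queue discipline only; the node/queue/prio names and the integer priorities are invented, whereas the real code orders entries with edf_higher_prio() on the owning tasks.

#include <stdio.h>
#include <stdlib.h>

struct node {
	int prio;              /* stand-in for the owning task's EDF priority */
	struct node *next;
};

struct queue {
	struct node *head;
	struct node **tail;    /* address of the last node's 'next' (or of head) */
};

static void queue_init(struct queue *q)
{
	q->head = NULL;
	q->tail = &q->head;
}

/* higher number == higher priority in this toy example */
static int higher_prio(int a, int b) { return a > b; }

static void enqueue(struct queue *q, struct node *n)
{
	struct node *step = q->head;

	n->next = NULL;
	if (!step || !higher_prio(step->prio, n->prio)) {
		/* empty queue, or n beats the current head: becomes the new head */
		n->next = q->head;
		q->head = n;
		if (!n->next)
			q->tail = &n->next;
		return;
	}
	/* walk while the successor still has higher priority than n */
	while (step->next && higher_prio(step->next->prio, n->prio))
		step = step->next;
	n->next = step->next;
	step->next = n;
	if (!n->next)
		q->tail = &n->next;    /* appended at the end: fix up tail */
}

int main(void)
{
	struct queue q;
	int prios[] = { 3, 7, 5 };
	size_t i;
	struct node *n;

	queue_init(&q);
	for (i = 0; i < sizeof(prios) / sizeof(prios[0]); i++) {
		n = malloc(sizeof(*n));
		n->prio = prios[i];
		enqueue(&q, n);
	}
	for (n = q.head; n; n = n->next)
		printf("%d ", n->prio);    /* prints: 7 5 3 */
	printf("\n");
	return 0;
}

The invariant being maintained is that the head is always the highest-priority pending tasklet, which is what lets the draining code earlier in the file work from the front of the list.
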
14603 diff --git a/litmus/sched_litmus.c b/litmus/sched_litmus.c
14604 index 5a15ce9..9a6fe48 100644
14605 --- a/litmus/sched_litmus.c
14606 +++ b/litmus/sched_litmus.c
14607 @@ -103,7 +103,9 @@ litmus_schedule(struct rq *rq, struct task_struct *prev)
14608  		}
14609  #ifdef  __ARCH_WANT_UNLOCKED_CTXSW
14610  		if (next->oncpu)
14611 +		{
14612  			TRACE_TASK(next, "waiting for !oncpu");
14613 +		}
14614  		while (next->oncpu) {
14615  			cpu_relax();
14616  			mb();
14617 diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
14618 index 00a1900..245e41c 100644
14619 --- a/litmus/sched_plugin.c
14620 +++ b/litmus/sched_plugin.c
14621 @@ -13,6 +13,10 @@
14622  #include <litmus/preempt.h>
14623  #include <litmus/jobs.h>
14624  
14625 +#ifdef CONFIG_LITMUS_NVIDIA
14626 +#include <litmus/nvidia_info.h>
14627 +#endif
14628 +
14629  /*
14630   * Generic function to trigger preemption on either local or remote cpu
14631   * from scheduler plugins. The key feature is that this function is
14632 @@ -102,6 +106,9 @@ static long litmus_dummy_complete_job(void)
14633  
14634  static long litmus_dummy_activate_plugin(void)
14635  {
14636 +#ifdef CONFIG_LITMUS_NVIDIA
14637 +	shutdown_nvidia_info();
14638 +#endif
14639  	return 0;
14640  }
14641  
14642 @@ -110,14 +117,93 @@ static long litmus_dummy_deactivate_plugin(void)
14643  	return 0;
14644  }
14645  
14646 -#ifdef CONFIG_LITMUS_LOCKING
14647 +static int litmus_dummy_compare(struct task_struct* a, struct task_struct* b)
14648 +{
14649 +	TRACE_CUR("WARNING: Dummy compare function called!\n");
14650 +	return 0;
14651 +}
14652  
14653 +#ifdef CONFIG_LITMUS_LOCKING
14654  static long litmus_dummy_allocate_lock(struct litmus_lock **lock, int type,
14655  				       void* __user config)
14656  {
14657  	return -ENXIO;
14658  }
14659  
14660 +static void litmus_dummy_increase_prio(struct task_struct* t, struct task_struct* prio_inh)
14661 +{
14662 +}
14663 +
14664 +static void litmus_dummy_decrease_prio(struct task_struct* t, struct task_struct* prio_inh)
14665 +{
14666 +}
14667 +#endif
14668 +
14669 +#ifdef CONFIG_LITMUS_SOFTIRQD
14670 +static void litmus_dummy_increase_prio_klitirqd(struct task_struct* klitirqd,
14671 +                                       struct task_struct* old_owner,
14672 +                                       struct task_struct* new_owner)
14673 +{
14674 +}
14675 +
14676 +static void litmus_dummy_decrease_prio_klitirqd(struct task_struct* klitirqd,
14677 +                                                struct task_struct* old_owner)
14678 +{
14679 +}
14680 +#endif
14681 +
14682 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
14683 +static int litmus_dummy_enqueue_pai_tasklet(struct tasklet_struct* t)
14684 +{
14685 +	TRACE("%s: PAI Tasklet unsupported in this plugin!!!!!!\n", __FUNCTION__);
14686 +	return(0); // failure.
14687 +}
14688 +
14689 +static void litmus_dummy_change_prio_pai_tasklet(struct task_struct *old_prio,
14690 +												 struct task_struct *new_prio)
14691 +{
14692 +	TRACE("%s: PAI Tasklet unsupported in this plugin!!!!!!\n", __FUNCTION__);
14693 +}
14694 +
14695 +static void litmus_dummy_run_tasklets(struct task_struct* t)
14696 +{
14697 +	//TRACE("%s: PAI Tasklet unsupported in this plugin!!!!!!\n", __FUNCTION__);
14698 +}
14699 +#endif
14700 +
14701 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
14702 +static void litmus_dummy_nested_increase_prio(struct task_struct* t, struct task_struct* prio_inh,
14703 +											raw_spinlock_t *to_unlock, unsigned long irqflags)
14704 +{
14705 +}
14706 +
14707 +static void litmus_dummy_nested_decrease_prio(struct task_struct* t, struct task_struct* prio_inh,
14708 +											raw_spinlock_t *to_unlock, unsigned long irqflags)
14709 +{
14710 +}
14711 +
14712 +static int litmus_dummy___compare(struct task_struct* a, comparison_mode_t a_mod,
14713 +								  struct task_struct* b, comparison_mode_t b_mode)
14714 +{
14715 +	TRACE_CUR("WARNING: Dummy compare function called!\n");
14716 +	return 0;
14717 +}
14718 +#endif
14719 +
14720 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
14721 +static raw_spinlock_t* litmus_dummy_get_dgl_spinlock(struct task_struct *t)
14722 +{
14723 +	return NULL;
14724 +}
14725 +#endif
14726 +
14727 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
14728 +static long litmus_dummy_allocate_aff_obs(struct affinity_observer **aff_obs,
14729 +									   int type,
14730 +									   void* __user config)
14731 +{
14732 +	return -ENXIO;
14733 +}
14734  #endif
14735  
14736  
14737 @@ -136,9 +222,33 @@ struct sched_plugin linux_sched_plugin = {
14738  	.finish_switch = litmus_dummy_finish_switch,
14739  	.activate_plugin = litmus_dummy_activate_plugin,
14740  	.deactivate_plugin = litmus_dummy_deactivate_plugin,
14741 +	.compare = litmus_dummy_compare,
14742  #ifdef CONFIG_LITMUS_LOCKING
14743  	.allocate_lock = litmus_dummy_allocate_lock,
14744 +	.increase_prio = litmus_dummy_increase_prio,
14745 +	.decrease_prio = litmus_dummy_decrease_prio,
14746 +#endif
14747 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
14748 +	.nested_increase_prio = litmus_dummy_nested_increase_prio,
14749 +	.nested_decrease_prio = litmus_dummy_nested_decrease_prio,
14750 +	.__compare = litmus_dummy___compare,
14751 +#endif
14752 +#ifdef CONFIG_LITMUS_SOFTIRQD
14753 +	.increase_prio_klitirqd = litmus_dummy_increase_prio_klitirqd,
14754 +	.decrease_prio_klitirqd = litmus_dummy_decrease_prio_klitirqd,
14755 +#endif
14756 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
14757 +	.enqueue_pai_tasklet = litmus_dummy_enqueue_pai_tasklet,
14758 +	.change_prio_pai_tasklet = litmus_dummy_change_prio_pai_tasklet,
14759 +	.run_tasklets = litmus_dummy_run_tasklets,
14760 +#endif
14761 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
14762 +	.get_dgl_spinlock = litmus_dummy_get_dgl_spinlock,
14763  #endif
14764 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
14765 +	.allocate_aff_obs = litmus_dummy_allocate_aff_obs,
14766 +#endif
14767 +
14768  	.admit_task = litmus_dummy_admit_task
14769  };
14770  
14771 @@ -174,8 +284,31 @@ int register_sched_plugin(struct sched_plugin* plugin)
14772  	CHECK(complete_job);
14773  	CHECK(activate_plugin);
14774  	CHECK(deactivate_plugin);
14775 +	CHECK(compare);
14776  #ifdef CONFIG_LITMUS_LOCKING
14777  	CHECK(allocate_lock);
14778 +	CHECK(increase_prio);
14779 +	CHECK(decrease_prio);
14780 +#endif
14781 +#ifdef CONFIG_LITMUS_NESTED_LOCKING
14782 +	CHECK(nested_increase_prio);
14783 +	CHECK(nested_decrease_prio);
14784 +	CHECK(__compare);
14785 +#endif
14786 +#ifdef CONFIG_LITMUS_SOFTIRQD
14787 +	CHECK(increase_prio_klitirqd);
14788 +	CHECK(decrease_prio_klitirqd);
14789 +#endif
14790 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
14791 +	CHECK(enqueue_pai_tasklet);
14792 +	CHECK(change_prio_pai_tasklet);
14793 +	CHECK(run_tasklets);
14794 +#endif
14795 +#ifdef CONFIG_LITMUS_DGL_SUPPORT
14796 +	CHECK(get_dgl_spinlock);
14797 +#endif
14798 +#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
14799 +	CHECK(allocate_aff_obs);
14800  #endif
14801  	CHECK(admit_task);
14802  
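
Editorial aside (not part of the patch): the litmus_dummy_* callbacks and the CHECK(...) calls added to register_sched_plugin() above follow an "ops table with defaults" pattern: each new plugin hook introduced by this patch (compare, increase_prio, run_tasklets, ...) gets a harmless default, and CHECK() appears to back-fill any callback a plugin leaves NULL with its litmus_dummy_* counterpart (the CHECK() definition itself is not shown in this hunk). The userspace analogue below uses an invented FILL_DEFAULT() macro in place of CHECK().

#include <stdio.h>

struct plugin_ops {
	void (*activate)(void);
	void (*run_tasklets)(void);
};

/* do-nothing defaults, one per optional callback */
static void dummy_activate(void)     { }
static void dummy_run_tasklets(void) { }

/* back-fill a missing callback with its dummy_* counterpart */
#define FILL_DEFAULT(ops, field)              \
	do {                                      \
		if (!(ops)->field)                    \
			(ops)->field = dummy_##field;     \
	} while (0)

static void my_activate(void) { printf("plugin activated\n"); }

int main(void)
{
	/* this "plugin" only implements activate; run_tasklets stays NULL */
	struct plugin_ops ops = { .activate = my_activate };

	FILL_DEFAULT(&ops, activate);      /* already set: left alone */
	FILL_DEFAULT(&ops, run_tasklets);  /* NULL: replaced by the dummy */

	ops.activate();      /* prints "plugin activated" */
	ops.run_tasklets();  /* safe no-op; callers never NULL-check */
	return 0;
}

The payoff is that call sites elsewhere in the patch can presumably invoke these hooks unconditionally, regardless of which plugin is loaded.
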
14803 diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c
14804 index 5ef8d09..f7f5753 100644
14805 --- a/litmus/sched_task_trace.c
14806 +++ b/litmus/sched_task_trace.c
14807 @@ -7,6 +7,7 @@
14808  #include <linux/module.h>
14809  #include <linux/sched.h>
14810  #include <linux/percpu.h>
14811 +#include <linux/hardirq.h>
14812  
14813  #include <litmus/ftdev.h>
14814  #include <litmus/litmus.h>
14815 @@ -16,13 +17,13 @@
14816  #include <litmus/ftdev.h>
14817  
14818  
14819 -#define NO_EVENTS		(1 << CONFIG_SCHED_TASK_TRACE_SHIFT)
14820 +#define NUM_EVENTS		(1 << (CONFIG_SCHED_TASK_TRACE_SHIFT+11))
14821  
14822  #define now() litmus_clock()
14823  
14824  struct local_buffer {
14825 -	struct st_event_record record[NO_EVENTS];
14826 -	char   flag[NO_EVENTS];
14827 +	struct st_event_record record[NUM_EVENTS];
14828 +	char   flag[NUM_EVENTS];
14829  	struct ft_buffer ftbuf;
14830  };
14831  
14832 @@ -41,7 +42,7 @@ static int __init init_sched_task_trace(void)
14833  	int i, ok = 0, err;
14834  	printk("Allocated %u sched_trace_xxx() events per CPU "
14835  	       "(buffer size: %d bytes)\n",
14836 -	       NO_EVENTS, (int) sizeof(struct local_buffer));
14837 +	       NUM_EVENTS, (int) sizeof(struct local_buffer));
14838  
14839  	err = ftdev_init(&st_dev, THIS_MODULE,
14840  			num_online_cpus(), "sched_trace");
14841 @@ -50,7 +51,7 @@ static int __init init_sched_task_trace(void)
14842  
14843  	for (i = 0; i < st_dev.minor_cnt; i++) {
14844  		buf = &per_cpu(st_event_buffer, i);
14845 -		ok += init_ft_buffer(&buf->ftbuf, NO_EVENTS,
14846 +		ok += init_ft_buffer(&buf->ftbuf, NUM_EVENTS,
14847  				     sizeof(struct st_event_record),
14848  				     buf->flag,
14849  				     buf->record);
14850 @@ -154,7 +155,8 @@ feather_callback void do_sched_trace_task_switch_to(unsigned long id,
14851  {
14852  	struct task_struct *t = (struct task_struct*) _task;
14853  	struct st_event_record* rec;
14854 -	if (is_realtime(t)) {
14855 +	//if (is_realtime(t))  /* comment out to trace EVERYTHING */
14856 +	{
14857  		rec = get_record(ST_SWITCH_TO, t);
14858  		if (rec) {
14859  			rec->data.switch_to.when      = now();
14860 @@ -169,7 +171,8 @@ feather_callback void do_sched_trace_task_switch_away(unsigned long id,
14861  {
14862  	struct task_struct *t = (struct task_struct*) _task;
14863  	struct st_event_record* rec;
14864 -	if (is_realtime(t)) {
14865 +	//if (is_realtime(t))  /* comment out to trace EVERYTHING */
14866 +	{
14867  		rec = get_record(ST_SWITCH_AWAY, t);
14868  		if (rec) {
14869  			rec->data.switch_away.when      = now();
14870 @@ -188,6 +191,9 @@ feather_callback void do_sched_trace_task_completion(unsigned long id,
14871  	if (rec) {
14872  		rec->data.completion.when   = now();
14873  		rec->data.completion.forced = forced;
14874 +#ifdef CONFIG_LITMUS_NVIDIA
14875 +		rec->data.completion.nv_int_count = (u16)atomic_read(&tsk_rt(t)->nv_int_count);
14876 +#endif
14877  		put_record(rec);
14878  	}
14879  }
14880 @@ -239,3 +245,265 @@ feather_callback void do_sched_trace_action(unsigned long id,
14881  		put_record(rec);
14882  	}
14883  }
14884 +
14885 +
14886 +
14887 +
14888 +feather_callback void do_sched_trace_prediction_err(unsigned long id,
14889 +													unsigned long _task,
14890 +													unsigned long _distance,
14891 +													unsigned long _rel_err)
14892 +{
14893 +	struct task_struct *t = (struct task_struct*) _task;
14894 +	struct st_event_record *rec = get_record(ST_PREDICTION_ERR, t);
14895 +
14896 +	if (rec) {
14897 +		gpu_migration_dist_t* distance = (gpu_migration_dist_t*) _distance;
14898 +		fp_t* rel_err = (fp_t*) _rel_err;
14899 +
14900 +		rec->data.prediction_err.distance = *distance;
14901 +		rec->data.prediction_err.rel_err = rel_err->val;
14902 +		put_record(rec);
14903 +	}
14904 +}
14905 +
14906 +
14907 +feather_callback void do_sched_trace_migration(unsigned long id,
14908 +													unsigned long _task,
14909 +													unsigned long _mig_info)
14910 +{
14911 +	struct task_struct *t = (struct task_struct*) _task;
14912 +	struct st_event_record *rec = get_record(ST_MIGRATION, t);
14913 +
14914 +	if (rec) {
14915 +		struct migration_info* mig_info = (struct migration_info*) _mig_info;
14916 +
14917 +		rec->hdr.extra = mig_info->distance;
14918 +		rec->data.migration.observed = mig_info->observed;
14919 +		rec->data.migration.estimated = mig_info->estimated;
14920 +
14921 +		put_record(rec);
14922 +	}
14923 +}
14924 +
14925 +
14926 +
14927 +
14928 +
14929 +
14930 +
14931 +
14932 +
14933 +feather_callback void do_sched_trace_tasklet_release(unsigned long id,
14934 +												   unsigned long _owner)
14935 +{
14936 +	struct task_struct *t = (struct task_struct*) _owner;
14937 +	struct st_event_record *rec = get_record(ST_TASKLET_RELEASE, t);
14938 +
14939 +	if (rec) {
14940 +		rec->data.tasklet_release.when = now();
14941 +		put_record(rec);
14942 +	}
14943 +}
14944 +
14945 +
14946 +feather_callback void do_sched_trace_tasklet_begin(unsigned long id,
14947 +												   unsigned long _owner)
14948 +{
14949 +	struct task_struct *t = (struct task_struct*) _owner;
14950 +	struct st_event_record *rec = get_record(ST_TASKLET_BEGIN, t);
14951 +
14952 +	if (rec) {
14953 +		rec->data.tasklet_begin.when = now();
14954 +
14955 +		if(!in_interrupt())
14956 +			rec->data.tasklet_begin.exe_pid = current->pid;
14957 +		else
14958 +			rec->data.tasklet_begin.exe_pid = 0;
14959 +
14960 +		put_record(rec);
14961 +	}
14962 +}
14963 +EXPORT_SYMBOL(do_sched_trace_tasklet_begin);
14964 +
14965 +
14966 +feather_callback void do_sched_trace_tasklet_end(unsigned long id,
14967 +												 unsigned long _owner,
14968 +												 unsigned long _flushed)
14969 +{
14970 +	struct task_struct *t = (struct task_struct*) _owner;
14971 +	struct st_event_record *rec = get_record(ST_TASKLET_END, t);
14972 +
14973 +	if (rec) {
14974 +		rec->data.tasklet_end.when = now();
14975 +		rec->data.tasklet_end.flushed = _flushed;
14976 +
14977 +		if(!in_interrupt())
14978 +			rec->data.tasklet_end.exe_pid = current->pid;
14979 +		else
14980 +			rec->data.tasklet_end.exe_pid = 0;
14981 +
14982 +		put_record(rec);
14983 +	}
14984 +}
14985 +EXPORT_SYMBOL(do_sched_trace_tasklet_end);
14986 +
14987 +
14988 +feather_callback void do_sched_trace_work_release(unsigned long id,
14989 +													 unsigned long _owner)
14990 +{
14991 +	struct task_struct *t = (struct task_struct*) _owner;
14992 +	struct st_event_record *rec = get_record(ST_WORK_RELEASE, t);
14993 +
14994 +	if (rec) {
14995 +		rec->data.work_release.when = now();
14996 +		put_record(rec);
14997 +	}
14998 +}
14999 +
15000 +
15001 +feather_callback void do_sched_trace_work_begin(unsigned long id,
15002 +												unsigned long _owner,
15003 +												unsigned long _exe)
15004 +{
15005 +	struct task_struct *t = (struct task_struct*) _owner;
15006 +	struct st_event_record *rec = get_record(ST_WORK_BEGIN, t);
15007 +
15008 +	if (rec) {
15009 +		struct task_struct *exe = (struct task_struct*) _exe;
15010 +		rec->data.work_begin.exe_pid = exe->pid;
15011 +		rec->data.work_begin.when = now();
15012 +		put_record(rec);
15013 +	}
15014 +}
15015 +EXPORT_SYMBOL(do_sched_trace_work_begin);
15016 +
15017 +
15018 +feather_callback void do_sched_trace_work_end(unsigned long id,
15019 +											  unsigned long _owner,
15020 +											  unsigned long _exe,
15021 +											  unsigned long _flushed)
15022 +{
15023 +	struct task_struct *t = (struct task_struct*) _owner;
15024 +	struct st_event_record *rec = get_record(ST_WORK_END, t);
15025 +
15026 +	if (rec) {
15027 +		struct task_struct *exe = (struct task_struct*) _exe;
15028 +		rec->data.work_end.exe_pid = exe->pid;
15029 +		rec->data.work_end.flushed = _flushed;
15030 +		rec->data.work_end.when = now();
15031 +		put_record(rec);
15032 +	}
15033 +}
15034 +EXPORT_SYMBOL(do_sched_trace_work_end);
15035 +
15036 +
15037 +feather_callback void do_sched_trace_eff_prio_change(unsigned long id,
15038 +											  unsigned long _task,
15039 +											  unsigned long _inh)
15040 +{
15041 +	struct task_struct *t = (struct task_struct*) _task;
15042 +	struct st_event_record *rec = get_record(ST_EFF_PRIO_CHANGE, t);
15043 +
15044 +	if (rec) {
15045 +		struct task_struct *inh = (struct task_struct*) _inh;
15046 +		rec->data.effective_priority_change.when = now();
15047 +		rec->data.effective_priority_change.inh_pid = (inh != NULL) ?
15048 +			inh->pid :
15049 +			0xffff;
15050 +
15051 +		put_record(rec);
15052 +	}
15053 +}
15054 +
15055 +/* pray for no nesting of nv interrupts on same CPU... */
15056 +struct tracing_interrupt_map
15057 +{
15058 +	int active;
15059 +	int count;
15060 +	unsigned long data[128]; // assume nesting less than 128...
15061 +	unsigned long serial[128];
15062 +};
15063 +DEFINE_PER_CPU(struct tracing_interrupt_map, active_interrupt_tracing);
15064 +
15065 +
15066 +DEFINE_PER_CPU(u32, intCounter);
15067 +
15068 +feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id,
15069 +												unsigned long _device)
15070 +{
15071 +	struct st_event_record *rec;
15072 +	u32 serialNum;
15073 +
15074 +	{
15075 +		u32* serial;
15076 +		struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id());
15077 +		if(int_map->active == 0xcafebabe)
15078 +		{
15079 +			int_map->count++;
15080 +		}
15081 +		else
15082 +		{
15083 +			int_map->active = 0xcafebabe;
15084 +			int_map->count = 1;
15085 +		}
15086 +		//int_map->data[int_map->count-1] = _device;
15087 +
15088 +		serial = &per_cpu(intCounter, smp_processor_id());
15089 +		*serial += num_online_cpus();
15090 +		serialNum = *serial;
15091 +		int_map->serial[int_map->count-1] = serialNum;
15092 +	}
15093 +
15094 +	rec = get_record(ST_NV_INTERRUPT_BEGIN, NULL);
15095 +	if(rec) {
15096 +		u32 device = _device;
15097 +		rec->data.nv_interrupt_begin.when = now();
15098 +		rec->data.nv_interrupt_begin.device = device;
15099 +		rec->data.nv_interrupt_begin.serialNumber = serialNum;
15100 +		put_record(rec);
15101 +	}
15102 +}
15103 +EXPORT_SYMBOL(do_sched_trace_nv_interrupt_begin);
15104 +
15105 +/*
15106 +int is_interrupt_tracing_active(void)
15107 +{
15108 +	struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id());
15109 +	if(int_map->active == 0xcafebabe)
15110 +		return 1;
15111 +	return 0;
15112 +}
15113 +*/
15114 +
15115 +feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id, unsigned long _device)
15116 +{
15117 +	struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id());
15118 +	if(int_map->active == 0xcafebabe)
15119 +	{
15120 +		struct st_event_record *rec = get_record(ST_NV_INTERRUPT_END, NULL);
15121 +
15122 +		int_map->count--;
15123 +		if(int_map->count == 0)
15124 +			int_map->active = 0;
15125 +
15126 +		if(rec) {
15127 +			u32 device = _device;
15128 +			rec->data.nv_interrupt_end.when = now();
15129 +			//rec->data.nv_interrupt_end.device = int_map->data[int_map->count];
15130 +			rec->data.nv_interrupt_end.device = device;
15131 +			rec->data.nv_interrupt_end.serialNumber = int_map->serial[int_map->count];
15132 +			put_record(rec);
15133 +		}
15134 +	}
15135 +}
15136 +EXPORT_SYMBOL(do_sched_trace_nv_interrupt_end);
15137 +
15138 +
15139 +
15140 +
15141 +
15142 +
15143 +
15144 +
15145 +
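
Editorial aside (not part of the patch): do_sched_trace_nv_interrupt_begin()/..._end() above pair possibly-nested NVIDIA interrupt events by keeping, per CPU, an 'active' marker (0xcafebabe), a nesting count, and a small stack of serial numbers. The single-threaded userspace sketch below mirrors only that bookkeeping; the irq_trace/irq_begin/irq_end names are invented, and the real code additionally advances its per-CPU counter by num_online_cpus() so that different CPUs hand out disjoint serial numbers.

#include <stdio.h>

#define ACTIVE_MAGIC 0xcafebabeu
#define MAX_NESTING  128              /* the patch assumes nesting < 128 */

struct irq_trace {
	unsigned int  active;             /* ACTIVE_MAGIC while inside an interrupt */
	int           count;              /* current nesting depth */
	unsigned long serial[MAX_NESTING];/* serial number per nesting level */
};

static unsigned long next_serial;     /* the kernel code keeps one per CPU */

static unsigned long irq_begin(struct irq_trace *t)
{
	if (t->active == ACTIVE_MAGIC)
		t->count++;                   /* nested interrupt */
	else {
		t->active = ACTIVE_MAGIC;
		t->count = 1;
	}
	t->serial[t->count - 1] = ++next_serial;
	return t->serial[t->count - 1];
}

static unsigned long irq_end(struct irq_trace *t)
{
	unsigned long s;

	if (t->active != ACTIVE_MAGIC)
		return 0;                     /* unmatched end: ignore */
	t->count--;
	s = t->serial[t->count];
	if (t->count == 0)
		t->active = 0;
	return s;
}

int main(void)
{
	struct irq_trace t = { 0 };
	unsigned long a = irq_begin(&t);  /* outer interrupt  -> serial 1 */
	unsigned long b = irq_begin(&t);  /* nested interrupt -> serial 2 */

	printf("end %lu (expect %lu)\n", irq_end(&t), b);  /* inner end pairs with 2 */
	printf("end %lu (expect %lu)\n", irq_end(&t), a);  /* outer end pairs with 1 */
	return 0;
}

The fixed 128-entry serial[] array matches the patch's stated assumption that NVIDIA interrupts nest fewer than 128 deep on any one CPU.
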
15146 diff --git a/litmus/sched_trace_external.c b/litmus/sched_trace_external.c
15147 new file mode 100644
15148 index 0000000..cf8e1d7
15149 --- /dev/null
15150 +++ b/litmus/sched_trace_external.c
15151 @@ -0,0 +1,64 @@
15152 +#include <linux/module.h>
15153 +
15154 +#include <litmus/trace.h>
15155 +#include <litmus/sched_trace.h>
15156 +#include <litmus/litmus.h>
15157 +
15158 +void __sched_trace_tasklet_begin_external(struct task_struct* t)
15159 +{
15160 +	sched_trace_tasklet_begin(t);
15161 +}
15162 +EXPORT_SYMBOL(__sched_trace_tasklet_begin_external);
15163 +
15164 +void __sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed)
15165 +{
15166 +	sched_trace_tasklet_end(t, flushed);
15167 +}
15168 +EXPORT_SYMBOL(__sched_trace_tasklet_end_external);
15169 +
15170 +
15171 +
15172 +void __sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e)
15173 +{
15174 +	sched_trace_work_begin(t, e);
15175 +}
15176 +EXPORT_SYMBOL(__sched_trace_work_begin_external);
15177 +
15178 +void __sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f)
15179 +{
15180 +	sched_trace_work_end(t, e, f);
15181 +}
15182 +EXPORT_SYMBOL(__sched_trace_work_end_external);
15183 +
15184 +
15185 +
15186 +void __sched_trace_nv_interrupt_begin_external(u32 device)
15187 +{
15188 +	//unsigned long _device = device;
15189 +	sched_trace_nv_interrupt_begin((unsigned long)device);
15190 +}
15191 +EXPORT_SYMBOL(__sched_trace_nv_interrupt_begin_external);
15192 +
15193 +void __sched_trace_nv_interrupt_end_external(u32 device)
15194 +{
15195 +	//unsigned long _device = device;
15196 +	sched_trace_nv_interrupt_end((unsigned long)device);
15197 +}
15198 +EXPORT_SYMBOL(__sched_trace_nv_interrupt_end_external);
15199 +
15200 +
15201 +#ifdef CONFIG_LITMUS_NVIDIA
15202 +
15203 +#define EXX_TS(evt) \
15204 +void __##evt(void) { evt; } \
15205 +EXPORT_SYMBOL(__##evt);
15206 +
15207 +EXX_TS(TS_NV_TOPISR_START)
15208 +EXX_TS(TS_NV_TOPISR_END)
15209 +EXX_TS(TS_NV_BOTISR_START)
15210 +EXX_TS(TS_NV_BOTISR_END)
15211 +EXX_TS(TS_NV_RELEASE_BOTISR_START)
15212 +EXX_TS(TS_NV_RELEASE_BOTISR_END)
15213 +
15214 +#endif
15215 +
15216 -- 
15217 1.7.9.5
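
Editorial aside (not part of the patch): litmus/sched_trace_external.c, the last file in the patch, wraps the sched_trace_*() and TS_NV_*() tracing macros in plain exported functions (note the EXPORT_SYMBOL() after each one and the EXX_TS() helper), presumably so that separately built code such as the NVIDIA interrupt glue can fire these tracepoints without having the macro definitions in scope. The userspace sketch below shows the same wrapper-generating idea; LOG_EVENT() and DEFINE_EVENT_WRAPPER() are invented stand-ins for the real macros.

#include <stdio.h>

/* stand-in for a header-only tracepoint macro */
#define LOG_EVENT(name) printf("event: %s\n", name)

/* stamp out one plain function wrapper per event, mirroring EXX_TS() */
#define DEFINE_EVENT_WRAPPER(evt) \
	void __##evt(void) { LOG_EVENT(#evt); }

DEFINE_EVENT_WRAPPER(TOPISR_START)
DEFINE_EVENT_WRAPPER(TOPISR_END)

int main(void)
{
	/* a caller that only sees the wrapper declarations can still trace */
	__TOPISR_START();   /* prints "event: TOPISR_START" */
	__TOPISR_END();     /* prints "event: TOPISR_END" */
	return 0;
}

Each macro invocation stamps out one ordinary function (__TOPISR_START, __TOPISR_END here) that another translation unit can call by name, which is exactly the role the __*_external() wrappers play above.
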
