Attachment 'gpu-klmirqd-litmus-rt-ecrts12.patch'
1 diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
2 index 91fd0c7..433cd15 100644
3 --- a/arch/x86/kernel/irq.c
4 +++ b/arch/x86/kernel/irq.c
5 @@ -8,6 +8,10 @@
6 #include <linux/smp.h>
7 #include <linux/ftrace.h>
8
9 +#ifdef CONFIG_LITMUS_NVIDIA
10 +#include <litmus/sched_trace.h>
11 +#endif
12 +
13 #include <asm/apic.h>
14 #include <asm/io_apic.h>
15 #include <asm/irq.h>
16 diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
17 index 3770290..b5ddae4 100644
18 --- a/arch/x86/kernel/syscall_table_32.S
19 +++ b/arch/x86/kernel/syscall_table_32.S
20 @@ -352,3 +352,4 @@ ENTRY(sys_call_table)
21 .long sys_wait_for_ts_release
22 .long sys_release_ts
23 .long sys_null_call
24 + .long sys_register_nv_device
25 diff --git a/include/linux/completion.h b/include/linux/completion.h
26 index c63950e..3ce20dd 100644
27 --- a/include/linux/completion.h
28 +++ b/include/linux/completion.h
29 @@ -76,6 +76,7 @@ static inline void init_completion(struct completion *x)
30 init_waitqueue_head(&x->wait);
31 }
32
33 +extern void __wait_for_completion_locked(struct completion *);
34 extern void wait_for_completion(struct completion *);
35 extern int wait_for_completion_interruptible(struct completion *x);
36 extern int wait_for_completion_killable(struct completion *x);
37 diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
38 index a0384a4..a2f2880 100644
39 --- a/include/linux/interrupt.h
40 +++ b/include/linux/interrupt.h
41 @@ -459,6 +459,10 @@ struct tasklet_struct
42 atomic_t count;
43 void (*func)(unsigned long);
44 unsigned long data;
45 +
46 +#if defined(CONFIG_LITMUS_SOFTIRQD) || defined(CONFIG_LITMUS_PAI_SOFTIRQD)
47 + struct task_struct *owner;
48 +#endif
49 };
50
51 #define DECLARE_TASKLET(name, func, data) \
52 @@ -496,6 +500,7 @@ static inline void tasklet_unlock_wait(struct tasklet_struct *t)
53 #define tasklet_unlock(t) do { } while (0)
54 #endif
55
56 +extern void ___tasklet_schedule(struct tasklet_struct *t);
57 extern void __tasklet_schedule(struct tasklet_struct *t);
58
59 static inline void tasklet_schedule(struct tasklet_struct *t)
60 @@ -504,6 +509,7 @@ static inline void tasklet_schedule(struct tasklet_struct *t)
61 __tasklet_schedule(t);
62 }
63
64 +extern void ___tasklet_hi_schedule(struct tasklet_struct *t);
65 extern void __tasklet_hi_schedule(struct tasklet_struct *t);
66
67 static inline void tasklet_hi_schedule(struct tasklet_struct *t)
68 @@ -512,6 +518,7 @@ static inline void tasklet_hi_schedule(struct tasklet_struct *t)
69 __tasklet_hi_schedule(t);
70 }
71
72 +extern void ___tasklet_hi_schedule_first(struct tasklet_struct *t);
73 extern void __tasklet_hi_schedule_first(struct tasklet_struct *t);
74
75 /*
76 @@ -541,7 +548,7 @@ static inline void tasklet_disable(struct tasklet_struct *t)
77 }
78
79 static inline void tasklet_enable(struct tasklet_struct *t)
80 -{
81 +{
82 smp_mb__before_atomic_dec();
83 atomic_dec(&t->count);
84 }
85 diff --git a/include/linux/mutex.h b/include/linux/mutex.h
86 index f363bc8..9f31995 100644
87 --- a/include/linux/mutex.h
88 +++ b/include/linux/mutex.h
89 @@ -126,6 +126,15 @@ static inline int mutex_is_locked(struct mutex *lock)
90 return atomic_read(&lock->count) != 1;
91 }
92
93 +/* return non-zero to abort. only pre-side-effects may abort */
94 +typedef int (*side_effect_t)(unsigned long);
95 +extern void mutex_lock_sfx(struct mutex *lock,
96 + side_effect_t pre, unsigned long pre_arg,
97 + side_effect_t post, unsigned long post_arg);
98 +extern void mutex_unlock_sfx(struct mutex *lock,
99 + side_effect_t pre, unsigned long pre_arg,
100 + side_effect_t post, unsigned long post_arg);
101 +
102 /*
103 * See kernel/mutex.c for detailed documentation of these APIs.
104 * Also see Documentation/mutex-design.txt.
105 @@ -145,6 +154,7 @@ extern void mutex_lock(struct mutex *lock);
106 extern int __must_check mutex_lock_interruptible(struct mutex *lock);
107 extern int __must_check mutex_lock_killable(struct mutex *lock);
108
109 +
110 # define mutex_lock_nested(lock, subclass) mutex_lock(lock)
111 # define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock)
112 # define mutex_lock_killable_nested(lock, subclass) mutex_lock_killable(lock)
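As an illustration of the side-effect hooks declared above, here is a minimal sketch (not taken from the patch; the mutex, flag, and counter names are hypothetical). Both callbacks run while the mutex's internal wait_lock spinlock is held, so they must not sleep:

static atomic_t nr_acquired = ATOMIC_INIT(0);
static int shutting_down;                      /* hypothetical abort flag */

static int abort_if_shutting_down(unsigned long arg)
{
        return *(int *)arg;                    /* non-zero aborts the lock attempt */
}

static int count_acquire(unsigned long arg)
{
        atomic_inc((atomic_t *)arg);
        return 0;                              /* post side-effects cannot abort */
}

/* inside some function: */
mutex_lock_sfx(&some_mutex,
               abort_if_shutting_down, (unsigned long)&shutting_down,
               count_acquire, (unsigned long)&nr_acquired);
/* ... critical section ... */
mutex_unlock_sfx(&some_mutex, NULL, 0, NULL, 0);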
113 diff --git a/include/linux/semaphore.h b/include/linux/semaphore.h
114 index 5310d27..69e3f57 100644
115 --- a/include/linux/semaphore.h
116 +++ b/include/linux/semaphore.h
117 @@ -49,4 +49,13 @@ extern int __must_check down_trylock(struct semaphore *sem);
118 extern int __must_check down_timeout(struct semaphore *sem, long jiffies);
119 extern void up(struct semaphore *sem);
120
121 +extern void __down(struct semaphore *sem);
122 +extern void __up(struct semaphore *sem);
123 +
124 +struct semaphore_waiter {
125 + struct list_head list;
126 + struct task_struct *task;
127 + int up;
128 +};
129 +
130 #endif /* __LINUX_SEMAPHORE_H */
131 diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
132 index 25e02c9..5fecfb3 100644
133 --- a/include/linux/workqueue.h
134 +++ b/include/linux/workqueue.h
135 @@ -83,6 +83,9 @@ struct work_struct {
136 #ifdef CONFIG_LOCKDEP
137 struct lockdep_map lockdep_map;
138 #endif
139 +#ifdef CONFIG_LITMUS_SOFTIRQD
140 + struct task_struct *owner;
141 +#endif
142 };
143
144 #define WORK_DATA_INIT() ATOMIC_LONG_INIT(WORK_STRUCT_NO_CPU)
145 @@ -115,11 +118,25 @@ struct execute_work {
146 #define __WORK_INIT_LOCKDEP_MAP(n, k)
147 #endif
148
149 +#ifdef CONFIG_LITMUS_SOFTIRQD
150 +#define __WORK_INIT_OWNER() \
151 + .owner = NULL,
152 +
153 +#define PREPARE_OWNER(_work, _owner) \
154 + do { \
155 + (_work)->owner = (_owner); \
156 + } while(0)
157 +#else
158 +#define __WORK_INIT_OWNER()
159 +#define PREPARE_OWNER(_work, _owner)
160 +#endif
161 +
162 #define __WORK_INITIALIZER(n, f) { \
163 .data = WORK_DATA_STATIC_INIT(), \
164 .entry = { &(n).entry, &(n).entry }, \
165 .func = (f), \
166 __WORK_INIT_LOCKDEP_MAP(#n, &(n)) \
167 + __WORK_INIT_OWNER() \
168 }
169
170 #define __DELAYED_WORK_INITIALIZER(n, f) { \
171 @@ -327,6 +344,7 @@ extern void flush_workqueue(struct workqueue_struct *wq);
172 extern void flush_scheduled_work(void);
173 extern void flush_delayed_work(struct delayed_work *work);
174
175 +extern int __schedule_work(struct work_struct *work);
176 extern int schedule_work(struct work_struct *work);
177 extern int schedule_work_on(int cpu, struct work_struct *work);
178 extern int schedule_delayed_work(struct delayed_work *work, unsigned long delay);
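A sketch of how a work item might be tagged with its real-time owner before being handed to a klitirqd daemon (not from the patch; the handler, owner task, and daemon index are hypothetical, and litmus_schedule_work() is declared in litmus/litmus_softirq.h):

static void nv_bottom_half(struct work_struct *w);   /* hypothetical handler */
static struct work_struct nv_work;

/* inside some function: */
INIT_WORK(&nv_work, nv_bottom_half);
PREPARE_OWNER(&nv_work, device_owner);   /* RT task this work is charged to */
litmus_schedule_work(&nv_work, 0);       /* queue on klitirqd daemon #0 */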
179 diff --git a/include/litmus/affinity.h b/include/litmus/affinity.h
180 new file mode 100644
181 index 0000000..877b409
182 --- /dev/null
183 +++ b/include/litmus/affinity.h
184 @@ -0,0 +1,78 @@
185 +#ifndef __LITMUS_AFFINITY_H
186 +#define __LITMUS_AFFINITY_H
187 +
188 +#include <linux/cpumask.h>
189 +
190 +/*
191 + L1 (instr) = depth 0
192 + L1 (data) = depth 1
193 + L2 = depth 2
194 + L3 = depth 3
195 + */
196 +#define NUM_CACHE_LEVELS 4
197 +
198 +struct neighborhood
199 +{
200 + unsigned int size[NUM_CACHE_LEVELS];
201 + cpumask_var_t neighbors[NUM_CACHE_LEVELS];
202 +};
203 +
204 +/* topology info is stored redundantly in a big array for fast lookups */
205 +extern struct neighborhood neigh_info[NR_CPUS];
206 +
207 +void init_topology(void); /* called by Litmus module's _init_litmus() */
208 +
209 +/* Works like:
210 +void get_nearest_available_cpu(cpu_entry_t* nearest, cpu_entry_t* start, cpu_entry_t* entries, int release_master)
211 +
212 +Set release_master = -1 if there is no release master.
213 + */
214 +#define get_nearest_available_cpu(nearest, start, entries, release_master) \
215 +{ \
216 + (nearest) = NULL; \
217 + if(!(start)->linked) \
218 + { \
219 + (nearest) = (start); \
220 + } \
221 + else \
222 + { \
223 + int __level; \
224 + int __cpu; \
225 + struct neighborhood* __neighbors = &neigh_info[(start)->cpu]; \
226 + \
227 + for(__level = 0; (__level < NUM_CACHE_LEVELS) && !(nearest); ++__level) \
228 + { \
229 + if(__neighbors->size[__level] > 1) \
230 + { \
231 + for_each_cpu(__cpu, __neighbors->neighbors[__level]) \
232 + { \
233 + if(__cpu != (release_master)) \
234 + { \
235 + cpu_entry_t* __entry = &per_cpu((entries), __cpu); \
236 + if(!__entry->linked) \
237 + { \
238 + (nearest) = __entry; \
239 + break; \
240 + } \
241 + } \
242 + } \
243 + } \
244 + else if(__neighbors->size[__level] == 0) \
245 + { \
246 + break; \
247 + } \
248 + } \
249 + } \
250 + \
251 + if((nearest)) \
252 + { \
253 + TRACE("P%d is closest available CPU to P%d\n", (nearest)->cpu, (start)->cpu); \
254 + } \
255 + else \
256 + { \
257 + TRACE("Could not find an available CPU close to P%d\n", \
258 + (start)->cpu); \
259 + } \
260 +}
261 +
262 +#endif
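A sketch of how a global plugin might use get_nearest_available_cpu() when linking a resuming job (not from the patch; cpu_entry_t, gsnedf_cpu_entries, and link_task_to_cpu() are assumed to follow the GSN-EDF plugin's conventions):

cpu_entry_t *nearest = NULL;
cpu_entry_t *last = &per_cpu(gsnedf_cpu_entries, task_cpu(task));

/* prefer an idle CPU that shares cache with the task's previous CPU;
   pass -1 when no release-master CPU is reserved */
get_nearest_available_cpu(nearest, last, gsnedf_cpu_entries, -1);
if (nearest)
        link_task_to_cpu(task, nearest);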
263 diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h
264 index caf2a1e..c740e8f 100644
265 --- a/include/litmus/fdso.h
266 +++ b/include/litmus/fdso.h
267 @@ -18,9 +18,10 @@ typedef enum {
268 MIN_OBJ_TYPE = 0,
269
270 FMLP_SEM = 0,
271 - SRP_SEM = 1,
272 + KFMLP_SEM = 1,
273 + SRP_SEM = 2,
274
275 - MAX_OBJ_TYPE = 1
276 + MAX_OBJ_TYPE = SRP_SEM
277 } obj_type_t;
278
279 struct inode_obj_id {
280 @@ -64,6 +65,7 @@ static inline void* od_lookup(int od, obj_type_t type)
281 }
282
283 #define lookup_fmlp_sem(od)((struct pi_semaphore*) od_lookup(od, FMLP_SEM))
284 +#define lookup_kfmlp_sem(od)((struct pi_semaphore*) od_lookup(od, KFMLP_SEM))
285 #define lookup_srp_sem(od) ((struct srp_semaphore*) od_lookup(od, SRP_SEM))
286 #define lookup_ics(od) ((struct ics*) od_lookup(od, ICS_ID))
287
288 diff --git a/include/litmus/fifo_common.h b/include/litmus/fifo_common.h
289 new file mode 100644
290 index 0000000..12cfbfe
291 --- /dev/null
292 +++ b/include/litmus/fifo_common.h
293 @@ -0,0 +1,25 @@
294 +/*
295 + * EDF common data structures and utility functions shared by all EDF
296 + * based scheduler plugins
297 + */
298 +
299 +/* CLEANUP: Add comments and make it less messy.
300 + *
301 + */
302 +
303 +#ifndef __UNC_FIFO_COMMON_H__
304 +#define __UNC_FIFO_COMMON_H__
305 +
306 +#include <litmus/rt_domain.h>
307 +
308 +void fifo_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
309 + release_jobs_t release);
310 +
311 +int fifo_higher_prio(struct task_struct* first,
312 + struct task_struct* second);
313 +
314 +int fifo_ready_order(struct bheap_node* a, struct bheap_node* b);
315 +
316 +int fifo_preemption_needed(rt_domain_t* rt, struct task_struct *t);
317 +
318 +#endif
319 diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h
320 index e7769ca..829c1c5 100644
321 --- a/include/litmus/litmus.h
322 +++ b/include/litmus/litmus.h
323 @@ -26,6 +26,7 @@ static inline int in_list(struct list_head* list)
324 );
325 }
326
327 +
328 struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq);
329
330 #define NO_CPU 0xffffffff
331 @@ -117,7 +118,9 @@ static inline lt_t litmus_clock(void)
332 #define earlier_release(a, b) (lt_before(\
333 (a)->rt_param.job_params.release,\
334 (b)->rt_param.job_params.release))
335 -
336 +#define shorter_period(a, b) (lt_before(\
337 + (a)->rt_param.task_params.period,\
338 + (b)->rt_param.task_params.period))
339 void preempt_if_preemptable(struct task_struct* t, int on_cpu);
340
341 #ifdef CONFIG_LITMUS_LOCKING
342 diff --git a/include/litmus/litmus_softirq.h b/include/litmus/litmus_softirq.h
343 new file mode 100644
344 index 0000000..34287f3
345 --- /dev/null
346 +++ b/include/litmus/litmus_softirq.h
347 @@ -0,0 +1,199 @@
348 +#ifndef __LITMUS_SOFTIRQ_H
349 +#define __LITMUS_SOFTIRQ_H
350 +
351 +#include <linux/interrupt.h>
352 +#include <linux/workqueue.h>
353 +
354 +/*
355 + Threaded tasklet handling for Litmus. Tasklets
356 + are scheduled with the priority of the tasklet's
357 + owner---that is, the RT task on whose behalf the
358 + tasklet runs.
359 +
360 + Tasklets are currently scheduled in FIFO order with
361 + NO priority inheritance for "blocked" tasklets.
362 +
363 + klitirqd assumes the priority of the owner of the
364 + tasklet when the tasklet is next to execute.
365 +
366 + Currently, hi-tasklets are scheduled before
367 + low-tasklets, regardless of priority of low-tasklets.
368 + And likewise, low-tasklets are scheduled before work
369 + queue objects. This priority inversion probably needs
370 + to be fixed, though it is not an issue in our work with
371 + GPUs, since GPUs (and their associated klitirqds) are
372 + owned for exclusive time periods, so no inversions can
373 + occur.
374 + */
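In short, the intended hand-off looks roughly like this sketch (not from the patch; the tasklet, its owner, and the daemon index are hypothetical — compare the NVIDIA path added to __tasklet_schedule() in kernel/softirq.c below):

t->owner = rt_owner;              /* klitirqd inherits this task's priority */
litmus_tasklet_schedule(t, 0);    /* queue tasklet on klitirqd daemon #0 */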
375 +
376 +
377 +
378 +#define NR_LITMUS_SOFTIRQD CONFIG_NR_LITMUS_SOFTIRQD
379 +
380 +/* Spawns NR_LITMUS_SOFTIRQD klitirqd daemons.
381 + Actual launch of threads is deferred to kworker's
382 + workqueue, so daemons will likely not be immediately
383 + running when this function returns, though the required
384 + data will be initialized.
385 +
386 + @affinity_set: an array expressing the processor affinity
387 + for each of the NR_LITMUS_SOFTIRQD daemons. May be set
388 + to NULL for global scheduling.
389 +
390 + - Examples -
391 + 8-CPU system with two CPU clusters:
392 + affinity[] = {0, 0, 0, 0, 3, 3, 3, 3}
393 + NOTE: Daemons are not actually bound to the specified CPU, but
394 + rather to the cluster in which the CPU resides.
395 +
396 + 8-CPU system, partitioned:
397 + affinity[] = {0, 1, 2, 3, 4, 5, 6, 7}
398 +
399 + FIXME: change array to a CPU topology or array of cpumasks
400 +
401 + */
402 +void spawn_klitirqd(int* affinity);
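For the clustered example above, the call would look roughly like this (sketch only, assuming CONFIG_NR_LITMUS_SOFTIRQD == 8):

int affinity[NR_LITMUS_SOFTIRQD] = {0, 0, 0, 0, 3, 3, 3, 3};
spawn_klitirqd(affinity);         /* or spawn_klitirqd(NULL) for global scheduling */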
403 +
404 +
405 +/* Raises a flag to tell klitirqds to terminate.
406 + Termination is asynchronous, so some threads may still be
407 + running after this function returns. */
408 +void kill_klitirqd(void);
409 +
410 +
411 +/* Returns 1 if all NR_LITMUS_SOFTIRQD klitirqds are ready
412 + to handle tasklets. 0, otherwise.*/
413 +int klitirqd_is_ready(void);
414 +
415 +/* Returns 1 if no NR_LITMUS_SOFTIRQD klitirqds are ready
416 + to handle tasklets. 0, otherwise.*/
417 +int klitirqd_is_dead(void);
418 +
419 +/* Flushes all pending work out to the OS for regular
420 + * tasklet/work processing of the specified 'owner'
421 + *
422 + * PRECOND: klitirqd_thread must have a clear entry
423 + * in the GPU registry, otherwise this call will become
424 + * a no-op as work will loop back to the klitirqd_thread.
425 + *
426 + * Pass NULL for owner to flush ALL pending items.
427 + */
428 +void flush_pending(struct task_struct* klitirqd_thread,
429 + struct task_struct* owner);
430 +
431 +struct task_struct* get_klitirqd(unsigned int k_id);
432 +
433 +
434 +extern int __litmus_tasklet_schedule(
435 + struct tasklet_struct *t,
436 + unsigned int k_id);
437 +
438 +/* schedule a tasklet on klitirqd #k_id */
439 +static inline int litmus_tasklet_schedule(
440 + struct tasklet_struct *t,
441 + unsigned int k_id)
442 +{
443 + int ret = 0;
444 + if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
445 + ret = __litmus_tasklet_schedule(t, k_id);
446 + return(ret);
447 +}
448 +
449 +/* for use by __tasklet_schedule() */
450 +static inline int _litmus_tasklet_schedule(
451 + struct tasklet_struct *t,
452 + unsigned int k_id)
453 +{
454 + return(__litmus_tasklet_schedule(t, k_id));
455 +}
456 +
457 +
458 +
459 +
460 +extern int __litmus_tasklet_hi_schedule(struct tasklet_struct *t,
461 + unsigned int k_id);
462 +
463 +/* schedule a hi tasklet on klitirqd #k_id */
464 +static inline int litmus_tasklet_hi_schedule(struct tasklet_struct *t,
465 + unsigned int k_id)
466 +{
467 + int ret = 0;
468 + if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
469 + ret = __litmus_tasklet_hi_schedule(t, k_id);
470 + return(ret);
471 +}
472 +
473 +/* for use by __tasklet_hi_schedule() */
474 +static inline int _litmus_tasklet_hi_schedule(struct tasklet_struct *t,
475 + unsigned int k_id)
476 +{
477 + return(__litmus_tasklet_hi_schedule(t, k_id));
478 +}
479 +
480 +
481 +
482 +
483 +
484 +extern int __litmus_tasklet_hi_schedule_first(
485 + struct tasklet_struct *t,
486 + unsigned int k_id);
487 +
488 +/* schedule a hi tasklet on klitirqd #k_id on next go-around */
489 +/* PRECONDITION: Interrupts must be disabled. */
490 +static inline int litmus_tasklet_hi_schedule_first(
491 + struct tasklet_struct *t,
492 + unsigned int k_id)
493 +{
494 + int ret = 0;
495 + if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
496 + ret = __litmus_tasklet_hi_schedule_first(t, k_id);
497 + return(ret);
498 +}
499 +
500 +/* for use by __tasklet_hi_schedule_first() */
501 +static inline int _litmus_tasklet_hi_schedule_first(
502 + struct tasklet_struct *t,
503 + unsigned int k_id)
504 +{
505 + return(__litmus_tasklet_hi_schedule_first(t, k_id));
506 +}
507 +
508 +
509 +
510 +//////////////
511 +
512 +extern int __litmus_schedule_work(
513 + struct work_struct* w,
514 + unsigned int k_id);
515 +
516 +static inline int litmus_schedule_work(
517 + struct work_struct* w,
518 + unsigned int k_id)
519 +{
520 + return(__litmus_schedule_work(w, k_id));
521 +}
522 +
523 +
524 +
525 +///////////// mutex operations for client threads.
526 +
527 +void down_and_set_stat(struct task_struct* t,
528 + enum klitirqd_sem_status to_set,
529 + struct mutex* sem);
530 +
531 +void __down_and_reset_and_set_stat(struct task_struct* t,
532 + enum klitirqd_sem_status to_reset,
533 + enum klitirqd_sem_status to_set,
534 + struct mutex* sem);
535 +
536 +void up_and_set_stat(struct task_struct* t,
537 + enum klitirqd_sem_status to_set,
538 + struct mutex* sem);
539 +
540 +
541 +
542 +void release_klitirqd_lock(struct task_struct* t);
543 +
544 +int reacquire_klitirqd_lock(struct task_struct* t);
545 +
546 +#endif
547 diff --git a/include/litmus/nvidia_info.h b/include/litmus/nvidia_info.h
548 new file mode 100644
549 index 0000000..9e07a27
550 --- /dev/null
551 +++ b/include/litmus/nvidia_info.h
552 @@ -0,0 +1,38 @@
553 +#ifndef __LITMUS_NVIDIA_H
554 +#define __LITMUS_NVIDIA_H
555 +
556 +#include <linux/interrupt.h>
557 +
558 +
559 +#include <litmus/litmus_softirq.h>
560 +
561 +
562 +//#define NV_DEVICE_NUM NR_LITMUS_SOFTIRQD
563 +#define NV_DEVICE_NUM CONFIG_NV_DEVICE_NUM
564 +
565 +int init_nvidia_info(void);
566 +
567 +int is_nvidia_func(void* func_addr);
568 +
569 +void dump_nvidia_info(const struct tasklet_struct *t);
570 +
571 +
572 +// Returns the NVIDIA device number associated with the provided tasklet or work_struct.
573 +u32 get_tasklet_nv_device_num(const struct tasklet_struct *t);
574 +u32 get_work_nv_device_num(const struct work_struct *t);
575 +
576 +
577 +int init_nv_device_reg(void);
578 +//int get_nv_device_id(struct task_struct* owner);
579 +
580 +
581 +int reg_nv_device(int reg_device_id, int register_device);
582 +
583 +struct task_struct* get_nv_device_owner(u32 target_device_id);
584 +
585 +void lock_nv_registry(u32 reg_device_id, unsigned long* flags);
586 +void unlock_nv_registry(u32 reg_device_id, unsigned long* flags);
587 +
588 +void increment_nv_int_count(u32 device);
589 +
590 +#endif
591 diff --git a/include/litmus/preempt.h b/include/litmus/preempt.h
592 index 260c6fe..244924f 100644
593 --- a/include/litmus/preempt.h
594 +++ b/include/litmus/preempt.h
595 @@ -26,6 +26,7 @@ const char* sched_state_name(int s);
596 (x), #x, __FUNCTION__); \
597 } while (0);
598
599 +//#define TRACE_SCHED_STATE_CHANGE(x, y, cpu) /* ignore */
600 #define TRACE_SCHED_STATE_CHANGE(x, y, cpu) \
601 TRACE_STATE("[P%d] 0x%x (%s) -> 0x%x (%s)\n", \
602 cpu, (x), sched_state_name(x), \
603 diff --git a/include/litmus/rm_common.h b/include/litmus/rm_common.h
604 new file mode 100644
605 index 0000000..5991b0b
606 --- /dev/null
607 +++ b/include/litmus/rm_common.h
608 @@ -0,0 +1,25 @@
609 +/*
610 + * EDF common data structures and utility functions shared by all EDF
611 + * based scheduler plugins
612 + */
613 +
614 +/* CLEANUP: Add comments and make it less messy.
615 + *
616 + */
617 +
618 +#ifndef __UNC_RM_COMMON_H__
619 +#define __UNC_RM_COMMON_H__
620 +
621 +#include <litmus/rt_domain.h>
622 +
623 +void rm_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
624 + release_jobs_t release);
625 +
626 +int rm_higher_prio(struct task_struct* first,
627 + struct task_struct* second);
628 +
629 +int rm_ready_order(struct bheap_node* a, struct bheap_node* b);
630 +
631 +int rm_preemption_needed(rt_domain_t* rt, struct task_struct *t);
632 +
633 +#endif
634 diff --git a/include/litmus/rm_srt_common.h b/include/litmus/rm_srt_common.h
635 new file mode 100644
636 index 0000000..78aa287
637 --- /dev/null
638 +++ b/include/litmus/rm_srt_common.h
639 @@ -0,0 +1,25 @@
640 +/*
641 + * EDF common data structures and utility functions shared by all EDF
642 + * based scheduler plugins
643 + */
644 +
645 +/* CLEANUP: Add comments and make it less messy.
646 + *
647 + */
648 +
649 +#ifndef __UNC_RM_SRT_COMMON_H__
650 +#define __UNC_RM_SRT_COMMON_H__
651 +
652 +#include <litmus/rt_domain.h>
653 +
654 +void rm_srt_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
655 + release_jobs_t release);
656 +
657 +int rm_srt_higher_prio(struct task_struct* first,
658 + struct task_struct* second);
659 +
660 +int rm_srt_ready_order(struct bheap_node* a, struct bheap_node* b);
661 +
662 +int rm_srt_preemption_needed(rt_domain_t* rt, struct task_struct *t);
663 +
664 +#endif
665 diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
666 index 5de422c..53af3ce 100644
667 --- a/include/litmus/rt_param.h
668 +++ b/include/litmus/rt_param.h
669 @@ -69,6 +69,8 @@ struct control_page {
670 /* don't export internal data structures to user space (liblitmus) */
671 #ifdef __KERNEL__
672
673 +#include <linux/semaphore.h>
674 +
675 struct _rt_domain;
676 struct bheap_node;
677 struct release_heap;
678 @@ -94,6 +96,14 @@ struct rt_job {
679
680 struct pfair_param;
681
682 +enum klitirqd_sem_status
683 +{
684 + NEED_TO_REACQUIRE,
685 + REACQUIRING,
686 + NOT_HELD,
687 + HELD
688 +};
689 +
690 /* RT task parameters for scheduling extensions
691 * These parameters are inherited during clone and therefore must
692 * be explicitly set up before the task set is launched.
693 @@ -108,6 +118,38 @@ struct rt_param {
694 /* is the task present? (true if it can be scheduled) */
695 unsigned int present:1;
696
697 +#ifdef CONFIG_LITMUS_SOFTIRQD
698 + /* proxy threads have minimum priority by default */
699 + unsigned int is_proxy_thread:1;
700 +
701 + /* pointer to klitirqd currently working on this
702 + task_struct's behalf. only set by the task pointed
703 + to by klitirqd.
704 +
705 + ptr only valid if is_proxy_thread == 0
706 + */
707 + struct task_struct* cur_klitirqd;
708 +
709 + /* Used to implement mutual exclusion between
710 + * job and klitirqd execution. A job must always hold
711 + * its klitirqd_sem to execute. A klitirqd instance
712 + * must hold the semaphore before executing on behalf
713 + * of a job.
714 + */
715 + //struct semaphore klitirqd_sem;
716 + struct mutex klitirqd_sem;
717 +
718 + /* status of held klitirqd_sem, even if the held klitirqd_sem is from
719 + another task (only proxy threads do this though).
720 + */
721 + atomic_t klitirqd_sem_stat;
722 +#endif
723 +
724 +#ifdef CONFIG_LITMUS_NVIDIA
725 + /* number of top-half interrupts handled on behalf of current job */
726 + atomic_t nv_int_count;
727 +#endif
728 +
729 #ifdef CONFIG_LITMUS_LOCKING
730 /* Is the task being priority-boosted by a locking protocol? */
731 unsigned int priority_boosted:1;
732 @@ -128,7 +170,7 @@ struct rt_param {
733 * an increased task priority.
734 */
735 struct task_struct* inh_task;
736 -
737 +
738 #ifdef CONFIG_NP_SECTION
739 /* For the FMLP under PSN-EDF, it is required to make the task
740 * non-preemptive from kernel space. In order not to interfere with
741 diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h
742 index 6e7cabd..8fdf05d 100644
743 --- a/include/litmus/sched_plugin.h
744 +++ b/include/litmus/sched_plugin.h
745 @@ -11,6 +11,8 @@
746 #include <litmus/locking.h>
747 #endif
748
749 +#include <linux/interrupt.h>
750 +
751 /************************ setup/tear down ********************/
752
753 typedef long (*activate_plugin_t) (void);
754 @@ -29,7 +31,6 @@ typedef struct task_struct* (*schedule_t)(struct task_struct * prev);
755 */
756 typedef void (*finish_switch_t)(struct task_struct *prev);
757
758 -
759 /********************* task state changes ********************/
760
761 /* Called to setup a new real-time task.
762 @@ -58,6 +59,21 @@ typedef void (*task_exit_t) (struct task_struct *);
763 typedef long (*allocate_lock_t) (struct litmus_lock **lock, int type,
764 void* __user config);
765
766 +/* Called to change inheritance levels of given task */
767 +typedef void (*set_prio_inh_t)(struct task_struct* t,
768 + struct task_struct* prio_inh);
769 +typedef void (*clear_prio_inh_t)(struct task_struct* t);
770 +
771 +
772 +typedef void (*set_prio_inh_klitirq_t)(struct task_struct* klitirqd,
773 + struct task_struct* old_owner,
774 + struct task_struct* new_owner);
775 +typedef void (*clear_prio_inh_klitirqd_t)(struct task_struct* klitirqd,
776 + struct task_struct* old_owner);
777 +
778 +
779 +typedef int (*enqueue_pai_tasklet_t)(struct tasklet_struct* tasklet);
780 +typedef void (*run_tasklets_t)(struct task_struct* next);
781
782 /********************* sys call backends ********************/
783 /* This function causes the caller to sleep until the next release */
784 @@ -88,7 +104,7 @@ struct sched_plugin {
785 /* task state changes */
786 admit_task_t admit_task;
787
788 - task_new_t task_new;
789 + task_new_t task_new;
790 task_wake_up_t task_wake_up;
791 task_block_t task_block;
792 task_exit_t task_exit;
793 @@ -96,6 +112,19 @@ struct sched_plugin {
794 #ifdef CONFIG_LITMUS_LOCKING
795 /* locking protocols */
796 allocate_lock_t allocate_lock;
797 +
798 + set_prio_inh_t set_prio_inh;
799 + clear_prio_inh_t clear_prio_inh;
800 +#endif
801 +
802 +#ifdef CONFIG_LITMUS_SOFTIRQD
803 + set_prio_inh_klitirq_t set_prio_inh_klitirqd;
804 + clear_prio_inh_klitirqd_t clear_prio_inh_klitirqd;
805 +#endif
806 +
807 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
808 + enqueue_pai_tasklet_t enqueue_pai_tasklet;
809 + run_tasklets_t run_tasklets;
810 #endif
811 } __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
812
813 diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h
814 index 7ca34cb..232c758 100644
815 --- a/include/litmus/sched_trace.h
816 +++ b/include/litmus/sched_trace.h
817 @@ -11,12 +11,12 @@ struct st_trace_header {
818 u8 cpu; /* On which CPU was it recorded? */
819 u16 pid; /* PID of the task. */
820 u32 job; /* The job sequence number. */
821 -};
822 +} __attribute__((packed));
823
824 #define ST_NAME_LEN 16
825 struct st_name_data {
826 char cmd[ST_NAME_LEN];/* The name of the executable of this process. */
827 -};
828 +} __attribute__((packed));
829
830 struct st_param_data { /* regular params */
831 u32 wcet;
832 @@ -25,30 +25,29 @@ struct st_param_data { /* regular params */
833 u8 partition;
834 u8 class;
835 u8 __unused[2];
836 -};
837 +} __attribute__((packed));
838
839 struct st_release_data { /* A job is was/is going to be released. */
840 u64 release; /* What's the release time? */
841 u64 deadline; /* By when must it finish? */
842 -};
843 +} __attribute__((packed));
844
845 struct st_assigned_data { /* A job was asigned to a CPU. */
846 u64 when;
847 u8 target; /* Where should it execute? */
848 u8 __unused[7];
849 -};
850 +} __attribute__((packed));
851
852 struct st_switch_to_data { /* A process was switched to on a given CPU. */
853 u64 when; /* When did this occur? */
854 u32 exec_time; /* Time the current job has executed. */
855 u8 __unused[4];
856 -
857 -};
858 +} __attribute__((packed));
859
860 struct st_switch_away_data { /* A process was switched away from on a given CPU. */
861 u64 when;
862 u64 exec_time;
863 -};
864 +} __attribute__((packed));
865
866 struct st_completion_data { /* A job completed. */
867 u64 when;
868 @@ -56,35 +55,92 @@ struct st_completion_data { /* A job completed. */
869 * next task automatically; set to 0 otherwise.
870 */
871 u8 __uflags:7;
872 - u8 __unused[7];
873 -};
874 + u16 nv_int_count;
875 + u8 __unused[5];
876 +} __attribute__((packed));
877
878 struct st_block_data { /* A task blocks. */
879 u64 when;
880 u64 __unused;
881 -};
882 +} __attribute__((packed));
883
884 struct st_resume_data { /* A task resumes. */
885 u64 when;
886 u64 __unused;
887 -};
888 +} __attribute__((packed));
889
890 struct st_action_data {
891 u64 when;
892 u8 action;
893 u8 __unused[7];
894 -};
895 +} __attribute__((packed));
896
897 struct st_sys_release_data {
898 u64 when;
899 u64 release;
900 -};
901 +} __attribute__((packed));
902 +
903 +
904 +struct st_tasklet_release_data {
905 + u64 when;
906 + u64 __unused;
907 +} __attribute__((packed));
908 +
909 +struct st_tasklet_begin_data {
910 + u64 when;
911 + u16 exe_pid;
912 + u8 __unused[6];
913 +} __attribute__((packed));
914 +
915 +struct st_tasklet_end_data {
916 + u64 when;
917 + u16 exe_pid;
918 + u8 flushed;
919 + u8 __unused[5];
920 +} __attribute__((packed));
921 +
922 +
923 +struct st_work_release_data {
924 + u64 when;
925 + u64 __unused;
926 +} __attribute__((packed));
927 +
928 +struct st_work_begin_data {
929 + u64 when;
930 + u16 exe_pid;
931 + u8 __unused[6];
932 +} __attribute__((packed));
933 +
934 +struct st_work_end_data {
935 + u64 when;
936 + u16 exe_pid;
937 + u8 flushed;
938 + u8 __unused[5];
939 +} __attribute__((packed));
940 +
941 +struct st_effective_priority_change_data {
942 + u64 when;
943 + u16 inh_pid;
944 + u8 __unused[6];
945 +} __attribute__((packed));
946 +
947 +struct st_nv_interrupt_begin_data {
948 + u64 when;
949 + u32 device;
950 + u32 serialNumber;
951 +} __attribute__((packed));
952 +
953 +struct st_nv_interrupt_end_data {
954 + u64 when;
955 + u32 device;
956 + u32 serialNumber;
957 +} __attribute__((packed));
958
959 #define DATA(x) struct st_ ## x ## _data x;
960
961 typedef enum {
962 - ST_NAME = 1, /* Start at one, so that we can spot
963 - * uninitialized records. */
964 + ST_NAME = 1, /* Start at one, so that we can spot
965 + * uninitialized records. */
966 ST_PARAM,
967 ST_RELEASE,
968 ST_ASSIGNED,
969 @@ -94,7 +150,16 @@ typedef enum {
970 ST_BLOCK,
971 ST_RESUME,
972 ST_ACTION,
973 - ST_SYS_RELEASE
974 + ST_SYS_RELEASE,
975 + ST_TASKLET_RELEASE,
976 + ST_TASKLET_BEGIN,
977 + ST_TASKLET_END,
978 + ST_WORK_RELEASE,
979 + ST_WORK_BEGIN,
980 + ST_WORK_END,
981 + ST_EFF_PRIO_CHANGE,
982 + ST_NV_INTERRUPT_BEGIN,
983 + ST_NV_INTERRUPT_END,
984 } st_event_record_type_t;
985
986 struct st_event_record {
987 @@ -113,8 +178,17 @@ struct st_event_record {
988 DATA(resume);
989 DATA(action);
990 DATA(sys_release);
991 + DATA(tasklet_release);
992 + DATA(tasklet_begin);
993 + DATA(tasklet_end);
994 + DATA(work_release);
995 + DATA(work_begin);
996 + DATA(work_end);
997 + DATA(effective_priority_change);
998 + DATA(nv_interrupt_begin);
999 + DATA(nv_interrupt_end);
1000 } data;
1001 -};
1002 +} __attribute__((packed));
1003
1004 #undef DATA
1005
1006 @@ -129,6 +203,8 @@ struct st_event_record {
1007 ft_event1(id, callback, task)
1008 #define SCHED_TRACE2(id, callback, task, xtra) \
1009 ft_event2(id, callback, task, xtra)
1010 +#define SCHED_TRACE3(id, callback, task, xtra1, xtra2) \
1011 + ft_event3(id, callback, task, xtra1, xtra2)
1012
1013 /* provide prototypes; needed on sparc64 */
1014 #ifndef NO_TASK_TRACE_DECLS
1015 @@ -155,12 +231,45 @@ feather_callback void do_sched_trace_action(unsigned long id,
1016 feather_callback void do_sched_trace_sys_release(unsigned long id,
1017 lt_t* start);
1018
1019 +
1020 +feather_callback void do_sched_trace_tasklet_release(unsigned long id,
1021 + struct task_struct* owner);
1022 +feather_callback void do_sched_trace_tasklet_begin(unsigned long id,
1023 + struct task_struct* owner);
1024 +feather_callback void do_sched_trace_tasklet_end(unsigned long id,
1025 + struct task_struct* owner,
1026 + unsigned long flushed);
1027 +
1028 +feather_callback void do_sched_trace_work_release(unsigned long id,
1029 + struct task_struct* owner);
1030 +feather_callback void do_sched_trace_work_begin(unsigned long id,
1031 + struct task_struct* owner,
1032 + struct task_struct* exe);
1033 +feather_callback void do_sched_trace_work_end(unsigned long id,
1034 + struct task_struct* owner,
1035 + struct task_struct* exe,
1036 + unsigned long flushed);
1037 +
1038 +feather_callback void do_sched_trace_eff_prio_change(unsigned long id,
1039 + struct task_struct* task,
1040 + struct task_struct* inh);
1041 +
1042 +feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id,
1043 + u32 device);
1044 +feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id,
1045 + unsigned long unused);
1046 +
1047 +
1048 +/* returns true if we're tracing an interrupt on current CPU */
1049 +/* int is_interrupt_tracing_active(void); */
1050 +
1051 #endif
1052
1053 #else
1054
1055 #define SCHED_TRACE(id, callback, task) /* no tracing */
1056 #define SCHED_TRACE2(id, callback, task, xtra) /* no tracing */
1057 +#define SCHED_TRACE3(id, callback, task, xtra1, xtra2)
1058
1059 #endif
1060
1061 @@ -193,6 +302,35 @@ feather_callback void do_sched_trace_sys_release(unsigned long id,
1062 SCHED_TRACE(SCHED_TRACE_BASE_ID + 10, do_sched_trace_sys_release, when)
1063
1064
1065 +#define sched_trace_tasklet_release(t) \
1066 + SCHED_TRACE(SCHED_TRACE_BASE_ID + 11, do_sched_trace_tasklet_release, t)
1067 +
1068 +#define sched_trace_tasklet_begin(t) \
1069 + SCHED_TRACE(SCHED_TRACE_BASE_ID + 12, do_sched_trace_tasklet_begin, t)
1070 +
1071 +#define sched_trace_tasklet_end(t, flushed) \
1072 + SCHED_TRACE2(SCHED_TRACE_BASE_ID + 13, do_sched_trace_tasklet_end, t, flushed)
1073 +
1074 +
1075 +#define sched_trace_work_release(t) \
1076 + SCHED_TRACE(SCHED_TRACE_BASE_ID + 14, do_sched_trace_work_release, t)
1077 +
1078 +#define sched_trace_work_begin(t, e) \
1079 + SCHED_TRACE2(SCHED_TRACE_BASE_ID + 15, do_sched_trace_work_begin, t, e)
1080 +
1081 +#define sched_trace_work_end(t, e, flushed) \
1082 + SCHED_TRACE3(SCHED_TRACE_BASE_ID + 16, do_sched_trace_work_end, t, e, flushed)
1083 +
1084 +
1085 +#define sched_trace_eff_prio_change(t, inh) \
1086 + SCHED_TRACE2(SCHED_TRACE_BASE_ID + 17, do_sched_trace_eff_prio_change, t, inh)
1087 +
1088 +
1089 +#define sched_trace_nv_interrupt_begin(d) \
1090 + SCHED_TRACE(SCHED_TRACE_BASE_ID + 18, do_sched_trace_nv_interrupt_begin, d)
1091 +#define sched_trace_nv_interrupt_end(d) \
1092 + SCHED_TRACE(SCHED_TRACE_BASE_ID + 19, do_sched_trace_nv_interrupt_end, d)
1093 +
1094 #define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */
1095
1096 #endif /* __KERNEL__ */
1097 diff --git a/include/litmus/sched_trace_external.h b/include/litmus/sched_trace_external.h
1098 new file mode 100644
1099 index 0000000..e70e45e
1100 --- /dev/null
1101 +++ b/include/litmus/sched_trace_external.h
1102 @@ -0,0 +1,78 @@
1103 +/*
1104 + * sched_trace_external.h -- record scheduler events to a byte stream for offline analysis.
1105 + */
1106 +#ifndef _LINUX_SCHED_TRACE_EXTERNAL_H_
1107 +#define _LINUX_SCHED_TRACE_EXTERNAL_H_
1108 +
1109 +
1110 +#ifdef CONFIG_SCHED_TASK_TRACE
1111 +extern void __sched_trace_tasklet_begin_external(struct task_struct* t);
1112 +static inline void sched_trace_tasklet_begin_external(struct task_struct* t)
1113 +{
1114 + __sched_trace_tasklet_begin_external(t);
1115 +}
1116 +
1117 +extern void __sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed);
1118 +static inline void sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed)
1119 +{
1120 + __sched_trace_tasklet_end_external(t, flushed);
1121 +}
1122 +
1123 +extern void __sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e);
1124 +static inline void sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e)
1125 +{
1126 + __sched_trace_work_begin_external(t, e);
1127 +}
1128 +
1129 +extern void __sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f);
1130 +static inline void sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f)
1131 +{
1132 + __sched_trace_work_end_external(t, e, f);
1133 +}
1134 +
1135 +#ifdef CONFIG_LITMUS_NVIDIA
1136 +extern void __sched_trace_nv_interrupt_begin_external(u32 device);
1137 +static inline void sched_trace_nv_interrupt_begin_external(u32 device)
1138 +{
1139 + __sched_trace_nv_interrupt_begin_external(device);
1140 +}
1141 +
1142 +extern void __sched_trace_nv_interrupt_end_external(u32 device);
1143 +static inline void sched_trace_nv_interrupt_end_external(u32 device)
1144 +{
1145 + __sched_trace_nv_interrupt_end_external(device);
1146 +}
1147 +#endif
1148 +
1149 +#else
1150 +
1151 +// no tracing.
1152 +static inline void sched_trace_tasklet_begin_external(struct task_struct* t){}
1153 +static inline void sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed){}
1154 +static inline void sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e){}
1155 +static inline void sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f){}
1156 +
1157 +#ifdef CONFIG_LITMUS_NVIDIA
1158 +static inline void sched_trace_nv_interrupt_begin_external(u32 device){}
1159 +static inline void sched_trace_nv_interrupt_end_external(u32 device){}
1160 +#endif
1161 +
1162 +#endif
1163 +
1164 +
1165 +#ifdef CONFIG_LITMUS_NVIDIA
1166 +
1167 +#define EX_TS(evt) \
1168 +extern void __##evt(void); \
1169 +static inline void EX_##evt(void) { __##evt(); }
1170 +
1171 +EX_TS(TS_NV_TOPISR_START)
1172 +EX_TS(TS_NV_TOPISR_END)
1173 +EX_TS(TS_NV_BOTISR_START)
1174 +EX_TS(TS_NV_BOTISR_END)
1175 +EX_TS(TS_NV_RELEASE_BOTISR_START)
1176 +EX_TS(TS_NV_RELEASE_BOTISR_END)
1177 +
1178 +#endif
1179 +
1180 +#endif
1181 diff --git a/include/litmus/trace.h b/include/litmus/trace.h
1182 index 05f4872..09d409b 100644
1183 --- a/include/litmus/trace.h
1184 +++ b/include/litmus/trace.h
1185 @@ -100,4 +100,24 @@ feather_callback void save_timestamp_cpu(unsigned long event, unsigned long cpu)
1186 #define TS_SEND_RESCHED_END DTIMESTAMP(191, TSK_UNKNOWN)
1187
1188
1189 +
1190 +#ifdef CONFIG_LITMUS_NVIDIA
1191 +
1192 +#define TS_NV_TOPISR_START TIMESTAMP(200)
1193 +#define TS_NV_TOPISR_END TIMESTAMP(201)
1194 +
1195 +#define TS_NV_BOTISR_START TIMESTAMP(202)
1196 +#define TS_NV_BOTISR_END TIMESTAMP(203)
1197 +
1198 +#define TS_NV_RELEASE_BOTISR_START TIMESTAMP(204)
1199 +#define TS_NV_RELEASE_BOTISR_END TIMESTAMP(205)
1200 +
1201 +#endif
1202 +
1203 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1204 +#define TS_NV_SCHED_BOTISR_START TIMESTAMP(206)
1205 +#define TS_NV_SCHED_BOTISR_END TIMESTAMP(207)
1206 +#endif
1207 +
1208 +
1209 #endif /* !_SYS_TRACE_H_ */
1210 diff --git a/include/litmus/unistd_32.h b/include/litmus/unistd_32.h
1211 index 94264c2..c6efc4c 100644
1212 --- a/include/litmus/unistd_32.h
1213 +++ b/include/litmus/unistd_32.h
1214 @@ -17,5 +17,6 @@
1215 #define __NR_wait_for_ts_release __LSC(9)
1216 #define __NR_release_ts __LSC(10)
1217 #define __NR_null_call __LSC(11)
1218 +#define __NR_register_nv_device __LSC(12)
1219
1220 -#define NR_litmus_syscalls 12
1221 +#define NR_litmus_syscalls 13
1222 diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h
1223 index d5ced0d..b44a7c3 100644
1224 --- a/include/litmus/unistd_64.h
1225 +++ b/include/litmus/unistd_64.h
1226 @@ -29,5 +29,8 @@ __SYSCALL(__NR_wait_for_ts_release, sys_wait_for_ts_release)
1227 __SYSCALL(__NR_release_ts, sys_release_ts)
1228 #define __NR_null_call __LSC(11)
1229 __SYSCALL(__NR_null_call, sys_null_call)
1230 +#define __NR_register_nv_device __LSC(12)
1231 +__SYSCALL(__NR_register_nv_device, sys_register_nv_device)
1232
1233 -#define NR_litmus_syscalls 12
1234 +
1235 +#define NR_litmus_syscalls 13
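A user-space sketch of claiming a GPU through the new syscall (not from the patch; the register/unregister flag convention follows reg_nv_device(int reg_device_id, int register_device) in litmus/nvidia_info.h but is an assumption here):

#include <unistd.h>
#include <sys/syscall.h>

/* claim GPU 0 before issuing work to it */
syscall(__NR_register_nv_device, 0, 1);
/* ... GPU work; driver tasklets/work are now charged to this task ... */
syscall(__NR_register_nv_device, 0, 0);   /* release GPU 0 */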
1236 diff --git a/kernel/lockdep.c b/kernel/lockdep.c
1237 index f2852a5..ebff2cf 100644
1238 --- a/kernel/lockdep.c
1239 +++ b/kernel/lockdep.c
1240 @@ -530,7 +530,7 @@ static void print_lock(struct held_lock *hlock)
1241 print_ip_sym(hlock->acquire_ip);
1242 }
1243
1244 -static void lockdep_print_held_locks(struct task_struct *curr)
1245 +void lockdep_print_held_locks(struct task_struct *curr)
1246 {
1247 int i, depth = curr->lockdep_depth;
1248
1249 @@ -546,6 +546,7 @@ static void lockdep_print_held_locks(struct task_struct *curr)
1250 print_lock(curr->held_locks + i);
1251 }
1252 }
1253 +EXPORT_SYMBOL(lockdep_print_held_locks);
1254
1255 static void print_kernel_version(void)
1256 {
1257 diff --git a/kernel/mutex.c b/kernel/mutex.c
1258 index 200407c..435685e 100644
1259 --- a/kernel/mutex.c
1260 +++ b/kernel/mutex.c
1261 @@ -496,3 +496,144 @@ int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock)
1262 return 1;
1263 }
1264 EXPORT_SYMBOL(atomic_dec_and_mutex_lock);
1265 +
1266 +
1267 +
1268 +
1269 +
1270 +
1271 +
1272 +
1273 +//__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, _RET_IP_);
1274 +
1275 +void mutex_lock_sfx(struct mutex *lock,
1276 + side_effect_t pre, unsigned long pre_arg,
1277 + side_effect_t post, unsigned long post_arg)
1278 +{
1279 + long state = TASK_UNINTERRUPTIBLE;
1280 + unsigned int subclass = 0;
1281 + unsigned long ip = _RET_IP_;
1282 +
1283 +
1284 + struct task_struct *task = current;
1285 + struct mutex_waiter waiter;
1286 + unsigned long flags;
1287 +
1288 + preempt_disable();
1289 + mutex_acquire(&lock->dep_map, subclass, 0, ip);
1290 +
1291 + spin_lock_mutex(&lock->wait_lock, flags);
1292 +
1293 + if(pre)
1294 + {
1295 + if(unlikely(pre(pre_arg)))
1296 + {
1297 + // this will confuse lockdep's CONFIG_PROVE_LOCKING...
1298 + spin_unlock_mutex(&lock->wait_lock, flags);
1299 + preempt_enable();
1300 + return;
1301 + }
1302 + }
1303 +
1304 + debug_mutex_lock_common(lock, &waiter);
1305 + debug_mutex_add_waiter(lock, &waiter, task_thread_info(task));
1306 +
1307 + /* add waiting tasks to the end of the waitqueue (FIFO): */
1308 + list_add_tail(&waiter.list, &lock->wait_list);
1309 + waiter.task = task;
1310 +
1311 + if (atomic_xchg(&lock->count, -1) == 1)
1312 + goto done;
1313 +
1314 + lock_contended(&lock->dep_map, ip);
1315 +
1316 + for (;;) {
1317 + /*
1318 + * Lets try to take the lock again - this is needed even if
1319 + * we get here for the first time (shortly after failing to
1320 + * acquire the lock), to make sure that we get a wakeup once
1321 + * it's unlocked. Later on, if we sleep, this is the
1322 + * operation that gives us the lock. We xchg it to -1, so
1323 + * that when we release the lock, we properly wake up the
1324 + * other waiters:
1325 + */
1326 + if (atomic_xchg(&lock->count, -1) == 1)
1327 + break;
1328 +
1329 + __set_task_state(task, state);
1330 +
1331 + /* didn't get the lock, go to sleep: */
1332 + spin_unlock_mutex(&lock->wait_lock, flags);
1333 + preempt_enable_no_resched();
1334 + schedule();
1335 + preempt_disable();
1336 + spin_lock_mutex(&lock->wait_lock, flags);
1337 + }
1338 +
1339 +done:
1340 + lock_acquired(&lock->dep_map, ip);
1341 + /* got the lock - rejoice! */
1342 + mutex_remove_waiter(lock, &waiter, current_thread_info());
1343 + mutex_set_owner(lock);
1344 +
1345 + /* set it to 0 if there are no waiters left: */
1346 + if (likely(list_empty(&lock->wait_list)))
1347 + atomic_set(&lock->count, 0);
1348 +
1349 + if(post)
1350 + post(post_arg);
1351 +
1352 + spin_unlock_mutex(&lock->wait_lock, flags);
1353 +
1354 + debug_mutex_free_waiter(&waiter);
1355 + preempt_enable();
1356 +
1357 + //return 0;
1358 +}
1359 +EXPORT_SYMBOL(mutex_lock_sfx);
1360 +
1361 +
1362 +
1363 +//__mutex_unlock_common_slowpath(lock_count, 1);
1364 +
1365 +void mutex_unlock_sfx(struct mutex *lock,
1366 + side_effect_t pre, unsigned long pre_arg,
1367 + side_effect_t post, unsigned long post_arg)
1368 +{
1369 + //struct mutex *lock = container_of(lock_count, struct mutex, count);
1370 + unsigned long flags;
1371 +
1372 + spin_lock_mutex(&lock->wait_lock, flags);
1373 +
1374 + if(pre)
1375 + pre(pre_arg);
1376 +
1377 + //mutex_release(&lock->dep_map, nested, _RET_IP_);
1378 + mutex_release(&lock->dep_map, 1, _RET_IP_);
1379 + debug_mutex_unlock(lock);
1380 +
1381 + /*
1382 + * some architectures leave the lock unlocked in the fastpath failure
1383 + * case, others need to leave it locked. In the latter case we have to
1384 + * unlock it here
1385 + */
1386 + if (__mutex_slowpath_needs_to_unlock())
1387 + atomic_set(&lock->count, 1);
1388 +
1389 + if (!list_empty(&lock->wait_list)) {
1390 + /* get the first entry from the wait-list: */
1391 + struct mutex_waiter *waiter =
1392 + list_entry(lock->wait_list.next,
1393 + struct mutex_waiter, list);
1394 +
1395 + debug_mutex_wake_waiter(lock, waiter);
1396 +
1397 + wake_up_process(waiter->task);
1398 + }
1399 +
1400 + if(post)
1401 + post(post_arg);
1402 +
1403 + spin_unlock_mutex(&lock->wait_lock, flags);
1404 +}
1405 +EXPORT_SYMBOL(mutex_unlock_sfx);
1406 diff --git a/kernel/sched.c b/kernel/sched.c
1407 index c5d7750..08b725c 100644
1408 --- a/kernel/sched.c
1409 +++ b/kernel/sched.c
1410 @@ -82,6 +82,10 @@
1411 #include <litmus/sched_trace.h>
1412 #include <litmus/trace.h>
1413
1414 +#ifdef CONFIG_LITMUS_SOFTIRQD
1415 +#include <litmus/litmus_softirq.h>
1416 +#endif
1417 +
1418 static void litmus_tick(struct rq*, struct task_struct*);
1419
1420 #define CREATE_TRACE_POINTS
1421 @@ -2879,6 +2883,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
1422 struct mm_struct *mm, *oldmm;
1423
1424 prepare_task_switch(rq, prev, next);
1425 +
1426 trace_sched_switch(prev, next);
1427 mm = next->mm;
1428 oldmm = prev->active_mm;
1429 @@ -3789,6 +3794,7 @@ pick_next_task(struct rq *rq)
1430 }
1431 }
1432
1433 +
1434 /*
1435 * schedule() is the main scheduler function.
1436 */
1437 @@ -3807,6 +3813,10 @@ need_resched:
1438 rcu_note_context_switch(cpu);
1439 prev = rq->curr;
1440
1441 +#ifdef CONFIG_LITMUS_SOFTIRQD
1442 + release_klitirqd_lock(prev);
1443 +#endif
1444 +
1445 release_kernel_lock(prev);
1446 need_resched_nonpreemptible:
1447 TS_SCHED_START;
1448 @@ -3882,15 +3892,26 @@ need_resched_nonpreemptible:
1449
1450 if (sched_state_validate_switch() || unlikely(reacquire_kernel_lock(prev)))
1451 goto need_resched_nonpreemptible;
1452 -
1453 +
1454 preempt_enable_no_resched();
1455 +
1456 if (need_resched())
1457 goto need_resched;
1458
1459 +#ifdef CONFIG_LITMUS_SOFTIRQD
1460 + reacquire_klitirqd_lock(prev);
1461 +#endif
1462 +
1463 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1464 + litmus->run_tasklets(prev);
1465 +#endif
1466 +
1467 srp_ceiling_block();
1468 }
1469 EXPORT_SYMBOL(schedule);
1470
1471 +
1472 +
1473 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
1474 /*
1475 * Look out! "owner" is an entirely speculative pointer
1476 @@ -4051,6 +4072,7 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
1477 }
1478 }
1479
1480 +
1481 /**
1482 * __wake_up - wake up threads blocked on a waitqueue.
1483 * @q: the waitqueue
1484 @@ -4236,6 +4258,12 @@ void __sched wait_for_completion(struct completion *x)
1485 }
1486 EXPORT_SYMBOL(wait_for_completion);
1487
1488 +void __sched __wait_for_completion_locked(struct completion *x)
1489 +{
1490 + do_wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
1491 +}
1492 +EXPORT_SYMBOL(__wait_for_completion_locked);
1493 +
1494 /**
1495 * wait_for_completion_timeout: - waits for completion of a task (w/timeout)
1496 * @x: holds the state of this particular completion
1497 diff --git a/kernel/semaphore.c b/kernel/semaphore.c
1498 index 94a62c0..c947a04 100644
1499 --- a/kernel/semaphore.c
1500 +++ b/kernel/semaphore.c
1501 @@ -33,11 +33,11 @@
1502 #include <linux/spinlock.h>
1503 #include <linux/ftrace.h>
1504
1505 -static noinline void __down(struct semaphore *sem);
1506 +noinline void __down(struct semaphore *sem);
1507 static noinline int __down_interruptible(struct semaphore *sem);
1508 static noinline int __down_killable(struct semaphore *sem);
1509 static noinline int __down_timeout(struct semaphore *sem, long jiffies);
1510 -static noinline void __up(struct semaphore *sem);
1511 +noinline void __up(struct semaphore *sem);
1512
1513 /**
1514 * down - acquire the semaphore
1515 @@ -190,11 +190,13 @@ EXPORT_SYMBOL(up);
1516
1517 /* Functions for the contended case */
1518
1519 +/*
1520 struct semaphore_waiter {
1521 struct list_head list;
1522 struct task_struct *task;
1523 int up;
1524 };
1525 + */
1526
1527 /*
1528 * Because this function is inlined, the 'state' parameter will be
1529 @@ -233,10 +235,12 @@ static inline int __sched __down_common(struct semaphore *sem, long state,
1530 return -EINTR;
1531 }
1532
1533 -static noinline void __sched __down(struct semaphore *sem)
1534 +noinline void __sched __down(struct semaphore *sem)
1535 {
1536 __down_common(sem, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
1537 }
1538 +EXPORT_SYMBOL(__down);
1539 +
1540
1541 static noinline int __sched __down_interruptible(struct semaphore *sem)
1542 {
1543 @@ -253,7 +257,7 @@ static noinline int __sched __down_timeout(struct semaphore *sem, long jiffies)
1544 return __down_common(sem, TASK_UNINTERRUPTIBLE, jiffies);
1545 }
1546
1547 -static noinline void __sched __up(struct semaphore *sem)
1548 +noinline void __sched __up(struct semaphore *sem)
1549 {
1550 struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list,
1551 struct semaphore_waiter, list);
1552 @@ -261,3 +265,4 @@ static noinline void __sched __up(struct semaphore *sem)
1553 waiter->up = 1;
1554 wake_up_process(waiter->task);
1555 }
1556 +EXPORT_SYMBOL(__up);
1557 \ No newline at end of file
1558 diff --git a/kernel/softirq.c b/kernel/softirq.c
1559 index 07b4f1b..7a6f500 100644
1560 --- a/kernel/softirq.c
1561 +++ b/kernel/softirq.c
1562 @@ -29,6 +29,15 @@
1563 #include <trace/events/irq.h>
1564
1565 #include <asm/irq.h>
1566 +
1567 +#include <litmus/litmus.h>
1568 +#include <litmus/sched_trace.h>
1569 +
1570 +#ifdef CONFIG_LITMUS_NVIDIA
1571 +#include <litmus/nvidia_info.h>
1572 +#include <litmus/trace.h>
1573 +#endif
1574 +
1575 /*
1576 - No shared variables, all the data are CPU local.
1577 - If a softirq needs serialization, let it serialize itself
1578 @@ -54,7 +63,7 @@ EXPORT_SYMBOL(irq_stat);
1579
1580 static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
1581
1582 -static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
1583 +static DEFINE_PER_CPU(struct task_struct *, ksoftirqd) = NULL;
1584
1585 char *softirq_to_name[NR_SOFTIRQS] = {
1586 "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
1587 @@ -177,6 +186,7 @@ void local_bh_enable_ip(unsigned long ip)
1588 }
1589 EXPORT_SYMBOL(local_bh_enable_ip);
1590
1591 +
1592 /*
1593 * We restart softirq processing MAX_SOFTIRQ_RESTART times,
1594 * and we fall back to softirqd after that.
1595 @@ -187,34 +197,30 @@ EXPORT_SYMBOL(local_bh_enable_ip);
1596 * should not be able to lock up the box.
1597 */
1598 #define MAX_SOFTIRQ_RESTART 10
1599 -
1600 -asmlinkage void __do_softirq(void)
1601 +static void ____do_softirq(void)
1602 {
1603 - struct softirq_action *h;
1604 __u32 pending;
1605 - int max_restart = MAX_SOFTIRQ_RESTART;
1606 +
1607 + struct softirq_action *h;
1608 int cpu;
1609 -
1610 +
1611 pending = local_softirq_pending();
1612 +
1613 account_system_vtime(current);
1614 -
1615 - __local_bh_disable((unsigned long)__builtin_return_address(0));
1616 - lockdep_softirq_enter();
1617 -
1618 +
1619 cpu = smp_processor_id();
1620 -restart:
1621 - /* Reset the pending bitmask before enabling irqs */
1622 - set_softirq_pending(0);
1623
1624 + set_softirq_pending(0);
1625 +
1626 local_irq_enable();
1627 -
1628 +
1629 h = softirq_vec;
1630 -
1631 +
1632 do {
1633 if (pending & 1) {
1634 int prev_count = preempt_count();
1635 kstat_incr_softirqs_this_cpu(h - softirq_vec);
1636 -
1637 +
1638 trace_softirq_entry(h, softirq_vec);
1639 h->action(h);
1640 trace_softirq_exit(h, softirq_vec);
1641 @@ -226,26 +232,70 @@ restart:
1642 h->action, prev_count, preempt_count());
1643 preempt_count() = prev_count;
1644 }
1645 -
1646 +
1647 rcu_bh_qs(cpu);
1648 }
1649 h++;
1650 pending >>= 1;
1651 } while (pending);
1652 -
1653 +
1654 local_irq_disable();
1655 +}
1656 +
1657 +static void ___do_softirq(void)
1658 +{
1659 + __u32 pending;
1660 +
1661 + //struct softirq_action *h;
1662 + int max_restart = MAX_SOFTIRQ_RESTART;
1663 + //int cpu;
1664 +
1665 + pending = local_softirq_pending();
1666 +
1667 +restart:
1668 + ____do_softirq();
1669
1670 pending = local_softirq_pending();
1671 if (pending && --max_restart)
1672 goto restart;
1673
1674 if (pending)
1675 + {
1676 wakeup_softirqd();
1677 + }
1678 +}
1679
1680 +asmlinkage void __do_softirq(void)
1681 +{
1682 +#ifdef LITMUS_THREAD_ALL_SOFTIRQ
1683 + /* Skip straight to wakeup_softirqd() if we're using
1684 + LITMUS_THREAD_ALL_SOFTIRQ (unless there is really high-priority work waiting). */
1685 + struct task_struct *tsk = __get_cpu_var(ksoftirqd);
1686 +
1687 + if(tsk)
1688 + {
1689 + __u32 pending = local_softirq_pending();
1690 + const __u32 high_prio_softirq = (1<<HI_SOFTIRQ) | (1<<TIMER_SOFTIRQ) | (1<<HRTIMER_SOFTIRQ);
1691 + if(pending && !(pending & high_prio_softirq))
1692 + {
1693 + wakeup_softirqd();
1694 + return;
1695 + }
1696 + }
1697 +#endif
1698 +
1699 + /*
1700 + * 'immediate' softirq execution:
1701 + */
1702 + __local_bh_disable((unsigned long)__builtin_return_address(0));
1703 + lockdep_softirq_enter();
1704 +
1705 + ___do_softirq();
1706 +
1707 lockdep_softirq_exit();
1708 -
1709 +
1710 account_system_vtime(current);
1711 - _local_bh_enable();
1712 + _local_bh_enable();
1713 }
1714
1715 #ifndef __ARCH_HAS_DO_SOFTIRQ
1716 @@ -357,8 +407,65 @@ struct tasklet_head
1717 static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
1718 static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
1719
1720 +
1721 void __tasklet_schedule(struct tasklet_struct *t)
1722 {
1723 +#ifdef CONFIG_LITMUS_NVIDIA
1724 + if(is_nvidia_func(t->func))
1725 + {
1726 + u32 nvidia_device = get_tasklet_nv_device_num(t);
1727 + // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n",
1728 + // __FUNCTION__, nvidia_device,litmus_clock());
1729 +
1730 + unsigned long flags;
1731 + struct task_struct* device_owner;
1732 +
1733 + lock_nv_registry(nvidia_device, &flags);
1734 +
1735 + device_owner = get_nv_device_owner(nvidia_device);
1736 +
1737 + if(device_owner==NULL)
1738 + {
1739 + t->owner = NULL;
1740 + }
1741 + else
1742 + {
1743 + if(is_realtime(device_owner))
1744 + {
1745 + TRACE("%s: Handling NVIDIA tasklet for device %u at %llu\n",
1746 + __FUNCTION__, nvidia_device,litmus_clock());
1747 + TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n",
1748 + __FUNCTION__,device_owner->pid,nvidia_device);
1749 +
1750 + t->owner = device_owner;
1751 + sched_trace_tasklet_release(t->owner);
1752 +
1753 + if(likely(_litmus_tasklet_schedule(t,nvidia_device)))
1754 + {
1755 + unlock_nv_registry(nvidia_device, &flags);
1756 + return;
1757 + }
1758 + else
1759 + {
1760 + t->owner = NULL; /* fall through to normal scheduling */
1761 + }
1762 + }
1763 + else
1764 + {
1765 + t->owner = NULL;
1766 + }
1767 + }
1768 + unlock_nv_registry(nvidia_device, &flags);
1769 + }
1770 +#endif
1771 +
1772 + ___tasklet_schedule(t);
1773 +}
1774 +EXPORT_SYMBOL(__tasklet_schedule);
1775 +
1776 +
1777 +void ___tasklet_schedule(struct tasklet_struct *t)
1778 +{
1779 unsigned long flags;
1780
1781 local_irq_save(flags);
1782 @@ -368,11 +475,65 @@ void __tasklet_schedule(struct tasklet_struct *t)
1783 raise_softirq_irqoff(TASKLET_SOFTIRQ);
1784 local_irq_restore(flags);
1785 }
1786 +EXPORT_SYMBOL(___tasklet_schedule);
1787
1788 -EXPORT_SYMBOL(__tasklet_schedule);
1789
1790 void __tasklet_hi_schedule(struct tasklet_struct *t)
1791 {
1792 +#ifdef CONFIG_LITMUS_NVIDIA
1793 + if(is_nvidia_func(t->func))
1794 + {
1795 + u32 nvidia_device = get_tasklet_nv_device_num(t);
1796 + // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n",
1797 + // __FUNCTION__, nvidia_device,litmus_clock());
1798 +
1799 + unsigned long flags;
1800 + struct task_struct* device_owner;
1801 +
1802 + lock_nv_registry(nvidia_device, &flags);
1803 +
1804 + device_owner = get_nv_device_owner(nvidia_device);
1805 +
1806 + if(device_owner==NULL)
1807 + {
1808 + t->owner = NULL;
1809 + }
1810 + else
1811 + {
1812 + if( is_realtime(device_owner))
1813 + {
1814 + TRACE("%s: Handling NVIDIA tasklet for device %u\tat %llu\n",
1815 + __FUNCTION__, nvidia_device,litmus_clock());
1816 + TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n",
1817 + __FUNCTION__,device_owner->pid,nvidia_device);
1818 +
1819 + t->owner = device_owner;
1820 + sched_trace_tasklet_release(t->owner);
1821 + if(likely(_litmus_tasklet_hi_schedule(t,nvidia_device)))
1822 + {
1823 + unlock_nv_registry(nvidia_device, &flags);
1824 + return;
1825 + }
1826 + else
1827 + {
1828 + t->owner = NULL; /* fall through to normal scheduling */
1829 + }
1830 + }
1831 + else
1832 + {
1833 + t->owner = NULL;
1834 + }
1835 + }
1836 + unlock_nv_registry(nvidia_device, &flags);
1837 + }
1838 +#endif
1839 +
1840 + ___tasklet_hi_schedule(t);
1841 +}
1842 +EXPORT_SYMBOL(__tasklet_hi_schedule);
1843 +
1844 +void ___tasklet_hi_schedule(struct tasklet_struct* t)
1845 +{
1846 unsigned long flags;
1847
1848 local_irq_save(flags);
1849 @@ -382,19 +543,72 @@ void __tasklet_hi_schedule(struct tasklet_struct *t)
1850 raise_softirq_irqoff(HI_SOFTIRQ);
1851 local_irq_restore(flags);
1852 }
1853 -
1854 -EXPORT_SYMBOL(__tasklet_hi_schedule);
1855 +EXPORT_SYMBOL(___tasklet_hi_schedule);
1856
1857 void __tasklet_hi_schedule_first(struct tasklet_struct *t)
1858 {
1859 BUG_ON(!irqs_disabled());
1860 +#ifdef CONFIG_LITMUS_NVIDIA
1861 + if(is_nvidia_func(t->func))
1862 + {
1863 + u32 nvidia_device = get_tasklet_nv_device_num(t);
1864 + // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n",
1865 + // __FUNCTION__, nvidia_device,litmus_clock());
1866 + unsigned long flags;
1867 + struct task_struct* device_owner;
1868 +
1869 + lock_nv_registry(nvidia_device, &flags);
1870 +
1871 + device_owner = get_nv_device_owner(nvidia_device);
1872 +
1873 + if(device_owner==NULL)
1874 + {
1875 + t->owner = NULL;
1876 + }
1877 + else
1878 + {
1879 + if(is_realtime(device_owner))
1880 + {
1881 + TRACE("%s: Handling NVIDIA tasklet for device %u at %llu\n",
1882 + __FUNCTION__, nvidia_device,litmus_clock());
1883 +
1884 + TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n",
1885 + __FUNCTION__,device_owner->pid,nvidia_device);
1886 +
1887 + t->owner = device_owner;
1888 + sched_trace_tasklet_release(t->owner);
1889 + if(likely(_litmus_tasklet_hi_schedule_first(t,nvidia_device)))
1890 + {
1891 + unlock_nv_registry(nvidia_device, &flags);
1892 + return;
1893 + }
1894 + else
1895 + {
1896 + t->owner = NULL; /* fall through to normal scheduling */
1897 + }
1898 + }
1899 + else
1900 + {
1901 + t->owner = NULL;
1902 + }
1903 + }
1904 + unlock_nv_registry(nvidia_device, &flags);
1905 + }
1906 +#endif
1907 +
1908 + ___tasklet_hi_schedule_first(t);
1909 +}
1910 +EXPORT_SYMBOL(__tasklet_hi_schedule_first);
1911 +
1912 +void ___tasklet_hi_schedule_first(struct tasklet_struct* t)
1913 +{
1914 + BUG_ON(!irqs_disabled());
1915
1916 t->next = __get_cpu_var(tasklet_hi_vec).head;
1917 __get_cpu_var(tasklet_hi_vec).head = t;
1918 __raise_softirq_irqoff(HI_SOFTIRQ);
1919 }
1920 -
1921 -EXPORT_SYMBOL(__tasklet_hi_schedule_first);
1922 +EXPORT_SYMBOL(___tasklet_hi_schedule_first);
1923
1924 static void tasklet_action(struct softirq_action *a)
1925 {
1926 @@ -450,6 +664,7 @@ static void tasklet_hi_action(struct softirq_action *a)
1927 if (!atomic_read(&t->count)) {
1928 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
1929 BUG();
1930 +
1931 t->func(t->data);
1932 tasklet_unlock(t);
1933 continue;
1934 @@ -473,8 +688,13 @@ void tasklet_init(struct tasklet_struct *t,
1935 t->next = NULL;
1936 t->state = 0;
1937 atomic_set(&t->count, 0);
1938 +
1939 t->func = func;
1940 t->data = data;
1941 +
1942 +#ifdef CONFIG_LITMUS_SOFTIRQD
1943 + t->owner = NULL;
1944 +#endif
1945 }
1946
1947 EXPORT_SYMBOL(tasklet_init);
1948 @@ -489,6 +709,7 @@ void tasklet_kill(struct tasklet_struct *t)
1949 yield();
1950 } while (test_bit(TASKLET_STATE_SCHED, &t->state));
1951 }
1952 +
1953 tasklet_unlock_wait(t);
1954 clear_bit(TASKLET_STATE_SCHED, &t->state);
1955 }
1956 @@ -694,6 +915,8 @@ void __init softirq_init(void)
1957
1958 static int run_ksoftirqd(void * __bind_cpu)
1959 {
1960 + unsigned long flags;
1961 +
1962 set_current_state(TASK_INTERRUPTIBLE);
1963
1964 while (!kthread_should_stop()) {
1965 @@ -712,7 +935,11 @@ static int run_ksoftirqd(void * __bind_cpu)
1966 don't process */
1967 if (cpu_is_offline((long)__bind_cpu))
1968 goto wait_to_die;
1969 - do_softirq();
1970 +
1971 + local_irq_save(flags);
1972 + ____do_softirq();
1973 + local_irq_restore(flags);
1974 +
1975 preempt_enable_no_resched();
1976 cond_resched();
1977 preempt_disable();
1978 @@ -760,6 +987,7 @@ void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
1979 for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
1980 if (*i == t) {
1981 *i = t->next;
1982 +
1983 /* If this was the tail element, move the tail ptr */
1984 if (*i == NULL)
1985 per_cpu(tasklet_vec, cpu).tail = i;
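Note: the three tasklet-scheduling hooks above follow one interception pattern. A condensed restatement of the low-priority case follows (all names are taken from the hunks above; this sketch is an editorial summary, not an additional change to the patch):

    void __tasklet_schedule(struct tasklet_struct *t)
    {
    #ifdef CONFIG_LITMUS_NVIDIA
    	if (is_nvidia_func(t->func)) {
    		unsigned long flags;
    		u32 dev = get_tasklet_nv_device_num(t);
    		struct task_struct *owner;

    		lock_nv_registry(dev, &flags);
    		owner = get_nv_device_owner(dev);
    		if (owner && is_realtime(owner)) {
    			t->owner = owner;
    			sched_trace_tasklet_release(owner);
    			if (_litmus_tasklet_schedule(t, dev)) {
    				/* accepted by the LITMUS handler; done */
    				unlock_nv_registry(dev, &flags);
    				return;
    			}
    		}
    		t->owner = NULL;                 /* fall back to Linux */
    		unlock_nv_registry(dev, &flags);
    	}
    #endif
    	___tasklet_schedule(t);                  /* stock Linux path */
    }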
1986 diff --git a/kernel/workqueue.c b/kernel/workqueue.c
1987 index f77afd9..2293aad 100644
1988 --- a/kernel/workqueue.c
1989 +++ b/kernel/workqueue.c
1990 @@ -47,6 +47,13 @@
1991
1992 #include "workqueue_sched.h"
1993
1994 +#ifdef CONFIG_LITMUS_NVIDIA
1995 +#include <litmus/litmus.h>
1996 +#include <litmus/sched_trace.h>
1997 +#include <litmus/nvidia_info.h>
1998 +#endif
1999 +
2000 +
2001 enum {
2002 /* global_cwq flags */
2003 GCWQ_MANAGE_WORKERS = 1 << 0, /* need to manage workers */
2004 @@ -1010,9 +1017,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
2005 work_flags |= WORK_STRUCT_DELAYED;
2006 worklist = &cwq->delayed_works;
2007 }
2008 -
2009 insert_work(cwq, work, worklist, work_flags);
2010 -
2011 spin_unlock_irqrestore(&gcwq->lock, flags);
2012 }
2013
2014 @@ -2526,10 +2531,70 @@ EXPORT_SYMBOL(cancel_delayed_work_sync);
2015 */
2016 int schedule_work(struct work_struct *work)
2017 {
2018 - return queue_work(system_wq, work);
2019 +#if 0
2020 +#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD)
2021 + if(is_nvidia_func(work->func))
2022 + {
2023 + u32 nvidiaDevice = get_work_nv_device_num(work);
2024 +
2025 + //1) Ask Litmus which task owns GPU <nvidiaDevice>. (API to be defined.)
2026 + unsigned long flags;
2027 + struct task_struct* device_owner;
2028 +
2029 + lock_nv_registry(nvidiaDevice, &flags);
2030 +
2031 + device_owner = get_nv_device_owner(nvidiaDevice);
2032 +
2033 + //2) If there is an owner, set work->owner to the owner's task struct.
2034 + if(device_owner==NULL)
2035 + {
2036 + work->owner = NULL;
2037 + //TRACE("%s: the owner task of NVIDIA Device %u is NULL\n",__FUNCTION__,nvidiaDevice);
2038 + }
2039 + else
2040 + {
2041 + if( is_realtime(device_owner))
2042 + {
2043 + TRACE("%s: Handling NVIDIA work for device\t%u\tat\t%llu\n",
2044 + __FUNCTION__, nvidiaDevice,litmus_clock());
2045 + TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n",
2046 + __FUNCTION__,
2047 + device_owner->pid,
2048 + nvidiaDevice);
2049 +
2050 + //3) Call litmus_schedule_work() and return (don't execute the rest
2051 + // of schedule_work()).
2052 + work->owner = device_owner;
2053 + sched_trace_work_release(work->owner);
2054 + if(likely(litmus_schedule_work(work, nvidiaDevice)))
2055 + {
2056 + unlock_nv_registry(nvidiaDevice, &flags);
2057 + return 1;
2058 + }
2059 + else
2060 + {
2061 + work->owner = NULL; /* fall through to normal work scheduling */
2062 + }
2063 + }
2064 + else
2065 + {
2066 + work->owner = NULL;
2067 + }
2068 + }
2069 + unlock_nv_registry(nvidiaDevice, &flags);
2070 + }
2071 +#endif
2072 +#endif
2073 + return(__schedule_work(work));
2074 }
2075 EXPORT_SYMBOL(schedule_work);
2076
2077 +int __schedule_work(struct work_struct* work)
2078 +{
2079 + return queue_work(system_wq, work);
2080 +}
2081 +EXPORT_SYMBOL(__schedule_work);
2082 +
2083 /*
2084 * schedule_work_on - put work task on a specific cpu
2085 * @cpu: cpu to put the work task on
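Note: existing callers of schedule_work() are unaffected by the split above; the public entry point simply forwards to __schedule_work(), and the NVIDIA redirection is currently compiled out with '#if 0'. Minimal illustration (my_work and my_deferred_fn are hypothetical names):

    static void my_deferred_fn(struct work_struct *w)
    {
    	/* runs in a kworker, exactly as before the patch */
    }
    static DECLARE_WORK(my_work, my_deferred_fn);

    	/* from IRQ or process context: */
    	schedule_work(&my_work);	/* now forwards to __schedule_work() */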
2086 diff --git a/litmus/Kconfig b/litmus/Kconfig
2087 index ad8dc83..5109cf7 100644
2088 --- a/litmus/Kconfig
2089 +++ b/litmus/Kconfig
2090 @@ -62,6 +62,25 @@ config LITMUS_LOCKING
2091
2092 endmenu
2093
2094 +menu "Performance Enhancements"
2095 +
2096 +config SCHED_CPU_AFFINITY
2097 + bool "Local Migration Affinity"
2098 + default y
2099 + help
2100 + Rescheduled tasks prefer CPUs near their previously used CPU. This
2101 + may improve performance by preserving cache affinity.
2102 +
2103 + Warning: May make bugs harder to find since tasks may migrate less often.
2104 +
2105 + NOTES:
2106 + * Pfair/PD^2 does not support this option.
2107 + * Only x86 currently supported.
2108 +
2109 + Say Yes if unsure.
2110 +
2111 +endmenu
2112 +
2113 menu "Tracing"
2114
2115 config FEATHER_TRACE
2116 @@ -182,4 +201,106 @@ config SCHED_DEBUG_TRACE_CALLER
2117
2118 endmenu
2119
2120 +menu "Interrupt Handling"
2121 +
2122 +config LITMUS_THREAD_ALL_SOFTIRQ
2123 + bool "Process all softirqs in ksoftirqd threads."
2124 + default n
2125 + help
2126 + (Experimental) Thread all softirqs to ksoftirqd
2127 + daemon threads, similar to PREEMPT_RT. I/O
2128 + throughput will drop with this enabled, but
2129 + latencies due to interrupts will be reduced.
2130 +
2131 + WARNING: Timer responsiveness will likely be
2132 + decreased as timer callbacks are also threaded.
2133 + This is unlike PREEMPT_RT's hardirqs.
2134 +
2135 + If unsure, say No.
2136 +
2137 +
2138 +choice
2139 + prompt "Scheduling of interrupt bottom-halves in Litmus."
2140 + default LITMUS_SOFTIRQD_NONE
2141 + depends on LITMUS_LOCKING && !LITMUS_THREAD_ALL_SOFTIRQ
2142 + help
2143 + Schedule tasklets with known priorities in Litmus.
2144 +
2145 +config LITMUS_SOFTIRQD_NONE
2146 + bool "No tasklet scheduling in Litmus."
2147 + help
2148 + Don't schedule tasklets in Litmus. Default.
2149 +
2150 +config LITMUS_SOFTIRQD
2151 + bool "Spawn klitirqd interrupt handling threads."
2152 + help
2153 + Create klitirqd interrupt handling threads. Work must be
2154 + specifically dispatched to these workers. (Softirqs for
2155 + Litmus tasks are not magically redirected to klitirqd.)
2156 +
2157 + G-EDF/RM, C-EDF/RM ONLY for now!
2158 +
2159 +
2160 +config LITMUS_PAI_SOFTIRQD
2161 + bool "Defer tasklets to context switch points."
2162 + help
2163 + Only execute scheduled tasklet bottom halves at
2164 + scheduling points. Avoids context-switch overhead
2165 + at the cost of non-preemptive durations of bottom-half
2166 + processing.
2167 +
2168 + G-EDF/RM, C-EDF/RM ONLY for now!
2169 +
2170 +endchoice
2171 +
2172 +
2173 +config NR_LITMUS_SOFTIRQD
2174 + int "Number of klitirqd."
2175 + depends on LITMUS_SOFTIRQD
2176 + range 1 4096
2177 + default "1"
2178 + help
2179 + Should be <= the number of CPUs in your system.
2180 +
2181 +config LITMUS_NVIDIA
2182 + bool "Litmus handling of NVIDIA interrupts."
2183 + depends on LITMUS_SOFTIRQD || LITMUS_PAI_SOFTIRQD
2184 + default n
2185 + help
2186 + Direct tasklets from NVIDIA devices to Litmus's klitirqd.
2187 +
2188 + If unsure, say No.
2189 +
2190 +config NV_DEVICE_NUM
2191 + int "Number of NVIDIA GPUs."
2192 + depends on LITMUS_SOFTIRQD || LITMUS_PAI_SOFTIRQD
2193 + range 1 4096
2194 + default "1"
2195 + help
2196 + Should be <= the number of CPUs and
2197 + <= the number of GPUs in your system.
2198 +
2199 +choice
2200 + prompt "CUDA/Driver Version Support"
2201 + default CUDA_4_0
2202 + depends on LITMUS_NVIDIA
2203 + help
2204 + Select the version of CUDA/driver to support.
2205 +
2206 +config CUDA_4_0
2207 + bool "CUDA 4.0"
2208 + depends on LITMUS_NVIDIA
2209 + help
2210 + Support CUDA 4.0 RC2 (dev. driver version: x86_64-270.40)
2211 +
2212 +config CUDA_3_2
2213 + bool "CUDA 3.2"
2214 + depends on LITMUS_NVIDIA
2215 + help
2216 + Support CUDA 3.2 (dev. driver version: x86_64-260.24)
2217 +
2218 +endchoice
2219 +
2220 +endmenu
2221 +
2222 endmenu
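Note: the options above surface in the sources as the usual Kconfig-generated macros. A minimal sketch of how they are typically consumed (only klitirqds[] appears verbatim in litmus/litmus_softirq.c below; the NV_DEVICE_REG declaration and its element type are placeholders):

    #ifdef CONFIG_LITMUS_SOFTIRQD
    /* one descriptor per configured klitirqd daemon (NR_LITMUS_SOFTIRQD) */
    static struct klitirqd_info klitirqds[NR_LITMUS_SOFTIRQD];
    #endif

    #ifdef CONFIG_LITMUS_NVIDIA
    /* one registry slot per configured GPU (CONFIG_NV_DEVICE_NUM) */
    static struct nv_device_registry NV_DEVICE_REG[CONFIG_NV_DEVICE_NUM];
    #endif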
2223 diff --git a/litmus/Makefile b/litmus/Makefile
2224 index ad9936e..869939e 100644
2225 --- a/litmus/Makefile
2226 +++ b/litmus/Makefile
2227 @@ -19,10 +19,15 @@ obj-y = sched_plugin.o litmus.o \
2228 sched_gsn_edf.o \
2229 sched_psn_edf.o
2230
2231 -obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
2232 +obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o sched_cfifo.o fifo_common.o sched_crm.o rm_common.o sched_crm_srt.o rm_srt_common.o
2233 obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o
2234 +obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o
2235
2236 obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
2237 obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o
2238 obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o
2239 obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o
2240 +
2241 +obj-$(CONFIG_LITMUS_SOFTIRQD) += litmus_softirq.o
2242 +obj-$(CONFIG_LITMUS_PAI_SOFTIRQD) += litmus_pai_softirq.o
2243 +obj-$(CONFIG_LITMUS_NVIDIA) += nvidia_info.o sched_trace_external.o
2244 diff --git a/litmus/affinity.c b/litmus/affinity.c
2245 new file mode 100644
2246 index 0000000..3b430d1
2247 --- /dev/null
2248 +++ b/litmus/affinity.c
2249 @@ -0,0 +1,49 @@
2250 +#include <linux/cpu.h>
2251 +
2252 +#include <litmus/affinity.h>
2253 +
2254 +struct neighborhood neigh_info[NR_CPUS];
2255 +
2256 +/* called by _init_litmus() */
2257 +void init_topology(void)
2258 +{
2259 + int cpu;
2260 + int i;
2261 + int chk;
2262 + int depth = num_cache_leaves;
2263 +
2264 + if(depth > NUM_CACHE_LEVELS)
2265 + depth = NUM_CACHE_LEVELS;
2266 +
2267 + for_each_online_cpu(cpu)
2268 + {
2269 + for(i = 0; i < depth; ++i)
2270 + {
2271 + long unsigned int firstbits;
2272 +
2273 + chk = get_shared_cpu_map((struct cpumask *)&neigh_info[cpu].neighbors[i], cpu, i);
2274 + if(chk) /* failed */
2275 + {
2276 + neigh_info[cpu].size[i] = 0;
2277 + }
2278 + else
2279 + {
2280 + /* size = num bits in mask */
2281 + neigh_info[cpu].size[i] = cpumask_weight((struct cpumask *)&neigh_info[cpu].neighbors[i]);
2282 + }
2283 + firstbits = *neigh_info[cpu].neighbors[i]->bits;
2284 + printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n",
2285 + cpu, neigh_info[cpu].size[i], i, firstbits);
2286 + }
2287 +
2288 + /* set data for non-existent levels */
2289 + for(; i < NUM_CACHE_LEVELS; ++i)
2290 + {
2291 + neigh_info[cpu].size[i] = 0;
2292 +
2293 + printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n",
2294 + cpu, neigh_info[cpu].size[i], i, 0lu);
2295 + }
2296 + }
2297 +}
2298 +
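Note: init_topology() above only records shared-cache neighbor masks; a plugin built with SCHED_CPU_AFFINITY would consult them when placing a resuming task. Hedged sketch of such a lookup (find_nearby_cpu() and the cpu_is_preferred callback are assumptions, not part of the patch):

    /* Walk cache levels from closest to farthest and return the first
     * acceptable neighbor of 'last_cpu'; fall back to 'last_cpu' itself. */
    static int find_nearby_cpu(int last_cpu, int (*cpu_is_preferred)(int cpu))
    {
    	int level, cpu;

    	for (level = 0; level < NUM_CACHE_LEVELS; ++level) {
    		if (!neigh_info[last_cpu].size[level])
    			continue;	/* nothing recorded at this level */
    		for_each_cpu(cpu, (struct cpumask *)
    			     &neigh_info[last_cpu].neighbors[level])
    			if (cpu != last_cpu && cpu_is_preferred(cpu))
    				return cpu;
    	}
    	return last_cpu;
    }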
2299 diff --git a/litmus/edf_common.c b/litmus/edf_common.c
2300 index 9b44dc2..0a06d7a 100644
2301 --- a/litmus/edf_common.c
2302 +++ b/litmus/edf_common.c
2303 @@ -63,8 +63,52 @@ int edf_higher_prio(struct task_struct* first,
2304
2305 #endif
2306
2307 + if (!is_realtime(second_task))
2308 + return true;
2309 +
2310 + if (earlier_deadline(first_task, second_task))
2311 + return true;
2312 +
2313 + if (get_deadline(first_task) == get_deadline(second_task))
2314 + {
2315 + if (shorter_period(first_task, second_task))
2316 + {
2317 + return true;
2318 + }
2319 + if (get_rt_period(first_task) == get_rt_period(second_task))
2320 + {
2321 +#ifdef CONFIG_LITMUS_SOFTIRQD
2322 + if (first_task->rt_param.is_proxy_thread < second_task->rt_param.is_proxy_thread)
2323 + {
2324 + return true;
2325 + }
2326 + if (first_task->rt_param.is_proxy_thread == second_task->rt_param.is_proxy_thread)
2327 + {
2328 +#endif
2329 + if (first_task->pid < second_task->pid)
2330 + {
2331 + return true;
2332 + }
2333 + if (first_task->pid == second_task->pid)
2334 + {
2335 + return !second->rt_param.inh_task;
2336 + }
2337 +#ifdef CONFIG_LITMUS_SOFTIRQD
2338 + }
2339 +#endif
2340 + }
2341 + }
2342 +
2343 + return false;
2344
2345 +#if 0
2346 return !is_realtime(second_task) ||
2347 +
2348 +#ifdef CONFIG_LITMUS_SOFTIRQD
2349 + /* proxy threads always lose w/o inheritance. */
2350 + (first_task->rt_param.is_proxy_thread <
2351 + second_task->rt_param.is_proxy_thread) ||
2352 +#endif
2353
2354 /* is the deadline of the first task earlier?
2355 * Then it has higher priority.
2356 @@ -82,6 +126,7 @@ int edf_higher_prio(struct task_struct* first,
2357 */
2358 (first_task->pid == second_task->pid &&
2359 !second->rt_param.inh_task)));
2360 +#endif
2361 }
2362
2363 int edf_ready_order(struct bheap_node* a, struct bheap_node* b)
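Note: the rewritten edf_higher_prio() above unrolls the original boolean expression into an explicit decision ladder. Summarized (same ordering as the code, applied after inheritance substitution):

    /* 'first' beats 'second' when, in order:
     *  1. second is not a real-time task;
     *  2. first has the earlier deadline;
     *  3. deadlines tie and first has the shorter period;
     *  4. periods tie and first is not a proxy thread while second is
     *     (CONFIG_LITMUS_SOFTIRQD only);
     *  5. proxy status ties and first has the lower PID;
     *  6. PIDs tie and second has no inherited task.
     * Otherwise 'second' has equal or higher priority.
     */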
2364 diff --git a/litmus/fdso.c b/litmus/fdso.c
2365 index aa7b384..2b7f9ba 100644
2366 --- a/litmus/fdso.c
2367 +++ b/litmus/fdso.c
2368 @@ -22,6 +22,7 @@ extern struct fdso_ops generic_lock_ops;
2369
2370 static const struct fdso_ops* fdso_ops[] = {
2371 &generic_lock_ops, /* FMLP_SEM */
2372 + &generic_lock_ops, /* KFMLP_SEM */
2373 &generic_lock_ops, /* SRP_SEM */
2374 };
2375
2376 diff --git a/litmus/fifo_common.c b/litmus/fifo_common.c
2377 new file mode 100644
2378 index 0000000..c94510a
2379 --- /dev/null
2380 +++ b/litmus/fifo_common.c
2381 @@ -0,0 +1,124 @@
2382 +/*
2383 + * litmus/fifo_common.c
2384 + *
2385 + * Common functions for FIFO based schedulers.
2386 + */
2387 +
2388 +#include <linux/percpu.h>
2389 +#include <linux/sched.h>
2390 +#include <linux/list.h>
2391 +
2392 +#include <litmus/litmus.h>
2393 +#include <litmus/sched_plugin.h>
2394 +#include <litmus/sched_trace.h>
2395 +
2396 +#include <litmus/fifo_common.h>
2397 +
2398 +/* fifo_higher_prio - returns true if first has a higher FIFO priority
2399 + * (earlier release) than second. Release-time ties are broken by PID.
2400 + *
2401 + * both first and second may be NULL
2402 + */
2403 +int fifo_higher_prio(struct task_struct* first,
2404 + struct task_struct* second)
2405 +{
2406 + struct task_struct *first_task = first;
2407 + struct task_struct *second_task = second;
2408 +
2409 + /* There is no point in comparing a task to itself. */
2410 + if (first && first == second) {
2411 + TRACE_TASK(first,
2412 + "WARNING: pointless edf priority comparison.\n");
2413 + return 0;
2414 + }
2415 +
2416 +
2417 + /* check for NULL tasks */
2418 + if (!first || !second)
2419 + return first && !second;
2420 +
2421 +#ifdef CONFIG_LITMUS_LOCKING
2422 +
2423 + /* Check for inherited priorities. Change task
2424 + * used for comparison in such a case.
2425 + */
2426 + if (unlikely(first->rt_param.inh_task))
2427 + first_task = first->rt_param.inh_task;
2428 + if (unlikely(second->rt_param.inh_task))
2429 + second_task = second->rt_param.inh_task;
2430 +
2431 + /* Check for priority boosting. Tie-break by start of boosting.
2432 + */
2433 + if (unlikely(is_priority_boosted(first_task))) {
2434 + /* first_task is boosted, how about second_task? */
2435 + if (!is_priority_boosted(second_task) ||
2436 + lt_before(get_boost_start(first_task),
2437 + get_boost_start(second_task)))
2438 + return 1;
2439 + else
2440 + return 0;
2441 + } else if (unlikely(is_priority_boosted(second_task)))
2442 + /* second_task is boosted, first is not*/
2443 + return 0;
2444 +
2445 +#endif
2446 +
2447 +
2448 + return !is_realtime(second_task) ||
2449 +
2450 +#ifdef CONFIG_LITMUS_SOFTIRQD
2451 + /* proxy threads always lose w/o inheritance. */
2452 + (first_task->rt_param.is_proxy_thread <
2453 + second_task->rt_param.is_proxy_thread) ||
2454 +#endif
2455 +
2456 + /* is the release of the first task earlier?
2457 + * Then it has higher priority.
2458 + */
2459 + earlier_release(first_task, second_task) ||
2460 +
2461 + /* Do we have a release-time tie?
2462 + * Then break by PID.
2463 + */
2464 + (get_release(first_task) == get_release(second_task) &&
2465 + (first_task->pid < second_task->pid ||
2466 +
2467 + /* If the PIDs are the same then the task with the inherited
2468 + * priority wins.
2469 + */
2470 + (first_task->pid == second_task->pid &&
2471 + !second->rt_param.inh_task)));
2472 +}
2473 +
2474 +int fifo_ready_order(struct bheap_node* a, struct bheap_node* b)
2475 +{
2476 + return fifo_higher_prio(bheap2task(a), bheap2task(b));
2477 +}
2478 +
2479 +void fifo_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
2480 + release_jobs_t release)
2481 +{
2482 + rt_domain_init(rt, fifo_ready_order, resched, release);
2483 +}
2484 +
2485 +/* need_to_preempt - check whether the task t needs to be preempted
2486 + * call only with irqs disabled and with ready_lock acquired
2487 + * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT!
2488 + */
2489 +int fifo_preemption_needed(rt_domain_t* rt, struct task_struct *t)
2490 +{
2491 + /* we need the read lock for fifo_ready_queue */
2492 + /* no need to preempt if there is nothing pending */
2493 + if (!__jobs_pending(rt))
2494 + return 0;
2495 + /* we need to reschedule if t doesn't exist */
2496 + if (!t)
2497 + return 1;
2498 +
2499 + /* NOTE: We cannot check for non-preemptibility since we
2500 + * don't know what address space we're currently in.
2501 + */
2502 +
2503 + /* make sure to get non-rt stuff out of the way */
2504 + return !is_realtime(t) || fifo_higher_prio(__next_ready(rt), t);
2505 +}
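Note: fifo_preemption_needed() is meant to be called at scheduling points with the domain's ready lock held and interrupts disabled. Hedged call-site sketch (the surrounding plugin code is an assumption):

    static void check_for_fifo_preemption(rt_domain_t *dom,
    				      struct task_struct *scheduled)
    {
    	/* caller holds dom's ready lock with IRQs off */
    	if (fifo_preemption_needed(dom, scheduled)) {
    		/* request a reschedule here (plugin-specific) */
    	}
    }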
2506 diff --git a/litmus/litmus.c b/litmus/litmus.c
2507 index 26938ac..29363c6 100644
2508 --- a/litmus/litmus.c
2509 +++ b/litmus/litmus.c
2510 @@ -17,6 +17,14 @@
2511 #include <litmus/litmus_proc.h>
2512 #include <litmus/sched_trace.h>
2513
2514 +#ifdef CONFIG_SCHED_CPU_AFFINITY
2515 +#include <litmus/affinity.h>
2516 +#endif
2517 +
2518 +#ifdef CONFIG_LITMUS_NVIDIA
2519 +#include <litmus/nvidia_info.h>
2520 +#endif
2521 +
2522 /* Number of RT tasks that exist in the system */
2523 atomic_t rt_task_count = ATOMIC_INIT(0);
2524 static DEFINE_RAW_SPINLOCK(task_transition_lock);
2525 @@ -47,6 +55,28 @@ void bheap_node_free(struct bheap_node* hn)
2526 struct release_heap* release_heap_alloc(int gfp_flags);
2527 void release_heap_free(struct release_heap* rh);
2528
2529 +#ifdef CONFIG_LITMUS_NVIDIA
2530 +/*
2531 + * sys_register_nv_device
2532 + * @nv_device_id: the NVIDIA device id that the task wants to register
2533 + * @reg_action: set to 1 to register the specified device, zero otherwise
2534 + * Syscall to register the task's designated NVIDIA device in the NV_DEVICE_REG array.
2535 + * Returns EFAULT if nv_device_id is out of range,
2536 + * 0 on success.
2537 + */
2538 +asmlinkage long sys_register_nv_device(int nv_device_id, int reg_action)
2539 +{
2540 + /* register the device to caller (aka 'current') */
2541 + return(reg_nv_device(nv_device_id, reg_action));
2542 +}
2543 +#else
2544 +asmlinkage long sys_register_nv_device(int nv_device_id, int reg_action)
2545 +{
2546 + return(-EINVAL);
2547 +}
2548 +#endif
2549 +
2550 +
2551 /*
2552 * sys_set_task_rt_param
2553 * @pid: Pid of the task which scheduling parameters must be changed
2554 @@ -115,7 +145,7 @@ asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param)
2555 tp.cls != RT_CLASS_BEST_EFFORT)
2556 {
2557 printk(KERN_INFO "litmus: real-time task %d rejected "
2558 - "because its class is invalid\n");
2559 + "because its class is invalid\n", pid);
2560 goto out_unlock;
2561 }
2562 if (tp.budget_policy != NO_ENFORCEMENT &&
2563 @@ -131,6 +161,22 @@ asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param)
2564
2565 target->rt_param.task_params = tp;
2566
2567 +#ifdef CONFIG_LITMUS_SOFTIRQD
2568 + /* proxy thread off by default */
2569 + target->rt_param.is_proxy_thread = 0;
2570 + target->rt_param.cur_klitirqd = NULL;
2571 + //init_MUTEX(&target->rt_param.klitirqd_sem);
2572 + mutex_init(&target->rt_param.klitirqd_sem);
2573 + //init_completion(&target->rt_param.klitirqd_sem);
2574 + //target->rt_param.klitirqd_sem_stat = NOT_HELD;
2575 + atomic_set(&target->rt_param.klitirqd_sem_stat, NOT_HELD);
2576 +#endif
2577 +
2578 +#ifdef CONFIG_LITMUS_NVIDIA
2579 + atomic_set(&target->rt_param.nv_int_count, 0);
2580 +#endif
2581 +
2582 +
2583 retval = 0;
2584 out_unlock:
2585 read_unlock_irq(&tasklist_lock);
2586 @@ -265,6 +311,7 @@ asmlinkage long sys_query_job_no(unsigned int __user *job)
2587 return retval;
2588 }
2589
2590 +
2591 /* sys_null_call() is only used for determining raw system call
2592 * overheads (kernel entry, kernel exit). It has no useful side effects.
2593 * If ts is non-NULL, then the current Feather-Trace time is recorded.
2594 @@ -278,7 +325,7 @@ asmlinkage long sys_null_call(cycles_t __user *ts)
2595 now = get_cycles();
2596 ret = put_user(now, ts);
2597 }
2598 -
2599 +
2600 return ret;
2601 }
2602
2603 @@ -299,6 +346,20 @@ static void reinit_litmus_state(struct task_struct* p, int restore)
2604 * at this point in time.
2605 */
2606 WARN_ON(p->rt_param.inh_task);
2607 +
2608 +#ifdef CONFIG_LITMUS_SOFTIRQD
2609 + /* We probably should not have any tasklets executing for
2610 + * us at this time.
2611 + */
2612 + WARN_ON(p->rt_param.cur_klitirqd);
2613 + WARN_ON(atomic_read(&p->rt_param.klitirqd_sem_stat) == HELD);
2614 +
2615 + if(p->rt_param.cur_klitirqd)
2616 + flush_pending(p->rt_param.cur_klitirqd, p);
2617 +
2618 + if(atomic_read(&p->rt_param.klitirqd_sem_stat) == HELD)
2619 + up_and_set_stat(p, NOT_HELD, &p->rt_param.klitirqd_sem);
2620 +#endif
2621
2622 /* Cleanup everything else. */
2623 memset(&p->rt_param, 0, sizeof(p->rt_param));
2624 @@ -399,7 +460,7 @@ static void synch_on_plugin_switch(void* info)
2625 */
2626 int switch_sched_plugin(struct sched_plugin* plugin)
2627 {
2628 - unsigned long flags;
2629 + //unsigned long flags;
2630 int ret = 0;
2631
2632 BUG_ON(!plugin);
2633 @@ -413,8 +474,15 @@ int switch_sched_plugin(struct sched_plugin* plugin)
2634 while (atomic_read(&cannot_use_plugin) < num_online_cpus())
2635 cpu_relax();
2636
2637 +#ifdef CONFIG_LITMUS_SOFTIRQD
2638 + if(!klitirqd_is_dead())
2639 + {
2640 + kill_klitirqd();
2641 + }
2642 +#endif
2643 +
2644 /* stop task transitions */
2645 - raw_spin_lock_irqsave(&task_transition_lock, flags);
2646 + //raw_spin_lock_irqsave(&task_transition_lock, flags);
2647
2648 /* don't switch if there are active real-time tasks */
2649 if (atomic_read(&rt_task_count) == 0) {
2650 @@ -432,7 +500,7 @@ int switch_sched_plugin(struct sched_plugin* plugin)
2651 } else
2652 ret = -EBUSY;
2653 out:
2654 - raw_spin_unlock_irqrestore(&task_transition_lock, flags);
2655 + //raw_spin_unlock_irqrestore(&task_transition_lock, flags);
2656 atomic_set(&cannot_use_plugin, 0);
2657 return ret;
2658 }
2659 @@ -540,6 +608,10 @@ static int __init _init_litmus(void)
2660
2661 init_litmus_proc();
2662
2663 +#ifdef CONFIG_SCHED_CPU_AFFINITY
2664 + init_topology();
2665 +#endif
2666 +
2667 return 0;
2668 }
2669
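Note: from user space the new syscall is reached like any other LITMUS-RT syscall. Sketch assuming a liblitmus-style wrapper named register_nv_device() (the wrapper name, the syscall number, and the reading of reg_action == 0 as "unregister" are assumptions):

    int ret = register_nv_device(0 /* nv_device_id */, 1 /* reg_action: register */);
    if (ret != 0) {
    	/* -EINVAL if the kernel lacks CONFIG_LITMUS_NVIDIA,
    	 * EFAULT if nv_device_id is out of range */
    }
    /* ... issue GPU work as the registered owner of device 0 ... */
    register_nv_device(0, 0);	/* assumed: drop the registration */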
2670 diff --git a/litmus/litmus_pai_softirq.c b/litmus/litmus_pai_softirq.c
2671 new file mode 100644
2672 index 0000000..b31eeb8
2673 --- /dev/null
2674 +++ b/litmus/litmus_pai_softirq.c
2675 @@ -0,0 +1,64 @@
2676 +#include <linux/interrupt.h>
2677 +#include <linux/percpu.h>
2678 +#include <linux/cpu.h>
2679 +#include <linux/kthread.h>
2680 +#include <linux/ftrace.h>
2681 +#include <linux/smp.h>
2682 +#include <linux/slab.h>
2683 +#include <linux/mutex.h>
2684 +
2685 +#include <linux/sched.h>
2686 +#include <linux/cpuset.h>
2687 +
2688 +#include <litmus/litmus.h>
2689 +#include <litmus/sched_trace.h>
2690 +#include <litmus/jobs.h>
2691 +#include <litmus/sched_plugin.h>
2692 +#include <litmus/litmus_softirq.h>
2693 +
2694 +
2695 +
2696 +int __litmus_tasklet_schedule(struct tasklet_struct *t, unsigned int k_id)
2697 +{
2698 + int ret = 0; /* assume failure */
2699 + if(unlikely((t->owner == NULL) || !is_realtime(t->owner)))
2700 + {
2701 + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
2702 + BUG();
2703 + }
2704 +
2705 + ret = litmus->enqueue_pai_tasklet(t);
2706 +
2707 + return(ret);
2708 +}
2709 +
2710 +EXPORT_SYMBOL(__litmus_tasklet_schedule);
2711 +
2712 +
2713 +
2714 +// failure causes default Linux handling.
2715 +int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, unsigned int k_id)
2716 +{
2717 + int ret = 0; /* assume failure */
2718 + return(ret);
2719 +}
2720 +EXPORT_SYMBOL(__litmus_tasklet_hi_schedule);
2721 +
2722 +
2723 +// failure causes default Linux handling.
2724 +int __litmus_tasklet_hi_schedule_first(struct tasklet_struct *t, unsigned int k_id)
2725 +{
2726 + int ret = 0; /* assume failure */
2727 + return(ret);
2728 +}
2729 +EXPORT_SYMBOL(__litmus_tasklet_hi_schedule_first);
2730 +
2731 +
2732 +// failure causes default Linux handling.
2733 +int __litmus_schedule_work(struct work_struct *w, unsigned int k_id)
2734 +{
2735 + int ret = 0; /* assume failure */
2736 + return(ret);
2737 +}
2738 +EXPORT_SYMBOL(__litmus_schedule_work);
2739 +
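Note: apart from the low-priority tasklet path, the PAI hooks above are stubs that return 0, which callers interpret as "not handled, use the stock Linux path". The contract, restated from a caller's perspective (illustrative only; the real call sites are the _litmus_* dispatch points in kernel/softirq.c, and device_id stands for the GPU number):

    	if (!__litmus_tasklet_hi_schedule(t, device_id))
    		___tasklet_hi_schedule(t);	/* 0 => ordinary HI softirq path */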
2740 diff --git a/litmus/litmus_proc.c b/litmus/litmus_proc.c
2741 index 4bf725a..3815133 100644
2742 --- a/litmus/litmus_proc.c
2743 +++ b/litmus/litmus_proc.c
2744 @@ -20,11 +20,18 @@ static struct proc_dir_entry *litmus_dir = NULL,
2745 #ifdef CONFIG_RELEASE_MASTER
2746 *release_master_file = NULL,
2747 #endif
2748 +#ifdef CONFIG_LITMUS_SOFTIRQD
2749 + *klitirqd_file = NULL,
2750 +#endif
2751 *plugs_file = NULL;
2752
2753 /* in litmus/sync.c */
2754 int count_tasks_waiting_for_release(void);
2755
2756 +extern int proc_read_klitirqd_stats(char *page, char **start,
2757 + off_t off, int count,
2758 + int *eof, void *data);
2759 +
2760 static int proc_read_stats(char *page, char **start,
2761 off_t off, int count,
2762 int *eof, void *data)
2763 @@ -161,6 +168,12 @@ int __init init_litmus_proc(void)
2764 release_master_file->write_proc = proc_write_release_master;
2765 #endif
2766
2767 +#ifdef CONFIG_LITMUS_SOFTIRQD
2768 + klitirqd_file =
2769 + create_proc_read_entry("klitirqd_stats", 0444, litmus_dir,
2770 + proc_read_klitirqd_stats, NULL);
2771 +#endif
2772 +
2773 stat_file = create_proc_read_entry("stats", 0444, litmus_dir,
2774 proc_read_stats, NULL);
2775
2776 @@ -187,6 +200,10 @@ void exit_litmus_proc(void)
2777 remove_proc_entry("stats", litmus_dir);
2778 if (curr_file)
2779 remove_proc_entry("active_plugin", litmus_dir);
2780 +#ifdef CONFIG_LITMUS_SOFTIRQD
2781 + if (klitirqd_file)
2782 + remove_proc_entry("klitirqd_stats", litmus_dir);
2783 +#endif
2784 #ifdef CONFIG_RELEASE_MASTER
2785 if (release_master_file)
2786 remove_proc_entry("release_master", litmus_dir);
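Note: the new proc entry exposes per-daemon counters and can be read with ordinary file I/O. Quick user-space check, assuming the LITMUS proc directory is /proc/litmus (the prefix is not spelled out in this hunk):

    #include <stdio.h>

    int main(void)
    {
    	char line[256];
    	FILE *f = fopen("/proc/litmus/klitirqd_stats", "r"); /* assumed path */
    	if (!f)
    		return 1;
    	while (fgets(line, sizeof(line), f))
    		fputs(line, stdout);	/* ready count plus per-thread stats */
    	fclose(f);
    	return 0;
    }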
2787 diff --git a/litmus/litmus_softirq.c b/litmus/litmus_softirq.c
2788 new file mode 100644
2789 index 0000000..c49676c
2790 --- /dev/null
2791 +++ b/litmus/litmus_softirq.c
2792 @@ -0,0 +1,1584 @@
2793 +#include <linux/interrupt.h>
2794 +#include <linux/percpu.h>
2795 +#include <linux/cpu.h>
2796 +#include <linux/kthread.h>
2797 +#include <linux/ftrace.h>
2798 +#include <linux/smp.h>
2799 +#include <linux/slab.h>
2800 +#include <linux/mutex.h>
2801 +
2802 +#include <linux/sched.h>
2803 +#include <linux/cpuset.h>
2804 +
2805 +#include <litmus/litmus.h>
2806 +#include <litmus/sched_trace.h>
2807 +#include <litmus/jobs.h>
2808 +#include <litmus/sched_plugin.h>
2809 +#include <litmus/litmus_softirq.h>
2810 +
2811 +/* TODO: Remove unneeded mb() and other barriers. */
2812 +
2813 +
2814 +/* counts number of daemons ready to handle litmus irqs. */
2815 +static atomic_t num_ready_klitirqds = ATOMIC_INIT(0);
2816 +
2817 +enum pending_flags
2818 +{
2819 + LIT_TASKLET_LOW = 0x1,
2820 + LIT_TASKLET_HI = LIT_TASKLET_LOW<<1,
2821 + LIT_WORK = LIT_TASKLET_HI<<1
2822 +};
2823 +
2824 +/* only support tasklet processing for now. */
2825 +struct tasklet_head
2826 +{
2827 + struct tasklet_struct *head;
2828 + struct tasklet_struct **tail;
2829 +};
2830 +
2831 +struct klitirqd_info
2832 +{
2833 + struct task_struct* klitirqd;
2834 + struct task_struct* current_owner;
2835 + int terminating;
2836 +
2837 +
2838 + raw_spinlock_t lock;
2839 +
2840 + u32 pending;
2841 + atomic_t num_hi_pending;
2842 + atomic_t num_low_pending;
2843 + atomic_t num_work_pending;
2844 +
2845 + /* in order of priority */
2846 + struct tasklet_head pending_tasklets_hi;
2847 + struct tasklet_head pending_tasklets;
2848 + struct list_head worklist;
2849 +};
2850 +
2851 +/* one list for each klitirqd */
2852 +static struct klitirqd_info klitirqds[NR_LITMUS_SOFTIRQD];
2853 +
2854 +
2855 +
2856 +
2857 +
2858 +int proc_read_klitirqd_stats(char *page, char **start,
2859 + off_t off, int count,
2860 + int *eof, void *data)
2861 +{
2862 + int len = snprintf(page, PAGE_SIZE,
2863 + "num ready klitirqds: %d\n\n",
2864 + atomic_read(&num_ready_klitirqds));
2865 +
2866 + if(klitirqd_is_ready())
2867 + {
2868 + int i;
2869 + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
2870 + {
2871 + len +=
2872 + snprintf(page + len - 1, PAGE_SIZE, /* -1 to strip off \0 */
2873 + "klitirqd_th%d: %s/%d\n"
2874 + "\tcurrent_owner: %s/%d\n"
2875 + "\tpending: %x\n"
2876 + "\tnum hi: %d\n"
2877 + "\tnum low: %d\n"
2878 + "\tnum work: %d\n\n",
2879 + i,
2880 + klitirqds[i].klitirqd->comm, klitirqds[i].klitirqd->pid,
2881 + (klitirqds[i].current_owner != NULL) ?
2882 + klitirqds[i].current_owner->comm : "(null)",
2883 + (klitirqds[i].current_owner != NULL) ?
2884 + klitirqds[i].current_owner->pid : 0,
2885 + klitirqds[i].pending,
2886 + atomic_read(&klitirqds[i].num_hi_pending),
2887 + atomic_read(&klitirqds[i].num_low_pending),
2888 + atomic_read(&klitirqds[i].num_work_pending));
2889 + }
2890 + }
2891 +
2892 + return(len);
2893 +}
2894 +
2895 +
2896 +
2897 +
2898 +
2899 +#if 0
2900 +static atomic_t dump_id = ATOMIC_INIT(0);
2901 +
2902 +static void __dump_state(struct klitirqd_info* which, const char* caller)
2903 +{
2904 + struct tasklet_struct* list;
2905 +
2906 + int id = atomic_inc_return(&dump_id);
2907 +
2908 + //if(in_interrupt())
2909 + {
2910 + if(which->current_owner)
2911 + {
2912 + TRACE("(id: %d caller: %s)\n"
2913 + "klitirqd: %s/%d\n"
2914 + "current owner: %s/%d\n"
2915 + "pending: %x\n",
2916 + id, caller,
2917 + which->klitirqd->comm, which->klitirqd->pid,
2918 + which->current_owner->comm, which->current_owner->pid,
2919 + which->pending);
2920 + }
2921 + else
2922 + {
2923 + TRACE("(id: %d caller: %s)\n"
2924 + "klitirqd: %s/%d\n"
2925 + "current owner: %p\n"
2926 + "pending: %x\n",
2927 + id, caller,
2928 + which->klitirqd->comm, which->klitirqd->pid,
2929 + NULL,
2930 + which->pending);
2931 + }
2932 +
2933 + list = which->pending_tasklets.head;
2934 + while(list)
2935 + {
2936 + struct tasklet_struct *t = list;
2937 + list = list->next; /* advance */
2938 + if(t->owner)
2939 + TRACE("(id: %d caller: %s) Tasklet: %x, Owner = %s/%d\n", id, caller, t, t->owner->comm, t->owner->pid);
2940 + else
2941 + TRACE("(id: %d caller: %s) Tasklet: %x, Owner = %p\n", id, caller, t, NULL);
2942 + }
2943 + }
2944 +}
2945 +
2946 +static void dump_state(struct klitirqd_info* which, const char* caller)
2947 +{
2948 + unsigned long flags;
2949 +
2950 + raw_spin_lock_irqsave(&which->lock, flags);
2951 + __dump_state(which, caller);
2952 + raw_spin_unlock_irqrestore(&which->lock, flags);
2953 +}
2954 +#endif
2955 +
2956 +
2957 +/* forward declarations */
2958 +static void ___litmus_tasklet_schedule(struct tasklet_struct *t,
2959 + struct klitirqd_info *which,
2960 + int wakeup);
2961 +static void ___litmus_tasklet_hi_schedule(struct tasklet_struct *t,
2962 + struct klitirqd_info *which,
2963 + int wakeup);
2964 +static void ___litmus_schedule_work(struct work_struct *w,
2965 + struct klitirqd_info *which,
2966 + int wakeup);
2967 +
2968 +
2969 +
2970 +inline unsigned int klitirqd_id(struct task_struct* tsk)
2971 +{
2972 + int i;
2973 + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
2974 + {
2975 + if(klitirqds[i].klitirqd == tsk)
2976 + {
2977 + return i;
2978 + }
2979 + }
2980 +
2981 + BUG();
2982 +
2983 + return 0;
2984 +}
2985 +
2986 +
2987 +inline static u32 litirq_pending_hi_irqoff(struct klitirqd_info* which)
2988 +{
2989 + return (which->pending & LIT_TASKLET_HI);
2990 +}
2991 +
2992 +inline static u32 litirq_pending_low_irqoff(struct klitirqd_info* which)
2993 +{
2994 + return (which->pending & LIT_TASKLET_LOW);
2995 +}
2996 +
2997 +inline static u32 litirq_pending_work_irqoff(struct klitirqd_info* which)
2998 +{
2999 + return (which->pending & LIT_WORK);
3000 +}
3001 +
3002 +inline static u32 litirq_pending_irqoff(struct klitirqd_info* which)
3003 +{
3004 + return(which->pending);
3005 +}
3006 +
3007 +
3008 +inline static u32 litirq_pending(struct klitirqd_info* which)
3009 +{
3010 + unsigned long flags;
3011 + u32 pending;
3012 +
3013 + raw_spin_lock_irqsave(&which->lock, flags);
3014 + pending = litirq_pending_irqoff(which);
3015 + raw_spin_unlock_irqrestore(&which->lock, flags);
3016 +
3017 + return pending;
3018 +};
3019 +
3020 +inline static u32 litirq_pending_with_owner(struct klitirqd_info* which, struct task_struct* owner)
3021 +{
3022 + unsigned long flags;
3023 + u32 pending;
3024 +
3025 + raw_spin_lock_irqsave(&which->lock, flags);
3026 + pending = litirq_pending_irqoff(which);
3027 + if(pending)
3028 + {
3029 + if(which->current_owner != owner)
3030 + {
3031 + pending = 0; // owner switch!
3032 + }
3033 + }
3034 + raw_spin_unlock_irqrestore(&which->lock, flags);
3035 +
3036 + return pending;
3037 +}
3038 +
3039 +
3040 +inline static u32 litirq_pending_and_sem_and_owner(struct klitirqd_info* which,
3041 + struct mutex** sem,
3042 + struct task_struct** t)
3043 +{
3044 + unsigned long flags;
3045 + u32 pending;
3046 +
3047 + /* init values */
3048 + *sem = NULL;
3049 + *t = NULL;
3050 +
3051 + raw_spin_lock_irqsave(&which->lock, flags);
3052 +
3053 + pending = litirq_pending_irqoff(which);
3054 + if(pending)
3055 + {
3056 + if(which->current_owner != NULL)
3057 + {
3058 + *t = which->current_owner;
3059 + *sem = &tsk_rt(which->current_owner)->klitirqd_sem;
3060 + }
3061 + else
3062 + {
3063 + BUG();
3064 + }
3065 + }
3066 + raw_spin_unlock_irqrestore(&which->lock, flags);
3067 +
3068 + if(likely(*sem))
3069 + {
3070 + return pending;
3071 + }
3072 + else
3073 + {
3074 + return 0;
3075 + }
3076 +}
3077 +
3078 +/* returns true if the next piece of work to do is from a different owner.
3079 + */
3080 +static int tasklet_ownership_change(
3081 + struct klitirqd_info* which,
3082 + enum pending_flags taskletQ)
3083 +{
3084 + /* this function doesn't have to look at work objects since they have
3085 + priority below tasklets. */
3086 +
3087 + unsigned long flags;
3088 + int ret = 0;
3089 +
3090 + raw_spin_lock_irqsave(&which->lock, flags);
3091 +
3092 + switch(taskletQ)
3093 + {
3094 + case LIT_TASKLET_HI:
3095 + if(litirq_pending_hi_irqoff(which))
3096 + {
3097 + ret = (which->pending_tasklets_hi.head->owner !=
3098 + which->current_owner);
3099 + }
3100 + break;
3101 + case LIT_TASKLET_LOW:
3102 + if(litirq_pending_low_irqoff(which))
3103 + {
3104 + ret = (which->pending_tasklets.head->owner !=
3105 + which->current_owner);
3106 + }
3107 + break;
3108 + default:
3109 + break;
3110 + }
3111 +
3112 + raw_spin_unlock_irqrestore(&which->lock, flags);
3113 +
3114 + TRACE_TASK(which->klitirqd, "ownership change needed: %d\n", ret);
3115 +
3116 + return ret;
3117 +}
3118 +
3119 +
3120 +static void __reeval_prio(struct klitirqd_info* which)
3121 +{
3122 + struct task_struct* next_owner = NULL;
3123 + struct task_struct* klitirqd = which->klitirqd;
3124 +
3125 + /* Check in prio-order */
3126 + u32 pending = litirq_pending_irqoff(which);
3127 +
3128 + //__dump_state(which, "__reeval_prio: before");
3129 +
3130 + if(pending)
3131 + {
3132 + if(pending & LIT_TASKLET_HI)
3133 + {
3134 + next_owner = which->pending_tasklets_hi.head->owner;
3135 + }
3136 + else if(pending & LIT_TASKLET_LOW)
3137 + {
3138 + next_owner = which->pending_tasklets.head->owner;
3139 + }
3140 + else if(pending & LIT_WORK)
3141 + {
3142 + struct work_struct* work =
3143 + list_first_entry(&which->worklist, struct work_struct, entry);
3144 + next_owner = work->owner;
3145 + }
3146 + }
3147 +
3148 + if(next_owner != which->current_owner)
3149 + {
3150 + struct task_struct* old_owner = which->current_owner;
3151 +
3152 + /* bind the next owner. */
3153 + which->current_owner = next_owner;
3154 + mb();
3155 +
3156 + if(next_owner != NULL)
3157 + {
3158 + if(!in_interrupt())
3159 + {
3160 + TRACE_CUR("%s: Ownership change: %s/%d to %s/%d\n", __FUNCTION__,
3161 + ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->comm,
3162 + ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->pid,
3163 + next_owner->comm, next_owner->pid);
3164 + }
3165 + else
3166 + {
3167 + TRACE("%s: Ownership change: %s/%d to %s/%d\n", __FUNCTION__,
3168 + ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->comm,
3169 + ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->pid,
3170 + next_owner->comm, next_owner->pid);
3171 + }
3172 +
3173 + litmus->set_prio_inh_klitirqd(klitirqd, old_owner, next_owner);
3174 + }
3175 + else
3176 + {
3177 + if(likely(!in_interrupt()))
3178 + {
3179 + TRACE_CUR("%s: Ownership change: %s/%d to NULL (reverting)\n",
3180 + __FUNCTION__, klitirqd->comm, klitirqd->pid);
3181 + }
3182 + else
3183 + {
3184 + // is this a bug?
3185 + TRACE("%s: Ownership change: %s/%d to NULL (reverting)\n",
3186 + __FUNCTION__, klitirqd->comm, klitirqd->pid);
3187 + }
3188 +
3189 + BUG_ON(pending != 0);
3190 + litmus->clear_prio_inh_klitirqd(klitirqd, old_owner);
3191 + }
3192 + }
3193 +
3194 + //__dump_state(which, "__reeval_prio: after");
3195 +}
3196 +
3197 +static void reeval_prio(struct klitirqd_info* which)
3198 +{
3199 + unsigned long flags;
3200 +
3201 + raw_spin_lock_irqsave(&which->lock, flags);
3202 + __reeval_prio(which);
3203 + raw_spin_unlock_irqrestore(&which->lock, flags);
3204 +}
3205 +
3206 +
3207 +static void wakeup_litirqd_locked(struct klitirqd_info* which)
3208 +{
3209 + /* Interrupts are disabled: no need to stop preemption */
3210 + if (which && which->klitirqd)
3211 + {
3212 + __reeval_prio(which); /* configure the proper priority */
3213 +
3214 + if(which->klitirqd->state != TASK_RUNNING)
3215 + {
3216 + TRACE("%s: Waking up klitirqd: %s/%d\n", __FUNCTION__,
3217 + which->klitirqd->comm, which->klitirqd->pid);
3218 +
3219 + wake_up_process(which->klitirqd);
3220 + }
3221 + }
3222 +}
3223 +
3224 +
3225 +static void do_lit_tasklet(struct klitirqd_info* which,
3226 + struct tasklet_head* pending_tasklets)
3227 +{
3228 + unsigned long flags;
3229 + struct tasklet_struct *list;
3230 + atomic_t* count;
3231 +
3232 + raw_spin_lock_irqsave(&which->lock, flags);
3233 +
3234 + //__dump_state(which, "do_lit_tasklet: before steal");
3235 +
3236 + /* copy out the tasklets for our private use. */
3237 + list = pending_tasklets->head;
3238 + pending_tasklets->head = NULL;
3239 + pending_tasklets->tail = &pending_tasklets->head;
3240 +
3241 + /* remove pending flag */
3242 + which->pending &= (pending_tasklets == &which->pending_tasklets) ?
3243 + ~LIT_TASKLET_LOW :
3244 + ~LIT_TASKLET_HI;
3245 +
3246 + count = (pending_tasklets == &which->pending_tasklets) ?
3247 + &which->num_low_pending:
3248 + &which->num_hi_pending;
3249 +
3250 + //__dump_state(which, "do_lit_tasklet: after steal");
3251 +
3252 + raw_spin_unlock_irqrestore(&which->lock, flags);
3253 +
3254 +
3255 + while(list)
3256 + {
3257 + struct tasklet_struct *t = list;
3258 +
3259 + /* advance, lest we forget */
3260 + list = list->next;
3261 +
3262 + /* execute tasklet if it has my priority and is free */
3263 + if ((t->owner == which->current_owner) && tasklet_trylock(t)) {
3264 + if (!atomic_read(&t->count)) {
3265 +
3266 + sched_trace_tasklet_begin(t->owner);
3267 +
3268 + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
3269 + {
3270 + BUG();
3271 + }
3272 + TRACE_CUR("%s: Invoking tasklet.\n", __FUNCTION__);
3273 + t->func(t->data);
3274 + tasklet_unlock(t);
3275 +
3276 + atomic_dec(count);
3277 +
3278 + sched_trace_tasklet_end(t->owner, 0ul);
3279 +
3280 + continue; /* process more tasklets */
3281 + }
3282 + tasklet_unlock(t);
3283 + }
3284 +
3285 + TRACE_CUR("%s: Could not invoke tasklet. Requeuing.\n", __FUNCTION__);
3286 +
3287 + /* couldn't process tasklet. put it back at the end of the queue. */
3288 + if(pending_tasklets == &which->pending_tasklets)
3289 + ___litmus_tasklet_schedule(t, which, 0);
3290 + else
3291 + ___litmus_tasklet_hi_schedule(t, which, 0);
3292 + }
3293 +}
3294 +
3295 +
3296 +// returns 1 if priorities need to be changed to continue processing
3297 +// pending tasklets.
3298 +static int do_litirq(struct klitirqd_info* which)
3299 +{
3300 + u32 pending;
3301 + int resched = 0;
3302 +
3303 + if(in_interrupt())
3304 + {
3305 + TRACE("%s: exiting early: in interrupt context!\n", __FUNCTION__);
3306 + return(0);
3307 + }
3308 +
3309 + if(which->klitirqd != current)
3310 + {
3311 + TRACE_CUR("%s: exiting early: thread/info mismatch! Running %s/%d but given %s/%d.\n",
3312 + __FUNCTION__, current->comm, current->pid,
3313 + which->klitirqd->comm, which->klitirqd->pid);
3314 + return(0);
3315 + }
3316 +
3317 + if(!is_realtime(current))
3318 + {
3319 + TRACE_CUR("%s: exiting early: klitirqd is not real-time. Sched Policy = %d\n",
3320 + __FUNCTION__, current->policy);
3321 + return(0);
3322 + }
3323 +
3324 +
3325 + /* We only handle tasklets & work objects, no need for RCU triggers? */
3326 +
3327 + pending = litirq_pending(which);
3328 + if(pending)
3329 + {
3330 + /* extract the work to do and do it! */
3331 + if(pending & LIT_TASKLET_HI)
3332 + {
3333 + TRACE_CUR("%s: Invoking HI tasklets.\n", __FUNCTION__);
3334 + do_lit_tasklet(which, &which->pending_tasklets_hi);
3335 + resched = tasklet_ownership_change(which, LIT_TASKLET_HI);
3336 +
3337 + if(resched)
3338 + {
3339 + TRACE_CUR("%s: HI tasklets of another owner remain. "
3340 + "Skipping any LOW tasklets.\n", __FUNCTION__);
3341 + }
3342 + }
3343 +
3344 + if(!resched && (pending & LIT_TASKLET_LOW))
3345 + {
3346 + TRACE_CUR("%s: Invoking LOW tasklets.\n", __FUNCTION__);
3347 + do_lit_tasklet(which, &which->pending_tasklets);
3348 + resched = tasklet_ownership_change(which, LIT_TASKLET_LOW);
3349 +
3350 + if(resched)
3351 + {
3352 + TRACE_CUR("%s: LOW tasklets of another owner remain. "
3353 + "Skipping any work objects.\n", __FUNCTION__);
3354 + }
3355 + }
3356 + }
3357 +
3358 + return(resched);
3359 +}
3360 +
3361 +
3362 +static void do_work(struct klitirqd_info* which)
3363 +{
3364 + unsigned long flags;
3365 + work_func_t f;
3366 + struct work_struct* work;
3367 +
3368 + // only execute one work-queue item to yield to tasklets.
3369 + // ...is this a good idea, or should we just batch them?
3370 + raw_spin_lock_irqsave(&which->lock, flags);
3371 +
3372 + if(!litirq_pending_work_irqoff(which))
3373 + {
3374 + raw_spin_unlock_irqrestore(&which->lock, flags);
3375 + goto no_work;
3376 + }
3377 +
3378 + work = list_first_entry(&which->worklist, struct work_struct, entry);
3379 + list_del_init(&work->entry);
3380 +
3381 + if(list_empty(&which->worklist))
3382 + {
3383 + which->pending &= ~LIT_WORK;
3384 + }
3385 +
3386 + raw_spin_unlock_irqrestore(&which->lock, flags);
3387 +
3388 +
3389 +
3390 + /* safe to read current_owner outside of lock since only this thread
3391 + may write to the pointer. */
3392 + if(work->owner == which->current_owner)
3393 + {
3394 + TRACE_CUR("%s: Invoking work object.\n", __FUNCTION__);
3395 + // do the work!
3396 + work_clear_pending(work);
3397 + f = work->func;
3398 + f(work); /* can't touch 'work' after this point,
3399 + the user may have freed it. */
3400 +
3401 + atomic_dec(&which->num_work_pending);
3402 + }
3403 + else
3404 + {
3405 + TRACE_CUR("%s: Could not invoke work object. Requeuing.\n",
3406 + __FUNCTION__);
3407 + ___litmus_schedule_work(work, which, 0);
3408 + }
3409 +
3410 +no_work:
3411 + return;
3412 +}
3413 +
3414 +
3415 +static int set_litmus_daemon_sched(void)
3416 +{
3417 + /* set up a daemon job that will never complete.
3418 + it should only ever run on behalf of another
3419 + real-time task.
3420 +
3421 + TODO: Transition to a new job whenever a
3422 + new tasklet is handled */
3423 +
3424 + int ret = 0;
3425 +
3426 + struct rt_task tp = {
3427 + .exec_cost = 0,
3428 + .period = 1000000000, /* dummy 1 second period */
3429 + .phase = 0,
3430 + .cpu = task_cpu(current),
3431 + .budget_policy = NO_ENFORCEMENT,
3432 + .cls = RT_CLASS_BEST_EFFORT
3433 + };
3434 +
3435 + struct sched_param param = { .sched_priority = 0};
3436 +
3437 +
3438 + /* set task params, mark as proxy thread, and init other data */
3439 + tsk_rt(current)->task_params = tp;
3440 + tsk_rt(current)->is_proxy_thread = 1;
3441 + tsk_rt(current)->cur_klitirqd = NULL;
3442 + //init_MUTEX(&tsk_rt(current)->klitirqd_sem);
3443 + mutex_init(&tsk_rt(current)->klitirqd_sem);
3444 + //init_completion(&tsk_rt(current)->klitirqd_sem);
3445 + atomic_set(&tsk_rt(current)->klitirqd_sem_stat, NOT_HELD);
3446 +
3447 + /* inform the OS we're SCHED_LITMUS --
3448 + sched_setscheduler_nocheck() calls litmus_admit_task(). */
3449 + sched_setscheduler_nocheck(current, SCHED_LITMUS, ¶m);
3450 +
3451 + return ret;
3452 +}
3453 +
3454 +static void enter_execution_phase(struct klitirqd_info* which,
3455 + struct mutex* sem,
3456 + struct task_struct* t)
3457 +{
3458 + TRACE_CUR("%s: Trying to enter execution phase. "
3459 + "Acquiring semaphore of %s/%d\n", __FUNCTION__,
3460 + t->comm, t->pid);
3461 + down_and_set_stat(current, HELD, sem);
3462 + TRACE_CUR("%s: Execution phase entered! "
3463 + "Acquired semaphore of %s/%d\n", __FUNCTION__,
3464 + t->comm, t->pid);
3465 +}
3466 +
3467 +static void exit_execution_phase(struct klitirqd_info* which,
3468 + struct mutex* sem,
3469 + struct task_struct* t)
3470 +{
3471 + TRACE_CUR("%s: Exiting execution phase. "
3472 + "Releasing semaphore of %s/%d\n", __FUNCTION__,
3473 + t->comm, t->pid);
3474 + if(atomic_read(&tsk_rt(current)->klitirqd_sem_stat) == HELD)
3475 + {
3476 + up_and_set_stat(current, NOT_HELD, sem);
3477 + TRACE_CUR("%s: Execution phase exited! "
3478 + "Released semaphore of %s/%d\n", __FUNCTION__,
3479 + t->comm, t->pid);
3480 + }
3481 + else
3482 + {
3483 + TRACE_CUR("%s: COULDN'T RELEASE SEMAPHORE BECAUSE ONE IS NOT HELD!\n", __FUNCTION__);
3484 + }
3485 +}
3486 +
3487 +/* main loop for klitsoftirqd */
3488 +static int run_klitirqd(void* unused)
3489 +{
3490 + struct klitirqd_info* which = &klitirqds[klitirqd_id(current)];
3491 + struct mutex* sem;
3492 + struct task_struct* owner;
3493 +
3494 + int rt_status = set_litmus_daemon_sched();
3495 +
3496 + if(rt_status != 0)
3497 + {
3498 + TRACE_CUR("%s: Failed to transition to rt-task.\n", __FUNCTION__);
3499 + goto rt_failed;
3500 + }
3501 +
3502 + atomic_inc(&num_ready_klitirqds);
3503 +
3504 + set_current_state(TASK_INTERRUPTIBLE);
3505 +
3506 + while (!kthread_should_stop())
3507 + {
3508 + preempt_disable();
3509 + if (!litirq_pending(which))
3510 + {
3511 + /* sleep for work */
3512 + TRACE_CUR("%s: No more tasklets or work objects. Going to sleep.\n",
3513 + __FUNCTION__);
3514 + preempt_enable_no_resched();
3515 + schedule();
3516 +
3517 + if(kthread_should_stop()) /* bail out */
3518 + {
3519 + TRACE_CUR("%s:%d: Signaled to terminate.\n", __FUNCTION__, __LINE__);
3520 + continue;
3521 + }
3522 +
3523 + preempt_disable();
3524 + }
3525 +
3526 + __set_current_state(TASK_RUNNING);
3527 +
3528 + while (litirq_pending_and_sem_and_owner(which, &sem, &owner))
3529 + {
3530 + int needs_resched = 0;
3531 +
3532 + preempt_enable_no_resched();
3533 +
3534 + BUG_ON(sem == NULL);
3535 +
3536 + // wait to enter execution phase; wait for 'current_owner' to block.
3537 + enter_execution_phase(which, sem, owner);
3538 +
3539 + if(kthread_should_stop())
3540 + {
3541 + TRACE_CUR("%s:%d: Signaled to terminate.\n", __FUNCTION__, __LINE__);
3542 + break;
3543 + }
3544 +
3545 + preempt_disable();
3546 +
3547 + /* Double check that there's still pending work and the owner hasn't
3548 + * changed. Pending items may have been flushed while we were sleeping.
3549 + */
3550 + if(litirq_pending_with_owner(which, owner))
3551 + {
3552 + TRACE_CUR("%s: Executing tasklets and/or work objects.\n",
3553 + __FUNCTION__);
3554 +
3555 + needs_resched = do_litirq(which);
3556 +
3557 + preempt_enable_no_resched();
3558 +
3559 + // work objects are preemptible.
3560 + if(!needs_resched)
3561 + {
3562 + do_work(which);
3563 + }
3564 +
3565 + // exit execution phase.
3566 + exit_execution_phase(which, sem, owner);
3567 +
3568 + TRACE_CUR("%s: Setting up next priority.\n", __FUNCTION__);
3569 + reeval_prio(which); /* check if we need to change priority here */
3570 + }
3571 + else
3572 + {
3573 + TRACE_CUR("%s: Pending work was flushed! Prev owner was %s/%d\n",
3574 + __FUNCTION__,
3575 + owner->comm, owner->pid);
3576 + preempt_enable_no_resched();
3577 +
3578 + // exit execution phase.
3579 + exit_execution_phase(which, sem, owner);
3580 + }
3581 +
3582 + cond_resched();
3583 + preempt_disable();
3584 + }
3585 + preempt_enable();
3586 + set_current_state(TASK_INTERRUPTIBLE);
3587 + }
3588 + __set_current_state(TASK_RUNNING);
3589 +
3590 + atomic_dec(&num_ready_klitirqds);
3591 +
3592 +rt_failed:
3593 + litmus_exit_task(current);
3594 +
3595 + return rt_status;
3596 +}
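/* Editorial summary (not part of the patch) of one iteration of the main
 * loop in run_klitirqd() above:
 *
 *	while (litirq_pending_and_sem_and_owner(which, &sem, &owner)) {
 *		enter_execution_phase(which, sem, owner);  // block on owner's klitirqd_sem
 *		if (litirq_pending_with_owner(which, owner)) {
 *			needs_resched = do_litirq(which);  // HI, then LOW tasklets
 *			if (!needs_resched)
 *				do_work(which);            // at most one work item
 *			exit_execution_phase(which, sem, owner);
 *			reeval_prio(which);                // inherit next owner's priority
 *		} else {
 *			exit_execution_phase(which, sem, owner);  // work was flushed meanwhile
 *		}
 *	}
 */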
3597 +
3598 +
3599 +struct klitirqd_launch_data
3600 +{
3601 + int* cpu_affinity;
3602 + struct work_struct work;
3603 +};
3604 +
3605 +/* executed by a kworker from workqueues */
3606 +static void launch_klitirqd(struct work_struct *work)
3607 +{
3608 + int i;
3609 +
3610 + struct klitirqd_launch_data* launch_data =
3611 + container_of(work, struct klitirqd_launch_data, work);
3612 +
3613 + TRACE("%s: Creating %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD);
3614 +
3615 + /* create the daemon threads */
3616 + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
3617 + {
3618 + if(launch_data->cpu_affinity)
3619 + {
3620 + klitirqds[i].klitirqd =
3621 + kthread_create(
3622 + run_klitirqd,
3623 + /* treat the affinity as a pointer, we'll cast it back later */
3624 + (void*)(long long)launch_data->cpu_affinity[i],
3625 + "klitirqd_th%d/%d",
3626 + i,
3627 + launch_data->cpu_affinity[i]);
3628 +
3629 + /* litmus will put is in the right cluster. */
3630 + kthread_bind(klitirqds[i].klitirqd, launch_data->cpu_affinity[i]);
3631 + }
3632 + else
3633 + {
3634 + klitirqds[i].klitirqd =
3635 + kthread_create(
3636 + run_klitirqd,
3637 + /* treat the affinity as a pointer, we'll cast it back later */
3638 + (void*)(long long)(-1),
3639 + "klitirqd_th%d",
3640 + i);
3641 + }
3642 + }
3643 +
3644 + TRACE("%s: Launching %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD);
3645 +
3646 + /* unleash the daemons */
3647 + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
3648 + {
3649 + wake_up_process(klitirqds[i].klitirqd);
3650 + }
3651 +
3652 + if(launch_data->cpu_affinity)
3653 + kfree(launch_data->cpu_affinity);
3654 + kfree(launch_data);
3655 +}
3656 +
3657 +
3658 +void spawn_klitirqd(int* affinity)
3659 +{
3660 + int i;
3661 + struct klitirqd_launch_data* delayed_launch;
3662 +
3663 + if(atomic_read(&num_ready_klitirqds) != 0)
3664 + {
3665 + TRACE("%s: At least one klitirqd is already running! Need to call kill_klitirqd()?\n");
3666 + return;
3667 + }
3668 +
3669 + /* init the tasklet & work queues */
3670 + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
3671 + {
3672 + klitirqds[i].terminating = 0;
3673 + klitirqds[i].pending = 0;
3674 +
3675 + klitirqds[i].num_hi_pending.counter = 0;
3676 + klitirqds[i].num_low_pending.counter = 0;
3677 + klitirqds[i].num_work_pending.counter = 0;
3678 +
3679 + klitirqds[i].pending_tasklets_hi.head = NULL;
3680 + klitirqds[i].pending_tasklets_hi.tail = &klitirqds[i].pending_tasklets_hi.head;
3681 +
3682 + klitirqds[i].pending_tasklets.head = NULL;
3683 + klitirqds[i].pending_tasklets.tail = &klitirqds[i].pending_tasklets.head;
3684 +
3685 + INIT_LIST_HEAD(&klitirqds[i].worklist);
3686 +
3687 + raw_spin_lock_init(&klitirqds[i].lock);
3688 + }
3689 +
3690 + /* wait to flush the initializations to memory since other threads
3691 + will access it. */
3692 + mb();
3693 +
3694 + /* tell a work queue to launch the threads. we can't make scheduling
3695 + calls since we're in an atomic state. */
3696 + TRACE("%s: Setting callback up to launch klitirqds\n", __FUNCTION__);
3697 + delayed_launch = kmalloc(sizeof(struct klitirqd_launch_data), GFP_ATOMIC);
3698 + if(affinity)
3699 + {
3700 + delayed_launch->cpu_affinity =
3701 + kmalloc(sizeof(int)*NR_LITMUS_SOFTIRQD, GFP_ATOMIC);
3702 +
3703 + memcpy(delayed_launch->cpu_affinity, affinity,
3704 + sizeof(int)*NR_LITMUS_SOFTIRQD);
3705 + }
3706 + else
3707 + {
3708 + delayed_launch->cpu_affinity = NULL;
3709 + }
3710 + INIT_WORK(&delayed_launch->work, launch_klitirqd);
3711 + schedule_work(&delayed_launch->work);
3712 +}
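/* Editorial note (not part of the patch): a plugin would typically call
 * spawn_klitirqd() from its activation path, optionally pinning one daemon
 * per CPU/cluster, and rely on kill_klitirqd() below for teardown; the
 * latter is also invoked from switch_sched_plugin() in litmus/litmus.c.
 * Hedged usage sketch:
 *
 *	int affinity[NR_LITMUS_SOFTIRQD] = { 0 };   // desired CPUs
 *	spawn_klitirqd(affinity);                   // or spawn_klitirqd(NULL)
 *	...
 *	kill_klitirqd();
 */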
3713 +
3714 +
3715 +void kill_klitirqd(void)
3716 +{
3717 + if(!klitirqd_is_dead())
3718 + {
3719 + int i;
3720 +
3721 + TRACE("%s: Killing %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD);
3722 +
3723 + for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
3724 + {
3725 + if(klitirqds[i].terminating != 1)
3726 + {
3727 + klitirqds[i].terminating = 1;
3728 + mb(); /* just to be sure? */
3729 + flush_pending(klitirqds[i].klitirqd, NULL);
3730 +
3731 + /* signal termination */
3732 + kthread_stop(klitirqds[i].klitirqd);
3733 + }
3734 + }
3735 + }
3736 +}
3737 +
3738 +
3739 +int klitirqd_is_ready(void)
3740 +{
3741 + return(atomic_read(&num_ready_klitirqds) == NR_LITMUS_SOFTIRQD);
3742 +}
3743 +
3744 +int klitirqd_is_dead(void)
3745 +{
3746 + return(atomic_read(&num_ready_klitirqds) == 0);
3747 +}
3748 +
3749 +
3750 +struct task_struct* get_klitirqd(unsigned int k_id)
3751 +{
3752 + return(klitirqds[k_id].klitirqd);
3753 +}
3754 +
3755 +
3756 +void flush_pending(struct task_struct* klitirqd_thread,
3757 + struct task_struct* owner)
3758 +{
3759 + unsigned int k_id = klitirqd_id(klitirqd_thread);
3760 + struct klitirqd_info *which = &klitirqds[k_id];
3761 +
3762 + unsigned long flags;
3763 + struct tasklet_struct *list;
3764 +
3765 + u32 work_flushed = 0;
3766 +
3767 + raw_spin_lock_irqsave(&which->lock, flags);
3768 +
3769 + //__dump_state(which, "flush_pending: before");
3770 +
3771 + // flush hi tasklets.
3772 + if(litirq_pending_hi_irqoff(which))
3773 + {
3774 + which->pending &= ~LIT_TASKLET_HI;
3775 +
3776 + list = which->pending_tasklets_hi.head;
3777 + which->pending_tasklets_hi.head = NULL;
3778 + which->pending_tasklets_hi.tail = &which->pending_tasklets_hi.head;
3779 +
3780 + TRACE("%s: Handing HI tasklets back to Linux.\n", __FUNCTION__);
3781 +
3782 + while(list)
3783 + {
3784 + struct tasklet_struct *t = list;
3785 + list = list->next;
3786 +
3787 + if(likely((t->owner == owner) || (owner == NULL)))
3788 + {
3789 + if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)))
3790 + {
3791 + BUG();
3792 + }
3793 +
3794 + work_flushed |= LIT_TASKLET_HI;
3795 +
3796 + t->owner = NULL;
3797 +
3798 + // re-mark as scheduled before handing the tasklet back to Linux.
3799 + if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
3800 + {
3801 + atomic_dec(&which->num_hi_pending);
3802 + ___tasklet_hi_schedule(t);
3803 + }
3804 + else
3805 + {
3806 + TRACE("%s: dropped hi tasklet??\n", __FUNCTION__);
3807 + BUG();
3808 + }
3809 + }
3810 + else
3811 + {
3812 + TRACE("%s: Could not flush a HI tasklet.\n", __FUNCTION__);
3813 + // put back on queue.
3814 + ___litmus_tasklet_hi_schedule(t, which, 0);
3815 + }
3816 + }
3817 + }
3818 +
3819 + // flush low tasklets.
3820 + if(litirq_pending_low_irqoff(which))
3821 + {
3822 + which->pending &= ~LIT_TASKLET_LOW;
3823 +
3824 + list = which->pending_tasklets.head;
3825 + which->pending_tasklets.head = NULL;
3826 + which->pending_tasklets.tail = &which->pending_tasklets.head;
3827 +
3828 + TRACE("%s: Handing LOW tasklets back to Linux.\n", __FUNCTION__);
3829 +
3830 + while(list)
3831 + {
3832 + struct tasklet_struct *t = list;
3833 + list = list->next;
3834 +
3835 + if(likely((t->owner == owner) || (owner == NULL)))
3836 + {
3837 + if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)))
3838 + {
3839 + BUG();
3840 + }
3841 +
3842 + work_flushed |= LIT_TASKLET_LOW;
3843 +
3844 + t->owner = NULL;
3845 + sched_trace_tasklet_end(owner, 1ul);
3846 +
3847 + if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
3848 + {
3849 + atomic_dec(&which->num_low_pending);
3850 + ___tasklet_schedule(t);
3851 + }
3852 + else
3853 + {
3854 + TRACE("%s: dropped tasklet??\n", __FUNCTION__);
3855 + BUG();
3856 + }
3857 + }
3858 + else
3859 + {
3860 + TRACE("%s: Could not flush a LOW tasklet.\n", __FUNCTION__);
3861 + // put back on queue
3862 + ___litmus_tasklet_schedule(t, which, 0);
3863 + }
3864 + }
3865 + }
3866 +
3867 + // flush work objects
3868 + if(litirq_pending_work_irqoff(which))
3869 + {
3870 + which->pending &= ~LIT_WORK;
3871 +
3872 + TRACE("%s: Handing work objects back to Linux.\n", __FUNCTION__);
3873 +
3874 + while(!list_empty(&which->worklist))
3875 + {
3876 + struct work_struct* work =
3877 + list_first_entry(&which->worklist, struct work_struct, entry);
3878 + list_del_init(&work->entry);
3879 +
3880 + if(likely((work->owner == owner) || (owner == NULL)))
3881 + {
3882 + work_flushed |= LIT_WORK;
3883 + atomic_dec(&which->num_work_pending);
3884 +
3885 + work->owner = NULL;
3886 + sched_trace_work_end(owner, current, 1ul);
3887 + __schedule_work(work);
3888 + }
3889 + else
3890 + {
3891 + TRACE("%s: Could not flush a work object.\n", __FUNCTION__);
3892 + // put back on queue
3893 + ___litmus_schedule_work(work, which, 0);
3894 + }
3895 + }
3896 + }
3897 +
3898 + //__dump_state(which, "flush_pending: after (before reeval prio)");
3899 +
3900 +
3901 + mb(); /* commit changes to pending flags */
3902 +
3903 + /* reset the scheduling priority */
3904 + if(work_flushed)
3905 + {
3906 + __reeval_prio(which);
3907 +
3908 + /* Try to offload flushed tasklets to Linux's ksoftirqd. */
3909 + if(work_flushed & (LIT_TASKLET_LOW | LIT_TASKLET_HI))
3910 + {
3911 + wakeup_softirqd();
3912 + }
3913 + }
3914 + else
3915 + {
3916 + TRACE_CUR("%s: no work flushed, so __reeval_prio() skipped\n", __FUNCTION__);
3917 + }
3918 +
3919 + raw_spin_unlock_irqrestore(&which->lock, flags);
3920 +}
3921 +
3922 +
3923 +
3924 +
3925 +static void ___litmus_tasklet_schedule(struct tasklet_struct *t,
3926 + struct klitirqd_info *which,
3927 + int wakeup)
3928 +{
3929 + unsigned long flags;
3930 + u32 old_pending;
3931 +
3932 + t->next = NULL;
3933 +
3934 + raw_spin_lock_irqsave(&which->lock, flags);
3935 +
3936 + //__dump_state(which, "___litmus_tasklet_schedule: before queuing");
3937 +
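+ /* append the tasklet to the singly-linked pending list via the tail pointer */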
3938 + *(which->pending_tasklets.tail) = t;
3939 + which->pending_tasklets.tail = &t->next;
3940 +
3941 + old_pending = which->pending;
3942 + which->pending |= LIT_TASKLET_LOW;
3943 +
3944 + atomic_inc(&which->num_low_pending);
3945 +
3946 + mb();
3947 +
3948 + if(!old_pending && wakeup)
3949 + {
3950 + wakeup_litirqd_locked(which); /* wake up the klitirqd */
3951 + }
3952 +
3953 + //__dump_state(which, "___litmus_tasklet_schedule: after queuing");
3954 +
3955 + raw_spin_unlock_irqrestore(&which->lock, flags);
3956 +}
3957 +
3958 +int __litmus_tasklet_schedule(struct tasklet_struct *t, unsigned int k_id)
3959 +{
3960 + int ret = 0; /* assume failure */
3961 + if(unlikely((t->owner == NULL) || !is_realtime(t->owner)))
3962 + {
3963 + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
3964 + BUG();
3965 + }
3966 +
3967 + if(unlikely(k_id >= NR_LITMUS_SOFTIRQD))
3968 + {
3969 + TRACE("%s: No klitirqd_th%d!\n", __FUNCTION__, k_id);
3970 + BUG();
3971 + }
3972 +
3973 + if(likely(!klitirqds[k_id].terminating))
3974 + {
3975 + /* Can't accept tasklets while we're processing a workqueue
3976 + because they're handled by the same thread. This case is
3977 + very RARE.
3978 +
3979 + TODO: Use a separate thread for work objects!!!!!!
3980 + */
3981 + if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0))
3982 + {
3983 + ret = 1;
3984 + ___litmus_tasklet_schedule(t, &klitirqds[k_id], 1);
3985 + }
3986 + else
3987 + {
3988 + TRACE("%s: rejected tasklet because of pending work.\n",
3989 + __FUNCTION__);
3990 + }
3991 + }
3992 + return(ret);
3993 +}
3994 +
3995 +EXPORT_SYMBOL(__litmus_tasklet_schedule);
3996 +
3997 +
3998 +static void ___litmus_tasklet_hi_schedule(struct tasklet_struct *t,
3999 + struct klitirqd_info *which,
4000 + int wakeup)
4001 +{
4002 + unsigned long flags;
4003 + u32 old_pending;
4004 +
4005 + t->next = NULL;
4006 +
4007 + raw_spin_lock_irqsave(&which->lock, flags);
4008 +
4009 + *(which->pending_tasklets_hi.tail) = t;
4010 + which->pending_tasklets_hi.tail = &t->next;
4011 +
4012 + old_pending = which->pending;
4013 + which->pending |= LIT_TASKLET_HI;
4014 +
4015 + atomic_inc(&which->num_hi_pending);
4016 +
4017 + mb();
4018 +
4019 + if(!old_pending && wakeup)
4020 + {
4021 + wakeup_litirqd_locked(which); /* wake up the klitirqd */
4022 + }
4023 +
4024 + raw_spin_unlock_irqrestore(&which->lock, flags);
4025 +}
4026 +
4027 +int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, unsigned int k_id)
4028 +{
4029 + int ret = 0; /* assume failure */
4030 + if(unlikely((t->owner == NULL) || !is_realtime(t->owner)))
4031 + {
4032 + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
4033 + BUG();
4034 + }
4035 +
4036 + if(unlikely(k_id >= NR_LITMUS_SOFTIRQD))
4037 + {
4038 + TRACE("%s: No klitirqd_th%d!\n", __FUNCTION__, k_id);
4039 + BUG();
4040 + }
4041 +
4042 + if(unlikely(!klitirqd_is_ready()))
4043 + {
4044 + TRACE("%s: klitirqd is not ready!\n", __FUNCTION__, k_id);
4045 + BUG();
4046 + }
4047 +
4048 + if(likely(!klitirqds[k_id].terminating))
4049 + {
4050 + if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0))
4051 + {
4052 + ret = 1;
4053 + ___litmus_tasklet_hi_schedule(t, &klitirqds[k_id], 1);
4054 + }
4055 + else
4056 + {
4057 + TRACE("%s: rejected tasklet because of pending work.\n",
4058 + __FUNCTION__);
4059 + }
4060 + }
4061 + return(ret);
4062 +}
4063 +
4064 +EXPORT_SYMBOL(__litmus_tasklet_hi_schedule);
4065 +
4066 +
4067 +int __litmus_tasklet_hi_schedule_first(struct tasklet_struct *t, unsigned int k_id)
4068 +{
4069 + int ret = 0; /* assume failure */
4070 + u32 old_pending;
4071 +
4072 + BUG_ON(!irqs_disabled());
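+ /* the caller must have interrupts disabled: the klitirqd lock is
+ taken below without irqsave. */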
4073 +
4074 + if(unlikely((t->owner == NULL) || !is_realtime(t->owner)))
4075 + {
4076 + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
4077 + BUG();
4078 + }
4079 +
4080 + if(unlikely(k_id >= NR_LITMUS_SOFTIRQD))
4081 + {
4082 + TRACE("%s: No klitirqd_th%u!\n", __FUNCTION__, k_id);
4083 + BUG();
4084 + }
4085 +
4086 + if(unlikely(!klitirqd_is_ready()))
4087 + {
4088 + TRACE("%s: klitirqd is not ready!\n", __FUNCTION__, k_id);
4089 + BUG();
4090 + }
4091 +
4092 + if(likely(!klitirqds[k_id].terminating))
4093 + {
4094 + raw_spin_lock(&klitirqds[k_id].lock);
4095 +
4096 + if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0))
4097 + {
4098 + ret = 1; // success!
4099 +
4100 + t->next = klitirqds[k_id].pending_tasklets_hi.head;
4101 + klitirqds[k_id].pending_tasklets_hi.head = t;
4102 +
4103 + old_pending = klitirqds[k_id].pending;
4104 + klitirqds[k_id].pending |= LIT_TASKLET_HI;
4105 +
4106 + atomic_inc(&klitirqds[k_id].num_hi_pending);
4107 +
4108 + mb();
4109 +
4110 + if(!old_pending)
4111 + wakeup_litirqd_locked(&klitirqds[k_id]); /* wake up the klitirqd */
4112 + }
4113 + else
4114 + {
4115 + TRACE("%s: rejected tasklet because of pending work.\n",
4116 + __FUNCTION__);
4117 + }
4118 +
4119 + raw_spin_unlock(&klitirqds[k_id].lock);
4120 + }
4121 + return(ret);
4122 +}
4123 +
4124 +EXPORT_SYMBOL(__litmus_tasklet_hi_schedule_first);
4125 +
4126 +
4127 +
4128 +static void ___litmus_schedule_work(struct work_struct *w,
4129 + struct klitirqd_info *which,
4130 + int wakeup)
4131 +{
4132 + unsigned long flags;
4133 + u32 old_pending;
4134 +
4135 + raw_spin_lock_irqsave(&which->lock, flags);
4136 +
4137 + work_pending(w);
4138 + list_add_tail(&w->entry, &which->worklist);
4139 +
4140 + old_pending = which->pending;
4141 + which->pending |= LIT_WORK;
4142 +
4143 + atomic_inc(&which->num_work_pending);
4144 +
4145 + mb();
4146 +
4147 + if(!old_pending && wakeup)
4148 + {
4149 + wakeup_litirqd_locked(which); /* wakeup the klitirqd */
4150 + }
4151 +
4152 + raw_spin_unlock_irqrestore(&which->lock, flags);
4153 +}
4154 +
4155 +int __litmus_schedule_work(struct work_struct *w, unsigned int k_id)
4156 +{
4157 + int ret = 1; /* assume success */
4158 + if(unlikely(w->owner == NULL) || !is_realtime(w->owner))
4159 + {
4160 + TRACE("%s: No owner associated with this work object!\n", __FUNCTION__);
4161 + BUG();
4162 + }
4163 +
4164 + if(unlikely(k_id >= NR_LITMUS_SOFTIRQD))
4165 + {
4166 + TRACE("%s: No klitirqd_th%u!\n", k_id);
4167 + BUG();
4168 + }
4169 +
4170 + if(unlikely(!klitirqd_is_ready()))
4171 + {
4172 + TRACE("%s: klitirqd is not ready!\n", __FUNCTION__, k_id);
4173 + BUG();
4174 + }
4175 +
4176 + if(likely(!klitirqds[k_id].terminating))
4177 + ___litmus_schedule_work(w, &klitirqds[k_id], 1);
4178 + else
4179 + ret = 0;
4180 + return(ret);
4181 +}
4182 +EXPORT_SYMBOL(__litmus_schedule_work);
4183 +
4184 +
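+/* Helpers for tracking ownership of a task's klitirqd_sem. The status
+ moves between HELD, NOT_HELD, NEED_TO_REACQUIRE, and REACQUIRING as a
+ task releases the semaphore when it suspends and reacquires it when it
+ resumes (see release_klitirqd_lock() and reacquire_klitirqd_lock()). */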
4185 +static int set_klitirqd_sem_status(unsigned long stat)
4186 +{
4187 + TRACE_CUR("SETTING STATUS FROM %d TO %d\n",
4188 + atomic_read(&tsk_rt(current)->klitirqd_sem_stat),
4189 + stat);
4190 + atomic_set(&tsk_rt(current)->klitirqd_sem_stat, stat);
4191 + //mb();
4192 +
4193 + return(0);
4194 +}
4195 +
4196 +static int set_klitirqd_sem_status_if_not_held(unsigned long stat)
4197 +{
4198 + if(atomic_read(&tsk_rt(current)->klitirqd_sem_stat) != HELD)
4199 + {
4200 + return(set_klitirqd_sem_status(stat));
4201 + }
4202 + return(-1);
4203 +}
4204 +
4205 +
4206 +void __down_and_reset_and_set_stat(struct task_struct* t,
4207 + enum klitirqd_sem_status to_reset,
4208 + enum klitirqd_sem_status to_set,
4209 + struct mutex* sem)
4210 +{
4211 +#if 0
4212 + struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem);
4213 + struct task_struct* task = container_of(param, struct task_struct, rt_param);
4214 +
4215 + TRACE_CUR("%s: entered. Locking semaphore of %s/%d\n",
4216 + __FUNCTION__, task->comm, task->pid);
4217 +#endif
4218 +
4219 + mutex_lock_sfx(sem,
4220 + set_klitirqd_sem_status_if_not_held, to_reset,
4221 + set_klitirqd_sem_status, to_set);
4222 +#if 0
4223 + TRACE_CUR("%s: exiting. Have semaphore of %s/%d\n",
4224 + __FUNCTION__, task->comm, task->pid);
4225 +#endif
4226 +}
4227 +
4228 +void down_and_set_stat(struct task_struct* t,
4229 + enum klitirqd_sem_status to_set,
4230 + struct mutex* sem)
4231 +{
4232 +#if 0
4233 + struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem);
4234 + struct task_struct* task = container_of(param, struct task_struct, rt_param);
4235 +
4236 + TRACE_CUR("%s: entered. Locking semaphore of %s/%d\n",
4237 + __FUNCTION__, task->comm, task->pid);
4238 +#endif
4239 +
4240 + mutex_lock_sfx(sem,
4241 + NULL, 0,
4242 + set_klitirqd_sem_status, to_set);
4243 +
4244 +#if 0
4245 + TRACE_CUR("%s: exiting. Have semaphore of %s/%d\n",
4246 + __FUNCTION__, task->comm, task->pid);
4247 +#endif
4248 +}
4249 +
4250 +
4251 +void up_and_set_stat(struct task_struct* t,
4252 + enum klitirqd_sem_status to_set,
4253 + struct mutex* sem)
4254 +{
4255 +#if 0
4256 + struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem);
4257 + struct task_struct* task = container_of(param, struct task_struct, rt_param);
4258 +
4259 + TRACE_CUR("%s: entered. Unlocking semaphore of %s/%d\n",
4260 + __FUNCTION__,
4261 + task->comm, task->pid);
4262 +#endif
4263 +
4264 + mutex_unlock_sfx(sem, NULL, 0,
4265 + set_klitirqd_sem_status, to_set);
4266 +
4267 +#if 0
4268 + TRACE_CUR("%s: exiting. Unlocked semaphore of %s/%d\n",
4269 + __FUNCTION__,
4270 + task->comm, task->pid);
4271 +#endif
4272 +}
4273 +
4274 +
4275 +
4276 +void release_klitirqd_lock(struct task_struct* t)
4277 +{
4278 + if(is_realtime(t) && (atomic_read(&tsk_rt(t)->klitirqd_sem_stat) == HELD))
4279 + {
4280 + struct mutex* sem;
4281 + struct task_struct* owner = t;
4282 +
4283 + if(t->state == TASK_RUNNING)
4284 + {
4285 + TRACE_TASK(t, "NOT giving up klitirqd_sem because we're not blocked!\n");
4286 + return;
4287 + }
4288 +
4289 + if(likely(!tsk_rt(t)->is_proxy_thread))
4290 + {
4291 + sem = &tsk_rt(t)->klitirqd_sem;
4292 + }
4293 + else
4294 + {
4295 + unsigned int k_id = klitirqd_id(t);
4296 + owner = klitirqds[k_id].current_owner;
4297 +
4298 + BUG_ON(t != klitirqds[k_id].klitirqd);
4299 +
4300 + if(likely(owner))
4301 + {
4302 + sem = &tsk_rt(owner)->klitirqd_sem;
4303 + }
4304 + else
4305 + {
4306 + BUG();
4307 +
4308 + // We had the rug pulled out from under us. Abort attempt
4309 + // to reacquire the lock since our client no longer needs us.
4310 + TRACE_CUR("HUH?! How did this happen?\n");
4311 + atomic_set(&tsk_rt(t)->klitirqd_sem_stat, NOT_HELD);
4312 + return;
4313 + }
4314 + }
4315 +
4316 + //TRACE_CUR("Releasing semaphore of %s/%d...\n", owner->comm, owner->pid);
4317 + up_and_set_stat(t, NEED_TO_REACQUIRE, sem);
4318 + //TRACE_CUR("Semaphore of %s/%d released!\n", owner->comm, owner->pid);
4319 + }
4320 + /*
4321 + else if(is_realtime(t))
4322 + {
4323 + TRACE_CUR("%s: Nothing to do. Stat = %d\n", __FUNCTION__, tsk_rt(t)->klitirqd_sem_stat);
4324 + }
4325 + */
4326 +}
4327 +
4328 +int reacquire_klitirqd_lock(struct task_struct* t)
4329 +{
4330 + int ret = 0;
4331 +
4332 + if(is_realtime(t) && (atomic_read(&tsk_rt(t)->klitirqd_sem_stat) == NEED_TO_REACQUIRE))
4333 + {
4334 + struct mutex* sem;
4335 + struct task_struct* owner = t;
4336 +
4337 + if(likely(!tsk_rt(t)->is_proxy_thread))
4338 + {
4339 + sem = &tsk_rt(t)->klitirqd_sem;
4340 + }
4341 + else
4342 + {
4343 + unsigned int k_id = klitirqd_id(t);
4344 + //struct task_struct* owner = klitirqds[k_id].current_owner;
4345 + owner = klitirqds[k_id].current_owner;
4346 +
4347 + BUG_ON(t != klitirqds[k_id].klitirqd);
4348 +
4349 + if(likely(owner))
4350 + {
4351 + sem = &tsk_rt(owner)->klitirqd_sem;
4352 + }
4353 + else
4354 + {
4355 + // We had the rug pulled out from under us. Abort attempt
4356 + // to reacquire the lock since our client no longer needs us.
4357 + TRACE_CUR("No longer needs to reacquire klitirqd_sem!\n");
4358 + atomic_set(&tsk_rt(t)->klitirqd_sem_stat, NOT_HELD);
4359 + return(0);
4360 + }
4361 + }
4362 +
4363 + //TRACE_CUR("Trying to reacquire semaphore of %s/%d\n", owner->comm, owner->pid);
4364 + __down_and_reset_and_set_stat(t, REACQUIRING, HELD, sem);
4365 + //TRACE_CUR("Reacquired semaphore %s/%d\n", owner->comm, owner->pid);
4366 + }
4367 + /*
4368 + else if(is_realtime(t))
4369 + {
4370 + TRACE_CUR("%s: Nothing to do. Stat = %d\n", __FUNCTION__, tsk_rt(t)->klitirqd_sem_stat);
4371 + }
4372 + */
4373 +
4374 + return(ret);
4375 +}
4376 +
4377 diff --git a/litmus/locking.c b/litmus/locking.c
4378 index 2693f1a..cfce98e 100644
4379 --- a/litmus/locking.c
4380 +++ b/litmus/locking.c
4381 @@ -121,7 +121,6 @@ struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq)
4382 return(t);
4383 }
4384
4385 -
4386 #else
4387
4388 struct fdso_ops generic_lock_ops = {};
4389 diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c
4390 new file mode 100644
4391 index 0000000..d171521
4392 --- /dev/null
4393 +++ b/litmus/nvidia_info.c
4394 @@ -0,0 +1,536 @@
4395 +#include <linux/module.h>
4396 +#include <linux/semaphore.h>
4397 +#include <linux/pci.h>
4398 +
4399 +#include <litmus/sched_trace.h>
4400 +#include <litmus/nvidia_info.h>
4401 +#include <litmus/litmus.h>
4402 +
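+/* The typedefs below mirror the layout of the proprietary NVIDIA driver's
+ internal state structures so that LITMUS can locate fields such as
+ device_num from a tasklet's data pointer. */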
4403 +typedef unsigned char NvV8; /* "void": enumerated or multiple fields */
4404 +typedef unsigned short NvV16; /* "void": enumerated or multiple fields */
4405 +typedef unsigned char NvU8; /* 0 to 255 */
4406 +typedef unsigned short NvU16; /* 0 to 65535 */
4407 +typedef signed char NvS8; /* -128 to 127 */
4408 +typedef signed short NvS16; /* -32768 to 32767 */
4409 +typedef float NvF32; /* IEEE Single Precision (S1E8M23) */
4410 +typedef double NvF64; /* IEEE Double Precision (S1E11M52) */
4411 +typedef unsigned int NvV32; /* "void": enumerated or multiple fields */
4412 +typedef unsigned int NvU32; /* 0 to 4294967295 */
4413 +typedef unsigned long long NvU64; /* 0 to 18446744073709551615 */
4414 +typedef union
4415 +{
4416 + volatile NvV8 Reg008[1];
4417 + volatile NvV16 Reg016[1];
4418 + volatile NvV32 Reg032[1];
4419 +} litmus_nv_hwreg_t, * litmus_nv_phwreg_t;
4420 +
4421 +typedef struct
4422 +{
4423 + NvU64 address;
4424 + NvU64 size;
4425 + NvU32 offset;
4426 + NvU32 *map;
4427 + litmus_nv_phwreg_t map_u;
4428 +} litmus_nv_aperture_t;
4429 +
4430 +typedef struct
4431 +{
4432 + void *priv; /* private data */
4433 + void *os_state; /* os-specific device state */
4434 +
4435 + int rmInitialized;
4436 + int flags;
4437 +
4438 + /* PCI config info */
4439 + NvU32 domain;
4440 + NvU16 bus;
4441 + NvU16 slot;
4442 + NvU16 vendor_id;
4443 + NvU16 device_id;
4444 + NvU16 subsystem_id;
4445 + NvU32 gpu_id;
4446 + void *handle;
4447 +
4448 + NvU32 pci_cfg_space[16];
4449 +
4450 + /* physical characteristics */
4451 + litmus_nv_aperture_t bars[3];
4452 + litmus_nv_aperture_t *regs;
4453 + litmus_nv_aperture_t *fb, ud;
4454 + litmus_nv_aperture_t agp;
4455 +
4456 + NvU32 interrupt_line;
4457 +
4458 + NvU32 agp_config;
4459 + NvU32 agp_status;
4460 +
4461 + NvU32 primary_vga;
4462 +
4463 + NvU32 sim_env;
4464 +
4465 + NvU32 rc_timer_enabled;
4466 +
4467 + /* list of events allocated for this device */
4468 + void *event_list;
4469 +
4470 + void *kern_mappings;
4471 +
4472 +} litmus_nv_state_t;
4473 +
4474 +typedef struct work_struct litmus_nv_task_t;
4475 +
4476 +typedef struct litmus_nv_work_s {
4477 + litmus_nv_task_t task;
4478 + void *data;
4479 +} litmus_nv_work_t;
4480 +
4481 +typedef struct litmus_nv_linux_state_s {
4482 + litmus_nv_state_t nv_state;
4483 + atomic_t usage_count;
4484 +
4485 + struct pci_dev *dev;
4486 + void *agp_bridge;
4487 + void *alloc_queue;
4488 +
4489 + void *timer_sp;
4490 + void *isr_sp;
4491 + void *pci_cfgchk_sp;
4492 + void *isr_bh_sp;
4493 +
4494 +#ifdef CONFIG_CUDA_4_0
4495 + char registry_keys[512];
4496 +#endif
4497 +
4498 + /* keep track of any pending bottom halves */
4499 + struct tasklet_struct tasklet;
4500 + litmus_nv_work_t work;
4501 +
4502 + /* get a timer callback every second */
4503 + struct timer_list rc_timer;
4504 +
4505 + /* lock for linux-specific data, not used by core rm */
4506 + struct semaphore ldata_lock;
4507 +
4508 + /* lock for linux-specific alloc queue */
4509 + struct semaphore at_lock;
4510 +
4511 +#if 0
4512 +#if defined(NV_USER_MAP)
4513 + /* list of user mappings */
4514 + struct nv_usermap_s *usermap_list;
4515 +
4516 + /* lock for VMware-specific mapping list */
4517 + struct semaphore mt_lock;
4518 +#endif /* defined(NV_USER_MAP) */
4519 +#if defined(NV_PM_SUPPORT_OLD_STYLE_APM)
4520 + void *apm_nv_dev;
4521 +#endif
4522 +#endif
4523 +
4524 + NvU32 device_num;
4525 + struct litmus_nv_linux_state_s *next;
4526 +} litmus_nv_linux_state_t;
4527 +
4528 +void dump_nvidia_info(const struct tasklet_struct *t)
4529 +{
4530 + litmus_nv_state_t* nvstate = NULL;
4531 + litmus_nv_linux_state_t* linuxstate = NULL;
4532 + struct pci_dev* pci = NULL;
4533 +
4534 + nvstate = (litmus_nv_state_t*)(t->data);
4535 +
4536 + if(nvstate)
4537 + {
4538 + TRACE("NV State:\n"
4539 + "\ttasklet ptr = %p\n"
4540 + "\tstate ptr = %p\n"
4541 + "\tprivate data ptr = %p\n"
4542 + "\tos state ptr = %p\n"
4543 + "\tdomain = %u\n"
4544 + "\tbus = %u\n"
4545 + "\tslot = %u\n"
4546 + "\tvender_id = %u\n"
4547 + "\tdevice_id = %u\n"
4548 + "\tsubsystem_id = %u\n"
4549 + "\tgpu_id = %u\n"
4550 + "\tinterrupt_line = %u\n",
4551 + t,
4552 + nvstate,
4553 + nvstate->priv,
4554 + nvstate->os_state,
4555 + nvstate->domain,
4556 + nvstate->bus,
4557 + nvstate->slot,
4558 + nvstate->vendor_id,
4559 + nvstate->device_id,
4560 + nvstate->subsystem_id,
4561 + nvstate->gpu_id,
4562 + nvstate->interrupt_line);
4563 +
4564 + linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state);
4565 + }
4566 + else
4567 + {
4568 + TRACE("INVALID NVSTATE????\n");
4569 + }
4570 +
4571 + if(linuxstate)
4572 + {
4573 + int ls_offset = (void*)(&(linuxstate->device_num)) - (void*)(linuxstate);
4574 + int ns_offset_raw = (void*)(&(linuxstate->device_num)) - (void*)(&(linuxstate->nv_state));
4575 + int ns_offset_desired = (void*)(&(linuxstate->device_num)) - (void*)(nvstate);
4576 +
4577 +
4578 + TRACE("LINUX NV State:\n"
4579 + "\tlinux nv state ptr: %p\n"
4580 + "\taddress of tasklet: %p\n"
4581 + "\taddress of work: %p\n"
4582 + "\tusage_count: %d\n"
4583 + "\tdevice_num: %u\n"
4584 + "\ttasklet addr == this tasklet: %d\n"
4585 + "\tpci: %p\n",
4586 + linuxstate,
4587 + &(linuxstate->tasklet),
4588 + &(linuxstate->work),
4589 + atomic_read(&(linuxstate->usage_count)),
4590 + linuxstate->device_num,
4591 + (t == &(linuxstate->tasklet)),
4592 + linuxstate->dev);
4593 +
4594 + pci = linuxstate->dev;
4595 +
4596 + TRACE("Offsets:\n"
4597 + "\tOffset from LinuxState: %d, %x\n"
4598 + "\tOffset from NVState: %d, %x\n"
4599 + "\tOffset from parameter: %d, %x\n"
4600 + "\tdevice_num: %u\n",
4601 + ls_offset, ls_offset,
4602 + ns_offset_raw, ns_offset_raw,
4603 + ns_offset_desired, ns_offset_desired,
4604 + *((u32*)((void*)nvstate + ns_offset_desired)));
4605 + }
4606 + else
4607 + {
4608 + TRACE("INVALID LINUXNVSTATE?????\n");
4609 + }
4610 +
4611 +#if 0
4612 + if(pci)
4613 + {
4614 + TRACE("PCI DEV Info:\n"
4615 + "pci device ptr: %p\n"
4616 + "\tdevfn = %d\n"
4617 + "\tvendor = %d\n"
4618 + "\tdevice = %d\n"
4619 + "\tsubsystem_vendor = %d\n"
4620 + "\tsubsystem_device = %d\n"
4621 + "\tslot # = %d\n",
4622 + pci,
4623 + pci->devfn,
4624 + pci->vendor,
4625 + pci->device,
4626 + pci->subsystem_vendor,
4627 + pci->subsystem_device,
4628 + pci->slot->number);
4629 + }
4630 + else
4631 + {
4632 + TRACE("INVALID PCIDEV PTR?????\n");
4633 + }
4634 +#endif
4635 +}
4636 +
4637 +static struct module* nvidia_mod = NULL;
4638 +int init_nvidia_info(void)
4639 +{
4640 + mutex_lock(&module_mutex);
4641 + nvidia_mod = find_module("nvidia");
4642 + mutex_unlock(&module_mutex);
4643 + if(nvidia_mod != NULL)
4644 + {
4645 + TRACE("%s : Found NVIDIA module. Core Code: %p to %p\n", __FUNCTION__,
4646 + (void*)(nvidia_mod->module_core),
4647 + (void*)(nvidia_mod->module_core) + nvidia_mod->core_size);
4648 + init_nv_device_reg();
4649 + return(0);
4650 + }
4651 + else
4652 + {
4653 + TRACE("%s : Could not find NVIDIA module! Loaded?\n", __FUNCTION__);
4654 + return(-1);
4655 + }
4656 +}
4657 +
4658 +
4659 +/* works with pointers to static data inside the module too. */
4660 +int is_nvidia_func(void* func_addr)
4661 +{
4662 + int ret = 0;
4663 + if(nvidia_mod)
4664 + {
4665 + ret = within_module_core((long unsigned int)func_addr, nvidia_mod);
4666 + /*
4667 + if(ret)
4668 + {
4669 + TRACE("%s : %p is in NVIDIA module: %d\n",
4670 + __FUNCTION__, func_addr, ret);
4671 + }*/
4672 + }
4673 +
4674 + return(ret);
4675 +}
4676 +
4677 +u32 get_tasklet_nv_device_num(const struct tasklet_struct *t)
4678 +{
4679 + // life is too short to use hard-coded offsets. update this later.
4680 + litmus_nv_state_t* nvstate = (litmus_nv_state_t*)(t->data);
4681 + litmus_nv_linux_state_t* linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state);
4682 +
4683 + BUG_ON(linuxstate->device_num >= NV_DEVICE_NUM);
4684 +
4685 + return(linuxstate->device_num);
4686 +
4687 + //int DEVICE_NUM_OFFSET = (void*)(&(linuxstate->device_num)) - (void*)(nvstate);
4688 +
4689 +#if 0
4690 + // offset determined through observed behavior of the NV driver.
4691 + //const int DEVICE_NUM_OFFSET = 0x480; // CUDA 4.0 RC1
4692 + //const int DEVICE_NUM_OFFSET = 0x510; // CUDA 4.0 RC2
4693 +
4694 + void* state = (void*)(t->data);
4695 + void* device_num_ptr = state + DEVICE_NUM_OFFSET;
4696 +
4697 + //dump_nvidia_info(t);
4698 + return(*((u32*)device_num_ptr));
4699 +#endif
4700 +}
4701 +
4702 +u32 get_work_nv_device_num(const struct work_struct *t)
4703 +{
4704 + // offset determined through observed behavior of the NV driver.
4705 + const int DEVICE_NUM_OFFSET = sizeof(struct work_struct);
4706 + void* state = (void*)(t);
4707 + void** device_num_ptr = state + DEVICE_NUM_OFFSET;
4708 + return(*((u32*)(*device_num_ptr)));
4709 +}
4710 +
4711 +
4712 +
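+/* Per-GPU registry that maps each NVIDIA device to the real-time task
+ that currently owns it. */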
4713 +typedef struct {
4714 + raw_spinlock_t lock;
4715 + struct task_struct *device_owner;
4716 +}nv_device_registry_t;
4717 +
4718 +static nv_device_registry_t NV_DEVICE_REG[NV_DEVICE_NUM];
4719 +
4720 +int init_nv_device_reg(void)
4721 +{
4722 + int i;
4723 +
4724 + //memset(NV_DEVICE_REG, 0, sizeof(NV_DEVICE_REG));
4725 +
4726 + for(i = 0; i < NV_DEVICE_NUM; ++i)
4727 + {
4728 + raw_spin_lock_init(&NV_DEVICE_REG[i].lock);
4729 + NV_DEVICE_REG[i].device_owner = NULL;
4730 + }
4731 +
4732 + return(1);
4733 +}
4734 +
4735 +/* Use to get the nv_device_id for a given owner.
4736 + (returns -1 if the associated device id cannot be found) */
4737 +/*
4738 +int get_nv_device_id(struct task_struct* owner)
4739 +{
4740 + int i;
4741 + if(!owner)
4742 + {
4743 + return(-1);
4744 + }
4745 + for(i = 0; i < NV_DEVICE_NUM; ++i)
4746 + {
4747 + if(NV_DEVICE_REG[i].device_owner == owner)
4748 + return(i);
4749 + }
4750 + return(-1);
4751 +}
4752 +*/
4753 +
4754 +
4755 +
4756 +static int __reg_nv_device(int reg_device_id)
4757 +{
4758 + int ret = 0;
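+ /* atomically claim the device; succeeds only if no task currently owns it */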
4759 + struct task_struct* old =
4760 + cmpxchg(&NV_DEVICE_REG[reg_device_id].device_owner,
4761 + NULL,
4762 + current);
4763 +
4764 + mb();
4765 +
4766 + if(likely(old == NULL))
4767 + {
4768 +#ifdef CONFIG_LITMUS_SOFTIRQD
4769 + down_and_set_stat(current, HELD, &tsk_rt(current)->klitirqd_sem);
4770 +#endif
4771 + TRACE_CUR("%s: device %d registered.\n", __FUNCTION__, reg_device_id);
4772 + }
4773 + else
4774 + {
4775 + TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id);
4776 + ret = -EBUSY;
4777 + }
4778 +
4779 + return(ret);
4780 +
4781 +
4782 +
4783 +#if 0
4784 + //unsigned long flags;
4785 + //raw_spin_lock_irqsave(&NV_DEVICE_REG[reg_device_id].lock, flags);
4786 + //lock_nv_registry(reg_device_id, &flags);
4787 +
4788 + if(likely(NV_DEVICE_REG[reg_device_id].device_owner == NULL))
4789 + {
4790 + NV_DEVICE_REG[reg_device_id].device_owner = current;
4791 + mb(); // needed?
4792 +
4793 + // release spin lock before chance of going to sleep.
4794 + //raw_spin_unlock_irqrestore(&NV_DEVICE_REG[reg_device_id].lock, flags);
4795 + //unlock_nv_registry(reg_device_id, &flags);
4796 +
4797 + down_and_set_stat(current, HELD, &tsk_rt(current)->klitirqd_sem);
4798 + TRACE_CUR("%s: device %d registered.\n", __FUNCTION__, reg_device_id);
4799 + return(0);
4800 + }
4801 + else
4802 + {
4803 + //raw_spin_unlock_irqrestore(&NV_DEVICE_REG[reg_device_id].lock, flags);
4804 + //unlock_nv_registry(reg_device_id, &flags);
4805 +
4806 + TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id);
4807 + return(-EBUSY);
4808 + }
4809 +#endif
4810 +}
4811 +
4812 +static int __clear_reg_nv_device(int de_reg_device_id)
4813 +{
4814 + int ret = 0;
4815 + struct task_struct* old;
4816 +
4817 +#ifdef CONFIG_LITMUS_SOFTIRQD
4818 + unsigned long flags;
4819 + struct task_struct* klitirqd_th = get_klitirqd(de_reg_device_id);
4820 + lock_nv_registry(de_reg_device_id, &flags);
4821 +#endif
4822 +
4823 + old = cmpxchg(&NV_DEVICE_REG[de_reg_device_id].device_owner,
4824 + current,
4825 + NULL);
4826 +
4827 + mb();
4828 +
4829 +#ifdef CONFIG_LITMUS_SOFTIRQD
4830 + if(likely(old == current))
4831 + {
4832 + flush_pending(klitirqd_th, current);
4833 + //unlock_nv_registry(de_reg_device_id, &flags);
4834 +
4835 + up_and_set_stat(current, NOT_HELD, &tsk_rt(current)->klitirqd_sem);
4836 +
4837 + unlock_nv_registry(de_reg_device_id, &flags);
4838 + ret = 0;
4839 +
4840 + TRACE_CUR("%s: semaphore released.\n",__FUNCTION__);
4841 + }
4842 + else
4843 + {
4844 + unlock_nv_registry(de_reg_device_id, &flags);
4845 + ret = -EINVAL;
4846 +
4847 + if(old)
4848 + TRACE_CUR("%s: device %d is not registered for this process's use! %s/%d is!\n",
4849 + __FUNCTION__, de_reg_device_id, old->comm, old->pid);
4850 + else
4851 + TRACE_CUR("%s: device %d is not registered for this process's use! No one is!\n",
4852 + __FUNCTION__, de_reg_device_id);
4853 + }
4854 +#endif
4855 +
4856 + return(ret);
4857 +}
4858 +
4859 +
4860 +int reg_nv_device(int reg_device_id, int reg_action)
4861 +{
4862 + int ret;
4863 +
4864 + if((reg_device_id < NV_DEVICE_NUM) && (reg_device_id >= 0))
4865 + {
4866 + if(reg_action)
4867 + ret = __reg_nv_device(reg_device_id);
4868 + else
4869 + ret = __clear_reg_nv_device(reg_device_id);
4870 + }
4871 + else
4872 + {
4873 + ret = -ENODEV;
4874 + }
4875 +
4876 + return(ret);
4877 +}
4878 +
4879 +/* use to get the owner of nv_device_id. */
4880 +struct task_struct* get_nv_device_owner(u32 target_device_id)
4881 +{
4882 + struct task_struct* owner;
4883 + BUG_ON(target_device_id >= NV_DEVICE_NUM);
4884 + owner = NV_DEVICE_REG[target_device_id].device_owner;
4885 + return(owner);
4886 +}
4887 +
4888 +void lock_nv_registry(u32 target_device_id, unsigned long* flags)
4889 +{
4890 + BUG_ON(target_device_id >= NV_DEVICE_NUM);
4891 +
4892 + if(in_interrupt())
4893 + TRACE("Locking registry for %d.\n", target_device_id);
4894 + else
4895 + TRACE_CUR("Locking registry for %d.\n", target_device_id);
4896 +
4897 + raw_spin_lock_irqsave(&NV_DEVICE_REG[target_device_id].lock, *flags);
4898 +}
4899 +
4900 +void unlock_nv_registry(u32 target_device_id, unsigned long* flags)
4901 +{
4902 + BUG_ON(target_device_id >= NV_DEVICE_NUM);
4903 +
4904 + if(in_interrupt())
4905 + TRACE("Unlocking registry for %d.\n", target_device_id);
4906 + else
4907 + TRACE_CUR("Unlocking registry for %d.\n", target_device_id);
4908 +
4909 + raw_spin_unlock_irqrestore(&NV_DEVICE_REG[target_device_id].lock, *flags);
4910 +}
4911 +
4912 +
4913 +void increment_nv_int_count(u32 device)
4914 +{
4915 + unsigned long flags;
4916 + struct task_struct* owner;
4917 +
4918 + lock_nv_registry(device, &flags);
4919 +
4920 + owner = NV_DEVICE_REG[device].device_owner;
4921 + if(owner)
4922 + {
4923 + atomic_inc(&tsk_rt(owner)->nv_int_count);
4924 + }
4925 +
4926 + unlock_nv_registry(device, &flags);
4927 +}
4928 +EXPORT_SYMBOL(increment_nv_int_count);
4929 +
4930 +
4931 diff --git a/litmus/preempt.c b/litmus/preempt.c
4932 index ebe2e34..08b98c3 100644
4933 --- a/litmus/preempt.c
4934 +++ b/litmus/preempt.c
4935 @@ -30,8 +30,11 @@ void sched_state_will_schedule(struct task_struct* tsk)
4936 /* Litmus tasks should never be subject to a remote
4937 * set_tsk_need_resched(). */
4938 BUG_ON(is_realtime(tsk));
4939 +
4940 +/*
4941 TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n",
4942 __builtin_return_address(0));
4943 +*/
4944 }
4945
4946 /* Called by the IPI handler after another CPU called smp_send_resched(). */
4947 @@ -43,13 +46,17 @@ void sched_state_ipi(void)
4948 /* Cause scheduler to be invoked.
4949 * This will cause a transition to WILL_SCHEDULE. */
4950 set_tsk_need_resched(current);
4951 + /*
4952 TRACE_STATE("IPI -> set_tsk_need_resched(%s/%d)\n",
4953 current->comm, current->pid);
4954 + */
4955 } else {
4956 /* ignore */
4957 + /*
4958 TRACE_STATE("ignoring IPI in state %x (%s)\n",
4959 get_sched_state(),
4960 sched_state_name(get_sched_state()));
4961 + */
4962 }
4963 }
4964
4965 diff --git a/litmus/rm_common.c b/litmus/rm_common.c
4966 new file mode 100644
4967 index 0000000..88f83bc
4968 --- /dev/null
4969 +++ b/litmus/rm_common.c
4970 @@ -0,0 +1,160 @@
4971 +/*
4972 + * litmus/rm_common.c
4973 + *
4974 + * Common functions for RM based schedulers.
4975 + */
4976 +
4977 +#include <linux/percpu.h>
4978 +#include <linux/sched.h>
4979 +#include <linux/list.h>
4980 +
4981 +#include <litmus/litmus.h>
4982 +#include <litmus/sched_plugin.h>
4983 +#include <litmus/sched_trace.h>
4984 +
4985 +#include <litmus/rm_common.h>
4986 +
4987 +/* rm_higher_prio - returns true if first has a higher RM priority
4988 + * than second. Period ties are broken by PID.
4989 + *
4990 + * both first and second may be NULL
4991 + */
4992 +int rm_higher_prio(struct task_struct* first,
4993 + struct task_struct* second)
4994 +{
4995 + struct task_struct *first_task = first;
4996 + struct task_struct *second_task = second;
4997 +
4998 + /* There is no point in comparing a task to itself. */
4999 + if (first && first == second) {
5000 + TRACE_TASK(first,
5001 + "WARNING: pointless edf priority comparison.\n");
5002 + return 0;
5003 + }
5004 +
5005 +
5006 + /* check for NULL tasks */
5007 + if (!first || !second)
5008 + return first && !second;
5009 +
5010 +#ifdef CONFIG_LITMUS_LOCKING
5011 +
5012 + /* Check for inherited priorities. Change task
5013 + * used for comparison in such a case.
5014 + */
5015 + if (unlikely(first->rt_param.inh_task))
5016 + first_task = first->rt_param.inh_task;
5017 + if (unlikely(second->rt_param.inh_task))
5018 + second_task = second->rt_param.inh_task;
5019 +
5020 + /* Check for priority boosting. Tie-break by start of boosting.
5021 + */
5022 + if (unlikely(is_priority_boosted(first_task))) {
5023 + /* first_task is boosted, how about second_task? */
5024 + if (!is_priority_boosted(second_task) ||
5025 + lt_before(get_boost_start(first_task),
5026 + get_boost_start(second_task)))
5027 + return 1;
5028 + else
5029 + return 0;
5030 + } else if (unlikely(is_priority_boosted(second_task)))
5031 + /* second_task is boosted, first is not*/
5032 + return 0;
5033 +
5034 +#endif
5035 +
5036 + if (!is_realtime(second_task))
5037 + return true;
5038 +
5039 + if (shorter_period(first_task, second_task))
5040 + return true;
5041 +
5042 + if (get_rt_period(first_task) == get_rt_period(second_task))
5043 + {
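+ /* period tie: non-proxy threads beat proxy (klitirqd) threads;
+ remaining ties are broken by PID, then by priority inheritance. */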
5044 +#ifdef CONFIG_LITMUS_SOFTIRQD
5045 + if (first_task->rt_param.is_proxy_thread < second_task->rt_param.is_proxy_thread)
5046 + {
5047 + return true;
5048 + }
5049 + if (first_task->rt_param.is_proxy_thread == second_task->rt_param.is_proxy_thread)
5050 + {
5051 +#endif
5052 + if (first_task->pid < second_task->pid)
5053 + {
5054 + return true;
5055 + }
5056 + if (first_task->pid == second_task->pid)
5057 + {
5058 + return !second->rt_param.inh_task;
5059 + }
5060 +#ifdef CONFIG_LITMUS_SOFTIRQD
5061 + }
5062 +#endif
5063 + }
5064 +
5065 + return false;
5066 +
5067 +#if 0
5068 + return !is_realtime(second_task) ||
5069 + shorter_period(first_task, second_task) ||
5070 + ((get_rt_period(first_task) == get_rt_period(second_task)) && earlier_deadline(first_task, second_task))
5071 +
5072 +#ifdef CONFIG_LITMUS_SOFTIRQD
5073 + /* proxy threads always lose w/o inheritance. */
5074 + (first_task->rt_param.is_proxy_thread <
5075 + second_task->rt_param.is_proxy_thread) ||
5076 +#endif
5077 +
5078 + /* is the period of the first task shorter?
5079 + * Then it has higher priority.
5080 + */
5081 + shorter_period(first_task, second_task) ||
5082 +
5083 + (earlier_deadline(first_task, second_task) ||
5084 +
5085 + /* Do we have a deadline tie?
5086 + * Then break by PID.
5087 + */
5088 + (get_rt_period(first_task) == get_rt_period(second_task) &&
5089 + (first_task->pid < second_task->pid ||
5090 +
5091 + /* If the PIDs are the same then the task with the inherited
5092 + * priority wins.
5093 + */
5094 + (first_task->pid == second_task->pid &&
5095 + !second->rt_param.inh_task)));
5096 +#endif
5097 +}
5098 +
5099 +int rm_ready_order(struct bheap_node* a, struct bheap_node* b)
5100 +{
5101 + return rm_higher_prio(bheap2task(a), bheap2task(b));
5102 +}
5103 +
5104 +void rm_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
5105 + release_jobs_t release)
5106 +{
5107 + rt_domain_init(rt, rm_ready_order, resched, release);
5108 +}
5109 +
5110 +/* need_to_preempt - check whether the task t needs to be preempted
5111 + * call only with irqs disabled and with ready_lock acquired
5112 + * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT!
5113 + */
5114 +int rm_preemption_needed(rt_domain_t* rt, struct task_struct *t)
5115 +{
5116 + /* we need the read lock for rm_ready_queue */
5117 + /* no need to preempt if there is nothing pending */
5118 + if (!__jobs_pending(rt))
5119 + return 0;
5120 + /* we need to reschedule if t doesn't exist */
5121 + if (!t)
5122 + return 1;
5123 +
5124 + /* NOTE: We cannot check for non-preemptibility since we
5125 + * don't know what address space we're currently in.
5126 + */
5127 +
5128 + /* make sure to get non-rt stuff out of the way */
5129 + return !is_realtime(t) || rm_higher_prio(__next_ready(rt), t);
5130 +}
5131 diff --git a/litmus/rm_srt_common.c b/litmus/rm_srt_common.c
5132 new file mode 100644
5133 index 0000000..f58a800
5134 --- /dev/null
5135 +++ b/litmus/rm_srt_common.c
5136 @@ -0,0 +1,167 @@
5137 +/*
5138 + * litmus/rm_srt_common.c
5139 + *
5140 + * Common functions for RM-SRT based schedulers.
5141 + */
5142 +
5143 +#include <linux/percpu.h>
5144 +#include <linux/sched.h>
5145 +#include <linux/list.h>
5146 +
5147 +#include <litmus/litmus.h>
5148 +#include <litmus/sched_plugin.h>
5149 +#include <litmus/sched_trace.h>
5150 +
5151 +#include <litmus/rm_common.h>
5152 +
5153 +/* rm_srt_higher_prio - returns true if first has a higher RM-SRT priority
5154 + * than second. Period ties are broken by deadline, then by PID.
5155 + *
5156 + * both first and second may be NULL
5157 + */
5158 +int rm_srt_higher_prio(struct task_struct* first,
5159 + struct task_struct* second)
5160 +{
5161 + struct task_struct *first_task = first;
5162 + struct task_struct *second_task = second;
5163 +
5164 + /* There is no point in comparing a task to itself. */
5165 + if (first && first == second) {
5166 + TRACE_TASK(first,
5167 + "WARNING: pointless edf priority comparison.\n");
5168 + return 0;
5169 + }
5170 +
5171 +
5172 + /* check for NULL tasks */
5173 + if (!first || !second)
5174 + return first && !second;
5175 +
5176 +#ifdef CONFIG_LITMUS_LOCKING
5177 +
5178 + /* Check for inherited priorities. Change task
5179 + * used for comparison in such a case.
5180 + */
5181 + if (unlikely(first->rt_param.inh_task))
5182 + first_task = first->rt_param.inh_task;
5183 + if (unlikely(second->rt_param.inh_task))
5184 + second_task = second->rt_param.inh_task;
5185 +
5186 + /* Check for priority boosting. Tie-break by start of boosting.
5187 + */
5188 + if (unlikely(is_priority_boosted(first_task))) {
5189 + /* first_task is boosted, how about second_task? */
5190 + if (!is_priority_boosted(second_task) ||
5191 + lt_before(get_boost_start(first_task),
5192 + get_boost_start(second_task)))
5193 + return 1;
5194 + else
5195 + return 0;
5196 + } else if (unlikely(is_priority_boosted(second_task)))
5197 + /* second_task is boosted, first is not*/
5198 + return 0;
5199 +
5200 +#endif
5201 +
5202 + if (!is_realtime(second_task))
5203 + return true;
5204 +
5205 + if (shorter_period(first_task, second_task))
5206 + return true;
5207 +
5208 + if (get_rt_period(first_task) == get_rt_period(second_task))
5209 + {
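+ /* period tie: break by earlier deadline, then proxy status, then PID. */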
5210 + if (earlier_deadline(first_task, second_task))
5211 + {
5212 + return true;
5213 + }
5214 + if(get_deadline(first_task) == get_deadline(second_task))
5215 + {
5216 +#ifdef CONFIG_LITMUS_SOFTIRQD
5217 + if (first_task->rt_param.is_proxy_thread < second_task->rt_param.is_proxy_thread)
5218 + {
5219 + return true;
5220 + }
5221 + if (first_task->rt_param.is_proxy_thread == second_task->rt_param.is_proxy_thread)
5222 + {
5223 +#endif
5224 + if (first_task->pid < second_task->pid)
5225 + {
5226 + return true;
5227 + }
5228 + if (first_task->pid == second_task->pid)
5229 + {
5230 + return !second->rt_param.inh_task;
5231 + }
5232 +#ifdef CONFIG_LITMUS_SOFTIRQD
5233 + }
5234 +#endif
5235 + }
5236 + }
5237 +
5238 + return false;
5239 +
5240 +#if 0
5241 + return !is_realtime(second_task) ||
5242 + shorter_period(first_task, second_task) ||
5243 + ((get_rt_period(first_task) == get_rt_period(second_task)) && earlier_deadline(first_task, second_task))
5244 +
5245 +#ifdef CONFIG_LITMUS_SOFTIRQD
5246 + /* proxy threads always lose w/o inheritance. */
5247 + (first_task->rt_param.is_proxy_thread <
5248 + second_task->rt_param.is_proxy_thread) ||
5249 +#endif
5250 +
5251 + /* is the period of the first task shorter?
5252 + * Then it has higher priority.
5253 + */
5254 + shorter_period(first_task, second_task) ||
5255 +
5256 + (earlier_deadline(first_task, second_task) ||
5257 +
5258 + /* Do we have a deadline tie?
5259 + * Then break by PID.
5260 + */
5261 + (get_rt_period(first_task) == get_rt_period(second_task) &&
5262 + (first_task->pid < second_task->pid ||
5263 +
5264 + /* If the PIDs are the same then the task with the inherited
5265 + * priority wins.
5266 + */
5267 + (first_task->pid == second_task->pid &&
5268 + !second->rt_param.inh_task)));
5269 +#endif
5270 +}
5271 +
5272 +int rm_srt_ready_order(struct bheap_node* a, struct bheap_node* b)
5273 +{
5274 + return rm_srt_higher_prio(bheap2task(a), bheap2task(b));
5275 +}
5276 +
5277 +void rm_srt_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
5278 + release_jobs_t release)
5279 +{
5280 + rt_domain_init(rt, rm_srt_ready_order, resched, release);
5281 +}
5282 +
5283 +/* need_to_preempt - check whether the task t needs to be preempted
5284 + * call only with irqs disabled and with ready_lock acquired
5285 + * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT!
5286 + */
5287 +int rm_srt_preemption_needed(rt_domain_t* rt, struct task_struct *t)
5288 +{
5289 + /* we need the read lock for rm_ready_queue */
5290 + /* no need to preempt if there is nothing pending */
5291 + if (!__jobs_pending(rt))
5292 + return 0;
5293 + /* we need to reschedule if t doesn't exist */
5294 + if (!t)
5295 + return 1;
5296 +
5297 + /* NOTE: We cannot check for non-preemptibility since we
5298 + * don't know what address space we're currently in.
5299 + */
5300 +
5301 + /* make sure to get non-rt stuff out of the way */
5302 + return !is_realtime(t) || rm_srt_higher_prio(__next_ready(rt), t);
5303 +}
5304 diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
5305 index 73fe1c4..9aa5822 100644
5306 --- a/litmus/sched_cedf.c
5307 +++ b/litmus/sched_cedf.c
5308 @@ -29,6 +29,7 @@
5309 #include <linux/percpu.h>
5310 #include <linux/sched.h>
5311 #include <linux/slab.h>
5312 +#include <linux/uaccess.h>
5313
5314 #include <linux/module.h>
5315
5316 @@ -45,7 +46,23 @@
5317
5318 /* to configure the cluster size */
5319 #include <litmus/litmus_proc.h>
5320 -#include <linux/uaccess.h>
5321 +
5322 +#ifdef CONFIG_SCHED_CPU_AFFINITY
5323 +#include <litmus/affinity.h>
5324 +#endif
5325 +
5326 +#ifdef CONFIG_LITMUS_SOFTIRQD
5327 +#include <litmus/litmus_softirq.h>
5328 +#endif
5329 +
5330 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
5331 +#include <linux/interrupt.h>
5332 +#include <litmus/trace.h>
5333 +#endif
5334 +
5335 +#ifdef CONFIG_LITMUS_NVIDIA
5336 +#include <litmus/nvidia_info.h>
5337 +#endif
5338
5339 /* Reference configuration variable. Determines which cache level is used to
5340 * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that
5341 @@ -79,6 +96,15 @@ DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries);
5342 #define test_will_schedule(cpu) \
5343 (atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule))
5344
5345 +
5346 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
5347 +struct tasklet_head
5348 +{
5349 + struct tasklet_struct *head;
5350 + struct tasklet_struct **tail;
5351 +};
5352 +#endif
5353 +
5354 /*
5355 * In C-EDF there is a cedf domain _per_ cluster
5356 * The number of clusters is dynamically determined accordingly to the
5357 @@ -95,7 +121,13 @@ typedef struct clusterdomain {
5358 struct bheap_node *heap_node;
5359 struct bheap cpu_heap;
5360 /* lock for this cluster */
5361 -#define lock domain.ready_lock
5362 +#define cedf_lock domain.ready_lock
5363 +
5364 +
5365 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
5366 + struct tasklet_head pending_tasklets;
5367 +#endif
5368 +
5369 } cedf_domain_t;
5370
5371 /* a cedf_domain per cluster; allocation is done at init/activation time */
5372 @@ -257,21 +289,50 @@ static noinline void requeue(struct task_struct* task)
5373 }
5374 }
5375
5376 +#ifdef CONFIG_SCHED_CPU_AFFINITY
5377 +static cpu_entry_t* cedf_get_nearest_available_cpu(
5378 + cedf_domain_t *cluster, cpu_entry_t* start)
5379 +{
5380 + cpu_entry_t* affinity;
5381 +
5382 + get_nearest_available_cpu(affinity, start, cedf_cpu_entries, -1);
5383 +
5384 + /* make sure CPU is in our cluster */
5385 + if(affinity && cpu_isset(affinity->cpu, *cluster->cpu_map))
5386 + return(affinity);
5387 + else
5388 + return(NULL);
5389 +}
5390 +#endif
5391 +
5392 +
5393 /* check for any necessary preemptions */
5394 static void check_for_preemptions(cedf_domain_t *cluster)
5395 {
5396 struct task_struct *task;
5397 - cpu_entry_t* last;
5398 + cpu_entry_t *last;
5399
5400 for(last = lowest_prio_cpu(cluster);
5401 edf_preemption_needed(&cluster->domain, last->linked);
5402 last = lowest_prio_cpu(cluster)) {
5403 /* preemption necessary */
5404 task = __take_ready(&cluster->domain);
5405 - TRACE("check_for_preemptions: attempting to link task %d to %d\n",
5406 - task->pid, last->cpu);
5407 +#ifdef CONFIG_SCHED_CPU_AFFINITY
5408 + {
5409 + cpu_entry_t* affinity =
5410 + cedf_get_nearest_available_cpu(cluster,
5411 + &per_cpu(cedf_cpu_entries, task_cpu(task)));
5412 + if(affinity)
5413 + last = affinity;
5414 + else if(last->linked)
5415 + requeue(last->linked);
5416 + }
5417 +#else
5418 if (last->linked)
5419 requeue(last->linked);
5420 +#endif
5421 + TRACE("check_for_preemptions: attempting to link task %d to %d\n",
5422 + task->pid, last->cpu);
5423 link_task_to_cpu(task, last);
5424 preempt(last);
5425 }
5426 @@ -292,12 +353,12 @@ static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
5427 cedf_domain_t* cluster = container_of(rt, cedf_domain_t, domain);
5428 unsigned long flags;
5429
5430 - raw_spin_lock_irqsave(&cluster->lock, flags);
5431 + raw_spin_lock_irqsave(&cluster->cedf_lock, flags);
5432
5433 __merge_ready(&cluster->domain, tasks);
5434 check_for_preemptions(cluster);
5435
5436 - raw_spin_unlock_irqrestore(&cluster->lock, flags);
5437 + raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags);
5438 }
5439
5440 /* caller holds cedf_lock */
5441 @@ -307,6 +368,10 @@ static noinline void job_completion(struct task_struct *t, int forced)
5442
5443 sched_trace_task_completion(t, forced);
5444
5445 +#ifdef CONFIG_LITMUS_NVIDIA
5446 + atomic_set(&tsk_rt(t)->nv_int_count, 0);
5447 +#endif
5448 +
5449 TRACE_TASK(t, "job_completion().\n");
5450
5451 /* set flags */
5452 @@ -350,6 +415,461 @@ static void cedf_tick(struct task_struct* t)
5453 }
5454 }
5455
5456 +
5457 +
5458 +
5459 +
5460 +
5461 +
5462 +
5463 +
5464 +
5465 +
5466 +
5467 +
5468 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
5469 +
5470 +
5471 +static void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed)
5472 +{
5473 + if (!atomic_read(&tasklet->count)) {
5474 + if(tasklet->owner) {
5475 + sched_trace_tasklet_begin(tasklet->owner);
5476 + }
5477 +
5478 + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state))
5479 + {
5480 + BUG();
5481 + }
5482 + TRACE("%s: Invoking tasklet with owner pid = %d (flushed = %d).\n",
5483 + __FUNCTION__,
5484 + (tasklet->owner) ? tasklet->owner->pid : -1,
5485 + (tasklet->owner) ? 0 : 1);
5486 + tasklet->func(tasklet->data);
5487 + tasklet_unlock(tasklet);
5488 +
5489 + if(tasklet->owner) {
5490 + sched_trace_tasklet_end(tasklet->owner, flushed);
5491 + }
5492 + }
5493 + else {
5494 + BUG();
5495 + }
5496 +}
5497 +
5498 +
5499 +static void __extract_tasklets(cedf_domain_t* cluster, struct task_struct* task, struct tasklet_head* task_tasklets)
5500 +{
5501 + struct tasklet_struct* step;
5502 + struct tasklet_struct* tasklet;
5503 + struct tasklet_struct* prev;
5504 +
5505 + task_tasklets->head = NULL;
5506 + task_tasklets->tail = &(task_tasklets->head);
5507 +
5508 + prev = NULL;
5509 + for(step = cluster->pending_tasklets.head; step != NULL; step = step->next)
5510 + {
5511 + if(step->owner == task)
5512 + {
5513 + TRACE("%s: Found tasklet to flush: %d\n", __FUNCTION__, step->owner->pid);
5514 +
5515 + tasklet = step;
5516 +
5517 + if(prev) {
5518 + prev->next = tasklet->next;
5519 + }
5520 + else if(cluster->pending_tasklets.head == tasklet) {
5521 + // we're at the head.
5522 + cluster->pending_tasklets.head = tasklet->next;
5523 + }
5524 +
5525 + if(cluster->pending_tasklets.tail == &tasklet) {
5526 + // we're at the tail
5527 + if(prev) {
5528 + cluster->pending_tasklets.tail = &prev;
5529 + }
5530 + else {
5531 + cluster->pending_tasklets.tail = &(cluster->pending_tasklets.head);
5532 + }
5533 + }
5534 +
5535 + tasklet->next = NULL;
5536 + *(task_tasklets->tail) = tasklet;
5537 + task_tasklets->tail = &(tasklet->next);
5538 + }
5539 + else {
5540 + prev = step;
5541 + }
5542 + }
5543 +}
5544 +
5545 +static void flush_tasklets(cedf_domain_t* cluster, struct task_struct* task)
5546 +{
5547 +#if 0
5548 + unsigned long flags;
5549 + struct tasklet_head task_tasklets;
5550 + struct tasklet_struct* step;
5551 +
5552 + raw_spin_lock_irqsave(&cluster->cedf_lock, flags);
5553 + __extract_tasklets(cluster, task, &task_tasklets);
5554 + raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags);
5555 +
5556 + if(cluster->pending_tasklets.head != NULL) {
5557 + TRACE("%s: Flushing tasklets for %d...\n", __FUNCTION__, task->pid);
5558 + }
5559 +
5560 + // now execute any flushed tasklets.
5561 + for(step = cluster->pending_tasklets.head; step != NULL; /**/)
5562 + {
5563 + struct tasklet_struct* temp = step->next;
5564 +
5565 + step->next = NULL;
5566 + __do_lit_tasklet(step, 1ul);
5567 +
5568 + step = temp;
5569 + }
5570 +#endif
5571 +
5572 + // lazy flushing.
5573 + // just change ownership to NULL and let an idle processor
5574 + // take care of it. :P
5575 +
5576 + struct tasklet_struct* step;
5577 + unsigned long flags;
5578 +
5579 + raw_spin_lock_irqsave(&cluster->cedf_lock, flags);
5580 +
5581 + for(step = cluster->pending_tasklets.head; step != NULL; step = step->next)
5582 + {
5583 + if(step->owner == task)
5584 + {
5585 + TRACE("%s: Found tasklet to flush: %d\n", __FUNCTION__, step->owner->pid);
5586 + step->owner = NULL;
5587 + }
5588 + }
5589 +
5590 + raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags);
5591 +}
5592 +
5593 +
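+/* Pop and run pending tasklets for as long as the tasklet at the head of
+ the cluster queue has higher priority than the task scheduled on this CPU. */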
5594 +static void do_lit_tasklets(cedf_domain_t* cluster, struct task_struct* sched_task)
5595 +{
5596 + int work_to_do = 1;
5597 + struct tasklet_struct *tasklet = NULL;
5598 + //struct tasklet_struct *step;
5599 + unsigned long flags;
5600 +
5601 + while(work_to_do) {
5602 +
5603 + TS_NV_SCHED_BOTISR_START;
5604 +
5605 + // remove tasklet at head of list if it has higher priority.
5606 + raw_spin_lock_irqsave(&cluster->cedf_lock, flags);
5607 +
5608 +/*
5609 + step = cluster->pending_tasklets.head;
5610 + TRACE("%s: (BEFORE) dumping tasklet queue...\n", __FUNCTION__);
5611 + while(step != NULL){
5612 + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid);
5613 + step = step->next;
5614 + }
5615 + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1);
5616 + TRACE("%s: done.\n", __FUNCTION__);
5617 + */
5618 +
5619 + if(cluster->pending_tasklets.head != NULL) {
5620 + // remove tasklet at head.
5621 + tasklet = cluster->pending_tasklets.head;
5622 +
5623 + if(edf_higher_prio(tasklet->owner, sched_task)) {
5624 +
5625 + if(NULL == tasklet->next) {
5626 + // tasklet is at the head, list only has one element
5627 + TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, (tasklet->owner) ? tasklet->owner->pid : -1);
5628 + cluster->pending_tasklets.tail = &(cluster->pending_tasklets.head);
5629 + }
5630 +
5631 + // remove the tasklet from the queue
5632 + cluster->pending_tasklets.head = tasklet->next;
5633 +
5634 + TRACE("%s: Removed tasklet for %d from tasklet queue.\n", __FUNCTION__, (tasklet->owner) ? tasklet->owner->pid : -1);
5635 + }
5636 + else {
5637 + TRACE("%s: Pending tasklet (%d) does not have priority to run on this CPU (%d).\n", __FUNCTION__, (tasklet->owner) ? tasklet->owner->pid : -1, smp_processor_id());
5638 + tasklet = NULL;
5639 + }
5640 + }
5641 + else {
5642 + TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__);
5643 + }
5644 +
5645 +
5646 + /*
5647 + step = cluster->pending_tasklets.head;
5648 + TRACE("%s: (AFTER) dumping tasklet queue...\n", __FUNCTION__);
5649 + while(step != NULL){
5650 + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid);
5651 + step = step->next;
5652 + }
5653 + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1);
5654 + TRACE("%s: done.\n", __FUNCTION__);
5655 + */
5656 +
5657 + raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags);
5658 +
5659 +
5660 + TS_NV_SCHED_BOTISR_END;
5661 +
5662 + if(tasklet) {
5663 + __do_lit_tasklet(tasklet, 0ul);
5664 + tasklet = NULL;
5665 + }
5666 + else {
5667 + work_to_do = 0;
5668 + }
5669 + }
5670 +
5671 + //TRACE("%s: exited.\n", __FUNCTION__);
5672 +}
5673 +
5674 +
5675 +static void run_tasklets(struct task_struct* sched_task)
5676 +{
5677 + cedf_domain_t* cluster;
5678 +
5679 +#if 0
5680 + int task_is_rt = is_realtime(sched_task);
5681 + cedf_domain_t* cluster;
5682 +
5683 + if(is_realtime(sched_task)) {
5684 + cluster = task_cpu_cluster(sched_task);
5685 + }
5686 + else {
5687 + cluster = remote_cluster(get_cpu());
5688 + }
5689 +
5690 + if(cluster && cluster->pending_tasklets.head != NULL) {
5691 + TRACE("%s: There are tasklets to process.\n", __FUNCTION__);
5692 +
5693 + do_lit_tasklets(cluster, sched_task);
5694 + }
5695 +
5696 + if(!task_is_rt) {
5697 + put_cpu_no_resched();
5698 + }
5699 +#else
5700 +
5701 + preempt_disable();
5702 +
5703 + cluster = (is_realtime(sched_task)) ?
5704 + task_cpu_cluster(sched_task) :
5705 + remote_cluster(smp_processor_id());
5706 +
5707 + if(cluster && cluster->pending_tasklets.head != NULL) {
5708 + TRACE("%s: There are tasklets to process.\n", __FUNCTION__);
5709 + do_lit_tasklets(cluster, sched_task);
5710 + }
5711 +
5712 + preempt_enable_no_resched();
5713 +
5714 +#endif
5715 +}
5716 +
5717 +
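+/* Insert a tasklet into the cluster's pending list, which is intended to be
+ kept sorted by the owner's EDF priority (highest priority at the head). */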
5718 +static void __add_pai_tasklet(struct tasklet_struct* tasklet, cedf_domain_t* cluster)
5719 +{
5720 + struct tasklet_struct* step;
5721 +
5722 + /*
5723 + step = cluster->pending_tasklets.head;
5724 + TRACE("%s: (BEFORE) dumping tasklet queue...\n", __FUNCTION__);
5725 + while(step != NULL){
5726 + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid);
5727 + step = step->next;
5728 + }
5729 + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1);
5730 + TRACE("%s: done.\n", __FUNCTION__);
5731 + */
5732 +
5733 +
5734 + tasklet->next = NULL; // make sure there are no old values floating around
5735 +
5736 + step = cluster->pending_tasklets.head;
5737 + if(step == NULL) {
5738 + TRACE("%s: tasklet queue empty. inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid);
5739 + // insert at tail.
5740 + *(cluster->pending_tasklets.tail) = tasklet;
5741 + cluster->pending_tasklets.tail = &(tasklet->next);
5742 + }
5743 + else if((*(cluster->pending_tasklets.tail) != NULL) &&
5744 + edf_higher_prio((*(cluster->pending_tasklets.tail))->owner, tasklet->owner)) {
5745 + // insert at tail.
5746 + TRACE("%s: tasklet belongs at end. inserting tasklet for %d at tail.\n", __FUNCTION__, tasklet->owner->pid);
5747 +
5748 + *(cluster->pending_tasklets.tail) = tasklet;
5749 + cluster->pending_tasklets.tail = &(tasklet->next);
5750 + }
+	else {
+
+		//WARN_ON(1 == 1);
+
+		// insert the tasklet somewhere in the middle.
+
+		TRACE("%s: tasklet belongs somewhere in the middle.\n", __FUNCTION__);
+
+		if(edf_higher_prio(tasklet->owner, step->owner)) {
+			// the new tasklet outranks the current head.
+			TRACE("%s: %d is the new tasklet queue head.\n", __FUNCTION__, tasklet->owner->pid);
+
+			tasklet->next = step;
+			cluster->pending_tasklets.head = tasklet;
+		}
+		else {
+			while(step->next && edf_higher_prio(step->next->owner, tasklet->owner)) {
+				step = step->next;
+			}
+
+			// insert tasklet right before step->next.
+
+			TRACE("%s: inserting tasklet for %d between %d and %d.\n", __FUNCTION__,
+				tasklet->owner->pid,
+				(step->owner) ?
+					step->owner->pid :
+					-1,
+				(step->next) ?
+					((step->next->owner) ?
+						step->next->owner->pid :
+						-1) :
+					-1);
+
+			tasklet->next = step->next;
+			step->next = tasklet;
+
+			// if the tasklet went in at the very end, advance the tail pointer.
+			if(tasklet->next == NULL) {
+				cluster->pending_tasklets.tail = &(tasklet->next);
+			}
+		}
+	}
5786 +
5787 + /*
5788 + step = cluster->pending_tasklets.head;
5789 + TRACE("%s: (AFTER) dumping tasklet queue...\n", __FUNCTION__);
5790 + while(step != NULL){
5791 + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid);
5792 + step = step->next;
5793 + }
5794 + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1);
5795 + TRACE("%s: done.\n", __FUNCTION__);
5796 + */
5797 +
5798 +// NOTE: the insertion above keeps this list in priority order; a simple tail-append is kept below for reference.
5799 +// tasklet->next = NULL;
5800 +// *(cluster->pending_tasklets.tail) = tasklet;
5801 +// cluster->pending_tasklets.tail = &tasklet->next;
5802 +}
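+
+/* Worked example of the ordering maintained above (made-up PIDs): with
+ * owners already queued in decreasing EDF priority as A -> B -> C, a new
+ * tasklet whose owner falls between A and B is linked in as
+ * A -> new -> B -> C; one that outranks A becomes the new head; one that
+ * ranks below C is appended and ".tail" is advanced to &new->next.  The
+ * list thus stays sorted with the highest-priority owner at the head, so
+ * do_lit_tasklets() only ever needs to inspect the head.
+ */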
5803 +
5804 +static int enqueue_pai_tasklet(struct tasklet_struct* tasklet)
5805 +{
5806 + cedf_domain_t *cluster = NULL;
5807 + cpu_entry_t *targetCPU = NULL;
5808 + int thisCPU;
5809 + int runLocal = 0;
5810 + int runNow = 0;
5811 + unsigned long flags;
5812 +
5813 + if(unlikely((tasklet->owner == NULL) || !is_realtime(tasklet->owner)))
5814 + {
5815 + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
5816 + return 0;
5817 + }
5818 +
5819 + cluster = task_cpu_cluster(tasklet->owner);
5820 +
5821 + raw_spin_lock_irqsave(&cluster->cedf_lock, flags);
5822 +
5823 + thisCPU = smp_processor_id();
5824 +
5825 +#if 1
5826 +#ifdef CONFIG_SCHED_CPU_AFFINITY
5827 + {
5828 + cpu_entry_t* affinity = NULL;
5829 +
5830 + // use this CPU if it is in our cluster and isn't running any RT work.
5831 + if(cpu_isset(thisCPU, *cluster->cpu_map) && (__get_cpu_var(cedf_cpu_entries).linked == NULL)) {
5832 + affinity = &(__get_cpu_var(cedf_cpu_entries));
5833 + }
5834 + else {
5835 + // this CPU is busy or shouldn't run tasklet in this cluster.
5836 +				// look for an available nearby CPU.
5837 +				// NOTE: affinity is computed relative to the owner's CPU, not this CPU. Is this right?
5838 + affinity =
5839 + cedf_get_nearest_available_cpu(cluster,
5840 + &per_cpu(cedf_cpu_entries, task_cpu(tasklet->owner)));
5841 + }
5842 +
5843 + targetCPU = affinity;
5844 + }
5845 +#endif
5846 +#endif
5847 +
5848 + if (targetCPU == NULL) {
5849 + targetCPU = lowest_prio_cpu(cluster);
5850 + }
5851 +
5852 + if (edf_higher_prio(tasklet->owner, targetCPU->linked)) {
5853 + if (thisCPU == targetCPU->cpu) {
5854 + TRACE("%s: Run tasklet locally (and now).\n", __FUNCTION__);
5855 + runLocal = 1;
5856 + runNow = 1;
5857 + }
5858 + else {
5859 + TRACE("%s: Run tasklet remotely (and now).\n", __FUNCTION__);
5860 + runLocal = 0;
5861 + runNow = 1;
5862 + }
5863 + }
5864 + else {
5865 + runLocal = 0;
5866 + runNow = 0;
5867 + }
5868 +
5869 + if(!runLocal) {
5870 + // enqueue the tasklet
5871 + __add_pai_tasklet(tasklet, cluster);
5872 + }
5873 +
5874 + raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags);
5875 +
5876 +
5877 + if (runLocal /*&& runNow */) { // runNow == 1 is implied
5878 + TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__);
5879 + __do_lit_tasklet(tasklet, 0ul);
5880 + }
5881 + else if (runNow /*&& !runLocal */) { // runLocal == 0 is implied
5882 + TRACE("%s: Triggering CPU %d to run tasklet.\n", __FUNCTION__, targetCPU->cpu);
5883 + preempt(targetCPU); // need to be protected by cedf_lock?
5884 + }
5885 + else {
5886 + TRACE("%s: Scheduling of tasklet was deferred.\n", __FUNCTION__);
5887 + }
5888 +
5889 + return(1); // success
5890 +}
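+
+/* Summary of the dispatch decision above (descriptive only): when the
+ * tasklet's owner outranks the task linked to the chosen target CPU, the
+ * tasklet runs immediately -- inline if the target is this CPU, otherwise
+ * it is queued and the remote CPU is poked with preempt().  When the owner
+ * does not outrank the target, the tasklet is only queued and will be
+ * picked up later by run_tasklets()/do_lit_tasklets().
+ */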
5891 +
5892 +
5893 +#endif
5894 +
5895 +
5896 +
5897 +
5898 +
5899 +
5900 +
5901 +
5902 +
5903 +
5904 +
5905 +
5906 +
5907 +
5908 +
5909 +
5910 +
5911 /* Getting schedule() right is a bit tricky. schedule() may not make any
5912 * assumptions on the state of the current task since it may be called for a
5913 * number of reasons. The reasons include a scheduler_tick() determined that it
5914 @@ -378,7 +898,7 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
5915 int out_of_time, sleep, preempt, np, exists, blocks;
5916 struct task_struct* next = NULL;
5917
5918 - raw_spin_lock(&cluster->lock);
5919 + raw_spin_lock(&cluster->cedf_lock);
5920 clear_will_schedule();
5921
5922 /* sanity checking */
5923 @@ -462,8 +982,18 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
5924 next = prev;
5925
5926 sched_state_task_picked();
5927 - raw_spin_unlock(&cluster->lock);
5928 -
5929 +
5930 + raw_spin_unlock(&cluster->cedf_lock);
5931 +
5932 + /*
5933 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
5934 +	if(cluster->pending_tasklets.head != NULL) // peek at the data; normally protected by cluster->cedf_lock
5935 + {
5936 + do_lit_tasklets(cluster, next);
5937 + }
5938 +#endif
5939 +*/
5940 +
5941 #ifdef WANT_ALL_SCHED_EVENTS
5942 TRACE("cedf_lock released, next=0x%p\n", next);
5943
5944 @@ -473,7 +1003,6 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
5945 TRACE("becomes idle at %llu.\n", litmus_clock());
5946 #endif
5947
5948 -
5949 return next;
5950 }
5951
5952 @@ -504,7 +1033,7 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running)
5953 /* the cluster doesn't change even if t is running */
5954 cluster = task_cpu_cluster(t);
5955
5956 - raw_spin_lock_irqsave(&cluster->domain.ready_lock, flags);
5957 + raw_spin_lock_irqsave(&cluster->cedf_lock, flags);
5958
5959 /* setup job params */
5960 release_at(t, litmus_clock());
5961 @@ -521,20 +1050,22 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running)
5962 t->rt_param.linked_on = NO_CPU;
5963
5964 cedf_job_arrival(t);
5965 - raw_spin_unlock_irqrestore(&(cluster->domain.ready_lock), flags);
5966 + raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags);
5967 }
5968
5969 static void cedf_task_wake_up(struct task_struct *task)
5970 {
5971 unsigned long flags;
5972 - lt_t now;
5973 + //lt_t now;
5974 cedf_domain_t *cluster;
5975
5976 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
5977
5978 cluster = task_cpu_cluster(task);
5979
5980 - raw_spin_lock_irqsave(&cluster->lock, flags);
5981 + raw_spin_lock_irqsave(&cluster->cedf_lock, flags);
5982 +
5983 +#if 0 // sporadic task model
5984 /* We need to take suspensions because of semaphores into
5985 * account! If a job resumes after being suspended due to acquiring
5986 * a semaphore, it should never be treated as a new job release.
5987 @@ -556,8 +1087,17 @@ static void cedf_task_wake_up(struct task_struct *task)
5988 }
5989 }
5990 }
5991 - cedf_job_arrival(task);
5992 - raw_spin_unlock_irqrestore(&cluster->lock, flags);
5993 +#endif
5994 +
5995 + //BUG_ON(tsk_rt(task)->linked_on != NO_CPU);
5996 + set_rt_flags(task, RT_F_RUNNING); // periodic model
5997 +
5998 + if(tsk_rt(task)->linked_on == NO_CPU)
5999 + cedf_job_arrival(task);
6000 + else
6001 +		TRACE("wake_up: task is already linked; skipping job arrival.\n");
6002 +
6003 + raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags);
6004 }
6005
6006 static void cedf_task_block(struct task_struct *t)
6007 @@ -570,9 +1110,9 @@ static void cedf_task_block(struct task_struct *t)
6008 cluster = task_cpu_cluster(t);
6009
6010 /* unlink if necessary */
6011 - raw_spin_lock_irqsave(&cluster->lock, flags);
6012 + raw_spin_lock_irqsave(&cluster->cedf_lock, flags);
6013 unlink(t);
6014 - raw_spin_unlock_irqrestore(&cluster->lock, flags);
6015 + raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags);
6016
6017 BUG_ON(!is_realtime(t));
6018 }
6019 @@ -583,8 +1123,12 @@ static void cedf_task_exit(struct task_struct * t)
6020 unsigned long flags;
6021 cedf_domain_t *cluster = task_cpu_cluster(t);
6022
6023 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
6024 + flush_tasklets(cluster, t);
6025 +#endif
6026 +
6027 /* unlink if necessary */
6028 - raw_spin_lock_irqsave(&cluster->lock, flags);
6029 + raw_spin_lock_irqsave(&cluster->cedf_lock, flags);
6030 unlink(t);
6031 if (tsk_rt(t)->scheduled_on != NO_CPU) {
6032 cpu_entry_t *cpu;
6033 @@ -592,7 +1136,7 @@ static void cedf_task_exit(struct task_struct * t)
6034 cpu->scheduled = NULL;
6035 tsk_rt(t)->scheduled_on = NO_CPU;
6036 }
6037 - raw_spin_unlock_irqrestore(&cluster->lock, flags);
6038 + raw_spin_unlock_irqrestore(&cluster->cedf_lock, flags);
6039
6040 BUG_ON(!is_realtime(t));
6041 TRACE_TASK(t, "RIP\n");
6042 @@ -603,6 +1147,721 @@ static long cedf_admit_task(struct task_struct* tsk)
6043 return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
6044 }
6045
6046 +
6047 +
6048 +
6049 +
6050 +
6051 +
6052 +
6053 +
6054 +
6055 +
6056 +
6057 +
6058 +#ifdef CONFIG_LITMUS_LOCKING
6059 +
6060 +#include <litmus/fdso.h>
6061 +
6062 +
6063 +static void __set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
6064 +{
6065 + int linked_on;
6066 + int check_preempt = 0;
6067 +
6068 + cedf_domain_t* cluster = task_cpu_cluster(t);
6069 +
6070 + if(prio_inh != NULL)
6071 + TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid);
6072 + else
6073 + TRACE_TASK(t, "inherits priority from %p\n", prio_inh);
6074 +
6075 + sched_trace_eff_prio_change(t, prio_inh);
6076 +
6077 + tsk_rt(t)->inh_task = prio_inh;
6078 +
6079 + linked_on = tsk_rt(t)->linked_on;
6080 +
6081 + /* If it is scheduled, then we need to reorder the CPU heap. */
6082 + if (linked_on != NO_CPU) {
6083 + TRACE_TASK(t, "%s: linked on %d\n",
6084 + __FUNCTION__, linked_on);
6085 + /* Holder is scheduled; need to re-order CPUs.
6086 + * We can't use heap_decrease() here since
6087 + * the cpu_heap is ordered in reverse direction, so
6088 + * it is actually an increase. */
6089 + bheap_delete(cpu_lower_prio, &cluster->cpu_heap,
6090 + per_cpu(cedf_cpu_entries, linked_on).hn);
6091 + bheap_insert(cpu_lower_prio, &cluster->cpu_heap,
6092 + per_cpu(cedf_cpu_entries, linked_on).hn);
6093 + } else {
6094 + /* holder may be queued: first stop queue changes */
6095 + raw_spin_lock(&cluster->domain.release_lock);
6096 + if (is_queued(t)) {
6097 + TRACE_TASK(t, "%s: is queued\n", __FUNCTION__);
6098 +
6099 + /* We need to update the position of holder in some
6100 +			 * heap. Note that this could be a release heap if
6101 + * budget enforcement is used and this job overran. */
6102 + check_preempt = !bheap_decrease(edf_ready_order, tsk_rt(t)->heap_node);
6103 +
6104 + } else {
6105 + /* Nothing to do: if it is not queued and not linked
6106 + * then it is either sleeping or currently being moved
6107 + * by other code (e.g., a timer interrupt handler) that
6108 + * will use the correct priority when enqueuing the
6109 + * task. */
6110 + TRACE_TASK(t, "%s: is NOT queued => Done.\n", __FUNCTION__);
6111 + }
6112 + raw_spin_unlock(&cluster->domain.release_lock);
6113 +
6114 + /* If holder was enqueued in a release heap, then the following
6115 + * preemption check is pointless, but we can't easily detect
6116 + * that case. If you want to fix this, then consider that
6117 + * simply adding a state flag requires O(n) time to update when
6118 + * releasing n tasks, which conflicts with the goal to have
6119 + * O(log n) merges. */
6120 + if (check_preempt) {
6121 + /* heap_decrease() hit the top level of the heap: make
6122 + * sure preemption checks get the right task, not the
6123 + * potentially stale cache. */
6124 + bheap_uncache_min(edf_ready_order, &cluster->domain.ready_queue);
6125 + check_for_preemptions(cluster);
6126 + }
6127 + }
6128 +}
6129 +
6130 +/* called with IRQs off */
6131 +static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
6132 +{
6133 + cedf_domain_t* cluster = task_cpu_cluster(t);
6134 +
6135 + raw_spin_lock(&cluster->cedf_lock);
6136 +
6137 + __set_priority_inheritance(t, prio_inh);
6138 +
6139 +#ifdef CONFIG_LITMUS_SOFTIRQD
6140 + if(tsk_rt(t)->cur_klitirqd != NULL)
6141 + {
6142 + TRACE_TASK(t, "%s/%d inherits a new priority!\n",
6143 + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
6144 +
6145 + __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh);
6146 + }
6147 +#endif
6148 +
6149 + raw_spin_unlock(&cluster->cedf_lock);
6150 +}
6151 +
6152 +
6153 +/* called with IRQs off */
6154 +static void __clear_priority_inheritance(struct task_struct* t)
6155 +{
6156 + TRACE_TASK(t, "priority restored\n");
6157 +
6158 + if(tsk_rt(t)->scheduled_on != NO_CPU)
6159 + {
6160 + sched_trace_eff_prio_change(t, NULL);
6161 +
6162 + tsk_rt(t)->inh_task = NULL;
6163 +
6164 + /* Check if rescheduling is necessary. We can't use heap_decrease()
6165 + * since the priority was effectively lowered. */
6166 + unlink(t);
6167 + cedf_job_arrival(t);
6168 + }
6169 + else
6170 + {
6171 + __set_priority_inheritance(t, NULL);
6172 + }
6173 +
6174 +#ifdef CONFIG_LITMUS_SOFTIRQD
6175 + if(tsk_rt(t)->cur_klitirqd != NULL)
6176 + {
6177 + TRACE_TASK(t, "%s/%d inheritance set back to owner.\n",
6178 + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
6179 +
6180 + if(tsk_rt(tsk_rt(t)->cur_klitirqd)->scheduled_on != NO_CPU)
6181 + {
6182 + sched_trace_eff_prio_change(tsk_rt(t)->cur_klitirqd, t);
6183 +
6184 + tsk_rt(tsk_rt(t)->cur_klitirqd)->inh_task = t;
6185 +
6186 + /* Check if rescheduling is necessary. We can't use heap_decrease()
6187 + * since the priority was effectively lowered. */
6188 + unlink(tsk_rt(t)->cur_klitirqd);
6189 + cedf_job_arrival(tsk_rt(t)->cur_klitirqd);
6190 + }
6191 + else
6192 + {
6193 + __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, t);
6194 + }
6195 + }
6196 +#endif
6197 +}
6198 +
6199 +/* called with IRQs off */
6200 +static void clear_priority_inheritance(struct task_struct* t)
6201 +{
6202 + cedf_domain_t* cluster = task_cpu_cluster(t);
6203 +
6204 + raw_spin_lock(&cluster->cedf_lock);
6205 + __clear_priority_inheritance(t);
6206 + raw_spin_unlock(&cluster->cedf_lock);
6207 +}
6208 +
6209 +
6210 +
6211 +#ifdef CONFIG_LITMUS_SOFTIRQD
6212 +/* called with IRQs off */
6213 +static void set_priority_inheritance_klitirqd(struct task_struct* klitirqd,
6214 + struct task_struct* old_owner,
6215 + struct task_struct* new_owner)
6216 +{
6217 + cedf_domain_t* cluster = task_cpu_cluster(klitirqd);
6218 +
6219 + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
6220 +
6221 + raw_spin_lock(&cluster->cedf_lock);
6222 +
6223 + if(old_owner != new_owner)
6224 + {
6225 + if(old_owner)
6226 + {
6227 + // unreachable?
6228 + tsk_rt(old_owner)->cur_klitirqd = NULL;
6229 + }
6230 +
6231 + TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n",
6232 + new_owner->comm, new_owner->pid);
6233 +
6234 + tsk_rt(new_owner)->cur_klitirqd = klitirqd;
6235 + }
6236 +
6237 + __set_priority_inheritance(klitirqd,
6238 + (tsk_rt(new_owner)->inh_task == NULL) ?
6239 + new_owner :
6240 + tsk_rt(new_owner)->inh_task);
6241 +
6242 + raw_spin_unlock(&cluster->cedf_lock);
6243 +}
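+
+/* Illustrative chain (made-up names): if the new owner T itself currently
+ * inherits from a higher-priority task H (i.e. tsk_rt(T)->inh_task == H),
+ * the daemon is handed H directly rather than T, so the klitirqd thread
+ * always runs at the owner's *effective* priority without chaining
+ * inheritance through T.
+ */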
6244 +
6245 +/* called with IRQs off */
6246 +static void clear_priority_inheritance_klitirqd(struct task_struct* klitirqd,
6247 + struct task_struct* old_owner)
6248 +{
6249 + cedf_domain_t* cluster = task_cpu_cluster(klitirqd);
6250 +
6251 + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
6252 +
6253 + raw_spin_lock(&cluster->cedf_lock);
6254 +
6255 + TRACE_TASK(klitirqd, "priority restored\n");
6256 +
6257 + if(tsk_rt(klitirqd)->scheduled_on != NO_CPU)
6258 + {
6259 + tsk_rt(klitirqd)->inh_task = NULL;
6260 +
6261 + /* Check if rescheduling is necessary. We can't use heap_decrease()
6262 + * since the priority was effectively lowered. */
6263 + unlink(klitirqd);
6264 + cedf_job_arrival(klitirqd);
6265 + }
6266 + else
6267 + {
6268 + __set_priority_inheritance(klitirqd, NULL);
6269 + }
6270 +
6271 + tsk_rt(old_owner)->cur_klitirqd = NULL;
6272 +
6273 + raw_spin_unlock(&cluster->cedf_lock);
6274 +}
6275 +#endif // CONFIG_LITMUS_SOFTIRQD
6276 +
6277 +
6278 +/* ******************** KFMLP support ********************** */
6279 +
6280 +/* struct for semaphore with priority inheritance */
6281 +struct kfmlp_queue
6282 +{
6283 + wait_queue_head_t wait;
6284 + struct task_struct* owner;
6285 + struct task_struct* hp_waiter;
6286 + int count; /* number of waiters + holder */
6287 +};
6288 +
6289 +struct kfmlp_semaphore
6290 +{
6291 + struct litmus_lock litmus_lock;
6292 +
6293 + spinlock_t lock;
6294 +
6295 + int num_resources; /* aka k */
6296 + struct kfmlp_queue *queues; /* array */
6297 + struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */
6298 +};
6299 +
6300 +static inline struct kfmlp_semaphore* kfmlp_from_lock(struct litmus_lock* lock)
6301 +{
6302 + return container_of(lock, struct kfmlp_semaphore, litmus_lock);
6303 +}
6304 +
6305 +static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem,
6306 + struct kfmlp_queue* queue)
6307 +{
6308 + return (queue - &sem->queues[0]);
6309 +}
6310 +
6311 +static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem,
6312 + struct task_struct* holder)
6313 +{
6314 + int i;
6315 + for(i = 0; i < sem->num_resources; ++i)
6316 + if(sem->queues[i].owner == holder)
6317 + return(&sem->queues[i]);
6318 + return(NULL);
6319 +}
6320 +
6321 +/* caller is responsible for locking */
6322 +static struct task_struct* kfmlp_find_hp_waiter(struct kfmlp_queue *kqueue,
6323 + struct task_struct *skip)
6324 +{
6325 + struct list_head *pos;
6326 + struct task_struct *queued, *found = NULL;
6327 +
6328 + list_for_each(pos, &kqueue->wait.task_list) {
6329 + queued = (struct task_struct*) list_entry(pos, wait_queue_t,
6330 + task_list)->private;
6331 +
6332 + /* Compare task prios, find high prio task. */
6333 + if (queued != skip && edf_higher_prio(queued, found))
6334 + found = queued;
6335 + }
6336 + return found;
6337 +}
6338 +
6339 +static inline struct kfmlp_queue* kfmlp_find_shortest(
6340 + struct kfmlp_semaphore* sem,
6341 + struct kfmlp_queue* search_start)
6342 +{
6343 + // we start our search at search_start instead of at the beginning of the
6344 + // queue list to load-balance across all resources.
6345 + struct kfmlp_queue* step = search_start;
6346 + struct kfmlp_queue* shortest = sem->shortest_queue;
6347 +
6348 + do
6349 + {
6350 + step = (step+1 != &sem->queues[sem->num_resources]) ?
6351 + step+1 : &sem->queues[0];
6352 + if(step->count < shortest->count)
6353 + {
6354 + shortest = step;
6355 + if(step->count == 0)
6356 + break; /* can't get any shorter */
6357 + }
6358 + }while(step != search_start);
6359 +
6360 + return(shortest);
6361 +}
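+
+/* Illustrative trace (made-up counts): suppose queues[1] was the cached
+ * shortest queue, a waiter was just added to it, and the replica lengths
+ * are now {2, 3, 0, 1}.  The scan starts just past queues[1], immediately
+ * finds queues[2] with a count of zero, and stops, since no queue can be
+ * shorter.  Starting just past the queue that was last modified spreads
+ * requests across the replicas instead of always favoring queues[0].
+ */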
6362 +
6363 +static struct task_struct* kfmlp_remove_hp_waiter(struct kfmlp_semaphore* sem)
6364 +{
6365 + /* must hold sem->lock */
6366 +
6367 + struct kfmlp_queue *my_queue = NULL;
6368 + struct task_struct *max_hp = NULL;
6369 +
6370 +
6371 + struct list_head *pos;
6372 + struct task_struct *queued;
6373 + int i;
6374 +
6375 + for(i = 0; i < sem->num_resources; ++i)
6376 + {
6377 + if( (sem->queues[i].count > 1) &&
6378 + ((my_queue == NULL) ||
6379 + (edf_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) )
6380 + {
6381 + my_queue = &sem->queues[i];
6382 + }
6383 + }
6384 +
6385 + if(my_queue)
6386 + {
6387 + cedf_domain_t* cluster;
6388 +
6389 + max_hp = my_queue->hp_waiter;
6390 + BUG_ON(!max_hp);
6391 +
6392 + TRACE_CUR("queue %d: stealing %s/%d from queue %d\n",
6393 + kfmlp_get_idx(sem, my_queue),
6394 + max_hp->comm, max_hp->pid,
6395 + kfmlp_get_idx(sem, my_queue));
6396 +
6397 + my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, max_hp);
6398 +
6399 + /*
6400 + if(my_queue->hp_waiter)
6401 + TRACE_CUR("queue %d: new hp_waiter is %s/%d\n",
6402 + kfmlp_get_idx(sem, my_queue),
6403 + my_queue->hp_waiter->comm,
6404 + my_queue->hp_waiter->pid);
6405 + else
6406 + TRACE_CUR("queue %d: new hp_waiter is %p\n",
6407 + kfmlp_get_idx(sem, my_queue), NULL);
6408 + */
6409 +
6410 + cluster = task_cpu_cluster(max_hp);
6411 +
6412 + raw_spin_lock(&cluster->cedf_lock);
6413 +
6414 + /*
6415 + if(my_queue->owner)
6416 + TRACE_CUR("queue %d: owner is %s/%d\n",
6417 + kfmlp_get_idx(sem, my_queue),
6418 + my_queue->owner->comm,
6419 + my_queue->owner->pid);
6420 + else
6421 + TRACE_CUR("queue %d: owner is %p\n",
6422 + kfmlp_get_idx(sem, my_queue),
6423 + NULL);
6424 + */
6425 +
6426 + if(tsk_rt(my_queue->owner)->inh_task == max_hp)
6427 + {
6428 + __clear_priority_inheritance(my_queue->owner);
6429 + if(my_queue->hp_waiter != NULL)
6430 + {
6431 + __set_priority_inheritance(my_queue->owner, my_queue->hp_waiter);
6432 + }
6433 + }
6434 + raw_spin_unlock(&cluster->cedf_lock);
6435 +
6436 + list_for_each(pos, &my_queue->wait.task_list)
6437 + {
6438 + queued = (struct task_struct*) list_entry(pos, wait_queue_t,
6439 + task_list)->private;
6440 + /* Compare task prios, find high prio task. */
6441 + if (queued == max_hp)
6442 + {
6443 + /*
6444 + TRACE_CUR("queue %d: found entry in wait queue. REMOVING!\n",
6445 + kfmlp_get_idx(sem, my_queue));
6446 + */
6447 + __remove_wait_queue(&my_queue->wait,
6448 + list_entry(pos, wait_queue_t, task_list));
6449 + break;
6450 + }
6451 + }
6452 + --(my_queue->count);
6453 + }
6454 +
6455 + return(max_hp);
6456 +}
6457 +
6458 +int cedf_kfmlp_lock(struct litmus_lock* l)
6459 +{
6460 + struct task_struct* t = current;
6461 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
6462 + struct kfmlp_queue* my_queue;
6463 + wait_queue_t wait;
6464 + unsigned long flags;
6465 +
6466 + if (!is_realtime(t))
6467 + return -EPERM;
6468 +
6469 + spin_lock_irqsave(&sem->lock, flags);
6470 +
6471 + my_queue = sem->shortest_queue;
6472 +
6473 + if (my_queue->owner) {
6474 + /* resource is not free => must suspend and wait */
6475 + TRACE_CUR("queue %d: Resource is not free => must suspend and wait.\n",
6476 + kfmlp_get_idx(sem, my_queue));
6477 +
6478 + init_waitqueue_entry(&wait, t);
6479 +
6480 + /* FIXME: interruptible would be nice some day */
6481 + set_task_state(t, TASK_UNINTERRUPTIBLE);
6482 +
6483 + __add_wait_queue_tail_exclusive(&my_queue->wait, &wait);
6484 +
6485 + /* check if we need to activate priority inheritance */
6486 + if (edf_higher_prio(t, my_queue->hp_waiter))
6487 + {
6488 + my_queue->hp_waiter = t;
6489 + if (edf_higher_prio(t, my_queue->owner))
6490 + {
6491 + set_priority_inheritance(my_queue->owner, my_queue->hp_waiter);
6492 + }
6493 + }
6494 +
6495 + ++(my_queue->count);
6496 + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
6497 +
6498 + /* release lock before sleeping */
6499 + spin_unlock_irqrestore(&sem->lock, flags);
6500 +
6501 + /* We depend on the FIFO order. Thus, we don't need to recheck
6502 + * when we wake up; we are guaranteed to have the lock since
6503 + * there is only one wake up per release (or steal).
6504 + */
6505 + schedule();
6506 +
6507 +
6508 + if(my_queue->owner == t)
6509 + {
6510 + TRACE_CUR("queue %d: acquired through waiting\n",
6511 + kfmlp_get_idx(sem, my_queue));
6512 + }
6513 + else
6514 + {
6515 + /* this case may happen if our wait entry was stolen
6516 +			   between queues; record where we went. */
6517 + my_queue = kfmlp_get_queue(sem, t);
6518 + BUG_ON(!my_queue);
6519 + TRACE_CUR("queue %d: acquired through stealing\n",
6520 + kfmlp_get_idx(sem, my_queue));
6521 + }
6522 + }
6523 + else
6524 + {
6525 + TRACE_CUR("queue %d: acquired immediately\n",
6526 + kfmlp_get_idx(sem, my_queue));
6527 +
6528 + my_queue->owner = t;
6529 +
6530 + ++(my_queue->count);
6531 + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
6532 +
6533 + spin_unlock_irqrestore(&sem->lock, flags);
6534 + }
6535 +
6536 + return kfmlp_get_idx(sem, my_queue);
6537 +}
6538 +
6539 +int cedf_kfmlp_unlock(struct litmus_lock* l)
6540 +{
6541 + struct task_struct *t = current, *next;
6542 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
6543 + struct kfmlp_queue *my_queue;
6544 + unsigned long flags;
6545 + int err = 0;
6546 +
6547 + spin_lock_irqsave(&sem->lock, flags);
6548 +
6549 + my_queue = kfmlp_get_queue(sem, t);
6550 +
6551 + if (!my_queue) {
6552 + err = -EINVAL;
6553 + goto out;
6554 + }
6555 +
6556 + /* check if there are jobs waiting for this resource */
6557 + next = __waitqueue_remove_first(&my_queue->wait);
6558 + if (next) {
6559 + /*
6560 + TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n",
6561 + kfmlp_get_idx(sem, my_queue),
6562 + next->comm, next->pid);
6563 + */
6564 +		/* next becomes the resource holder */
6565 + my_queue->owner = next;
6566 +
6567 + --(my_queue->count);
6568 + if(my_queue->count < sem->shortest_queue->count)
6569 + {
6570 + sem->shortest_queue = my_queue;
6571 + }
6572 +
6573 + TRACE_CUR("queue %d: lock ownership passed to %s/%d\n",
6574 + kfmlp_get_idx(sem, my_queue), next->comm, next->pid);
6575 +
6576 + /* determine new hp_waiter if necessary */
6577 + if (next == my_queue->hp_waiter) {
6578 + TRACE_TASK(next, "was highest-prio waiter\n");
6579 + /* next has the highest priority --- it doesn't need to
6580 + * inherit. However, we need to make sure that the
6581 + * next-highest priority in the queue is reflected in
6582 + * hp_waiter. */
6583 + my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next);
6584 + if (my_queue->hp_waiter)
6585 + TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue));
6586 + else
6587 + TRACE("queue %d: no further waiters\n", kfmlp_get_idx(sem, my_queue));
6588 + } else {
6589 + /* Well, if next is not the highest-priority waiter,
6590 + * then it ought to inherit the highest-priority
6591 + * waiter's priority. */
6592 + set_priority_inheritance(next, my_queue->hp_waiter);
6593 + }
6594 +
6595 + /* wake up next */
6596 + wake_up_process(next);
6597 + }
6598 + else
6599 + {
6600 + TRACE_CUR("queue %d: looking to steal someone...\n", kfmlp_get_idx(sem, my_queue));
6601 +
6602 + next = kfmlp_remove_hp_waiter(sem); /* returns NULL if nothing to steal */
6603 +
6604 + /*
6605 + if(next)
6606 + TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - steal\n",
6607 + kfmlp_get_idx(sem, my_queue),
6608 + next->comm, next->pid);
6609 + */
6610 +
6611 + my_queue->owner = next;
6612 +
6613 + if(next)
6614 + {
6615 + TRACE_CUR("queue %d: lock ownership passed to %s/%d (which was stolen)\n",
6616 + kfmlp_get_idx(sem, my_queue),
6617 + next->comm, next->pid);
6618 +
6619 + /* wake up next */
6620 + wake_up_process(next);
6621 + }
6622 + else
6623 + {
6624 + TRACE_CUR("queue %d: no one to steal.\n", kfmlp_get_idx(sem, my_queue));
6625 +
6626 + --(my_queue->count);
6627 + if(my_queue->count < sem->shortest_queue->count)
6628 + {
6629 + sem->shortest_queue = my_queue;
6630 + }
6631 + }
6632 + }
6633 +
6634 + /* we lose the benefit of priority inheritance (if any) */
6635 + if (tsk_rt(t)->inh_task)
6636 + clear_priority_inheritance(t);
6637 +
6638 +out:
6639 + spin_unlock_irqrestore(&sem->lock, flags);
6640 +
6641 + return err;
6642 +}
6643 +
6644 +int cedf_kfmlp_close(struct litmus_lock* l)
6645 +{
6646 + struct task_struct *t = current;
6647 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
6648 + struct kfmlp_queue *my_queue;
6649 + unsigned long flags;
6650 +
6651 + int owner;
6652 +
6653 + spin_lock_irqsave(&sem->lock, flags);
6654 +
6655 + my_queue = kfmlp_get_queue(sem, t);
6656 + owner = (my_queue) ? (my_queue->owner == t) : 0;
6657 +
6658 + spin_unlock_irqrestore(&sem->lock, flags);
6659 +
6660 + if (owner)
6661 + cedf_kfmlp_unlock(l);
6662 +
6663 + return 0;
6664 +}
6665 +
6666 +void cedf_kfmlp_free(struct litmus_lock* l)
6667 +{
6668 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
6669 + kfree(sem->queues);
6670 + kfree(sem);
6671 +}
6672 +
6673 +static struct litmus_lock_ops cedf_kfmlp_lock_ops = {
6674 + .close = cedf_kfmlp_close,
6675 + .lock = cedf_kfmlp_lock,
6676 + .unlock = cedf_kfmlp_unlock,
6677 + .deallocate = cedf_kfmlp_free,
6678 +};
6679 +
6680 +static struct litmus_lock* cedf_new_kfmlp(void* __user arg, int* ret_code)
6681 +{
6682 + struct kfmlp_semaphore* sem;
6683 + int num_resources = 0;
6684 + int i;
6685 +
6686 + if(!access_ok(VERIFY_READ, arg, sizeof(num_resources)))
6687 + {
6688 + *ret_code = -EINVAL;
6689 + return(NULL);
6690 + }
6691 + if(__copy_from_user(&num_resources, arg, sizeof(num_resources)))
6692 + {
6693 + *ret_code = -EINVAL;
6694 + return(NULL);
6695 + }
6696 + if(num_resources < 1)
6697 + {
6698 + *ret_code = -EINVAL;
6699 + return(NULL);
6700 + }
6701 +
6702 + sem = kmalloc(sizeof(*sem), GFP_KERNEL);
6703 + if(!sem)
6704 + {
6705 + *ret_code = -ENOMEM;
6706 + return NULL;
6707 + }
6708 +
6709 + sem->queues = kmalloc(sizeof(struct kfmlp_queue)*num_resources, GFP_KERNEL);
6710 + if(!sem->queues)
6711 + {
6712 + kfree(sem);
6713 + *ret_code = -ENOMEM;
6714 + return NULL;
6715 + }
6716 +
6717 + sem->litmus_lock.ops = &cedf_kfmlp_lock_ops;
6718 + spin_lock_init(&sem->lock);
6719 + sem->num_resources = num_resources;
6720 +
6721 + for(i = 0; i < num_resources; ++i)
6722 + {
6723 + sem->queues[i].owner = NULL;
6724 + sem->queues[i].hp_waiter = NULL;
6725 + init_waitqueue_head(&sem->queues[i].wait);
6726 + sem->queues[i].count = 0;
6727 + }
6728 +
6729 + sem->shortest_queue = &sem->queues[0];
6730 +
6731 + *ret_code = 0;
6732 + return &sem->litmus_lock;
6733 +}
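+
+/* Argument sketch (illustrative): the only value read from user space is a
+ * single int giving the number of replicas k; e.g. k = 3 creates three
+ * FIFO queues guarding three interchangeable resource instances, with
+ * shortest_queue initially pointing at queues[0].
+ */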
6734 +
6735 +
6736 +/* **** lock constructor **** */
6737 +
6738 +static long cedf_allocate_lock(struct litmus_lock **lock, int type,
6739 + void* __user arg)
6740 +{
6741 + int err = -ENXIO;
6742 +
6743 +	/* C-EDF currently only supports the KFMLP for global resources
6744 + WITHIN a given cluster. DO NOT USE CROSS-CLUSTER! */
6745 + switch (type) {
6746 + case KFMLP_SEM:
6747 + *lock = cedf_new_kfmlp(arg, &err);
6748 + break;
6749 + };
6750 +
6751 + return err;
6752 +}
6753 +
6754 +#endif // CONFIG_LITMUS_LOCKING
6755 +
6756 +
6757 +
6758 +
6759 +
6760 +
6761 /* total number of cluster */
6762 static int num_clusters;
6763 /* we do not support cluster of different sizes */
6764 @@ -696,6 +1955,13 @@ static long cedf_activate_plugin(void)
6765 bheap_init(&(cedf[i].cpu_heap));
6766 edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs);
6767
6768 +
6769 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
6770 + cedf[i].pending_tasklets.head = NULL;
6771 + cedf[i].pending_tasklets.tail = &(cedf[i].pending_tasklets.head);
6772 +#endif
6773 +
6774 +
6775 if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC))
6776 return -ENOMEM;
6777 }
6778 @@ -746,6 +2012,40 @@ static long cedf_activate_plugin(void)
6779 break;
6780 }
6781 }
6782 +
6783 +#ifdef CONFIG_LITMUS_SOFTIRQD
6784 + {
6785 + /* distribute the daemons evenly across the clusters. */
6786 + int* affinity = kmalloc(NR_LITMUS_SOFTIRQD * sizeof(int), GFP_ATOMIC);
6787 + int num_daemons_per_cluster = NR_LITMUS_SOFTIRQD / num_clusters;
6788 + int left_over = NR_LITMUS_SOFTIRQD % num_clusters;
6789 +
6790 + int daemon = 0;
6791 + for(i = 0; i < num_clusters; ++i)
6792 + {
6793 + int num_on_this_cluster = num_daemons_per_cluster;
6794 + if(left_over)
6795 + {
6796 + ++num_on_this_cluster;
6797 + --left_over;
6798 + }
6799 +
6800 + for(j = 0; j < num_on_this_cluster; ++j)
6801 + {
6802 + // first CPU of this cluster
6803 + affinity[daemon++] = i*cluster_size;
6804 + }
6805 + }
6806 +
6807 + spawn_klitirqd(affinity);
6808 +
6809 + kfree(affinity);
6810 + }
6811 +#endif
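+
+	/* Worked example of the distribution above (made-up numbers): with
+	 * NR_LITMUS_SOFTIRQD = 8, 3 clusters, and cluster_size = 4, each
+	 * cluster gets 8 / 3 = 2 daemons, the 8 % 3 = 2 left-over daemons go
+	 * to the first two clusters (a 3/3/2 split), and every daemon is
+	 * pinned to the first CPU of its cluster (CPUs 0, 4, and 8).
+	 */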
6812 +
6813 +#ifdef CONFIG_LITMUS_NVIDIA
6814 + init_nvidia_info();
6815 +#endif
6816
6817 free_cpumask_var(mask);
6818 clusters_allocated = 1;
6819 @@ -765,6 +2065,19 @@ static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = {
6820 .task_block = cedf_task_block,
6821 .admit_task = cedf_admit_task,
6822 .activate_plugin = cedf_activate_plugin,
6823 +#ifdef CONFIG_LITMUS_LOCKING
6824 + .allocate_lock = cedf_allocate_lock,
6825 + .set_prio_inh = set_priority_inheritance,
6826 + .clear_prio_inh = clear_priority_inheritance,
6827 +#endif
6828 +#ifdef CONFIG_LITMUS_SOFTIRQD
6829 + .set_prio_inh_klitirqd = set_priority_inheritance_klitirqd,
6830 + .clear_prio_inh_klitirqd = clear_priority_inheritance_klitirqd,
6831 +#endif
6832 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
6833 + .enqueue_pai_tasklet = enqueue_pai_tasklet,
6834 + .run_tasklets = run_tasklets,
6835 +#endif
6836 };
6837
6838 static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL;
6839 diff --git a/litmus/sched_cfifo.c b/litmus/sched_cfifo.c
6840 new file mode 100644
6841 index 0000000..7fbdec3
6842 --- /dev/null
6843 +++ b/litmus/sched_cfifo.c
6844 @@ -0,0 +1,2063 @@
6845 +/*
6846 + * litmus/sched_cfifo.c
6847 + *
6848 + * Implementation of the C-FIFO scheduling algorithm.
6849 + *
6850 + * This implementation is based on G-EDF:
6851 + * - CPUs are clustered around L2 or L3 caches.
6852 + * - Cluster topology is automatically detected (this is arch dependent
6853 + *   and currently works only on x86 --- and only with modern
6854 + *   CPUs that export cpuid4 information)
6855 + * - The plugin _does not_ attempt to put tasks in the right cluster, i.e.,
6856 + * the programmer needs to be aware of the topology to place tasks
6857 + * in the desired cluster
6858 + * - default clustering is around L2 cache (cache index = 2)
6859 + * supported clusters are: L1 (private cache: pedf), L2, L3, ALL (all
6860 + * online_cpus are placed in a single cluster).
6861 + *
6862 + * For details on functions, take a look at sched_gsn_edf.c
6863 + *
6864 + * Currently, we do not support changes in the number of online cpus.
6865 + * If the num_online_cpus() dynamically changes, the plugin is broken.
6866 + *
6867 + * This version uses the simple approach and serializes all scheduling
6868 + * decisions by the use of a queue lock. This is probably not the
6869 + * best way to do it, but it should suffice for now.
6870 + */
6871 +
6872 +#include <linux/spinlock.h>
6873 +#include <linux/percpu.h>
6874 +#include <linux/sched.h>
6875 +#include <linux/slab.h>
6876 +#include <linux/uaccess.h>
6877 +
6878 +#include <linux/module.h>
6879 +
6880 +#include <litmus/litmus.h>
6881 +#include <litmus/jobs.h>
6882 +#include <litmus/preempt.h>
6883 +#include <litmus/sched_plugin.h>
6884 +#include <litmus/fifo_common.h>
6885 +#include <litmus/sched_trace.h>
6886 +
6887 +#include <litmus/clustered.h>
6888 +
6889 +#include <litmus/bheap.h>
6890 +
6891 +/* to configure the cluster size */
6892 +#include <litmus/litmus_proc.h>
6893 +
6894 +#ifdef CONFIG_SCHED_CPU_AFFINITY
6895 +#include <litmus/affinity.h>
6896 +#endif
6897 +
6898 +#ifdef CONFIG_LITMUS_SOFTIRQD
6899 +#include <litmus/litmus_softirq.h>
6900 +#endif
6901 +
6902 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
6903 +#include <linux/interrupt.h>
6904 +#include <litmus/trace.h>
6905 +#endif
6906 +
6907 +#ifdef CONFIG_LITMUS_NVIDIA
6908 +#include <litmus/nvidia_info.h>
6909 +#endif
6910 +
6911 +/* Reference configuration variable. Determines which cache level is used to
6912 + * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that
6913 + * all CPUs form a single cluster (just like GSN-EDF).
6914 + */
6915 +static enum cache_level cluster_config = GLOBAL_CLUSTER;
6916 +
6917 +struct clusterdomain;
6918 +
6919 +/* cpu_entry_t - maintain the linked and scheduled state
6920 + *
6921 + * A cpu also contains a pointer to the cfifo_domain_t cluster
6922 + * that owns it (struct clusterdomain*)
6923 + */
6924 +typedef struct {
6925 + int cpu;
6926 + struct clusterdomain* cluster; /* owning cluster */
6927 + struct task_struct* linked; /* only RT tasks */
6928 + struct task_struct* scheduled; /* only RT tasks */
6929 + atomic_t will_schedule; /* prevent unneeded IPIs */
6930 + struct bheap_node* hn;
6931 +} cpu_entry_t;
6932 +
6933 +/* one cpu_entry_t per CPU */
6934 +DEFINE_PER_CPU(cpu_entry_t, cfifo_cpu_entries);
6935 +
6936 +#define set_will_schedule() \
6937 + (atomic_set(&__get_cpu_var(cfifo_cpu_entries).will_schedule, 1))
6938 +#define clear_will_schedule() \
6939 + (atomic_set(&__get_cpu_var(cfifo_cpu_entries).will_schedule, 0))
6940 +#define test_will_schedule(cpu) \
6941 + (atomic_read(&per_cpu(cfifo_cpu_entries, cpu).will_schedule))
6942 +
6943 +
6944 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
6945 +struct tasklet_head
6946 +{
6947 + struct tasklet_struct *head;
6948 + struct tasklet_struct **tail;
6949 +};
6950 +#endif
6951 +
6952 +/*
6953 + * In C-FIFO there is a cfifo domain _per_ cluster
6954 + * The number of clusters is determined dynamically according to the
6955 + * total number of CPUs and the cluster size
6956 + */
6957 +typedef struct clusterdomain {
6958 + /* rt_domain for this cluster */
6959 + rt_domain_t domain;
6960 + /* cpus in this cluster */
6961 + cpu_entry_t* *cpus;
6962 + /* map of this cluster cpus */
6963 + cpumask_var_t cpu_map;
6964 + /* the cpus queue themselves according to priority in here */
6965 + struct bheap_node *heap_node;
6966 + struct bheap cpu_heap;
6967 + /* lock for this cluster */
6968 +#define cfifo_lock domain.ready_lock
6969 +
6970 +
6971 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
6972 + struct tasklet_head pending_tasklets;
6973 +#endif
6974 +
6975 +} cfifo_domain_t;
6976 +
6977 +/* a cfifo_domain per cluster; allocation is done at init/activation time */
6978 +cfifo_domain_t *cfifo;
6979 +
6980 +#define remote_cluster(cpu) ((cfifo_domain_t *) per_cpu(cfifo_cpu_entries, cpu).cluster)
6981 +#define task_cpu_cluster(task) remote_cluster(get_partition(task))
6982 +
6983 +/* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling
6984 + * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose
6985 + * information during the initialization of the plugin (e.g., topology)
6986 +#define WANT_ALL_SCHED_EVENTS
6987 + */
6988 +#define VERBOSE_INIT
6989 +
6990 +static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
6991 +{
6992 + cpu_entry_t *a, *b;
6993 + a = _a->value;
6994 + b = _b->value;
6995 + /* Note that a and b are inverted: we want the lowest-priority CPU at
6996 + * the top of the heap.
6997 + */
6998 + return fifo_higher_prio(b->linked, a->linked);
6999 +}
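+
+/* Consequence (assuming fifo_higher_prio() treats a NULL task as lower
+ * priority than any real-time task, like the EDF comparator): bheap_peek()
+ * on this heap returns an idle CPU's entry if one exists, and otherwise
+ * the CPU running the lowest-priority linked task -- exactly the
+ * preemption candidate handed out by lowest_prio_cpu().
+ */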
7000 +
7001 +/* update_cpu_position - Move the cpu entry to the correct place to maintain
7002 + * order in the cpu queue. Caller must hold cfifo lock.
7003 + */
7004 +static void update_cpu_position(cpu_entry_t *entry)
7005 +{
7006 + cfifo_domain_t *cluster = entry->cluster;
7007 +
7008 + if (likely(bheap_node_in_heap(entry->hn)))
7009 + bheap_delete(cpu_lower_prio,
7010 + &cluster->cpu_heap,
7011 + entry->hn);
7012 +
7013 + bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn);
7014 +}
7015 +
7016 +/* caller must hold cfifo lock */
7017 +static cpu_entry_t* lowest_prio_cpu(cfifo_domain_t *cluster)
7018 +{
7019 + struct bheap_node* hn;
7020 + hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap);
7021 + return hn->value;
7022 +}
7023 +
7024 +
7025 +/* link_task_to_cpu - Update the link of a CPU.
7026 + * Handles the case where the to-be-linked task is already
7027 + * scheduled on a different CPU.
7028 + */
7029 +static noinline void link_task_to_cpu(struct task_struct* linked,
7030 + cpu_entry_t *entry)
7031 +{
7032 + cpu_entry_t *sched;
7033 + struct task_struct* tmp;
7034 + int on_cpu;
7035 +
7036 + BUG_ON(linked && !is_realtime(linked));
7037 +
7038 + /* Currently linked task is set to be unlinked. */
7039 + if (entry->linked) {
7040 + entry->linked->rt_param.linked_on = NO_CPU;
7041 + }
7042 +
7043 + /* Link new task to CPU. */
7044 + if (linked) {
7045 + set_rt_flags(linked, RT_F_RUNNING);
7046 + /* handle task is already scheduled somewhere! */
7047 + on_cpu = linked->rt_param.scheduled_on;
7048 + if (on_cpu != NO_CPU) {
7049 + sched = &per_cpu(cfifo_cpu_entries, on_cpu);
7050 + /* this should only happen if not linked already */
7051 + BUG_ON(sched->linked == linked);
7052 +
7053 + /* If we are already scheduled on the CPU to which we
7054 + * wanted to link, we don't need to do the swap --
7055 + * we just link ourselves to the CPU and depend on
7056 + * the caller to get things right.
7057 + */
7058 + if (entry != sched) {
7059 + TRACE_TASK(linked,
7060 + "already scheduled on %d, updating link.\n",
7061 + sched->cpu);
7062 + tmp = sched->linked;
7063 + linked->rt_param.linked_on = sched->cpu;
7064 + sched->linked = linked;
7065 + update_cpu_position(sched);
7066 + linked = tmp;
7067 + }
7068 + }
7069 + if (linked) /* might be NULL due to swap */
7070 + linked->rt_param.linked_on = entry->cpu;
7071 + }
7072 + entry->linked = linked;
7073 +#ifdef WANT_ALL_SCHED_EVENTS
7074 + if (linked)
7075 + TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
7076 + else
7077 + TRACE("NULL linked to %d.\n", entry->cpu);
7078 +#endif
7079 + update_cpu_position(entry);
7080 +}
7081 +
7082 +/* unlink - Make sure a task is not linked any longer to an entry
7083 + * where it was linked before. Must hold cfifo_lock.
7084 + */
7085 +static noinline void unlink(struct task_struct* t)
7086 +{
7087 + cpu_entry_t *entry;
7088 +
7089 + if (t->rt_param.linked_on != NO_CPU) {
7090 + /* unlink */
7091 + entry = &per_cpu(cfifo_cpu_entries, t->rt_param.linked_on);
7092 + t->rt_param.linked_on = NO_CPU;
7093 + link_task_to_cpu(NULL, entry);
7094 + } else if (is_queued(t)) {
7095 + /* This is an interesting situation: t is scheduled,
7096 + * but was just recently unlinked. It cannot be
7097 + * linked anywhere else (because then it would have
7098 + * been relinked to this CPU), thus it must be in some
7099 + * queue. We must remove it from the list in this
7100 + * case.
7101 + *
7102 +	 * in the C-FIFO case it should be somewhere in the queue for
7103 +	 * its domain; therefore we can get the domain using
7104 + * task_cpu_cluster
7105 + */
7106 + remove(&(task_cpu_cluster(t))->domain, t);
7107 + }
7108 +}
7109 +
7110 +
7111 +/* preempt - force a CPU to reschedule
7112 + */
7113 +static void preempt(cpu_entry_t *entry)
7114 +{
7115 + preempt_if_preemptable(entry->scheduled, entry->cpu);
7116 +}
7117 +
7118 +/* requeue - Put an unlinked task into c-fifo domain.
7119 + * Caller must hold cfifo_lock.
7120 + */
7121 +static noinline void requeue(struct task_struct* task)
7122 +{
7123 + cfifo_domain_t *cluster = task_cpu_cluster(task);
7124 + BUG_ON(!task);
7125 + /* sanity check before insertion */
7126 + BUG_ON(is_queued(task));
7127 +
7128 + if (is_released(task, litmus_clock()))
7129 + __add_ready(&cluster->domain, task);
7130 + else {
7131 + /* it has got to wait */
7132 + add_release(&cluster->domain, task);
7133 + }
7134 +}
7135 +
7136 +#ifdef CONFIG_SCHED_CPU_AFFINITY
7137 +static cpu_entry_t* cfifo_get_nearest_available_cpu(
7138 + cfifo_domain_t *cluster, cpu_entry_t* start)
7139 +{
7140 + cpu_entry_t* affinity;
7141 +
7142 + get_nearest_available_cpu(affinity, start, cfifo_cpu_entries, -1);
7143 +
7144 + /* make sure CPU is in our cluster */
7145 + if(affinity && cpu_isset(affinity->cpu, *cluster->cpu_map))
7146 + return(affinity);
7147 + else
7148 + return(NULL);
7149 +}
7150 +#endif
7151 +
7152 +
7153 +/* check for any necessary preemptions */
7154 +static void check_for_preemptions(cfifo_domain_t *cluster)
7155 +{
7156 + struct task_struct *task;
7157 + cpu_entry_t *last;
7158 +
7159 + for(last = lowest_prio_cpu(cluster);
7160 + fifo_preemption_needed(&cluster->domain, last->linked);
7161 + last = lowest_prio_cpu(cluster)) {
7162 + /* preemption necessary */
7163 + task = __take_ready(&cluster->domain);
7164 +#ifdef CONFIG_SCHED_CPU_AFFINITY
7165 + {
7166 + cpu_entry_t* affinity =
7167 + cfifo_get_nearest_available_cpu(cluster,
7168 + &per_cpu(cfifo_cpu_entries, task_cpu(task)));
7169 + if(affinity)
7170 + last = affinity;
7171 + else if(last->linked)
7172 + requeue(last->linked);
7173 + }
7174 +#else
7175 + if (last->linked)
7176 + requeue(last->linked);
7177 +#endif
7178 + TRACE("check_for_preemptions: attempting to link task %d to %d\n",
7179 + task->pid, last->cpu);
7180 + link_task_to_cpu(task, last);
7181 + preempt(last);
7182 + }
7183 +}
7184 +
7185 +/* cfifo_job_arrival: task is either resumed or released */
7186 +static noinline void cfifo_job_arrival(struct task_struct* task)
7187 +{
7188 + cfifo_domain_t *cluster = task_cpu_cluster(task);
7189 + BUG_ON(!task);
7190 +
7191 + requeue(task);
7192 + check_for_preemptions(cluster);
7193 +}
7194 +
7195 +static void cfifo_release_jobs(rt_domain_t* rt, struct bheap* tasks)
7196 +{
7197 + cfifo_domain_t* cluster = container_of(rt, cfifo_domain_t, domain);
7198 + unsigned long flags;
7199 +
7200 + raw_spin_lock_irqsave(&cluster->cfifo_lock, flags);
7201 +
7202 + __merge_ready(&cluster->domain, tasks);
7203 + check_for_preemptions(cluster);
7204 +
7205 + raw_spin_unlock_irqrestore(&cluster->cfifo_lock, flags);
7206 +}
7207 +
7208 +/* caller holds cfifo_lock */
7209 +static noinline void job_completion(struct task_struct *t, int forced)
7210 +{
7211 + BUG_ON(!t);
7212 +
7213 + sched_trace_task_completion(t, forced);
7214 +
7215 +#ifdef CONFIG_LITMUS_NVIDIA
7216 + atomic_set(&tsk_rt(t)->nv_int_count, 0);
7217 +#endif
7218 +
7219 + TRACE_TASK(t, "job_completion().\n");
7220 +
7221 + /* set flags */
7222 + set_rt_flags(t, RT_F_SLEEP);
7223 + /* prepare for next period */
7224 + prepare_for_next_period(t);
7225 + if (is_released(t, litmus_clock()))
7226 + sched_trace_task_release(t);
7227 + /* unlink */
7228 + unlink(t);
7229 + /* requeue
7230 + * But don't requeue a blocking task. */
7231 + if (is_running(t))
7232 + cfifo_job_arrival(t);
7233 +}
7234 +
7235 +/* cfifo_tick - this function is called for every local timer
7236 + * interrupt.
7237 + *
7238 + * checks whether the current task has expired and checks
7239 + * whether we need to preempt it if it has not expired
7240 + */
7241 +static void cfifo_tick(struct task_struct* t)
7242 +{
7243 + if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
7244 + if (!is_np(t)) {
7245 + /* np tasks will be preempted when they become
7246 + * preemptable again
7247 + */
7248 + litmus_reschedule_local();
7249 + set_will_schedule();
7250 + TRACE("cfifo_scheduler_tick: "
7251 + "%d is preemptable "
7252 + " => FORCE_RESCHED\n", t->pid);
7253 + } else if (is_user_np(t)) {
7254 + TRACE("cfifo_scheduler_tick: "
7255 + "%d is non-preemptable, "
7256 + "preemption delayed.\n", t->pid);
7257 + request_exit_np(t);
7258 + }
7259 + }
7260 +}
7261 +
7262 +
7263 +
7264 +
7265 +
7266 +
7267 +
7268 +
7269 +
7270 +
7271 +
7272 +
7273 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
7274 +
7275 +
7276 +static void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed)
7277 +{
7278 + if (!atomic_read(&tasklet->count)) {
7279 + sched_trace_tasklet_begin(tasklet->owner);
7280 +
7281 + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state))
7282 + {
7283 + BUG();
7284 + }
7285 +		TRACE("%s: Invoking tasklet with owner pid = %d (flushed = %lu).\n", __FUNCTION__, tasklet->owner->pid, flushed);
7286 + tasklet->func(tasklet->data);
7287 + tasklet_unlock(tasklet);
7288 +
7289 + sched_trace_tasklet_end(tasklet->owner, flushed);
7290 + }
7291 + else {
7292 + BUG();
7293 + }
7294 +}
7295 +
7296 +
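+/* List invariant relied upon below: ".tail" points at the ->next field of
+ * the last pending tasklet (or at ".head" when the list is empty), so
+ * removing the last element must re-point the tail at the predecessor's
+ * ->next field, or back at ".head" if nothing precedes it.
+ */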
7297 +static void __extract_tasklets(cfifo_domain_t* cluster, struct task_struct* task, struct tasklet_head* task_tasklets)
7298 +{
7299 + struct tasklet_struct* step;
7300 + struct tasklet_struct* tasklet;
7301 + struct tasklet_struct* prev;
7302 +
7303 + task_tasklets->head = NULL;
7304 + task_tasklets->tail = &(task_tasklets->head);
7305 +
7306 + prev = NULL;
7307 + for(step = cluster->pending_tasklets.head; step != NULL; step = step->next)
7308 + {
7309 + if(step->owner == task)
7310 + {
7311 + TRACE("%s: Found tasklet to flush: %d\n", __FUNCTION__, step->owner->pid);
7312 +
7313 + tasklet = step;
7314 +
7315 + if(prev) {
7316 + prev->next = tasklet->next;
7317 + }
7318 + else if(cluster->pending_tasklets.head == tasklet) {
7319 + // we're at the head.
7320 + cluster->pending_tasklets.head = tasklet->next;
7321 + }
7322 +
7323 +			if(cluster->pending_tasklets.tail == &(tasklet->next)) {
7324 + // we're at the tail
7325 + if(prev) {
7326 +					cluster->pending_tasklets.tail = &(prev->next);
7327 + }
7328 + else {
7329 + cluster->pending_tasklets.tail = &(cluster->pending_tasklets.head);
7330 + }
7331 + }
7332 +
7333 + tasklet->next = NULL;
7334 + *(task_tasklets->tail) = tasklet;
7335 + task_tasklets->tail = &(tasklet->next);
7336 + }
7337 + else {
7338 + prev = step;
7339 + }
7340 + }
7341 +}
7342 +
7343 +static void flush_tasklets(cfifo_domain_t* cluster, struct task_struct* task)
7344 +{
7345 + unsigned long flags;
7346 + struct tasklet_head task_tasklets;
7347 + struct tasklet_struct* step;
7348 +
7349 + raw_spin_lock_irqsave(&cluster->cfifo_lock, flags);
7350 + __extract_tasklets(cluster, task, &task_tasklets);
7351 + raw_spin_unlock_irqrestore(&cluster->cfifo_lock, flags);
7352 +
7353 +	if(task_tasklets.head != NULL) {
7354 + TRACE("%s: Flushing tasklets for %d...\n", __FUNCTION__, task->pid);
7355 + }
7356 +
7357 + // now execute any flushed tasklets.
7358 +	for(step = task_tasklets.head; step != NULL; /**/)
7359 + {
7360 + struct tasklet_struct* temp = step->next;
7361 +
7362 + step->next = NULL;
7363 + __do_lit_tasklet(step, 1ul);
7364 +
7365 + step = temp;
7366 + }
7367 +}
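+
+/* Note: each extracted tasklet is executed one last time with the
+ * "flushed" argument set to 1, so sched_trace_tasklet_end() can tell a
+ * flush on task exit apart from a normal (flushed = 0) invocation.
+ */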
7368 +
7369 +
7370 +static void do_lit_tasklets(cfifo_domain_t* cluster, struct task_struct* sched_task)
7371 +{
7372 + int work_to_do = 1;
7373 + struct tasklet_struct *tasklet = NULL;
7374 + //struct tasklet_struct *step;
7375 + unsigned long flags;
7376 +
7377 + while(work_to_do) {
7378 +
7379 + TS_NV_SCHED_BOTISR_START;
7380 +
7381 + // remove tasklet at head of list if it has higher priority.
7382 + raw_spin_lock_irqsave(&cluster->cfifo_lock, flags);
7383 +
7384 + /*
7385 + step = cluster->pending_tasklets.head;
7386 + TRACE("%s: (BEFORE) dumping tasklet queue...\n", __FUNCTION__);
7387 + while(step != NULL){
7388 + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid);
7389 + step = step->next;
7390 + }
7391 + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1);
7392 + TRACE("%s: done.\n", __FUNCTION__);
7393 + */
7394 +
7395 +
7396 + if(cluster->pending_tasklets.head != NULL) {
7397 + // remove tasklet at head.
7398 + tasklet = cluster->pending_tasklets.head;
7399 +
7400 + if(fifo_higher_prio(tasklet->owner, sched_task)) {
7401 +
7402 + if(NULL == tasklet->next) {
7403 + // tasklet is at the head, list only has one element
7404 + TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
7405 + cluster->pending_tasklets.tail = &(cluster->pending_tasklets.head);
7406 + }
7407 +
7408 + // remove the tasklet from the queue
7409 + cluster->pending_tasklets.head = tasklet->next;
7410 +
7411 + TRACE("%s: Removed tasklet for %d from tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
7412 + }
7413 + else {
7414 + TRACE("%s: Pending tasklet (%d) does not have priority to run on this CPU (%d).\n", __FUNCTION__, tasklet->owner->pid, smp_processor_id());
7415 + tasklet = NULL;
7416 + }
7417 + }
7418 + else {
7419 + TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__);
7420 + }
7421 +
7422 + /*
7423 + step = cluster->pending_tasklets.head;
7424 + TRACE("%s: (AFTER) dumping tasklet queue...\n", __FUNCTION__);
7425 + while(step != NULL){
7426 + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid);
7427 + step = step->next;
7428 + }
7429 + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1);
7430 + TRACE("%s: done.\n", __FUNCTION__);
7431 + */
7432 +
7433 + raw_spin_unlock_irqrestore(&cluster->cfifo_lock, flags);
7434 +
7435 + TS_NV_SCHED_BOTISR_END;
7436 +
7437 + if(tasklet) {
7438 + __do_lit_tasklet(tasklet, 0ul);
7439 + tasklet = NULL;
7440 + }
7441 + else {
7442 + work_to_do = 0;
7443 + }
7444 + }
7445 +
7446 + //TRACE("%s: exited.\n", __FUNCTION__);
7447 +}
7448 +
7449 +
7450 +static void run_tasklets(struct task_struct* sched_task)
7451 +{
7452 + cfifo_domain_t* cluster;
7453 +
7454 +#if 0
7455 + int task_is_rt = is_realtime(sched_task);
7456 + cfifo_domain_t* cluster;
7457 +
7458 + if(is_realtime(sched_task)) {
7459 + cluster = task_cpu_cluster(sched_task);
7460 + }
7461 + else {
7462 + cluster = remote_cluster(get_cpu());
7463 + }
7464 +
7465 + if(cluster && cluster->pending_tasklets.head != NULL) {
7466 + TRACE("%s: There are tasklets to process.\n", __FUNCTION__);
7467 +
7468 + do_lit_tasklets(cluster, sched_task);
7469 + }
7470 +
7471 + if(!task_is_rt) {
7472 + put_cpu_no_resched();
7473 + }
7474 +#else
7475 +
7476 + preempt_disable();
7477 +
7478 + cluster = (is_realtime(sched_task)) ?
7479 + task_cpu_cluster(sched_task) :
7480 + remote_cluster(smp_processor_id());
7481 +
7482 + if(cluster && cluster->pending_tasklets.head != NULL) {
7483 + TRACE("%s: There are tasklets to process.\n", __FUNCTION__);
7484 + do_lit_tasklets(cluster, sched_task);
7485 + }
7486 +
7487 + preempt_enable_no_resched();
7488 +
7489 +#endif
7490 +}
7491 +
7492 +
7493 +static void __add_pai_tasklet(struct tasklet_struct* tasklet, cfifo_domain_t* cluster)
7494 +{
7495 + struct tasklet_struct* step;
7496 +
7497 + /*
7498 + step = cluster->pending_tasklets.head;
7499 + TRACE("%s: (BEFORE) dumping tasklet queue...\n", __FUNCTION__);
7500 + while(step != NULL){
7501 + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid);
7502 + step = step->next;
7503 + }
7504 + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1);
7505 + TRACE("%s: done.\n", __FUNCTION__);
7506 + */
7507 +
7508 +
7509 + tasklet->next = NULL; // make sure there are no old values floating around
7510 +
7511 + step = cluster->pending_tasklets.head;
7512 + if(step == NULL) {
7513 + TRACE("%s: tasklet queue empty. inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid);
7514 + // insert at tail.
7515 + *(cluster->pending_tasklets.tail) = tasklet;
7516 + cluster->pending_tasklets.tail = &(tasklet->next);
7517 + }
7518 + else if((*(cluster->pending_tasklets.tail) != NULL) &&
7519 + fifo_higher_prio((*(cluster->pending_tasklets.tail))->owner, tasklet->owner)) {
7520 + // insert at tail.
7521 + TRACE("%s: tasklet belongs at end. inserting tasklet for %d at tail.\n", __FUNCTION__, tasklet->owner->pid);
7522 +
7523 + *(cluster->pending_tasklets.tail) = tasklet;
7524 + cluster->pending_tasklets.tail = &(tasklet->next);
7525 + }
+	else {
+
+		//WARN_ON(1 == 1);
+
+		// insert the tasklet somewhere in the middle.
+
+		TRACE("%s: tasklet belongs somewhere in the middle.\n", __FUNCTION__);
+
+		if(fifo_higher_prio(tasklet->owner, step->owner)) {
+			// the new tasklet outranks the current head.
+			TRACE("%s: %d is the new tasklet queue head.\n", __FUNCTION__, tasklet->owner->pid);
+
+			tasklet->next = step;
+			cluster->pending_tasklets.head = tasklet;
+		}
+		else {
+			while(step->next && fifo_higher_prio(step->next->owner, tasklet->owner)) {
+				step = step->next;
+			}
+
+			// insert tasklet right before step->next.
+
+			TRACE("%s: inserting tasklet for %d between %d and %d.\n", __FUNCTION__, tasklet->owner->pid, step->owner->pid, (step->next) ? step->next->owner->pid : -1);
+
+			tasklet->next = step->next;
+			step->next = tasklet;
+
+			// if the tasklet went in at the very end, advance the tail pointer.
+			if(tasklet->next == NULL) {
+				cluster->pending_tasklets.tail = &(tasklet->next);
+			}
+		}
+	}
7552 +
7553 + /*
7554 + step = cluster->pending_tasklets.head;
7555 + TRACE("%s: (AFTER) dumping tasklet queue...\n", __FUNCTION__);
7556 + while(step != NULL){
7557 + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid);
7558 + step = step->next;
7559 + }
7560 + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1);
7561 + TRACE("%s: done.\n", __FUNCTION__);
7562 + */
7563 +
7564 +	// NOTE: the insertion above keeps this list in priority order; a simple tail-append is kept below for reference.
7565 + // tasklet->next = NULL;
7566 + // *(cluster->pending_tasklets.tail) = tasklet;
7567 + // cluster->pending_tasklets.tail = &tasklet->next;
7568 +}
7569 +
7570 +static int enqueue_pai_tasklet(struct tasklet_struct* tasklet)
7571 +{
7572 + cfifo_domain_t *cluster = NULL;
7573 + cpu_entry_t *targetCPU = NULL;
7574 + int thisCPU;
7575 + int runLocal = 0;
7576 + int runNow = 0;
7577 + unsigned long flags;
7578 +
7579 + if(unlikely((tasklet->owner == NULL) || !is_realtime(tasklet->owner)))
7580 + {
7581 + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
7582 + return 0;
7583 + }
7584 +
7585 + cluster = task_cpu_cluster(tasklet->owner);
7586 +
7587 + raw_spin_lock_irqsave(&cluster->cfifo_lock, flags);
7588 +
7589 + thisCPU = smp_processor_id();
7590 +
7591 +#if 1
7592 +#ifdef CONFIG_SCHED_CPU_AFFINITY
7593 + {
7594 + cpu_entry_t* affinity = NULL;
7595 +
7596 + // use this CPU if it is in our cluster and isn't running any RT work.
7597 + if(cpu_isset(thisCPU, *cluster->cpu_map) && (__get_cpu_var(cfifo_cpu_entries).linked == NULL)) {
7598 + affinity = &(__get_cpu_var(cfifo_cpu_entries));
7599 + }
7600 + else {
7601 + // this CPU is busy or shouldn't run tasklets for this cluster.
7602 + // look for an available nearby CPU.
7603 + // NOTE: Affinity towards owner and not this CPU. Is this right?
7604 + affinity =
7605 + cfifo_get_nearest_available_cpu(cluster,
7606 + &per_cpu(cfifo_cpu_entries, task_cpu(tasklet->owner)));
7607 + }
7608 +
7609 + targetCPU = affinity;
7610 + }
7611 +#endif
7612 +#endif
7613 +
7614 + if (targetCPU == NULL) {
7615 + targetCPU = lowest_prio_cpu(cluster);
7616 + }
7617 +
7618 + if (fifo_higher_prio(tasklet->owner, targetCPU->linked)) {
7619 + if (thisCPU == targetCPU->cpu) {
7620 + TRACE("%s: Run tasklet locally (and now).\n", __FUNCTION__);
7621 + runLocal = 1;
7622 + runNow = 1;
7623 + }
7624 + else {
7625 + TRACE("%s: Run tasklet remotely (and now).\n", __FUNCTION__);
7626 + runLocal = 0;
7627 + runNow = 1;
7628 + }
7629 + }
7630 + else {
7631 + runLocal = 0;
7632 + runNow = 0;
7633 + }
7634 +
7635 + if(!runLocal) {
7636 + // enqueue the tasklet
7637 + __add_pai_tasklet(tasklet, cluster);
7638 + }
7639 +
7640 + raw_spin_unlock_irqrestore(&cluster->cfifo_lock, flags);
7641 +
7642 +
7643 + if (runLocal /*&& runNow */) { // runNow == 1 is implied
7644 + TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__);
7645 + __do_lit_tasklet(tasklet, 0ul);
7646 + }
7647 + else if (runNow /*&& !runLocal */) { // runLocal == 0 is implied
7648 + TRACE("%s: Triggering CPU %d to run tasklet.\n", __FUNCTION__, targetCPU->cpu);
7649 + preempt(targetCPU); // need to be protected by cfifo_lock?
7650 + }
7651 + else {
7652 + TRACE("%s: Scheduling of tasklet was deferred.\n", __FUNCTION__);
7653 + }
7654 +
7655 + return(1); // success
7656 +}
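+/*
+ * Summary of the decision made above (for reference):
+ *  - owner outranks the target CPU's linked task and the target is this CPU
+ *      -> run the tasklet inline, immediately;
+ *  - owner outranks the target CPU's linked task but the target is remote
+ *      -> queue the tasklet and trigger a reschedule via preempt(targetCPU);
+ *  - otherwise
+ *      -> queue the tasklet; it is picked up later by run_tasklets() when a
+ *         CPU schedules work of lower priority than the tasklet's owner.
+ */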
7657 +
7658 +
7659 +#endif
7660 +
7661 +
7662 +
7663 +
7664 +
7665 +
7666 +
7667 +
7668 +
7669 +
7670 +
7671 +
7672 +
7673 +
7674 +
7675 +
7676 +
7677 +
7678 +
7679 +
7680 +/* Getting schedule() right is a bit tricky. schedule() may not make any
7681 + * assumptions on the state of the current task since it may be called for a
7682 + * number of reasons. The reasons include a scheduler_tick() determined that it
7683 + * was necessary, because sys_exit_np() was called, because some Linux
7684 + * subsystem determined so, or even (in the worst case) because there is a bug
7685 + * hidden somewhere. Thus, we must take extreme care to determine what the
7686 + * current state is.
7687 + *
7688 + * The CPU could currently be scheduling a task (or not), be linked (or not).
7689 + *
7690 + * The following assertions for the scheduled task could hold:
7691 + *
7692 + * - !is_running(scheduled) // the job blocks
7693 + * - scheduled->timeslice == 0 // the job completed (forcefully)
7694 + * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall)
7695 + * - linked != scheduled // we need to reschedule (for any reason)
7696 + * - is_np(scheduled) // rescheduling must be delayed,
7697 + * sys_exit_np must be requested
7698 + *
7699 + * Any of these can occur together.
7700 + */
7701 +static struct task_struct* cfifo_schedule(struct task_struct * prev)
7702 +{
7703 + cpu_entry_t* entry = &__get_cpu_var(cfifo_cpu_entries);
7704 + cfifo_domain_t *cluster = entry->cluster;
7705 + int out_of_time, sleep, preempt, np, exists, blocks;
7706 + struct task_struct* next = NULL;
7707 +
7708 + raw_spin_lock(&cluster->cfifo_lock);
7709 + clear_will_schedule();
7710 +
7711 + /* sanity checking */
7712 + BUG_ON(entry->scheduled && entry->scheduled != prev);
7713 + BUG_ON(entry->scheduled && !is_realtime(prev));
7714 + BUG_ON(is_realtime(prev) && !entry->scheduled);
7715 +
7716 + /* (0) Determine state */
7717 + exists = entry->scheduled != NULL;
7718 + blocks = exists && !is_running(entry->scheduled);
7719 + out_of_time = exists &&
7720 + budget_enforced(entry->scheduled) &&
7721 + budget_exhausted(entry->scheduled);
7722 + np = exists && is_np(entry->scheduled);
7723 + sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
7724 + preempt = entry->scheduled != entry->linked;
7725 +
7726 +#ifdef WANT_ALL_SCHED_EVENTS
7727 + TRACE_TASK(prev, "invoked cfifo_schedule.\n");
7728 +#endif
7729 +
7730 + if (exists)
7731 + TRACE_TASK(prev,
7732 + "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
7733 + "state:%d sig:%d\n",
7734 + blocks, out_of_time, np, sleep, preempt,
7735 + prev->state, signal_pending(prev));
7736 + if (entry->linked && preempt)
7737 + TRACE_TASK(prev, "will be preempted by %s/%d\n",
7738 + entry->linked->comm, entry->linked->pid);
7739 +
7740 +
7741 + /* If a task blocks we have no choice but to reschedule.
7742 + */
7743 + if (blocks)
7744 + unlink(entry->scheduled);
7745 +
7746 + /* Request a sys_exit_np() call if we would like to preempt but cannot.
7747 + * We need to make sure to update the link structure anyway in case
7748 + * that we are still linked. Multiple calls to request_exit_np() don't
7749 + * hurt.
7750 + */
7751 + if (np && (out_of_time || preempt || sleep)) {
7752 + unlink(entry->scheduled);
7753 + request_exit_np(entry->scheduled);
7754 + }
7755 +
7756 + /* Any task that is preemptable and either exhausts its execution
7757 + * budget or wants to sleep completes. We may have to reschedule after
7758 + * this. Don't do a job completion if we block (can't have timers running
7759 + * for blocked jobs). Preemptions go first for the same reason.
7760 + */
7761 + if (!np && (out_of_time || sleep) && !blocks && !preempt)
7762 + job_completion(entry->scheduled, !sleep);
7763 +
7764 + /* Link pending task if we became unlinked.
7765 + */
7766 + if (!entry->linked)
7767 + link_task_to_cpu(__take_ready(&cluster->domain), entry);
7768 +
7769 + /* The final scheduling decision. Do we need to switch for some reason?
7770 + * If linked is different from scheduled, then select linked as next.
7771 + */
7772 + if ((!np || blocks) &&
7773 + entry->linked != entry->scheduled) {
7774 + /* Schedule a linked job? */
7775 + if (entry->linked) {
7776 + entry->linked->rt_param.scheduled_on = entry->cpu;
7777 + next = entry->linked;
7778 + }
7779 + if (entry->scheduled) {
7780 + /* not gonna be scheduled soon */
7781 + entry->scheduled->rt_param.scheduled_on = NO_CPU;
7782 + TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
7783 + }
7784 + } else
7785 + /* Only override Linux scheduler if we have a real-time task
7786 + * scheduled that needs to continue.
7787 + */
7788 + if (exists)
7789 + next = prev;
7790 +
7791 + sched_state_task_picked();
7792 + raw_spin_unlock(&cluster->cfifo_lock);
7793 +
7794 +#ifdef WANT_ALL_SCHED_EVENTS
7795 + TRACE("cfifo_lock released, next=0x%p\n", next);
7796 +
7797 + if (next)
7798 + TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
7799 + else if (exists && !next)
7800 + TRACE("becomes idle at %llu.\n", litmus_clock());
7801 +#endif
7802 +
7803 +
7804 + return next;
7805 +}
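+/*
+ * Worked example of the state flags above: a preemptable job that exhausts
+ * its budget while still runnable yields exists=1, blocks=0, out_of_time=1,
+ * np=0, sleep=0.  job_completion() is then called (a forced completion), the
+ * job is unlinked and requeued for its next period, and the highest-priority
+ * ready job (if any) is linked to this CPU and returned as 'next'.
+ */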
7806 +
7807 +
7808 +/* _finish_switch - we just finished the switch away from prev
7809 + */
7810 +static void cfifo_finish_switch(struct task_struct *prev)
7811 +{
7812 + cpu_entry_t* entry = &__get_cpu_var(cfifo_cpu_entries);
7813 +
7814 + entry->scheduled = is_realtime(current) ? current : NULL;
7815 +#ifdef WANT_ALL_SCHED_EVENTS
7816 + TRACE_TASK(prev, "switched away from\n");
7817 +#endif
7818 +}
7819 +
7820 +
7821 +/* Prepare a task for running in RT mode
7822 + */
7823 +static void cfifo_task_new(struct task_struct * t, int on_rq, int running)
7824 +{
7825 + unsigned long flags;
7826 + cpu_entry_t* entry;
7827 + cfifo_domain_t* cluster;
7828 +
7829 + TRACE("cfifo: task new %d\n", t->pid);
7830 +
7831 + /* the cluster doesn't change even if t is running */
7832 + cluster = task_cpu_cluster(t);
7833 +
7834 + raw_spin_lock_irqsave(&cluster->cfifo_lock, flags);
7835 +
7836 + /* setup job params */
7837 + release_at(t, litmus_clock());
7838 +
7839 + if (running) {
7840 + entry = &per_cpu(cfifo_cpu_entries, task_cpu(t));
7841 + BUG_ON(entry->scheduled);
7842 +
7843 + entry->scheduled = t;
7844 + tsk_rt(t)->scheduled_on = task_cpu(t);
7845 + } else {
7846 + t->rt_param.scheduled_on = NO_CPU;
7847 + }
7848 + t->rt_param.linked_on = NO_CPU;
7849 +
7850 + cfifo_job_arrival(t);
7851 + raw_spin_unlock_irqrestore(&cluster->cfifo_lock, flags);
7852 +}
7853 +
7854 +static void cfifo_task_wake_up(struct task_struct *task)
7855 +{
7856 + unsigned long flags;
7857 + //lt_t now;
7858 + cfifo_domain_t *cluster;
7859 +
7860 + TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
7861 +
7862 + cluster = task_cpu_cluster(task);
7863 +
7864 + raw_spin_lock_irqsave(&cluster->cfifo_lock, flags);
7865 +
7866 +#if 0 // sporadic task model
7867 + /* We need to take suspensions because of semaphores into
7868 + * account! If a job resumes after being suspended due to acquiring
7869 + * a semaphore, it should never be treated as a new job release.
7870 + */
7871 + if (get_rt_flags(task) == RT_F_EXIT_SEM) {
7872 + set_rt_flags(task, RT_F_RUNNING);
7873 + } else {
7874 + now = litmus_clock();
7875 + if (is_tardy(task, now)) {
7876 + /* new sporadic release */
7877 + release_at(task, now);
7878 + sched_trace_task_release(task);
7879 + }
7880 + else {
7881 + if (task->rt.time_slice) {
7882 + /* came back in time before deadline
7883 + */
7884 + set_rt_flags(task, RT_F_RUNNING);
7885 + }
7886 + }
7887 + }
7888 +#endif
7889 +
7890 + //BUG_ON(tsk_rt(task)->linked_on != NO_CPU);
7891 + set_rt_flags(task, RT_F_RUNNING); // periodic model
7892 +
7893 + if(tsk_rt(task)->linked_on == NO_CPU)
7894 + cfifo_job_arrival(task);
7895 + else
7896 + TRACE("WTF, mate?!\n");
7897 +
7898 + raw_spin_unlock_irqrestore(&cluster->cfifo_lock, flags);
7899 +}
7900 +
7901 +static void cfifo_task_block(struct task_struct *t)
7902 +{
7903 + unsigned long flags;
7904 + cfifo_domain_t *cluster;
7905 +
7906 + TRACE_TASK(t, "block at %llu\n", litmus_clock());
7907 +
7908 + cluster = task_cpu_cluster(t);
7909 +
7910 + /* unlink if necessary */
7911 + raw_spin_lock_irqsave(&cluster->cfifo_lock, flags);
7912 + unlink(t);
7913 + raw_spin_unlock_irqrestore(&cluster->cfifo_lock, flags);
7914 +
7915 + BUG_ON(!is_realtime(t));
7916 +}
7917 +
7918 +
7919 +static void cfifo_task_exit(struct task_struct * t)
7920 +{
7921 + unsigned long flags;
7922 + cfifo_domain_t *cluster = task_cpu_cluster(t);
7923 +
7924 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
7925 + flush_tasklets(cluster, t);
7926 +#endif
7927 +
7928 + /* unlink if necessary */
7929 + raw_spin_lock_irqsave(&cluster->cfifo_lock, flags);
7930 + unlink(t);
7931 + if (tsk_rt(t)->scheduled_on != NO_CPU) {
7932 + cpu_entry_t *cpu;
7933 + cpu = &per_cpu(cfifo_cpu_entries, tsk_rt(t)->scheduled_on);
7934 + cpu->scheduled = NULL;
7935 + tsk_rt(t)->scheduled_on = NO_CPU;
7936 + }
7937 + raw_spin_unlock_irqrestore(&cluster->cfifo_lock, flags);
7938 +
7939 + BUG_ON(!is_realtime(t));
7940 + TRACE_TASK(t, "RIP\n");
7941 +}
7942 +
7943 +static long cfifo_admit_task(struct task_struct* tsk)
7944 +{
7945 + return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
7946 +}
7947 +
7948 +
7949 +
7950 +
7951 +
7952 +
7953 +
7954 +
7955 +
7956 +
7957 +
7958 +
7959 +
7960 +#ifdef CONFIG_LITMUS_LOCKING
7961 +
7962 +#include <litmus/fdso.h>
7963 +
7964 +
7965 +static void __set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
7966 +{
7967 + int linked_on;
7968 + int check_preempt = 0;
7969 +
7970 + cfifo_domain_t* cluster = task_cpu_cluster(t);
7971 +
7972 + if(prio_inh != NULL)
7973 + TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid);
7974 + else
7975 + TRACE_TASK(t, "inherits priority from %p\n", prio_inh);
7976 +
7977 + sched_trace_eff_prio_change(t, prio_inh);
7978 +
7979 + tsk_rt(t)->inh_task = prio_inh;
7980 +
7981 + linked_on = tsk_rt(t)->linked_on;
7982 +
7983 + /* If it is scheduled, then we need to reorder the CPU heap. */
7984 + if (linked_on != NO_CPU) {
7985 + TRACE_TASK(t, "%s: linked on %d\n",
7986 + __FUNCTION__, linked_on);
7987 + /* Holder is scheduled; need to re-order CPUs.
7988 + * We can't use heap_decrease() here since
7989 + * the cpu_heap is ordered in reverse direction, so
7990 + * it is actually an increase. */
7991 + bheap_delete(cpu_lower_prio, &cluster->cpu_heap,
7992 + per_cpu(cfifo_cpu_entries, linked_on).hn);
7993 + bheap_insert(cpu_lower_prio, &cluster->cpu_heap,
7994 + per_cpu(cfifo_cpu_entries, linked_on).hn);
7995 + } else {
7996 + /* holder may be queued: first stop queue changes */
7997 + raw_spin_lock(&cluster->domain.release_lock);
7998 + if (is_queued(t)) {
7999 + TRACE_TASK(t, "%s: is queued\n", __FUNCTION__);
8000 +
8001 + /* We need to update the position of holder in some
8002 + * heap. Note that this could be a release heap if
8003 + * budget enforcement is used and this job overran. */
8004 + check_preempt = !bheap_decrease(fifo_ready_order, tsk_rt(t)->heap_node);
8005 +
8006 + } else {
8007 + /* Nothing to do: if it is not queued and not linked
8008 + * then it is either sleeping or currently being moved
8009 + * by other code (e.g., a timer interrupt handler) that
8010 + * will use the correct priority when enqueuing the
8011 + * task. */
8012 + TRACE_TASK(t, "%s: is NOT queued => Done.\n", __FUNCTION__);
8013 + }
8014 + raw_spin_unlock(&cluster->domain.release_lock);
8015 +
8016 + /* If holder was enqueued in a release heap, then the following
8017 + * preemption check is pointless, but we can't easily detect
8018 + * that case. If you want to fix this, then consider that
8019 + * simply adding a state flag requires O(n) time to update when
8020 + * releasing n tasks, which conflicts with the goal to have
8021 + * O(log n) merges. */
8022 + if (check_preempt) {
8023 + /* heap_decrease() hit the top level of the heap: make
8024 + * sure preemption checks get the right task, not the
8025 + * potentially stale cache. */
8026 + bheap_uncache_min(fifo_ready_order, &cluster->domain.ready_queue);
8027 + check_for_preemptions(cluster);
8028 + }
8029 + }
8030 +}
8031 +
8032 +/* called with IRQs off */
8033 +static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
8034 +{
8035 + cfifo_domain_t* cluster = task_cpu_cluster(t);
8036 +
8037 + raw_spin_lock(&cluster->cfifo_lock);
8038 +
8039 + __set_priority_inheritance(t, prio_inh);
8040 +
8041 +#ifdef CONFIG_LITMUS_SOFTIRQD
8042 + if(tsk_rt(t)->cur_klitirqd != NULL)
8043 + {
8044 + TRACE_TASK(t, "%s/%d inherits a new priority!\n",
8045 + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
8046 +
8047 + __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh);
8048 + }
8049 +#endif
8050 +
8051 + raw_spin_unlock(&cluster->cfifo_lock);
8052 +}
8053 +
8054 +
8055 +/* called with IRQs off */
8056 +static void __clear_priority_inheritance(struct task_struct* t)
8057 +{
8058 + TRACE_TASK(t, "priority restored\n");
8059 +
8060 + if(tsk_rt(t)->scheduled_on != NO_CPU)
8061 + {
8062 + sched_trace_eff_prio_change(t, NULL);
8063 +
8064 + tsk_rt(t)->inh_task = NULL;
8065 +
8066 + /* Check if rescheduling is necessary. We can't use heap_decrease()
8067 + * since the priority was effectively lowered. */
8068 + unlink(t);
8069 + cfifo_job_arrival(t);
8070 + }
8071 + else
8072 + {
8073 + __set_priority_inheritance(t, NULL);
8074 + }
8075 +
8076 +#ifdef CONFIG_LITMUS_SOFTIRQD
8077 + if(tsk_rt(t)->cur_klitirqd != NULL)
8078 + {
8079 + TRACE_TASK(t, "%s/%d inheritance set back to owner.\n",
8080 + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
8081 +
8082 + if(tsk_rt(tsk_rt(t)->cur_klitirqd)->scheduled_on != NO_CPU)
8083 + {
8084 + sched_trace_eff_prio_change(tsk_rt(t)->cur_klitirqd, t);
8085 +
8086 + tsk_rt(tsk_rt(t)->cur_klitirqd)->inh_task = t;
8087 +
8088 + /* Check if rescheduling is necessary. We can't use heap_decrease()
8089 + * since the priority was effectively lowered. */
8090 + unlink(tsk_rt(t)->cur_klitirqd);
8091 + cfifo_job_arrival(tsk_rt(t)->cur_klitirqd);
8092 + }
8093 + else
8094 + {
8095 + __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, t);
8096 + }
8097 + }
8098 +#endif
8099 +}
8100 +
8101 +/* called with IRQs off */
8102 +static void clear_priority_inheritance(struct task_struct* t)
8103 +{
8104 + cfifo_domain_t* cluster = task_cpu_cluster(t);
8105 +
8106 + raw_spin_lock(&cluster->cfifo_lock);
8107 + __clear_priority_inheritance(t);
8108 + raw_spin_unlock(&cluster->cfifo_lock);
8109 +}
8110 +
8111 +
8112 +
8113 +#ifdef CONFIG_LITMUS_SOFTIRQD
8114 +/* called with IRQs off */
8115 +static void set_priority_inheritance_klitirqd(struct task_struct* klitirqd,
8116 + struct task_struct* old_owner,
8117 + struct task_struct* new_owner)
8118 +{
8119 + cfifo_domain_t* cluster = task_cpu_cluster(klitirqd);
8120 +
8121 + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
8122 +
8123 + raw_spin_lock(&cluster->cfifo_lock);
8124 +
8125 + if(old_owner != new_owner)
8126 + {
8127 + if(old_owner)
8128 + {
8129 + // unreachable?
8130 + tsk_rt(old_owner)->cur_klitirqd = NULL;
8131 + }
8132 +
8133 + TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n",
8134 + new_owner->comm, new_owner->pid);
8135 +
8136 + tsk_rt(new_owner)->cur_klitirqd = klitirqd;
8137 + }
8138 +
8139 + __set_priority_inheritance(klitirqd,
8140 + (tsk_rt(new_owner)->inh_task == NULL) ?
8141 + new_owner :
8142 + tsk_rt(new_owner)->inh_task);
8143 +
8144 + raw_spin_unlock(&cluster->cfifo_lock);
8145 +}
8146 +
8147 +/* called with IRQs off */
8148 +static void clear_priority_inheritance_klitirqd(struct task_struct* klitirqd,
8149 + struct task_struct* old_owner)
8150 +{
8151 + cfifo_domain_t* cluster = task_cpu_cluster(klitirqd);
8152 +
8153 + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
8154 +
8155 + raw_spin_lock(&cluster->cfifo_lock);
8156 +
8157 + TRACE_TASK(klitirqd, "priority restored\n");
8158 +
8159 + if(tsk_rt(klitirqd)->scheduled_on != NO_CPU)
8160 + {
8161 + tsk_rt(klitirqd)->inh_task = NULL;
8162 +
8163 + /* Check if rescheduling is necessary. We can't use heap_decrease()
8164 + * since the priority was effectively lowered. */
8165 + unlink(klitirqd);
8166 + cfifo_job_arrival(klitirqd);
8167 + }
8168 + else
8169 + {
8170 + __set_priority_inheritance(klitirqd, NULL);
8171 + }
8172 +
8173 + tsk_rt(old_owner)->cur_klitirqd = NULL;
8174 +
8175 + raw_spin_unlock(&cluster->cfifo_lock);
8176 +}
8177 +#endif // CONFIG_LITMUS_SOFTIRQD
8178 +
8179 +
8180 +/* ******************** KFMLP support ********************** */
8181 +
8182 +/* struct for semaphore with priority inheritance */
8183 +struct kfmlp_queue
8184 +{
8185 + wait_queue_head_t wait;
8186 + struct task_struct* owner;
8187 + struct task_struct* hp_waiter;
8188 + int count; /* number of waiters + holder */
8189 +};
8190 +
8191 +struct kfmlp_semaphore
8192 +{
8193 + struct litmus_lock litmus_lock;
8194 +
8195 + spinlock_t lock;
8196 +
8197 + int num_resources; /* aka k */
8198 + struct kfmlp_queue *queues; /* array */
8199 + struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */
8200 +};
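+/*
+ * Layout sketch: a KFMLP semaphore guards k (= num_resources) replicas of a
+ * resource.  queues[i] serializes access to replica i; count includes the
+ * holder plus all waiters.  shortest_queue caches the least-loaded queue so
+ * that cfifo_kfmlp_lock() can pick a queue without scanning; it is kept up
+ * to date on every enqueue and dequeue.
+ */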
8201 +
8202 +static inline struct kfmlp_semaphore* kfmlp_from_lock(struct litmus_lock* lock)
8203 +{
8204 + return container_of(lock, struct kfmlp_semaphore, litmus_lock);
8205 +}
8206 +
8207 +static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem,
8208 + struct kfmlp_queue* queue)
8209 +{
8210 + return (queue - &sem->queues[0]);
8211 +}
8212 +
8213 +static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem,
8214 + struct task_struct* holder)
8215 +{
8216 + int i;
8217 + for(i = 0; i < sem->num_resources; ++i)
8218 + if(sem->queues[i].owner == holder)
8219 + return(&sem->queues[i]);
8220 + return(NULL);
8221 +}
8222 +
8223 +/* caller is responsible for locking */
8224 +static struct task_struct* kfmlp_find_hp_waiter(struct kfmlp_queue *kqueue,
8225 + struct task_struct *skip)
8226 +{
8227 + struct list_head *pos;
8228 + struct task_struct *queued, *found = NULL;
8229 +
8230 + list_for_each(pos, &kqueue->wait.task_list) {
8231 + queued = (struct task_struct*) list_entry(pos, wait_queue_t,
8232 + task_list)->private;
8233 +
8234 + /* Compare task prios, find high prio task. */
8235 + if (queued != skip && fifo_higher_prio(queued, found))
8236 + found = queued;
8237 + }
8238 + return found;
8239 +}
8240 +
8241 +static inline struct kfmlp_queue* kfmlp_find_shortest(
8242 + struct kfmlp_semaphore* sem,
8243 + struct kfmlp_queue* search_start)
8244 +{
8245 + // we start our search at search_start instead of at the beginning of the
8246 + // queue list to load-balance across all resources.
8247 + struct kfmlp_queue* step = search_start;
8248 + struct kfmlp_queue* shortest = sem->shortest_queue;
8249 +
8250 + do
8251 + {
8252 + step = (step+1 != &sem->queues[sem->num_resources]) ?
8253 + step+1 : &sem->queues[0];
8254 + if(step->count < shortest->count)
8255 + {
8256 + shortest = step;
8257 + if(step->count == 0)
8258 + break; /* can't get any shorter */
8259 + }
8260 + }while(step != search_start);
8261 +
8262 + return(shortest);
8263 +}
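+/*
+ * Example (assuming sem->shortest_queue currently points at queues[0]): with
+ * k = 3 and queue counts {2, 0, 1}, a call with search_start = &queues[0]
+ * steps to queues[1], sees count == 0 and returns it immediately.  Starting
+ * the scan at search_start rather than at queues[0] spreads ties across the
+ * replicas.
+ */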
8264 +
8265 +static struct task_struct* kfmlp_remove_hp_waiter(struct kfmlp_semaphore* sem)
8266 +{
8267 + /* must hold sem->lock */
8268 +
8269 + struct kfmlp_queue *my_queue = NULL;
8270 + struct task_struct *max_hp = NULL;
8271 +
8272 +
8273 + struct list_head *pos;
8274 + struct task_struct *queued;
8275 + int i;
8276 +
8277 + for(i = 0; i < sem->num_resources; ++i)
8278 + {
8279 + if( (sem->queues[i].count > 1) &&
8280 + ((my_queue == NULL) ||
8281 + (fifo_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) )
8282 + {
8283 + my_queue = &sem->queues[i];
8284 + }
8285 + }
8286 +
8287 + if(my_queue)
8288 + {
8289 + cfifo_domain_t* cluster;
8290 +
8291 + max_hp = my_queue->hp_waiter;
8292 + BUG_ON(!max_hp);
8293 +
8294 + TRACE_CUR("queue %d: stealing %s/%d from queue %d\n",
8295 + kfmlp_get_idx(sem, my_queue),
8296 + max_hp->comm, max_hp->pid,
8297 + kfmlp_get_idx(sem, my_queue));
8298 +
8299 + my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, max_hp);
8300 +
8301 + /*
8302 + if(my_queue->hp_waiter)
8303 + TRACE_CUR("queue %d: new hp_waiter is %s/%d\n",
8304 + kfmlp_get_idx(sem, my_queue),
8305 + my_queue->hp_waiter->comm,
8306 + my_queue->hp_waiter->pid);
8307 + else
8308 + TRACE_CUR("queue %d: new hp_waiter is %p\n",
8309 + kfmlp_get_idx(sem, my_queue), NULL);
8310 + */
8311 +
8312 + cluster = task_cpu_cluster(max_hp);
8313 +
8314 + raw_spin_lock(&cluster->cfifo_lock);
8315 +
8316 + /*
8317 + if(my_queue->owner)
8318 + TRACE_CUR("queue %d: owner is %s/%d\n",
8319 + kfmlp_get_idx(sem, my_queue),
8320 + my_queue->owner->comm,
8321 + my_queue->owner->pid);
8322 + else
8323 + TRACE_CUR("queue %d: owner is %p\n",
8324 + kfmlp_get_idx(sem, my_queue),
8325 + NULL);
8326 + */
8327 +
8328 + if(tsk_rt(my_queue->owner)->inh_task == max_hp)
8329 + {
8330 + __clear_priority_inheritance(my_queue->owner);
8331 + if(my_queue->hp_waiter != NULL)
8332 + {
8333 + __set_priority_inheritance(my_queue->owner, my_queue->hp_waiter);
8334 + }
8335 + }
8336 + raw_spin_unlock(&cluster->cfifo_lock);
8337 +
8338 + list_for_each(pos, &my_queue->wait.task_list)
8339 + {
8340 + queued = (struct task_struct*) list_entry(pos, wait_queue_t,
8341 + task_list)->private;
8342 + /* Compare task prios, find high prio task. */
8343 + if (queued == max_hp)
8344 + {
8345 + /*
8346 + TRACE_CUR("queue %d: found entry in wait queue. REMOVING!\n",
8347 + kfmlp_get_idx(sem, my_queue));
8348 + */
8349 + __remove_wait_queue(&my_queue->wait,
8350 + list_entry(pos, wait_queue_t, task_list));
8351 + break;
8352 + }
8353 + }
8354 + --(my_queue->count);
8355 + }
8356 +
8357 + return(max_hp);
8358 +}
8359 +
8360 +int cfifo_kfmlp_lock(struct litmus_lock* l)
8361 +{
8362 + struct task_struct* t = current;
8363 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
8364 + struct kfmlp_queue* my_queue;
8365 + wait_queue_t wait;
8366 + unsigned long flags;
8367 +
8368 + if (!is_realtime(t))
8369 + return -EPERM;
8370 +
8371 + spin_lock_irqsave(&sem->lock, flags);
8372 +
8373 + my_queue = sem->shortest_queue;
8374 +
8375 + if (my_queue->owner) {
8376 + /* resource is not free => must suspend and wait */
8377 + TRACE_CUR("queue %d: Resource is not free => must suspend and wait.\n",
8378 + kfmlp_get_idx(sem, my_queue));
8379 +
8380 + init_waitqueue_entry(&wait, t);
8381 +
8382 + /* FIXME: interruptible would be nice some day */
8383 + set_task_state(t, TASK_UNINTERRUPTIBLE);
8384 +
8385 + __add_wait_queue_tail_exclusive(&my_queue->wait, &wait);
8386 +
8387 + /* check if we need to activate priority inheritance */
8388 + if (fifo_higher_prio(t, my_queue->hp_waiter))
8389 + {
8390 + my_queue->hp_waiter = t;
8391 + if (fifo_higher_prio(t, my_queue->owner))
8392 + {
8393 + set_priority_inheritance(my_queue->owner, my_queue->hp_waiter);
8394 + }
8395 + }
8396 +
8397 + ++(my_queue->count);
8398 + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
8399 +
8400 + /* release lock before sleeping */
8401 + spin_unlock_irqrestore(&sem->lock, flags);
8402 +
8403 + /* We depend on the FIFO order. Thus, we don't need to recheck
8404 + * when we wake up; we are guaranteed to have the lock since
8405 + * there is only one wake up per release (or steal).
8406 + */
8407 + schedule();
8408 +
8409 +
8410 + if(my_queue->owner == t)
8411 + {
8412 + TRACE_CUR("queue %d: acquired through waiting\n",
8413 + kfmlp_get_idx(sem, my_queue));
8414 + }
8415 + else
8416 + {
8417 + /* this case may happen if our wait entry was stolen
8418 + between queues. record where we went.*/
8419 + my_queue = kfmlp_get_queue(sem, t);
8420 + BUG_ON(!my_queue);
8421 + TRACE_CUR("queue %d: acquired through stealing\n",
8422 + kfmlp_get_idx(sem, my_queue));
8423 + }
8424 + }
8425 + else
8426 + {
8427 + TRACE_CUR("queue %d: acquired immediately\n",
8428 + kfmlp_get_idx(sem, my_queue));
8429 +
8430 + my_queue->owner = t;
8431 +
8432 + ++(my_queue->count);
8433 + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
8434 +
8435 + spin_unlock_irqrestore(&sem->lock, flags);
8436 + }
8437 +
8438 + return kfmlp_get_idx(sem, my_queue);
8439 +}
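+/*
+ * Note: on success the return value is the index of the queue/replica that
+ * was acquired (kfmlp_get_idx()), so the caller knows which of the k
+ * instances of the protected resource it now holds -- even if its wait entry
+ * was stolen into a different queue while it slept.
+ */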
8440 +
8441 +int cfifo_kfmlp_unlock(struct litmus_lock* l)
8442 +{
8443 + struct task_struct *t = current, *next;
8444 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
8445 + struct kfmlp_queue *my_queue;
8446 + unsigned long flags;
8447 + int err = 0;
8448 +
8449 + spin_lock_irqsave(&sem->lock, flags);
8450 +
8451 + my_queue = kfmlp_get_queue(sem, t);
8452 +
8453 + if (!my_queue) {
8454 + err = -EINVAL;
8455 + goto out;
8456 + }
8457 +
8458 + /* check if there are jobs waiting for this resource */
8459 + next = __waitqueue_remove_first(&my_queue->wait);
8460 + if (next) {
8461 + /*
8462 + TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n",
8463 + kfmlp_get_idx(sem, my_queue),
8464 + next->comm, next->pid);
8465 + */
8466 + /* next becomes the resource holder */
8467 + my_queue->owner = next;
8468 +
8469 + --(my_queue->count);
8470 + if(my_queue->count < sem->shortest_queue->count)
8471 + {
8472 + sem->shortest_queue = my_queue;
8473 + }
8474 +
8475 + TRACE_CUR("queue %d: lock ownership passed to %s/%d\n",
8476 + kfmlp_get_idx(sem, my_queue), next->comm, next->pid);
8477 +
8478 + /* determine new hp_waiter if necessary */
8479 + if (next == my_queue->hp_waiter) {
8480 + TRACE_TASK(next, "was highest-prio waiter\n");
8481 + /* next has the highest priority --- it doesn't need to
8482 + * inherit. However, we need to make sure that the
8483 + * next-highest priority in the queue is reflected in
8484 + * hp_waiter. */
8485 + my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next);
8486 + if (my_queue->hp_waiter)
8487 + TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue));
8488 + else
8489 + TRACE("queue %d: no further waiters\n", kfmlp_get_idx(sem, my_queue));
8490 + } else {
8491 + /* Well, if next is not the highest-priority waiter,
8492 + * then it ought to inherit the highest-priority
8493 + * waiter's priority. */
8494 + set_priority_inheritance(next, my_queue->hp_waiter);
8495 + }
8496 +
8497 + /* wake up next */
8498 + wake_up_process(next);
8499 + }
8500 + else
8501 + {
8502 + TRACE_CUR("queue %d: looking to steal someone...\n", kfmlp_get_idx(sem, my_queue));
8503 +
8504 + next = kfmlp_remove_hp_waiter(sem); /* returns NULL if nothing to steal */
8505 +
8506 + /*
8507 + if(next)
8508 + TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - steal\n",
8509 + kfmlp_get_idx(sem, my_queue),
8510 + next->comm, next->pid);
8511 + */
8512 +
8513 + my_queue->owner = next;
8514 +
8515 + if(next)
8516 + {
8517 + TRACE_CUR("queue %d: lock ownership passed to %s/%d (which was stolen)\n",
8518 + kfmlp_get_idx(sem, my_queue),
8519 + next->comm, next->pid);
8520 +
8521 + /* wake up next */
8522 + wake_up_process(next);
8523 + }
8524 + else
8525 + {
8526 + TRACE_CUR("queue %d: no one to steal.\n", kfmlp_get_idx(sem, my_queue));
8527 +
8528 + --(my_queue->count);
8529 + if(my_queue->count < sem->shortest_queue->count)
8530 + {
8531 + sem->shortest_queue = my_queue;
8532 + }
8533 + }
8534 + }
8535 +
8536 + /* we lose the benefit of priority inheritance (if any) */
8537 + if (tsk_rt(t)->inh_task)
8538 + clear_priority_inheritance(t);
8539 +
8540 +out:
8541 + spin_unlock_irqrestore(&sem->lock, flags);
8542 +
8543 + return err;
8544 +}
8545 +
8546 +int cfifo_kfmlp_close(struct litmus_lock* l)
8547 +{
8548 + struct task_struct *t = current;
8549 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
8550 + struct kfmlp_queue *my_queue;
8551 + unsigned long flags;
8552 +
8553 + int owner;
8554 +
8555 + spin_lock_irqsave(&sem->lock, flags);
8556 +
8557 + my_queue = kfmlp_get_queue(sem, t);
8558 + owner = (my_queue) ? (my_queue->owner == t) : 0;
8559 +
8560 + spin_unlock_irqrestore(&sem->lock, flags);
8561 +
8562 + if (owner)
8563 + cfifo_kfmlp_unlock(l);
8564 +
8565 + return 0;
8566 +}
8567 +
8568 +void cfifo_kfmlp_free(struct litmus_lock* l)
8569 +{
8570 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
8571 + kfree(sem->queues);
8572 + kfree(sem);
8573 +}
8574 +
8575 +static struct litmus_lock_ops cfifo_kfmlp_lock_ops = {
8576 + .close = cfifo_kfmlp_close,
8577 + .lock = cfifo_kfmlp_lock,
8578 + .unlock = cfifo_kfmlp_unlock,
8579 + .deallocate = cfifo_kfmlp_free,
8580 +};
8581 +
8582 +static struct litmus_lock* cfifo_new_kfmlp(void* __user arg, int* ret_code)
8583 +{
8584 + struct kfmlp_semaphore* sem;
8585 + int num_resources = 0;
8586 + int i;
8587 +
8588 + if(!access_ok(VERIFY_READ, arg, sizeof(num_resources)))
8589 + {
8590 + *ret_code = -EINVAL;
8591 + return(NULL);
8592 + }
8593 + if(__copy_from_user(&num_resources, arg, sizeof(num_resources)))
8594 + {
8595 + *ret_code = -EINVAL;
8596 + return(NULL);
8597 + }
8598 + if(num_resources < 1)
8599 + {
8600 + *ret_code = -EINVAL;
8601 + return(NULL);
8602 + }
8603 +
8604 + sem = kmalloc(sizeof(*sem), GFP_KERNEL);
8605 + if(!sem)
8606 + {
8607 + *ret_code = -ENOMEM;
8608 + return NULL;
8609 + }
8610 +
8611 + sem->queues = kmalloc(sizeof(struct kfmlp_queue)*num_resources, GFP_KERNEL);
8612 + if(!sem->queues)
8613 + {
8614 + kfree(sem);
8615 + *ret_code = -ENOMEM;
8616 + return NULL;
8617 + }
8618 +
8619 + sem->litmus_lock.ops = &cfifo_kfmlp_lock_ops;
8620 + spin_lock_init(&sem->lock);
8621 + sem->num_resources = num_resources;
8622 +
8623 + for(i = 0; i < num_resources; ++i)
8624 + {
8625 + sem->queues[i].owner = NULL;
8626 + sem->queues[i].hp_waiter = NULL;
8627 + init_waitqueue_head(&sem->queues[i].wait);
8628 + sem->queues[i].count = 0;
8629 + }
8630 +
8631 + sem->shortest_queue = &sem->queues[0];
8632 +
8633 + *ret_code = 0;
8634 + return &sem->litmus_lock;
8635 +}
8636 +
8637 +
8638 +/* **** lock constructor **** */
8639 +
8640 +static long cfifo_allocate_lock(struct litmus_lock **lock, int type,
8641 + void* __user arg)
8642 +{
8643 + int err = -ENXIO;
8644 +
8645 + /* C-FIFO currently only supports the FMLP for global resources
8646 + /* C-FIFO currently only supports the KFMLP for global resources
8647 + switch (type) {
8648 + case KFMLP_SEM:
8649 + *lock = cfifo_new_kfmlp(arg, &err);
8650 + break;
8651 + };
8652 +
8653 + return err;
8654 +}
8655 +
8656 +#endif // CONFIG_LITMUS_LOCKING
8657 +
8658 +
8659 +
8660 +
8661 +
8662 +
8663 +/* total number of clusters */
8664 +static int num_clusters;
8665 +/* we do not support clusters of different sizes */
8666 +static unsigned int cluster_size;
8667 +
8668 +#ifdef VERBOSE_INIT
8669 +static void print_cluster_topology(cpumask_var_t mask, int cpu)
8670 +{
8671 + int chk;
8672 + char buf[255];
8673 +
8674 + chk = cpulist_scnprintf(buf, 254, mask);
8675 + buf[chk] = '\0';
8676 + printk(KERN_INFO "CPU = %d, shared cpu(s) = %s\n", cpu, buf);
8677 +
8678 +}
8679 +#endif
8680 +
8681 +static int clusters_allocated = 0;
8682 +
8683 +static void cleanup_cfifo(void)
8684 +{
8685 + int i;
8686 +
8687 + if (clusters_allocated) {
8688 + for (i = 0; i < num_clusters; i++) {
8689 + kfree(cfifo[i].cpus);
8690 + kfree(cfifo[i].heap_node);
8691 + free_cpumask_var(cfifo[i].cpu_map);
8692 + }
8693 +
8694 + kfree(cfifo);
8695 + }
8696 +}
8697 +
8698 +static long cfifo_activate_plugin(void)
8699 +{
8700 + int i, j, cpu, ccpu, cpu_count;
8701 + cpu_entry_t *entry;
8702 +
8703 + cpumask_var_t mask;
8704 + int chk = 0;
8705 +
8706 + /* de-allocate old clusters, if any */
8707 + cleanup_cfifo();
8708 +
8709 + printk(KERN_INFO "C-FIFO: Activate Plugin, cluster configuration = %d\n",
8710 + cluster_config);
8711 +
8712 + /* need to get cluster_size first */
8713 + if(!zalloc_cpumask_var(&mask, GFP_ATOMIC))
8714 + return -ENOMEM;
8715 +
8716 + if (unlikely(cluster_config == GLOBAL_CLUSTER)) {
8717 + cluster_size = num_online_cpus();
8718 + } else {
8719 + chk = get_shared_cpu_map(mask, 0, cluster_config);
8720 + if (chk) {
8721 + /* if chk != 0 then it is the max allowed index */
8722 + printk(KERN_INFO "C-FIFO: Cluster configuration = %d "
8723 + "is not supported on this hardware.\n",
8724 + cluster_config);
8725 + /* User should notice that the configuration failed, so
8726 + * let's bail out. */
8727 + return -EINVAL;
8728 + }
8729 +
8730 + cluster_size = cpumask_weight(mask);
8731 + }
8732 +
8733 + if ((num_online_cpus() % cluster_size) != 0) {
8734 + /* this can't be right, some cpus are left out */
8735 + printk(KERN_ERR "C-FIFO: Trying to group %d cpus in %d!\n",
8736 + num_online_cpus(), cluster_size);
8737 + return -1;
8738 + }
8739 +
8740 + num_clusters = num_online_cpus() / cluster_size;
8741 + printk(KERN_INFO "C-FIFO: %d cluster(s) of size = %d\n",
8742 + num_clusters, cluster_size);
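+	/* e.g., 8 online CPUs with L2 caches shared by pairs of CPUs yield
+	 * cluster_size = 2 and num_clusters = 4; GLOBAL_CLUSTER instead
+	 * gives a single cluster spanning all 8 CPUs. */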
8743 +
8744 + /* initialize clusters */
8745 + cfifo = kmalloc(num_clusters * sizeof(cfifo_domain_t), GFP_ATOMIC);
8746 + for (i = 0; i < num_clusters; i++) {
8747 +
8748 + cfifo[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t),
8749 + GFP_ATOMIC);
8750 + cfifo[i].heap_node = kmalloc(
8751 + cluster_size * sizeof(struct bheap_node),
8752 + GFP_ATOMIC);
8753 + bheap_init(&(cfifo[i].cpu_heap));
8754 + fifo_domain_init(&(cfifo[i].domain), NULL, cfifo_release_jobs);
8755 +
8756 +
8757 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
8758 + cfifo[i].pending_tasklets.head = NULL;
8759 + cfifo[i].pending_tasklets.tail = &(cfifo[i].pending_tasklets.head);
8760 +#endif
8761 +
8762 + if(!zalloc_cpumask_var(&cfifo[i].cpu_map, GFP_ATOMIC))
8763 + return -ENOMEM;
8764 + }
8765 +
8766 + /* cycle through clusters and add cpus to them */
8767 + for (i = 0; i < num_clusters; i++) {
8768 +
8769 + for_each_online_cpu(cpu) {
8770 + /* check if the cpu is already in a cluster */
8771 + for (j = 0; j < num_clusters; j++)
8772 + if (cpumask_test_cpu(cpu, cfifo[j].cpu_map))
8773 + break;
8774 + /* if it is in a cluster go to next cpu */
8775 + if (j < num_clusters &&
8776 + cpumask_test_cpu(cpu, cfifo[j].cpu_map))
8777 + continue;
8778 +
8779 + /* this cpu isn't in any cluster */
8780 + /* get the shared cpus */
8781 + if (unlikely(cluster_config == GLOBAL_CLUSTER))
8782 + cpumask_copy(mask, cpu_online_mask);
8783 + else
8784 + get_shared_cpu_map(mask, cpu, cluster_config);
8785 +
8786 + cpumask_copy(cfifo[i].cpu_map, mask);
8787 +#ifdef VERBOSE_INIT
8788 + print_cluster_topology(mask, cpu);
8789 +#endif
8790 + /* add cpus to current cluster and init cpu_entry_t */
8791 + cpu_count = 0;
8792 + for_each_cpu(ccpu, cfifo[i].cpu_map) {
8793 +
8794 + entry = &per_cpu(cfifo_cpu_entries, ccpu);
8795 + cfifo[i].cpus[cpu_count] = entry;
8796 + atomic_set(&entry->will_schedule, 0);
8797 + entry->cpu = ccpu;
8798 + entry->cluster = &cfifo[i];
8799 + entry->hn = &(cfifo[i].heap_node[cpu_count]);
8800 + bheap_node_init(&entry->hn, entry);
8801 +
8802 + cpu_count++;
8803 +
8804 + entry->linked = NULL;
8805 + entry->scheduled = NULL;
8806 + update_cpu_position(entry);
8807 + }
8808 + /* done with this cluster */
8809 + break;
8810 + }
8811 + }
8812 +
8813 +#ifdef CONFIG_LITMUS_SOFTIRQD
8814 + {
8815 + /* distribute the daemons evenly across the clusters. */
8816 + int* affinity = kmalloc(NR_LITMUS_SOFTIRQD * sizeof(int), GFP_ATOMIC);
8817 + int num_daemons_per_cluster = NR_LITMUS_SOFTIRQD / num_clusters;
8818 + int left_over = NR_LITMUS_SOFTIRQD % num_clusters;
8819 +
8820 + int daemon = 0;
8821 + for(i = 0; i < num_clusters; ++i)
8822 + {
8823 + int num_on_this_cluster = num_daemons_per_cluster;
8824 + if(left_over)
8825 + {
8826 + ++num_on_this_cluster;
8827 + --left_over;
8828 + }
8829 +
8830 + for(j = 0; j < num_on_this_cluster; ++j)
8831 + {
8832 + // first CPU of this cluster
8833 + affinity[daemon++] = i*cluster_size;
8834 + }
8835 + }
8836 +
8837 + spawn_klitirqd(affinity);
8838 +
8839 + kfree(affinity);
8840 + }
8841 +#endif
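+/*
+ * Example of the distribution above: NR_LITMUS_SOFTIRQD = 4 daemons and
+ * num_clusters = 3 gives two daemons for cluster 0 and one each for clusters
+ * 1 and 2; each daemon is given affinity for the first CPU of its cluster
+ * (i * cluster_size).
+ */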
8842 +
8843 +#ifdef CONFIG_LITMUS_NVIDIA
8844 + init_nvidia_info();
8845 +#endif
8846 +
8847 + free_cpumask_var(mask);
8848 + clusters_allocated = 1;
8849 + return 0;
8850 +}
8851 +
8852 +/* Plugin object */
8853 +static struct sched_plugin cfifo_plugin __cacheline_aligned_in_smp = {
8854 + .plugin_name = "C-FIFO",
8855 + .finish_switch = cfifo_finish_switch,
8856 + .tick = cfifo_tick,
8857 + .task_new = cfifo_task_new,
8858 + .complete_job = complete_job,
8859 + .task_exit = cfifo_task_exit,
8860 + .schedule = cfifo_schedule,
8861 + .task_wake_up = cfifo_task_wake_up,
8862 + .task_block = cfifo_task_block,
8863 + .admit_task = cfifo_admit_task,
8864 + .activate_plugin = cfifo_activate_plugin,
8865 +#ifdef CONFIG_LITMUS_LOCKING
8866 + .allocate_lock = cfifo_allocate_lock,
8867 + .set_prio_inh = set_priority_inheritance,
8868 + .clear_prio_inh = clear_priority_inheritance,
8869 +#endif
8870 +#ifdef CONFIG_LITMUS_SOFTIRQD
8871 + .set_prio_inh_klitirqd = set_priority_inheritance_klitirqd,
8872 + .clear_prio_inh_klitirqd = clear_priority_inheritance_klitirqd,
8873 +#endif
8874 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
8875 + .enqueue_pai_tasklet = enqueue_pai_tasklet,
8876 + .run_tasklets = run_tasklets,
8877 +#endif
8878 +};
8879 +
8880 +static struct proc_dir_entry *cluster_file = NULL, *cfifo_dir = NULL;
8881 +
8882 +static int __init init_cfifo(void)
8883 +{
8884 + int err, fs;
8885 +
8886 + err = register_sched_plugin(&cfifo_plugin);
8887 + if (!err) {
8888 + fs = make_plugin_proc_dir(&cfifo_plugin, &cfifo_dir);
8889 + if (!fs)
8890 + cluster_file = create_cluster_file(cfifo_dir, &cluster_config);
8891 + else
8892 + printk(KERN_ERR "Could not allocate C-FIFO procfs dir.\n");
8893 + }
8894 + return err;
8895 +}
8896 +
8897 +static void clean_cfifo(void)
8898 +{
8899 + cleanup_cfifo();
8900 + if (cluster_file)
8901 + remove_proc_entry("cluster", cfifo_dir);
8902 + if (cfifo_dir)
8903 + remove_plugin_proc_dir(&cfifo_plugin);
8904 +}
8905 +
8906 +module_init(init_cfifo);
8907 +module_exit(clean_cfifo);
8908 diff --git a/litmus/sched_crm.c b/litmus/sched_crm.c
8909 new file mode 100644
8910 index 0000000..e51de10
8911 --- /dev/null
8912 +++ b/litmus/sched_crm.c
8913 @@ -0,0 +1,2099 @@
8914 +/*
8915 + * litmus/sched_crm.c
8916 + *
8917 + * Implementation of the C-RM scheduling algorithm.
8918 + *
8919 + * This implementation is based on G-EDF:
8920 + * - CPUs are clustered around L2 or L3 caches.
8921 + * - Cluster topology is automatically detected (this is arch dependent
8922 + * and works only on x86 at the moment --- and only with modern
8923 + * cpus that export cpuid4 information)
8924 + * - The plugin _does not_ attempt to put tasks in the right cluster, i.e.,
8925 + * the programmer needs to be aware of the topology to place tasks
8926 + * in the desired cluster
8927 + * - default clustering is around L2 cache (cache index = 2)
8928 + * supported clusters are: L1 (private cache: pedf), L2, L3, ALL (all
8929 + * online_cpus are placed in a single cluster).
8930 + *
8931 + * For details on functions, take a look at sched_gsn_edf.c
8932 + *
8933 + * Currently, we do not support changes in the number of online cpus.
8934 + * If the num_online_cpus() dynamically changes, the plugin is broken.
8935 + *
8936 + * This version uses the simple approach and serializes all scheduling
8937 + * decisions by the use of a queue lock. This is probably not the
8938 + * best way to do it, but it should suffice for now.
8939 + */
8940 +
8941 +#include <linux/spinlock.h>
8942 +#include <linux/percpu.h>
8943 +#include <linux/sched.h>
8944 +#include <linux/slab.h>
8945 +#include <linux/uaccess.h>
8946 +
8947 +#include <linux/module.h>
8948 +
8949 +#include <litmus/litmus.h>
8950 +#include <litmus/jobs.h>
8951 +#include <litmus/preempt.h>
8952 +#include <litmus/sched_plugin.h>
8953 +#include <litmus/rm_common.h>
8954 +#include <litmus/sched_trace.h>
8955 +
8956 +#include <litmus/clustered.h>
8957 +
8958 +#include <litmus/bheap.h>
8959 +
8960 +/* to configure the cluster size */
8961 +#include <litmus/litmus_proc.h>
8962 +
8963 +#ifdef CONFIG_SCHED_CPU_AFFINITY
8964 +#include <litmus/affinity.h>
8965 +#endif
8966 +
8967 +#ifdef CONFIG_LITMUS_SOFTIRQD
8968 +#include <litmus/litmus_softirq.h>
8969 +#endif
8970 +
8971 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
8972 +#include <linux/interrupt.h>
8973 +#include <litmus/trace.h>
8974 +#endif
8975 +
8976 +#ifdef CONFIG_LITMUS_NVIDIA
8977 +#include <litmus/nvidia_info.h>
8978 +#endif
8979 +
8980 +/* Reference configuration variable. Determines which cache level is used to
8981 + * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that
8982 + * all CPUs form a single cluster (just like GSN-EDF).
8983 + */
8984 +static enum cache_level cluster_config = GLOBAL_CLUSTER;
8985 +
8986 +struct clusterdomain;
8987 +
8988 +/* cpu_entry_t - maintain the linked and scheduled state
8989 + *
8990 + * A cpu also contains a pointer to the crm_domain_t cluster
8991 + * that owns it (struct clusterdomain*)
8992 + */
8993 +typedef struct {
8994 + int cpu;
8995 + struct clusterdomain* cluster; /* owning cluster */
8996 + struct task_struct* linked; /* only RT tasks */
8997 + struct task_struct* scheduled; /* only RT tasks */
8998 + atomic_t will_schedule; /* prevent unneeded IPIs */
8999 + struct bheap_node* hn;
9000 +} cpu_entry_t;
9001 +
9002 +/* one cpu_entry_t per CPU */
9003 +DEFINE_PER_CPU(cpu_entry_t, crm_cpu_entries);
9004 +
9005 +#define set_will_schedule() \
9006 + (atomic_set(&__get_cpu_var(crm_cpu_entries).will_schedule, 1))
9007 +#define clear_will_schedule() \
9008 + (atomic_set(&__get_cpu_var(crm_cpu_entries).will_schedule, 0))
9009 +#define test_will_schedule(cpu) \
9010 + (atomic_read(&per_cpu(crm_cpu_entries, cpu).will_schedule))
9011 +
9012 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
9013 +struct tasklet_head
9014 +{
9015 + struct tasklet_struct *head;
9016 + struct tasklet_struct **tail;
9017 +};
9018 +#endif
9019 +
9020 +/*
9021 + * In C-RM there is a crm domain _per_ cluster
9022 + * The number of clusters is dynamically determined according to the
9023 + * total cpu number and the cluster size
9024 + */
9025 +typedef struct clusterdomain {
9026 + /* rt_domain for this cluster */
9027 + rt_domain_t domain;
9028 + /* cpus in this cluster */
9029 + cpu_entry_t* *cpus;
9030 + /* map of this cluster cpus */
9031 + cpumask_var_t cpu_map;
9032 + /* the cpus queue themselves according to priority in here */
9033 + struct bheap_node *heap_node;
9034 + struct bheap cpu_heap;
9035 + /* lock for this cluster */
9036 +#define crm_lock domain.ready_lock
9037 +
9038 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
9039 + struct tasklet_head pending_tasklets;
9040 +#endif
9041 +} crm_domain_t;
9042 +
9043 +/* a crm_domain per cluster; allocation is done at init/activation time */
9044 +crm_domain_t *crm;
9045 +
9046 +#define remote_cluster(cpu) ((crm_domain_t *) per_cpu(crm_cpu_entries, cpu).cluster)
9047 +#define task_cpu_cluster(task) remote_cluster(get_partition(task))
9048 +
9049 +/* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling
9050 + * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose
9051 + * information during the initialization of the plugin (e.g., topology)
9052 +#define WANT_ALL_SCHED_EVENTS
9053 + */
9054 +#define VERBOSE_INIT
9055 +
9056 +static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
9057 +{
9058 + cpu_entry_t *a, *b;
9059 + a = _a->value;
9060 + b = _b->value;
9061 + /* Note that a and b are inverted: we want the lowest-priority CPU at
9062 + * the top of the heap.
9063 + */
9064 + return rm_higher_prio(b->linked, a->linked);
9065 +}
9066 +
9067 +/* update_cpu_position - Move the cpu entry to the correct place to maintain
9068 + * order in the cpu queue. Caller must hold crm lock.
9069 + */
9070 +static void update_cpu_position(cpu_entry_t *entry)
9071 +{
9072 + crm_domain_t *cluster = entry->cluster;
9073 +
9074 + if (likely(bheap_node_in_heap(entry->hn)))
9075 + bheap_delete(cpu_lower_prio,
9076 + &cluster->cpu_heap,
9077 + entry->hn);
9078 +
9079 + bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn);
9080 +}
9081 +
9082 +/* caller must hold crm lock */
9083 +static cpu_entry_t* lowest_prio_cpu(crm_domain_t *cluster)
9084 +{
9085 + struct bheap_node* hn;
9086 + hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap);
9087 + return hn->value;
9088 +}
9089 +
9090 +
9091 +/* link_task_to_cpu - Update the link of a CPU.
9092 + * Handles the case where the to-be-linked task is already
9093 + * scheduled on a different CPU.
9094 + */
9095 +static noinline void link_task_to_cpu(struct task_struct* linked,
9096 + cpu_entry_t *entry)
9097 +{
9098 + cpu_entry_t *sched;
9099 + struct task_struct* tmp;
9100 + int on_cpu;
9101 +
9102 + BUG_ON(linked && !is_realtime(linked));
9103 +
9104 + /* Currently linked task is set to be unlinked. */
9105 + if (entry->linked) {
9106 + entry->linked->rt_param.linked_on = NO_CPU;
9107 + }
9108 +
9109 + /* Link new task to CPU. */
9110 + if (linked) {
9111 + set_rt_flags(linked, RT_F_RUNNING);
9112 + /* handle the case where the task is already scheduled somewhere! */
9113 + on_cpu = linked->rt_param.scheduled_on;
9114 + if (on_cpu != NO_CPU) {
9115 + sched = &per_cpu(crm_cpu_entries, on_cpu);
9116 + /* this should only happen if not linked already */
9117 + BUG_ON(sched->linked == linked);
9118 +
9119 + /* If we are already scheduled on the CPU to which we
9120 + * wanted to link, we don't need to do the swap --
9121 + * we just link ourselves to the CPU and depend on
9122 + * the caller to get things right.
9123 + */
9124 + if (entry != sched) {
9125 + TRACE_TASK(linked,
9126 + "already scheduled on %d, updating link.\n",
9127 + sched->cpu);
9128 + tmp = sched->linked;
9129 + linked->rt_param.linked_on = sched->cpu;
9130 + sched->linked = linked;
9131 + update_cpu_position(sched);
9132 + linked = tmp;
9133 + }
9134 + }
9135 + if (linked) /* might be NULL due to swap */
9136 + linked->rt_param.linked_on = entry->cpu;
9137 + }
9138 + entry->linked = linked;
9139 +#ifdef WANT_ALL_SCHED_EVENTS
9140 + if (linked)
9141 + TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
9142 + else
9143 + TRACE("NULL linked to %d.\n", entry->cpu);
9144 +#endif
9145 + update_cpu_position(entry);
9146 +}
9147 +
9148 +/* unlink - Make sure a task is not linked any longer to an entry
9149 + * where it was linked before. Must hold crm_lock.
9150 + */
9151 +static noinline void unlink(struct task_struct* t)
9152 +{
9153 + cpu_entry_t *entry;
9154 +
9155 + if (t->rt_param.linked_on != NO_CPU) {
9156 + /* unlink */
9157 + entry = &per_cpu(crm_cpu_entries, t->rt_param.linked_on);
9158 + t->rt_param.linked_on = NO_CPU;
9159 + link_task_to_cpu(NULL, entry);
9160 + } else if (is_queued(t)) {
9161 + /* This is an interesting situation: t is scheduled,
9162 + * but was just recently unlinked. It cannot be
9163 + * linked anywhere else (because then it would have
9164 + * been relinked to this CPU), thus it must be in some
9165 + * queue. We must remove it from the list in this
9166 + * case.
9167 + *
9168 + * in the C-RM case it should be somewhere in the queue for
9169 + * its domain, therefore we can get the domain using
9170 + * task_cpu_cluster
9171 + */
9172 + remove(&(task_cpu_cluster(t))->domain, t);
9173 + }
9174 +}
9175 +
9176 +
9177 +/* preempt - force a CPU to reschedule
9178 + */
9179 +static void preempt(cpu_entry_t *entry)
9180 +{
9181 + preempt_if_preemptable(entry->scheduled, entry->cpu);
9182 +}
9183 +
9184 +/* requeue - Put an unlinked task into c-rm domain.
9185 + * Caller must hold crm_lock.
9186 + */
9187 +static noinline void requeue(struct task_struct* task)
9188 +{
9189 + crm_domain_t *cluster = task_cpu_cluster(task);
9190 + BUG_ON(!task);
9191 + /* sanity check before insertion */
9192 + BUG_ON(is_queued(task));
9193 +
9194 + if (is_released(task, litmus_clock()))
9195 + __add_ready(&cluster->domain, task);
9196 + else {
9197 + /* it has got to wait */
9198 + add_release(&cluster->domain, task);
9199 + }
9200 +}
9201 +
9202 +#ifdef CONFIG_SCHED_CPU_AFFINITY
9203 +static cpu_entry_t* crm_get_nearest_available_cpu(
9204 + crm_domain_t *cluster, cpu_entry_t* start)
9205 +{
9206 + cpu_entry_t* affinity;
9207 +
9208 + get_nearest_available_cpu(affinity, start, crm_cpu_entries, -1);
9209 +
9210 + /* make sure CPU is in our cluster */
9211 + if(affinity && cpu_isset(affinity->cpu, *cluster->cpu_map))
9212 + return(affinity);
9213 + else
9214 + return(NULL);
9215 +}
9216 +#endif
9217 +
9218 +
9219 +/* check for any necessary preemptions */
9220 +static void check_for_preemptions(crm_domain_t *cluster)
9221 +{
9222 + struct task_struct *task;
9223 + cpu_entry_t *last;
9224 +
9225 + for(last = lowest_prio_cpu(cluster);
9226 + rm_preemption_needed(&cluster->domain, last->linked);
9227 + last = lowest_prio_cpu(cluster)) {
9228 + /* preemption necessary */
9229 + task = __take_ready(&cluster->domain);
9230 +#ifdef CONFIG_SCHED_CPU_AFFINITY
9231 + {
9232 + cpu_entry_t* affinity =
9233 + crm_get_nearest_available_cpu(cluster,
9234 + &per_cpu(crm_cpu_entries, task_cpu(task)));
9235 + if(affinity)
9236 + last = affinity;
9237 + else if(last->linked)
9238 + requeue(last->linked);
9239 + }
9240 +#else
9241 + if (last->linked)
9242 + requeue(last->linked);
9243 +#endif
9244 + TRACE("check_for_preemptions: attempting to link task %d to %d\n",
9245 + task->pid, last->cpu);
9246 + link_task_to_cpu(task, last);
9247 + preempt(last);
9248 + }
9249 +}
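+/*
+ * Sketch of the loop above: while the highest-priority ready job outranks the
+ * job linked to the cluster's lowest-priority CPU, dequeue it and link it
+ * either to a free CPU near its last CPU (with CONFIG_SCHED_CPU_AFFINITY) or
+ * to that lowest-priority CPU after requeueing its previously linked job,
+ * then trigger a preemption on the chosen CPU.
+ */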
9250 +
9251 +/* crm_job_arrival: task is either resumed or released */
9252 +static noinline void crm_job_arrival(struct task_struct* task)
9253 +{
9254 + crm_domain_t *cluster = task_cpu_cluster(task);
9255 + BUG_ON(!task);
9256 +
9257 + requeue(task);
9258 + check_for_preemptions(cluster);
9259 +}
9260 +
9261 +static void crm_release_jobs(rt_domain_t* rt, struct bheap* tasks)
9262 +{
9263 + crm_domain_t* cluster = container_of(rt, crm_domain_t, domain);
9264 + unsigned long flags;
9265 +
9266 + raw_spin_lock_irqsave(&cluster->crm_lock, flags);
9267 +
9268 + __merge_ready(&cluster->domain, tasks);
9269 + check_for_preemptions(cluster);
9270 +
9271 + raw_spin_unlock_irqrestore(&cluster->crm_lock, flags);
9272 +}
9273 +
9274 +/* caller holds crm_lock */
9275 +static noinline void job_completion(struct task_struct *t, int forced)
9276 +{
9277 + BUG_ON(!t);
9278 +
9279 + sched_trace_task_completion(t, forced);
9280 +
9281 +#ifdef CONFIG_LITMUS_NVIDIA
9282 + atomic_set(&tsk_rt(t)->nv_int_count, 0);
9283 +#endif
9284 +
9285 + TRACE_TASK(t, "job_completion().\n");
9286 +
9287 + /* set flags */
9288 + set_rt_flags(t, RT_F_SLEEP);
9289 + /* prepare for next period */
9290 + prepare_for_next_period(t);
9291 + if (is_released(t, litmus_clock()))
9292 + sched_trace_task_release(t);
9293 + /* unlink */
9294 + unlink(t);
9295 + /* requeue
9296 + * But don't requeue a blocking task. */
9297 + if (is_running(t))
9298 + crm_job_arrival(t);
9299 +}
9300 +
9301 +/* crm_tick - this function is called for every local timer
9302 + * interrupt.
9303 + *
9304 + * checks whether the current task has expired and checks
9305 + * whether we need to preempt it if it has not expired
9306 + */
9307 +static void crm_tick(struct task_struct* t)
9308 +{
9309 + if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
9310 + if (!is_np(t)) {
9311 + /* np tasks will be preempted when they become
9312 + * preemptable again
9313 + */
9314 + litmus_reschedule_local();
9315 + set_will_schedule();
9316 + TRACE("crm_scheduler_tick: "
9317 + "%d is preemptable "
9318 + " => FORCE_RESCHED\n", t->pid);
9319 + } else if (is_user_np(t)) {
9320 + TRACE("crm_scheduler_tick: "
9321 + "%d is non-preemptable, "
9322 + "preemption delayed.\n", t->pid);
9323 + request_exit_np(t);
9324 + }
9325 + }
9326 +}
9327 +
9328 +
9329 +
9330 +
9331 +
9332 +
9333 +
9334 +
9335 +
9336 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
9337 +
9338 +
9339 +static void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed)
9340 +{
9341 + if (!atomic_read(&tasklet->count)) {
9342 + if(tasklet->owner) {
9343 + sched_trace_tasklet_begin(tasklet->owner);
9344 + }
9345 +
9346 + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state))
9347 + {
9348 + BUG();
9349 + }
9350 + TRACE("%s: Invoking tasklet with owner pid = %d (flushed = %d).\n",
9351 + __FUNCTION__,
9352 + (tasklet->owner) ? tasklet->owner->pid : -1,
9353 + (tasklet->owner) ? 0 : 1);
9354 + tasklet->func(tasklet->data);
9355 + tasklet_unlock(tasklet);
9356 +
9357 + if(tasklet->owner) {
9358 + sched_trace_tasklet_end(tasklet->owner, flushed);
9359 + }
9360 + }
9361 + else {
9362 + BUG();
9363 + }
9364 +}
9365 +
9366 +
9367 +static void __extract_tasklets(crm_domain_t* cluster, struct task_struct* task, struct tasklet_head* task_tasklets)
9368 +{
9369 + struct tasklet_struct* step;
9370 + struct tasklet_struct* tasklet;
9371 + struct tasklet_struct* prev;
9372 +
9373 + task_tasklets->head = NULL;
9374 + task_tasklets->tail = &(task_tasklets->head);
9375 +
9376 + prev = NULL;
9377 + for(step = cluster->pending_tasklets.head; step != NULL; step = step->next)
9378 + {
9379 + if(step->owner == task)
9380 + {
9381 + TRACE("%s: Found tasklet to flush: %d\n", __FUNCTION__, step->owner->pid);
9382 +
9383 + tasklet = step;
9384 +
9385 + if(prev) {
9386 + prev->next = tasklet->next;
9387 + }
9388 + else if(cluster->pending_tasklets.head == tasklet) {
9389 + // we're at the head.
9390 + cluster->pending_tasklets.head = tasklet->next;
9391 + }
9392 +
9393 + if(cluster->pending_tasklets.tail == &tasklet) {
9394 + // we're at the tail
9395 + if(prev) {
9396 + cluster->pending_tasklets.tail = &prev;
9397 + }
9398 + else {
9399 + cluster->pending_tasklets.tail = &(cluster->pending_tasklets.head);
9400 + }
9401 + }
9402 +
9403 + tasklet->next = NULL;
9404 + *(task_tasklets->tail) = tasklet;
9405 + task_tasklets->tail = &(tasklet->next);
9406 + }
9407 + else {
9408 + prev = step;
9409 + }
9410 + }
9411 +}
9412 +
9413 +static void flush_tasklets(crm_domain_t* cluster, struct task_struct* task)
9414 +{
9415 +#if 0
9416 + unsigned long flags;
9417 + struct tasklet_head task_tasklets;
9418 + struct tasklet_struct* step;
9419 +
9420 + raw_spin_lock_irqsave(&cluster->crm_lock, flags);
9421 + __extract_tasklets(cluster, task, &task_tasklets);
9422 + raw_spin_unlock_irqrestore(&cluster->crm_lock, flags);
9423 +
9424 + if(cluster->pending_tasklets.head != NULL) {
9425 + TRACE("%s: Flushing tasklets for %d...\n", __FUNCTION__, task->pid);
9426 + }
9427 +
9428 + // now execute any flushed tasklets.
9429 + for(step = cluster->pending_tasklets.head; step != NULL; /**/)
9430 + {
9431 + struct tasklet_struct* temp = step->next;
9432 +
9433 + step->next = NULL;
9434 + __do_lit_tasklet(step, 1ul);
9435 +
9436 + step = temp;
9437 + }
9438 +#endif
9439 +
9440 + // lazy flushing.
9441 + // just change ownership to NULL and let an idle processor
9442 + // take care of it. :P
9443 +
9444 + struct tasklet_struct* step;
9445 + unsigned long flags;
9446 +
9447 + raw_spin_lock_irqsave(&cluster->crm_lock, flags);
9448 +
9449 + for(step = cluster->pending_tasklets.head; step != NULL; step = step->next)
9450 + {
9451 + if(step->owner == task)
9452 + {
9453 + TRACE("%s: Found tasklet to flush: %d\n", __FUNCTION__, step->owner->pid);
9454 + step->owner = NULL;
9455 + }
9456 + }
9457 +
9458 + raw_spin_unlock_irqrestore(&cluster->crm_lock, flags);
9459 +}
9460 +
9461 +
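+/* Drain the cluster's pending tasklet list: repeatedly dequeue and execute
+ * the tasklet at the head as long as its owner outranks sched_task; stop as
+ * soon as the head no longer has higher priority or the list is empty. The
+ * cluster lock is only held while manipulating the list, not while the
+ * tasklet body runs. */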
9462 +static void do_lit_tasklets(crm_domain_t* cluster, struct task_struct* sched_task)
9463 +{
9464 + int work_to_do = 1;
9465 + struct tasklet_struct *tasklet = NULL;
9466 + //struct tasklet_struct *step;
9467 + unsigned long flags;
9468 +
9469 + while(work_to_do) {
9470 +
9471 + TS_NV_SCHED_BOTISR_START;
9472 +
9473 + // remove tasklet at head of list if it has higher priority.
9474 + raw_spin_lock_irqsave(&cluster->crm_lock, flags);
9475 +
9476 + /*
9477 + step = cluster->pending_tasklets.head;
9478 + TRACE("%s: (BEFORE) dumping tasklet queue...\n", __FUNCTION__);
9479 + while(step != NULL){
9480 + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid);
9481 + step = step->next;
9482 + }
9483 + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1);
9484 + TRACE("%s: done.\n", __FUNCTION__);
9485 + */
9486 +
9487 + if(cluster->pending_tasklets.head != NULL) {
9488 + // remove tasklet at head.
9489 + tasklet = cluster->pending_tasklets.head;
9490 +
9491 + if(rm_higher_prio(tasklet->owner, sched_task)) {
9492 +
9493 + if(NULL == tasklet->next) {
9494 + // tasklet is at the head, list only has one element
9495 +				// tasklet is at the head and the list has only one element
9496 + cluster->pending_tasklets.tail = &(cluster->pending_tasklets.head);
9497 + }
9498 +
9499 + // remove the tasklet from the queue
9500 + cluster->pending_tasklets.head = tasklet->next;
9501 +
9502 + TRACE("%s: Removed tasklet for %d from tasklet queue.\n", __FUNCTION__, (tasklet->owner) ? tasklet->owner->pid : -1);
9503 + }
9504 + else {
9505 + TRACE("%s: Pending tasklet (%d) does not have priority to run on this CPU (%d).\n", __FUNCTION__, (tasklet->owner) ? tasklet->owner->pid : -1, smp_processor_id());
9506 + tasklet = NULL;
9507 + }
9508 + }
9509 + else {
9510 + TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__);
9511 + }
9512 +
9513 + /*
9514 + step = cluster->pending_tasklets.head;
9515 + TRACE("%s: (AFTER) dumping tasklet queue...\n", __FUNCTION__);
9516 + while(step != NULL){
9517 + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid);
9518 + step = step->next;
9519 + }
9520 + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1);
9521 + TRACE("%s: done.\n", __FUNCTION__);
9522 + */
9523 +
9524 + raw_spin_unlock_irqrestore(&cluster->crm_lock, flags);
9525 +
9526 + TS_NV_SCHED_BOTISR_END;
9527 +
9528 + if(tasklet) {
9529 + __do_lit_tasklet(tasklet, 0ul);
9530 + tasklet = NULL;
9531 + }
9532 + else {
9533 + work_to_do = 0;
9534 + }
9535 + }
9536 +
9537 + //TRACE("%s: exited.\n", __FUNCTION__);
9538 +}
9539 +
9540 +
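+/* Plugin entry point for bottom-half processing: run the pending PAI
+ * tasklets of the cluster that the currently scheduled task belongs to
+ * (or of this CPU's cluster if the scheduled task is not real-time). */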
9541 +static void run_tasklets(struct task_struct* sched_task)
9542 +{
9543 + crm_domain_t* cluster;
9544 +
9545 +#if 0
9546 + int task_is_rt = is_realtime(sched_task);
9547 + crm_domain_t* cluster;
9548 +
9549 + if(is_realtime(sched_task)) {
9550 + cluster = task_cpu_cluster(sched_task);
9551 + }
9552 + else {
9553 + cluster = remote_cluster(get_cpu());
9554 + }
9555 +
9556 + if(cluster && cluster->pending_tasklets.head != NULL) {
9557 + TRACE("%s: There are tasklets to process.\n", __FUNCTION__);
9558 +
9559 + do_lit_tasklets(cluster, sched_task);
9560 + }
9561 +
9562 + if(!task_is_rt) {
9563 + put_cpu_no_resched();
9564 + }
9565 +#else
9566 +
9567 + preempt_disable();
9568 +
9569 + cluster = (is_realtime(sched_task)) ?
9570 + task_cpu_cluster(sched_task) :
9571 + remote_cluster(smp_processor_id());
9572 +
9573 + if(cluster && cluster->pending_tasklets.head != NULL) {
9574 + TRACE("%s: There are tasklets to process.\n", __FUNCTION__);
9575 + do_lit_tasklets(cluster, sched_task);
9576 + }
9577 +
9578 + preempt_enable_no_resched();
9579 +
9580 +#endif
9581 +}
9582 +
9583 +
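+/* Insert a tasklet into the cluster's pending list, keeping the list sorted
+ * by the RM priority of the tasklet owners (highest-priority owner at the
+ * head). Caller must hold the cluster's crm_lock. */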
9584 +static void __add_pai_tasklet(struct tasklet_struct* tasklet, crm_domain_t* cluster)
9585 +{
9586 + struct tasklet_struct* step;
9587 +
9588 + /*
9589 + step = cluster->pending_tasklets.head;
9590 + TRACE("%s: (BEFORE) dumping tasklet queue...\n", __FUNCTION__);
9591 + while(step != NULL){
9592 + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid);
9593 + step = step->next;
9594 + }
9595 + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1);
9596 + TRACE("%s: done.\n", __FUNCTION__);
9597 + */
9598 +
9599 + tasklet->next = NULL; // make sure there are no old values floating around
9600 +
9601 + step = cluster->pending_tasklets.head;
9602 + if(step == NULL) {
9603 + TRACE("%s: tasklet queue empty. inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid);
9604 + // insert at tail.
9605 + *(cluster->pending_tasklets.tail) = tasklet;
9606 + cluster->pending_tasklets.tail = &(tasklet->next);
9607 + }
9608 +	else if(rm_higher_prio(tasklet->owner, step->owner)) {
9609 +		// the new tasklet outranks every pending tasklet:
9610 +		// insert at head.
9611 +		TRACE("%s: tasklet belongs at front. inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid);
9612 +
9613 +		tasklet->next = step;
9614 +		cluster->pending_tasklets.head = tasklet;
9615 +	}
9616 + else {
9617 +
9618 + //WARN_ON(1 == 1);
9619 +
9620 + // insert the tasklet somewhere in the middle.
9621 +
9622 + TRACE("%s: tasklet belongs somewhere in the middle.\n", __FUNCTION__);
9623 +
9624 + while(step->next && rm_higher_prio(step->next->owner, tasklet->owner)) {
9625 + step = step->next;
9626 + }
9627 +
9628 + // insert tasklet right before step->next.
9629 +
9630 + TRACE("%s: inserting tasklet for %d between %d and %d.\n", __FUNCTION__,
9631 + tasklet->owner->pid,
9632 + (step->owner) ?
9633 + step->owner->pid :
9634 + -1,
9635 + (step->next) ?
9636 + ((step->next->owner) ?
9637 + step->next->owner->pid :
9638 + -1) :
9639 + -1);
9640 +
9641 + tasklet->next = step->next;
9642 + step->next = tasklet;
9643 +
9644 +		// patch up the tail if we appended at the end of the queue.
9645 +		if(tasklet->next == NULL)
9646 +		{
9647 +			TRACE("%s: %d is the new tasklet queue tail.\n", __FUNCTION__, tasklet->owner->pid);
9648 +			cluster->pending_tasklets.tail = &(tasklet->next);
9649 +		}
9650 + }
9651 +
9652 + /*
9653 + step = cluster->pending_tasklets.head;
9654 + TRACE("%s: (AFTER) dumping tasklet queue...\n", __FUNCTION__);
9655 + while(step != NULL){
9656 + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid);
9657 + step = step->next;
9658 + }
9659 + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1);
9660 + TRACE("%s: done.\n", __FUNCTION__);
9661 + */
9662 +
9663 +	// Old, unordered tail insertion kept for reference:
9664 + // tasklet->next = NULL;
9665 + // *(cluster->pending_tasklets.tail) = tasklet;
9666 + // cluster->pending_tasklets.tail = &tasklet->next;
9667 +}
9668 +
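+/* Plugin callback for a newly raised tasklet. If the owner outranks what the
+ * chosen CPU is currently running, the tasklet is executed immediately
+ * (locally, or by enqueueing it and preempting the target CPU); otherwise it
+ * is queued until a CPU becomes available. Returns 1 if the tasklet was
+ * handled here, 0 if it has no real-time owner. */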
9669 +static int enqueue_pai_tasklet(struct tasklet_struct* tasklet)
9670 +{
9671 + crm_domain_t *cluster = NULL;
9672 + cpu_entry_t *targetCPU = NULL;
9673 + int thisCPU;
9674 + int runLocal = 0;
9675 + int runNow = 0;
9676 + unsigned long flags;
9677 +
9678 + if(unlikely((tasklet->owner == NULL) || !is_realtime(tasklet->owner)))
9679 + {
9680 + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
9681 + return 0;
9682 + }
9683 +
9684 + cluster = task_cpu_cluster(tasklet->owner);
9685 +
9686 + raw_spin_lock_irqsave(&cluster->crm_lock, flags);
9687 +
9688 + thisCPU = smp_processor_id();
9689 +
9690 +#if 1
9691 +#ifdef CONFIG_SCHED_CPU_AFFINITY
9692 + {
9693 + cpu_entry_t* affinity = NULL;
9694 +
9695 + // use this CPU if it is in our cluster and isn't running any RT work.
9696 + if(cpu_isset(thisCPU, *cluster->cpu_map) && (__get_cpu_var(crm_cpu_entries).linked == NULL)) {
9697 + affinity = &(__get_cpu_var(crm_cpu_entries));
9698 + }
9699 + else {
9700 +			// this CPU is busy or shouldn't run tasklets for this cluster;
9701 +			// look for an available nearby CPU.
9702 + // NOTE: Affinity towards owner and not this CPU. Is this right?
9703 + affinity =
9704 + crm_get_nearest_available_cpu(cluster,
9705 + &per_cpu(crm_cpu_entries, task_cpu(tasklet->owner)));
9706 + }
9707 +
9708 + targetCPU = affinity;
9709 + }
9710 +#endif
9711 +#endif
9712 +
9713 + if (targetCPU == NULL) {
9714 + targetCPU = lowest_prio_cpu(cluster);
9715 + }
9716 +
9717 + if (rm_higher_prio(tasklet->owner, targetCPU->linked)) {
9718 + if (thisCPU == targetCPU->cpu) {
9719 + TRACE("%s: Run tasklet locally (and now).\n", __FUNCTION__);
9720 + runLocal = 1;
9721 + runNow = 1;
9722 + }
9723 + else {
9724 + TRACE("%s: Run tasklet remotely (and now).\n", __FUNCTION__);
9725 + runLocal = 0;
9726 + runNow = 1;
9727 + }
9728 + }
9729 + else {
9730 + runLocal = 0;
9731 + runNow = 0;
9732 + }
9733 +
9734 + if(!runLocal) {
9735 + // enqueue the tasklet
9736 + __add_pai_tasklet(tasklet, cluster);
9737 + }
9738 +
9739 + raw_spin_unlock_irqrestore(&cluster->crm_lock, flags);
9740 +
9741 +
9742 + if (runLocal /*&& runNow */) { // runNow == 1 is implied
9743 + TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__);
9744 + __do_lit_tasklet(tasklet, 0ul);
9745 + }
9746 + else if (runNow /*&& !runLocal */) { // runLocal == 0 is implied
9747 + TRACE("%s: Triggering CPU %d to run tasklet.\n", __FUNCTION__, targetCPU->cpu);
9748 + preempt(targetCPU); // need to be protected by crm_lock?
9749 + }
9750 + else {
9751 + TRACE("%s: Scheduling of tasklet was deferred.\n", __FUNCTION__);
9752 + }
9753 +
9754 + return(1); // success
9755 +}
9756 +
9757 +
9758 +#endif
9759 +
9760 +
9761 +
9762 +
9763 +
9764 +
9765 +
9766 +
9767 +
9768 +
9769 +
9770 +
9771 +
9772 +
9773 +
9774 +
9775 +
9776 +
9777 +
9778 +
9779 +
9780 +
9781 +
9782 +
9783 +
9784 +
9785 +
9786 +/* Getting schedule() right is a bit tricky. schedule() may not make any
9787 + * assumptions on the state of the current task since it may be called for a
9788 + * number of reasons. The reasons include a scheduler_tick() determined that it
9789 + * number of reasons. The reasons include: a scheduler_tick() determined that it
9790 + * was necessary, sys_exit_np() was called, some Linux
9791 + * subsystem determined so, or even (in the worst case) there is a bug
9792 + * current state is.
9793 + *
9794 + * The CPU could currently be scheduling a task (or not), be linked (or not).
9795 + *
9796 + * The following assertions for the scheduled task could hold:
9797 + *
9798 + * - !is_running(scheduled) // the job blocks
9799 + * - scheduled->timeslice == 0 // the job completed (forcefully)
9800 + * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall)
9801 + * - linked != scheduled // we need to reschedule (for any reason)
9802 + * - is_np(scheduled) // rescheduling must be delayed,
9803 + * sys_exit_np must be requested
9804 + *
9805 + * Any of these can occur together.
9806 + */
9807 +static struct task_struct* crm_schedule(struct task_struct * prev)
9808 +{
9809 + cpu_entry_t* entry = &__get_cpu_var(crm_cpu_entries);
9810 + crm_domain_t *cluster = entry->cluster;
9811 + int out_of_time, sleep, preempt, np, exists, blocks;
9812 + struct task_struct* next = NULL;
9813 +
9814 + raw_spin_lock(&cluster->crm_lock);
9815 + clear_will_schedule();
9816 +
9817 + /* sanity checking */
9818 + BUG_ON(entry->scheduled && entry->scheduled != prev);
9819 + BUG_ON(entry->scheduled && !is_realtime(prev));
9820 + BUG_ON(is_realtime(prev) && !entry->scheduled);
9821 +
9822 + /* (0) Determine state */
9823 + exists = entry->scheduled != NULL;
9824 + blocks = exists && !is_running(entry->scheduled);
9825 + out_of_time = exists &&
9826 + budget_enforced(entry->scheduled) &&
9827 + budget_exhausted(entry->scheduled);
9828 + np = exists && is_np(entry->scheduled);
9829 + sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
9830 + preempt = entry->scheduled != entry->linked;
9831 +
9832 +#ifdef WANT_ALL_SCHED_EVENTS
9833 + TRACE_TASK(prev, "invoked crm_schedule.\n");
9834 +#endif
9835 +
9836 + if (exists)
9837 + TRACE_TASK(prev,
9838 + "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
9839 + "state:%d sig:%d\n",
9840 + blocks, out_of_time, np, sleep, preempt,
9841 + prev->state, signal_pending(prev));
9842 + if (entry->linked && preempt)
9843 + TRACE_TASK(prev, "will be preempted by %s/%d\n",
9844 + entry->linked->comm, entry->linked->pid);
9845 +
9846 +
9847 + /* If a task blocks we have no choice but to reschedule.
9848 + */
9849 + if (blocks)
9850 + unlink(entry->scheduled);
9851 +
9852 + /* Request a sys_exit_np() call if we would like to preempt but cannot.
9853 + * We need to make sure to update the link structure anyway in case
9854 + * that we are still linked. Multiple calls to request_exit_np() don't
9855 + * hurt.
9856 + */
9857 + if (np && (out_of_time || preempt || sleep)) {
9858 + unlink(entry->scheduled);
9859 + request_exit_np(entry->scheduled);
9860 + }
9861 +
9862 + /* Any task that is preemptable and either exhausts its execution
9863 + * budget or wants to sleep completes. We may have to reschedule after
9864 + * this. Don't do a job completion if we block (can't have timers running
9865 + * for blocked jobs). Preemptions go first for the same reason.
9866 + */
9867 + if (!np && (out_of_time || sleep) && !blocks && !preempt)
9868 + job_completion(entry->scheduled, !sleep);
9869 +
9870 + /* Link pending task if we became unlinked.
9871 + */
9872 + if (!entry->linked)
9873 + link_task_to_cpu(__take_ready(&cluster->domain), entry);
9874 +
9875 + /* The final scheduling decision. Do we need to switch for some reason?
9876 + * If linked is different from scheduled, then select linked as next.
9877 + */
9878 + if ((!np || blocks) &&
9879 + entry->linked != entry->scheduled) {
9880 + /* Schedule a linked job? */
9881 + if (entry->linked) {
9882 + entry->linked->rt_param.scheduled_on = entry->cpu;
9883 + next = entry->linked;
9884 + }
9885 + if (entry->scheduled) {
9886 + /* not gonna be scheduled soon */
9887 + entry->scheduled->rt_param.scheduled_on = NO_CPU;
9888 + TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
9889 + }
9890 + } else
9891 + /* Only override Linux scheduler if we have a real-time task
9892 + * scheduled that needs to continue.
9893 + */
9894 + if (exists)
9895 + next = prev;
9896 +
9897 + sched_state_task_picked();
9898 + raw_spin_unlock(&cluster->crm_lock);
9899 +
9900 +#ifdef WANT_ALL_SCHED_EVENTS
9901 + TRACE("crm_lock released, next=0x%p\n", next);
9902 +
9903 + if (next)
9904 + TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
9905 + else if (exists && !next)
9906 + TRACE("becomes idle at %llu.\n", litmus_clock());
9907 +#endif
9908 +
9909 +
9910 + return next;
9911 +}
9912 +
9913 +
9914 +/* _finish_switch - we just finished the switch away from prev
9915 + */
9916 +static void crm_finish_switch(struct task_struct *prev)
9917 +{
9918 + cpu_entry_t* entry = &__get_cpu_var(crm_cpu_entries);
9919 +
9920 + entry->scheduled = is_realtime(current) ? current : NULL;
9921 +#ifdef WANT_ALL_SCHED_EVENTS
9922 + TRACE_TASK(prev, "switched away from\n");
9923 +#endif
9924 +}
9925 +
9926 +
9927 +/* Prepare a task for running in RT mode
9928 + */
9929 +static void crm_task_new(struct task_struct * t, int on_rq, int running)
9930 +{
9931 + unsigned long flags;
9932 + cpu_entry_t* entry;
9933 + crm_domain_t* cluster;
9934 +
9935 + TRACE("crm: task new %d\n", t->pid);
9936 +
9937 + /* the cluster doesn't change even if t is running */
9938 + cluster = task_cpu_cluster(t);
9939 +
9940 + raw_spin_lock_irqsave(&cluster->crm_lock, flags);
9941 +
9942 + /* setup job params */
9943 + release_at(t, litmus_clock());
9944 +
9945 + if (running) {
9946 + entry = &per_cpu(crm_cpu_entries, task_cpu(t));
9947 + BUG_ON(entry->scheduled);
9948 +
9949 + entry->scheduled = t;
9950 + tsk_rt(t)->scheduled_on = task_cpu(t);
9951 + } else {
9952 + t->rt_param.scheduled_on = NO_CPU;
9953 + }
9954 + t->rt_param.linked_on = NO_CPU;
9955 +
9956 + crm_job_arrival(t);
9957 + raw_spin_unlock_irqrestore(&cluster->crm_lock, flags);
9958 +}
9959 +
9960 +static void crm_task_wake_up(struct task_struct *task)
9961 +{
9962 + unsigned long flags;
9963 + //lt_t now;
9964 + crm_domain_t *cluster;
9965 +
9966 + TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
9967 +
9968 + cluster = task_cpu_cluster(task);
9969 +
9970 + raw_spin_lock_irqsave(&cluster->crm_lock, flags);
9971 +
9972 +#if 0 // sporadic task model
9973 + /* We need to take suspensions because of semaphores into
9974 + * account! If a job resumes after being suspended due to acquiring
9975 + * a semaphore, it should never be treated as a new job release.
9976 + */
9977 + if (get_rt_flags(task) == RT_F_EXIT_SEM) {
9978 + set_rt_flags(task, RT_F_RUNNING);
9979 + } else {
9980 + now = litmus_clock();
9981 + if (is_tardy(task, now)) {
9982 + /* new sporadic release */
9983 + release_at(task, now);
9984 + sched_trace_task_release(task);
9985 + }
9986 + else {
9987 + if (task->rt.time_slice) {
9988 + /* came back in time before deadline
9989 + */
9990 + set_rt_flags(task, RT_F_RUNNING);
9991 + }
9992 + }
9993 + }
9994 +#endif
9995 +
9996 + //BUG_ON(tsk_rt(task)->linked_on != NO_CPU);
9997 + set_rt_flags(task, RT_F_RUNNING); // periodic model
9998 +
9999 + if(tsk_rt(task)->linked_on == NO_CPU)
10000 + crm_job_arrival(task);
10001 + else
10002 +		TRACE_TASK(task, "still linked on a CPU at wake-up; skipping job arrival.\n");
10003 +
10004 + raw_spin_unlock_irqrestore(&cluster->crm_lock, flags);
10005 +}
10006 +
10007 +static void crm_task_block(struct task_struct *t)
10008 +{
10009 + unsigned long flags;
10010 + crm_domain_t *cluster;
10011 +
10012 + TRACE_TASK(t, "block at %llu\n", litmus_clock());
10013 +
10014 + cluster = task_cpu_cluster(t);
10015 +
10016 + /* unlink if necessary */
10017 + raw_spin_lock_irqsave(&cluster->crm_lock, flags);
10018 + unlink(t);
10019 + raw_spin_unlock_irqrestore(&cluster->crm_lock, flags);
10020 +
10021 + BUG_ON(!is_realtime(t));
10022 +}
10023 +
10024 +
10025 +static void crm_task_exit(struct task_struct * t)
10026 +{
10027 + unsigned long flags;
10028 + crm_domain_t *cluster = task_cpu_cluster(t);
10029 +
10030 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
10031 + flush_tasklets(cluster, t);
10032 +#endif
10033 +
10034 + /* unlink if necessary */
10035 + raw_spin_lock_irqsave(&cluster->crm_lock, flags);
10036 + unlink(t);
10037 + if (tsk_rt(t)->scheduled_on != NO_CPU) {
10038 + cpu_entry_t *cpu;
10039 + cpu = &per_cpu(crm_cpu_entries, tsk_rt(t)->scheduled_on);
10040 + cpu->scheduled = NULL;
10041 + tsk_rt(t)->scheduled_on = NO_CPU;
10042 + }
10043 + raw_spin_unlock_irqrestore(&cluster->crm_lock, flags);
10044 +
10045 + BUG_ON(!is_realtime(t));
10046 + TRACE_TASK(t, "RIP\n");
10047 +}
10048 +
10049 +static long crm_admit_task(struct task_struct* tsk)
10050 +{
10051 + return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
10052 +}
10053 +
10054 +
10055 +
10056 +
10057 +
10058 +
10059 +
10060 +
10061 +
10062 +
10063 +
10064 +
10065 +
10066 +#ifdef CONFIG_LITMUS_LOCKING
10067 +
10068 +#include <litmus/fdso.h>
10069 +
10070 +
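+/* Grant t the effective priority of prio_inh and re-position it in whatever
+ * structure currently holds it: the per-cluster CPU heap if t is linked to a
+ * CPU, or the ready/release heaps if t is queued. Caller must hold the
+ * cluster's crm_lock. */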
10071 +static void __set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
10072 +{
10073 + int linked_on;
10074 + int check_preempt = 0;
10075 +
10076 + crm_domain_t* cluster = task_cpu_cluster(t);
10077 +
10078 + if(prio_inh != NULL)
10079 + TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid);
10080 + else
10081 + TRACE_TASK(t, "inherits priority from %p\n", prio_inh);
10082 +
10083 + sched_trace_eff_prio_change(t, prio_inh);
10084 +
10085 + tsk_rt(t)->inh_task = prio_inh;
10086 +
10087 + linked_on = tsk_rt(t)->linked_on;
10088 +
10089 + /* If it is scheduled, then we need to reorder the CPU heap. */
10090 + if (linked_on != NO_CPU) {
10091 + TRACE_TASK(t, "%s: linked on %d\n",
10092 + __FUNCTION__, linked_on);
10093 + /* Holder is scheduled; need to re-order CPUs.
10094 + * We can't use heap_decrease() here since
10095 + * the cpu_heap is ordered in reverse direction, so
10096 + * it is actually an increase. */
10097 + bheap_delete(cpu_lower_prio, &cluster->cpu_heap,
10098 + per_cpu(crm_cpu_entries, linked_on).hn);
10099 + bheap_insert(cpu_lower_prio, &cluster->cpu_heap,
10100 + per_cpu(crm_cpu_entries, linked_on).hn);
10101 + } else {
10102 + /* holder may be queued: first stop queue changes */
10103 + raw_spin_lock(&cluster->domain.release_lock);
10104 + if (is_queued(t)) {
10105 + TRACE_TASK(t, "%s: is queued\n", __FUNCTION__);
10106 +
10107 + /* We need to update the position of holder in some
10108 +			 * heap. Note that this could be a release heap if
10109 +			 * budget enforcement is used and this job overran. */
10110 + check_preempt = !bheap_decrease(rm_ready_order, tsk_rt(t)->heap_node);
10111 +
10112 + } else {
10113 + /* Nothing to do: if it is not queued and not linked
10114 + * then it is either sleeping or currently being moved
10115 + * by other code (e.g., a timer interrupt handler) that
10116 + * will use the correct priority when enqueuing the
10117 + * task. */
10118 + TRACE_TASK(t, "%s: is NOT queued => Done.\n", __FUNCTION__);
10119 + }
10120 + raw_spin_unlock(&cluster->domain.release_lock);
10121 +
10122 + /* If holder was enqueued in a release heap, then the following
10123 + * preemption check is pointless, but we can't easily detect
10124 + * that case. If you want to fix this, then consider that
10125 + * simply adding a state flag requires O(n) time to update when
10126 + * releasing n tasks, which conflicts with the goal to have
10127 + * O(log n) merges. */
10128 + if (check_preempt) {
10129 + /* heap_decrease() hit the top level of the heap: make
10130 + * sure preemption checks get the right task, not the
10131 + * potentially stale cache. */
10132 + bheap_uncache_min(rm_ready_order, &cluster->domain.ready_queue);
10133 + check_for_preemptions(cluster);
10134 + }
10135 + }
10136 +}
10137 +
10138 +/* called with IRQs off */
10139 +static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
10140 +{
10141 + crm_domain_t* cluster = task_cpu_cluster(t);
10142 +
10143 + raw_spin_lock(&cluster->crm_lock);
10144 +
10145 + __set_priority_inheritance(t, prio_inh);
10146 +
10147 +#ifdef CONFIG_LITMUS_SOFTIRQD
10148 + if(tsk_rt(t)->cur_klitirqd != NULL)
10149 + {
10150 + TRACE_TASK(t, "%s/%d inherits a new priority!\n",
10151 + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
10152 +
10153 + __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh);
10154 + }
10155 +#endif
10156 +
10157 + raw_spin_unlock(&cluster->crm_lock);
10158 +}
10159 +
10160 +
10161 +/* called with IRQs off */
10162 +static void __clear_priority_inheritance(struct task_struct* t)
10163 +{
10164 + TRACE_TASK(t, "priority restored\n");
10165 +
10166 + if(tsk_rt(t)->scheduled_on != NO_CPU)
10167 + {
10168 + sched_trace_eff_prio_change(t, NULL);
10169 +
10170 + tsk_rt(t)->inh_task = NULL;
10171 +
10172 + /* Check if rescheduling is necessary. We can't use heap_decrease()
10173 + * since the priority was effectively lowered. */
10174 + unlink(t);
10175 + crm_job_arrival(t);
10176 + }
10177 + else
10178 + {
10179 + __set_priority_inheritance(t, NULL);
10180 + }
10181 +
10182 +#ifdef CONFIG_LITMUS_SOFTIRQD
10183 + if(tsk_rt(t)->cur_klitirqd != NULL)
10184 + {
10185 + TRACE_TASK(t, "%s/%d inheritance set back to owner.\n",
10186 + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
10187 +
10188 + if(tsk_rt(tsk_rt(t)->cur_klitirqd)->scheduled_on != NO_CPU)
10189 + {
10190 + sched_trace_eff_prio_change(tsk_rt(t)->cur_klitirqd, t);
10191 +
10192 + tsk_rt(tsk_rt(t)->cur_klitirqd)->inh_task = t;
10193 +
10194 + /* Check if rescheduling is necessary. We can't use heap_decrease()
10195 + * since the priority was effectively lowered. */
10196 + unlink(tsk_rt(t)->cur_klitirqd);
10197 + crm_job_arrival(tsk_rt(t)->cur_klitirqd);
10198 + }
10199 + else
10200 + {
10201 + __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, t);
10202 + }
10203 + }
10204 +#endif
10205 +}
10206 +
10207 +/* called with IRQs off */
10208 +static void clear_priority_inheritance(struct task_struct* t)
10209 +{
10210 + crm_domain_t* cluster = task_cpu_cluster(t);
10211 +
10212 + raw_spin_lock(&cluster->crm_lock);
10213 + __clear_priority_inheritance(t);
10214 + raw_spin_unlock(&cluster->crm_lock);
10215 +}
10216 +
10217 +
10218 +
10219 +#ifdef CONFIG_LITMUS_SOFTIRQD
10220 +/* called with IRQs off */
10221 +static void set_priority_inheritance_klitirqd(struct task_struct* klitirqd,
10222 + struct task_struct* old_owner,
10223 + struct task_struct* new_owner)
10224 +{
10225 + crm_domain_t* cluster = task_cpu_cluster(klitirqd);
10226 +
10227 + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
10228 +
10229 + raw_spin_lock(&cluster->crm_lock);
10230 +
10231 + if(old_owner != new_owner)
10232 + {
10233 + if(old_owner)
10234 + {
10235 + // unreachable?
10236 + tsk_rt(old_owner)->cur_klitirqd = NULL;
10237 + }
10238 +
10239 + TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n",
10240 + new_owner->comm, new_owner->pid);
10241 +
10242 + tsk_rt(new_owner)->cur_klitirqd = klitirqd;
10243 + }
10244 +
10245 + __set_priority_inheritance(klitirqd,
10246 + (tsk_rt(new_owner)->inh_task == NULL) ?
10247 + new_owner :
10248 + tsk_rt(new_owner)->inh_task);
10249 +
10250 + raw_spin_unlock(&cluster->crm_lock);
10251 +}
10252 +
10253 +/* called with IRQs off */
10254 +static void clear_priority_inheritance_klitirqd(struct task_struct* klitirqd,
10255 + struct task_struct* old_owner)
10256 +{
10257 + crm_domain_t* cluster = task_cpu_cluster(klitirqd);
10258 +
10259 + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
10260 +
10261 + raw_spin_lock(&cluster->crm_lock);
10262 +
10263 + TRACE_TASK(klitirqd, "priority restored\n");
10264 +
10265 + if(tsk_rt(klitirqd)->scheduled_on != NO_CPU)
10266 + {
10267 + tsk_rt(klitirqd)->inh_task = NULL;
10268 +
10269 + /* Check if rescheduling is necessary. We can't use heap_decrease()
10270 + * since the priority was effectively lowered. */
10271 + unlink(klitirqd);
10272 + crm_job_arrival(klitirqd);
10273 + }
10274 + else
10275 + {
10276 + __set_priority_inheritance(klitirqd, NULL);
10277 + }
10278 +
10279 + tsk_rt(old_owner)->cur_klitirqd = NULL;
10280 +
10281 + raw_spin_unlock(&cluster->crm_lock);
10282 +}
10283 +#endif // CONFIG_LITMUS_SOFTIRQD
10284 +
10285 +
10286 +/* ******************** KFMLP support ********************** */
10287 +
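+/* The k-FMLP manages 'num_resources' (k) replicas of a resource, each with
+ * its own FIFO wait queue. A task acquires whichever replica currently has
+ * the shortest queue; when a replica is released and its queue is empty,
+ * the highest-priority waiter of another replica may be stolen so that no
+ * replica sits idle while tasks are still waiting. */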
10288 +/* struct for semaphore with priority inheritance */
10289 +struct kfmlp_queue
10290 +{
10291 + wait_queue_head_t wait;
10292 + struct task_struct* owner;
10293 + struct task_struct* hp_waiter;
10294 + int count; /* number of waiters + holder */
10295 +};
10296 +
10297 +struct kfmlp_semaphore
10298 +{
10299 + struct litmus_lock litmus_lock;
10300 +
10301 + spinlock_t lock;
10302 +
10303 + int num_resources; /* aka k */
10304 + struct kfmlp_queue *queues; /* array */
10305 + struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */
10306 +};
10307 +
10308 +static inline struct kfmlp_semaphore* kfmlp_from_lock(struct litmus_lock* lock)
10309 +{
10310 + return container_of(lock, struct kfmlp_semaphore, litmus_lock);
10311 +}
10312 +
10313 +static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem,
10314 + struct kfmlp_queue* queue)
10315 +{
10316 + return (queue - &sem->queues[0]);
10317 +}
10318 +
10319 +static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem,
10320 + struct task_struct* holder)
10321 +{
10322 + int i;
10323 + for(i = 0; i < sem->num_resources; ++i)
10324 + if(sem->queues[i].owner == holder)
10325 + return(&sem->queues[i]);
10326 + return(NULL);
10327 +}
10328 +
10329 +/* caller is responsible for locking */
10330 +static struct task_struct* kfmlp_find_hp_waiter(struct kfmlp_queue *kqueue,
10331 + struct task_struct *skip)
10332 +{
10333 + struct list_head *pos;
10334 + struct task_struct *queued, *found = NULL;
10335 +
10336 + list_for_each(pos, &kqueue->wait.task_list) {
10337 + queued = (struct task_struct*) list_entry(pos, wait_queue_t,
10338 + task_list)->private;
10339 +
10340 + /* Compare task prios, find high prio task. */
10341 + if (queued != skip && rm_higher_prio(queued, found))
10342 + found = queued;
10343 + }
10344 + return found;
10345 +}
10346 +
10347 +static inline struct kfmlp_queue* kfmlp_find_shortest(
10348 + struct kfmlp_semaphore* sem,
10349 + struct kfmlp_queue* search_start)
10350 +{
10351 + // we start our search at search_start instead of at the beginning of the
10352 + // queue list to load-balance across all resources.
10353 + struct kfmlp_queue* step = search_start;
10354 + struct kfmlp_queue* shortest = sem->shortest_queue;
10355 +
10356 + do
10357 + {
10358 + step = (step+1 != &sem->queues[sem->num_resources]) ?
10359 + step+1 : &sem->queues[0];
10360 + if(step->count < shortest->count)
10361 + {
10362 + shortest = step;
10363 + if(step->count == 0)
10364 + break; /* can't get any shorter */
10365 + }
10366 + }while(step != search_start);
10367 +
10368 + return(shortest);
10369 +}
10370 +
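+/* Steal a waiter: among all queues with more than one task (owner plus
+ * waiters), pick the one whose hp_waiter has the highest priority, remove
+ * that waiter from its wait queue (fixing up the owner's inherited priority
+ * if it came from the stolen task), and return it. Returns NULL if there is
+ * nothing to steal. */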
10371 +static struct task_struct* kfmlp_remove_hp_waiter(struct kfmlp_semaphore* sem)
10372 +{
10373 + /* must hold sem->lock */
10374 +
10375 + struct kfmlp_queue *my_queue = NULL;
10376 + struct task_struct *max_hp = NULL;
10377 +
10378 +
10379 + struct list_head *pos;
10380 + struct task_struct *queued;
10381 + int i;
10382 +
10383 + for(i = 0; i < sem->num_resources; ++i)
10384 + {
10385 + if( (sem->queues[i].count > 1) &&
10386 + ((my_queue == NULL) ||
10387 + (rm_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) )
10388 + {
10389 + my_queue = &sem->queues[i];
10390 + }
10391 + }
10392 +
10393 + if(my_queue)
10394 + {
10395 + crm_domain_t* cluster;
10396 +
10397 + max_hp = my_queue->hp_waiter;
10398 + BUG_ON(!max_hp);
10399 +
10400 + TRACE_CUR("queue %d: stealing %s/%d from queue %d\n",
10401 + kfmlp_get_idx(sem, my_queue),
10402 + max_hp->comm, max_hp->pid,
10403 + kfmlp_get_idx(sem, my_queue));
10404 +
10405 + my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, max_hp);
10406 +
10407 + /*
10408 + if(my_queue->hp_waiter)
10409 + TRACE_CUR("queue %d: new hp_waiter is %s/%d\n",
10410 + kfmlp_get_idx(sem, my_queue),
10411 + my_queue->hp_waiter->comm,
10412 + my_queue->hp_waiter->pid);
10413 + else
10414 + TRACE_CUR("queue %d: new hp_waiter is %p\n",
10415 + kfmlp_get_idx(sem, my_queue), NULL);
10416 + */
10417 +
10418 + cluster = task_cpu_cluster(max_hp);
10419 +
10420 + raw_spin_lock(&cluster->crm_lock);
10421 +
10422 + /*
10423 + if(my_queue->owner)
10424 + TRACE_CUR("queue %d: owner is %s/%d\n",
10425 + kfmlp_get_idx(sem, my_queue),
10426 + my_queue->owner->comm,
10427 + my_queue->owner->pid);
10428 + else
10429 + TRACE_CUR("queue %d: owner is %p\n",
10430 + kfmlp_get_idx(sem, my_queue),
10431 + NULL);
10432 + */
10433 +
10434 + if(tsk_rt(my_queue->owner)->inh_task == max_hp)
10435 + {
10436 + __clear_priority_inheritance(my_queue->owner);
10437 + if(my_queue->hp_waiter != NULL)
10438 + {
10439 + __set_priority_inheritance(my_queue->owner, my_queue->hp_waiter);
10440 + }
10441 + }
10442 + raw_spin_unlock(&cluster->crm_lock);
10443 +
10444 + list_for_each(pos, &my_queue->wait.task_list)
10445 + {
10446 + queued = (struct task_struct*) list_entry(pos, wait_queue_t,
10447 + task_list)->private;
10448 + /* Compare task prios, find high prio task. */
10449 + if (queued == max_hp)
10450 + {
10451 + /*
10452 + TRACE_CUR("queue %d: found entry in wait queue. REMOVING!\n",
10453 + kfmlp_get_idx(sem, my_queue));
10454 + */
10455 + __remove_wait_queue(&my_queue->wait,
10456 + list_entry(pos, wait_queue_t, task_list));
10457 + break;
10458 + }
10459 + }
10460 + --(my_queue->count);
10461 + }
10462 +
10463 + return(max_hp);
10464 +}
10465 +
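+/* Acquire one of the k replicas: the caller joins the currently shortest
+ * queue. If that replica is held, the caller blocks in FIFO order behind the
+ * owner and, if it is the new highest-priority waiter, passes its priority
+ * to the owner via priority inheritance. */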
10466 +int crm_kfmlp_lock(struct litmus_lock* l)
10467 +{
10468 + struct task_struct* t = current;
10469 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
10470 + struct kfmlp_queue* my_queue;
10471 + wait_queue_t wait;
10472 + unsigned long flags;
10473 +
10474 + if (!is_realtime(t))
10475 + return -EPERM;
10476 +
10477 + spin_lock_irqsave(&sem->lock, flags);
10478 +
10479 + my_queue = sem->shortest_queue;
10480 +
10481 + if (my_queue->owner) {
10482 + /* resource is not free => must suspend and wait */
10483 + TRACE_CUR("queue %d: Resource is not free => must suspend and wait.\n",
10484 + kfmlp_get_idx(sem, my_queue));
10485 +
10486 + init_waitqueue_entry(&wait, t);
10487 +
10488 + /* FIXME: interruptible would be nice some day */
10489 + set_task_state(t, TASK_UNINTERRUPTIBLE);
10490 +
10491 + __add_wait_queue_tail_exclusive(&my_queue->wait, &wait);
10492 +
10493 + /* check if we need to activate priority inheritance */
10494 + if (rm_higher_prio(t, my_queue->hp_waiter))
10495 + {
10496 + my_queue->hp_waiter = t;
10497 + if (rm_higher_prio(t, my_queue->owner))
10498 + {
10499 + set_priority_inheritance(my_queue->owner, my_queue->hp_waiter);
10500 + }
10501 + }
10502 +
10503 + ++(my_queue->count);
10504 + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
10505 +
10506 + /* release lock before sleeping */
10507 + spin_unlock_irqrestore(&sem->lock, flags);
10508 +
10509 + /* We depend on the FIFO order. Thus, we don't need to recheck
10510 + * when we wake up; we are guaranteed to have the lock since
10511 + * there is only one wake up per release (or steal).
10512 + */
10513 + schedule();
10514 +
10515 +
10516 + if(my_queue->owner == t)
10517 + {
10518 + TRACE_CUR("queue %d: acquired through waiting\n",
10519 + kfmlp_get_idx(sem, my_queue));
10520 + }
10521 + else
10522 + {
10523 + /* this case may happen if our wait entry was stolen
10524 + between queues. record where we went.*/
10525 + my_queue = kfmlp_get_queue(sem, t);
10526 + BUG_ON(!my_queue);
10527 + TRACE_CUR("queue %d: acquired through stealing\n",
10528 + kfmlp_get_idx(sem, my_queue));
10529 + }
10530 + }
10531 + else
10532 + {
10533 + TRACE_CUR("queue %d: acquired immediately\n",
10534 + kfmlp_get_idx(sem, my_queue));
10535 +
10536 + my_queue->owner = t;
10537 +
10538 + ++(my_queue->count);
10539 + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
10540 +
10541 + spin_unlock_irqrestore(&sem->lock, flags);
10542 + }
10543 +
10544 + return kfmlp_get_idx(sem, my_queue);
10545 +}
10546 +
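+/* Release a replica: ownership passes to the next FIFO waiter of this queue,
+ * or, if the queue is empty, to a waiter stolen from another queue. Any
+ * priority inherited while holding the replica is dropped afterwards. */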
10547 +int crm_kfmlp_unlock(struct litmus_lock* l)
10548 +{
10549 + struct task_struct *t = current, *next;
10550 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
10551 + struct kfmlp_queue *my_queue;
10552 + unsigned long flags;
10553 + int err = 0;
10554 +
10555 + spin_lock_irqsave(&sem->lock, flags);
10556 +
10557 + my_queue = kfmlp_get_queue(sem, t);
10558 +
10559 + if (!my_queue) {
10560 + err = -EINVAL;
10561 + goto out;
10562 + }
10563 +
10564 + /* check if there are jobs waiting for this resource */
10565 + next = __waitqueue_remove_first(&my_queue->wait);
10566 + if (next) {
10567 + /*
10568 + TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n",
10569 + kfmlp_get_idx(sem, my_queue),
10570 + next->comm, next->pid);
10571 + */
10572 +		/* next becomes the resource holder */
10573 + my_queue->owner = next;
10574 +
10575 + --(my_queue->count);
10576 + if(my_queue->count < sem->shortest_queue->count)
10577 + {
10578 + sem->shortest_queue = my_queue;
10579 + }
10580 +
10581 + TRACE_CUR("queue %d: lock ownership passed to %s/%d\n",
10582 + kfmlp_get_idx(sem, my_queue), next->comm, next->pid);
10583 +
10584 + /* determine new hp_waiter if necessary */
10585 + if (next == my_queue->hp_waiter) {
10586 + TRACE_TASK(next, "was highest-prio waiter\n");
10587 + /* next has the highest priority --- it doesn't need to
10588 + * inherit. However, we need to make sure that the
10589 + * next-highest priority in the queue is reflected in
10590 + * hp_waiter. */
10591 + my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next);
10592 + if (my_queue->hp_waiter)
10593 + TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue));
10594 + else
10595 + TRACE("queue %d: no further waiters\n", kfmlp_get_idx(sem, my_queue));
10596 + } else {
10597 + /* Well, if next is not the highest-priority waiter,
10598 + * then it ought to inherit the highest-priority
10599 + * waiter's priority. */
10600 + set_priority_inheritance(next, my_queue->hp_waiter);
10601 + }
10602 +
10603 + /* wake up next */
10604 + wake_up_process(next);
10605 + }
10606 + else
10607 + {
10608 + TRACE_CUR("queue %d: looking to steal someone...\n", kfmlp_get_idx(sem, my_queue));
10609 +
10610 + next = kfmlp_remove_hp_waiter(sem); /* returns NULL if nothing to steal */
10611 +
10612 + /*
10613 + if(next)
10614 + TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - steal\n",
10615 + kfmlp_get_idx(sem, my_queue),
10616 + next->comm, next->pid);
10617 + */
10618 +
10619 + my_queue->owner = next;
10620 +
10621 + if(next)
10622 + {
10623 + TRACE_CUR("queue %d: lock ownership passed to %s/%d (which was stolen)\n",
10624 + kfmlp_get_idx(sem, my_queue),
10625 + next->comm, next->pid);
10626 +
10627 + /* wake up next */
10628 + wake_up_process(next);
10629 + }
10630 + else
10631 + {
10632 + TRACE_CUR("queue %d: no one to steal.\n", kfmlp_get_idx(sem, my_queue));
10633 +
10634 + --(my_queue->count);
10635 + if(my_queue->count < sem->shortest_queue->count)
10636 + {
10637 + sem->shortest_queue = my_queue;
10638 + }
10639 + }
10640 + }
10641 +
10642 + /* we lose the benefit of priority inheritance (if any) */
10643 + if (tsk_rt(t)->inh_task)
10644 + clear_priority_inheritance(t);
10645 +
10646 +out:
10647 + spin_unlock_irqrestore(&sem->lock, flags);
10648 +
10649 + return err;
10650 +}
10651 +
10652 +int crm_kfmlp_close(struct litmus_lock* l)
10653 +{
10654 + struct task_struct *t = current;
10655 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
10656 + struct kfmlp_queue *my_queue;
10657 + unsigned long flags;
10658 +
10659 + int owner;
10660 +
10661 + spin_lock_irqsave(&sem->lock, flags);
10662 +
10663 + my_queue = kfmlp_get_queue(sem, t);
10664 + owner = (my_queue) ? (my_queue->owner == t) : 0;
10665 +
10666 + spin_unlock_irqrestore(&sem->lock, flags);
10667 +
10668 + if (owner)
10669 + crm_kfmlp_unlock(l);
10670 +
10671 + return 0;
10672 +}
10673 +
10674 +void crm_kfmlp_free(struct litmus_lock* l)
10675 +{
10676 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
10677 + kfree(sem->queues);
10678 + kfree(sem);
10679 +}
10680 +
10681 +static struct litmus_lock_ops crm_kfmlp_lock_ops = {
10682 + .close = crm_kfmlp_close,
10683 + .lock = crm_kfmlp_lock,
10684 + .unlock = crm_kfmlp_unlock,
10685 + .deallocate = crm_kfmlp_free,
10686 +};
10687 +
10688 +static struct litmus_lock* crm_new_kfmlp(void* __user arg, int* ret_code)
10689 +{
10690 + struct kfmlp_semaphore* sem;
10691 + int num_resources = 0;
10692 + int i;
10693 +
10694 + if(!access_ok(VERIFY_READ, arg, sizeof(num_resources)))
10695 + {
10696 + *ret_code = -EINVAL;
10697 + return(NULL);
10698 + }
10699 + if(__copy_from_user(&num_resources, arg, sizeof(num_resources)))
10700 + {
10701 + *ret_code = -EINVAL;
10702 + return(NULL);
10703 + }
10704 + if(num_resources < 1)
10705 + {
10706 + *ret_code = -EINVAL;
10707 + return(NULL);
10708 + }
10709 +
10710 + sem = kmalloc(sizeof(*sem), GFP_KERNEL);
10711 + if(!sem)
10712 + {
10713 + *ret_code = -ENOMEM;
10714 + return NULL;
10715 + }
10716 +
10717 + sem->queues = kmalloc(sizeof(struct kfmlp_queue)*num_resources, GFP_KERNEL);
10718 + if(!sem->queues)
10719 + {
10720 + kfree(sem);
10721 + *ret_code = -ENOMEM;
10722 + return NULL;
10723 + }
10724 +
10725 + sem->litmus_lock.ops = &crm_kfmlp_lock_ops;
10726 + spin_lock_init(&sem->lock);
10727 + sem->num_resources = num_resources;
10728 +
10729 + for(i = 0; i < num_resources; ++i)
10730 + {
10731 + sem->queues[i].owner = NULL;
10732 + sem->queues[i].hp_waiter = NULL;
10733 + init_waitqueue_head(&sem->queues[i].wait);
10734 + sem->queues[i].count = 0;
10735 + }
10736 +
10737 + sem->shortest_queue = &sem->queues[0];
10738 +
10739 + *ret_code = 0;
10740 + return &sem->litmus_lock;
10741 +}
10742 +
10743 +
10744 +/* **** lock constructor **** */
10745 +
10746 +static long crm_allocate_lock(struct litmus_lock **lock, int type,
10747 + void* __user arg)
10748 +{
10749 + int err = -ENXIO;
10750 +
10751 + /* C-RM currently only supports the FMLP for global resources
10752 + WITHIN a given cluster. DO NOT USE CROSS-CLUSTER! */
10753 + switch (type) {
10754 + case KFMLP_SEM:
10755 + *lock = crm_new_kfmlp(arg, &err);
10756 + break;
10757 + };
10758 +
10759 + return err;
10760 +}
10761 +
10762 +#endif // CONFIG_LITMUS_LOCKING
10763 +
10764 +
10765 +
10766 +
10767 +
10768 +
10769 +/* total number of clusters */
10770 +static int num_clusters;
10771 +/* we do not support clusters of different sizes */
10772 +static unsigned int cluster_size;
10773 +
10774 +#ifdef VERBOSE_INIT
10775 +static void print_cluster_topology(cpumask_var_t mask, int cpu)
10776 +{
10777 + int chk;
10778 + char buf[255];
10779 +
10780 + chk = cpulist_scnprintf(buf, 254, mask);
10781 + buf[chk] = '\0';
10782 + printk(KERN_INFO "CPU = %d, shared cpu(s) = %s\n", cpu, buf);
10783 +
10784 +}
10785 +#endif
10786 +
10787 +static int clusters_allocated = 0;
10788 +
10789 +static void cleanup_crm(void)
10790 +{
10791 + int i;
10792 +
10793 + if (clusters_allocated) {
10794 + for (i = 0; i < num_clusters; i++) {
10795 + kfree(crm[i].cpus);
10796 + kfree(crm[i].heap_node);
10797 + free_cpumask_var(crm[i].cpu_map);
10798 + }
10799 +
10800 + kfree(crm);
10801 + }
10802 +}
10803 +
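+/* Build the cluster structures for the configured cache level: determine the
+ * cluster size from the shared-cpu maps, allocate one crm_domain_t per
+ * cluster, assign each online CPU to its cluster, and (when
+ * CONFIG_LITMUS_SOFTIRQD is enabled) distribute the klitirqd daemons evenly
+ * across the clusters. */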
10804 +static long crm_activate_plugin(void)
10805 +{
10806 + int i, j, cpu, ccpu, cpu_count;
10807 + cpu_entry_t *entry;
10808 +
10809 + cpumask_var_t mask;
10810 + int chk = 0;
10811 +
10812 + /* de-allocate old clusters, if any */
10813 + cleanup_crm();
10814 +
10815 + printk(KERN_INFO "C-RM: Activate Plugin, cluster configuration = %d\n",
10816 + cluster_config);
10817 +
10818 + /* need to get cluster_size first */
10819 + if(!zalloc_cpumask_var(&mask, GFP_ATOMIC))
10820 + return -ENOMEM;
10821 +
10822 + if (unlikely(cluster_config == GLOBAL_CLUSTER)) {
10823 + cluster_size = num_online_cpus();
10824 + } else {
10825 + chk = get_shared_cpu_map(mask, 0, cluster_config);
10826 + if (chk) {
10827 + /* if chk != 0 then it is the max allowed index */
10828 + printk(KERN_INFO "C-RM: Cluster configuration = %d "
10829 + "is not supported on this hardware.\n",
10830 + cluster_config);
10831 + /* User should notice that the configuration failed, so
10832 + * let's bail out. */
10833 + return -EINVAL;
10834 + }
10835 +
10836 + cluster_size = cpumask_weight(mask);
10837 + }
10838 +
10839 + if ((num_online_cpus() % cluster_size) != 0) {
10840 + /* this can't be right, some cpus are left out */
10841 + printk(KERN_ERR "C-RM: Trying to group %d cpus in %d!\n",
10842 + num_online_cpus(), cluster_size);
10843 + return -1;
10844 + }
10845 +
10846 + num_clusters = num_online_cpus() / cluster_size;
10847 + printk(KERN_INFO "C-RM: %d cluster(s) of size = %d\n",
10848 + num_clusters, cluster_size);
10849 +
10850 + /* initialize clusters */
10851 + crm = kmalloc(num_clusters * sizeof(crm_domain_t), GFP_ATOMIC);
10852 + for (i = 0; i < num_clusters; i++) {
10853 +
10854 + crm[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t),
10855 + GFP_ATOMIC);
10856 + crm[i].heap_node = kmalloc(
10857 + cluster_size * sizeof(struct bheap_node),
10858 + GFP_ATOMIC);
10859 + bheap_init(&(crm[i].cpu_heap));
10860 + rm_domain_init(&(crm[i].domain), NULL, crm_release_jobs);
10861 +
10862 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
10863 + crm[i].pending_tasklets.head = NULL;
10864 + crm[i].pending_tasklets.tail = &(crm[i].pending_tasklets.head);
10865 +#endif
10866 +
10867 + if(!zalloc_cpumask_var(&crm[i].cpu_map, GFP_ATOMIC))
10868 + return -ENOMEM;
10869 + }
10870 +
10871 + /* cycle through cluster and add cpus to them */
10872 + for (i = 0; i < num_clusters; i++) {
10873 +
10874 + for_each_online_cpu(cpu) {
10875 + /* check if the cpu is already in a cluster */
10876 + for (j = 0; j < num_clusters; j++)
10877 + if (cpumask_test_cpu(cpu, crm[j].cpu_map))
10878 + break;
10879 + /* if it is in a cluster go to next cpu */
10880 + if (j < num_clusters &&
10881 + cpumask_test_cpu(cpu, crm[j].cpu_map))
10882 + continue;
10883 +
10884 + /* this cpu isn't in any cluster */
10885 + /* get the shared cpus */
10886 + if (unlikely(cluster_config == GLOBAL_CLUSTER))
10887 + cpumask_copy(mask, cpu_online_mask);
10888 + else
10889 + get_shared_cpu_map(mask, cpu, cluster_config);
10890 +
10891 + cpumask_copy(crm[i].cpu_map, mask);
10892 +#ifdef VERBOSE_INIT
10893 + print_cluster_topology(mask, cpu);
10894 +#endif
10895 + /* add cpus to current cluster and init cpu_entry_t */
10896 + cpu_count = 0;
10897 + for_each_cpu(ccpu, crm[i].cpu_map) {
10898 +
10899 + entry = &per_cpu(crm_cpu_entries, ccpu);
10900 + crm[i].cpus[cpu_count] = entry;
10901 + atomic_set(&entry->will_schedule, 0);
10902 + entry->cpu = ccpu;
10903 + entry->cluster = &crm[i];
10904 + entry->hn = &(crm[i].heap_node[cpu_count]);
10905 + bheap_node_init(&entry->hn, entry);
10906 +
10907 + cpu_count++;
10908 +
10909 + entry->linked = NULL;
10910 + entry->scheduled = NULL;
10911 + update_cpu_position(entry);
10912 + }
10913 + /* done with this cluster */
10914 + break;
10915 + }
10916 + }
10917 +
10918 +#ifdef CONFIG_LITMUS_SOFTIRQD
10919 + {
10920 + /* distribute the daemons evenly across the clusters. */
10921 + int* affinity = kmalloc(NR_LITMUS_SOFTIRQD * sizeof(int), GFP_ATOMIC);
10922 + int num_daemons_per_cluster = NR_LITMUS_SOFTIRQD / num_clusters;
10923 + int left_over = NR_LITMUS_SOFTIRQD % num_clusters;
10924 +
10925 + int daemon = 0;
10926 + for(i = 0; i < num_clusters; ++i)
10927 + {
10928 + int num_on_this_cluster = num_daemons_per_cluster;
10929 + if(left_over)
10930 + {
10931 + ++num_on_this_cluster;
10932 + --left_over;
10933 + }
10934 +
10935 + for(j = 0; j < num_on_this_cluster; ++j)
10936 + {
10937 + // first CPU of this cluster
10938 + affinity[daemon++] = i*cluster_size;
10939 + }
10940 + }
10941 +
10942 + spawn_klitirqd(affinity);
10943 +
10944 + kfree(affinity);
10945 + }
10946 +#endif
10947 +
10948 +#ifdef CONFIG_LITMUS_NVIDIA
10949 + init_nvidia_info();
10950 +#endif
10951 +
10952 + free_cpumask_var(mask);
10953 + clusters_allocated = 1;
10954 + return 0;
10955 +}
10956 +
10957 +/* Plugin object */
10958 +static struct sched_plugin crm_plugin __cacheline_aligned_in_smp = {
10959 + .plugin_name = "C-RM",
10960 + .finish_switch = crm_finish_switch,
10961 + .tick = crm_tick,
10962 + .task_new = crm_task_new,
10963 + .complete_job = complete_job,
10964 + .task_exit = crm_task_exit,
10965 + .schedule = crm_schedule,
10966 + .task_wake_up = crm_task_wake_up,
10967 + .task_block = crm_task_block,
10968 + .admit_task = crm_admit_task,
10969 + .activate_plugin = crm_activate_plugin,
10970 +#ifdef CONFIG_LITMUS_LOCKING
10971 + .allocate_lock = crm_allocate_lock,
10972 + .set_prio_inh = set_priority_inheritance,
10973 + .clear_prio_inh = clear_priority_inheritance,
10974 +#endif
10975 +#ifdef CONFIG_LITMUS_SOFTIRQD
10976 + .set_prio_inh_klitirqd = set_priority_inheritance_klitirqd,
10977 + .clear_prio_inh_klitirqd = clear_priority_inheritance_klitirqd,
10978 +#endif
10979 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
10980 + .enqueue_pai_tasklet = enqueue_pai_tasklet,
10981 + .run_tasklets = run_tasklets,
10982 +#endif
10983 +};
10984 +
10985 +static struct proc_dir_entry *cluster_file = NULL, *crm_dir = NULL;
10986 +
10987 +static int __init init_crm(void)
10988 +{
10989 + int err, fs;
10990 +
10991 + err = register_sched_plugin(&crm_plugin);
10992 + if (!err) {
10993 + fs = make_plugin_proc_dir(&crm_plugin, &crm_dir);
10994 + if (!fs)
10995 + cluster_file = create_cluster_file(crm_dir, &cluster_config);
10996 + else
10997 + printk(KERN_ERR "Could not allocate C-RM procfs dir.\n");
10998 + }
10999 + return err;
11000 +}
11001 +
11002 +static void clean_crm(void)
11003 +{
11004 + cleanup_crm();
11005 + if (cluster_file)
11006 + remove_proc_entry("cluster", crm_dir);
11007 + if (crm_dir)
11008 + remove_plugin_proc_dir(&crm_plugin);
11009 +}
11010 +
11011 +module_init(init_crm);
11012 +module_exit(clean_crm);
11013 diff --git a/litmus/sched_crm_srt.c b/litmus/sched_crm_srt.c
11014 new file mode 100644
11015 index 0000000..f0064d4
11016 --- /dev/null
11017 +++ b/litmus/sched_crm_srt.c
11018 @@ -0,0 +1,2058 @@
11019 +/*
11020 + * litmus/sched_crm_srt.c
11021 + *
11022 + * Implementation of the C-RM-SRT scheduling algorithm.
11023 + *
11024 + * This implementation is based on G-EDF:
11025 + * - CPUs are clustered around L2 or L3 caches.
11026 + * - Cluster topology is automatically detected (this is arch dependent
11027 + * and works only on x86 at the moment --- and only with modern
11028 + * cpus that export cpuid4 information)
11029 + * - The plugin _does not_ attempt to put tasks in the right cluster, i.e.,
11030 + * the programmer needs to be aware of the topology to place tasks
11031 + * in the desired cluster
11032 + * - default clustering is around L2 cache (cache index = 2)
11033 + * supported clusters are: L1 (private cache: pedf), L2, L3, ALL (all
11034 + * online_cpus are placed in a single cluster).
11035 + *
11036 + * For details on functions, take a look at sched_gsn_edf.c
11037 + *
11038 + * Currently, we do not support changes in the number of online cpus.
11039 + * If the num_online_cpus() dynamically changes, the plugin is broken.
11040 + *
11041 + * This version uses the simple approach and serializes all scheduling
11042 + * decisions by the use of a queue lock. This is probably not the
11043 + * best way to do it, but it should suffice for now.
11044 + */
11045 +
11046 +#include <linux/spinlock.h>
11047 +#include <linux/percpu.h>
11048 +#include <linux/sched.h>
11049 +#include <linux/slab.h>
11050 +#include <linux/uaccess.h>
11051 +
11052 +#include <linux/module.h>
11053 +
11054 +#include <litmus/litmus.h>
11055 +#include <litmus/jobs.h>
11056 +#include <litmus/preempt.h>
11057 +#include <litmus/sched_plugin.h>
11058 +#include <litmus/rm_srt_common.h>
11059 +#include <litmus/sched_trace.h>
11060 +
11061 +#include <litmus/clustered.h>
11062 +
11063 +#include <litmus/bheap.h>
11064 +
11065 +/* to configure the cluster size */
11066 +#include <litmus/litmus_proc.h>
11067 +
11068 +#ifdef CONFIG_SCHED_CPU_AFFINITY
11069 +#include <litmus/affinity.h>
11070 +#endif
11071 +
11072 +#ifdef CONFIG_LITMUS_SOFTIRQD
11073 +#include <litmus/litmus_softirq.h>
11074 +#endif
11075 +
11076 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
11077 +#include <linux/interrupt.h>
11078 +#include <litmus/trace.h>
11079 +#endif
11080 +
11081 +#ifdef CONFIG_LITMUS_NVIDIA
11082 +#include <litmus/nvidia_info.h>
11083 +#endif
11084 +
11085 +/* Reference configuration variable. Determines which cache level is used to
11086 + * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that
11087 + * all CPUs form a single cluster (just like GSN-EDF).
11088 + */
11089 +static enum cache_level cluster_config = GLOBAL_CLUSTER;
11090 +
11091 +struct clusterdomain;
11092 +
11093 +/* cpu_entry_t - maintain the linked and scheduled state
11094 + *
11095 + * A cpu also contains a pointer to the crm_srt_domain_t cluster
11096 + * that owns it (struct clusterdomain*)
11097 + */
11098 +typedef struct {
11099 + int cpu;
11100 + struct clusterdomain* cluster; /* owning cluster */
11101 + struct task_struct* linked; /* only RT tasks */
11102 + struct task_struct* scheduled; /* only RT tasks */
11103 + atomic_t will_schedule; /* prevent unneeded IPIs */
11104 + struct bheap_node* hn;
11105 +} cpu_entry_t;
11106 +
11107 +/* one cpu_entry_t per CPU */
11108 +DEFINE_PER_CPU(cpu_entry_t, crm_srt_cpu_entries);
11109 +
11110 +#define set_will_schedule() \
11111 + (atomic_set(&__get_cpu_var(crm_srt_cpu_entries).will_schedule, 1))
11112 +#define clear_will_schedule() \
11113 + (atomic_set(&__get_cpu_var(crm_srt_cpu_entries).will_schedule, 0))
11114 +#define test_will_schedule(cpu) \
11115 + (atomic_read(&per_cpu(crm_srt_cpu_entries, cpu).will_schedule))
11116 +
11117 +
11118 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
11119 +struct tasklet_head
11120 +{
11121 + struct tasklet_struct *head;
11122 + struct tasklet_struct **tail;
11123 +};
11124 +#endif
11125 +
11126 +/*
11127 + * In C-RM-SRT there is a crm_srt domain _per_ cluster
11128 + * The number of clusters is dynamically determined according to the
11129 + * total cpu number and the cluster size
11130 + */
11131 +typedef struct clusterdomain {
11132 + /* rt_domain for this cluster */
11133 + rt_domain_t domain;
11134 + /* cpus in this cluster */
11135 + cpu_entry_t* *cpus;
11136 + /* map of this cluster cpus */
11137 + cpumask_var_t cpu_map;
11138 + /* the cpus queue themselves according to priority in here */
11139 + struct bheap_node *heap_node;
11140 + struct bheap cpu_heap;
11141 + /* lock for this cluster */
11142 +#define crm_srt_lock domain.ready_lock
11143 +
11144 +
11145 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
11146 + struct tasklet_head pending_tasklets;
11147 +#endif
11148 +
11149 +} crm_srt_domain_t;
11150 +
11151 +/* a crm_srt_domain per cluster; allocation is done at init/activation time */
11152 +crm_srt_domain_t *crm_srt;
11153 +
11154 +#define remote_cluster(cpu) ((crm_srt_domain_t *) per_cpu(crm_srt_cpu_entries, cpu).cluster)
11155 +#define task_cpu_cluster(task) remote_cluster(get_partition(task))
11156 +
11157 +/* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling
11158 + * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose
11159 + * information during the initialization of the plugin (e.g., topology)
11160 +#define WANT_ALL_SCHED_EVENTS
11161 + */
11162 +#define VERBOSE_INIT
11163 +
11164 +static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
11165 +{
11166 + cpu_entry_t *a, *b;
11167 + a = _a->value;
11168 + b = _b->value;
11169 + /* Note that a and b are inverted: we want the lowest-priority CPU at
11170 + * the top of the heap.
11171 + */
11172 + return rm_srt_higher_prio(b->linked, a->linked);
11173 +}
11174 +
11175 +/* update_cpu_position - Move the cpu entry to the correct place to maintain
11176 + * order in the cpu queue. Caller must hold crm_srt lock.
11177 + */
11178 +static void update_cpu_position(cpu_entry_t *entry)
11179 +{
11180 + crm_srt_domain_t *cluster = entry->cluster;
11181 +
11182 + if (likely(bheap_node_in_heap(entry->hn)))
11183 + bheap_delete(cpu_lower_prio,
11184 + &cluster->cpu_heap,
11185 + entry->hn);
11186 +
11187 + bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn);
11188 +}
11189 +
11190 +/* caller must hold crm_srt lock */
11191 +static cpu_entry_t* lowest_prio_cpu(crm_srt_domain_t *cluster)
11192 +{
11193 + struct bheap_node* hn;
11194 + hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap);
11195 + return hn->value;
11196 +}
11197 +
11198 +
11199 +/* link_task_to_cpu - Update the link of a CPU.
11200 + * Handles the case where the to-be-linked task is already
11201 + * scheduled on a different CPU.
11202 + */
11203 +static noinline void link_task_to_cpu(struct task_struct* linked,
11204 + cpu_entry_t *entry)
11205 +{
11206 + cpu_entry_t *sched;
11207 + struct task_struct* tmp;
11208 + int on_cpu;
11209 +
11210 + BUG_ON(linked && !is_realtime(linked));
11211 +
11212 + /* Currently linked task is set to be unlinked. */
11213 + if (entry->linked) {
11214 + entry->linked->rt_param.linked_on = NO_CPU;
11215 + }
11216 +
11217 + /* Link new task to CPU. */
11218 + if (linked) {
11219 + set_rt_flags(linked, RT_F_RUNNING);
11220 +		/* handle the case where the task is already scheduled somewhere! */
11221 + on_cpu = linked->rt_param.scheduled_on;
11222 + if (on_cpu != NO_CPU) {
11223 + sched = &per_cpu(crm_srt_cpu_entries, on_cpu);
11224 + /* this should only happen if not linked already */
11225 + BUG_ON(sched->linked == linked);
11226 +
11227 + /* If we are already scheduled on the CPU to which we
11228 + * wanted to link, we don't need to do the swap --
11229 + * we just link ourselves to the CPU and depend on
11230 + * the caller to get things right.
11231 + */
11232 + if (entry != sched) {
11233 + TRACE_TASK(linked,
11234 + "already scheduled on %d, updating link.\n",
11235 + sched->cpu);
11236 + tmp = sched->linked;
11237 + linked->rt_param.linked_on = sched->cpu;
11238 + sched->linked = linked;
11239 + update_cpu_position(sched);
11240 + linked = tmp;
11241 + }
11242 + }
11243 + if (linked) /* might be NULL due to swap */
11244 + linked->rt_param.linked_on = entry->cpu;
11245 + }
11246 + entry->linked = linked;
11247 +#ifdef WANT_ALL_SCHED_EVENTS
11248 + if (linked)
11249 + TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
11250 + else
11251 + TRACE("NULL linked to %d.\n", entry->cpu);
11252 +#endif
11253 + update_cpu_position(entry);
11254 +}
11255 +
11256 +/* unlink - Make sure a task is not linked any longer to an entry
11257 + * where it was linked before. Must hold crm_srt_lock.
11258 + */
11259 +static noinline void unlink(struct task_struct* t)
11260 +{
11261 + cpu_entry_t *entry;
11262 +
11263 + if (t->rt_param.linked_on != NO_CPU) {
11264 + /* unlink */
11265 + entry = &per_cpu(crm_srt_cpu_entries, t->rt_param.linked_on);
11266 + t->rt_param.linked_on = NO_CPU;
11267 + link_task_to_cpu(NULL, entry);
11268 + } else if (is_queued(t)) {
11269 + /* This is an interesting situation: t is scheduled,
11270 + * but was just recently unlinked. It cannot be
11271 + * linked anywhere else (because then it would have
11272 + * been relinked to this CPU), thus it must be in some
11273 + * queue. We must remove it from the list in this
11274 + * case.
11275 + *
11276 +	 * In the C-RM-SRT case it should be somewhere in the queue for
11277 +	 * its domain, therefore we can get the domain using
11278 +	 * task_cpu_cluster.
11279 + */
11280 + remove(&(task_cpu_cluster(t))->domain, t);
11281 + }
11282 +}
11283 +
11284 +
11285 +/* preempt - force a CPU to reschedule
11286 + */
11287 +static void preempt(cpu_entry_t *entry)
11288 +{
11289 + preempt_if_preemptable(entry->scheduled, entry->cpu);
11290 +}
11291 +
11292 +/* requeue - Put an unlinked task into c-rm-srt domain.
11293 + * Caller must hold crm_srt_lock.
11294 + */
11295 +static noinline void requeue(struct task_struct* task)
11296 +{
11297 + crm_srt_domain_t *cluster = task_cpu_cluster(task);
11298 + BUG_ON(!task);
11299 + /* sanity check before insertion */
11300 + BUG_ON(is_queued(task));
11301 +
11302 + if (is_released(task, litmus_clock()))
11303 + __add_ready(&cluster->domain, task);
11304 + else {
11305 + /* it has got to wait */
11306 + add_release(&cluster->domain, task);
11307 + }
11308 +}
11309 +
11310 +#ifdef CONFIG_SCHED_CPU_AFFINITY
11311 +static cpu_entry_t* crm_srt_get_nearest_available_cpu(
11312 + crm_srt_domain_t *cluster, cpu_entry_t* start)
11313 +{
11314 + cpu_entry_t* affinity;
11315 +
11316 + get_nearest_available_cpu(affinity, start, crm_srt_cpu_entries, -1);
11317 +
11318 + /* make sure CPU is in our cluster */
11319 + if(affinity && cpu_isset(affinity->cpu, *cluster->cpu_map))
11320 + return(affinity);
11321 + else
11322 + return(NULL);
11323 +}
11324 +#endif
11325 +
11326 +
11327 +/* check for any necessary preemptions */
11328 +static void check_for_preemptions(crm_srt_domain_t *cluster)
11329 +{
11330 + struct task_struct *task;
11331 + cpu_entry_t *last;
11332 +
11333 + for(last = lowest_prio_cpu(cluster);
11334 + rm_srt_preemption_needed(&cluster->domain, last->linked);
11335 + last = lowest_prio_cpu(cluster)) {
11336 + /* preemption necessary */
11337 + task = __take_ready(&cluster->domain);
11338 +#ifdef CONFIG_SCHED_CPU_AFFINITY
11339 + {
11340 + cpu_entry_t* affinity =
11341 + crm_srt_get_nearest_available_cpu(cluster,
11342 + &per_cpu(crm_srt_cpu_entries, task_cpu(task)));
11343 + if(affinity)
11344 + last = affinity;
11345 + else if(last->linked)
11346 + requeue(last->linked);
11347 + }
11348 +#else
11349 + if (last->linked)
11350 + requeue(last->linked);
11351 +#endif
11352 + TRACE("check_for_preemptions: attempting to link task %d to %d\n",
11353 + task->pid, last->cpu);
11354 + link_task_to_cpu(task, last);
11355 + preempt(last);
11356 + }
11357 +}
11358 +
11359 +/* crm_srt_job_arrival: task is either resumed or released */
11360 +static noinline void crm_srt_job_arrival(struct task_struct* task)
11361 +{
11362 + crm_srt_domain_t *cluster = task_cpu_cluster(task);
11363 + BUG_ON(!task);
11364 +
11365 + requeue(task);
11366 + check_for_preemptions(cluster);
11367 +}
11368 +
11369 +static void crm_srt_release_jobs(rt_domain_t* rt, struct bheap* tasks)
11370 +{
11371 + crm_srt_domain_t* cluster = container_of(rt, crm_srt_domain_t, domain);
11372 + unsigned long flags;
11373 +
11374 + raw_spin_lock_irqsave(&cluster->crm_srt_lock, flags);
11375 +
11376 + __merge_ready(&cluster->domain, tasks);
11377 + check_for_preemptions(cluster);
11378 +
11379 + raw_spin_unlock_irqrestore(&cluster->crm_srt_lock, flags);
11380 +}
11381 +
11382 +/* caller holds crm_srt_lock */
11383 +static noinline void job_completion(struct task_struct *t, int forced)
11384 +{
11385 + BUG_ON(!t);
11386 +
11387 + sched_trace_task_completion(t, forced);
11388 +
11389 +#ifdef CONFIG_LITMUS_NVIDIA
11390 + atomic_set(&tsk_rt(t)->nv_int_count, 0);
11391 +#endif
11392 +
11393 + TRACE_TASK(t, "job_completion().\n");
11394 +
11395 + /* set flags */
11396 + set_rt_flags(t, RT_F_SLEEP);
11397 + /* prepare for next period */
11398 + prepare_for_next_period(t);
11399 + if (is_released(t, litmus_clock()))
11400 + sched_trace_task_release(t);
11401 + /* unlink */
11402 + unlink(t);
11403 + /* requeue
11404 + * But don't requeue a blocking task. */
11405 + if (is_running(t))
11406 + crm_srt_job_arrival(t);
11407 +}
11408 +
11409 +/* crm_srt_tick - this function is called for every local timer
11410 + * interrupt.
11411 + *
11412 + * checks whether the current task has expired and checks
11413 + * whether we need to preempt it if it has not expired
11414 + */
11415 +static void crm_srt_tick(struct task_struct* t)
11416 +{
11417 + if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
11418 + if (!is_np(t)) {
11419 + /* np tasks will be preempted when they become
11420 + * preemptable again
11421 + */
11422 + litmus_reschedule_local();
11423 + set_will_schedule();
11424 + TRACE("crm_srt_scheduler_tick: "
11425 + "%d is preemptable "
11426 + " => FORCE_RESCHED\n", t->pid);
11427 + } else if (is_user_np(t)) {
11428 + TRACE("crm_srt_scheduler_tick: "
11429 + "%d is non-preemptable, "
11430 + "preemption delayed.\n", t->pid);
11431 + request_exit_np(t);
11432 + }
11433 + }
11434 +}
11435 +
11436 +
11437 +
11438 +
11439 +
11440 +
11441 +
11442 +
11443 +
11444 +
11445 +
11446 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
11447 +
11448 +
11449 +static void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed)
11450 +{
11451 + if (!atomic_read(&tasklet->count)) {
11452 + sched_trace_tasklet_begin(tasklet->owner);
11453 +
11454 + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state))
11455 + {
11456 + BUG();
11457 + }
11458 +		TRACE("%s: Invoking tasklet with owner pid = %d (flushed = %lu).\n", __FUNCTION__, tasklet->owner->pid, flushed);
11459 + tasklet->func(tasklet->data);
11460 + tasklet_unlock(tasklet);
11461 +
11462 + sched_trace_tasklet_end(tasklet->owner, flushed);
11463 + }
11464 + else {
11465 + BUG();
11466 + }
11467 +}
11468 +
11469 +
11470 +static void __extract_tasklets(crm_srt_domain_t* cluster, struct task_struct* task, struct tasklet_head* task_tasklets)
11471 +{
11472 + struct tasklet_struct* step;
11473 + struct tasklet_struct* tasklet;
11474 + struct tasklet_struct* prev;
11475 +
11476 + task_tasklets->head = NULL;
11477 + task_tasklets->tail = &(task_tasklets->head);
11478 +
11479 + prev = NULL;
11480 + for(step = cluster->pending_tasklets.head; step != NULL; step = step->next)
11481 + {
11482 + if(step->owner == task)
11483 + {
11484 + TRACE("%s: Found tasklet to flush: %d\n", __FUNCTION__, step->owner->pid);
11485 +
11486 + tasklet = step;
11487 +
11488 + if(prev) {
11489 + prev->next = tasklet->next;
11490 + }
11491 + else if(cluster->pending_tasklets.head == tasklet) {
11492 + // we're at the head.
11493 + cluster->pending_tasklets.head = tasklet->next;
11494 + }
11495 +
11496 +		if(cluster->pending_tasklets.tail == &(tasklet->next)) {
11497 +			// we're at the tail
11498 +			if(prev) {
11499 +				cluster->pending_tasklets.tail = &(prev->next);
11500 + }
11501 + else {
11502 + cluster->pending_tasklets.tail = &(cluster->pending_tasklets.head);
11503 + }
11504 + }
11505 +
11506 + tasklet->next = NULL;
11507 + *(task_tasklets->tail) = tasklet;
11508 + task_tasklets->tail = &(tasklet->next);
11509 + }
11510 + else {
11511 + prev = step;
11512 + }
11513 + }
11514 +}
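/* Editorial sketch (not part of the patch): the queues manipulated above and
 * in __add_pai_tasklet() follow the usual tasklet_head convention, which the
 * list surgery in __extract_tasklets() assumes:
 *
 *   - pending_tasklets.tail always points at the 'next' field of the last
 *     queued tasklet, or at pending_tasklets.head when the queue is empty;
 *   - appending a tasklet t is therefore always:
 *
 *         t->next = NULL;
 *         *(q->tail) = t;
 *         q->tail = &(t->next);
 */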
11515 +
11516 +static void flush_tasklets(crm_srt_domain_t* cluster, struct task_struct* task)
11517 +{
11518 + unsigned long flags;
11519 + struct tasklet_head task_tasklets;
11520 + struct tasklet_struct* step;
11521 +
11522 + raw_spin_lock_irqsave(&cluster->crm_srt_lock, flags);
11523 + __extract_tasklets(cluster, task, &task_tasklets);
11524 + raw_spin_unlock_irqrestore(&cluster->crm_srt_lock, flags);
11525 +
11526 +	if(task_tasklets.head != NULL) {
11527 +		TRACE("%s: Flushing tasklets for %d...\n", __FUNCTION__, task->pid);
11528 +	}
11529 +
11530 +	// now execute the tasklets extracted for this task.
11531 +	for(step = task_tasklets.head; step != NULL; /**/)
11532 + {
11533 + struct tasklet_struct* temp = step->next;
11534 +
11535 + step->next = NULL;
11536 + __do_lit_tasklet(step, 1ul);
11537 +
11538 + step = temp;
11539 + }
11540 +}
11541 +
11542 +
11543 +static void do_lit_tasklets(crm_srt_domain_t* cluster, struct task_struct* sched_task)
11544 +{
11545 + int work_to_do = 1;
11546 + struct tasklet_struct *tasklet = NULL;
11547 + //struct tasklet_struct *step;
11548 + unsigned long flags;
11549 +
11550 + while(work_to_do) {
11551 +
11552 + TS_NV_SCHED_BOTISR_START;
11553 +
11554 + // remove tasklet at head of list if it has higher priority.
11555 + raw_spin_lock_irqsave(&cluster->crm_srt_lock, flags);
11556 +
11557 + /*
11558 + step = cluster->pending_tasklets.head;
11559 + TRACE("%s: (BEFORE) dumping tasklet queue...\n", __FUNCTION__);
11560 + while(step != NULL){
11561 + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid);
11562 + step = step->next;
11563 + }
11564 + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1);
11565 + TRACE("%s: done.\n", __FUNCTION__);
11566 + */
11567 +
11568 + if(cluster->pending_tasklets.head != NULL) {
11569 + // remove tasklet at head.
11570 + tasklet = cluster->pending_tasklets.head;
11571 +
11572 + if(rm_srt_higher_prio(tasklet->owner, sched_task)) {
11573 +
11574 + if(NULL == tasklet->next) {
11575 + // tasklet is at the head, list only has one element
11576 + TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
11577 + cluster->pending_tasklets.tail = &(cluster->pending_tasklets.head);
11578 + }
11579 +
11580 + // remove the tasklet from the queue
11581 + cluster->pending_tasklets.head = tasklet->next;
11582 +
11583 + TRACE("%s: Removed tasklet for %d from tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
11584 + }
11585 + else {
11586 + TRACE("%s: Pending tasklet (%d) does not have priority to run on this CPU (%d).\n", __FUNCTION__, tasklet->owner->pid, smp_processor_id());
11587 + tasklet = NULL;
11588 + }
11589 + }
11590 + else {
11591 + TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__);
11592 + }
11593 +
11594 + /*
11595 + step = cluster->pending_tasklets.head;
11596 + TRACE("%s: (AFTER) dumping tasklet queue...\n", __FUNCTION__);
11597 + while(step != NULL){
11598 + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid);
11599 + step = step->next;
11600 + }
11601 + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1);
11602 + TRACE("%s: done.\n", __FUNCTION__);
11603 + */
11604 +
11605 + raw_spin_unlock_irqrestore(&cluster->crm_srt_lock, flags);
11606 +
11607 + TS_NV_SCHED_BOTISR_END;
11608 +
11609 + if(tasklet) {
11610 + __do_lit_tasklet(tasklet, 0ul);
11611 + tasklet = NULL;
11612 + }
11613 + else {
11614 + work_to_do = 0;
11615 + }
11616 + }
11617 +
11618 + //TRACE("%s: exited.\n", __FUNCTION__);
11619 +}
11620 +
11621 +
11622 +static void run_tasklets(struct task_struct* sched_task)
11623 +{
11624 + crm_srt_domain_t* cluster;
11625 +
11626 +#if 0
11627 + int task_is_rt = is_realtime(sched_task);
11628 + crm_srt_domain_t* cluster;
11629 +
11630 + if(is_realtime(sched_task)) {
11631 + cluster = task_cpu_cluster(sched_task);
11632 + }
11633 + else {
11634 + cluster = remote_cluster(get_cpu());
11635 + }
11636 +
11637 + if(cluster && cluster->pending_tasklets.head != NULL) {
11638 + TRACE("%s: There are tasklets to process.\n", __FUNCTION__);
11639 +
11640 + do_lit_tasklets(cluster, sched_task);
11641 + }
11642 +
11643 + if(!task_is_rt) {
11644 + put_cpu_no_resched();
11645 + }
11646 +#else
11647 +
11648 + preempt_disable();
11649 +
11650 + cluster = (is_realtime(sched_task)) ?
11651 + task_cpu_cluster(sched_task) :
11652 + remote_cluster(smp_processor_id());
11653 +
11654 + if(cluster && cluster->pending_tasklets.head != NULL) {
11655 + TRACE("%s: There are tasklets to process.\n", __FUNCTION__);
11656 + do_lit_tasklets(cluster, sched_task);
11657 + }
11658 +
11659 + preempt_enable_no_resched();
11660 +
11661 +#endif
11662 +}
11663 +
11664 +
11665 +static void __add_pai_tasklet(struct tasklet_struct* tasklet, crm_srt_domain_t* cluster)
11666 +{
11667 + struct tasklet_struct* step;
11668 +
11669 + /*
11670 + step = cluster->pending_tasklets.head;
11671 + TRACE("%s: (BEFORE) dumping tasklet queue...\n", __FUNCTION__);
11672 + while(step != NULL){
11673 + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid);
11674 + step = step->next;
11675 + }
11676 + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1);
11677 + TRACE("%s: done.\n", __FUNCTION__);
11678 + */
11679 +
11680 + tasklet->next = NULL; // make sure there are no old values floating around
11681 +
11682 + step = cluster->pending_tasklets.head;
11683 + if(step == NULL) {
11684 + TRACE("%s: tasklet queue empty. inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid);
11685 + // insert at tail.
11686 + *(cluster->pending_tasklets.tail) = tasklet;
11687 + cluster->pending_tasklets.tail = &(tasklet->next);
11688 + }
11689 + else if((*(cluster->pending_tasklets.tail) != NULL) &&
11690 + rm_srt_higher_prio((*(cluster->pending_tasklets.tail))->owner, tasklet->owner)) {
11691 + // insert at tail.
11692 + TRACE("%s: tasklet belongs at end. inserting tasklet for %d at tail.\n", __FUNCTION__, tasklet->owner->pid);
11693 +
11694 + *(cluster->pending_tasklets.tail) = tasklet;
11695 + cluster->pending_tasklets.tail = &(tasklet->next);
11696 + }
11697 + else {
11698 +
11699 + //WARN_ON(1 == 1);
11700 +
11701 + // insert the tasklet somewhere in the middle.
11702 +
11703 + TRACE("%s: tasklet belongs somewhere in the middle.\n", __FUNCTION__);
11704 +
11705 + while(step->next && rm_srt_higher_prio(step->next->owner, tasklet->owner)) {
11706 + step = step->next;
11707 + }
11708 +
11709 + // insert tasklet right before step->next.
11710 +
11711 + TRACE("%s: inserting tasklet for %d between %d and %d.\n", __FUNCTION__, tasklet->owner->pid, step->owner->pid, (step->next) ? step->next->owner->pid : -1);
11712 +
11713 + tasklet->next = step->next;
11714 + step->next = tasklet;
11715 +
11716 + // patch up the head if needed.
11717 + if(cluster->pending_tasklets.head == step)
11718 + {
11719 + TRACE("%s: %d is the new tasklet queue head.\n", __FUNCTION__, tasklet->owner->pid);
11720 + cluster->pending_tasklets.head = tasklet;
11721 + }
11722 + }
11723 +
11724 + /*
11725 + step = cluster->pending_tasklets.head;
11726 + TRACE("%s: (AFTER) dumping tasklet queue...\n", __FUNCTION__);
11727 + while(step != NULL){
11728 + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid);
11729 + step = step->next;
11730 + }
11731 + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(cluster->pending_tasklets.tail), (*(cluster->pending_tasklets.tail) != NULL) ? (*(cluster->pending_tasklets.tail))->owner->pid : -1);
11732 + TRACE("%s: done.\n", __FUNCTION__);
11733 + */
11734 +
11735 +	// NOTE: the insertion logic above already keeps this list in priority order;
11736 + // tasklet->next = NULL;
11737 + // *(cluster->pending_tasklets.tail) = tasklet;
11738 + // cluster->pending_tasklets.tail = &tasklet->next;
11739 +}
11740 +
11741 +static int enqueue_pai_tasklet(struct tasklet_struct* tasklet)
11742 +{
11743 + crm_srt_domain_t *cluster = NULL;
11744 + cpu_entry_t *targetCPU = NULL;
11745 + int thisCPU;
11746 + int runLocal = 0;
11747 + int runNow = 0;
11748 + unsigned long flags;
11749 +
11750 + if(unlikely((tasklet->owner == NULL) || !is_realtime(tasklet->owner)))
11751 + {
11752 + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
11753 + return 0;
11754 + }
11755 +
11756 + cluster = task_cpu_cluster(tasklet->owner);
11757 +
11758 + raw_spin_lock_irqsave(&cluster->crm_srt_lock, flags);
11759 +
11760 + thisCPU = smp_processor_id();
11761 +
11762 +#if 1
11763 +#ifdef CONFIG_SCHED_CPU_AFFINITY
11764 + {
11765 + cpu_entry_t* affinity = NULL;
11766 +
11767 + // use this CPU if it is in our cluster and isn't running any RT work.
11768 + if(cpu_isset(thisCPU, *cluster->cpu_map) && (__get_cpu_var(crm_srt_cpu_entries).linked == NULL)) {
11769 + affinity = &(__get_cpu_var(crm_srt_cpu_entries));
11770 + }
11771 + else {
11772 +			// this CPU is busy or shouldn't run tasklets in this cluster.
11773 +			// look for an available nearby CPU.
11774 + // NOTE: Affinity towards owner and not this CPU. Is this right?
11775 + affinity =
11776 + crm_srt_get_nearest_available_cpu(cluster,
11777 + &per_cpu(crm_srt_cpu_entries, task_cpu(tasklet->owner)));
11778 + }
11779 +
11780 + targetCPU = affinity;
11781 + }
11782 +#endif
11783 +#endif
11784 +
11785 + if (targetCPU == NULL) {
11786 + targetCPU = lowest_prio_cpu(cluster);
11787 + }
11788 +
11789 + if (rm_srt_higher_prio(tasklet->owner, targetCPU->linked)) {
11790 + if (thisCPU == targetCPU->cpu) {
11791 + TRACE("%s: Run tasklet locally (and now).\n", __FUNCTION__);
11792 + runLocal = 1;
11793 + runNow = 1;
11794 + }
11795 + else {
11796 + TRACE("%s: Run tasklet remotely (and now).\n", __FUNCTION__);
11797 + runLocal = 0;
11798 + runNow = 1;
11799 + }
11800 + }
11801 + else {
11802 + runLocal = 0;
11803 + runNow = 0;
11804 + }
11805 +
11806 + if(!runLocal) {
11807 + // enqueue the tasklet
11808 + __add_pai_tasklet(tasklet, cluster);
11809 + }
11810 +
11811 + raw_spin_unlock_irqrestore(&cluster->crm_srt_lock, flags);
11812 +
11813 +
11814 + if (runLocal /*&& runNow */) { // runNow == 1 is implied
11815 + TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__);
11816 + __do_lit_tasklet(tasklet, 0ul);
11817 + }
11818 + else if (runNow /*&& !runLocal */) { // runLocal == 0 is implied
11819 + TRACE("%s: Triggering CPU %d to run tasklet.\n", __FUNCTION__, targetCPU->cpu);
11820 + preempt(targetCPU); // need to be protected by crm_srt_lock?
11821 + }
11822 + else {
11823 + TRACE("%s: Scheduling of tasklet was deferred.\n", __FUNCTION__);
11824 + }
11825 +
11826 + return(1); // success
11827 +}
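/* Editorial summary (assumes the semantics of enqueue_pai_tasklet() above):
 *
 *   owner outranks targetCPU->linked?   targetCPU is this CPU?   action taken
 *   ---------------------------------   ----------------------   ------------------------------------
 *   yes                                 yes                      run the tasklet inline, immediately
 *   yes                                 no                       enqueue it, then preempt targetCPU
 *   no                                  (either)                 enqueue it; it runs later via
 *                                                                run_tasklets()/do_lit_tasklets()
 */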
11828 +
11829 +
11830 +#endif
11831 +
11832 +
11833 +
11834 +
11835 +
11836 +
11837 +
11838 +
11839 +
11840 +
11841 +
11842 +
11843 +
11844 +
11845 +
11846 +
11847 +
11848 +
11849 +
11850 +/* Getting schedule() right is a bit tricky. schedule() may not make any
11851 + * assumptions on the state of the current task since it may be called for a
11852 + * number of reasons. The reasons include a scheduler_tick() determined that it
11853 + * was necessary, because sys_exit_np() was called, because some Linux
11854 + * subsystem determined so, or even (in the worst case) because there is a bug
11855 + * hidden somewhere. Thus, we must take extreme care to determine what the
11856 + * current state is.
11857 + *
11858 + * The CPU could currently be scheduling a task (or not), be linked (or not).
11859 + *
11860 + * The following assertions for the scheduled task could hold:
11861 + *
11862 + * - !is_running(scheduled) // the job blocks
11863 + * - scheduled->timeslice == 0 // the job completed (forcefully)
11864 + * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall)
11865 + * - linked != scheduled // we need to reschedule (for any reason)
11866 + * - is_np(scheduled) // rescheduling must be delayed,
11867 + * sys_exit_np must be requested
11868 + *
11869 + * Any of these can occur together.
11870 + */
11871 +static struct task_struct* crm_srt_schedule(struct task_struct * prev)
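/* Editorial reading aid (mirrors, but is not part of, the logic below; names
 * refer to the local state bits computed in step (0) of crm_srt_schedule()):
 *
 *   job completes now:   !np && (out_of_time || sleep) && !blocks && !preempt
 *   pull next ready job: entry->linked == NULL (after unlink/completion)
 *   context switch:      (!np || blocks) && entry->linked != entry->scheduled
 */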
11872 +{
11873 + cpu_entry_t* entry = &__get_cpu_var(crm_srt_cpu_entries);
11874 + crm_srt_domain_t *cluster = entry->cluster;
11875 + int out_of_time, sleep, preempt, np, exists, blocks;
11876 + struct task_struct* next = NULL;
11877 +
11878 + raw_spin_lock(&cluster->crm_srt_lock);
11879 + clear_will_schedule();
11880 +
11881 + /* sanity checking */
11882 + BUG_ON(entry->scheduled && entry->scheduled != prev);
11883 + BUG_ON(entry->scheduled && !is_realtime(prev));
11884 + BUG_ON(is_realtime(prev) && !entry->scheduled);
11885 +
11886 + /* (0) Determine state */
11887 + exists = entry->scheduled != NULL;
11888 + blocks = exists && !is_running(entry->scheduled);
11889 + out_of_time = exists &&
11890 + budget_enforced(entry->scheduled) &&
11891 + budget_exhausted(entry->scheduled);
11892 + np = exists && is_np(entry->scheduled);
11893 + sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
11894 + preempt = entry->scheduled != entry->linked;
11895 +
11896 +#ifdef WANT_ALL_SCHED_EVENTS
11897 + TRACE_TASK(prev, "invoked crm_srt_schedule.\n");
11898 +#endif
11899 +
11900 + if (exists)
11901 + TRACE_TASK(prev,
11902 + "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
11903 + "state:%d sig:%d\n",
11904 + blocks, out_of_time, np, sleep, preempt,
11905 + prev->state, signal_pending(prev));
11906 + if (entry->linked && preempt)
11907 + TRACE_TASK(prev, "will be preempted by %s/%d\n",
11908 + entry->linked->comm, entry->linked->pid);
11909 +
11910 +
11911 + /* If a task blocks we have no choice but to reschedule.
11912 + */
11913 + if (blocks)
11914 + unlink(entry->scheduled);
11915 +
11916 + /* Request a sys_exit_np() call if we would like to preempt but cannot.
11917 + * We need to make sure to update the link structure anyway in case
11918 + * that we are still linked. Multiple calls to request_exit_np() don't
11919 + * hurt.
11920 + */
11921 + if (np && (out_of_time || preempt || sleep)) {
11922 + unlink(entry->scheduled);
11923 + request_exit_np(entry->scheduled);
11924 + }
11925 +
11926 + /* Any task that is preemptable and either exhausts its execution
11927 + * budget or wants to sleep completes. We may have to reschedule after
11928 + * this. Don't do a job completion if we block (can't have timers running
11929 + * for blocked jobs). Preemption go first for the same reason.
11930 +	 * for blocked jobs). Preemptions go first for the same reason.
11931 + if (!np && (out_of_time || sleep) && !blocks && !preempt)
11932 + job_completion(entry->scheduled, !sleep);
11933 +
11934 + /* Link pending task if we became unlinked.
11935 + */
11936 + if (!entry->linked)
11937 + link_task_to_cpu(__take_ready(&cluster->domain), entry);
11938 +
11939 + /* The final scheduling decision. Do we need to switch for some reason?
11940 + * If linked is different from scheduled, then select linked as next.
11941 + */
11942 + if ((!np || blocks) &&
11943 + entry->linked != entry->scheduled) {
11944 + /* Schedule a linked job? */
11945 + if (entry->linked) {
11946 + entry->linked->rt_param.scheduled_on = entry->cpu;
11947 + next = entry->linked;
11948 + }
11949 + if (entry->scheduled) {
11950 + /* not gonna be scheduled soon */
11951 + entry->scheduled->rt_param.scheduled_on = NO_CPU;
11952 + TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
11953 + }
11954 + } else
11955 + /* Only override Linux scheduler if we have a real-time task
11956 + * scheduled that needs to continue.
11957 + */
11958 + if (exists)
11959 + next = prev;
11960 +
11961 + sched_state_task_picked();
11962 + raw_spin_unlock(&cluster->crm_srt_lock);
11963 +
11964 +#ifdef WANT_ALL_SCHED_EVENTS
11965 + TRACE("crm_srt_lock released, next=0x%p\n", next);
11966 +
11967 + if (next)
11968 + TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
11969 + else if (exists && !next)
11970 + TRACE("becomes idle at %llu.\n", litmus_clock());
11971 +#endif
11972 +
11973 +
11974 + return next;
11975 +}
11976 +
11977 +
11978 +/* _finish_switch - we just finished the switch away from prev
11979 + */
11980 +static void crm_srt_finish_switch(struct task_struct *prev)
11981 +{
11982 + cpu_entry_t* entry = &__get_cpu_var(crm_srt_cpu_entries);
11983 +
11984 + entry->scheduled = is_realtime(current) ? current : NULL;
11985 +#ifdef WANT_ALL_SCHED_EVENTS
11986 + TRACE_TASK(prev, "switched away from\n");
11987 +#endif
11988 +}
11989 +
11990 +
11991 +/* Prepare a task for running in RT mode
11992 + */
11993 +static void crm_srt_task_new(struct task_struct * t, int on_rq, int running)
11994 +{
11995 + unsigned long flags;
11996 + cpu_entry_t* entry;
11997 + crm_srt_domain_t* cluster;
11998 +
11999 + TRACE("crm srt: task new %d\n", t->pid);
12000 +
12001 + /* the cluster doesn't change even if t is running */
12002 + cluster = task_cpu_cluster(t);
12003 +
12004 + raw_spin_lock_irqsave(&cluster->crm_srt_lock, flags);
12005 +
12006 + /* setup job params */
12007 + release_at(t, litmus_clock());
12008 +
12009 + if (running) {
12010 + entry = &per_cpu(crm_srt_cpu_entries, task_cpu(t));
12011 + BUG_ON(entry->scheduled);
12012 +
12013 + entry->scheduled = t;
12014 + tsk_rt(t)->scheduled_on = task_cpu(t);
12015 + } else {
12016 + t->rt_param.scheduled_on = NO_CPU;
12017 + }
12018 + t->rt_param.linked_on = NO_CPU;
12019 +
12020 + crm_srt_job_arrival(t);
12021 + raw_spin_unlock_irqrestore(&cluster->crm_srt_lock, flags);
12022 +}
12023 +
12024 +static void crm_srt_task_wake_up(struct task_struct *task)
12025 +{
12026 + unsigned long flags;
12027 + //lt_t now;
12028 + crm_srt_domain_t *cluster;
12029 +
12030 + TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
12031 +
12032 + cluster = task_cpu_cluster(task);
12033 +
12034 + raw_spin_lock_irqsave(&cluster->crm_srt_lock, flags);
12035 +
12036 +#if 0 // sporadic task model
12037 + /* We need to take suspensions because of semaphores into
12038 + * account! If a job resumes after being suspended due to acquiring
12039 + * a semaphore, it should never be treated as a new job release.
12040 + */
12041 + if (get_rt_flags(task) == RT_F_EXIT_SEM) {
12042 + set_rt_flags(task, RT_F_RUNNING);
12043 + } else {
12044 + now = litmus_clock();
12045 + if (is_tardy(task, now)) {
12046 + /* new sporadic release */
12047 + release_at(task, now);
12048 + sched_trace_task_release(task);
12049 + }
12050 + else {
12051 + if (task->rt.time_slice) {
12052 + /* came back in time before deadline
12053 + */
12054 + set_rt_flags(task, RT_F_RUNNING);
12055 + }
12056 + }
12057 + }
12058 +#endif
12059 +
12060 + //BUG_ON(tsk_rt(task)->linked_on != NO_CPU);
12061 + set_rt_flags(task, RT_F_RUNNING); // periodic model
12062 +
12063 + if(tsk_rt(task)->linked_on == NO_CPU)
12064 + crm_srt_job_arrival(task);
12065 + else
12066 +		TRACE_TASK(task, "already linked to CPU %d on wake-up; skipping job arrival.\n", tsk_rt(task)->linked_on);
12067 +
12068 + raw_spin_unlock_irqrestore(&cluster->crm_srt_lock, flags);
12069 +}
12070 +
12071 +static void crm_srt_task_block(struct task_struct *t)
12072 +{
12073 + unsigned long flags;
12074 + crm_srt_domain_t *cluster;
12075 +
12076 + TRACE_TASK(t, "block at %llu\n", litmus_clock());
12077 +
12078 + cluster = task_cpu_cluster(t);
12079 +
12080 + /* unlink if necessary */
12081 + raw_spin_lock_irqsave(&cluster->crm_srt_lock, flags);
12082 + unlink(t);
12083 + raw_spin_unlock_irqrestore(&cluster->crm_srt_lock, flags);
12084 +
12085 + BUG_ON(!is_realtime(t));
12086 +}
12087 +
12088 +
12089 +static void crm_srt_task_exit(struct task_struct * t)
12090 +{
12091 + unsigned long flags;
12092 + crm_srt_domain_t *cluster = task_cpu_cluster(t);
12093 +
12094 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
12095 + flush_tasklets(cluster, t);
12096 +#endif
12097 +
12098 + /* unlink if necessary */
12099 + raw_spin_lock_irqsave(&cluster->crm_srt_lock, flags);
12100 + unlink(t);
12101 + if (tsk_rt(t)->scheduled_on != NO_CPU) {
12102 + cpu_entry_t *cpu;
12103 + cpu = &per_cpu(crm_srt_cpu_entries, tsk_rt(t)->scheduled_on);
12104 + cpu->scheduled = NULL;
12105 + tsk_rt(t)->scheduled_on = NO_CPU;
12106 + }
12107 + raw_spin_unlock_irqrestore(&cluster->crm_srt_lock, flags);
12108 +
12109 + BUG_ON(!is_realtime(t));
12110 + TRACE_TASK(t, "RIP\n");
12111 +}
12112 +
12113 +static long crm_srt_admit_task(struct task_struct* tsk)
12114 +{
12115 + return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
12116 +}
12117 +
12118 +
12119 +
12120 +
12121 +
12122 +
12123 +
12124 +
12125 +
12126 +
12127 +
12128 +
12129 +
12130 +#ifdef CONFIG_LITMUS_LOCKING
12131 +
12132 +#include <litmus/fdso.h>
12133 +
12134 +
12135 +static void __set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
12136 +{
12137 + int linked_on;
12138 + int check_preempt = 0;
12139 +
12140 + crm_srt_domain_t* cluster = task_cpu_cluster(t);
12141 +
12142 + if(prio_inh != NULL)
12143 + TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid);
12144 + else
12145 + TRACE_TASK(t, "inherits priority from %p\n", prio_inh);
12146 +
12147 + sched_trace_eff_prio_change(t, prio_inh);
12148 +
12149 + tsk_rt(t)->inh_task = prio_inh;
12150 +
12151 + linked_on = tsk_rt(t)->linked_on;
12152 +
12153 + /* If it is scheduled, then we need to reorder the CPU heap. */
12154 + if (linked_on != NO_CPU) {
12155 + TRACE_TASK(t, "%s: linked on %d\n",
12156 + __FUNCTION__, linked_on);
12157 + /* Holder is scheduled; need to re-order CPUs.
12158 + * We can't use heap_decrease() here since
12159 + * the cpu_heap is ordered in reverse direction, so
12160 + * it is actually an increase. */
12161 + bheap_delete(cpu_lower_prio, &cluster->cpu_heap,
12162 + per_cpu(crm_srt_cpu_entries, linked_on).hn);
12163 + bheap_insert(cpu_lower_prio, &cluster->cpu_heap,
12164 + per_cpu(crm_srt_cpu_entries, linked_on).hn);
12165 + } else {
12166 + /* holder may be queued: first stop queue changes */
12167 + raw_spin_lock(&cluster->domain.release_lock);
12168 + if (is_queued(t)) {
12169 + TRACE_TASK(t, "%s: is queued\n", __FUNCTION__);
12170 +
12171 + /* We need to update the position of holder in some
12172 +			 * heap. Note that this could be a release heap if
12173 + * budget enforcement is used and this job overran. */
12174 + check_preempt = !bheap_decrease(rm_srt_ready_order, tsk_rt(t)->heap_node);
12175 +
12176 + } else {
12177 + /* Nothing to do: if it is not queued and not linked
12178 + * then it is either sleeping or currently being moved
12179 + * by other code (e.g., a timer interrupt handler) that
12180 + * will use the correct priority when enqueuing the
12181 + * task. */
12182 + TRACE_TASK(t, "%s: is NOT queued => Done.\n", __FUNCTION__);
12183 + }
12184 + raw_spin_unlock(&cluster->domain.release_lock);
12185 +
12186 + /* If holder was enqueued in a release heap, then the following
12187 + * preemption check is pointless, but we can't easily detect
12188 + * that case. If you want to fix this, then consider that
12189 + * simply adding a state flag requires O(n) time to update when
12190 + * releasing n tasks, which conflicts with the goal to have
12191 + * O(log n) merges. */
12192 + if (check_preempt) {
12193 + /* heap_decrease() hit the top level of the heap: make
12194 + * sure preemption checks get the right task, not the
12195 + * potentially stale cache. */
12196 + bheap_uncache_min(rm_srt_ready_order, &cluster->domain.ready_queue);
12197 + check_for_preemptions(cluster);
12198 + }
12199 + }
12200 +}
12201 +
12202 +/* called with IRQs off */
12203 +static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
12204 +{
12205 + crm_srt_domain_t* cluster = task_cpu_cluster(t);
12206 +
12207 + raw_spin_lock(&cluster->crm_srt_lock);
12208 +
12209 + __set_priority_inheritance(t, prio_inh);
12210 +
12211 +#ifdef CONFIG_LITMUS_SOFTIRQD
12212 + if(tsk_rt(t)->cur_klitirqd != NULL)
12213 + {
12214 + TRACE_TASK(t, "%s/%d inherits a new priority!\n",
12215 + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
12216 +
12217 + __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh);
12218 + }
12219 +#endif
12220 +
12221 + raw_spin_unlock(&cluster->crm_srt_lock);
12222 +}
12223 +
12224 +
12225 +/* called with IRQs off */
12226 +static void __clear_priority_inheritance(struct task_struct* t)
12227 +{
12228 + TRACE_TASK(t, "priority restored\n");
12229 +
12230 + if(tsk_rt(t)->scheduled_on != NO_CPU)
12231 + {
12232 + sched_trace_eff_prio_change(t, NULL);
12233 +
12234 + tsk_rt(t)->inh_task = NULL;
12235 +
12236 + /* Check if rescheduling is necessary. We can't use heap_decrease()
12237 + * since the priority was effectively lowered. */
12238 + unlink(t);
12239 + crm_srt_job_arrival(t);
12240 + }
12241 + else
12242 + {
12243 + __set_priority_inheritance(t, NULL);
12244 + }
12245 +
12246 +#ifdef CONFIG_LITMUS_SOFTIRQD
12247 + if(tsk_rt(t)->cur_klitirqd != NULL)
12248 + {
12249 + TRACE_TASK(t, "%s/%d inheritance set back to owner.\n",
12250 + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
12251 +
12252 + if(tsk_rt(tsk_rt(t)->cur_klitirqd)->scheduled_on != NO_CPU)
12253 + {
12254 + sched_trace_eff_prio_change(tsk_rt(t)->cur_klitirqd, t);
12255 +
12256 + tsk_rt(tsk_rt(t)->cur_klitirqd)->inh_task = t;
12257 +
12258 + /* Check if rescheduling is necessary. We can't use heap_decrease()
12259 + * since the priority was effectively lowered. */
12260 + unlink(tsk_rt(t)->cur_klitirqd);
12261 + crm_srt_job_arrival(tsk_rt(t)->cur_klitirqd);
12262 + }
12263 + else
12264 + {
12265 + __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, t);
12266 + }
12267 + }
12268 +#endif
12269 +}
12270 +
12271 +/* called with IRQs off */
12272 +static void clear_priority_inheritance(struct task_struct* t)
12273 +{
12274 + crm_srt_domain_t* cluster = task_cpu_cluster(t);
12275 +
12276 + raw_spin_lock(&cluster->crm_srt_lock);
12277 + __clear_priority_inheritance(t);
12278 + raw_spin_unlock(&cluster->crm_srt_lock);
12279 +}
12280 +
12281 +
12282 +
12283 +#ifdef CONFIG_LITMUS_SOFTIRQD
12284 +/* called with IRQs off */
12285 +static void set_priority_inheritance_klitirqd(struct task_struct* klitirqd,
12286 + struct task_struct* old_owner,
12287 + struct task_struct* new_owner)
12288 +{
12289 + crm_srt_domain_t* cluster = task_cpu_cluster(klitirqd);
12290 +
12291 + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
12292 +
12293 + raw_spin_lock(&cluster->crm_srt_lock);
12294 +
12295 + if(old_owner != new_owner)
12296 + {
12297 + if(old_owner)
12298 + {
12299 + // unreachable?
12300 + tsk_rt(old_owner)->cur_klitirqd = NULL;
12301 + }
12302 +
12303 + TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n",
12304 + new_owner->comm, new_owner->pid);
12305 +
12306 + tsk_rt(new_owner)->cur_klitirqd = klitirqd;
12307 + }
12308 +
12309 + __set_priority_inheritance(klitirqd,
12310 + (tsk_rt(new_owner)->inh_task == NULL) ?
12311 + new_owner :
12312 + tsk_rt(new_owner)->inh_task);
12313 +
12314 + raw_spin_unlock(&cluster->crm_srt_lock);
12315 +}
12316 +
12317 +/* called with IRQs off */
12318 +static void clear_priority_inheritance_klitirqd(struct task_struct* klitirqd,
12319 + struct task_struct* old_owner)
12320 +{
12321 + crm_srt_domain_t* cluster = task_cpu_cluster(klitirqd);
12322 +
12323 + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
12324 +
12325 + raw_spin_lock(&cluster->crm_srt_lock);
12326 +
12327 + TRACE_TASK(klitirqd, "priority restored\n");
12328 +
12329 + if(tsk_rt(klitirqd)->scheduled_on != NO_CPU)
12330 + {
12331 + tsk_rt(klitirqd)->inh_task = NULL;
12332 +
12333 + /* Check if rescheduling is necessary. We can't use heap_decrease()
12334 + * since the priority was effectively lowered. */
12335 + unlink(klitirqd);
12336 + crm_srt_job_arrival(klitirqd);
12337 + }
12338 + else
12339 + {
12340 + __set_priority_inheritance(klitirqd, NULL);
12341 + }
12342 +
12343 + tsk_rt(old_owner)->cur_klitirqd = NULL;
12344 +
12345 + raw_spin_unlock(&cluster->crm_srt_lock);
12346 +}
12347 +#endif // CONFIG_LITMUS_SOFTIRQD
12348 +
12349 +
12350 +/* ******************** KFMLP support ********************** */
12351 +
12352 +/* struct for semaphore with priority inheritance */
12353 +struct kfmlp_queue
12354 +{
12355 + wait_queue_head_t wait;
12356 + struct task_struct* owner;
12357 + struct task_struct* hp_waiter;
12358 + int count; /* number of waiters + holder */
12359 +};
12360 +
12361 +struct kfmlp_semaphore
12362 +{
12363 + struct litmus_lock litmus_lock;
12364 +
12365 + spinlock_t lock;
12366 +
12367 + int num_resources; /* aka k */
12368 + struct kfmlp_queue *queues; /* array */
12369 + struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */
12370 +};
12371 +
12372 +static inline struct kfmlp_semaphore* kfmlp_from_lock(struct litmus_lock* lock)
12373 +{
12374 + return container_of(lock, struct kfmlp_semaphore, litmus_lock);
12375 +}
12376 +
12377 +static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem,
12378 + struct kfmlp_queue* queue)
12379 +{
12380 + return (queue - &sem->queues[0]);
12381 +}
12382 +
12383 +static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem,
12384 + struct task_struct* holder)
12385 +{
12386 + int i;
12387 + for(i = 0; i < sem->num_resources; ++i)
12388 + if(sem->queues[i].owner == holder)
12389 + return(&sem->queues[i]);
12390 + return(NULL);
12391 +}
12392 +
12393 +/* caller is responsible for locking */
12394 +static struct task_struct* kfmlp_find_hp_waiter(struct kfmlp_queue *kqueue,
12395 + struct task_struct *skip)
12396 +{
12397 + struct list_head *pos;
12398 + struct task_struct *queued, *found = NULL;
12399 +
12400 + list_for_each(pos, &kqueue->wait.task_list) {
12401 + queued = (struct task_struct*) list_entry(pos, wait_queue_t,
12402 + task_list)->private;
12403 +
12404 + /* Compare task prios, find high prio task. */
12405 + if (queued != skip && rm_srt_higher_prio(queued, found))
12406 + found = queued;
12407 + }
12408 + return found;
12409 +}
12410 +
12411 +static inline struct kfmlp_queue* kfmlp_find_shortest(
12412 + struct kfmlp_semaphore* sem,
12413 + struct kfmlp_queue* search_start)
12414 +{
12415 + // we start our search at search_start instead of at the beginning of the
12416 + // queue list to load-balance across all resources.
12417 + struct kfmlp_queue* step = search_start;
12418 + struct kfmlp_queue* shortest = sem->shortest_queue;
12419 +
12420 + do
12421 + {
12422 + step = (step+1 != &sem->queues[sem->num_resources]) ?
12423 + step+1 : &sem->queues[0];
12424 + if(step->count < shortest->count)
12425 + {
12426 + shortest = step;
12427 + if(step->count == 0)
12428 + break; /* can't get any shorter */
12429 + }
12430 + }while(step != search_start);
12431 +
12432 + return(shortest);
12433 +}
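/* Editorial example (illustration only; assumes sem->shortest_queue has gone
 * stale and still points at queue 0): with num_resources = 3 and per-queue
 * counts {2, 0, 1}, a search started at queue 0 steps to queue 1, sees
 * count == 0 and stops immediately, while a search started at queue 2 walks
 * queue 0 and then queue 1 before stopping.  Rotating the starting point is
 * what load-balances contending jobs across the k queue replicas.
 */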
12434 +
12435 +static struct task_struct* kfmlp_remove_hp_waiter(struct kfmlp_semaphore* sem)
12436 +{
12437 + /* must hold sem->lock */
12438 +
12439 + struct kfmlp_queue *my_queue = NULL;
12440 + struct task_struct *max_hp = NULL;
12441 +
12442 +
12443 + struct list_head *pos;
12444 + struct task_struct *queued;
12445 + int i;
12446 +
12447 + for(i = 0; i < sem->num_resources; ++i)
12448 + {
12449 + if( (sem->queues[i].count > 1) &&
12450 + ((my_queue == NULL) ||
12451 + (rm_srt_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) )
12452 + {
12453 + my_queue = &sem->queues[i];
12454 + }
12455 + }
12456 +
12457 + if(my_queue)
12458 + {
12459 + crm_srt_domain_t* cluster;
12460 +
12461 + max_hp = my_queue->hp_waiter;
12462 + BUG_ON(!max_hp);
12463 +
12464 + TRACE_CUR("queue %d: stealing %s/%d from queue %d\n",
12465 + kfmlp_get_idx(sem, my_queue),
12466 + max_hp->comm, max_hp->pid,
12467 + kfmlp_get_idx(sem, my_queue));
12468 +
12469 + my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, max_hp);
12470 +
12471 + /*
12472 + if(my_queue->hp_waiter)
12473 + TRACE_CUR("queue %d: new hp_waiter is %s/%d\n",
12474 + kfmlp_get_idx(sem, my_queue),
12475 + my_queue->hp_waiter->comm,
12476 + my_queue->hp_waiter->pid);
12477 + else
12478 + TRACE_CUR("queue %d: new hp_waiter is %p\n",
12479 + kfmlp_get_idx(sem, my_queue), NULL);
12480 + */
12481 +
12482 + cluster = task_cpu_cluster(max_hp);
12483 +
12484 + raw_spin_lock(&cluster->crm_srt_lock);
12485 +
12486 + /*
12487 + if(my_queue->owner)
12488 + TRACE_CUR("queue %d: owner is %s/%d\n",
12489 + kfmlp_get_idx(sem, my_queue),
12490 + my_queue->owner->comm,
12491 + my_queue->owner->pid);
12492 + else
12493 + TRACE_CUR("queue %d: owner is %p\n",
12494 + kfmlp_get_idx(sem, my_queue),
12495 + NULL);
12496 + */
12497 +
12498 + if(tsk_rt(my_queue->owner)->inh_task == max_hp)
12499 + {
12500 + __clear_priority_inheritance(my_queue->owner);
12501 + if(my_queue->hp_waiter != NULL)
12502 + {
12503 + __set_priority_inheritance(my_queue->owner, my_queue->hp_waiter);
12504 + }
12505 + }
12506 + raw_spin_unlock(&cluster->crm_srt_lock);
12507 +
12508 + list_for_each(pos, &my_queue->wait.task_list)
12509 + {
12510 + queued = (struct task_struct*) list_entry(pos, wait_queue_t,
12511 + task_list)->private;
12512 + /* Compare task prios, find high prio task. */
12513 + if (queued == max_hp)
12514 + {
12515 + /*
12516 + TRACE_CUR("queue %d: found entry in wait queue. REMOVING!\n",
12517 + kfmlp_get_idx(sem, my_queue));
12518 + */
12519 + __remove_wait_queue(&my_queue->wait,
12520 + list_entry(pos, wait_queue_t, task_list));
12521 + break;
12522 + }
12523 + }
12524 + --(my_queue->count);
12525 + }
12526 +
12527 + return(max_hp);
12528 +}
12529 +
12530 +int crm_srt_kfmlp_lock(struct litmus_lock* l)
12531 +{
12532 + struct task_struct* t = current;
12533 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
12534 + struct kfmlp_queue* my_queue;
12535 + wait_queue_t wait;
12536 + unsigned long flags;
12537 +
12538 + if (!is_realtime(t))
12539 + return -EPERM;
12540 +
12541 + spin_lock_irqsave(&sem->lock, flags);
12542 +
12543 + my_queue = sem->shortest_queue;
12544 +
12545 + if (my_queue->owner) {
12546 + /* resource is not free => must suspend and wait */
12547 + TRACE_CUR("queue %d: Resource is not free => must suspend and wait.\n",
12548 + kfmlp_get_idx(sem, my_queue));
12549 +
12550 + init_waitqueue_entry(&wait, t);
12551 +
12552 + /* FIXME: interruptible would be nice some day */
12553 + set_task_state(t, TASK_UNINTERRUPTIBLE);
12554 +
12555 + __add_wait_queue_tail_exclusive(&my_queue->wait, &wait);
12556 +
12557 + /* check if we need to activate priority inheritance */
12558 + if (rm_srt_higher_prio(t, my_queue->hp_waiter))
12559 + {
12560 + my_queue->hp_waiter = t;
12561 + if (rm_srt_higher_prio(t, my_queue->owner))
12562 + {
12563 + set_priority_inheritance(my_queue->owner, my_queue->hp_waiter);
12564 + }
12565 + }
12566 +
12567 + ++(my_queue->count);
12568 + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
12569 +
12570 + /* release lock before sleeping */
12571 + spin_unlock_irqrestore(&sem->lock, flags);
12572 +
12573 + /* We depend on the FIFO order. Thus, we don't need to recheck
12574 + * when we wake up; we are guaranteed to have the lock since
12575 + * there is only one wake up per release (or steal).
12576 + */
12577 + schedule();
12578 +
12579 +
12580 + if(my_queue->owner == t)
12581 + {
12582 + TRACE_CUR("queue %d: acquired through waiting\n",
12583 + kfmlp_get_idx(sem, my_queue));
12584 + }
12585 + else
12586 + {
12587 + /* this case may happen if our wait entry was stolen
12588 + between queues. record where we went.*/
12589 + my_queue = kfmlp_get_queue(sem, t);
12590 + BUG_ON(!my_queue);
12591 + TRACE_CUR("queue %d: acquired through stealing\n",
12592 + kfmlp_get_idx(sem, my_queue));
12593 + }
12594 + }
12595 + else
12596 + {
12597 + TRACE_CUR("queue %d: acquired immediately\n",
12598 + kfmlp_get_idx(sem, my_queue));
12599 +
12600 + my_queue->owner = t;
12601 +
12602 + ++(my_queue->count);
12603 + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
12604 +
12605 + spin_unlock_irqrestore(&sem->lock, flags);
12606 + }
12607 +
12608 + return kfmlp_get_idx(sem, my_queue);
12609 +}
12610 +
12611 +int crm_srt_kfmlp_unlock(struct litmus_lock* l)
12612 +{
12613 + struct task_struct *t = current, *next;
12614 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
12615 + struct kfmlp_queue *my_queue;
12616 + unsigned long flags;
12617 + int err = 0;
12618 +
12619 + spin_lock_irqsave(&sem->lock, flags);
12620 +
12621 + my_queue = kfmlp_get_queue(sem, t);
12622 +
12623 + if (!my_queue) {
12624 + err = -EINVAL;
12625 + goto out;
12626 + }
12627 +
12628 + /* check if there are jobs waiting for this resource */
12629 + next = __waitqueue_remove_first(&my_queue->wait);
12630 + if (next) {
12631 + /*
12632 + TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n",
12633 + kfmlp_get_idx(sem, my_queue),
12634 + next->comm, next->pid);
12635 + */
12636 +		/* next becomes the resource holder */
12637 + my_queue->owner = next;
12638 +
12639 + --(my_queue->count);
12640 + if(my_queue->count < sem->shortest_queue->count)
12641 + {
12642 + sem->shortest_queue = my_queue;
12643 + }
12644 +
12645 + TRACE_CUR("queue %d: lock ownership passed to %s/%d\n",
12646 + kfmlp_get_idx(sem, my_queue), next->comm, next->pid);
12647 +
12648 + /* determine new hp_waiter if necessary */
12649 + if (next == my_queue->hp_waiter) {
12650 + TRACE_TASK(next, "was highest-prio waiter\n");
12651 + /* next has the highest priority --- it doesn't need to
12652 + * inherit. However, we need to make sure that the
12653 + * next-highest priority in the queue is reflected in
12654 + * hp_waiter. */
12655 + my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next);
12656 + if (my_queue->hp_waiter)
12657 + TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue));
12658 + else
12659 + TRACE("queue %d: no further waiters\n", kfmlp_get_idx(sem, my_queue));
12660 + } else {
12661 + /* Well, if next is not the highest-priority waiter,
12662 + * then it ought to inherit the highest-priority
12663 + * waiter's priority. */
12664 + set_priority_inheritance(next, my_queue->hp_waiter);
12665 + }
12666 +
12667 + /* wake up next */
12668 + wake_up_process(next);
12669 + }
12670 + else
12671 + {
12672 + TRACE_CUR("queue %d: looking to steal someone...\n", kfmlp_get_idx(sem, my_queue));
12673 +
12674 + next = kfmlp_remove_hp_waiter(sem); /* returns NULL if nothing to steal */
12675 +
12676 + /*
12677 + if(next)
12678 + TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - steal\n",
12679 + kfmlp_get_idx(sem, my_queue),
12680 + next->comm, next->pid);
12681 + */
12682 +
12683 + my_queue->owner = next;
12684 +
12685 + if(next)
12686 + {
12687 + TRACE_CUR("queue %d: lock ownership passed to %s/%d (which was stolen)\n",
12688 + kfmlp_get_idx(sem, my_queue),
12689 + next->comm, next->pid);
12690 +
12691 + /* wake up next */
12692 + wake_up_process(next);
12693 + }
12694 + else
12695 + {
12696 + TRACE_CUR("queue %d: no one to steal.\n", kfmlp_get_idx(sem, my_queue));
12697 +
12698 + --(my_queue->count);
12699 + if(my_queue->count < sem->shortest_queue->count)
12700 + {
12701 + sem->shortest_queue = my_queue;
12702 + }
12703 + }
12704 + }
12705 +
12706 + /* we lose the benefit of priority inheritance (if any) */
12707 + if (tsk_rt(t)->inh_task)
12708 + clear_priority_inheritance(t);
12709 +
12710 +out:
12711 + spin_unlock_irqrestore(&sem->lock, flags);
12712 +
12713 + return err;
12714 +}
12715 +
12716 +int crm_srt_kfmlp_close(struct litmus_lock* l)
12717 +{
12718 + struct task_struct *t = current;
12719 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
12720 + struct kfmlp_queue *my_queue;
12721 + unsigned long flags;
12722 +
12723 + int owner;
12724 +
12725 + spin_lock_irqsave(&sem->lock, flags);
12726 +
12727 + my_queue = kfmlp_get_queue(sem, t);
12728 + owner = (my_queue) ? (my_queue->owner == t) : 0;
12729 +
12730 + spin_unlock_irqrestore(&sem->lock, flags);
12731 +
12732 + if (owner)
12733 + crm_srt_kfmlp_unlock(l);
12734 +
12735 + return 0;
12736 +}
12737 +
12738 +void crm_srt_kfmlp_free(struct litmus_lock* l)
12739 +{
12740 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
12741 + kfree(sem->queues);
12742 + kfree(sem);
12743 +}
12744 +
12745 +static struct litmus_lock_ops crm_srt_kfmlp_lock_ops = {
12746 + .close = crm_srt_kfmlp_close,
12747 + .lock = crm_srt_kfmlp_lock,
12748 + .unlock = crm_srt_kfmlp_unlock,
12749 + .deallocate = crm_srt_kfmlp_free,
12750 +};
12751 +
12752 +static struct litmus_lock* crm_srt_new_kfmlp(void* __user arg, int* ret_code)
12753 +{
12754 + struct kfmlp_semaphore* sem;
12755 + int num_resources = 0;
12756 + int i;
12757 +
12758 + if(!access_ok(VERIFY_READ, arg, sizeof(num_resources)))
12759 + {
12760 + *ret_code = -EINVAL;
12761 + return(NULL);
12762 + }
12763 + if(__copy_from_user(&num_resources, arg, sizeof(num_resources)))
12764 + {
12765 + *ret_code = -EINVAL;
12766 + return(NULL);
12767 + }
12768 + if(num_resources < 1)
12769 + {
12770 + *ret_code = -EINVAL;
12771 + return(NULL);
12772 + }
12773 +
12774 + sem = kmalloc(sizeof(*sem), GFP_KERNEL);
12775 + if(!sem)
12776 + {
12777 + *ret_code = -ENOMEM;
12778 + return NULL;
12779 + }
12780 +
12781 + sem->queues = kmalloc(sizeof(struct kfmlp_queue)*num_resources, GFP_KERNEL);
12782 + if(!sem->queues)
12783 + {
12784 + kfree(sem);
12785 + *ret_code = -ENOMEM;
12786 + return NULL;
12787 + }
12788 +
12789 + sem->litmus_lock.ops = &crm_srt_kfmlp_lock_ops;
12790 + spin_lock_init(&sem->lock);
12791 + sem->num_resources = num_resources;
12792 +
12793 + for(i = 0; i < num_resources; ++i)
12794 + {
12795 + sem->queues[i].owner = NULL;
12796 + sem->queues[i].hp_waiter = NULL;
12797 + init_waitqueue_head(&sem->queues[i].wait);
12798 + sem->queues[i].count = 0;
12799 + }
12800 +
12801 + sem->shortest_queue = &sem->queues[0];
12802 +
12803 + *ret_code = 0;
12804 + return &sem->litmus_lock;
12805 +}
12806 +
12807 +
12808 +/* **** lock constructor **** */
12809 +
12810 +static long crm_srt_allocate_lock(struct litmus_lock **lock, int type,
12811 + void* __user arg)
12812 +{
12813 + int err = -ENXIO;
12814 +
12815 + /* C-RM-SRT currently only supports the FMLP for global resources
12816 + WITHIN a given cluster. DO NOT USE CROSS-CLUSTER! */
12817 + switch (type) {
12818 + case KFMLP_SEM:
12819 + *lock = crm_srt_new_kfmlp(arg, &err);
12820 + break;
12821 + };
12822 +
12823 + return err;
12824 +}
12825 +
12826 +#endif // CONFIG_LITMUS_LOCKING
12827 +
12828 +
12829 +
12830 +
12831 +
12832 +
12833 +/* total number of clusters */
12834 +static int num_clusters;
12835 +/* we do not support clusters of different sizes */
12836 +static unsigned int cluster_size;
12837 +
12838 +#ifdef VERBOSE_INIT
12839 +static void print_cluster_topology(cpumask_var_t mask, int cpu)
12840 +{
12841 + int chk;
12842 + char buf[255];
12843 +
12844 + chk = cpulist_scnprintf(buf, 254, mask);
12845 + buf[chk] = '\0';
12846 + printk(KERN_INFO "CPU = %d, shared cpu(s) = %s\n", cpu, buf);
12847 +
12848 +}
12849 +#endif
12850 +
12851 +static int clusters_allocated = 0;
12852 +
12853 +static void cleanup_crm_srt(void)
12854 +{
12855 + int i;
12856 +
12857 + if (clusters_allocated) {
12858 + for (i = 0; i < num_clusters; i++) {
12859 + kfree(crm_srt[i].cpus);
12860 + kfree(crm_srt[i].heap_node);
12861 + free_cpumask_var(crm_srt[i].cpu_map);
12862 + }
12863 +
12864 + kfree(crm_srt);
12865 + }
12866 +}
12867 +
12868 +static long crm_srt_activate_plugin(void)
12869 +{
12870 + int i, j, cpu, ccpu, cpu_count;
12871 + cpu_entry_t *entry;
12872 +
12873 + cpumask_var_t mask;
12874 + int chk = 0;
12875 +
12876 + /* de-allocate old clusters, if any */
12877 + cleanup_crm_srt();
12878 +
12879 + printk(KERN_INFO "C-RM-SRT: Activate Plugin, cluster configuration = %d\n",
12880 + cluster_config);
12881 +
12882 + /* need to get cluster_size first */
12883 + if(!zalloc_cpumask_var(&mask, GFP_ATOMIC))
12884 + return -ENOMEM;
12885 +
12886 + if (unlikely(cluster_config == GLOBAL_CLUSTER)) {
12887 + cluster_size = num_online_cpus();
12888 + } else {
12889 + chk = get_shared_cpu_map(mask, 0, cluster_config);
12890 + if (chk) {
12891 + /* if chk != 0 then it is the max allowed index */
12892 + printk(KERN_INFO "C-RM-SRT: Cluster configuration = %d "
12893 + "is not supported on this hardware.\n",
12894 + cluster_config);
12895 + /* User should notice that the configuration failed, so
12896 + * let's bail out. */
12897 + return -EINVAL;
12898 + }
12899 +
12900 + cluster_size = cpumask_weight(mask);
12901 + }
12902 +
12903 + if ((num_online_cpus() % cluster_size) != 0) {
12904 + /* this can't be right, some cpus are left out */
12905 + printk(KERN_ERR "C-RM-SRT: Trying to group %d cpus in %d!\n",
12906 + num_online_cpus(), cluster_size);
12907 + return -1;
12908 + }
12909 +
12910 + num_clusters = num_online_cpus() / cluster_size;
12911 + printk(KERN_INFO "C-RM-SRT: %d cluster(s) of size = %d\n",
12912 + num_clusters, cluster_size);
12913 +
12914 + /* initialize clusters */
12915 + crm_srt = kmalloc(num_clusters * sizeof(crm_srt_domain_t), GFP_ATOMIC);
12916 + for (i = 0; i < num_clusters; i++) {
12917 +
12918 + crm_srt[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t),
12919 + GFP_ATOMIC);
12920 + crm_srt[i].heap_node = kmalloc(
12921 + cluster_size * sizeof(struct bheap_node),
12922 + GFP_ATOMIC);
12923 + bheap_init(&(crm_srt[i].cpu_heap));
12924 + rm_srt_domain_init(&(crm_srt[i].domain), NULL, crm_srt_release_jobs);
12925 +
12926 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
12927 + crm_srt[i].pending_tasklets.head = NULL;
12928 + crm_srt[i].pending_tasklets.tail = &(crm_srt[i].pending_tasklets.head);
12929 +#endif
12930 +
12931 + if(!zalloc_cpumask_var(&crm_srt[i].cpu_map, GFP_ATOMIC))
12932 + return -ENOMEM;
12933 + }
12934 +
12935 +	/* cycle through clusters and add cpus to them */
12936 + for (i = 0; i < num_clusters; i++) {
12937 +
12938 + for_each_online_cpu(cpu) {
12939 + /* check if the cpu is already in a cluster */
12940 + for (j = 0; j < num_clusters; j++)
12941 + if (cpumask_test_cpu(cpu, crm_srt[j].cpu_map))
12942 + break;
12943 + /* if it is in a cluster go to next cpu */
12944 + if (j < num_clusters &&
12945 + cpumask_test_cpu(cpu, crm_srt[j].cpu_map))
12946 + continue;
12947 +
12948 + /* this cpu isn't in any cluster */
12949 + /* get the shared cpus */
12950 + if (unlikely(cluster_config == GLOBAL_CLUSTER))
12951 + cpumask_copy(mask, cpu_online_mask);
12952 + else
12953 + get_shared_cpu_map(mask, cpu, cluster_config);
12954 +
12955 + cpumask_copy(crm_srt[i].cpu_map, mask);
12956 +#ifdef VERBOSE_INIT
12957 + print_cluster_topology(mask, cpu);
12958 +#endif
12959 + /* add cpus to current cluster and init cpu_entry_t */
12960 + cpu_count = 0;
12961 + for_each_cpu(ccpu, crm_srt[i].cpu_map) {
12962 +
12963 + entry = &per_cpu(crm_srt_cpu_entries, ccpu);
12964 + crm_srt[i].cpus[cpu_count] = entry;
12965 + atomic_set(&entry->will_schedule, 0);
12966 + entry->cpu = ccpu;
12967 + entry->cluster = &crm_srt[i];
12968 + entry->hn = &(crm_srt[i].heap_node[cpu_count]);
12969 + bheap_node_init(&entry->hn, entry);
12970 +
12971 + cpu_count++;
12972 +
12973 + entry->linked = NULL;
12974 + entry->scheduled = NULL;
12975 + update_cpu_position(entry);
12976 + }
12977 + /* done with this cluster */
12978 + break;
12979 + }
12980 + }
12981 +
12982 +#ifdef CONFIG_LITMUS_SOFTIRQD
12983 + {
12984 + /* distribute the daemons evenly across the clusters. */
12985 + int* affinity = kmalloc(NR_LITMUS_SOFTIRQD * sizeof(int), GFP_ATOMIC);
12986 + int num_daemons_per_cluster = NR_LITMUS_SOFTIRQD / num_clusters;
12987 + int left_over = NR_LITMUS_SOFTIRQD % num_clusters;
12988 +
12989 + int daemon = 0;
12990 + for(i = 0; i < num_clusters; ++i)
12991 + {
12992 + int num_on_this_cluster = num_daemons_per_cluster;
12993 + if(left_over)
12994 + {
12995 + ++num_on_this_cluster;
12996 + --left_over;
12997 + }
12998 +
12999 + for(j = 0; j < num_on_this_cluster; ++j)
13000 + {
13001 + // first CPU of this cluster
13002 + affinity[daemon++] = i*cluster_size;
13003 + }
13004 + }
13005 +
13006 + spawn_klitirqd(affinity);
13007 +
13008 + kfree(affinity);
13009 + }
13010 +#endif
13011 +
13012 +#ifdef CONFIG_LITMUS_NVIDIA
13013 + init_nvidia_info();
13014 +#endif
13015 +
13016 + free_cpumask_var(mask);
13017 + clusters_allocated = 1;
13018 + return 0;
13019 +}
13020 +
13021 +/* Plugin object */
13022 +static struct sched_plugin crm_srt_plugin __cacheline_aligned_in_smp = {
13023 + .plugin_name = "C-RM-SRT",
13024 + .finish_switch = crm_srt_finish_switch,
13025 + .tick = crm_srt_tick,
13026 + .task_new = crm_srt_task_new,
13027 + .complete_job = complete_job,
13028 + .task_exit = crm_srt_task_exit,
13029 + .schedule = crm_srt_schedule,
13030 + .task_wake_up = crm_srt_task_wake_up,
13031 + .task_block = crm_srt_task_block,
13032 + .admit_task = crm_srt_admit_task,
13033 + .activate_plugin = crm_srt_activate_plugin,
13034 +#ifdef CONFIG_LITMUS_LOCKING
13035 + .allocate_lock = crm_srt_allocate_lock,
13036 + .set_prio_inh = set_priority_inheritance,
13037 + .clear_prio_inh = clear_priority_inheritance,
13038 +#endif
13039 +#ifdef CONFIG_LITMUS_SOFTIRQD
13040 + .set_prio_inh_klitirqd = set_priority_inheritance_klitirqd,
13041 + .clear_prio_inh_klitirqd = clear_priority_inheritance_klitirqd,
13042 +#endif
13043 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
13044 + .enqueue_pai_tasklet = enqueue_pai_tasklet,
13045 + .run_tasklets = run_tasklets,
13046 +#endif
13047 +};
13048 +
13049 +static struct proc_dir_entry *cluster_file = NULL, *crm_srt_dir = NULL;
13050 +
13051 +static int __init init_crm_srt(void)
13052 +{
13053 + int err, fs;
13054 +
13055 + err = register_sched_plugin(&crm_srt_plugin);
13056 + if (!err) {
13057 + fs = make_plugin_proc_dir(&crm_srt_plugin, &crm_srt_dir);
13058 + if (!fs)
13059 + cluster_file = create_cluster_file(crm_srt_dir, &cluster_config);
13060 + else
13061 + printk(KERN_ERR "Could not allocate C-RM-SRT procfs dir.\n");
13062 + }
13063 + return err;
13064 +}
13065 +
13066 +static void clean_crm_srt(void)
13067 +{
13068 + cleanup_crm_srt();
13069 + if (cluster_file)
13070 + remove_proc_entry("cluster", crm_srt_dir);
13071 + if (crm_srt_dir)
13072 + remove_plugin_proc_dir(&crm_srt_plugin);
13073 +}
13074 +
13075 +module_init(init_crm_srt);
13076 +module_exit(clean_crm_srt);
13077 diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c
13078 index 3092797..30c745f 100644
13079 --- a/litmus/sched_gsn_edf.c
13080 +++ b/litmus/sched_gsn_edf.c
13081 @@ -12,6 +12,8 @@
13082 #include <linux/percpu.h>
13083 #include <linux/sched.h>
13084 #include <linux/slab.h>
13085 +#include <linux/uaccess.h>
13086 +
13087
13088 #include <litmus/litmus.h>
13089 #include <litmus/jobs.h>
13090 @@ -25,6 +27,24 @@
13091
13092 #include <linux/module.h>
13093
13094 +#ifdef CONFIG_SCHED_CPU_AFFINITY
13095 +#include <litmus/affinity.h>
13096 +#endif
13097 +
13098 +#ifdef CONFIG_LITMUS_SOFTIRQD
13099 +#include <litmus/litmus_softirq.h>
13100 +#endif
13101 +
13102 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
13103 +#include <linux/interrupt.h>
13104 +#include <litmus/trace.h>
13105 +#endif
13106 +
13107 +#ifdef CONFIG_LITMUS_NVIDIA
13108 +#include <litmus/nvidia_info.h>
13109 +#endif
13110 +
13111 +
13112 /* Overview of GSN-EDF operations.
13113 *
13114 * For a detailed explanation of GSN-EDF have a look at the FMLP paper. This
13115 @@ -111,6 +131,16 @@ static struct bheap gsnedf_cpu_heap;
13116 static rt_domain_t gsnedf;
13117 #define gsnedf_lock (gsnedf.ready_lock)
13118
13119 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
13120 +struct tasklet_head
13121 +{
13122 + struct tasklet_struct *head;
13123 + struct tasklet_struct **tail;
13124 +};
13125 +
13126 +struct tasklet_head gsnedf_pending_tasklets;
13127 +#endif
13128 +
13129
13130 /* Uncomment this if you want to see all scheduling decisions in the
13131 * TRACE() log.
13132 @@ -253,21 +283,52 @@ static noinline void requeue(struct task_struct* task)
13133 }
13134 }
13135
13136 +#ifdef CONFIG_SCHED_CPU_AFFINITY
13137 +static cpu_entry_t* gsnedf_get_nearest_available_cpu(cpu_entry_t* start)
13138 +{
13139 + cpu_entry_t* affinity;
13140 +
13141 + get_nearest_available_cpu(affinity, start, gsnedf_cpu_entries,
13142 +#ifdef CONFIG_RELEASE_MASTER
13143 + gsnedf.release_master
13144 +#else
13145 + -1
13146 +#endif
13147 + );
13148 +
13149 + return(affinity);
13150 +}
13151 +#endif
13152 +
13153 /* check for any necessary preemptions */
13154 static void check_for_preemptions(void)
13155 {
13156 struct task_struct *task;
13157 - cpu_entry_t* last;
13158 + cpu_entry_t *last;
13159
13160 for(last = lowest_prio_cpu();
13161 edf_preemption_needed(&gsnedf, last->linked);
13162 last = lowest_prio_cpu()) {
13163 /* preemption necessary */
13164 task = __take_ready(&gsnedf);
13165 - TRACE("check_for_preemptions: attempting to link task %d to %d\n",
13166 - task->pid, last->cpu);
13167 +
13168 +#ifdef CONFIG_SCHED_CPU_AFFINITY
13169 + {
13170 + cpu_entry_t* affinity = gsnedf_get_nearest_available_cpu(
13171 + &per_cpu(gsnedf_cpu_entries, task_cpu(task)));
13172 + if(affinity)
13173 + last = affinity;
13174 + else if(last->linked)
13175 + requeue(last->linked);
13176 + }
13177 +#else
13178 if (last->linked)
13179 requeue(last->linked);
13180 +#endif
13181 +
13182 + TRACE("check_for_preemptions: attempting to link task %d to %d\n",
13183 + task->pid, last->cpu);
13184 +
13185 link_task_to_cpu(task, last);
13186 preempt(last);
13187 }
13188 @@ -277,7 +338,7 @@ static void check_for_preemptions(void)
13189 static noinline void gsnedf_job_arrival(struct task_struct* task)
13190 {
13191 BUG_ON(!task);
13192 -
13193 +
13194 requeue(task);
13195 check_for_preemptions();
13196 }
13197 @@ -298,9 +359,13 @@ static void gsnedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
13198 static noinline void job_completion(struct task_struct *t, int forced)
13199 {
13200 BUG_ON(!t);
13201 -
13202 +
13203 sched_trace_task_completion(t, forced);
13204
13205 +#ifdef CONFIG_LITMUS_NVIDIA
13206 + atomic_set(&tsk_rt(t)->nv_int_count, 0);
13207 +#endif
13208 +
13209 TRACE_TASK(t, "job_completion().\n");
13210
13211 /* set flags */
13212 @@ -343,6 +408,414 @@ static void gsnedf_tick(struct task_struct* t)
13213 }
13214 }
13215
13216 +
13217 +
13218 +
13219 +
13220 +
13221 +
13222 +
13223 +
13224 +
13225 +
13226 +
13227 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
13228 +
13229 +
13230 +static void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed)
13231 +{
13232 + if (!atomic_read(&tasklet->count)) {
13233 + sched_trace_tasklet_begin(tasklet->owner);
13234 +
13235 + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state))
13236 + {
13237 + BUG();
13238 + }
13239 + TRACE("%s: Invoking tasklet with owner pid = %d (flushed = %d).\n", __FUNCTION__, tasklet->owner->pid, flushed);
13240 + tasklet->func(tasklet->data);
13241 + tasklet_unlock(tasklet);
13242 +
13243 + sched_trace_tasklet_end(tasklet->owner, flushed);
13244 + }
13245 + else {
13246 + BUG();
13247 + }
13248 +}
13249 +
13250 +
13251 +static void __extract_tasklets(struct task_struct* task, struct tasklet_head* task_tasklets)
13252 +{
13253 + struct tasklet_struct* step;
13254 + struct tasklet_struct* tasklet;
13255 + struct tasklet_struct* prev;
13256 +
13257 + task_tasklets->head = NULL;
13258 + task_tasklets->tail = &(task_tasklets->head);
13259 +
13260 + prev = NULL;
13261 + for(step = gsnedf_pending_tasklets.head; step != NULL; step = step->next)
13262 + {
13263 + if(step->owner == task)
13264 + {
13265 + TRACE("%s: Found tasklet to flush: %d\n", __FUNCTION__, step->owner->pid);
13266 +
13267 + tasklet = step;
13268 +
13269 + if(prev) {
13270 + prev->next = tasklet->next;
13271 + }
13272 + else if(gsnedf_pending_tasklets.head == tasklet) {
13273 + // we're at the head.
13274 + gsnedf_pending_tasklets.head = tasklet->next;
13275 + }
13276 +
13277 + if(gsnedf_pending_tasklets.tail == &(tasklet->next)) {
13278 + // we're at the tail
13279 + if(prev) {
13280 + gsnedf_pending_tasklets.tail = &(prev->next);
13281 + }
13282 + else {
13283 + gsnedf_pending_tasklets.tail = &(gsnedf_pending_tasklets.head);
13284 + }
13285 + }
13286 +
13287 + tasklet->next = NULL;
13288 + *(task_tasklets->tail) = tasklet;
13289 + task_tasklets->tail = &(tasklet->next);
13290 + }
13291 + else {
13292 + prev = step;
13293 + }
13294 + }
13295 +}
13296 +
13297 +static void flush_tasklets(struct task_struct* task)
13298 +{
13299 + unsigned long flags;
13300 + struct tasklet_head task_tasklets;
13301 + struct tasklet_struct* step;
13302 +
13303 + raw_spin_lock_irqsave(&gsnedf_lock, flags);
13304 + __extract_tasklets(task, &task_tasklets);
13305 + raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
13306 +
13307 + if(task_tasklets.head != NULL) {
13308 + TRACE("%s: Flushing tasklets for %d...\n", __FUNCTION__, task->pid);
13309 + }
13310 +
13311 + // now execute any flushed tasklets.
13312 + for(step = task_tasklets.head; step != NULL; /**/)
13313 + {
13314 + struct tasklet_struct* temp = step->next;
13315 +
13316 + step->next = NULL;
13317 + __do_lit_tasklet(step, 1ul);
13318 +
13319 + step = temp;
13320 + }
13321 +}
13322 +
13323 +
13324 +static void do_lit_tasklets(struct task_struct* sched_task)
13325 +{
13326 + int work_to_do = 1;
13327 + struct tasklet_struct *tasklet = NULL;
13328 + //struct tasklet_struct *step;
13329 + unsigned long flags;
13330 +
13331 + while(work_to_do) {
13332 +
13333 + TS_NV_SCHED_BOTISR_START;
13334 +
13335 + // remove tasklet at head of list if it has higher priority.
13336 + raw_spin_lock_irqsave(&gsnedf_lock, flags);
13337 +
13338 + /*
13339 + step = gsnedf_pending_tasklets.head;
13340 + TRACE("%s: (BEFORE) dumping tasklet queue...\n", __FUNCTION__);
13341 + while(step != NULL){
13342 + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid);
13343 + step = step->next;
13344 + }
13345 + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(gsnedf_pending_tasklets.tail), (*(gsnedf_pending_tasklets.tail) != NULL) ? (*(gsnedf_pending_tasklets.tail))->owner->pid : -1);
13346 + TRACE("%s: done.\n", __FUNCTION__);
13347 + */
13348 +
13349 +
13350 + if(gsnedf_pending_tasklets.head != NULL) {
13351 + // remove tasklet at head.
13352 + tasklet = gsnedf_pending_tasklets.head;
13353 +
13354 + if(edf_higher_prio(tasklet->owner, sched_task)) {
13355 +
13356 + if(NULL == tasklet->next) {
13357 + // tasklet is at the head, list only has one element
13358 + TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
13359 + gsnedf_pending_tasklets.tail = &(gsnedf_pending_tasklets.head);
13360 + }
13361 +
13362 + // remove the tasklet from the queue
13363 + gsnedf_pending_tasklets.head = tasklet->next;
13364 +
13365 + TRACE("%s: Removed tasklet for %d from tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
13366 + }
13367 + else {
13368 + TRACE("%s: Pending tasklet (%d) does not have priority to run on this CPU (%d).\n", __FUNCTION__, tasklet->owner->pid, smp_processor_id());
13369 + tasklet = NULL;
13370 + }
13371 + }
13372 + else {
13373 + TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__);
13374 + }
13375 +
13376 +
13377 + /*
13378 + step = gsnedf_pending_tasklets.head;
13379 + TRACE("%s: (AFTER) dumping tasklet queue...\n", __FUNCTION__);
13380 + while(step != NULL){
13381 + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid);
13382 + step = step->next;
13383 + }
13384 + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(gsnedf_pending_tasklets.tail), (*(gsnedf_pending_tasklets.tail) != NULL) ? (*(gsnedf_pending_tasklets.tail))->owner->pid : -1);
13385 + TRACE("%s: done.\n", __FUNCTION__);
13386 + */
13387 +
13388 + raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
13389 +
13390 + TS_NV_SCHED_BOTISR_END;
13391 +
13392 + if(tasklet) {
13393 + __do_lit_tasklet(tasklet, 0ul);
13394 + tasklet = NULL;
13395 + }
13396 + else {
13397 + work_to_do = 0;
13398 + }
13399 + }
13400 +
13401 + //TRACE("%s: exited.\n", __FUNCTION__);
13402 +}
13403 +
13404 +
13405 +static void run_tasklets(struct task_struct* sched_task)
13406 +{
13407 +#if 0
13408 + int task_is_rt = is_realtime(sched_task);
13409 + cedf_domain_t* cluster;
13410 +
13411 + if(is_realtime(sched_task)) {
13412 + cluster = task_cpu_cluster(sched_task);
13413 + }
13414 + else {
13415 + cluster = remote_cluster(get_cpu());
13416 + }
13417 +
13418 + if(cluster && gsnedf_pending_tasklets.head != NULL) {
13419 + TRACE("%s: There are tasklets to process.\n", __FUNCTION__);
13420 +
13421 + do_lit_tasklets(cluster, sched_task);
13422 + }
13423 +
13424 + if(!task_is_rt) {
13425 + put_cpu_no_resched();
13426 + }
13427 +#else
13428 +
13429 + preempt_disable();
13430 +
13431 + if(gsnedf_pending_tasklets.head != NULL) {
13432 + TRACE("%s: There are tasklets to process.\n", __FUNCTION__);
13433 + do_lit_tasklets(sched_task);
13434 + }
13435 +
13436 + preempt_enable_no_resched();
13437 +
13438 +#endif
13439 +}
13440 +
13441 +
13442 +static void __add_pai_tasklet(struct tasklet_struct* tasklet)
13443 +{
13444 + struct tasklet_struct* step;
13445 +
13446 + /*
13447 + step = gsnedf_pending_tasklets.head;
13448 + TRACE("%s: (BEFORE) dumping tasklet queue...\n", __FUNCTION__);
13449 + while(step != NULL){
13450 + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid);
13451 + step = step->next;
13452 + }
13453 + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(gsnedf_pending_tasklets.tail), (*(gsnedf_pending_tasklets.tail) != NULL) ? (*(gsnedf_pending_tasklets.tail))->owner->pid : -1);
13454 + TRACE("%s: done.\n", __FUNCTION__);
13455 + */
13456 +
13457 +
13458 + tasklet->next = NULL; // make sure there are no old values floating around
13459 +
13460 + step = gsnedf_pending_tasklets.head;
13461 + if(step == NULL) {
13462 + TRACE("%s: tasklet queue empty. inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid);
13463 + // insert at tail.
13464 + *(gsnedf_pending_tasklets.tail) = tasklet;
13465 + gsnedf_pending_tasklets.tail = &(tasklet->next);
13466 + }
13467 + else if((*(gsnedf_pending_tasklets.tail) != NULL) &&
13468 + edf_higher_prio((*(gsnedf_pending_tasklets.tail))->owner, tasklet->owner)) {
13469 + // insert at tail.
13470 + TRACE("%s: tasklet belongs at end. inserting tasklet for %d at tail.\n", __FUNCTION__, tasklet->owner->pid);
13471 +
13472 + *(gsnedf_pending_tasklets.tail) = tasklet;
13473 + gsnedf_pending_tasklets.tail = &(tasklet->next);
13474 + }
13475 + else {
13476 +
13477 + //WARN_ON(1 == 1);
13478 +
13479 + // insert the tasklet somewhere in the middle.
13480 +
13481 + TRACE("%s: tasklet belongs somewhere in the middle.\n", __FUNCTION__);
13482 +
13483 + while(step->next && edf_higher_prio(step->next->owner, tasklet->owner)) {
13484 + step = step->next;
13485 + }
13486 +
13487 + // insert tasklet right before step->next.
13488 +
13489 + TRACE("%s: inserting tasklet for %d between %d and %d.\n", __FUNCTION__, tasklet->owner->pid, step->owner->pid, (step->next) ? step->next->owner->pid : -1);
13490 +
13491 + tasklet->next = step->next;
13492 + step->next = tasklet;
13493 +
13494 + // patch up the head if needed.
13495 + if(gsnedf_pending_tasklets.head == step)
13496 + {
13497 + TRACE("%s: %d is the new tasklet queue head.\n", __FUNCTION__, tasklet->owner->pid);
13498 + gsnedf_pending_tasklets.head = tasklet;
13499 + }
13500 + }
13501 +
13502 + /*
13503 + step = gsnedf_pending_tasklets.head;
13504 + TRACE("%s: (AFTER) dumping tasklet queue...\n", __FUNCTION__);
13505 + while(step != NULL){
13506 + TRACE("%s: %p (%d)\n", __FUNCTION__, step, step->owner->pid);
13507 + step = step->next;
13508 + }
13509 + TRACE("%s: tail = %p (%d)\n", __FUNCTION__, *(gsnedf_pending_tasklets.tail), (*(gsnedf_pending_tasklets.tail) != NULL) ? (*(gsnedf_pending_tasklets.tail))->owner->pid : -1);
13510 + TRACE("%s: done.\n", __FUNCTION__);
13511 + */
13512 +
13513 + // TODO: Maintain this list in priority order.
13514 + // tasklet->next = NULL;
13515 + // *(gsnedf_pending_tasklets.tail) = tasklet;
13516 + // gsnedf_pending_tasklets.tail = &tasklet->next;
13517 +}
13518 +
13519 +static int enqueue_pai_tasklet(struct tasklet_struct* tasklet)
13520 +{
13521 + cpu_entry_t *targetCPU = NULL;
13522 + int thisCPU;
13523 + int runLocal = 0;
13524 + int runNow = 0;
13525 + unsigned long flags;
13526 +
13527 + if(unlikely((tasklet->owner == NULL) || !is_realtime(tasklet->owner)))
13528 + {
13529 + TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
13530 + return 0;
13531 + }
13532 +
13533 +
13534 + raw_spin_lock_irqsave(&gsnedf_lock, flags);
13535 +
13536 + thisCPU = smp_processor_id();
13537 +
13538 +#if 1
13539 +#ifdef CONFIG_SCHED_CPU_AFFINITY
13540 + {
13541 + cpu_entry_t* affinity = NULL;
13542 +
13543 + // use this CPU if it is in our cluster and isn't running any RT work.
13544 + if(
13545 +#ifdef CONFIG_RELEASE_MASTER
13546 + (thisCPU != gsnedf.release_master) &&
13547 +#endif
13548 + (__get_cpu_var(gsnedf_cpu_entries).linked == NULL)) {
13549 + affinity = &(__get_cpu_var(gsnedf_cpu_entries));
13550 + }
13551 + else {
13552 + // this CPU is busy or shouldn't run tasklet in this cluster.
13553 + // look for available nearby CPUs.
13554 + // NOTE: Affinity towards owner and not this CPU. Is this right?
13555 + affinity =
13556 + gsnedf_get_nearest_available_cpu(
13557 + &per_cpu(gsnedf_cpu_entries, task_cpu(tasklet->owner)));
13558 + }
13559 +
13560 + targetCPU = affinity;
13561 + }
13562 +#endif
13563 +#endif
13564 +
13565 + if (targetCPU == NULL) {
13566 + targetCPU = lowest_prio_cpu();
13567 + }
13568 +
13569 + if (edf_higher_prio(tasklet->owner, targetCPU->linked)) {
13570 + if (thisCPU == targetCPU->cpu) {
13571 + TRACE("%s: Run tasklet locally (and now).\n", __FUNCTION__);
13572 + runLocal = 1;
13573 + runNow = 1;
13574 + }
13575 + else {
13576 + TRACE("%s: Run tasklet remotely (and now).\n", __FUNCTION__);
13577 + runLocal = 0;
13578 + runNow = 1;
13579 + }
13580 + }
13581 + else {
13582 + runLocal = 0;
13583 + runNow = 0;
13584 + }
13585 +
13586 + if(!runLocal) {
13587 + // enqueue the tasklet
13588 + __add_pai_tasklet(tasklet);
13589 + }
13590 +
13591 + raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
13592 +
13593 +
13594 + if (runLocal /*&& runNow */) { // runNow == 1 is implied
13595 + TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__);
13596 + __do_lit_tasklet(tasklet, 0ul);
13597 + }
13598 + else if (runNow /*&& !runLocal */) { // runLocal == 0 is implied
13599 + TRACE("%s: Triggering CPU %d to run tasklet.\n", __FUNCTION__, targetCPU->cpu);
13600 + preempt(targetCPU); // need to be protected by gsnedf_lock?
13601 + }
13602 + else {
13603 + TRACE("%s: Scheduling of tasklet was deferred.\n", __FUNCTION__);
13604 + }
13605 +
13606 + return(1); // success
13607 +}
13608 +
13609 +
13610 +#endif
13611 +
13612 +
13613 +
13614 +
13615 +
13616 +
13617 +
13618 +
13619 +
13620 +
13621 +
13622 +
13623 +
13624 /* Getting schedule() right is a bit tricky. schedule() may not make any
13625 * assumptions on the state of the current task since it may be called for a
13626 * number of reasons. The reasons include a scheduler_tick() determined that it
13627 @@ -401,17 +874,19 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev)
13628 TRACE_TASK(prev, "invoked gsnedf_schedule.\n");
13629 #endif
13630
13631 + /*
13632 if (exists)
13633 TRACE_TASK(prev,
13634 "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
13635 "state:%d sig:%d\n",
13636 blocks, out_of_time, np, sleep, preempt,
13637 prev->state, signal_pending(prev));
13638 + */
13639 +
13640 if (entry->linked && preempt)
13641 TRACE_TASK(prev, "will be preempted by %s/%d\n",
13642 entry->linked->comm, entry->linked->pid);
13643
13644 -
13645 /* If a task blocks we have no choice but to reschedule.
13646 */
13647 if (blocks)
13648 @@ -456,12 +931,15 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev)
13649 entry->scheduled->rt_param.scheduled_on = NO_CPU;
13650 TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
13651 }
13652 - } else
13653 + }
13654 + else
13655 + {
13656 /* Only override Linux scheduler if we have a real-time task
13657 * scheduled that needs to continue.
13658 */
13659 if (exists)
13660 next = prev;
13661 + }
13662
13663 sched_state_task_picked();
13664
13665 @@ -486,8 +964,9 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev)
13666 static void gsnedf_finish_switch(struct task_struct *prev)
13667 {
13668 cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
13669 -
13670 +
13671 entry->scheduled = is_realtime(current) ? current : NULL;
13672 +
13673 #ifdef WANT_ALL_SCHED_EVENTS
13674 TRACE_TASK(prev, "switched away from\n");
13675 #endif
13676 @@ -536,11 +1015,14 @@ static void gsnedf_task_new(struct task_struct * t, int on_rq, int running)
13677 static void gsnedf_task_wake_up(struct task_struct *task)
13678 {
13679 unsigned long flags;
13680 - lt_t now;
13681 -
13682 + //lt_t now;
13683 +
13684 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
13685
13686 raw_spin_lock_irqsave(&gsnedf_lock, flags);
13687 +
13688 +
13689 +#if 0 // sporadic task model
13690 /* We need to take suspensions because of semaphores into
13691 * account! If a job resumes after being suspended due to acquiring
13692 * a semaphore, it should never be treated as a new job release.
13693 @@ -562,19 +1044,26 @@ static void gsnedf_task_wake_up(struct task_struct *task)
13694 }
13695 }
13696 }
13697 +#else // periodic task model
13698 + set_rt_flags(task, RT_F_RUNNING);
13699 +#endif
13700 +
13701 gsnedf_job_arrival(task);
13702 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
13703 }
13704
13705 static void gsnedf_task_block(struct task_struct *t)
13706 {
13707 + // TODO: is this called on preemption??
13708 unsigned long flags;
13709
13710 TRACE_TASK(t, "block at %llu\n", litmus_clock());
13711
13712 /* unlink if necessary */
13713 raw_spin_lock_irqsave(&gsnedf_lock, flags);
13714 +
13715 unlink(t);
13716 +
13717 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
13718
13719 BUG_ON(!is_realtime(t));
13720 @@ -585,6 +1074,10 @@ static void gsnedf_task_exit(struct task_struct * t)
13721 {
13722 unsigned long flags;
13723
13724 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
13725 + flush_tasklets(t);
13726 +#endif
13727 +
13728 /* unlink if necessary */
13729 raw_spin_lock_irqsave(&gsnedf_lock, flags);
13730 unlink(t);
13731 @@ -593,7 +1086,7 @@ static void gsnedf_task_exit(struct task_struct * t)
13732 tsk_rt(t)->scheduled_on = NO_CPU;
13733 }
13734 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
13735 -
13736 +
13737 BUG_ON(!is_realtime(t));
13738 TRACE_TASK(t, "RIP\n");
13739 }
13740 @@ -608,51 +1101,53 @@ static long gsnedf_admit_task(struct task_struct* tsk)
13741
13742 #include <litmus/fdso.h>
13743
13744 -/* called with IRQs off */
13745 -static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
13746 +
13747 +static void __set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
13748 {
13749 int linked_on;
13750 - int check_preempt = 0;
13751 -
13752 - raw_spin_lock(&gsnedf_lock);
13753 -
13754 - TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid);
13755 + int check_preempt = 0;
13756 +
13757 + if(prio_inh != NULL)
13758 + TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid);
13759 + else
13760 + TRACE_TASK(t, "inherits priority from %p\n", prio_inh);
13761 +
13762 + sched_trace_eff_prio_change(t, prio_inh);
13763 +
13764 tsk_rt(t)->inh_task = prio_inh;
13765 -
13766 +
13767 linked_on = tsk_rt(t)->linked_on;
13768 -
13769 +
13770 /* If it is scheduled, then we need to reorder the CPU heap. */
13771 if (linked_on != NO_CPU) {
13772 TRACE_TASK(t, "%s: linked on %d\n",
13773 - __FUNCTION__, linked_on);
13774 + __FUNCTION__, linked_on);
13775 /* Holder is scheduled; need to re-order CPUs.
13776 * We can't use heap_decrease() here since
13777 * the cpu_heap is ordered in reverse direction, so
13778 * it is actually an increase. */
13779 bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap,
13780 - gsnedf_cpus[linked_on]->hn);
13781 + gsnedf_cpus[linked_on]->hn);
13782 bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap,
13783 - gsnedf_cpus[linked_on]->hn);
13784 + gsnedf_cpus[linked_on]->hn);
13785 } else {
13786 /* holder may be queued: first stop queue changes */
13787 raw_spin_lock(&gsnedf.release_lock);
13788 if (is_queued(t)) {
13789 - TRACE_TASK(t, "%s: is queued\n",
13790 - __FUNCTION__);
13791 + TRACE_TASK(t, "%s: is queued\n", __FUNCTION__);
13792 +
13793 /* We need to update the position of holder in some
13794 * heap. Note that this could be a release heap if we
13795 * budget enforcement is used and this job overran. */
13796 - check_preempt =
13797 - !bheap_decrease(edf_ready_order,
13798 - tsk_rt(t)->heap_node);
13799 + check_preempt = !bheap_decrease(edf_ready_order, tsk_rt(t)->heap_node);
13800 +
13801 } else {
13802 /* Nothing to do: if it is not queued and not linked
13803 * then it is either sleeping or currently being moved
13804 * by other code (e.g., a timer interrupt handler) that
13805 * will use the correct priority when enqueuing the
13806 * task. */
13807 - TRACE_TASK(t, "%s: is NOT queued => Done.\n",
13808 - __FUNCTION__);
13809 + TRACE_TASK(t, "%s: is NOT queued => Done.\n", __FUNCTION__);
13810 }
13811 raw_spin_unlock(&gsnedf.release_lock);
13812
13813 @@ -666,34 +1161,148 @@ static void set_priority_inheritance(struct task_struct* t, struct task_struct*
13814 /* heap_decrease() hit the top level of the heap: make
13815 * sure preemption checks get the right task, not the
13816 * potentially stale cache. */
13817 - bheap_uncache_min(edf_ready_order,
13818 - &gsnedf.ready_queue);
13819 + bheap_uncache_min(edf_ready_order, &gsnedf.ready_queue);
13820 check_for_preemptions();
13821 }
13822 }
13823 +}
13824
13825 +/* called with IRQs off */
13826 +static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
13827 +{
13828 + raw_spin_lock(&gsnedf_lock);
13829 +
13830 + __set_priority_inheritance(t, prio_inh);
13831 +
13832 +#ifdef CONFIG_LITMUS_SOFTIRQD
13833 + if(tsk_rt(t)->cur_klitirqd != NULL)
13834 + {
13835 + TRACE_TASK(t, "%s/%d inherits a new priority!\n",
13836 + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
13837 +
13838 + __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh);
13839 + }
13840 +#endif
13841 +
13842 raw_spin_unlock(&gsnedf_lock);
13843 }
13844
13845 +
13846 +/* called with IRQs off */
13847 +static void __clear_priority_inheritance(struct task_struct* t)
13848 +{
13849 + TRACE_TASK(t, "priority restored\n");
13850 +
13851 + if(tsk_rt(t)->scheduled_on != NO_CPU)
13852 + {
13853 + sched_trace_eff_prio_change(t, NULL);
13854 +
13855 + tsk_rt(t)->inh_task = NULL;
13856 +
13857 + /* Check if rescheduling is necessary. We can't use heap_decrease()
13858 + * since the priority was effectively lowered. */
13859 + unlink(t);
13860 + gsnedf_job_arrival(t);
13861 + }
13862 + else
13863 + {
13864 + __set_priority_inheritance(t, NULL);
13865 + }
13866 +
13867 +#ifdef CONFIG_LITMUS_SOFTIRQD
13868 + if(tsk_rt(t)->cur_klitirqd != NULL)
13869 + {
13870 + TRACE_TASK(t, "%s/%d inheritance set back to owner.\n",
13871 + tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
13872 +
13873 + if(tsk_rt(tsk_rt(t)->cur_klitirqd)->scheduled_on != NO_CPU)
13874 + {
13875 + sched_trace_eff_prio_change(tsk_rt(t)->cur_klitirqd, t);
13876 +
13877 + tsk_rt(tsk_rt(t)->cur_klitirqd)->inh_task = t;
13878 +
13879 + /* Check if rescheduling is necessary. We can't use heap_decrease()
13880 + * since the priority was effectively lowered. */
13881 + unlink(tsk_rt(t)->cur_klitirqd);
13882 + gsnedf_job_arrival(tsk_rt(t)->cur_klitirqd);
13883 + }
13884 + else
13885 + {
13886 + __set_priority_inheritance(tsk_rt(t)->cur_klitirqd, t);
13887 + }
13888 + }
13889 +#endif
13890 +}
13891 +
13892 /* called with IRQs off */
13893 static void clear_priority_inheritance(struct task_struct* t)
13894 {
13895 raw_spin_lock(&gsnedf_lock);
13896 + __clear_priority_inheritance(t);
13897 + raw_spin_unlock(&gsnedf_lock);
13898 +}
13899
13900 - /* A job only stops inheriting a priority when it releases a
13901 - * resource. Thus we can make the following assumption.*/
13902 - BUG_ON(tsk_rt(t)->scheduled_on == NO_CPU);
13903 -
13904 - TRACE_TASK(t, "priority restored\n");
13905 - tsk_rt(t)->inh_task = NULL;
13906 +#ifdef CONFIG_LITMUS_SOFTIRQD
13907 +/* called with IRQs off */
13908 +static void set_priority_inheritance_klitirqd(struct task_struct* klitirqd,
13909 + struct task_struct* old_owner,
13910 + struct task_struct* new_owner)
13911 +{
13912 + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
13913 +
13914 + raw_spin_lock(&gsnedf_lock);
13915 +
13916 + if(old_owner != new_owner)
13917 + {
13918 + if(old_owner)
13919 + {
13920 + // unreachable?
13921 + tsk_rt(old_owner)->cur_klitirqd = NULL;
13922 + }
13923 +
13924 + TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n",
13925 + new_owner->comm, new_owner->pid);
13926
13927 - /* Check if rescheduling is necessary. We can't use heap_decrease()
13928 - * since the priority was effectively lowered. */
13929 - unlink(t);
13930 - gsnedf_job_arrival(t);
13931 + tsk_rt(new_owner)->cur_klitirqd = klitirqd;
13932 + }
13933 +
13934 + __set_priority_inheritance(klitirqd,
13935 + (tsk_rt(new_owner)->inh_task == NULL) ?
13936 + new_owner :
13937 + tsk_rt(new_owner)->inh_task);
13938 +
13939 + raw_spin_unlock(&gsnedf_lock);
13940 +}
13941
13942 +/* called with IRQs off */
13943 +static void clear_priority_inheritance_klitirqd(struct task_struct* klitirqd,
13944 + struct task_struct* old_owner)
13945 +{
13946 + BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
13947 +
13948 + raw_spin_lock(&gsnedf_lock);
13949 +
13950 + TRACE_TASK(klitirqd, "priority restored\n");
13951 +
13952 + if(tsk_rt(klitirqd)->scheduled_on != NO_CPU)
13953 + {
13954 + tsk_rt(klitirqd)->inh_task = NULL;
13955 +
13956 + /* Check if rescheduling is necessary. We can't use heap_decrease()
13957 + * since the priority was effectively lowered. */
13958 + unlink(klitirqd);
13959 + gsnedf_job_arrival(klitirqd);
13960 + }
13961 + else
13962 + {
13963 + __set_priority_inheritance(klitirqd, NULL);
13964 + }
13965 +
13966 + tsk_rt(old_owner)->cur_klitirqd = NULL;
13967 +
13968 raw_spin_unlock(&gsnedf_lock);
13969 }
13970 +#endif
13971
13972
13973 /* ******************** FMLP support ********************** */
13974 @@ -892,11 +1501,483 @@ static struct litmus_lock* gsnedf_new_fmlp(void)
13975 return &sem->litmus_lock;
13976 }
13977
13978 +
13979 +
13980 +
13981 +
13982 +
13983 +
13984 +/* ******************** KFMLP support ********************** */
13985 +
13986 +/* struct for semaphore with priority inheritance */
13987 +struct kfmlp_queue
13988 +{
13989 + wait_queue_head_t wait;
13990 + struct task_struct* owner;
13991 + struct task_struct* hp_waiter;
13992 + int count; /* number of waiters + holder */
13993 +};
13994 +
13995 +struct kfmlp_semaphore
13996 +{
13997 + struct litmus_lock litmus_lock;
13998 +
13999 + spinlock_t lock;
14000 +
14001 + int num_resources; /* aka k */
14002 +
14003 + struct kfmlp_queue *queues; /* array */
14004 + struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */
14005 +};
14006 +
14007 +static inline struct kfmlp_semaphore* kfmlp_from_lock(struct litmus_lock* lock)
14008 +{
14009 + return container_of(lock, struct kfmlp_semaphore, litmus_lock);
14010 +}
14011 +
14012 +static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem,
14013 + struct kfmlp_queue* queue)
14014 +{
14015 + return (queue - &sem->queues[0]);
14016 +}
14017 +
14018 +static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem,
14019 + struct task_struct* holder)
14020 +{
14021 + int i;
14022 + for(i = 0; i < sem->num_resources; ++i)
14023 + if(sem->queues[i].owner == holder)
14024 + return(&sem->queues[i]);
14025 + return(NULL);
14026 +}
14027 +
14028 +/* caller is responsible for locking */
14029 +static struct task_struct* kfmlp_find_hp_waiter(struct kfmlp_queue *kqueue,
14030 + struct task_struct *skip)
14031 +{
14032 + struct list_head *pos;
14033 + struct task_struct *queued, *found = NULL;
14034 +
14035 + list_for_each(pos, &kqueue->wait.task_list) {
14036 + queued = (struct task_struct*) list_entry(pos, wait_queue_t,
14037 + task_list)->private;
14038 +
14039 + /* Compare task prios, find high prio task. */
14040 + if (queued != skip && edf_higher_prio(queued, found))
14041 + found = queued;
14042 + }
14043 + return found;
14044 +}
14045 +
14046 +static inline struct kfmlp_queue* kfmlp_find_shortest(
14047 + struct kfmlp_semaphore* sem,
14048 + struct kfmlp_queue* search_start)
14049 +{
14050 + // we start our search at search_start instead of at the beginning of the
14051 + // queue list to load-balance across all resources.
14052 + struct kfmlp_queue* step = search_start;
14053 + struct kfmlp_queue* shortest = sem->shortest_queue;
14054 +
14055 + do
14056 + {
14057 + step = (step+1 != &sem->queues[sem->num_resources]) ?
14058 + step+1 : &sem->queues[0];
14059 +
14060 + if(step->count < shortest->count)
14061 + {
14062 + shortest = step;
14063 + if(step->count == 0)
14064 + break; /* can't get any shorter */
14065 + }
14066 +
14067 + }while(step != search_start);
14068 +
14069 + return(shortest);
14070 +}
14071 +
14072 +static struct task_struct* kfmlp_remove_hp_waiter(struct kfmlp_semaphore* sem)
14073 +{
14074 + /* must hold sem->lock */
14075 +
14076 + struct kfmlp_queue *my_queue = NULL;
14077 + struct task_struct *max_hp = NULL;
14078 +
14079 +
14080 + struct list_head *pos;
14081 + struct task_struct *queued;
14082 + int i;
14083 +
14084 + for(i = 0; i < sem->num_resources; ++i)
14085 + {
14086 + if( (sem->queues[i].count > 1) &&
14087 + ((my_queue == NULL) ||
14088 + (edf_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) )
14089 + {
14090 + my_queue = &sem->queues[i];
14091 + }
14092 + }
14093 +
14094 + if(my_queue)
14095 + {
14096 + max_hp = my_queue->hp_waiter;
14097 +
14098 + BUG_ON(!max_hp);
14099 +
14100 + TRACE_CUR("queue %d: stealing %s/%d from queue %d\n",
14101 + kfmlp_get_idx(sem, my_queue),
14102 + max_hp->comm, max_hp->pid,
14103 + kfmlp_get_idx(sem, my_queue));
14104 +
14105 + my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, max_hp);
14106 +
14107 + /*
14108 + if(my_queue->hp_waiter)
14109 + TRACE_CUR("queue %d: new hp_waiter is %s/%d\n",
14110 + kfmlp_get_idx(sem, my_queue),
14111 + my_queue->hp_waiter->comm,
14112 + my_queue->hp_waiter->pid);
14113 + else
14114 + TRACE_CUR("queue %d: new hp_waiter is %p\n",
14115 + kfmlp_get_idx(sem, my_queue), NULL);
14116 + */
14117 +
14118 + raw_spin_lock(&gsnedf_lock);
14119 +
14120 + /*
14121 + if(my_queue->owner)
14122 + TRACE_CUR("queue %d: owner is %s/%d\n",
14123 + kfmlp_get_idx(sem, my_queue),
14124 + my_queue->owner->comm,
14125 + my_queue->owner->pid);
14126 + else
14127 + TRACE_CUR("queue %d: owner is %p\n",
14128 + kfmlp_get_idx(sem, my_queue),
14129 + NULL);
14130 + */
14131 +
14132 + if(tsk_rt(my_queue->owner)->inh_task == max_hp)
14133 + {
14134 + __clear_priority_inheritance(my_queue->owner);
14135 + if(my_queue->hp_waiter != NULL)
14136 + {
14137 + __set_priority_inheritance(my_queue->owner, my_queue->hp_waiter);
14138 + }
14139 + }
14140 + raw_spin_unlock(&gsnedf_lock);
14141 +
14142 + list_for_each(pos, &my_queue->wait.task_list)
14143 + {
14144 + queued = (struct task_struct*) list_entry(pos, wait_queue_t,
14145 + task_list)->private;
14146 + /* Compare task prios, find high prio task. */
14147 + if (queued == max_hp)
14148 + {
14149 + /*
14150 + TRACE_CUR("queue %d: found entry in wait queue. REMOVING!\n",
14151 + kfmlp_get_idx(sem, my_queue));
14152 + */
14153 + __remove_wait_queue(&my_queue->wait,
14154 + list_entry(pos, wait_queue_t, task_list));
14155 + break;
14156 + }
14157 + }
14158 + --(my_queue->count);
14159 + }
14160 +
14161 + return(max_hp);
14162 +}
14163 +
14164 +int gsnedf_kfmlp_lock(struct litmus_lock* l)
14165 +{
14166 + struct task_struct* t = current;
14167 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
14168 + struct kfmlp_queue* my_queue;
14169 + wait_queue_t wait;
14170 + unsigned long flags;
14171 +
14172 + if (!is_realtime(t))
14173 + return -EPERM;
14174 +
14175 + spin_lock_irqsave(&sem->lock, flags);
14176 +
14177 + my_queue = sem->shortest_queue;
14178 +
14179 + if (my_queue->owner) {
14180 + /* resource is not free => must suspend and wait */
14181 + TRACE_CUR("queue %d: Resource is not free => must suspend and wait.\n",
14182 + kfmlp_get_idx(sem, my_queue));
14183 +
14184 + init_waitqueue_entry(&wait, t);
14185 +
14186 + /* FIXME: interruptible would be nice some day */
14187 + set_task_state(t, TASK_UNINTERRUPTIBLE);
14188 +
14189 + __add_wait_queue_tail_exclusive(&my_queue->wait, &wait);
14190 +
14191 + /* check if we need to activate priority inheritance */
14192 + if (edf_higher_prio(t, my_queue->hp_waiter))
14193 + {
14194 + my_queue->hp_waiter = t;
14195 + if (edf_higher_prio(t, my_queue->owner))
14196 + {
14197 + set_priority_inheritance(my_queue->owner, my_queue->hp_waiter);
14198 + }
14199 + }
14200 +
14201 + ++(my_queue->count);
14202 + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
14203 +
14204 + /* release lock before sleeping */
14205 + spin_unlock_irqrestore(&sem->lock, flags);
14206 +
14207 + /* We depend on the FIFO order. Thus, we don't need to recheck
14208 + * when we wake up; we are guaranteed to have the lock since
14209 + * there is only one wake up per release (or steal).
14210 + */
14211 + schedule();
14212 +
14213 +
14214 + if(my_queue->owner == t)
14215 + {
14216 + TRACE_CUR("queue %d: acquired through waiting\n",
14217 + kfmlp_get_idx(sem, my_queue));
14218 + }
14219 + else
14220 + {
14221 + /* this case may happen if our wait entry was stolen
14222 + between queues. record where we went. */
14223 + my_queue = kfmlp_get_queue(sem, t);
14224 +
14225 + BUG_ON(!my_queue);
14226 + TRACE_CUR("queue %d: acquired through stealing\n",
14227 + kfmlp_get_idx(sem, my_queue));
14228 + }
14229 + }
14230 + else
14231 + {
14232 + TRACE_CUR("queue %d: acquired immediately\n",
14233 + kfmlp_get_idx(sem, my_queue));
14234 +
14235 + my_queue->owner = t;
14236 +
14237 + ++(my_queue->count);
14238 + sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
14239 +
14240 + spin_unlock_irqrestore(&sem->lock, flags);
14241 + }
14242 +
14243 + return kfmlp_get_idx(sem, my_queue);
14244 +}
14245 +
14246 +int gsnedf_kfmlp_unlock(struct litmus_lock* l)
14247 +{
14248 + struct task_struct *t = current, *next;
14249 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
14250 + struct kfmlp_queue *my_queue;
14251 + unsigned long flags;
14252 + int err = 0;
14253 +
14254 + spin_lock_irqsave(&sem->lock, flags);
14255 +
14256 + my_queue = kfmlp_get_queue(sem, t);
14257 +
14258 + if (!my_queue) {
14259 + err = -EINVAL;
14260 + goto out;
14261 + }
14262 +
14263 + /* check if there are jobs waiting for this resource */
14264 + next = __waitqueue_remove_first(&my_queue->wait);
14265 + if (next) {
14266 + /*
14267 + TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n",
14268 + kfmlp_get_idx(sem, my_queue),
14269 + next->comm, next->pid);
14270 + */
14271 + /* next becomes the resource holder */
14272 + my_queue->owner = next;
14273 +
14274 + --(my_queue->count);
14275 + // the '=' of '<=' is a dumb method to attempt to build
14276 + // affinity until tasks can tell us where they ran last...
14277 + if(my_queue->count <= sem->shortest_queue->count)
14278 + {
14279 + sem->shortest_queue = my_queue;
14280 + }
14281 +
14282 + TRACE_CUR("queue %d: lock ownership passed to %s/%d\n",
14283 + kfmlp_get_idx(sem, my_queue), next->comm, next->pid);
14284 +
14285 + /* determine new hp_waiter if necessary */
14286 + if (next == my_queue->hp_waiter) {
14287 + TRACE_TASK(next, "was highest-prio waiter\n");
14288 + /* next has the highest priority --- it doesn't need to
14289 + * inherit. However, we need to make sure that the
14290 + * next-highest priority in the queue is reflected in
14291 + * hp_waiter. */
14292 + my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next);
14293 + if (my_queue->hp_waiter)
14294 + TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue));
14295 + else
14296 + TRACE("queue %d: no further waiters\n", kfmlp_get_idx(sem, my_queue));
14297 + } else {
14298 + /* Well, if next is not the highest-priority waiter,
14299 + * then it ought to inherit the highest-priority
14300 + * waiter's priority. */
14301 + set_priority_inheritance(next, my_queue->hp_waiter);
14302 + }
14303 +
14304 + /* wake up next */
14305 + wake_up_process(next);
14306 + }
14307 + else
14308 + {
14309 + TRACE_CUR("queue %d: looking to steal someone...\n", kfmlp_get_idx(sem, my_queue));
14310 +
14311 + next = kfmlp_remove_hp_waiter(sem); /* returns NULL if nothing to steal */
14312 +
14313 + /*
14314 + if(next)
14315 + TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - steal\n",
14316 + kfmlp_get_idx(sem, my_queue),
14317 + next->comm, next->pid);
14318 + */
14319 +
14320 + my_queue->owner = next;
14321 +
14322 + if(next)
14323 + {
14324 + TRACE_CUR("queue %d: lock ownership passed to %s/%d (which was stolen)\n",
14325 + kfmlp_get_idx(sem, my_queue),
14326 + next->comm, next->pid);
14327 +
14328 + /* wake up next */
14329 + wake_up_process(next);
14330 + }
14331 + else
14332 + {
14333 + TRACE_CUR("queue %d: no one to steal.\n", kfmlp_get_idx(sem, my_queue));
14334 +
14335 + --(my_queue->count);
14336 + // the '=' of '<=' is a dumb method to attempt to build
14337 + // affinity until tasks can tell us where they ran last...
14338 + if(my_queue->count <= sem->shortest_queue->count)
14339 + {
14340 + sem->shortest_queue = my_queue;
14341 + }
14342 + }
14343 + }
14344 +
14345 + /* we lose the benefit of priority inheritance (if any) */
14346 + if (tsk_rt(t)->inh_task)
14347 + clear_priority_inheritance(t);
14348 +
14349 +out:
14350 + spin_unlock_irqrestore(&sem->lock, flags);
14351 +
14352 + return err;
14353 +}
14354 +
14355 +int gsnedf_kfmlp_close(struct litmus_lock* l)
14356 +{
14357 + struct task_struct *t = current;
14358 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
14359 + struct kfmlp_queue *my_queue;
14360 + unsigned long flags;
14361 +
14362 + int owner;
14363 +
14364 + spin_lock_irqsave(&sem->lock, flags);
14365 +
14366 + my_queue = kfmlp_get_queue(sem, t);
14367 + owner = (my_queue) ? (my_queue->owner == t) : 0;
14368 +
14369 + spin_unlock_irqrestore(&sem->lock, flags);
14370 +
14371 + if (owner)
14372 + gsnedf_kfmlp_unlock(l);
14373 +
14374 + return 0;
14375 +}
14376 +
14377 +void gsnedf_kfmlp_free(struct litmus_lock* l)
14378 +{
14379 + struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
14380 + kfree(sem->queues);
14381 + kfree(sem);
14382 +}
14383 +
14384 +static struct litmus_lock_ops gsnedf_kfmlp_lock_ops = {
14385 + .close = gsnedf_kfmlp_close,
14386 + .lock = gsnedf_kfmlp_lock,
14387 + .unlock = gsnedf_kfmlp_unlock,
14388 + .deallocate = gsnedf_kfmlp_free,
14389 +};
14390 +
14391 +static struct litmus_lock* gsnedf_new_kfmlp(void* __user arg, int* ret_code)
14392 +{
14393 + struct kfmlp_semaphore* sem;
14394 + int num_resources = 0;
14395 + int i;
14396 +
14397 + if(!access_ok(VERIFY_READ, arg, sizeof(num_resources)))
14398 + {
14399 + *ret_code = -EINVAL;
14400 + return(NULL);
14401 + }
14402 + if(__copy_from_user(&num_resources, arg, sizeof(num_resources)))
14403 + {
14404 + *ret_code = -EINVAL;
14405 + return(NULL);
14406 + }
14407 + if(num_resources < 1)
14408 + {
14409 + *ret_code = -EINVAL;
14410 + return(NULL);
14411 + }
14412 +
14413 + sem = kmalloc(sizeof(*sem), GFP_KERNEL);
14414 + if(!sem)
14415 + {
14416 + *ret_code = -ENOMEM;
14417 + return NULL;
14418 + }
14419 +
14420 + sem->queues = kmalloc(sizeof(struct kfmlp_queue)*num_resources, GFP_KERNEL);
14421 + if(!sem->queues)
14422 + {
14423 + kfree(sem);
14424 + *ret_code = -ENOMEM;
14425 + return NULL;
14426 + }
14427 +
14428 + sem->litmus_lock.ops = &gsnedf_kfmlp_lock_ops;
14429 + spin_lock_init(&sem->lock);
14430 + sem->num_resources = num_resources;
14431 +
14432 + for(i = 0; i < num_resources; ++i)
14433 + {
14434 + sem->queues[i].owner = NULL;
14435 + sem->queues[i].hp_waiter = NULL;
14436 + init_waitqueue_head(&sem->queues[i].wait);
14437 + sem->queues[i].count = 0;
14438 + }
14439 +
14440 + sem->shortest_queue = &sem->queues[0];
14441 +
14442 + *ret_code = 0;
14443 + return &sem->litmus_lock;
14444 +}
14445 +
14446 +
14447 +
14448 +
14449 +
14450 /* **** lock constructor **** */
14451
14452
14453 static long gsnedf_allocate_lock(struct litmus_lock **lock, int type,
14454 - void* __user unused)
14455 + void* __user arg)
14456 {
14457 int err = -ENXIO;
14458
14459 @@ -911,7 +1992,10 @@ static long gsnedf_allocate_lock(struct litmus_lock **lock, int type,
14460 else
14461 err = -ENOMEM;
14462 break;
14463 -
14464 +
14465 + case KFMLP_SEM:
14466 + *lock = gsnedf_new_kfmlp(arg, &err);
14467 + break;
14468 };
14469
14470 return err;
14471 @@ -919,7 +2003,6 @@ static long gsnedf_allocate_lock(struct litmus_lock **lock, int type,
14472
14473 #endif
14474
14475 -
14476 static long gsnedf_activate_plugin(void)
14477 {
14478 int cpu;
14479 @@ -946,6 +2029,20 @@ static long gsnedf_activate_plugin(void)
14480 }
14481 #endif
14482 }
14483 +
14484 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
14485 + gsnedf_pending_tasklets.head = NULL;
14486 + gsnedf_pending_tasklets.tail = &(gsnedf_pending_tasklets.head);
14487 +#endif
14488 +
14489 +#ifdef CONFIG_LITMUS_SOFTIRQD
14490 + spawn_klitirqd(NULL);
14491 +#endif
14492 +
14493 +#ifdef CONFIG_LITMUS_NVIDIA
14494 + init_nvidia_info();
14495 +#endif
14496 +
14497 return 0;
14498 }
14499
14500 @@ -963,7 +2060,17 @@ static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = {
14501 .admit_task = gsnedf_admit_task,
14502 .activate_plugin = gsnedf_activate_plugin,
14503 #ifdef CONFIG_LITMUS_LOCKING
14504 - .allocate_lock = gsnedf_allocate_lock,
14505 + .allocate_lock = gsnedf_allocate_lock,
14506 + .set_prio_inh = set_priority_inheritance,
14507 + .clear_prio_inh = clear_priority_inheritance,
14508 +#endif
14509 +#ifdef CONFIG_LITMUS_SOFTIRQD
14510 + .set_prio_inh_klitirqd = set_priority_inheritance_klitirqd,
14511 + .clear_prio_inh_klitirqd = clear_priority_inheritance_klitirqd,
14512 +#endif
14513 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
14514 + .enqueue_pai_tasklet = enqueue_pai_tasklet,
14515 + .run_tasklets = run_tasklets,
14516 #endif
14517 };
14518
14519 diff --git a/litmus/sched_litmus.c b/litmus/sched_litmus.c
14520 index e695289..1bca2e1 100644
14521 --- a/litmus/sched_litmus.c
14522 +++ b/litmus/sched_litmus.c
14523 @@ -103,7 +103,9 @@ litmus_schedule(struct rq *rq, struct task_struct *prev)
14524 }
14525 #ifdef __ARCH_WANT_UNLOCKED_CTXSW
14526 if (next->oncpu)
14527 + {
14528 TRACE_TASK(next, "waiting for !oncpu");
14529 + }
14530 while (next->oncpu) {
14531 cpu_relax();
14532 mb();
14533 diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
14534 index d54886d..d977e80 100644
14535 --- a/litmus/sched_plugin.c
14536 +++ b/litmus/sched_plugin.c
14537 @@ -129,6 +129,40 @@ static long litmus_dummy_allocate_lock(struct litmus_lock **lock, int type,
14538 return -ENXIO;
14539 }
14540
14541 +static void litmus_dummy_set_prio_inh(struct task_struct* a, struct task_struct* b)
14542 +{
14543 +}
14544 +
14545 +static void litmus_dummy_clear_prio_inh(struct task_struct* t)
14546 +{
14547 +}
14548 +
14549 +#endif
14550 +
14551 +#ifdef CONFIG_LITMUS_SOFTIRQD
14552 +static void litmus_dummy_set_prio_inh_klitirq(struct task_struct* klitirqd,
14553 + struct task_struct* old_owner,
14554 + struct task_struct* new_owner)
14555 +{
14556 +}
14557 +
14558 +static void litmus_dummy_clear_prio_inh_klitirqd(struct task_struct* klitirqd,
14559 + struct task_struct* old_owner)
14560 +{
14561 +}
14562 +#endif
14563 +
14564 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
14565 +static int litmus_dummy_enqueue_pai_tasklet(struct tasklet_struct* t)
14566 +{
14567 + TRACE("%s: PAI Tasklet unsupported in this plugin!!!!!!\n", __FUNCTION__);
14568 + return(0); // failure.
14569 +}
14570 +
14571 +static void litmus_dummy_run_tasklets(struct task_struct* t)
14572 +{
14573 + //TRACE("%s: PAI Tasklet unsupported in this plugin!!!!!!\n", __FUNCTION__);
14574 +}
14575 #endif
14576
14577
14578 @@ -149,6 +183,16 @@ struct sched_plugin linux_sched_plugin = {
14579 .deactivate_plugin = litmus_dummy_deactivate_plugin,
14580 #ifdef CONFIG_LITMUS_LOCKING
14581 .allocate_lock = litmus_dummy_allocate_lock,
14582 + .set_prio_inh = litmus_dummy_set_prio_inh,
14583 + .clear_prio_inh = litmus_dummy_clear_prio_inh,
14584 +#endif
14585 +#ifdef CONFIG_LITMUS_SOFTIRQD
14586 + .set_prio_inh_klitirqd = litmus_dummy_set_prio_inh_klitirq,
14587 + .clear_prio_inh_klitirqd = litmus_dummy_clear_prio_inh_klitirqd,
14588 +#endif
14589 +#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
14590 + .enqueue_pai_tasklet = litmus_dummy_enqueue_pai_tasklet,
14591 + .run_tasklets = litmus_dummy_run_tasklets,
14592 #endif
14593 .admit_task = litmus_dummy_admit_task
14594 };
14595 @@ -187,6 +231,8 @@ int register_sched_plugin(struct sched_plugin* plugin)
14596 CHECK(deactivate_plugin);
14597 #ifdef CONFIG_LITMUS_LOCKING
14598 CHECK(allocate_lock);
14599 + CHECK(set_prio_inh);
14600 + CHECK(clear_prio_inh);
14601 #endif
14602 CHECK(admit_task);
14603
14604 diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c
14605 index 5ef8d09..d079df2 100644
14606 --- a/litmus/sched_task_trace.c
14607 +++ b/litmus/sched_task_trace.c
14608 @@ -7,6 +7,7 @@
14609 #include <linux/module.h>
14610 #include <linux/sched.h>
14611 #include <linux/percpu.h>
14612 +#include <linux/hardirq.h>
14613
14614 #include <litmus/ftdev.h>
14615 #include <litmus/litmus.h>
14616 @@ -16,13 +17,13 @@
14617 #include <litmus/ftdev.h>
14618
14619
14620 -#define NO_EVENTS (1 << CONFIG_SCHED_TASK_TRACE_SHIFT)
14621 +#define NUM_EVENTS (1 << (CONFIG_SCHED_TASK_TRACE_SHIFT+11))
14622
14623 #define now() litmus_clock()
14624
14625 struct local_buffer {
14626 - struct st_event_record record[NO_EVENTS];
14627 - char flag[NO_EVENTS];
14628 + struct st_event_record record[NUM_EVENTS];
14629 + char flag[NUM_EVENTS];
14630 struct ft_buffer ftbuf;
14631 };
14632
14633 @@ -41,7 +42,7 @@ static int __init init_sched_task_trace(void)
14634 int i, ok = 0, err;
14635 printk("Allocated %u sched_trace_xxx() events per CPU "
14636 "(buffer size: %d bytes)\n",
14637 - NO_EVENTS, (int) sizeof(struct local_buffer));
14638 + NUM_EVENTS, (int) sizeof(struct local_buffer));
14639
14640 err = ftdev_init(&st_dev, THIS_MODULE,
14641 num_online_cpus(), "sched_trace");
14642 @@ -50,7 +51,7 @@ static int __init init_sched_task_trace(void)
14643
14644 for (i = 0; i < st_dev.minor_cnt; i++) {
14645 buf = &per_cpu(st_event_buffer, i);
14646 - ok += init_ft_buffer(&buf->ftbuf, NO_EVENTS,
14647 + ok += init_ft_buffer(&buf->ftbuf, NUM_EVENTS,
14648 sizeof(struct st_event_record),
14649 buf->flag,
14650 buf->record);
14651 @@ -154,7 +155,8 @@ feather_callback void do_sched_trace_task_switch_to(unsigned long id,
14652 {
14653 struct task_struct *t = (struct task_struct*) _task;
14654 struct st_event_record* rec;
14655 - if (is_realtime(t)) {
14656 + //if (is_realtime(t)) /* comment out to trace EVERYTHING */
14657 + {
14658 rec = get_record(ST_SWITCH_TO, t);
14659 if (rec) {
14660 rec->data.switch_to.when = now();
14661 @@ -169,7 +171,8 @@ feather_callback void do_sched_trace_task_switch_away(unsigned long id,
14662 {
14663 struct task_struct *t = (struct task_struct*) _task;
14664 struct st_event_record* rec;
14665 - if (is_realtime(t)) {
14666 + //if (is_realtime(t)) /* comment out to trace EVERYTHING */
14667 + {
14668 rec = get_record(ST_SWITCH_AWAY, t);
14669 if (rec) {
14670 rec->data.switch_away.when = now();
14671 @@ -188,6 +191,9 @@ feather_callback void do_sched_trace_task_completion(unsigned long id,
14672 if (rec) {
14673 rec->data.completion.when = now();
14674 rec->data.completion.forced = forced;
14675 +#ifdef CONFIG_LITMUS_NVIDIA
14676 + rec->data.completion.nv_int_count = (u16)atomic_read(&tsk_rt(t)->nv_int_count);
14677 +#endif
14678 put_record(rec);
14679 }
14680 }
14681 @@ -239,3 +245,215 @@ feather_callback void do_sched_trace_action(unsigned long id,
14682 put_record(rec);
14683 }
14684 }
14685 +
14686 +
14687 +feather_callback void do_sched_trace_tasklet_release(unsigned long id,
14688 + unsigned long _owner)
14689 +{
14690 + struct task_struct *t = (struct task_struct*) _owner;
14691 + struct st_event_record *rec = get_record(ST_TASKLET_RELEASE, t);
14692 +
14693 + if (rec) {
14694 + rec->data.tasklet_release.when = now();
14695 + put_record(rec);
14696 + }
14697 +}
14698 +
14699 +
14700 +feather_callback void do_sched_trace_tasklet_begin(unsigned long id,
14701 + unsigned long _owner)
14702 +{
14703 + struct task_struct *t = (struct task_struct*) _owner;
14704 + struct st_event_record *rec = get_record(ST_TASKLET_BEGIN, t);
14705 +
14706 + if (rec) {
14707 + rec->data.tasklet_begin.when = now();
14708 +
14709 + if(!in_interrupt())
14710 + rec->data.tasklet_begin.exe_pid = current->pid;
14711 + else
14712 + rec->data.tasklet_begin.exe_pid = 0;
14713 +
14714 + put_record(rec);
14715 + }
14716 +}
14717 +EXPORT_SYMBOL(do_sched_trace_tasklet_begin);
14718 +
14719 +
14720 +feather_callback void do_sched_trace_tasklet_end(unsigned long id,
14721 + unsigned long _owner,
14722 + unsigned long _flushed)
14723 +{
14724 + struct task_struct *t = (struct task_struct*) _owner;
14725 + struct st_event_record *rec = get_record(ST_TASKLET_END, t);
14726 +
14727 + if (rec) {
14728 + rec->data.tasklet_end.when = now();
14729 + rec->data.tasklet_end.flushed = _flushed;
14730 +
14731 + if(!in_interrupt())
14732 + rec->data.tasklet_end.exe_pid = current->pid;
14733 + else
14734 + rec->data.tasklet_end.exe_pid = 0;
14735 +
14736 + put_record(rec);
14737 + }
14738 +}
14739 +EXPORT_SYMBOL(do_sched_trace_tasklet_end);
14740 +
14741 +
14742 +feather_callback void do_sched_trace_work_release(unsigned long id,
14743 + unsigned long _owner)
14744 +{
14745 + struct task_struct *t = (struct task_struct*) _owner;
14746 + struct st_event_record *rec = get_record(ST_WORK_RELEASE, t);
14747 +
14748 + if (rec) {
14749 + rec->data.work_release.when = now();
14750 + put_record(rec);
14751 + }
14752 +}
14753 +
14754 +
14755 +feather_callback void do_sched_trace_work_begin(unsigned long id,
14756 + unsigned long _owner,
14757 + unsigned long _exe)
14758 +{
14759 + struct task_struct *t = (struct task_struct*) _owner;
14760 + struct st_event_record *rec = get_record(ST_WORK_BEGIN, t);
14761 +
14762 + if (rec) {
14763 + struct task_struct *exe = (struct task_struct*) _exe;
14764 + rec->data.work_begin.exe_pid = exe->pid;
14765 + rec->data.work_begin.when = now();
14766 + put_record(rec);
14767 + }
14768 +}
14769 +EXPORT_SYMBOL(do_sched_trace_work_begin);
14770 +
14771 +
14772 +feather_callback void do_sched_trace_work_end(unsigned long id,
14773 + unsigned long _owner,
14774 + unsigned long _exe,
14775 + unsigned long _flushed)
14776 +{
14777 + struct task_struct *t = (struct task_struct*) _owner;
14778 + struct st_event_record *rec = get_record(ST_WORK_END, t);
14779 +
14780 + if (rec) {
14781 + struct task_struct *exe = (struct task_struct*) _exe;
14782 + rec->data.work_end.exe_pid = exe->pid;
14783 + rec->data.work_end.flushed = _flushed;
14784 + rec->data.work_end.when = now();
14785 + put_record(rec);
14786 + }
14787 +}
14788 +EXPORT_SYMBOL(do_sched_trace_work_end);
14789 +
14790 +
14791 +feather_callback void do_sched_trace_eff_prio_change(unsigned long id,
14792 + unsigned long _task,
14793 + unsigned long _inh)
14794 +{
14795 + struct task_struct *t = (struct task_struct*) _task;
14796 + struct st_event_record *rec = get_record(ST_EFF_PRIO_CHANGE, t);
14797 +
14798 + if (rec) {
14799 + struct task_struct *inh = (struct task_struct*) _inh;
14800 + rec->data.effective_priority_change.when = now();
14801 + rec->data.effective_priority_change.inh_pid = (inh != NULL) ?
14802 + inh->pid :
14803 + 0xffff;
14804 +
14805 + put_record(rec);
14806 + }
14807 +}
14808 +
14809 +/* pray for no nesting of nv interrupts on same CPU... */
14810 +struct tracing_interrupt_map
14811 +{
14812 + int active;
14813 + int count;
14814 + unsigned long data[128]; // assume nesting less than 128...
14815 + unsigned long serial[128];
14816 +};
14817 +DEFINE_PER_CPU(struct tracing_interrupt_map, active_interrupt_tracing);
14818 +
14819 +
14820 +DEFINE_PER_CPU(u32, intCounter);
14821 +
14822 +feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id,
14823 + unsigned long _device)
14824 +{
14825 + struct st_event_record *rec;
14826 + u32 serialNum;
14827 +
14828 + {
14829 + u32* serial;
14830 + struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id());
14831 + if(int_map->active == 0xcafebabe)
14832 + {
14833 + int_map->count++;
14834 + }
14835 + else
14836 + {
14837 + int_map->active = 0xcafebabe;
14838 + int_map->count = 1;
14839 + }
14840 + //int_map->data[int_map->count-1] = _device;
14841 +
14842 + serial = &per_cpu(intCounter, smp_processor_id());
14843 + *serial += num_online_cpus();
14844 + serialNum = *serial;
14845 + int_map->serial[int_map->count-1] = serialNum;
14846 + }
14847 +
14848 + rec = get_record(ST_NV_INTERRUPT_BEGIN, NULL);
14849 + if(rec) {
14850 + u32 device = _device;
14851 + rec->data.nv_interrupt_begin.when = now();
14852 + rec->data.nv_interrupt_begin.device = device;
14853 + rec->data.nv_interrupt_begin.serialNumber = serialNum;
14854 + put_record(rec);
14855 + }
14856 +}
14857 +EXPORT_SYMBOL(do_sched_trace_nv_interrupt_begin);
14858 +
14859 +/*
14860 +int is_interrupt_tracing_active(void)
14861 +{
14862 + struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id());
14863 + if(int_map->active == 0xcafebabe)
14864 + return 1;
14865 + return 0;
14866 +}
14867 +*/
14868 +
14869 +feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id, unsigned long _device)
14870 +{
14871 + struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id());
14872 + if(int_map->active == 0xcafebabe)
14873 + {
14874 + struct st_event_record *rec = get_record(ST_NV_INTERRUPT_END, NULL);
14875 +
14876 + int_map->count--;
14877 + if(int_map->count == 0)
14878 + int_map->active = 0;
14879 +
14880 + if(rec) {
14881 + u32 device = _device;
14882 + rec->data.nv_interrupt_end.when = now();
14883 + //rec->data.nv_interrupt_end.device = int_map->data[int_map->count];
14884 + rec->data.nv_interrupt_end.device = device;
14885 + rec->data.nv_interrupt_end.serialNumber = int_map->serial[int_map->count];
14886 + put_record(rec);
14887 + }
14888 + }
14889 +}
14890 +EXPORT_SYMBOL(do_sched_trace_nv_interrupt_end);
14891 +
14892 +
14893 +
14894 +
14895 +
14896 +
14897 diff --git a/litmus/sched_trace_external.c b/litmus/sched_trace_external.c
14898 new file mode 100644
14899 index 0000000..cf8e1d7
14900 --- /dev/null
14901 +++ b/litmus/sched_trace_external.c
14902 @@ -0,0 +1,64 @@
14903 +#include <linux/module.h>
14904 +
14905 +#include <litmus/trace.h>
14906 +#include <litmus/sched_trace.h>
14907 +#include <litmus/litmus.h>
14908 +
14909 +void __sched_trace_tasklet_begin_external(struct task_struct* t)
14910 +{
14911 + sched_trace_tasklet_begin(t);
14912 +}
14913 +EXPORT_SYMBOL(__sched_trace_tasklet_begin_external);
14914 +
14915 +void __sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed)
14916 +{
14917 + sched_trace_tasklet_end(t, flushed);
14918 +}
14919 +EXPORT_SYMBOL(__sched_trace_tasklet_end_external);
14920 +
14921 +
14922 +
14923 +void __sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e)
14924 +{
14925 + sched_trace_work_begin(t, e);
14926 +}
14927 +EXPORT_SYMBOL(__sched_trace_work_begin_external);
14928 +
14929 +void __sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f)
14930 +{
14931 + sched_trace_work_end(t, e, f);
14932 +}
14933 +EXPORT_SYMBOL(__sched_trace_work_end_external);
14934 +
14935 +
14936 +
14937 +void __sched_trace_nv_interrupt_begin_external(u32 device)
14938 +{
14939 + //unsigned long _device = device;
14940 + sched_trace_nv_interrupt_begin((unsigned long)device);
14941 +}
14942 +EXPORT_SYMBOL(__sched_trace_nv_interrupt_begin_external);
14943 +
14944 +void __sched_trace_nv_interrupt_end_external(u32 device)
14945 +{
14946 + //unsigned long _device = device;
14947 + sched_trace_nv_interrupt_end((unsigned long)device);
14948 +}
14949 +EXPORT_SYMBOL(__sched_trace_nv_interrupt_end_external);
14950 +
14951 +
14952 +#ifdef CONFIG_LITMUS_NVIDIA
14953 +
14954 +#define EXX_TS(evt) \
14955 +void __##evt(void) { evt; } \
14956 +EXPORT_SYMBOL(__##evt);
14957 +
14958 +EXX_TS(TS_NV_TOPISR_START)
14959 +EXX_TS(TS_NV_TOPISR_END)
14960 +EXX_TS(TS_NV_BOTISR_START)
14961 +EXX_TS(TS_NV_BOTISR_END)
14962 +EXX_TS(TS_NV_RELEASE_BOTISR_START)
14963 +EXX_TS(TS_NV_RELEASE_BOTISR_END)
14964 +
14965 +#endif
14966 +
14967