Attachment 'omip-ecrts13.patch'

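This patch against LITMUS^RT accompanies the ECRTS'13 work on the OMIP (O(m) independence-preserving protocol). It adds the OMLP_SEM and OMIP_SEM object types to the FDSO layer, reworks the C-EDF plugin to use per-cluster job queues, per-task release timers, priority donation, and migratory priority inheritance, switches scheduler-plugin changes to stop_machine(), and adds a SCHED_DEBUG_DUMP_ON_OOPS option that dumps the TRACE() buffer from panic(), BUG(), and oops handlers.

For orientation only, the sketch below shows how a userspace real-time task might exercise the new OMIP_SEM type once the patch is applied. It is not part of the patch: it assumes the usual liblitmus calls (od_open(), litmus_lock(), litmus_unlock(), od_close()), and the function, parameter, and constant names are illustrative; OMIP_SEM_TYPE is a hand-defined constant mirroring the value 7 added to include/litmus/fdso.h below, which liblitmus itself does not ship.

    /* Hypothetical usage sketch -- not part of the patch. */
    #include <fcntl.h>
    #include <unistd.h>
    #include <litmus.h>

    #define OMIP_SEM_TYPE 7  /* assumed to mirror OMIP_SEM in include/litmus/fdso.h */

    static int with_omip_lock(const char *ns_file, int lock_id)
    {
        int fd, od;

        /* Any file shared by all participating tasks serves as the lock namespace. */
        fd = open(ns_file, O_RDWR | O_CREAT, 0666);
        if (fd < 0)
            return -1;

        /* Attach (or create) lock object 'lock_id' of the assumed OMIP_SEM type. */
        od = od_open(fd, OMIP_SEM_TYPE, lock_id);
        if (od < 0) {
            close(fd);
            return -1;
        }

        litmus_lock(od);    /* serviced by the C-EDF lock ops this patch adds,
                             * cf. priodon_become_eligible() in sched_cedf.c */
        /* ... critical section on the shared resource ... */
        litmus_unlock(od);  /* cf. priodon_complete_request() */

        od_close(od);
        close(fd);
        return 0;
    }
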
   1 diff --git a/include/litmus/debug_trace.h b/include/litmus/debug_trace.h
   2 index 48d086d..99be3af 100644
   3 --- a/include/litmus/debug_trace.h
   4 +++ b/include/litmus/debug_trace.h
   5 @@ -4,10 +4,18 @@
   6  #ifdef CONFIG_SCHED_DEBUG_TRACE
   7  void sched_trace_log_message(const char* fmt, ...);
   8  void dump_trace_buffer(int max);
   9 +
  10  #else
  11  
  12  #define sched_trace_log_message(fmt, ...)
  13 +#define dump_trace_buffer(max)
  14 +
  15 +#endif
  16  
  17 +#ifdef CONFIG_SCHED_DEBUG_DUMP_ON_OOPS
  18 +#define litmus_dump_trace() dump_trace_buffer(0)
  19 +#else
  20 +#define litmus_dump_trace()
  21  #endif
  22  
  23  extern atomic_t __log_seq_no;
  24 @@ -28,8 +36,11 @@ extern atomic_t __log_seq_no;
  25  				TRACE_ARGS,  ## args)
  26  
  27  #define TRACE_TASK(t, fmt, args...)			\
  28 -	TRACE("(%s/%d:%d) " fmt, (t)->comm, (t)->pid,	\
  29 -	      (t)->rt_param.job_params.job_no,  ##args)
  30 +	TRACE("(%s/%d:%d) " fmt,			 \
  31 +	      t ? (t)->comm : "null",			 \
  32 +	      t ? (t)->pid : 0,				 \
  33 +	      t ? (t)->rt_param.job_params.job_no : 0,	 \
  34 +	      ##args)
  35  
  36  #define TRACE_CUR(fmt, args...) \
  37  	TRACE_TASK(current, fmt, ## args)
  38 diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h
  39 index f2115b8..181da1c 100644
  40 --- a/include/litmus/fdso.h
  41 +++ b/include/litmus/fdso.h
  42 @@ -1,6 +1,6 @@
  43  /* fdso.h - file descriptor attached shared objects
  44   *
  45 - * (c) 2007 B. Brandenburg, LITMUS^RT project
  46 + * (c) 2007-2013 B. Brandenburg, LITMUS^RT project
  47   */
  48  
  49  #ifndef _LINUX_FDSO_H_
  50 @@ -26,7 +26,10 @@ typedef enum  {
  51  
  52  	PCP_SEM         = 5,
  53  
  54 -	MAX_OBJ_TYPE	= 5
  55 +	OMLP_SEM	= 6,
  56 +	OMIP_SEM	= 7,
  57 +
  58 +	MAX_OBJ_TYPE	= 7
  59  } obj_type_t;
  60  
  61  struct inode_obj_id {
  62 diff --git a/include/litmus/rt_domain.h b/include/litmus/rt_domain.h
  63 index ac24929..c341258 100644
  64 --- a/include/litmus/rt_domain.h
  65 +++ b/include/litmus/rt_domain.h
  66 @@ -60,8 +60,14 @@ struct release_heap {
  67  	/* used to delegate releases */
  68  	struct hrtimer_start_on_info	info;
  69  #endif
  70 -	/* required for the timer callback */
  71 -	rt_domain_t*			dom;
  72 +
  73 +	union {
  74 +		/* required for the timer callback */
  75 +		rt_domain_t*			dom;
  76 +
  77 +		/* To simplify per-task timers. */
  78 +		struct task_struct*		task;
  79 +	};
  80  };
  81  
  82  
  83 diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
  84 index 4cd06dd..4ddc790 100644
  85 --- a/include/litmus/rt_param.h
  86 +++ b/include/litmus/rt_param.h
  87 @@ -119,6 +119,8 @@ struct _rt_domain;
  88  struct bheap_node;
  89  struct release_heap;
  90  
  91 +struct migratory_prio_inh;
  92 +
  93  struct rt_job {
  94  	/* Time instant the the job was or will be released.  */
  95  	lt_t	release;
  96 @@ -182,7 +184,14 @@ struct rt_param {
  97  	 * could point to self if PI does not result in
  98  	 * an increased task priority.
  99  	 */
 100 -	 struct task_struct*	inh_task;
 101 +	struct task_struct*	inh_task;
 102 +
 103 +	int			priodon_state;
 104 +
 105 +	struct migratory_prio_inh* mpi;
 106 +
 107 +	/* priority donation support */
 108 +	int			donor_cpu;
 109  
 110  #ifdef CONFIG_NP_SECTION
 111  	/* For the FMLP under PSN-EDF, it is required to make the task
 112 diff --git a/kernel/panic.c b/kernel/panic.c
 113 index 6923167..d87528c 100644
 114 --- a/kernel/panic.c
 115 +++ b/kernel/panic.c
 116 @@ -24,6 +24,8 @@
 117  #include <linux/nmi.h>
 118  #include <linux/dmi.h>
 119  
 120 +#include <litmus/debug_trace.h>
 121 +
 122  #define PANIC_TIMER_STEP 100
 123  #define PANIC_BLINK_SPD 18
 124  
 125 @@ -77,6 +79,7 @@ NORET_TYPE void panic(const char * fmt, ...)
 126  	vsnprintf(buf, sizeof(buf), fmt, args);
 127  	va_end(args);
 128  	printk(KERN_EMERG "Kernel panic - not syncing: %s\n",buf);
 129 +	litmus_dump_trace();
 130  #ifdef CONFIG_DEBUG_BUGVERBOSE
 131  	dump_stack();
 132  #endif
 133 @@ -354,6 +357,7 @@ void oops_exit(void)
 134  {
 135  	do_oops_enter_exit();
 136  	print_oops_end_marker();
 137 +	litmus_dump_trace();
 138  	kmsg_dump(KMSG_DUMP_OOPS);
 139  }
 140  
 141 diff --git a/kernel/sched.c b/kernel/sched.c
 142 index c4b6bd5..0d013b0 100644
 143 --- a/kernel/sched.c
 144 +++ b/kernel/sched.c
 145 @@ -5317,6 +5317,7 @@ recheck:
 146  
 147  	p->sched_reset_on_fork = reset_on_fork;
 148  
 149 +	preempt_disable();
 150  	if (p->policy == SCHED_LITMUS)
 151  		litmus_exit_task(p);
 152  
 153 @@ -5330,6 +5331,8 @@ recheck:
 154  		litmus->task_new(p, on_rq, running);
 155  	}
 156  
 157 +	preempt_enable();
 158 +
 159  	if (running)
 160  		p->sched_class->set_curr_task(rq);
 161  	if (on_rq)
 162 diff --git a/lib/bug.c b/lib/bug.c
 163 index 1955209..eac01fe 100644
 164 --- a/lib/bug.c
 165 +++ b/lib/bug.c
 166 @@ -43,6 +43,7 @@
 167  #include <linux/bug.h>
 168  #include <linux/sched.h>
 169  
 170 +#include <litmus/debug_trace.h>
 171  extern const struct bug_entry __start___bug_table[], __stop___bug_table[];
 172  
 173  static inline unsigned long bug_addr(const struct bug_entry *bug)
 174 @@ -179,5 +180,7 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
 175  		       "[verbose debug info unavailable]\n",
 176  		       (void *)bugaddr);
 177  
 178 +	litmus_dump_trace();
 179 +
 180  	return BUG_TRAP_TYPE_BUG;
 181  }
 182 diff --git a/litmus/Kconfig b/litmus/Kconfig
 183 index bd6635c..1e6cbfe 100644
 184 --- a/litmus/Kconfig
 185 +++ b/litmus/Kconfig
 186 @@ -263,6 +263,17 @@ config SCHED_DEBUG_TRACE_CALLER
 187  
 188  	 If unsure, say No.
 189  
 190 +config SCHED_DEBUG_DUMP_ON_OOPS
 191 +       bool "Dump TRACE() log in case of BUG/OOPS"
 192 +       depends on SCHED_DEBUG_TRACE
 193 +       default n
 194 +       help
 195 +         With this option enabled, panic() and friends try to dump the remaining
 196 +	 contents of the TRACE() buffer to the console. This creates a fair bit
 197 +	 of clutter, but can be helpful in getting the last messages out.
 198 +
 199 +	 If unsure, say No.
 200 +
 201  config PREEMPT_STATE_TRACE
 202         bool "Trace preemption state machine transitions"
 203         depends on SCHED_DEBUG_TRACE && DEBUG_KERNEL
 204 diff --git a/litmus/fdso.c b/litmus/fdso.c
 205 index 250377d..2a88197 100644
 206 --- a/litmus/fdso.c
 207 +++ b/litmus/fdso.c
 208 @@ -1,6 +1,6 @@
 209  /* fdso.c - file descriptor attached shared objects
 210   *
 211 - * (c) 2007 B. Brandenburg, LITMUS^RT project
 212 + * (c) 2007-2013 B. Brandenburg, LITMUS^RT project
 213   *
 214   * Notes:
 215   *   - objects descriptor (OD) tables are not cloned during a fork.
 216 @@ -27,10 +27,14 @@ static const struct fdso_ops* fdso_ops[] = {
 217  	&generic_lock_ops, /* MPCP_VS_SEM */
 218  	&generic_lock_ops, /* DPCP_SEM */
 219  	&generic_lock_ops, /* PCP_SEM */
 220 +	&generic_lock_ops, /* OMLP_SEM */
 221 +	&generic_lock_ops, /* OMIP_SEM */
 222  };
 223  
 224  static int fdso_create(void** obj_ref, obj_type_t type, void* __user config)
 225  {
 226 +	BUILD_BUG_ON(ARRAY_SIZE(fdso_ops) != MAX_OBJ_TYPE + 1);
 227 +
 228  	if (fdso_ops[type]->create)
 229  		return fdso_ops[type]->create(obj_ref, type, config);
 230  	else
 231 diff --git a/litmus/litmus.c b/litmus/litmus.c
 232 index 9c6b738..dc94be7 100644
 233 --- a/litmus/litmus.c
 234 +++ b/litmus/litmus.c
 235 @@ -10,6 +10,7 @@
 236  #include <linux/module.h>
 237  #include <linux/slab.h>
 238  #include <linux/reboot.h>
 239 +#include <linux/stop_machine.h>
 240  
 241  #include <litmus/litmus.h>
 242  #include <litmus/bheap.h>
 243 @@ -24,9 +25,6 @@
 244  
 245  /* Number of RT tasks that exist in the system */
 246  atomic_t rt_task_count 		= ATOMIC_INIT(0);
 247 -static DEFINE_RAW_SPINLOCK(task_transition_lock);
 248 -/* synchronize plugin switching */
 249 -atomic_t cannot_use_plugin	= ATOMIC_INIT(0);
 250  
 251  /* Give log messages sequential IDs. */
 252  atomic_t __log_seq_no = ATOMIC_INIT(0);
 253 @@ -322,10 +320,12 @@ static void reinit_litmus_state(struct task_struct* p, int restore)
 254  long litmus_admit_task(struct task_struct* tsk)
 255  {
 256  	long retval = 0;
 257 -	unsigned long flags;
 258  
 259  	BUG_ON(is_realtime(tsk));
 260  
 261 +	tsk_rt(tsk)->heap_node = NULL;
 262 +	tsk_rt(tsk)->rel_heap = NULL;
 263 +
 264  	if (get_rt_relative_deadline(tsk) == 0 ||
 265  	    get_exec_cost(tsk) >
 266  			min(get_rt_relative_deadline(tsk), get_rt_period(tsk)) ) {
 267 @@ -347,9 +347,6 @@ long litmus_admit_task(struct task_struct* tsk)
 268  
 269  	INIT_LIST_HEAD(&tsk_rt(tsk)->list);
 270  
 271 -	/* avoid scheduler plugin changing underneath us */
 272 -	raw_spin_lock_irqsave(&task_transition_lock, flags);
 273 -
 274  	/* allocate heap node for this task */
 275  	tsk_rt(tsk)->heap_node = bheap_node_alloc(GFP_ATOMIC);
 276  	tsk_rt(tsk)->rel_heap = release_heap_alloc(GFP_ATOMIC);
 277 @@ -357,15 +354,14 @@ long litmus_admit_task(struct task_struct* tsk)
 278  	if (!tsk_rt(tsk)->heap_node || !tsk_rt(tsk)->rel_heap) {
 279  		printk(KERN_WARNING "litmus: no more heap node memory!?\n");
 280  
 281 -		bheap_node_free(tsk_rt(tsk)->heap_node);
 282 -		release_heap_free(tsk_rt(tsk)->rel_heap);
 283 -
 284  		retval = -ENOMEM;
 285 -		goto out_unlock;
 286 +		goto out;
 287  	} else {
 288  		bheap_node_init(&tsk_rt(tsk)->heap_node, tsk);
 289  	}
 290  
 291 +	preempt_disable();
 292 +
 293  	retval = litmus->admit_task(tsk);
 294  
 295  	if (!retval) {
 296 @@ -374,9 +370,13 @@ long litmus_admit_task(struct task_struct* tsk)
 297  		atomic_inc(&rt_task_count);
 298  	}
 299  
 300 -out_unlock:
 301 -	raw_spin_unlock_irqrestore(&task_transition_lock, flags);
 302 +	preempt_enable();
 303 +
 304  out:
 305 +	if (retval) {
 306 +		bheap_node_free(tsk_rt(tsk)->heap_node);
 307 +		release_heap_free(tsk_rt(tsk)->rel_heap);
 308 +	}
 309  	return retval;
 310  }
 311  
 312 @@ -396,37 +396,10 @@ void litmus_exit_task(struct task_struct* tsk)
 313  	}
 314  }
 315  
 316 -/* IPI callback to synchronize plugin switching */
 317 -static void synch_on_plugin_switch(void* info)
 318 -{
 319 -	atomic_inc(&cannot_use_plugin);
 320 -	while (atomic_read(&cannot_use_plugin) > 0)
 321 -		cpu_relax();
 322 -}
 323 -
 324 -/* Switching a plugin in use is tricky.
 325 - * We must watch out that no real-time tasks exists
 326 - * (and that none is created in parallel) and that the plugin is not
 327 - * currently in use on any processor (in theory).
 328 - */
 329 -int switch_sched_plugin(struct sched_plugin* plugin)
 330 +static int do_plugin_switch(void *_plugin)
 331  {
 332 -	unsigned long flags;
 333 -	int ret = 0;
 334 -
 335 -	BUG_ON(!plugin);
 336 -
 337 -	/* forbid other cpus to use the plugin */
 338 -	atomic_set(&cannot_use_plugin, 1);
 339 -	/* send IPI to force other CPUs to synch with us */
 340 -	smp_call_function(synch_on_plugin_switch, NULL, 0);
 341 -
 342 -	/* wait until all other CPUs have started synch */
 343 -	while (atomic_read(&cannot_use_plugin) < num_online_cpus())
 344 -		cpu_relax();
 345 -
 346 -	/* stop task transitions */
 347 -	raw_spin_lock_irqsave(&task_transition_lock, flags);
 348 +	int ret;
 349 +	struct sched_plugin* plugin = _plugin;
 350  
 351  	/* don't switch if there are active real-time tasks */
 352  	if (atomic_read(&rt_task_count) == 0) {
 353 @@ -444,11 +417,24 @@ int switch_sched_plugin(struct sched_plugin* plugin)
 354  	} else
 355  		ret = -EBUSY;
 356  out:
 357 -	raw_spin_unlock_irqrestore(&task_transition_lock, flags);
 358 -	atomic_set(&cannot_use_plugin, 0);
 359  	return ret;
 360  }
 361  
 362 +/* Switching a plugin in use is tricky.
 363 + * We must watch out that no real-time tasks exists
 364 + * (and that none is created in parallel) and that the plugin is not
 365 + * currently in use on any processor (in theory).
 366 + */
 367 +int switch_sched_plugin(struct sched_plugin* plugin)
 368 +{
 369 +	BUG_ON(!plugin);
 370 +
 371 +	if (atomic_read(&rt_task_count) == 0)
 372 +		return stop_machine(do_plugin_switch, plugin, NULL);
 373 +	else
 374 +		return -EBUSY;
 375 +}
 376 +
 377  /* Called upon fork.
 378   * p is the newly forked task.
 379   */
 380 diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
 381 index b45b46f..6762d48 100644
 382 --- a/litmus/sched_cedf.c
 383 +++ b/litmus/sched_cedf.c
 384 @@ -1,7 +1,7 @@
 385  /*
 386   * litmus/sched_cedf.c
 387   *
 388 - * Implementation of the C-EDF scheduling algorithm.
 389 + * Implementation of the C-EDF scheduling algorithm with priority donation.
 390   *
 391   * This implementation is based on G-EDF:
 392   * - CPUs are clustered around L2 or L3 caches.
 393 @@ -15,17 +15,15 @@
 394   *   supported clusters are: L1 (private cache: pedf), L2, L3, ALL (all
 395   *   online_cpus are placed in a single cluster).
 396   *
 397 - *   For details on functions, take a look at sched_gsn_edf.c
 398 - *
 399   * Currently, we do not support changes in the number of online cpus.
 400   * If the num_online_cpus() dynamically changes, the plugin is broken.
 401   *
 402   * This version uses the simple approach and serializes all scheduling
 403 - * decisions by the use of a queue lock. This is probably not the
 404 - * best way to do it, but it should suffice for now.
 405 + * decisions by the use of a queue lock.
 406   */
 407  
 408  #include <linux/spinlock.h>
 409 +#include <linux/cpumask.h>
 410  #include <linux/percpu.h>
 411  #include <linux/sched.h>
 412  #include <linux/slab.h>
 413 @@ -39,6 +37,9 @@
 414  #include <litmus/sched_plugin.h>
 415  #include <litmus/edf_common.h>
 416  #include <litmus/sched_trace.h>
 417 +#include <litmus/trace.h>
 418 +#include <litmus/fdso.h>
 419 +#include <litmus/wait.h>
 420  
 421  #include <litmus/clustered.h>
 422  
 423 @@ -70,52 +71,228 @@ typedef struct  {
 424  	struct clusterdomain*	cluster;	/* owning cluster */
 425  	struct task_struct*	linked;		/* only RT tasks */
 426  	struct task_struct*	scheduled;	/* only RT tasks */
 427 -	atomic_t		will_schedule;	/* prevent unneeded IPIs */
 428  	struct bheap_node*	hn;
 429 +
 430 +	struct task_struct*	pd_task;	/* priority donation */
 431  } cpu_entry_t;
 432  
 433  /* one cpu_entry_t per CPU */
 434  DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries);
 435  
 436 -#define set_will_schedule() \
 437 -	(atomic_set(&__get_cpu_var(cedf_cpu_entries).will_schedule, 1))
 438 -#define clear_will_schedule() \
 439 -	(atomic_set(&__get_cpu_var(cedf_cpu_entries).will_schedule, 0))
 440 -#define test_will_schedule(cpu) \
 441 -	(atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule))
 442 -
 443  /*
 444   * In C-EDF there is a cedf domain _per_ cluster
 445   * The number of clusters is dynamically determined accordingly to the
 446   * total cpu number and the cluster size
 447   */
 448  typedef struct clusterdomain {
 449 -	/* rt_domain for this cluster */
 450 -	rt_domain_t	domain;
 451 +	/* lock for this cluster */
 452 +	raw_spinlock_t		cluster_lock;
 453 +	/* ready queue for this cluster */
 454 +	struct list_head        job_queue;
 455  	/* cpus in this cluster */
 456 -	cpu_entry_t*	*cpus;
 457 +	cpu_entry_t*		*cpus;
 458 +	/* which cluster is this? */
 459 +	int			cluster;
 460 +	/* how many CPUs are in this cluster? */
 461 +	int			num_cpus;
 462  	/* map of this cluster cpus */
 463 -	cpumask_var_t	cpu_map;
 464 +	cpumask_var_t		cpu_map;
 465  	/* the cpus queue themselves according to priority in here */
 466 -	struct bheap_node *heap_node;
 467 -	struct bheap      cpu_heap;
 468 -	/* lock for this cluster */
 469 -#define cluster_lock domain.ready_lock
 470 +	struct bheap_node	*heap_node;
 471 +	struct bheap		cpu_heap;
 472  } cedf_domain_t;
 473  
 474  /* a cedf_domain per cluster; allocation is done at init/activation time */
 475  cedf_domain_t *cedf;
 476  
 477 +#define remote_cpu(cpu)		(&per_cpu(cedf_cpu_entries, cpu))
 478  #define remote_cluster(cpu)	((cedf_domain_t *) per_cpu(cedf_cpu_entries, cpu).cluster)
 479  #define task_cpu_cluster(task)	remote_cluster(get_partition(task))
 480  
 481  /* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling
 482   * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose
 483   * information during the initialization of the plugin (e.g., topology)
 484 -#define WANT_ALL_SCHED_EVENTS
 485 +
 486   */
 487 +#define WANT_ALL_SCHED_EVENTS
 488  #define VERBOSE_INIT
 489  
 490 +#define NOT_WAITING 0
 491 +#define WAITING_TO_BE_LINKED 1
 492 +#define WAITING_FOR_DONATION_END 2
 493 +
 494 +
 495 +static int is_waiting_to_be_linked(struct task_struct *t)
 496 +{
 497 +	return t && t->rt_param.priodon_state == WAITING_TO_BE_LINKED;
 498 +}
 499 +
 500 +static int is_waiting_for_donation_end(struct task_struct *t)
 501 +{
 502 +	return t && t->rt_param.priodon_state == WAITING_FOR_DONATION_END;
 503 +}
 504 +
 505 +
 506 +/* Used when the task is currently not subject to cross-cluster
 507 + * migration. */
 508 +#define NOT_XCLUSTER_MIGRATORY (-3)
 509 +#define ON_HOME_CLUSTER (-2)
 510 +
 511 +struct migratory_prio_inh {
 512 +	raw_spinlock_t lock;
 513 +
 514 +	struct task_struct *owner;
 515 +	struct cpumask scheduling_candidates;
 516 +
 517 +	int scheduled_on;
 518 +};
 519 +
 520 +#define MPI_EXIT_CS ((struct migratory_prio_inh *) 0x123)
 521 +
 522 +void mpi_init(struct migratory_prio_inh *mpi)
 523 +{
 524 +	raw_spin_lock_init(&mpi->lock);
 525 +	cpumask_clear(&mpi->scheduling_candidates);
 526 +	mpi->scheduled_on = NO_CPU;
 527 +	mpi->owner = NULL;
 528 +}
 529 +
 530 +/* assumes IRQ off */
 531 +struct task_struct* mpi_try_to_schedule(struct migratory_prio_inh *mpi)
 532 +{
 533 +	struct task_struct *t = NULL;
 534 +	int need_notify = 1;
 535 +
 536 +	/* Check whether we raced with the end of a critical section that
 537 +	 * was migrated to a remote core. The remote core will send an IPI
 538 +	 * when it is safe to schedule this task. */
 539 +	if (mpi == MPI_EXIT_CS)
 540 +		return NULL;
 541 +
 542 +	raw_spin_lock(&mpi->lock);
 543 +
 544 +	BUG_ON(!mpi->owner);
 545 +
 546 +	TRACE("trying to schedule mpi-task mpi:%p owner:%s/%d scheduled_on=%d\n",
 547 +	      mpi, mpi->owner->comm, mpi->owner->pid,
 548 +	      mpi->scheduled_on);
 549 +
 550 +	if (mpi->scheduled_on == NO_CPU &&
 551 +	    is_present(mpi->owner) &&
 552 +	    is_running(mpi->owner)) {
 553 +		/* it's not scheduled, so grab it */
 554 +		mpi->scheduled_on = smp_processor_id();
 555 +		t = mpi->owner;
 556 +		cpumask_clear_cpu(smp_processor_id(),
 557 +				  &mpi->scheduling_candidates);
 558 +		need_notify = 0;
 559 +
 560 +	} else if (mpi->scheduled_on == smp_processor_id()) {
 561 +		/* we already got it, nothing to change */
 562 +		if (is_present(mpi->owner) && is_running(mpi->owner)) {
 563 +			need_notify = 0;
 564 +			t = mpi->owner;
 565 +		} else {
 566 +			/* we can't actually schedule it */
 567 +			mpi->scheduled_on = NO_CPU;
 568 +		}
 569 +	}
 570 +
 571 +	if (need_notify) {
 572 +		/* can't get to it, let's register our interest */
 573 +		cpumask_set_cpu(smp_processor_id(),
 574 +				&mpi->scheduling_candidates);
 575 +	}
 576 +
 577 +	raw_spin_unlock(&mpi->lock);
 578 +
 579 +	return t;
 580 +}
 581 +
 582 +/* preempt - force a CPU to reschedule
 583 + */
 584 +static void preempt(cpu_entry_t *entry)
 585 +{
 586 +	preempt_if_preemptable(entry->scheduled, entry->cpu);
 587 +}
 588 +
 589 +void __mpi_notify_cpus(struct migratory_prio_inh *mpi)
 590 +{
 591 +	int cpu;
 592 +
 593 +	for_each_cpu(cpu, &mpi->scheduling_candidates) {
 594 +		TRACE("notifying P%d\n", cpu);
 595 +		preempt(remote_cpu(cpu)); /* XXX use proper smp_call */
 596 +	}
 597 +
 598 +	cpumask_clear(&mpi->scheduling_candidates);
 599 +}
 600 +
 601 +void mpi_notify_cpus(struct migratory_prio_inh *mpi)
 602 +{
 603 +	unsigned long flags;
 604 +
 605 +	raw_spin_lock_irqsave(&mpi->lock, flags);
 606 +	__mpi_notify_cpus(mpi);
 607 +	raw_spin_unlock_irqrestore(&mpi->lock, flags);
 608 +}
 609 +
 610 +
 611 +void mpi_deschedule(struct migratory_prio_inh *mpi,
 612 +		    struct task_struct *preempted)
 613 +{
 614 +	unsigned long flags;
 615 +
 616 +	raw_spin_lock_irqsave(&mpi->lock, flags);
 617 +
 618 +	if (preempted == mpi->owner) {
 619 +		mpi->scheduled_on = NO_CPU;
 620 +		/* XXX copy out mask and drop lock first? */
 621 +		__mpi_notify_cpus(mpi);
 622 +	}
 623 +
 624 +	raw_spin_unlock_irqrestore(&mpi->lock, flags);
 625 +}
 626 +
 627 +/* assumes IRQ off */
 628 +void mpi_update_owner(struct migratory_prio_inh *mpi,
 629 +		      struct task_struct *new_owner,
 630 +		      int running_on)
 631 +{
 632 +	raw_spin_lock(&mpi->lock);
 633 +
 634 +	mpi->owner = new_owner;
 635 +	mpi->scheduled_on = running_on;
 636 +	if (running_on == NO_CPU)
 637 +		__mpi_notify_cpus(mpi);
 638 +
 639 +	raw_spin_unlock(&mpi->lock);
 640 +}
 641 +
 642 +/* assumes preempt off */
 643 +static void mpi_migrate_back(void)
 644 +{
 645 +	struct task_struct *t = current;
 646 +	int cpu;
 647 +
 648 +	cpu = smp_processor_id();
 649 +
 650 +	if (task_cpu_cluster(t) != remote_cluster(cpu) ||
 651 +	    remote_cpu(cpu)->linked != t) {
 652 +
 653 +		TRACE_CUR("not on the linked CPU (%d), migrating back (part:%d)\n",
 654 +			  tsk_rt(t)->linked_on,
 655 +			  get_partition(t));
 656 +
 657 +		preempt_enable_no_resched();
 658 +
 659 +		schedule();
 660 +
 661 +		preempt_disable();
 662 +	} else
 663 +		tsk_rt(t)->mpi = NULL;
 664 +}
 665 +
 666  static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
 667  {
 668  	cpu_entry_t *a, *b;
 669 @@ -150,6 +327,49 @@ static cpu_entry_t* lowest_prio_cpu(cedf_domain_t *cluster)
 670  	return hn->value;
 671  }
 672  
 673 +static struct task_struct* select_job(struct task_struct *candidate)
 674 +{
 675 +	/* Special case: delegate can be used to mark a task as
 676 +	 * pseudo-suspended. It should not be scheduled in this case. */
 677 +	if (is_waiting_to_be_linked(candidate) ||
 678 +	    is_waiting_for_donation_end(candidate))
 679 +		candidate = NULL;
 680 +
 681 +	if (candidate && !(is_present(candidate) && is_running(candidate))) {
 682 +		TRACE_TASK(candidate, "should be scheduled, but is not running\n");
 683 +		candidate = NULL;
 684 +	}
 685 +
 686 +	return candidate;
 687 +}
 688 +
 689 +static int cedf_is_linked(struct task_struct* task)
 690 +{
 691 +	return task->rt_param.linked_on != NO_CPU;
 692 +}
 693 +
 694 +static cpu_entry_t *cedf_linked_cpu(struct task_struct *task)
 695 +{
 696 +	if (cedf_is_linked(task))
 697 +		return &per_cpu(cedf_cpu_entries, task->rt_param.linked_on);
 698 +	else
 699 +		return NULL;
 700 +}
 701 +
 702 +static int cedf_preempt_linked_cpu(struct task_struct* task)
 703 +{
 704 +	cpu_entry_t *cpu = cedf_linked_cpu(task);
 705 +	struct task_struct *should_schedule = select_job(task);
 706 +
 707 +	if (cpu && should_schedule &&
 708 +	    cpu->scheduled != should_schedule) {
 709 +		preempt(cpu);
 710 +		return 1;
 711 +	} else
 712 +		return 0;
 713 +}
 714 +
 715 +static void job_completion(struct task_struct *t, int forced);
 716  
 717  /* link_task_to_cpu - Update the link of a CPU.
 718   *                    Handles the case where the to-be-linked task is already
 719 @@ -159,22 +379,33 @@ static noinline void link_task_to_cpu(struct task_struct* linked,
 720  				      cpu_entry_t *entry)
 721  {
 722  	cpu_entry_t *sched;
 723 -	struct task_struct* tmp;
 724 +	struct task_struct *tmp, *unlinked;
 725  	int on_cpu;
 726  
 727  	BUG_ON(linked && !is_realtime(linked));
 728  
 729 -	/* Currently linked task is set to be unlinked. */
 730 -	if (entry->linked) {
 731 -		entry->linked->rt_param.linked_on = NO_CPU;
 732 +	unlinked = entry->linked;
 733 +
 734 +	/* The currently linked task will be unlinked. */
 735 +	if (unlinked) {
 736 +		unlinked->rt_param.linked_on = NO_CPU;
 737  	}
 738  
 739  	/* Link new task to CPU. */
 740  	if (linked) {
 741 -		tsk_rt(linked)->completed = 0;
 742 -		/* handle task is already scheduled somewhere! */
 743 +
 744 +		/* Handle the case where the task is already scheduled somewhere!
 745 +		 * Special case with m-PI: it might be scheduled outside
 746 +		 * of our cluster.
 747 +		 */
 748  		on_cpu = linked->rt_param.scheduled_on;
 749 -		if (on_cpu != NO_CPU) {
 750 +		if (on_cpu != NO_CPU &&
 751 +		    remote_cluster(on_cpu) != task_cpu_cluster(linked))  {
 752 +			TRACE_TASK(linked, "linked, but already scheduled "
 753 +				   "outside of cluster on CPU=%d\n", on_cpu);
 754 +		}
 755 +		if (on_cpu != NO_CPU &&
 756 +		    remote_cluster(on_cpu) == task_cpu_cluster(linked))  {
 757  			sched = &per_cpu(cedf_cpu_entries, on_cpu);
 758  			/* this should only happen if not linked already */
 759  			BUG_ON(sched->linked == linked);
 760 @@ -188,17 +419,46 @@ static noinline void link_task_to_cpu(struct task_struct* linked,
 761  				TRACE_TASK(linked,
 762  					   "already scheduled on %d, updating link.\n",
 763  					   sched->cpu);
 764 +
 765  				tmp = sched->linked;
 766 +
 767 +				/* need to get priority donation right */
 768 +				if (tmp && sched->pd_task == sched->linked) {
 769 +					/* currently in a CS */
 770 +					BUG_ON(linked->rt_param.donor_cpu != NO_CPU);
 771 +					/* Entry just had a completion, there cannot
 772 +					 * be a CS in progress. */
 773 +					BUG_ON(entry->pd_task != NULL);
 774 +
 775 +					sched->pd_task = NULL;
 776 +					entry->pd_task = tmp;
 777 +					tmp->rt_param.donor_cpu = sched->cpu;
 778 +				} else if (is_waiting_for_donation_end(tmp)) {
 779 +					preempt(entry);
 780 +				}
 781 +
 782  				linked->rt_param.linked_on = sched->cpu;
 783  				sched->linked = linked;
 784  				update_cpu_position(sched);
 785  				linked = tmp;
 786  			}
 787 +		} else if (linked->rt_param.donor_cpu != NO_CPU &&
 788 +			   entry->cpu != linked->rt_param.donor_cpu) {
 789 +			/* It's not linked, it's not scheduled, but it's got a
 790 +			 * donor. We need to patch up the priority donation
 791 +			 * info. */
 792 +			entry->pd_task = linked;
 793 +			sched = remote_cpu(linked->rt_param.donor_cpu);
 794 +			linked->rt_param.donor_cpu = entry->cpu;
 795 +			sched->pd_task = NULL;
 796 +			if (is_waiting_for_donation_end(sched->linked))
 797 +				preempt(sched);
 798  		}
 799  		if (linked) /* might be NULL due to swap */
 800  			linked->rt_param.linked_on = entry->cpu;
 801  	}
 802  	entry->linked = linked;
 803 +
 804  #ifdef WANT_ALL_SCHED_EVENTS
 805  	if (linked)
 806  		TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
 807 @@ -206,6 +466,14 @@ static noinline void link_task_to_cpu(struct task_struct* linked,
 808  		TRACE("NULL linked to %d.\n", entry->cpu);
 809  #endif
 810  	update_cpu_position(entry);
 811 +
 812 +
 813 +	if (is_waiting_for_donation_end(unlinked) &&
 814 +	    is_completed(unlinked)) {
 815 +		/* delayed job completion */
 816 +		TRACE_TASK(unlinked, "delayed job completion\n");
 817 +		job_completion(unlinked, 0);
 818 +	}
 819  }
 820  
 821  /* unlink - Make sure a task is not linked any longer to an entry
 822 @@ -215,50 +483,178 @@ static noinline void unlink(struct task_struct* t)
 823  {
 824      	cpu_entry_t *entry;
 825  
 826 +	TRACE_TASK(t, "unlink (linked_on=%d)\n", t->rt_param.linked_on, in_list(&t->rt_param.list));
 827 +
 828 +	BUG_ON(t->rt_param.linked_on != NO_CPU && in_list(&t->rt_param.list));
 829 +
 830  	if (t->rt_param.linked_on != NO_CPU) {
 831  		/* unlink */
 832  		entry = &per_cpu(cedf_cpu_entries, t->rt_param.linked_on);
 833  		t->rt_param.linked_on = NO_CPU;
 834  		link_task_to_cpu(NULL, entry);
 835 -	} else if (is_queued(t)) {
 836 -		/* This is an interesting situation: t is scheduled,
 837 -		 * but was just recently unlinked.  It cannot be
 838 -		 * linked anywhere else (because then it would have
 839 -		 * been relinked to this CPU), thus it must be in some
 840 -		 * queue. We must remove it from the list in this
 841 -		 * case.
 842 -		 *
 843 -		 * in C-EDF case is should be somewhere in the queue for
 844 -		 * its domain, therefore and we can get the domain using
 845 -		 * task_cpu_cluster
 846 -		 */
 847 -		remove(&(task_cpu_cluster(t))->domain, t);
 848 +	} else if (in_list(&t->rt_param.list)) {
 849 +		/* Remove from queue of pending jobs. */
 850 +		list_del(&t->rt_param.list);
 851  	}
 852  }
 853  
 854  
 855 -/* preempt - force a CPU to reschedule
 856 - */
 857 -static void preempt(cpu_entry_t *entry)
 858 +static int edf_queue_job(struct list_head *queue, struct task_struct *t)
 859  {
 860 -	preempt_if_preemptable(entry->scheduled, entry->cpu);
 861 +	unsigned int passed = 0;
 862 +	struct list_head *pos;
 863 +	struct task_struct *other;
 864 +
 865 +	BUG_ON(in_list(&t->rt_param.list));
 866 +
 867 +	/* sorted enqueue */
 868 +	list_for_each(pos, queue)
 869 +	{
 870 +		other = list_entry(pos, struct task_struct, rt_param.list);
 871 +		if (edf_higher_prio(t, other)) {
 872 +			/* found correct position: insert after prev and return */
 873 +			list_add(&t->rt_param.list, pos->prev);
 874 +			return passed;
 875 +		} else
 876 +			passed++;
 877 +	}
 878 +
 879 +	/* Fell through: hit end of list. */
 880 +
 881 +	list_add_tail(&t->rt_param.list, queue);
 882 +
 883 +	return passed;
 884  }
 885  
 886 -/* requeue - Put an unlinked task into gsn-edf domain.
 887 +static struct task_struct* job_queue_head(struct list_head *queue)
 888 +{
 889 +	if (list_empty(queue))
 890 +		return NULL;
 891 +	else
 892 +		return list_entry(queue->next, struct task_struct, rt_param.list);
 893 +}
 894 +
 895 +static struct task_struct* job_queue_first_runnable(struct list_head *queue)
 896 +{
 897 +	struct list_head *pos;
 898 +	struct task_struct *task;
 899 +
 900 +	/* scan the queue in priority order */
 901 +	list_for_each(pos, queue)
 902 +	{
 903 +		task = list_entry(pos, struct task_struct, rt_param.list);
 904 +		task = select_job(task);
 905 +		if (task && tsk_rt(task)->mpi)
 906 +			task = mpi_try_to_schedule(tsk_rt(task)->mpi);
 907 +		if (task
 908 +		    && tsk_rt(task)->scheduled_on == NO_CPU
 909 +		    && tsk_rt(task)->donor_cpu == NO_CPU)
 910 +			return task;
 911 +	}
 912 +
 913 +	return NULL;
 914 +}
 915 +
 916 +
 917 +static struct task_struct* dequeue_head(struct list_head *queue)
 918 +{
 919 +	struct task_struct* head = job_queue_head(queue);
 920 +
 921 +	if (head)
 922 +		list_del(&head->rt_param.list);
 923 +
 924 +	return head;
 925 +}
 926 +
 927 +static int edf_need_to_preempt(struct list_head *queue, struct task_struct *scheduled)
 928 +{
 929 +	if (list_empty(queue))
 930 +		return 0;
 931 +	if (!scheduled || !is_realtime(scheduled))
 932 +		return 1;
 933 +
 934 +	return edf_higher_prio(job_queue_head(queue), scheduled);
 935 +}
 936 +
 937 +static void check_for_preemptions(cedf_domain_t *cluster);
 938 +
 939 +enum hrtimer_restart on_cedf_release_timer(struct hrtimer *timer)
 940 +{
 941 +	/* We know that the calling thread/context is not holding the ready
 942 +	 * queue lock because the hrtimer subsystem pushes the arming of
 943 +	 * already expired timers into a softirq (see
 944 +	 * hrtimer_enqueue_reprogram). Thus, even when arming an already
 945 +	 * expired timer, the call to the callback function does not happen
 946 +	 * until after the ready queue lock has been dropped.
 947 +	*/
 948 +
 949 +	struct task_struct *task;
 950 +	cedf_domain_t* cluster;
 951 +	unsigned long flags;
 952 +
 953 +	task = container_of(timer, struct release_heap, timer)->task;
 954 +	cluster = task_cpu_cluster(task);
 955 +
 956 +	raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
 957 +
 958 +	TRACE_TASK(task, "released! complete:%d\n", task->rt_param.completed);
 959 +	tsk_rt(task)->completed = 0;
 960 +
 961 +	edf_queue_job(&cluster->job_queue, task);
 962 +	check_for_preemptions(cluster);
 963 +
 964 +	raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
 965 +
 966 +	return  HRTIMER_NORESTART;
 967 +}
 968 +
 969 +void arm_timer_for_job_release(struct task_struct *t)
 970 +{
 971 +	struct hrtimer *timer = &tsk_rt(t)->rel_heap->timer;
 972 +
 973 +	if (hrtimer_active(timer)) {
 974 +		TRACE_TASK(t, "BUG BUG BUG timer still active!\n");
 975 +		TRACE_TASK(t, "giving up\n");
 976 +		printk(KERN_CRIT "BUG BUG BUG\n");
 977 +		return;
 978 +	}
 979 +	BUG_ON(hrtimer_active(timer));
 980 +
 981 +	timer->function = on_cedf_release_timer;
 982 +	tsk_rt(t)->rel_heap->task = t;
 983 +
 984 +	TRACE_TASK(t, "arming job releas timer\n");
 985 +
 986 +	/* todo: release master support is missing */
 987 +	__hrtimer_start_range_ns(timer,
 988 +				 ns_to_ktime(get_release(t)),
 989 +				 0, HRTIMER_MODE_ABS_PINNED, 0);
 990 +
 991 +	sched_trace_task_release(t);
 992 +}
 993 +
 994 +/* requeue - Put an unlinked task into appropriate queue (ready or release).
 995   *           Caller must hold cedf_lock.
 996   */
 997  static noinline void requeue(struct task_struct* task)
 998  {
 999  	cedf_domain_t *cluster = task_cpu_cluster(task);
1000 -	BUG_ON(!task);
1001 -	/* sanity check before insertion */
1002 -	BUG_ON(is_queued(task));
1003 +	lt_t now = litmus_clock();
1004  
1005 -	if (is_released(task, litmus_clock()))
1006 -		__add_ready(&cluster->domain, task);
1007 +	TRACE_TASK(task, "requeue: rel=%d queued=%d linked=%d\n",
1008 +		   is_released(task, now),
1009 +		   in_list(&task->rt_param.list),
1010 +		   cedf_is_linked(task));
1011 +
1012 +	BUG_ON(task->rt_param.linked_on != NO_CPU && !is_released(task, litmus_clock()));
1013 +
1014 +	if (is_released(task, now)
1015 +		 && !in_list(&task->rt_param.list)
1016 +		 && !cedf_is_linked(task))
1017 +		edf_queue_job(&cluster->job_queue, task);
1018  	else {
1019 -		/* it has got to wait */
1020 -		add_release(&cluster->domain, task);
1021 +		if (!is_released(task, now))
1022 +			TRACE_TASK(task, "not requeueing not-yet-released job\n");
1023  	}
1024  }
1025  
1026 @@ -288,16 +684,18 @@ static cpu_entry_t* cedf_get_nearest_available_cpu(
1027  /* check for any necessary preemptions */
1028  static void check_for_preemptions(cedf_domain_t *cluster)
1029  {
1030 -	struct task_struct *task;
1031 +	struct task_struct *task, *unlinked;
1032  	cpu_entry_t *last;
1033  
1034  	for(last = lowest_prio_cpu(cluster);
1035 -	    edf_preemption_needed(&cluster->domain, last->linked);
1036 +	    edf_need_to_preempt(&cluster->job_queue, last->linked);
1037  	    last = lowest_prio_cpu(cluster)) {
1038  		/* preemption necessary */
1039 -		task = __take_ready(&cluster->domain);
1040 +		task = dequeue_head(&cluster->job_queue);
1041  		TRACE("check_for_preemptions: attempting to link task %d to %d\n",
1042  		      task->pid, last->cpu);
1043 +		unlinked = last->linked;
1044 +		link_task_to_cpu(task, last);
1045  #ifdef CONFIG_SCHED_CPU_AFFINITY
1046  		{
1047  			cpu_entry_t *affinity =
1048 @@ -309,10 +707,9 @@ static void check_for_preemptions(cedf_domain_t *cluster)
1049  				requeue(last->linked);
1050  		}
1051  #else
1052 -		if (requeue_preempted_job(last->linked))
1053 -			requeue(last->linked);
1054 +		if (requeue_preempted_job(unlinked))
1055 +			requeue(unlinked);
1056  #endif
1057 -		link_task_to_cpu(task, last);
1058  		preempt(last);
1059  	}
1060  }
1061 @@ -325,42 +722,38 @@ static noinline void cedf_job_arrival(struct task_struct* task)
1062  
1063  	requeue(task);
1064  	check_for_preemptions(cluster);
1065 -}
1066 -
1067 -static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
1068 -{
1069 -	cedf_domain_t* cluster = container_of(rt, cedf_domain_t, domain);
1070 -	unsigned long flags;
1071 -
1072 -	raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
1073 -
1074 -	__merge_ready(&cluster->domain, tasks);
1075 -	check_for_preemptions(cluster);
1076 -
1077 -	raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
1078 +	cedf_preempt_linked_cpu(task);
1079  }
1080  
1081  /* caller holds cedf_lock */
1082 -static noinline void job_completion(struct task_struct *t, int forced)
1083 +static void job_completion(struct task_struct *t, int forced)
1084  {
1085 +	lt_t now = litmus_clock();
1086 +
1087  	BUG_ON(!t);
1088  
1089  	sched_trace_task_completion(t, forced);
1090  
1091 -	TRACE_TASK(t, "job_completion().\n");
1092 +	TRACE_TASK(t, "job_completion(). lateness=%ld\n",
1093 +		   (long) now - (long) get_deadline(t));
1094  
1095  	/* set flags */
1096  	tsk_rt(t)->completed = 1;
1097  	/* prepare for next period */
1098  	prepare_for_next_period(t);
1099 -	if (is_released(t, litmus_clock()))
1100 +	if (is_released(t, now))
1101  		sched_trace_task_release(t);
1102  	/* unlink */
1103  	unlink(t);
1104  	/* requeue
1105  	 * But don't requeue a blocking task. */
1106 -	if (is_running(t))
1107 -		cedf_job_arrival(t);
1108 +	tsk_rt(t)->completed = 0;
1109 +	if (is_present(t) && is_running(t)) {
1110 +		if (!is_released(t, now))
1111 +			arm_timer_for_job_release(t);
1112 +		else
1113 +			cedf_job_arrival(t);
1114 +	}
1115  }
1116  
1117  /* cedf_tick - this function is called for every local timer
1118 @@ -377,7 +770,6 @@ static void cedf_tick(struct task_struct* t)
1119  			 * preemptable again
1120  			 */
1121  			litmus_reschedule_local();
1122 -			set_will_schedule();
1123  			TRACE("cedf_scheduler_tick: "
1124  			      "%d is preemptable "
1125  			      " => FORCE_RESCHED\n", t->pid);
1126 @@ -415,8 +807,9 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
1127  {
1128  	cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries);
1129  	cedf_domain_t *cluster = entry->cluster;
1130 -	int out_of_time, sleep, preempt, np, exists, blocks;
1131 +	int out_of_time, sleep, preempt, np, exists, blocks, dead, exiting;
1132  	struct task_struct* next = NULL;
1133 +	struct task_struct* should_schedule = NULL;
1134  
1135  #ifdef CONFIG_RELEASE_MASTER
1136  	/* Bail out early if we are the release master.
1137 @@ -429,10 +822,17 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
1138  #endif
1139  
1140  	raw_spin_lock(&cluster->cluster_lock);
1141 -	clear_will_schedule();
1142  
1143  	/* sanity checking */
1144  	BUG_ON(entry->scheduled && entry->scheduled != prev);
1145 +	if (entry->scheduled && !is_realtime(prev)) {
1146 +		TRACE_TASK(prev, "BUG BUG BUG :: not real-time\n");
1147 +		TRACE_TASK(entry->scheduled, "scheduled instead\n");
1148 +		should_schedule = NULL;
1149 +		next = NULL;
1150 +		exists = blocks = np = 0;
1151 +		goto bailout;
1152 +	}
1153  	BUG_ON(entry->scheduled && !is_realtime(prev));
1154  	BUG_ON(is_realtime(prev) && !entry->scheduled);
1155  
1156 @@ -444,7 +844,51 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
1157  				  budget_exhausted(entry->scheduled);
1158  	np 	    = exists && is_np(entry->scheduled);
1159  	sleep	    = exists && is_completed(entry->scheduled);
1160 -	preempt     = entry->scheduled != entry->linked;
1161 +	dead	    = exists && entry->scheduled->state == TASK_DEAD;
1162 +	exiting	    = exists && entry->scheduled->flags & PF_EXITING;
1163 +
1164 +	/* check whether linked is available */
1165 +	should_schedule = select_job(entry->linked);
1166 +
1167 +	BUG_ON(entry->pd_task && !entry->linked);
1168 +
1169 +	/* Check for priority donation */
1170 +	if (!should_schedule && entry->linked) {
1171 +
1172 +		if (is_waiting_for_donation_end(entry->linked) &&
1173 +		    !entry->pd_task) {
1174 +			TRACE_TASK(entry->linked, "no longer waiting for donation end\n");
1175 +			entry->linked->rt_param.priodon_state = NOT_WAITING;
1176 +
1177 +			if (is_completed(entry->linked)) {
1178 +				TRACE_TASK(entry->linked, "delayed job completion processed\n");
1179 +				job_completion(entry->linked, 0);
1180 +				sleep = exists && is_completed(entry->scheduled);
1181 +			}
1182 +
1183 +			should_schedule = select_job(entry->linked);
1184 +
1185 +		} else if (is_waiting_to_be_linked(entry->linked)) {
1186 +			TRACE_TASK(entry->linked, "no longer waiting to be linked\n");
1187 +			entry->linked->rt_param.priodon_state = NOT_WAITING;
1188 +			should_schedule = select_job(entry->linked);
1189 +
1190 +			/* A task is only blocked once by priority donation */
1191 +			BUG_ON(entry->pd_task);
1192 +		}
1193 +	}
1194 +
1195 +	if (entry->pd_task) {
1196 +		should_schedule = select_job(entry->pd_task);
1197 +
1198 +		/* special case: donor completion needs to be delayed */
1199 +		if (is_completed(entry->linked)) {
1200 +			TRACE_TASK(entry->linked, "job completion delayed\n");
1201 +			entry->linked->rt_param.priodon_state = WAITING_FOR_DONATION_END;
1202 +		}
1203 +	}
1204 +
1205 +	preempt     = entry->scheduled != should_schedule;
1206  
1207  #ifdef WANT_ALL_SCHED_EVENTS
1208  	TRACE_TASK(prev, "invoked cedf_schedule.\n");
1209 @@ -453,17 +897,22 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
1210  	if (exists)
1211  		TRACE_TASK(prev,
1212  			   "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
1213 -			   "state:%d sig:%d\n",
1214 +			   "state:%d sig:%d dead:%d exit_state:%d exiting:%d\n",
1215  			   blocks, out_of_time, np, sleep, preempt,
1216 -			   prev->state, signal_pending(prev));
1217 -	if (entry->linked && preempt)
1218 +			   prev->state, signal_pending(prev), dead, prev->exit_state, exiting);
1219 +	if (should_schedule && preempt)
1220  		TRACE_TASK(prev, "will be preempted by %s/%d\n",
1221 -			   entry->linked->comm, entry->linked->pid);
1222 +			   should_schedule->comm, should_schedule->pid);
1223  
1224  
1225  	/* If a task blocks we have no choice but to reschedule.
1226  	 */
1227 -	if (blocks)
1228 +//	if (blocks)
1229 +//		unlink(entry->scheduled);
1230 +
1231 +
1232 +	/* Exiting tasks do not count as having pending jobs. */
1233 +	if (unlikely(blocks && exiting))
1234  		unlink(entry->scheduled);
1235  
1236  	/* Request a sys_exit_np() call if we would like to preempt but cannot.
1237 @@ -472,7 +921,7 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
1238  	 * hurt.
1239  	 */
1240  	if (np && (out_of_time || preempt || sleep)) {
1241 -		unlink(entry->scheduled);
1242 +//		unlink(entry->scheduled);
1243  		request_exit_np(entry->scheduled);
1244  	}
1245  
1246 @@ -480,24 +929,65 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
1247  	 * budget or wants to sleep completes. We may have to reschedule after
1248  	 * this. Don't do a job completion if we block (can't have timers running
1249  	 * for blocked jobs).
1250 +	 *
1251 +	 * Priority donation: priority donors may not complete.
1252  	 */
1253 -	if (!np && (out_of_time || sleep) && !blocks)
1254 +	if (!np && (out_of_time || sleep) && !blocks
1255 +	    && !(entry->linked == entry->scheduled
1256 +		 && is_waiting_for_donation_end(entry->linked)))
1257  		job_completion(entry->scheduled, !sleep);
1258  
1259  	/* Link pending task if we became unlinked.
1260  	 */
1261 -	if (!entry->linked)
1262 -		link_task_to_cpu(__take_ready(&cluster->domain), entry);
1263 +	if (!entry->linked) {
1264 +		link_task_to_cpu(dequeue_head(&cluster->job_queue), entry);
1265 +		if (is_waiting_to_be_linked(entry->linked)) {
1266 +			TRACE_TASK(entry->linked, "no longer waiting to be linked\n");
1267 +			entry->linked->rt_param.priodon_state = NOT_WAITING;
1268 +
1269 +			/* A task is only blocked once by priority donation */
1270 +			BUG_ON(entry->pd_task);
1271 +		}
1272 +	}
1273 +
1274 +	TRACE_TASK(should_schedule, "PRE PD\n");
1275 +
1276 +	should_schedule = select_job(entry->pd_task);
1277  
1278 +	TRACE_TASK(should_schedule, "POST PD\n");
1279 +	if (!should_schedule) {
1280 +		/* check for migratory priority inheritance */
1281 +		if (entry->linked && entry->linked->rt_param.mpi)
1282 +			should_schedule = mpi_try_to_schedule(entry->linked->rt_param.mpi);
1283 +		else
1284 +			should_schedule = select_job(entry->linked);
1285 +	}
1286 +
1287 +	TRACE_TASK(should_schedule, "should_schedule\n");
1288 +
1289 +	if (entry->linked && entry->linked != should_schedule
1290 +	    && should_schedule)
1291 +		TRACE_TASK(entry->linked, "linked, but delegates to %s/%d\n",
1292 +			   should_schedule->comm, should_schedule->pid);
1293 +
1294 +	if (!should_schedule) {
1295 +		/* Neither linked nor delegate are available. */
1296 +		/* Pick someone else. */
1297 +		should_schedule = job_queue_first_runnable(&cluster->job_queue);
1298 +	}
1299 +
1300 +
1301 +bailout:
1302  	/* The final scheduling decision. Do we need to switch for some reason?
1303  	 * If linked is different from scheduled, then select linked as next.
1304  	 */
1305  	if ((!np || blocks) &&
1306 -	    entry->linked != entry->scheduled) {
1307 +	    should_schedule != entry->scheduled) {
1308  		/* Schedule a linked job? */
1309 -		if (entry->linked) {
1310 -			entry->linked->rt_param.scheduled_on = entry->cpu;
1311 -			next = entry->linked;
1312 +		if (should_schedule) {
1313 +			should_schedule->rt_param.scheduled_on = entry->cpu;
1314 +			next = should_schedule;
1315 +			TRACE_TASK(should_schedule, "should_schedule!\n");
1316  		}
1317  		if (entry->scheduled) {
1318  			/* not gonna be scheduled soon */
1319 @@ -528,13 +1018,39 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
1320  }
1321  
1322  
1323 +// XXX When a task wakes up, we also need to check whether it
1324 +// has any notifiers attached.
1325 +
1326 +// XXX What if prev is already scheduled elsewhere?
1327 +// XXX Need to handle completion while migrated away!
1328 +
1329  /* _finish_switch - we just finished the switch away from prev
1330   */
1331  static void cedf_finish_switch(struct task_struct *prev)
1332  {
1333  	cpu_entry_t* 	entry = &__get_cpu_var(cedf_cpu_entries);
1334 +	cedf_domain_t *cluster  = task_cpu_cluster(prev);
1335 +	unsigned long flags;
1336 +
1337 +	/* check for preemption notifiers */
1338  
1339  	entry->scheduled = is_realtime(current) ? current : NULL;
1340 +	if (is_realtime(prev) && prev->rt_param.mpi) {
1341 +		if (prev->rt_param.mpi == MPI_EXIT_CS) {
1342 +			/* Signal to remote cores that it is now safe
1343 +			 * to consider this task again. */
1344 +			prev->rt_param.mpi = NULL;
1345 +		} else
1346 +			mpi_deschedule(prev->rt_param.mpi, prev);
1347 +	}
1348 +
1349 +	if (cluster != entry->cluster) {
1350 +		/* needs to migrate back to its own cluster */
1351 +		raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
1352 +		cedf_preempt_linked_cpu(prev);
1353 +		raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
1354 +	}
1355 +
1356  #ifdef WANT_ALL_SCHED_EVENTS
1357  	TRACE_TASK(prev, "switched away from\n");
1358  #endif
1359 @@ -549,13 +1065,15 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running)
1360  	cpu_entry_t* 		entry;
1361  	cedf_domain_t*		cluster;
1362  
1363 -	TRACE("gsn edf: task new %d\n", t->pid);
1364 +	TRACE("cedf: task new %d\n", t->pid);
1365  
1366  	/* the cluster doesn't change even if t is running */
1367  	cluster = task_cpu_cluster(t);
1368  
1369  	raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
1370  
1371 +	t->rt_param.donor_cpu = NO_CPU;
1372 +
1373  	/* setup job params */
1374  	release_at(t, litmus_clock());
1375  
1376 @@ -595,36 +1113,44 @@ static void cedf_task_wake_up(struct task_struct *task)
1377  	cluster = task_cpu_cluster(task);
1378  
1379  	raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
1380 -	now = litmus_clock();
1381 -	if (is_tardy(task, now)) {
1382 -		/* new sporadic release */
1383 -		release_at(task, now);
1384 -		sched_trace_task_release(task);
1385 -	}
1386 -	else {
1387 -		if (task->rt.time_slice) {
1388 -			/* came back in time before deadline
1389 -			*/
1390 -			tsk_rt(task)->completed = 0;
1391 +
1392 +	if (task->rt_param.donor_cpu != NO_CPU) {
1393 +		/* preempt the CPU where the donor is located at */
1394 +		preempt(&per_cpu(cedf_cpu_entries, task->rt_param.donor_cpu));
1395 +	} else if (task->rt_param.mpi) {
1396 +		mpi_notify_cpus(task->rt_param.mpi);
1397 +	} else {
1398 +		now = litmus_clock();
1399 +		if (is_tardy(task, now)) {
1400 +			/* new sporadic release */
1401 +			release_at(task, now);
1402 +			sched_trace_task_release(task);
1403  		}
1404 +		else {
1405 +			if (task->rt.time_slice) {
1406 +				/* came back in time before deadline
1407 +				 */
1408 +				tsk_rt(task)->completed = 0;
1409 +			}
1410 +		}
1411 +		cedf_job_arrival(task);
1412  	}
1413 -	cedf_job_arrival(task);
1414  	raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
1415  }
1416  
1417  static void cedf_task_block(struct task_struct *t)
1418  {
1419 -	unsigned long flags;
1420 +//	unsigned long flags;
1421  	cedf_domain_t *cluster;
1422  
1423  	TRACE_TASK(t, "block at %llu\n", litmus_clock());
1424  
1425  	cluster = task_cpu_cluster(t);
1426  
1427 -	/* unlink if necessary */
1428 -	raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
1429 -	unlink(t);
1430 -	raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
1431 +//	/* unlink if necessary */
1432 +//	raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
1433 +//	unlink(t);
1434 +//	raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
1435  
1436  	BUG_ON(!is_realtime(t));
1437  }
1438 @@ -634,16 +1160,25 @@ static void cedf_task_exit(struct task_struct * t)
1439  {
1440  	unsigned long flags;
1441  	cedf_domain_t *cluster = task_cpu_cluster(t);
1442 +	cpu_entry_t *cpu;
1443  
1444  	/* unlink if necessary */
1445  	raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
1446 +
1447 +	cpu = &__get_cpu_var(cedf_cpu_entries);
1448 +	TRACE_TASK(t, "exiting linked_on=%d scheduled_on=%d\n",
1449 +		   tsk_rt(t)->linked_on,
1450 +		   tsk_rt(t)->scheduled_on);
1451 +	TRACE_TASK(cpu->linked, "locally linked\n");
1452 +	TRACE_TASK(cpu->scheduled, "locally scheduled\n");
1453 +
1454  	unlink(t);
1455  	if (tsk_rt(t)->scheduled_on != NO_CPU) {
1456 -		cpu_entry_t *cpu;
1457  		cpu = &per_cpu(cedf_cpu_entries, tsk_rt(t)->scheduled_on);
1458  		cpu->scheduled = NULL;
1459  		tsk_rt(t)->scheduled_on = NO_CPU;
1460  	}
1461 +	check_for_preemptions(cluster);
1462  	raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
1463  
1464  	BUG_ON(!is_realtime(t));
1465 @@ -652,6 +1187,10 @@ static void cedf_task_exit(struct task_struct * t)
1466  
1467  static long cedf_admit_task(struct task_struct* tsk)
1468  {
1469 +	struct hrtimer *timer = &tsk_rt(tsk)->rel_heap->timer;
1470 +
1471 +	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1472 +
1473  	return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
1474  }
1475  
1476 @@ -739,17 +1278,19 @@ static long cedf_activate_plugin(void)
1477  	/* initialize clusters */
1478  	cedf = kmalloc(num_clusters * sizeof(cedf_domain_t), GFP_ATOMIC);
1479  	for (i = 0; i < num_clusters; i++) {
1480 -
1481 +		cedf[i].cluster = i;
1482  		cedf[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t),
1483  				GFP_ATOMIC);
1484  		cedf[i].heap_node = kmalloc(
1485  				cluster_size * sizeof(struct bheap_node),
1486  				GFP_ATOMIC);
1487  		bheap_init(&(cedf[i].cpu_heap));
1488 -		edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs);
1489 +		INIT_LIST_HEAD(&cedf[i].job_queue);
1490 +		raw_spin_lock_init(&cedf[i].cluster_lock);
1491  
1492  		if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC))
1493  			return -ENOMEM;
1494 +		cedf[i].num_cpus = 0;
1495  #ifdef CONFIG_RELEASE_MASTER
1496  		cedf[i].domain.release_master = atomic_read(&release_master_cpu);
1497  #endif
1498 @@ -785,7 +1326,6 @@ static long cedf_activate_plugin(void)
1499  
1500  				entry = &per_cpu(cedf_cpu_entries, ccpu);
1501  				cedf[i].cpus[cpu_count] = entry;
1502 -				atomic_set(&entry->will_schedule, 0);
1503  				entry->cpu = ccpu;
1504  				entry->cluster = &cedf[i];
1505  				entry->hn = &(cedf[i].heap_node[cpu_count]);
1506 @@ -795,12 +1335,14 @@ static long cedf_activate_plugin(void)
1507  
1508  				entry->linked = NULL;
1509  				entry->scheduled = NULL;
1510 +				entry->pd_task = NULL;
1511  #ifdef CONFIG_RELEASE_MASTER
1512  				/* only add CPUs that should schedule jobs */
1513  				if (entry->cpu != entry->cluster->domain.release_master)
1514  #endif
1515  					update_cpu_position(entry);
1516  			}
1517 +			cedf[i].num_cpus = cpu_count;
1518  			/* done with this cluster */
1519  			break;
1520  		}
1521 @@ -811,6 +1353,494 @@ static long cedf_activate_plugin(void)
1522  	return 0;
1523  }
1524  
1525 +
1526 +#ifdef CONFIG_LITMUS_LOCKING
1527 +
1528 +static void priodon_become_eligible(void)
1529 +{
1530 +	struct task_struct* t = current;
1531 +	cpu_entry_t* cpu;
1532 +	int started = 0;
1533 +
1534 +	while (!started) {
1535 +
1536 +		preempt_disable();
1537 +
1538 +		cpu = &__get_cpu_var(cedf_cpu_entries);
1539 +
1540 +		BUG_ON(cpu->pd_task == t);
1541 +
1542 +		if (cpu->linked == t) {
1543 +			/* We observed that we are linked.
1544 +			 * => We are among the $m$ highest-priority pending jobs.
1545 +			 * => Start the critical section. preempt_disable() ensures
1546 +			 *    that this does not race with other critical
1547 +			 *    sections starting on the same CPU.
1548 +			 */
1549 +
1550 +
1551 +			if (cpu->pd_task) {
1552 +				/* Uh oh, we are currently a priority donor,
1553 +				 * so we are not allowed to start our own
1554 +				 * critical section yet.
1555 +				 */
1556 +				TRACE_TASK(t, "must wait for donation to end\n");
1557 +
1558 +				t->rt_param.priodon_state = WAITING_FOR_DONATION_END;
1559 +				preempt_enable_no_resched();
1560 +
1561 +				schedule();
1562 +			} else {
1563 +				started = 1;
1564 +				cpu->pd_task = t;
1565 +				t->rt_param.donor_cpu = cpu->cpu;
1566 +				preempt_enable();
1567 +			}
1568 +		} else {
1569 +			/* We are currently not linked. Must wait. */
1570 +			t->rt_param.priodon_state = WAITING_TO_BE_LINKED;
1571 +			TRACE_TASK(t, "must wait to be linked\n");
1572 +			preempt_enable_no_resched();
1573 +			schedule();
1574 +		}
1575 +	}
1576 +}
1577 +
1578 +static void priodon_complete_request(void)
1579 +{
1580 +	struct task_struct* t = current;
1581 +	cpu_entry_t* cpu;
1582 +
1583 +	preempt_disable();
1584 +
1585 +	cpu = &__get_cpu_var(cedf_cpu_entries);
1586 +
1587 +	BUG_ON(cpu->pd_task != t);
1588 +
1589 +	cpu->pd_task = NULL;
1590 +	t->rt_param.donor_cpu = NO_CPU;
1591 +
1592 +	/* Reschedule if we became unlinked during
1593 +	 * the critical section.
1594 +	 */
1595 +	if (cpu->linked != t)
1596 +		preempt(cpu);
1597 +
1598 +	preempt_enable();
1599 +}
1600 +
1601 +/* struct for semaphore with priority inheritance */
1602 +struct omlp_semaphore {
1603 +	struct litmus_lock litmus_lock;
1604 +
1605 +	/* current resource holder */
1606 +	struct task_struct *owner;
1607 +
1608 +	/* FIFO queue of waiting tasks */
1609 +	wait_queue_head_t fifo_wait;
1610 +};
1611 +
1612 +static inline struct omlp_semaphore* omlp_from_lock(struct litmus_lock* lock)
1613 +{
1614 +	return container_of(lock, struct omlp_semaphore, litmus_lock);
1615 +}
1616 +
1617 +static int cedf_omlp_lock(struct litmus_lock* l)
1618 +{
1619 +	struct task_struct* t = current;
1620 +	struct omlp_semaphore *sem = omlp_from_lock(l);
1621 +	wait_queue_t wait;
1622 +	unsigned long flags;
1623 +
1624 +	if (!is_realtime(t))
1625 +		return -EPERM;
1626 +
1627 +	priodon_become_eligible();
1628 +
1629 +	spin_lock_irqsave(&sem->fifo_wait.lock, flags);
1630 +
1631 +	if (sem->owner) {
1632 +		/* resource is not free => must suspend and wait */
1633 +
1634 +		init_waitqueue_entry(&wait, t);
1635 +
1636 +		set_task_state(t, TASK_UNINTERRUPTIBLE);
1637 +
1638 +		__add_wait_queue_tail_exclusive(&sem->fifo_wait, &wait);
1639 +
1640 +		TS_LOCK_SUSPEND;
1641 +
1642 +		spin_unlock_irqrestore(&sem->fifo_wait.lock, flags);
1643 +
1644 +		schedule();
1645 +
1646 +		TS_LOCK_RESUME;
1647 +
1648 +		BUG_ON(sem->owner != t);
1649 +	} else {
1650 +		/* it's ours now */
1651 +		sem->owner = t;
1652 +
1653 +		spin_unlock_irqrestore(&sem->fifo_wait.lock, flags);
1654 +	}
1655 +
1656 +	return 0;
1657 +}
1658 +
1659 +static int cedf_omlp_unlock(struct litmus_lock* l)
1660 +{
1661 +	struct task_struct *t = current, *next;
1662 +	struct omlp_semaphore *sem = omlp_from_lock(l);
1663 +	unsigned long flags;
1664 +	int err = 0;
1665 +
1666 +	spin_lock_irqsave(&sem->fifo_wait.lock, flags);
1667 +
1668 +	if (sem->owner != t) {
1669 +		err = -EINVAL;
1670 +		spin_unlock_irqrestore(&sem->fifo_wait.lock, flags);
1671 +		goto out;
1672 +	}
1673 +
1674 +	/* check if there are jobs waiting for this resource */
1675 +	next = __waitqueue_remove_first(&sem->fifo_wait);
1676 +	if (next) {
1677 +		/* next becomes the resource holder */
1678 +		sem->owner = next;
1679 +		TRACE_CUR("lock ownership passed to %s/%d\n", next->comm, next->pid);
1680 +
1681 +		/* wake up next */
1682 +		wake_up_process(next);
1683 +	} else
1684 +		/* becomes available */
1685 +		sem->owner = NULL;
1686 +
1687 +	spin_unlock_irqrestore(&sem->fifo_wait.lock, flags);
1688 +
1689 +	priodon_complete_request();
1690 +
1691 +out:
1692 +	return err;
1693 +}
1694 +
1695 +static int cedf_omlp_close(struct litmus_lock* l)
1696 +{
1697 +	struct task_struct *t = current;
1698 +	struct omlp_semaphore *sem = omlp_from_lock(l);
1699 +	unsigned long flags;
1700 +
1701 +	int owner;
1702 +
1703 +	spin_lock_irqsave(&sem->fifo_wait.lock, flags);
1704 +
1705 +	owner = sem->owner == t;
1706 +
1707 +	spin_unlock_irqrestore(&sem->fifo_wait.lock, flags);
1708 +
1709 +	if (owner)
1710 +		cedf_omlp_unlock(l);
1711 +
1712 +	return 0;
1713 +}
1714 +
1715 +static void cedf_omlp_free(struct litmus_lock* lock)
1716 +{
1717 +	kfree(omlp_from_lock(lock));
1718 +}
1719 +
1720 +static struct litmus_lock_ops cedf_omlp_lock_ops = {
1721 +	.close  = cedf_omlp_close,
1722 +	.lock   = cedf_omlp_lock,
1723 +	.unlock = cedf_omlp_unlock,
1724 +	.deallocate = cedf_omlp_free,
1725 +};
1726 +
1727 +static struct litmus_lock* cedf_new_omlp(void)
1728 +{
1729 +	struct omlp_semaphore* sem;
1730 +
1731 +	sem = kmalloc(sizeof(*sem), GFP_KERNEL);
1732 +	if (!sem)
1733 +		return NULL;
1734 +
1735 +	sem->owner   = NULL;
1736 +	init_waitqueue_head(&sem->fifo_wait);
1737 +	sem->litmus_lock.ops = &cedf_omlp_lock_ops;
1738 +
1739 +	return &sem->litmus_lock;
1740 +}
1741 +
1742 +/* OMIP: O(m) Multiprocessor Independence-Preserving Locking Protocol */
1743 +
1744 +struct omip_per_cluster_info {
1745 +	/* per-cluster FIFO queue of waiting tasks */
1746 +	wait_queue_head_t fifo_wait;
1747 +	unsigned int queued;
1748 +
1749 +	/* Priority queue of waiting tasks, on each cluster. */
1750 +	wait_queue_head_t prio_wait;
1751 +};
1752 +
1753 +static void omip_per_cluster_init(struct omip_per_cluster_info *cluster)
1754 +{
1755 +	init_waitqueue_head(&cluster->fifo_wait);
1756 +	init_waitqueue_head(&cluster->prio_wait);
1757 +	cluster->queued = 0;
1758 +}
1759 +
1760 +
1761 +/* struct for semaphore with priority inheritance */
1762 +struct omip_semaphore {
1763 +	struct litmus_lock litmus_lock;
1764 +
1765 +	/* information for migratory priority inheritance */
1766 +	struct migratory_prio_inh mpi;
1767 +
1768 +	struct task_struct *owner;
1769 +
1770 +	/* FIFO queue of waiting tasks */
1771 +	wait_queue_head_t fifo_wait;
1772 +
1773 +	/* per-cluster queues */
1774 +	struct omip_per_cluster_info cluster[NR_CPUS];
1775 +};
1776 +
1777 +static inline struct omip_semaphore* omip_from_lock(struct litmus_lock* lock)
1778 +{
1779 +	return container_of(lock, struct omip_semaphore, litmus_lock);
1780 +}
1781 +
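     +/* Queue discipline implemented by omip_enqueue()/omip_dequeue():
     + * cluster[c].queued counts the jobs of cluster c that currently hold
     + * or wait for the lock.  A new waiter from cluster c is appended to
     + * the global FIFO queue if it is the only such job, to the cluster's
     + * local FIFO queue while fewer than num_cpus jobs are queued, and to
     + * the cluster's priority queue (ordered by deadline) otherwise.
     + * Whenever a job of cluster c releases the lock, the head of the
     + * priority queue advances to the local FIFO queue and the head of
     + * the local FIFO queue advances to the global queue.
     + */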
1782 +static void omip_enqueue(prio_wait_queue_t *wait,
1783 +			 struct omip_semaphore *sem,
1784 +			 struct task_struct *t)
1785 +{
1786 +	int c = get_partition(t);
1787 +	cedf_domain_t *cluster = remote_cluster(c);
1788 +
1789 +	if (sem->cluster[c].queued >= cluster->num_cpus) {
1790 +		/* must go into priority queue */
1791 +		TRACE_TASK(t, "sem=%p added to prio queue in cluster C%d\n", sem, c);
1792 +		__add_wait_queue_prio_exclusive(&sem->cluster[c].prio_wait, wait);
1793 +	} else if (sem->cluster[c].queued) {
1794 +		/* must go into FIFO queue */
1795 +		TRACE_TASK(t, "sem=%p added to FIFO queue in cluster C%d\n", sem, c);
1796 +		__add_wait_queue_tail_exclusive(&sem->cluster[c].fifo_wait,
1797 +						&wait->wq);
1798 +	} else {
1799 +		/* empty, t immediately progresses to the global queue */
1800 +		TRACE_TASK(t, "sem=%p added to global queue in cluster C%d\n", sem, c);
1801 +		__add_wait_queue_tail_exclusive(&sem->fifo_wait,
1802 +						&wait->wq);
1803 +	}
1804 +
1805 +	sem->cluster[c].queued++;
1806 +}
1807 +
1808 +static void omip_dequeue(struct omip_semaphore *sem, struct task_struct *t)
1809 +{
1810 +	int c = get_partition(t);
1811 +	struct list_head* first;
1812 +
1813 +	BUG_ON(!sem->cluster[c].queued);
1814 +
1815 +	sem->cluster[c].queued--;
1816 +
1817 +	/* move from prio to FIFO queue within cluster */
1818 +	if (waitqueue_active(&sem->cluster[c].prio_wait)) {
1819 +		TRACE_TASK(t, "sem=%p moves head of PRIO queue to FIFO queue C%d\n", sem, c);
1820 +		first = sem->cluster[c].prio_wait.task_list.next;
1821 +		list_move_tail(first, &sem->cluster[c].fifo_wait.task_list);
1822 +	}
1823 +
1824 +	/* move new head from local to global queue */
1825 +	if (waitqueue_active(&sem->cluster[c].fifo_wait)) {
1826 +		TRACE_TASK(t, "sem=%p moves head of FIFO queue to global queue C%d\n", sem, c);
1827 +		first = sem->cluster[c].fifo_wait.task_list.next;
1828 +		list_move_tail(first, &sem->fifo_wait.task_list);
1829 +	}
1830 +}
1831 +
1832 +static int cedf_omip_lock(struct litmus_lock* l)
1833 +{
1834 +	struct task_struct* t = current;
1835 +	struct omip_semaphore *sem = omip_from_lock(l);
1836 +	prio_wait_queue_t wait;
1837 +	unsigned long flags;
1838 +
1839 +	if (!is_realtime(t))
1840 +		return -EPERM;
1841 +
1842 +	spin_lock_irqsave(&sem->fifo_wait.lock, flags);
1843 +
1844 +	t->rt_param.mpi = &sem->mpi;
1845 +
1846 +	if (sem->owner) {
1847 +		/* resource is not free => must suspend and wait */
1848 +
1849 +		init_prio_waitqueue_entry(&wait, t, get_deadline(t));
1850 +
1851 +		set_task_state(t, TASK_UNINTERRUPTIBLE);
1852 +
1853 +		omip_enqueue(&wait, sem, t);
1854 +
1855 +		TS_LOCK_SUSPEND;
1856 +
1857 +		spin_unlock_irqrestore(&sem->fifo_wait.lock, flags);
1858 +
1859 +		schedule();
1860 +
1861 +		TS_LOCK_RESUME;
1862 +
1863 +		BUG_ON(sem->owner != t);
1864 +	} else {
1865 +		/* it's ours now */
1866 +		sem->owner = t;
1867 +		sem->cluster[get_partition(t)].queued++;
1868 +
1869 +		mpi_update_owner(&sem->mpi, t, smp_processor_id());
1870 +
1871 +		spin_unlock_irqrestore(&sem->fifo_wait.lock, flags);
1872 +	}
1873 +
1874 +	return 0;
1875 +}
1876 +
1877 +static int cedf_omip_unlock(struct litmus_lock* l)
1878 +{
1879 +	struct task_struct *t = current, *next;
1880 +	struct omip_semaphore *sem = omip_from_lock(l);
1881 +	unsigned long flags;
1882 +	int err = 0;
1883 +
1884 +	preempt_disable();
1885 +
1886 +	spin_lock_irqsave(&sem->fifo_wait.lock, flags);
1887 +
1888 +	if (sem->owner != t) {
1889 +		err = -EINVAL;
1890 +		spin_unlock_irqrestore(&sem->fifo_wait.lock, flags);
1891 +		goto out;
1892 +	}
1893 +
1894 +	omip_dequeue(sem, t);
1895 +
1896 +	/* check if there are jobs waiting for this resource */
1897 +	next = __waitqueue_remove_first(&sem->fifo_wait);
1898 +	if (next) {
1899 +		/* next becomes the resource holder */
1900 +		sem->owner = next;
1901 +		TRACE_CUR("lock ownership passed to %s/%d\n", next->comm, next->pid);
1902 +
1903 +		/* wake up next */
1904 +		wake_up_process(next);
1905 +	} else
1906 +		/* becomes available */
1907 +		sem->owner = NULL;
1908 +
1909 +	/* Mark that we are no longer holding a resource. This
1910 +	 * is required to prevent scheduler races when we have to
1911 +	 * migrate back to our core at the end of a CS. */
1912 +	tsk_rt(t)->mpi = MPI_EXIT_CS;
1913 +	/* Tell everyone else who the new owner is and notify
1914 +	 * other CPUs if required. */
1915 +	mpi_update_owner(&sem->mpi, sem->owner, NO_CPU);
1916 +
1917 +	spin_unlock_irqrestore(&sem->fifo_wait.lock, flags);
1918 +
1919 +	mpi_migrate_back();
1920 +
1921 +out:
1922 +	preempt_enable();
1923 +	return err;
1924 +}
1925 +
1926 +static int cedf_omip_close(struct litmus_lock* l)
1927 +{
1928 +	struct task_struct *t = current;
1929 +	struct omip_semaphore *sem = omip_from_lock(l);
1930 +	unsigned long flags;
1931 +
1932 +	int owner;
1933 +
1934 +	spin_lock_irqsave(&sem->fifo_wait.lock, flags);
1935 +
1936 +	owner = sem->owner == t;
1937 +
1938 +	spin_unlock_irqrestore(&sem->fifo_wait.lock, flags);
1939 +
1940 +	if (owner)
1941 +		cedf_omip_unlock(l);
1942 +
1943 +	return 0;
1944 +}
1945 +
1946 +static void cedf_omip_free(struct litmus_lock* lock)
1947 +{
1948 +	kfree(omip_from_lock(lock));
1949 +}
1950 +
1951 +static struct litmus_lock_ops cedf_omip_lock_ops = {
1952 +	.close  = cedf_omip_close,
1953 +	.lock   = cedf_omip_lock,
1954 +	.unlock = cedf_omip_unlock,
1955 +	.deallocate = cedf_omip_free,
1956 +};
1957 +
1958 +static struct litmus_lock* cedf_new_omip(void)
1959 +{
1960 +	struct omip_semaphore* sem;
1961 +	int i;
1962 +
1963 +	sem = kmalloc(sizeof(*sem), GFP_KERNEL);
1964 +	if (!sem)
1965 +		return NULL;
1966 +
1967 +	sem->owner   = NULL;
1968 +	init_waitqueue_head(&sem->fifo_wait);
1969 +	mpi_init(&sem->mpi);
1970 +	sem->litmus_lock.ops = &cedf_omip_lock_ops;
1971 +	for (i = 0; i < num_clusters; i++)
1972 +		omip_per_cluster_init(sem->cluster + i);
1973 +
1974 +	return &sem->litmus_lock;
1975 +}
1976 +
1977 +
1978 +static long cedf_allocate_lock(struct litmus_lock **lock, int type,
1979 +			       void* __user unused)
1980 +{
1981 +	int err = -ENXIO;
1982 +
1983 +	switch (type) {
1984 +
1985 +	case OMLP_SEM:
1986 +		/* O(m) Multiprocessor Locking Protocol */
1987 +		*lock  = cedf_new_omlp();
1988 +		if (*lock)
1989 +			err = 0;
1990 +		else
1991 +			err = -ENOMEM;
1992 +		break;
1993 +
1994 +	case OMIP_SEM:
1995 +		/* O(m) Multiprocessor Independence-Preserving Locking Protocol */
1996 +		*lock  = cedf_new_omip();
1997 +		if (*lock)
1998 +			err = 0;
1999 +		else
2000 +			err = -ENOMEM;
2001 +		break;
2002 +
2003 +	}
2004 +
2005 +
2006 +	return err;
2007 +}
2008 +
2009 +
2010 +#endif
2011 +
2012 +
2013  /*	Plugin object	*/
2014  static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = {
2015  	.plugin_name		= "C-EDF",
2016 @@ -824,6 +1854,9 @@ static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = {
2017  	.task_block		= cedf_task_block,
2018  	.admit_task		= cedf_admit_task,
2019  	.activate_plugin	= cedf_activate_plugin,
2020 +#ifdef CONFIG_LITMUS_LOCKING
2021 +	.allocate_lock		= cedf_allocate_lock,
2022 +#endif
2023  };
2024  
2025  static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL;
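
For illustration, the following userspace sketch shows how the lock types added above are reached through cedf_allocate_lock(). It is not part of the patch and assumes that the companion liblitmus patch mirrors the OMLP_SEM/OMIP_SEM object types and provides the usual litmus_open_lock(), litmus_lock(), litmus_unlock(), and od_close() helpers; the function name and the namespace path are purely illustrative.

	#include <litmus.h>

	int use_omip_resource(int resource_id)
	{
		int od, ret;

		/* The calling task must already be a LITMUS^RT real-time
		 * task; otherwise the lock call fails with -EPERM. */

		/* Allocates a kernel-side omip_semaphore via
		 * cedf_allocate_lock(OMIP_SEM, ...) and cedf_new_omip(). */
		od = litmus_open_lock(OMIP_SEM, resource_id,
				      "/tmp/omip-locks", NULL);
		if (od < 0)
			return od;

		ret = litmus_lock(od);	/* may suspend in cedf_omip_lock() */
		if (ret == 0) {
			/* ... critical section ... */
			ret = litmus_unlock(od);	/* cedf_omip_unlock() */
		}

		od_close(od);
		return ret;
	}

Passing OMLP_SEM instead of OMIP_SEM selects the OMLP variant (cedf_new_omlp()) in the same way.
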
2026 diff --git a/litmus/sched_pfp.c b/litmus/sched_pfp.c
2027 index 91e5239..0e875a3 100644
2028 --- a/litmus/sched_pfp.c
2029 +++ b/litmus/sched_pfp.c
2030 @@ -182,7 +182,7 @@ static struct task_struct* pfp_schedule(struct task_struct * prev)
2031  	np 	    = exists && is_np(pfp->scheduled);
2032  	sleep	    = exists && is_completed(pfp->scheduled);
2033  	migrate     = exists && get_partition(pfp->scheduled) != pfp->cpu;
2034 -	preempt     = migrate || fp_preemption_needed(&pfp->ready_queue, prev);
2035 +	preempt     = !blocks && (migrate || fp_preemption_needed(&pfp->ready_queue, prev));
2036  
2037  	/* If we need to preempt do so.
2038  	 * The following checks set resched to 1 in case of special
2039 @@ -1089,8 +1089,10 @@ static void pcp_priority_inheritance(void)
2040  		fp_set_prio_inh(pfp, blocker, blocked);
2041  	}
2042  
2043 -	/* check if anything changed */
2044 -	if (fp_higher_prio(fp_prio_peek(&pfp->ready_queue), pfp->scheduled))
2045 +	/* Check if anything changed. If the blocked job is current, then it is
2046 +	 * just blocking and hence is going to call the scheduler anyway. */
2047 +	if (blocked != current &&
2048 +	    fp_higher_prio(fp_prio_peek(&pfp->ready_queue), pfp->scheduled))
2049  		preempt(pfp);
2050  
2051  	raw_spin_unlock_irqrestore(&pfp->slock, flags);
2052 @@ -1201,10 +1203,10 @@ static void pcp_lower_ceiling(struct pcp_semaphore* sem)
2053  
2054  	TRACE_CUR("PCP released sem %p\n", sem);
2055  
2056 +	pcp_priority_inheritance();
2057 +
2058  	/* Wake up all ceiling-blocked jobs that now pass the ceiling. */
2059  	pcp_resume_unblocked();
2060 -
2061 -	pcp_priority_inheritance();
2062  }
2063  
2064  static void pcp_update_prio_ceiling(struct pcp_semaphore* sem,
2065 diff --git a/litmus/sched_trace.c b/litmus/sched_trace.c
2066 index f4171fd..536bc67 100644
2067 --- a/litmus/sched_trace.c
2068 +++ b/litmus/sched_trace.c
2069 @@ -75,7 +75,6 @@ void sched_trace_log_message(const char* fmt, ...)
2070  	va_end(args);
2071  }
2072  
2073 -
2074  /*
2075   * log_read - Read the trace buffer
2076   *
2077 @@ -199,23 +198,27 @@ static struct miscdevice litmus_log_dev = {
2078  	.fops    = &log_fops,
2079  };
2080  
2081 -#ifdef CONFIG_MAGIC_SYSRQ
2082 +
2083  void dump_trace_buffer(int max)
2084  {
2085 -	char line[80];
2086 +	char line[255];
2087  	int len;
2088  	int count = 0;
2089  
2090  	/* potential, but very unlikely, race... */
2091  	trace_recurse = 1;
2092 +	printk(KERN_CRIT "<<< LITMUS^RT TRACE() dump start >>>\n");
2093  	while ((max == 0 || count++ < max) &&
2094 -	       (len = kfifo_out(&debug_buffer, line, sizeof(line - 1))) > 0) {
2095 +	       (len = kfifo_out(&debug_buffer, line, sizeof(line) -1)) > 0) {
2096  		line[len] = '\0';
2097 -		printk("%s", line);
2098 +		printk(KERN_CRIT "%s", line);
2099  	}
2100 +	printk(KERN_CRIT "<<< LITMUS^RT TRACE() dump end >>>\n");
2101  	trace_recurse = 0;
2102  }
2103  
2104 +#ifdef CONFIG_MAGIC_SYSRQ
2105 +
2106  static void sysrq_dump_trace_buffer(int key)
2107  {
2108  	dump_trace_buffer(100);
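
With this change dump_trace_buffer() is no longer hidden behind CONFIG_MAGIC_SYSRQ, so it can also be called from ad-hoc debugging hooks. A hypothetical sketch, not part of the patch (the helper name and the invariant check are made up for illustration):

	#include <linux/kernel.h>
	#include <litmus/debug_trace.h>

	static void check_invariant_or_dump(int invariant_holds)
	{
		if (!invariant_holds) {
			printk(KERN_CRIT "invariant violated; dumping TRACE() log\n");
			/* max == 0 dumps the entire buffer; a positive value
			 * caps the number of lines, as in the sysrq handler. */
			dump_trace_buffer(0);
		}
	}
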
2109 diff --git a/litmus/sync.c b/litmus/sync.c
2110 index 873b3ff..3e79e0a 100644
2111 --- a/litmus/sync.c
2112 +++ b/litmus/sync.c
2113 @@ -89,7 +89,7 @@ static long do_release_ts(lt_t start)
2114  {
2115  	long  task_count = 0;
2116  
2117 -	struct list_head	*pos;
2118 +	struct list_head	*pos, *safe;
2119  	struct ts_release_wait	*wait;
2120  
2121  	if (mutex_lock_interruptible(&task_release_lock)) {
2122 @@ -101,7 +101,7 @@ static long do_release_ts(lt_t start)
2123  	sched_trace_sys_release(&start);
2124  
2125  	task_count = 0;
2126 -	list_for_each(pos, &task_release_list) {
2127 +	list_for_each_safe(pos, safe, &task_release_list) {
2128  		wait = (struct ts_release_wait*)
2129  			list_entry(pos, struct ts_release_wait, list);
2130  
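
The last hunk switches do_release_ts() from list_for_each() to the deletion-safe list_for_each_safe(), which caches the successor of the current entry so that entries may be unlinked while the list is walked. A generic illustration of the idiom (struct item and reap_done() are made-up names, not part of the patch):

	#include <linux/list.h>
	#include <linux/slab.h>

	struct item {
		struct list_head link;
		int done;
	};

	static void reap_done(struct list_head *head)
	{
		struct list_head *pos, *safe;
		struct item *it;

		/* 'safe' already points past 'pos' when the body runs, so
		 * unlinking and freeing the current entry does not break
		 * the traversal; plain list_for_each() would follow a
		 * pointer into freed memory here. */
		list_for_each_safe(pos, safe, head) {
			it = list_entry(pos, struct item, link);
			if (it->done) {
				list_del(pos);
				kfree(it);
			}
		}
	}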
