Attachment 'pgmrt-litmusrt-ecrts14.patch'
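
A minimal usage sketch (assuming a LITMUS^RT source tree checked out at the base commit this patch was generated against, with the patch file saved in the working directory):

   git apply --check pgmrt-litmusrt-ecrts14.patch   # optional dry run
   git am pgmrt-litmusrt-ecrts14.patch              # apply and commit with the original authorship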

   1 From 4013e79601ec3b912be6dabb6cdd5f4c17569bc8 Mon Sep 17 00:00:00 2001
   2 From: Glenn Elliott <gelliott@cs.unc.edu>
   3 Date: Wed, 19 Feb 2014 15:49:23 -0500
   4 Subject: [PATCH] ECRTS14 PGM^RT patch for Litmus^RT
   5 
   6 ---
   7  include/litmus/edf_split_common.h |   25 +
   8  include/litmus/feather_trace.h    |    3 +-
   9  include/litmus/litmus.h           |    4 +
  10  include/litmus/litmus_proc.h      |   38 ++
  11  include/litmus/pgm.h              |   12 +
  12  include/litmus/rt_param.h         |   21 +
  13  include/litmus/sched_plugin.h     |    6 +
  14  include/litmus/sched_trace.h      |   54 +-
  15  include/trace/events/litmus.h     |   53 +-
  16  include/trace/ftrace.h            |    7 +-
  17  litmus/Kconfig                    |   30 +-
  18  litmus/Makefile                   |    2 +
  19  litmus/edf_split_common.c         |  106 ++++
  20  litmus/litmus.c                   |   46 +-
  21  litmus/litmus_proc.c              |  171 +++++-
  22  litmus/pgm.c                      |   61 ++
  23  litmus/sched_cedf.c               |  143 ++++-
  24  litmus/sched_cfl_split.c          | 1146 +++++++++++++++++++++++++++++++++++++
  25  litmus/sched_gsn_edf.c            |  135 ++++-
  26  litmus/sched_pfair.c              |   64 +++
  27  litmus/sched_pfp.c                |   48 ++
  28  litmus/sched_plugin.c             |    8 +
  29  litmus/sched_psn_edf.c            |   50 ++
  30  litmus/sched_task_trace.c         |   24 +-
  31  24 files changed, 2222 insertions(+), 35 deletions(-)
  32  create mode 100644 include/litmus/edf_split_common.h
  33  create mode 100644 include/litmus/pgm.h
  34  create mode 100644 litmus/edf_split_common.c
  35  create mode 100644 litmus/pgm.c
  36  create mode 100644 litmus/sched_cfl_split.c
  37 
  38 diff --git a/include/litmus/edf_split_common.h b/include/litmus/edf_split_common.h
  39 new file mode 100644
  40 index 0000000..4e7c0ce
  41 --- /dev/null
  42 +++ b/include/litmus/edf_split_common.h
  43 @@ -0,0 +1,25 @@
  44 +/*
   45 + * Common data structures and utility functions for EDF-based scheduler
   46 + * plugins that support job splitting
  47 + */
  48 +
  49 +/* CLEANUP: Add comments and make it less messy.
  50 + *
  51 + */
  52 +
  53 +#ifndef __UNC_EDF_SPLIT_COMMON_H__
  54 +#define __UNC_EDF_SPLIT_COMMON_H__
  55 +
  56 +#include <litmus/rt_domain.h>
  57 +
  58 +void edf_split_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
  59 +			   release_jobs_t release);
  60 +
  61 +int edf_split_higher_prio(struct task_struct* first,
  62 +			  struct task_struct* second);
  63 +
  64 +int edf_split_ready_order(struct bheap_node* a, struct bheap_node* b);
  65 +
  66 +int edf_split_preemption_needed(rt_domain_t* rt, struct task_struct *t);
  67 +
  68 +#endif
  69 diff --git a/include/litmus/feather_trace.h b/include/litmus/feather_trace.h
  70 index 028dfb2..44481e9 100644
  71 --- a/include/litmus/feather_trace.h
  72 +++ b/include/litmus/feather_trace.h
  73 @@ -31,8 +31,7 @@ static inline int fetch_and_dec(int *val)
  74  #else /* !__ARCH_HAS_FEATHER_TRACE */
  75  
  76  /* provide default implementation */
  77 -
  78 -#include <asm/timex.h> /* for get_cycles() */
  79 +#include <linux/timex.h> /* for get_cycles() */
  80  
  81  static inline unsigned long long ft_timestamp(void)
  82  {
  83 diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h
  84 index e35c38c..c240d9c 100644
  85 --- a/include/litmus/litmus.h
  86 +++ b/include/litmus/litmus.h
  87 @@ -67,6 +67,7 @@ void litmus_exit_task(struct task_struct *tsk);
  88  /* job_param macros */
  89  #define get_exec_time(t)    (tsk_rt(t)->job_params.exec_time)
  90  #define get_deadline(t)		(tsk_rt(t)->job_params.deadline)
  91 +#define get_subjob_deadline(t)	(tsk_rt(t)->job_params.subjob_deadline)
  92  #define get_release(t)		(tsk_rt(t)->job_params.release)
  93  #define get_lateness(t)		(tsk_rt(t)->job_params.lateness)
  94  
  95 @@ -118,6 +119,9 @@ static inline lt_t litmus_clock(void)
  96  #define earlier_release(a, b)  (lt_before(\
  97  	(a)->rt_param.job_params.release,\
  98  	(b)->rt_param.job_params.release))
  99 +#define earlier_subjob_deadline(a, b) (lt_before(\
 100 +	(a)->rt_param.job_params.subjob_deadline,\
 101 +	(b)->rt_param.job_params.subjob_deadline))
 102  
 103  void preempt_if_preemptable(struct task_struct* t, int on_cpu);
 104  
 105 diff --git a/include/litmus/litmus_proc.h b/include/litmus/litmus_proc.h
 106 index 6800e72..a5db24c 100644
 107 --- a/include/litmus/litmus_proc.h
 108 +++ b/include/litmus/litmus_proc.h
 109 @@ -4,6 +4,22 @@
 110  int __init init_litmus_proc(void);
 111  void exit_litmus_proc(void);
 112  
 113 +struct cd_mapping
 114 +{
 115 +	int id;
 116 +	cpumask_var_t mask;
 117 +	struct proc_dir_entry *proc_file;
 118 +};
 119 +
 120 +struct domain_proc_info
 121 +{
 122 +	int num_cpus;
 123 +	int num_domains;
 124 +
 125 +	struct cd_mapping *cpu_to_domains;
 126 +	struct cd_mapping *domain_to_cpus;
 127 +};
 128 +
 129  /*
 130   * On success, returns 0 and sets the pointer to the location of the new
 131   * proc dir entry, otherwise returns an error code and sets pde to NULL.
 132 @@ -17,6 +33,28 @@ long make_plugin_proc_dir(struct sched_plugin* plugin,
 133   */
 134  void remove_plugin_proc_dir(struct sched_plugin* plugin);
 135  
 136 +/*
 137 + * Setup the CPU <-> sched domain mappings in proc
 138 + */
 139 +long activate_domain_proc(struct domain_proc_info* map);
 140 +
 141 +/*
 142 + * Remove the CPU <-> sched domain mappings from proc
 143 + */
 144 +long deactivate_domain_proc(void);
 145 +
 146 +/*
 147 + * Alloc memory for the mapping
 148 + * Note: Does not set up proc files. Use make_sched_domain_maps for that.
 149 + */
 150 +long init_domain_proc_info(struct domain_proc_info* map,
 151 +	int num_cpus, int num_domains);
 152 +
 153 +/*
 154 + * Free memory of the mapping
 155 + * Note: Does not clean up proc files. Use deactivate_domain_proc for that.
 156 + */
 157 +void destroy_domain_proc_info(struct domain_proc_info* map);
 158  
 159  /* Copy at most size-1 bytes from ubuf into kbuf, null-terminate buf, and
 160   * remove a '\n' if present. Returns the number of bytes that were read or
 161 diff --git a/include/litmus/pgm.h b/include/litmus/pgm.h
 162 new file mode 100644
 163 index 0000000..5682a76
 164 --- /dev/null
 165 +++ b/include/litmus/pgm.h
 166 @@ -0,0 +1,12 @@
 167 +#ifndef _LITMUS_PGM_H_
 168 +#define _LITMUS_PGM_H_
 169 +
 170 +#include <litmus/litmus.h>
 171 +
 172 +#define is_pgm_waiting(t) (tsk_rt(t)->ctrl_page && tsk_rt(t)->ctrl_page->pgm_waiting)
 173 +#define is_pgm_sending(t) (tsk_rt(t)->ctrl_page && tsk_rt(t)->ctrl_page->pgm_sending)
 174 +#define is_pgm_satisfied(t) (tsk_rt(t)->ctrl_page && tsk_rt(t)->ctrl_page->pgm_satisfied)
 175 +
 176 +int setup_pgm_release(struct task_struct* t);
 177 +
 178 +#endif
 179 diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
 180 index 138799f..76bb7d5 100644
 181 --- a/include/litmus/rt_param.h
 182 +++ b/include/litmus/rt_param.h
 183 @@ -51,6 +51,14 @@ typedef enum {
 184  	TASK_EARLY
 185  } release_policy_t;
 186  
 187 +typedef enum {
 188 +	PGM_NOT_A_NODE,
 189 +	PGM_SRC,
 190 +	PGM_SINK,
 191 +	PGM_SRC_SINK,
 192 +	PGM_INTERNAL
 193 +} pgm_node_type_t;
 194 +
 195  /* We use the common priority interpretation "lower index == higher priority",
 196   * which is commonly used in fixed-priority schedulability analysis papers.
 197   * So, a numerically lower priority value implies higher scheduling priority,
 198 @@ -76,11 +84,14 @@ struct rt_task {
 199  	lt_t 		period;
 200  	lt_t		relative_deadline;
 201  	lt_t		phase;
 202 +	int		split;
 203  	unsigned int	cpu;
 204  	unsigned int	priority;
 205  	task_class_t	cls;
 206  	budget_policy_t  budget_policy;  /* ignored by pfair */
 207  	release_policy_t release_policy;
 208 +	pgm_node_type_t	pgm_type;
 209 +	lt_t		pgm_expected_etoe;
 210  };
 211  
 212  union np_flag {
 213 @@ -121,6 +132,11 @@ struct control_page {
 214  	uint64_t irq_syscall_start; /* Snapshot of irq_count when the syscall
 215  				     * started. */
 216  
 217 +	/* Flags from userspace signifying PGM wait states. */
 218 +	volatile uint32_t	pgm_waiting;    /* waiting for tokens */
 219 +	volatile uint32_t	pgm_sending;    /* sending tokens */
 220 +	volatile uint32_t	pgm_satisfied;  /* done waiting/sending */
 221 +
 222  	/* to be extended */
 223  };
 224  
 225 @@ -144,6 +160,11 @@ struct rt_job {
 226  	/* What is the current deadline? */
 227  	lt_t   	deadline;
 228  
 229 +#ifdef CONFIG_JOB_SPLITTING
 230 +	/* What is the deadline of the current subjob under splitting? */
 231 +	lt_t	subjob_deadline;
 232 +#endif
 233 +
 234  	/* How much service has this job received so far? */
 235  	lt_t	exec_time;
 236  
 237 diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h
 238 index 0f2fe90..f173dca 100644
 239 --- a/include/litmus/sched_plugin.h
 240 +++ b/include/litmus/sched_plugin.h
 241 @@ -16,6 +16,8 @@
 242  typedef long (*activate_plugin_t) (void);
 243  typedef long (*deactivate_plugin_t) (void);
 244  
 245 +struct domain_proc_info;
 246 +typedef long (*get_domain_proc_info_t) (struct domain_proc_info **info);
 247  
 248  
 249  /********************* scheduler invocation ******************/
 250 @@ -69,6 +71,9 @@ typedef long (*admit_task_t)(struct task_struct* tsk);
 251  
 252  typedef void (*release_at_t)(struct task_struct *t, lt_t start);
 253  
 254 +/************************ misc routines ***********************/
 255 +
 256 +
 257  struct sched_plugin {
 258  	struct list_head	list;
 259  	/* 	basic info 		*/
 260 @@ -77,6 +82,7 @@ struct sched_plugin {
 261  	/*	setup			*/
 262  	activate_plugin_t	activate_plugin;
 263  	deactivate_plugin_t	deactivate_plugin;
 264 +	get_domain_proc_info_t	get_domain_proc_info;
 265  
 266  	/* 	scheduler invocation 	*/
 267  	scheduler_tick_t        tick;
 268 diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h
 269 index 82bde82..8c7dfae 100644
 270 --- a/include/litmus/sched_trace.h
 271 +++ b/include/litmus/sched_trace.h
 272 @@ -80,6 +80,18 @@ struct st_sys_release_data {
 273  	u64	release;
 274  };
 275  
 276 +struct st_pgm_param_data {
 277 +	u32 node_type;
 278 +	u16 graph_pid;
 279 +	u16 unused;
 280 +	u64 expected_graph_etoe;
 281 +};
 282 +
 283 +struct st_pgm_release_data {
 284 +	u64	release;	/* PGM-adjusted release time */
 285 +	u64	deadline;	/* PGM-adjusted deadline */
 286 +};
 287 +
 288  #define DATA(x) struct st_ ## x ## _data x;
 289  
 290  typedef enum {
 291 @@ -94,7 +106,9 @@ typedef enum {
 292  	ST_BLOCK,
 293  	ST_RESUME,
 294  	ST_ACTION,
 295 -	ST_SYS_RELEASE
 296 +	ST_SYS_RELEASE,
 297 +	ST_PGM_PARAM,
 298 +	ST_PGM_RELEASE
 299  } st_event_record_type_t;
 300  
 301  struct st_event_record {
 302 @@ -113,6 +127,8 @@ struct st_event_record {
 303  		DATA(resume);
 304  		DATA(action);
 305  		DATA(sys_release);
 306 +		DATA(pgm_param);
 307 +		DATA(pgm_release);
 308  	} data;
 309  };
 310  
 311 @@ -154,6 +170,10 @@ feather_callback void do_sched_trace_action(unsigned long id,
 312  					    unsigned long action);
 313  feather_callback void do_sched_trace_sys_release(unsigned long id,
 314  						 lt_t* start);
 315 +feather_callback void do_sched_trace_pgm_param(unsigned long id,
 316 +						struct task_struct* task);
 317 +feather_callback void do_sched_trace_pgm_release(unsigned long id,
 318 +						struct task_struct* task);
 319  
 320  #endif
 321  
 322 @@ -179,6 +199,8 @@ feather_callback void do_sched_trace_sys_release(unsigned long id,
 323  #define trace_litmus_task_block(t)
 324  #define trace_litmus_task_resume(t)
 325  #define trace_litmus_sys_release(start)
 326 +#define trace_litmus_pgm_param(t)
 327 +#define trace_litmus_pgm_release(t)
 328  
 329  #endif
 330  
 331 @@ -204,23 +226,25 @@ feather_callback void do_sched_trace_sys_release(unsigned long id,
 332  		trace_litmus_task_release(t);				\
 333  	} while (0)
 334  
 335 +/* place holder for sched_trace_task_assigned (+4) */
 336 +
 337  #define sched_trace_task_switch_to(t)					\
 338  	do {								\
 339 -		SCHED_TRACE(SCHED_TRACE_BASE_ID + 4,			\
 340 +		SCHED_TRACE(SCHED_TRACE_BASE_ID + 5,			\
 341  			do_sched_trace_task_switch_to, t);		\
 342  		trace_litmus_switch_to(t);				\
 343  	} while (0)
 344  
 345  #define sched_trace_task_switch_away(t)					\
 346  	do {								\
 347 -		SCHED_TRACE(SCHED_TRACE_BASE_ID + 5,			\
 348 +		SCHED_TRACE(SCHED_TRACE_BASE_ID + 6,			\
 349  			do_sched_trace_task_switch_away, t);		\
 350  		trace_litmus_switch_away(t);				\
 351  	} while (0)
 352  
 353  #define sched_trace_task_completion(t, forced)				\
 354  	do {								\
 355 -		SCHED_TRACE2(SCHED_TRACE_BASE_ID + 6,			\
 356 +		SCHED_TRACE2(SCHED_TRACE_BASE_ID + 7,			\
 357  				do_sched_trace_task_completion, t,	\
 358  				(unsigned long) forced);		\
 359  		trace_litmus_task_completion(t, forced);		\
 360 @@ -228,30 +252,44 @@ feather_callback void do_sched_trace_sys_release(unsigned long id,
 361  
 362  #define sched_trace_task_block(t)					\
 363  	do {								\
 364 -		SCHED_TRACE(SCHED_TRACE_BASE_ID + 7,			\
 365 +		SCHED_TRACE(SCHED_TRACE_BASE_ID + 8,			\
 366  			do_sched_trace_task_block, t);			\
 367  		trace_litmus_task_block(t);				\
 368  	} while (0)
 369  
 370  #define sched_trace_task_resume(t)					\
 371  	do {								\
 372 -		SCHED_TRACE(SCHED_TRACE_BASE_ID + 8,			\
 373 +		SCHED_TRACE(SCHED_TRACE_BASE_ID + 9,			\
 374  				do_sched_trace_task_resume, t);		\
 375  		trace_litmus_task_resume(t);				\
 376  	} while (0)
 377  
 378  #define sched_trace_action(t, action)					\
 379 -	SCHED_TRACE2(SCHED_TRACE_BASE_ID + 9,				\
 380 +	SCHED_TRACE2(SCHED_TRACE_BASE_ID + 10,				\
 381  		do_sched_trace_action, t, (unsigned long) action);
 382  
 383  /* when is a pointer, it does not need an explicit cast to unsigned long */
 384  #define sched_trace_sys_release(when)					\
 385  	do {								\
 386 -		SCHED_TRACE(SCHED_TRACE_BASE_ID + 10,			\
 387 +		SCHED_TRACE(SCHED_TRACE_BASE_ID + 11,			\
 388  			do_sched_trace_sys_release, when);		\
 389  		trace_litmus_sys_release(when);				\
 390  	} while (0)
 391  
 392 +#define sched_trace_pgm_param(t)					\
 393 +	do {								\
 394 +		SCHED_TRACE(SCHED_TRACE_BASE_ID + 12,			\
 395 +				do_sched_trace_pgm_param, t);		\
 396 +		trace_litmus_pgm_param(t);					\
 397 +	} while (0)
 398 +
 399 +#define sched_trace_pgm_release(t)					\
 400 +	do {								\
 401 +		SCHED_TRACE(SCHED_TRACE_BASE_ID + 13,			\
 402 +				do_sched_trace_pgm_release, t);		\
 403 +		trace_litmus_pgm_release(t);				\
 404 +	} while (0)
 405 +
 406  #define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */
 407  
 408  #endif /* __KERNEL__ */
 409 diff --git a/include/trace/events/litmus.h b/include/trace/events/litmus.h
 410 index 0fffcee..02b4b54 100644
 411 --- a/include/trace/events/litmus.h
 412 +++ b/include/trace/events/litmus.h
 413 @@ -39,7 +39,7 @@ TRACE_EVENT(litmus_task_param,
 414  		__entry->partition = get_partition(t);
 415  	),
 416  
 417 -	TP_printk("period(%d, %Lu).\nwcet(%d, %Lu).\n",
 418 +	TP_printk("period(%u, %Lu).\nwcet(%d, %Lu).\n",
 419  		__entry->pid, __entry->period,
 420  		__entry->pid, __entry->wcet)
 421  );
 422 @@ -225,6 +225,57 @@ TRACE_EVENT(litmus_sys_release,
 423  	TP_printk("SynRelease(%Lu) at %Lu\n", __entry->rel, __entry->when)
 424  );
 425  
 426 +/* Tracing PGM node parameters */
 427 +TRACE_EVENT(litmus_pgm_param,
 428 +
 429 +	TP_PROTO(struct task_struct *t),
 430 +
 431 +	TP_ARGS(t),
 432 +
 433 +	TP_STRUCT__entry(
 434 +		__field( pid_t, pid )
 435 +		__field( pgm_node_type_t, node_type )
 436 +		__field( pid_t, graph_pid )
 437 +	),
 438 +
 439 +	TP_fast_assign(
 440 +		__entry->pid		= t ? t->pid  : 0;
 441 +		__entry->node_type	= t ? t->rt_param.task_params.pgm_type : PGM_NOT_A_NODE;
 442 +		__entry->graph_pid	= t ? t->tgid : 0;
 443 +	),
 444 +
 445 +	TP_printk("pgm node (%u, node type = %d) in graph (%u)\n",
 446 +		__entry->pid, __entry->node_type, __entry->graph_pid)
 447 +);
 448 +
 449 +/*
 450 + * Tracing PGM-adjusted job release
 451 + */
 452 +TRACE_EVENT(litmus_pgm_release,
 453 +
 454 +	TP_PROTO(struct task_struct *t),
 455 +
 456 +	TP_ARGS(t),
 457 +
 458 +	TP_STRUCT__entry(
 459 +		__field( pid_t,		pid	)
 460 +		__field( unsigned int,	job	)
 461 +		__field( lt_t,		release	)
 462 +		__field( lt_t,		deadline	)
 463 +	),
 464 +
 465 +	TP_fast_assign(
 466 +		__entry->pid	= t ? t->pid : 0;
 467 +		__entry->job	= t ? t->rt_param.job_params.job_no : 0;
 468 +		__entry->release	= get_release(t);
 469 +		__entry->deadline	= get_deadline(t);
 470 +	),
 471 +
 472 +	TP_printk("release(job(%u, %u)): %Lu\ndeadline(job(%u, %u)): %Lu\n",
 473 +			__entry->pid, __entry->job, __entry->release,
 474 +			__entry->pid, __entry->job, __entry->deadline)
 475 +);
 476 +
 477  #endif /* _SCHED_TASK_TRACEPOINT_H */
 478  
 479  /* Must stay outside the protection */
 480 diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
 481 index 19edd7f..53b90b6 100644
 482 --- a/include/trace/ftrace.h
 483 +++ b/include/trace/ftrace.h
 484 @@ -18,6 +18,9 @@
 485  
 486  #include <linux/ftrace_event.h>
 487  
 488 +/* for litmus_clock() */
 489 +#include <litmus/litmus.h>
 490 +
 491  /*
 492   * DECLARE_EVENT_CLASS can be used to add a generic function
 493   * handlers for events. That is, if all events have the same
 494 @@ -54,7 +57,7 @@
 495  #define __string(item, src) __dynamic_array(char, item, -1)
 496  
 497  #undef TP_STRUCT__entry
 498 -#define TP_STRUCT__entry(args...) args
 499 +#define TP_STRUCT__entry(args...) args __field( unsigned long long, __rt_ts )
 500  
 501  #undef DECLARE_EVENT_CLASS
 502  #define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print)	\
 503 @@ -501,7 +504,7 @@ static inline notrace int ftrace_get_offsets_##call(			\
 504  	strcpy(__get_str(dst), src);
 505  
 506  #undef TP_fast_assign
 507 -#define TP_fast_assign(args...) args
 508 +#define TP_fast_assign(args...) args; __entry->__rt_ts = litmus_clock();
 509  
 510  #undef TP_perf_assign
 511  #define TP_perf_assign(args...)
 512 diff --git a/litmus/Kconfig b/litmus/Kconfig
 513 index 5d5d6eb29..63d35db 100644
 514 --- a/litmus/Kconfig
 515 +++ b/litmus/Kconfig
 516 @@ -12,6 +12,15 @@ config PLUGIN_CEDF
 517            On smaller platforms (e.g., ARM PB11MPCore), using C-EDF
 518            makes little sense since there aren't any shared caches.
 519  
 520 +config PLUGIN_CFL
 521 +	bool "Clustered-Fair-Lateness"
 522 +	depends on X86 && SYSFS && JOB_SPLITTING
 523 +	default n
 524 +	help
 525 +	  Include the Clustered Fair Lateness (C-FL) plugin in the kernel.
 526 +	  This implements Anderson and Erickson's EDF-based scheduler.
 527 +	  Supports job splitting.
 528 +
 529  config PLUGIN_PFAIR
 530  	bool "PFAIR"
 531  	depends on HIGH_RES_TIMERS && HZ_PERIODIC && HZ = "1000"
 532 @@ -26,6 +35,13 @@ config PLUGIN_PFAIR
 533  
 534  	  If unsure, say Yes.
 535  
 536 +config JOB_SPLITTING
 537 +	bool "Job Splitting"
 538 +	default n
 539 +	help
 540 +	  Enable job-splitting features for fair-lateness schedulers, such
 541 +	  as C-FL.
 542 +
 543  config RELEASE_MASTER
 544          bool "Release-master Support"
 545  	depends on ARCH_HAS_SEND_PULL_TIMERS && SMP
 546 @@ -58,6 +74,18 @@ config BUG_ON_MIGRATION_DEADLOCK
 547  	  BUG() triggers, the scheduler is broken and turning off this option
 548  	  won't fix it.
 549  
 550 +config SCHED_PGM
 551 +	bool "PGM Support"
 552 +	default n
 553 +	depends on LITMUS_LOCKING && ALLOW_EARLY_RELEASE
 554 +	help
 555 +	  Include infrastructure for scheduling PGM graphs. Since PGM token
 556 +	  constraints are not (yet) implemented in the kernel, a job must
  557 +	  tell Litmus when it is waiting for tokens. Litmus boosts the
  558 +	  priority of waiting jobs (which are expected to be well-behaved
  559 +	  and sleep while waiting for tokens) to ensure bounded priority
  560 +	  inversions. Litmus may also change a job's release/deadline depending
  561 +	  upon when the job's input tokens are generated.
 562  
 563  endmenu
 564  
 565 @@ -216,7 +244,7 @@ config SCHED_TASK_TRACE
 566  config SCHED_TASK_TRACE_SHIFT
 567         int "Buffer size for sched_trace_xxx() events"
 568         depends on SCHED_TASK_TRACE
 569 -       range 8 13
 570 +       range 8 28
 571         default 9
 572         help
 573  
 574 diff --git a/litmus/Makefile b/litmus/Makefile
 575 index 2bddc94..6c83cf1 100644
 576 --- a/litmus/Makefile
 577 +++ b/litmus/Makefile
 578 @@ -25,7 +25,9 @@ obj-y     = sched_plugin.o litmus.o \
 579  
 580  obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
 581  obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o
 582 +obj-$(CONFIG_PLUGIN_CFL) += sched_cfl_split.o edf_split_common.o
 583  obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o
 584 +obj-$(CONFIG_SCHED_PGM) += pgm.o
 585  
 586  obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
 587  obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o
 588 diff --git a/litmus/edf_split_common.c b/litmus/edf_split_common.c
 589 new file mode 100644
 590 index 0000000..7d7f542
 591 --- /dev/null
 592 +++ b/litmus/edf_split_common.c
 593 @@ -0,0 +1,106 @@
 594 +/*
  595 + * litmus/edf_split_common.c
  596 + *
  597 + * Common functions for EDF-based schedulers with split jobs.
 598 + */
 599 +
 600 +#include <linux/percpu.h>
 601 +#include <linux/sched.h>
 602 +#include <linux/list.h>
 603 +
 604 +#include <litmus/litmus.h>
 605 +#include <litmus/sched_plugin.h>
 606 +#include <litmus/sched_trace.h>
 607 +
 608 +#include <litmus/edf_split_common.h>
 609 +#include <litmus/edf_common.h>
 610 +
 611 +/* edf_split_higher_prio -  returns true if first has a higher subjob
 612 + *                    EDF priority than second.
 613 + *
 614 + * both first and second may be NULL
 615 + */
 616 +int edf_split_higher_prio(struct task_struct* first,
 617 +			  struct task_struct* second)
 618 +{
 619 +	struct task_struct *first_task = first;
 620 +	struct task_struct *second_task = second;
 621 +
 622 +	/* There is no point in comparing a task to itself. */
 623 +	if (first && first == second) {
 624 +		TRACE_TASK(first,
 625 +			   "WARNING: pointless edf priority comparison.\n");
 626 +		return 0;
 627 +	}
 628 +
 629 +	/* check for NULL tasks */
 630 +	if (!first || !second)
 631 +		return first && !second;
 632 +
 633 +#ifdef CONFIG_LITMUS_LOCKING
 634 +
 635 +	/* Check for inherited priorities. Change task
 636 +	 * used for comparison in such a case.
 637 +	 */
 638 +	if (unlikely(first->rt_param.inh_task))
 639 +		first_task = first->rt_param.inh_task;
 640 +	if (unlikely(second->rt_param.inh_task))
 641 +		second_task = second->rt_param.inh_task;
 642 +
 643 +	/* Check for priority boosting. Tie-break by start of boosting.
 644 +	 */
 645 +	if (unlikely(is_priority_boosted(first_task))) {
 646 +		/* first_task is boosted, how about second_task? */
 647 +		if (!is_priority_boosted(second_task) ||
 648 +			lt_before(get_boost_start(first_task),
 649 +					get_boost_start(second_task)))
 650 +			return 1;
 651 +		else
 652 +			return 0;
 653 +	} else if (unlikely(is_priority_boosted(second_task)))
 654 +		/* second_task is boosted, first is not*/
 655 +		return 0;
 656 +#endif
 657 +
 658 +	if (earlier_subjob_deadline(first_task, second_task)) {
 659 +		return 1;
 660 +	}
 661 +	else if (get_subjob_deadline(first_task) == get_subjob_deadline(second_task)) {
 662 +		/* use normal edf to tie-break */
 663 +		return edf_higher_prio(first, second);
 664 +	}
 665 +	return 0; /* fall-through. prio(second_task) > prio(first_task) */	
 666 +}
 667 +
 668 +int edf_split_ready_order(struct bheap_node* a, struct bheap_node* b)
 669 +{
 670 +	return edf_split_higher_prio(bheap2task(a), bheap2task(b));
 671 +}
 672 +
 673 +void edf_split_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
 674 +		      release_jobs_t release)
 675 +{
 676 +	rt_domain_init(rt,  edf_split_ready_order, resched, release);
 677 +}
 678 +
 679 +/* need_to_preempt - check whether the task t needs to be preempted
 680 + *                   call only with irqs disabled and with  ready_lock acquired
 681 + *                   THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT!
 682 + */
 683 +int edf_split_preemption_needed(rt_domain_t* rt, struct task_struct *t)
 684 +{
 685 +	/* we need the read lock for edf_ready_queue */
 686 +	/* no need to preempt if there is nothing pending */
 687 +	if (!__jobs_pending(rt))
 688 +		return 0;
 689 +	/* we need to reschedule if t doesn't exist */
 690 +	if (!t)
 691 +		return 1;
 692 +
 693 +	/* NOTE: We cannot check for non-preemptibility since we
 694 +	 *       don't know what address space we're currently in.
 695 +	 */
 696 +
 697 +	/* make sure to get non-rt stuff out of the way */
 698 +	return !is_realtime(t) || edf_split_higher_prio(__next_ready(rt), t);
 699 +}
 700 diff --git a/litmus/litmus.c b/litmus/litmus.c
 701 index 10e45b7..058cb95 100644
 702 --- a/litmus/litmus.c
 703 +++ b/litmus/litmus.c
 704 @@ -107,12 +107,19 @@ asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param)
 705  	if (tp.relative_deadline == 0)
 706  		tp.relative_deadline = tp.period;
 707  
 708 -	if (tp.exec_cost <= 0)
 709 +	if (tp.exec_cost <= 0) {
 710 +		printk(KERN_INFO "litmus: real-time task %d rejected "
  711 +				"because declared job execution time <= 0.\n", pid);
 712  		goto out_unlock;
 713 -	if (tp.period <= 0)
 714 +	}
 715 +	if (tp.period <= 0) {
 716 +		printk(KERN_INFO "litmus: real-time task %d rejected "
  717 +				"because declared job period <= 0.\n", pid);
 718  		goto out_unlock;
 719 -	if (!cpu_online(tp.cpu))
 720 +	}
 721 +	if (!cpu_online(tp.cpu)) {
 722  		goto out_unlock;
 723 +	}
 724  	if (min(tp.relative_deadline, tp.period) < tp.exec_cost) /*density check*/
 725  	{
 726  		printk(KERN_INFO "litmus: real-time task %d rejected "
 727 @@ -137,6 +144,12 @@ asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param)
 728  		       pid, tp.budget_policy);
 729  		goto out_unlock;
 730  	}
 731 +	if (tp.pgm_type < PGM_NOT_A_NODE || tp.pgm_type > PGM_INTERNAL) {
 732 +		printk(KERN_INFO "litmus: real-time task %d rejected "
 733 +			   "because of unknown PGM node type specified (%d)\n",
 734 +			   pid, tp.pgm_type);
 735 +		goto out_unlock;
 736 +	}
 737  
 738  	target->rt_param.task_params = tp;
 739  
 740 @@ -331,9 +344,13 @@ long litmus_admit_task(struct task_struct* tsk)
 741  	if (get_rt_relative_deadline(tsk) == 0 ||
 742  	    get_exec_cost(tsk) >
 743  			min(get_rt_relative_deadline(tsk), get_rt_period(tsk)) ) {
 744 +		printk(KERN_INFO "litmus: invalid task parameters "
 745 +			"(e = %llu, p = %llu, d = %llu)\n",
 746 +			get_exec_cost(tsk), get_rt_period(tsk),
 747 +			get_rt_relative_deadline(tsk));
 748  		TRACE_TASK(tsk,
 749  			"litmus admit: invalid task parameters "
 750 -			"(e = %lu, p = %lu, d = %lu)\n",
 751 +			"(e = %llu, p = %llu, d = %llu)\n",
 752  			get_exec_cost(tsk), get_rt_period(tsk),
 753  			get_rt_relative_deadline(tsk));
 754  		retval = -EINVAL;
 755 @@ -341,6 +358,8 @@ long litmus_admit_task(struct task_struct* tsk)
 756  	}
 757  
 758  	if (!cpu_online(get_partition(tsk))) {
 759 +		printk(KERN_INFO "litmus: cpu %d is not online\n",
 760 +			   get_partition(tsk));
 761  		TRACE_TASK(tsk, "litmus admit: cpu %d is not online\n",
 762  			   get_partition(tsk));
 763  		retval = -EINVAL;
 764 @@ -369,6 +388,7 @@ long litmus_admit_task(struct task_struct* tsk)
 765  	if (!retval) {
 766  		sched_trace_task_name(tsk);
 767  		sched_trace_task_param(tsk);
 768 +		sched_trace_pgm_param(tsk);
 769  		atomic_inc(&rt_task_count);
 770  	}
 771  
 772 @@ -376,8 +396,10 @@ long litmus_admit_task(struct task_struct* tsk)
 773  
 774  out:
 775  	if (retval) {
 776 -		bheap_node_free(tsk_rt(tsk)->heap_node);
 777 -		release_heap_free(tsk_rt(tsk)->rel_heap);
 778 +		if (tsk_rt(tsk)->heap_node)
 779 +			bheap_node_free(tsk_rt(tsk)->heap_node);
 780 +		if (tsk_rt(tsk)->rel_heap)
 781 +			release_heap_free(tsk_rt(tsk)->rel_heap);
 782  	}
 783  	return retval;
 784  }
 785 @@ -402,20 +424,30 @@ static int do_plugin_switch(void *_plugin)
 786  {
 787  	int ret;
 788  	struct sched_plugin* plugin = _plugin;
 789 +	struct domain_proc_info* domain_info;
 790  
 791  	/* don't switch if there are active real-time tasks */
 792  	if (atomic_read(&rt_task_count) == 0) {
 793 +		deactivate_domain_proc();
 794  		ret = litmus->deactivate_plugin();
 795 -		if (0 != ret)
 796 +		if (0 != ret) {
 797 +			/* reactivate the old proc info */
 798 +			if(!litmus->get_domain_proc_info(&domain_info))
 799 +				activate_domain_proc(domain_info);
 800  			goto out;
 801 +		}
 802  		ret = plugin->activate_plugin();
 803  		if (0 != ret) {
 804  			printk(KERN_INFO "Can't activate %s (%d).\n",
 805  			       plugin->plugin_name, ret);
 806  			plugin = &linux_sched_plugin;
 807  		}
 808 +
 809  		printk(KERN_INFO "Switching to LITMUS^RT plugin %s.\n", plugin->plugin_name);
 810  		litmus = plugin;
 811 +
 812 +		if(!litmus->get_domain_proc_info(&domain_info))
 813 +			activate_domain_proc(domain_info);
 814  	} else
 815  		ret = -EBUSY;
 816  out:
 817 diff --git a/litmus/litmus_proc.c b/litmus/litmus_proc.c
 818 index 1ebf127..4db3fe2 100644
 819 --- a/litmus/litmus_proc.c
 820 +++ b/litmus/litmus_proc.c
 821 @@ -3,6 +3,7 @@
 822   */
 823  
 824  #include <linux/sched.h>
 825 +#include <linux/slab.h>
 826  #include <linux/uaccess.h>
 827  #include <linux/seq_file.h>
 828  
 829 @@ -21,7 +22,10 @@ static struct proc_dir_entry *litmus_dir = NULL,
 830  #ifdef CONFIG_RELEASE_MASTER
 831  	*release_master_file = NULL,
 832  #endif
 833 -	*plugs_file = NULL;
 834 +	*plugs_file = NULL,
 835 +	*domains_dir = NULL,
 836 +	*cpus_dir = NULL;
 837 +
 838  
 839  /* in litmus/sync.c */
 840  int count_tasks_waiting_for_release(void);
 841 @@ -218,11 +222,32 @@ int __init init_litmus_proc(void)
 842  	plugs_file = proc_create("loaded", 0444, plugs_dir,
 843  				 &litmus_loaded_proc_fops);
 844  
 845 +	domains_dir = proc_mkdir("domains", litmus_dir);
 846 +	if (!domains_dir) {
 847 +		printk(KERN_ERR "Could not allocate domains directory "
 848 +				"procfs entry.\n");
 849 +		return -ENOMEM;
 850 +	}
 851 +
 852 +	cpus_dir = proc_mkdir("cpus", litmus_dir);
 853 +	if (!cpus_dir) {
 854 +		printk(KERN_ERR "Could not allocate cpus directory "
 855 +				"procfs entry.\n");
 856 +		return -ENOMEM;
 857 +	}
 858 +
 859  	return 0;
 860  }
 861  
 862  void exit_litmus_proc(void)
 863  {
 864 +	if (cpus_dir || domains_dir) {
 865 +		deactivate_domain_proc();
 866 +		if (cpus_dir)
 867 +			remove_proc_entry("cpus", litmus_dir);
 868 +		if (domains_dir)
 869 +			remove_proc_entry("domains", litmus_dir);
 870 +	}
 871  	if (plugs_file)
 872  		remove_proc_entry("loaded", plugs_dir);
 873  	if (plugs_dir)
 874 @@ -405,3 +430,147 @@ struct proc_dir_entry* create_cluster_file(struct proc_dir_entry* parent,
 875  	}
 876  	return cluster_file;
 877  }
 878 +
 879 +static struct domain_proc_info* active_mapping = NULL;
 880 +
 881 +static int litmus_mapping_proc_show(struct seq_file *m, void *v)
 882 +{
 883 +	struct cd_mapping *mapping = (struct cd_mapping*) m->private;
 884 +	char buf[256];
 885 +
 886 +	if(!mapping)
 887 +		return 0;
 888 +
 889 +	cpumask_scnprintf(buf, sizeof(buf), mapping->mask);
 890 +	buf[255] = '\0'; /* just in case... */
 891 +	seq_printf(m, "%s\n", buf);
 892 +	return 0;
 893 +}
 894 +
 895 +static int litmus_mapping_proc_open(struct inode *inode, struct file *file)
 896 +{
 897 +	return single_open(file, litmus_mapping_proc_show, PDE_DATA(inode));
 898 +}
 899 +
 900 +static const struct file_operations litmus_domain_proc_fops = {
 901 +	.open		= litmus_mapping_proc_open,
 902 +	.read		= seq_read,
 903 +	.llseek 	= seq_lseek,
 904 +	.release 	= single_release,
 905 +};
 906 +
 907 +long activate_domain_proc(struct domain_proc_info* map)
 908 +{
 909 +	int i;
 910 +	char name[8];
 911 +
 912 +	if (!map)
 913 +		return -EINVAL;
 914 +	if (cpus_dir == NULL || domains_dir == NULL)
 915 +		return -EINVAL;
 916 +
 917 +	if (active_mapping)
 918 +		deactivate_domain_proc();
 919 +
 920 +	active_mapping = map;
 921 +
 922 +	for (i = 0; i < map->num_cpus; ++i) {
 923 +		struct cd_mapping* m = &map->cpu_to_domains[i];
 924 +		snprintf(name, sizeof(name), "%d", m->id);
 925 +		m->proc_file = proc_create_data(name, 0444, cpus_dir,
 926 +			&litmus_domain_proc_fops, (void*)m);
 927 +	}
 928 +
 929 +	for (i = 0; i < map->num_domains; ++i) {
 930 +		struct cd_mapping* m = &map->domain_to_cpus[i];
 931 +		snprintf(name, sizeof(name), "%d", m->id);
 932 +		m->proc_file = proc_create_data(name, 0444, domains_dir,
 933 +			&litmus_domain_proc_fops, (void*)m);
 934 +	}
 935 +
 936 +	return 0;
 937 +}
 938 +
 939 +long deactivate_domain_proc()
 940 +{
 941 +	int i;
 942 +	char name[65];
 943 +
 944 +	struct domain_proc_info* map = active_mapping;
 945 +
 946 +	if (!map)
 947 +		return -EINVAL;
 948 +
 949 +	for (i = 0; i < map->num_cpus; ++i) {
 950 +		struct cd_mapping* m = &map->cpu_to_domains[i];
 951 +		snprintf(name, sizeof(name), "%d", m->id);
 952 +		remove_proc_entry(name, cpus_dir);
 953 +		m->proc_file = NULL;
 954 +	}
 955 +	for (i = 0; i < map->num_domains; ++i) {
 956 +		struct cd_mapping* m = &map->domain_to_cpus[i];
 957 +		snprintf(name, sizeof(name), "%d", m->id);
 958 +		remove_proc_entry(name, domains_dir);
 959 +		m->proc_file = NULL;
 960 +	}
 961 +
 962 +	active_mapping = NULL;
 963 +
 964 +	return 0;
 965 +}
 966 +
 967 +long init_domain_proc_info(struct domain_proc_info* m,
 968 +				int num_cpus, int num_domains)
 969 +{
 970 +	int i;
 971 +	int num_alloced_cpu_masks = 0;
 972 +	int num_alloced_domain_masks = 0;
 973 +
 974 +	m->cpu_to_domains =
 975 +		kmalloc(sizeof(*(m->cpu_to_domains))*num_cpus,
 976 +			GFP_ATOMIC);
 977 +	if(!m->cpu_to_domains)
 978 +		goto failure;
 979 +
 980 +	m->domain_to_cpus =
 981 +		kmalloc(sizeof(*(m->domain_to_cpus))*num_domains,
 982 +			GFP_ATOMIC);
 983 +	if(!m->domain_to_cpus)
 984 +		goto failure;
 985 +
 986 +	for(i = 0; i < num_cpus; ++i) {
 987 +		if(!zalloc_cpumask_var(&m->cpu_to_domains[i].mask, GFP_ATOMIC))
 988 +			goto failure;
 989 +		++num_alloced_cpu_masks;
 990 +	}
 991 +	for(i = 0; i < num_domains; ++i) {
 992 +		if(!zalloc_cpumask_var(&m->domain_to_cpus[i].mask, GFP_ATOMIC))
 993 +			goto failure;
 994 +		++num_alloced_domain_masks;
 995 +	}
 996 +
 997 +	return 0;
 998 +
 999 +failure:
1000 +	for(i = 0; i < num_alloced_cpu_masks; ++i)
1001 +		free_cpumask_var(m->cpu_to_domains[i].mask);
1002 +	for(i = 0; i < num_alloced_domain_masks; ++i)
1003 +		free_cpumask_var(m->domain_to_cpus[i].mask);
1004 +	if(m->cpu_to_domains)
1005 +		kfree(m->cpu_to_domains);
1006 +	if(m->domain_to_cpus)
1007 +		kfree(m->domain_to_cpus);
1008 +	return -ENOMEM;
1009 +}
1010 +
1011 +void destroy_domain_proc_info(struct domain_proc_info* m)
1012 +{
1013 +	int i;
1014 +	for(i = 0; i < m->num_cpus; ++i)
1015 +		free_cpumask_var(m->cpu_to_domains[i].mask);
1016 +	for(i = 0; i < m->num_domains; ++i)
1017 +		free_cpumask_var(m->domain_to_cpus[i].mask);
1018 +	kfree(m->cpu_to_domains);
1019 +	kfree(m->domain_to_cpus);
 1020 +	memset(m, 0, sizeof(*m));
1021 +}
1022 diff --git a/litmus/pgm.c b/litmus/pgm.c
1023 new file mode 100644
1024 index 0000000..db3378f
1025 --- /dev/null
1026 +++ b/litmus/pgm.c
1027 @@ -0,0 +1,61 @@
1028 +/* litmus/pgm.c - common pgm control code
1029 + */
1030 +
1031 +#include <linux/sched.h>
1032 +#include <litmus/litmus.h>
1033 +#include <litmus/pgm.h>
1034 +#include <litmus/sched_trace.h>
1035 +
1036 +/* Only readjust release/deadline if difference is over a given threshold.
1037 +   It's a weak method for accounting overheads. Ideally, we'd know the last
1038 +   time t was woken up by its last predecessor, rather than having to look
1039 +   at 'now'. Adjustment threshold currently set to 200us. */
1040 +#define ADJUSTMENT_THRESH_NS (200*1000LL)
1041 +
1042 +int setup_pgm_release(struct task_struct* t)
1043 +{
1044 +	int shifted_release = 0;
1045 +
1046 +	/* approximate time last predecessor gave us tokens */
1047 +	lt_t now = litmus_clock();
1048 +
1049 +	TRACE_TASK(t, "is starting a new PGM job: waiting:%d\n",
1050 +		tsk_rt(t)->ctrl_page->pgm_waiting);
1051 +
1052 +	BUG_ON(!tsk_rt(t)->ctrl_page->pgm_waiting);
1053 +
1054 +	/* Adjust release time if we got the last tokens after release of this job.
1055 +	   This is possible since PGM jobs are early-released. Don't shift our
1056 +	   deadline if we got the tokens earlier than expected. */
1057 +	if (now > tsk_rt(t)->job_params.release) {
1058 +		long long diff_ns = now - tsk_rt(t)->job_params.release;
1059 +		if (diff_ns > ADJUSTMENT_THRESH_NS) {
1060 +			lt_t adj_deadline = now + get_rt_relative_deadline(t);
1061 +
1062 +			TRACE_TASK(t, "adjusting PGM release time from (r = %llu, d = %llu) "
1063 +				"to (r = %llu, d = %llu)\n",
1064 +				tsk_rt(t)->job_params.release, tsk_rt(t)->job_params.deadline,
1065 +				now, adj_deadline);
1066 +
1067 +			tsk_rt(t)->job_params.release = now;
1068 +			tsk_rt(t)->job_params.deadline = adj_deadline;
1069 +			shifted_release = 1;
1070 +		}
1071 +		else {
1072 +			TRACE_TASK(t, "adjustment falls below threshold. %lld < %lld\n",
1073 +				diff_ns, ADJUSTMENT_THRESH_NS);
1074 +		}
1075 +	}
1076 +	else {
1077 +		TRACE_TASK(t, "got tokens early--no need to adjust release. "
1078 +			"cur time = %llu, release time = %llu\n",
1079 +			now, tsk_rt(t)->job_params.release);
1080 +	}
1081 +
 1082 +	/* It is possible for multiple pgm_release records to be logged.
 1083 +	   Analysis tools should filter out all but the last pgm_release for
 1084 +	   a given job release. */
1085 +	sched_trace_pgm_release(t);
1086 +
1087 +	return shifted_release;
1088 +}
1089 diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
1090 index 7cb3cc0..33ea89e 100644
1091 --- a/litmus/sched_cedf.c
1092 +++ b/litmus/sched_cedf.c
1093 @@ -48,6 +48,10 @@
1094  #include <litmus/affinity.h>
1095  #endif
1096  
1097 +#ifdef CONFIG_SCHED_PGM
1098 +#include <litmus/pgm.h>
1099 +#endif
1100 +
1101  /* to configure the cluster size */
1102  #include <litmus/litmus_proc.h>
1103  #include <linux/uaccess.h>
1104 @@ -452,13 +456,71 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
1105  	if (exists)
1106  		TRACE_TASK(prev,
1107  			   "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
1108 -			   "state:%d sig:%d\n",
1109 +			   "state:%d sig:%d boosted:%d\n",
1110  			   blocks, out_of_time, np, sleep, preempt,
1111 -			   prev->state, signal_pending(prev));
1112 +			   prev->state, signal_pending(prev),
1113 +			   is_priority_boosted(entry->scheduled));
1114  	if (entry->linked && preempt)
1115  		TRACE_TASK(prev, "will be preempted by %s/%d\n",
1116  			   entry->linked->comm, entry->linked->pid);
1117  
1118 +#ifdef CONFIG_SCHED_PGM
1119 +	if (exists) {
1120 +		if (is_pgm_sending(entry->scheduled)) {
1121 +			if (!is_pgm_satisfied(entry->scheduled)) {
1122 +				if (!is_priority_boosted(entry->scheduled)) {
1123 +					TRACE_TASK(entry->scheduled, "is sending PGM tokens and needs boosting.\n");
1124 +					BUG_ON(is_pgm_satisfied(entry->scheduled));
1125 +
 1126 +					/* We are either sending tokens or waiting for tokens.
1127 +					   If waiting: Boost priority so we'll be scheduled
1128 +						immediately when needed tokens arrive.
1129 +					   If sending: Boost priority so no one (specifically, our
1130 +						consumers) will preempt us while signalling the token
1131 +						transmission.
1132 +					*/
1133 +					tsk_rt(entry->scheduled)->priority_boosted = 1;
1134 +					tsk_rt(entry->scheduled)->boost_start_time = litmus_clock();
1135 +
1136 +					if (likely(!blocks)) {
1137 +						unlink(entry->scheduled);
1138 +						cedf_job_arrival(entry->scheduled);
1139 +						/* we may regain the processor */
1140 +						if (preempt) {
1141 +							preempt = entry->scheduled != entry->linked;
1142 +							if (!preempt) {
1143 +								TRACE_TASK(entry->scheduled, "blocked preemption by lazy boosting.\n");
1144 +							}
1145 +						}
1146 +					}
1147 +				}
1148 +			}
1149 +			else { /* sending is satisfied */
1150 +				tsk_rt(entry->scheduled)->ctrl_page->pgm_sending = 0;
1151 +				tsk_rt(entry->scheduled)->ctrl_page->pgm_satisfied = 0;
1152 +
1153 +				if (is_priority_boosted(entry->scheduled)) {
1154 +					TRACE_TASK(entry->scheduled,
 1155 +							"is done sending PGM tokens and must relinquish boosting.\n");
1156 +					/* clear boosting */
1157 +					tsk_rt(entry->scheduled)->priority_boosted = 0;
1158 +					if(likely(!blocks)) {
1159 +						/* recheck priority */
1160 +						unlink(entry->scheduled);
1161 +						cedf_job_arrival(entry->scheduled);
1162 +						/* we may lose the processor */
1163 +						if (!preempt) {
1164 +							preempt = entry->scheduled != entry->linked;
1165 +							if (preempt) {
1166 +								TRACE_TASK(entry->scheduled, "preempted by lazy unboosting.\n");
1167 +							}
1168 +						}
1169 +					}
1170 +				}
1171 +			}
1172 +		}
1173 +	}
1174 +#endif
1175  
1176  	/* If a task blocks we have no choice but to reschedule.
1177  	 */
1178 @@ -470,10 +532,13 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
1179  	 * that we are still linked. Multiple calls to request_exit_np() don't
1180  	 * hurt.
1181  	 */
1182 -	if (np && (out_of_time || preempt || sleep)) {
1183 +	if (np && (out_of_time || sleep)) {
1184  		unlink(entry->scheduled);
1185  		request_exit_np(entry->scheduled);
1186  	}
1187 +	else if(np && preempt) {
1188 +		request_exit_np(entry->scheduled);
1189 +	}
1190  
1191  	/* Any task that is preemptable and either exhausts its execution
1192  	 * budget or wants to sleep completes. We may have to reschedule after
1193 @@ -503,12 +568,13 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
1194  			entry->scheduled->rt_param.scheduled_on = NO_CPU;
1195  			TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
1196  		}
1197 -	} else
1198 +	} else {
1199  		/* Only override Linux scheduler if we have a real-time task
1200  		 * scheduled that needs to continue.
1201  		 */
1202  		if (exists)
1203  			next = prev;
1204 +	}
1205  
1206  	sched_state_task_picked();
1207  	raw_spin_unlock(&cluster->cluster_lock);
1208 @@ -522,7 +588,6 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
1209  		TRACE("becomes idle at %llu.\n", litmus_clock());
1210  #endif
1211  
1212 -
1213  	return next;
1214  }
1215  
1216 @@ -601,6 +666,10 @@ static void cedf_task_wake_up(struct task_struct *task)
1217  		release_at(task, now);
1218  		sched_trace_task_release(task);
1219  	}
1220 +	if (is_pgm_waiting(task)) {
1221 +		/* shift out release/deadline, if needed */
1222 +		setup_pgm_release(task);
1223 +	}
1224  	cedf_job_arrival(task);
1225  	raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
1226  }
1227 @@ -684,6 +753,55 @@ static void cleanup_cedf(void)
1228  	}
1229  }
1230  
1231 +static struct domain_proc_info cedf_domain_proc_info;
1232 +static long cedf_get_domain_proc_info(struct domain_proc_info **ret)
1233 +{
1234 +	*ret = &cedf_domain_proc_info;
1235 +	return 0;
1236 +}
1237 +
1238 +static void cedf_setup_domain_proc(void)
1239 +{
1240 +	int i, cpu, domain;
1241 +#ifdef CONFIG_RELEASE_MASTER
1242 +	int release_master = atomic_read(&release_master_cpu);
1243 +	/* skip over the domain with the release master if cluster size is 1 */
1244 +	int skip_domain = (1 == cluster_size && release_master != NO_CPU) ?
1245 +			release_master : NO_CPU;
1246 +#else
1247 +	int release_master = NO_CPU;
1248 +	int skip_domain = NO_CPU;
1249 +#endif
1250 +	int num_rt_cpus = num_online_cpus() - (release_master != NO_CPU);
1251 +	int num_rt_domains = num_clusters - (skip_domain != NO_CPU);
1252 +	struct cd_mapping *map;
1253 +
 1254 +	memset(&cedf_domain_proc_info, 0, sizeof(cedf_domain_proc_info));
1255 +	init_domain_proc_info(&cedf_domain_proc_info, num_rt_cpus, num_rt_domains);
1256 +	cedf_domain_proc_info.num_cpus = num_rt_cpus;
1257 +	cedf_domain_proc_info.num_domains = num_rt_domains;
1258 +
1259 +	for (cpu = 0, i = 0; cpu < num_online_cpus(); ++cpu) {
1260 +		if (cpu == release_master)
1261 +			continue;
1262 +		map = &cedf_domain_proc_info.cpu_to_domains[i];
1263 +		/* pointer math to figure out the domain index */
1264 +		domain = remote_cluster(cpu) - cedf;
1265 +		map->id = cpu;
1266 +		cpumask_set_cpu(domain, map->mask);
1267 +		++i;
1268 +	}
1269 +
1270 +	for (domain = 0, i = 0; domain < num_clusters; ++domain) {
1271 +		if (domain == skip_domain)
1272 +			continue;
1273 +		map = &cedf_domain_proc_info.domain_to_cpus[i];
1274 +		map->id = i;
1275 +		cpumask_copy(map->mask, cedf[domain].cpu_map);
1276 +		++i;
1277 +	}
1278 +}
1279 +
1280  static long cedf_activate_plugin(void)
1281  {
1282  	int i, j, cpu, ccpu, cpu_count;
1283 @@ -702,7 +820,7 @@ static long cedf_activate_plugin(void)
1284  	if(!zalloc_cpumask_var(&mask, GFP_ATOMIC))
1285  		return -ENOMEM;
1286  
1287 -	if (unlikely(cluster_config == GLOBAL_CLUSTER)) {
1288 +	if (cluster_config == GLOBAL_CLUSTER) {
1289  		cluster_size = num_online_cpus();
1290  	} else {
1291  		chk = get_shared_cpu_map(mask, 0, cluster_config);
1292 @@ -800,8 +918,17 @@ static long cedf_activate_plugin(void)
1293  		}
1294  	}
1295  
1296 -	free_cpumask_var(mask);
1297  	clusters_allocated = 1;
1298 +	free_cpumask_var(mask);
1299 +
1300 +	cedf_setup_domain_proc();
1301 +
1302 +	return 0;
1303 +}
1304 +
1305 +static long cedf_deactivate_plugin(void)
1306 +{
1307 +	destroy_domain_proc_info(&cedf_domain_proc_info);
1308  	return 0;
1309  }
1310  
1311 @@ -818,6 +945,8 @@ static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = {
1312  	.task_block		= cedf_task_block,
1313  	.admit_task		= cedf_admit_task,
1314  	.activate_plugin	= cedf_activate_plugin,
1315 +	.deactivate_plugin	= cedf_deactivate_plugin,
1316 +	.get_domain_proc_info	= cedf_get_domain_proc_info,
1317  };
1318  
1319  static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL;
1320 diff --git a/litmus/sched_cfl_split.c b/litmus/sched_cfl_split.c
1321 new file mode 100644
1322 index 0000000..eef10f4
1323 --- /dev/null
1324 +++ b/litmus/sched_cfl_split.c
1325 @@ -0,0 +1,1146 @@
1326 +/*
1327 + * litmus/sched_cfl_split.c
1328 + *
1329 + * Implementation of a clustered version of the C-FL scheduling algorithm,
1330 + * with job splitting.
1331 + *
1332 + * This implementation is based on C-FL-split:
1333 + * - CPUs are clustered around L2 or L3 caches.
 1334 + * - Cluster topology is automatically detected (this is arch dependent
 1335 + *   and works only on x86 at the moment --- and only with modern
 1336 + *   cpus that export cpuid4 information)
 1337 + * - The plugin _does not_ attempt to put tasks in the right cluster, i.e.
1338 + *   the programmer needs to be aware of the topology to place tasks
1339 + *   in the desired cluster
1340 + * - default clustering is around L2 cache (cache index = 2)
1341 + *   supported clusters are: L1 (private cache: pedf), L2, L3, ALL (all
1342 + *   online_cpus are placed in a single cluster).
1343 + *
1344 + *   For details on functions, take a look at sched_gsn_edf.c
1345 + *
1346 + * Currently, we do not support changes in the number of online cpus.
1347 + * If the num_online_cpus() dynamically changes, the plugin is broken.
1348 + *
1349 + * This version uses the simple approach and serializes all scheduling
1350 + * decisions by the use of a queue lock. This is probably not the
1351 + * best way to do it, but it should suffice for now.
1352 + */
1353 +
1354 +#include <linux/spinlock.h>
1355 +#include <linux/percpu.h>
1356 +#include <linux/sched.h>
1357 +#include <linux/slab.h>
1358 +
1359 +#include <linux/module.h>
1360 +
1361 +#include <litmus/litmus.h>
1362 +#include <litmus/jobs.h>
1363 +#include <litmus/preempt.h>
1364 +#include <litmus/budget.h>
1365 +#include <litmus/sched_plugin.h>
1366 +#include <litmus/edf_split_common.h>
1367 +#include <litmus/sched_trace.h>
1368 +
1369 +#include <litmus/clustered.h>
1370 +
1371 +#include <litmus/bheap.h>
1372 +
1373 +#ifdef CONFIG_SCHED_CPU_AFFINITY
1374 +#include <litmus/affinity.h>
1375 +#endif
1376 +
1377 +#ifdef CONFIG_SCHED_PGM
1378 +#include <litmus/pgm.h>
1379 +#endif
1380 +
1381 +/* to configure the cluster size */
1382 +#include <litmus/litmus_proc.h>
1383 +#include <linux/uaccess.h>
1384 +
1385 +/* Reference configuration variable. Determines which cache level is used to
1386 + * group CPUs into clusters.  GLOBAL_CLUSTER, which is the default, means that
1387 + * all CPUs form a single cluster (just like G-FL).
1388 + */
1389 +static enum cache_level cluster_config = GLOBAL_CLUSTER;
1390 +
1391 +struct clusterdomain;
1392 +
1393 +/* cpu_entry_t - maintain the linked and scheduled state
1394 + *
1395 + * A cpu also contains a pointer to the cflsplit_domain_t cluster
1396 + * that owns it (struct clusterdomain*)
1397 + */
1398 +typedef struct  {
1399 +	int 			cpu;
1400 +	struct clusterdomain*	cluster;	/* owning cluster */
1401 +	struct task_struct*	linked;		/* only RT tasks */
1402 +	struct task_struct*	scheduled;	/* only RT tasks */
1403 +	atomic_t		will_schedule;	/* prevent unneeded IPIs */
1404 +	struct bheap_node*	hn;
1405 +	struct hrtimer		split_timer;
1406 +	int			timer_armed;
1407 +} cpu_entry_t;
1408 +
1409 +/* one cpu_entry_t per CPU */
1410 +DEFINE_PER_CPU(cpu_entry_t, cflsplit_cpu_entries);
1411 +
1412 +#define set_will_schedule() \
1413 +	(atomic_set(&__get_cpu_var(cflsplit_cpu_entries).will_schedule, 1))
1414 +#define clear_will_schedule() \
1415 +	(atomic_set(&__get_cpu_var(cflsplit_cpu_entries).will_schedule, 0))
1416 +#define test_will_schedule(cpu) \
1417 +	(atomic_read(&per_cpu(cflsplit_cpu_entries, cpu).will_schedule))
1418 +
1419 +/*
1420 + * In C-FL-split there is a cflsplit domain _per_ cluster
 1421 + * The number of clusters is dynamically determined according to the
1422 + * total cpu number and the cluster size
1423 + */
1424 +typedef struct clusterdomain {
1425 +	/* rt_domain for this cluster */
1426 +	rt_domain_t	domain;
1427 +	/* cpus in this cluster */
1428 +	cpu_entry_t*	*cpus;
1429 +	/* map of this cluster cpus */
1430 +	cpumask_var_t	cpu_map;
1431 +	/* the cpus queue themselves according to priority in here */
1432 +	struct bheap_node *heap_node;
1433 +	struct bheap      cpu_heap;
1434 +	/* lock for this cluster */
1435 +#define cluster_lock domain.ready_lock
1436 +} cflsplit_domain_t;
1437 +
1438 +/* a cflsplit_domain per cluster; allocation is done at init/activation time */
1439 +cflsplit_domain_t *cflsplit;
1440 +
1441 +#define remote_cluster(cpu)	((cflsplit_domain_t *) per_cpu(cflsplit_cpu_entries, cpu).cluster)
1442 +#define task_cpu_cluster(task)	remote_cluster(get_partition(task))
1443 +
1444 +/* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling
1445 + * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose
1446 + * information during the initialization of the plugin (e.g., topology)
1447 +#define WANT_ALL_SCHED_EVENTS
1448 + */
1449 +#define VERBOSE_INIT
1450 +
1451 +inline static int get_slice_num(struct task_struct* t)
1452 +{
1453 +	int basic = ((t->rt_param.job_params.exec_time *
1454 +		      t->rt_param.task_params.split) /
1455 +		      t->rt_param.task_params.exec_cost) + 1;
1456 +	if (basic <= t->rt_param.task_params.split){
1457 +		return basic;
1458 +	}
1459 +	else{
 1460 +		/* Since we don't police the budget, just leave it where it is. */
1461 +		return t->rt_param.task_params.split;
1462 +	}
1463 +}
1464 +
1465 +/* Returns the appropriate subjob deadline.*/
1466 +inline static lt_t get_proper_deadline(struct task_struct* t)
1467 +{
1468 +	unsigned int num_cpus = num_online_cpus();
1469 +	return t->rt_param.job_params.release +
1470 +	       ((t->rt_param.task_params.period * get_slice_num(t))
1471 +	       / t->rt_param.task_params.split)
1472 +	       /* G-FL correction */
1473 +	       - (((num_cpus - 1) * t->rt_param.task_params.exec_cost)
1474 +	       / (num_cpus * t->rt_param.task_params.split));
1475 +}
1476 +
1477 +/* Tells us if the current deadline is too small.*/
1478 +inline static int needs_deadline_move(struct task_struct* t)
1479 +{
1480 +	BUG_ON(get_proper_deadline(t) < t->rt_param.job_params.subjob_deadline);
1481 +	return get_proper_deadline(t) != tsk_rt(t)->job_params.subjob_deadline;
1482 +}
1483 +
1484 +/*Returns execution time until the next deadline move.
1485 + * 0 means the task has no more deadline moves
1486 + */
1487 +inline static lt_t time_to_next_move(struct task_struct* t)
1488 +{
1489 +	if (get_slice_num(t) == t->rt_param.task_params.split){
1490 +		return 0;
1491 +	}
1492 +	/* +1 upper bounds ceiling, since integer division is floor*/
1493 +	return ((get_slice_num(t) * t->rt_param.task_params.exec_cost)
1494 +		 / t->rt_param.task_params.split) + 1
1495 +		 - t->rt_param.job_params.exec_time;
1496 +}
1497 +
1498 +/* Timer stuff - similar to budget.c. */
1499 +static enum hrtimer_restart on_split_timeout(struct hrtimer *timer)
1500 +{
1501 +	cpu_entry_t* st = container_of(timer,
1502 +				       cpu_entry_t,
1503 +				       split_timer);
1504 +
1505 +	unsigned long flags;
1506 +
1507 +	local_irq_save(flags);
1508 +	TRACE("split timer fired: %llu\n", litmus_clock());
1509 +	st->timer_armed = 0;
1510 +	/* Activate scheduler */
1511 +	litmus_reschedule_local();
1512 +	local_irq_restore(flags);
1513 +
1514 +	return HRTIMER_NORESTART;
1515 +}
1516 +
1517 +static void cancel_split_timer(cpu_entry_t* ce)
1518 +{
1519 +	int ret;
1520 +
 1521 +	TRACE("cancelling split timer.\n");
1522 +
 1523 +	/* Since interrupts are disabled and ce->timer_armed is only
1524 +	 * modified locally, we do not need any locks.
1525 +	 */
1526 +
1527 +	if (ce->timer_armed) {
1528 +		ret = hrtimer_try_to_cancel(&ce->split_timer);
1529 +		/* Should never be inactive. */
1530 +		BUG_ON(ret == 0);
1531 +		/* Should never be running concurrently.*/
1532 +		BUG_ON(ret == -1);
1533 +
1534 +		ce->timer_armed = 0;
1535 +	}
1536 +}
1537 +
1538 +/* assumes called with IRQs off */
1539 +static void arm_split_timer(cpu_entry_t *ce,
1540 +				struct task_struct* t)
1541 +{
1542 +	lt_t when_to_fire;
1543 +	lt_t time_to_move;
1544 +	lt_t now = litmus_clock();
1545 +
1546 +	/* __hrtimer_start_range_ns() cancels the timer
1547 +	 * anyway, so we don't have to check whether it is still armed */
1548 +
1549 +	/*We won't do any new deadline moves if the budget has been exhausted*/
1550 +	if (likely(!is_np(t) && (time_to_move = time_to_next_move(t)))) {
1551 +		when_to_fire = now + time_to_move;
1552 +		TRACE_TASK(t, "actually arming for %llu into the future\n",
1553 +			   time_to_move);
1554 +		__hrtimer_start_range_ns(&ce->split_timer,
1555 +					 ns_to_ktime(when_to_fire),
1556 +					 0 /* delta */,
1557 +					 HRTIMER_MODE_ABS_PINNED,
1558 +					 0 /* no wakeup */);
1559 +		ce->timer_armed = 1;
1560 +	}
1561 +}
1562 +
1563 +static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
1564 +{
1565 +	cpu_entry_t *a, *b;
1566 +	a = _a->value;
1567 +	b = _b->value;
1568 +	/* Note that a and b are inverted: we want the lowest-priority CPU at
1569 +	 * the top of the heap.
1570 +	 */
1571 +	return edf_split_higher_prio(b->linked, a->linked);
1572 +}
1573 +
1574 +/* update_cpu_position - Move the cpu entry to the correct place to maintain
1575 + *                       order in the cpu queue. Caller must hold cflsplit lock.
1576 + */
1577 +static void update_cpu_position(cpu_entry_t *entry)
1578 +{
1579 +	cflsplit_domain_t *cluster = entry->cluster;
1580 +
1581 +	if (likely(bheap_node_in_heap(entry->hn)))
1582 +		bheap_delete(cpu_lower_prio,
1583 +				&cluster->cpu_heap,
1584 +				entry->hn);
1585 +
1586 +	bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn);
1587 +}
1588 +
1589 +/* caller must hold cflsplit lock */
1590 +static cpu_entry_t* lowest_prio_cpu(cflsplit_domain_t *cluster)
1591 +{
1592 +	struct bheap_node* hn;
1593 +	hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap);
1594 +	return hn->value;
1595 +}
1596 +
1597 +
1598 +/* link_task_to_cpu - Update the link of a CPU.
1599 + *                    Handles the case where the to-be-linked task is already
1600 + *                    scheduled on a different CPU.
1601 + */
1602 +static noinline void link_task_to_cpu(struct task_struct* linked,
1603 +				      cpu_entry_t *entry)
1604 +{
1605 +	cpu_entry_t *sched;
1606 +	struct task_struct* tmp;
1607 +	int on_cpu;
1608 +
1609 +	BUG_ON(linked && !is_realtime(linked));
1610 +
1611 +	/* Currently linked task is set to be unlinked. */
1612 +	if (entry->linked) {
1613 +		entry->linked->rt_param.linked_on = NO_CPU;
1614 +	}
1615 +
1616 +	/* Link new task to CPU. */
1617 +	if (linked) {
1618 +		/* handle task is already scheduled somewhere! */
1619 +		on_cpu = linked->rt_param.scheduled_on;
1620 +		if (on_cpu != NO_CPU) {
1621 +			sched = &per_cpu(cflsplit_cpu_entries, on_cpu);
1622 +			/* this should only happen if not linked already */
1623 +			BUG_ON(sched->linked == linked);
1624 +
1625 +			/* If we are already scheduled on the CPU to which we
1626 +			 * wanted to link, we don't need to do the swap --
1627 +			 * we just link ourselves to the CPU and depend on
1628 +			 * the caller to get things right.
1629 +			 */
1630 +			if (entry != sched) {
1631 +				TRACE_TASK(linked,
1632 +					   "already scheduled on %d, updating link.\n",
1633 +					   sched->cpu);
1634 +				tmp = sched->linked;
1635 +				linked->rt_param.linked_on = sched->cpu;
1636 +				sched->linked = linked;
1637 +				update_cpu_position(sched);
1638 +				linked = tmp;
1639 +			}
1640 +		}
1641 +		if (linked) /* might be NULL due to swap */
1642 +			linked->rt_param.linked_on = entry->cpu;
1643 +	}
1644 +	entry->linked = linked;
1645 +#ifdef WANT_ALL_SCHED_EVENTS
1646 +	if (linked)
1647 +		TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
1648 +	else
1649 +		TRACE("NULL linked to %d.\n", entry->cpu);
1650 +#endif
1651 +	update_cpu_position(entry);
1652 +}
1653 +
1654 +/* unlink - Make sure a task is not linked any longer to an entry
1655 + *          where it was linked before. Must hold cflsplit_lock.
1656 + */
1657 +static noinline void unlink(struct task_struct* t)
1658 +{
1659 +	cpu_entry_t *entry;
1660 +
1661 +	if (t->rt_param.linked_on != NO_CPU) {
1662 +		/* unlink */
1663 +		entry = &per_cpu(cflsplit_cpu_entries, t->rt_param.linked_on);
1664 +		t->rt_param.linked_on = NO_CPU;
1665 +		link_task_to_cpu(NULL, entry);
1666 +	} else if (is_queued(t)) {
1667 +		/* This is an interesting situation: t is scheduled,
1668 +		 * but was just recently unlinked.  It cannot be
1669 +		 * linked anywhere else (because then it would have
1670 +		 * been relinked to this CPU), thus it must be in some
1671 +		 * queue. We must remove it from the list in this
1672 +		 * case.
1673 +		 *
1674 +		 * In the C-FL-split case it should be somewhere in the queue
1675 +		 * for its domain, so we can find the domain using
1676 +		 * task_cpu_cluster().
1677 +		 */
1678 +		remove(&(task_cpu_cluster(t))->domain, t);
1679 +	}
1680 +}
1681 +
1682 +
1683 +/* preempt - force a CPU to reschedule
1684 + */
1685 +static void preempt(cpu_entry_t *entry)
1686 +{
1687 +	preempt_if_preemptable(entry->scheduled, entry->cpu);
1688 +}
1689 +
1690 +/* requeue - Put an unlinked task into its cluster's domain.
1691 + *           Caller must hold cflsplit_lock.
1692 + */
1693 +static noinline void requeue(struct task_struct* task)
1694 +{
1695 +	cflsplit_domain_t *cluster = task_cpu_cluster(task);
1696 +	BUG_ON(!task);
1697 +	/* sanity check before insertion */
1698 +	BUG_ON(is_queued(task));
1699 +
1700 +	if (is_early_releasing(task) || is_released(task, litmus_clock()))
1701 +		__add_ready(&cluster->domain, task);
1702 +	else {
1703 +		/* it has got to wait */
1704 +		add_release(&cluster->domain, task);
1705 +	}
1706 +}
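+
+/* Example: a job whose release time still lies in the future is handed to
+ * add_release() and re-enters the ready heap later through
+ * cflsplit_release_jobs(); a job that is already released (or is early
+ * releasing) is inserted directly via __add_ready() and may immediately
+ * trigger a preemption in check_for_preemptions().
+ */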
1707 +
1708 +#ifdef CONFIG_SCHED_CPU_AFFINITY
1709 +static cpu_entry_t* cflsplit_get_nearest_available_cpu(
1710 +				cflsplit_domain_t *cluster, cpu_entry_t *start)
1711 +{
1712 +	cpu_entry_t *affinity;
1713 +
1714 +	get_nearest_available_cpu(affinity, start, cflsplit_cpu_entries,
1715 +#ifdef CONFIG_RELEASE_MASTER
1716 +		cluster->domain.release_master
1717 +#else
1718 +		NO_CPU
1719 +#endif
1720 +		);
1721 +
1722 +	if (affinity) {
1723 +		/* make sure CPU is in our cluster */
1724 +		if(cpu_isset(affinity->cpu, *cluster->cpu_map)) {
1725 +			return(affinity);
1726 +		}
1727 +		else {
1728 +			TRACE("CPU %d is not in our cluster.\n", affinity->cpu);
1729 +		}
1730 +	}
1731 +	return(NULL);
1732 +}
1733 +#endif
1734 +
1735 +
1736 +/* check for any necessary preemptions */
1737 +static void check_for_preemptions(cflsplit_domain_t *cluster)
1738 +{
1739 +	struct task_struct *task;
1740 +	cpu_entry_t *last;
1741 +
1742 +	for(last = lowest_prio_cpu(cluster);
1743 +	    edf_split_preemption_needed(&cluster->domain, last->linked);
1744 +	    last = lowest_prio_cpu(cluster)) {
1745 +		/* preemption necessary */
1746 +		task = __take_ready(&cluster->domain);
1747 +		TRACE("check_for_preemptions: attempting to link task %s/%d to %d\n",
1748 +		      task->comm, task->pid, last->cpu);
1749 +#ifdef CONFIG_SCHED_CPU_AFFINITY
1750 +		{
1751 +			cpu_entry_t *affinity =
1752 +					cflsplit_get_nearest_available_cpu(cluster,
1753 +						&per_cpu(cflsplit_cpu_entries, task_cpu(task)));
1754 +			if(affinity)
1755 +				last = affinity;
1756 +			else if(requeue_preempted_job(last->linked))
1757 +				requeue(last->linked);
1758 +		}
1759 +#else
1760 +		if (requeue_preempted_job(last->linked))
1761 +			requeue(last->linked);
1762 +#endif
1763 +		link_task_to_cpu(task, last);
1764 +		preempt(last);
1765 +	}
1766 +}
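+
+/* Example: in a two-CPU cluster whose linked jobs have subjob deadlines
+ * 30ms and 50ms, a newly ready job with deadline 20ms is taken from the
+ * ready heap, linked to the CPU currently holding the 50ms job (possibly
+ * redirected to a closer CPU when affinity support is enabled), and the
+ * displaced 50ms job is requeued.  The loop repeats until the
+ * lowest-priority linked job has higher priority than the head of the
+ * ready queue.
+ */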
1767 +
1768 +/* cflsplit_job_arrival: task is either resumed or released */
1769 +static noinline void cflsplit_job_arrival(struct task_struct* task)
1770 +{
1771 +	cflsplit_domain_t *cluster = task_cpu_cluster(task);
1772 +	BUG_ON(!task);
1773 +
1774 +	requeue(task);
1775 +	check_for_preemptions(cluster);
1776 +}
1777 +
1778 +static void cflsplit_release_jobs(rt_domain_t* rt, struct bheap* tasks)
1779 +{
1780 +	cflsplit_domain_t* cluster = container_of(rt, cflsplit_domain_t, domain);
1781 +	unsigned long flags;
1782 +
1783 +	raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
1784 +
1785 +	__merge_ready(&cluster->domain, tasks);
1786 +	check_for_preemptions(cluster);
1787 +
1788 +	raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
1789 +}
1790 +
1791 +/* caller holds cflsplit_lock */
1792 +static noinline void job_completion(struct task_struct *t, int forced)
1793 +{
1794 +	BUG_ON(!t);
1795 +
1796 +	sched_trace_task_completion(t, forced);
1797 +
1798 +	TRACE_TASK(t, "job_completion().\n");
1799 +
1800 +	/* set flags */
1801 +	tsk_rt(t)->completed = 0;
1802 +	/* prepare for next period */
1803 +	prepare_for_next_period(t);
1804 +	/* We now also set the subjob deadline to what it should be for
1805 +	 * scheduling priority.
1806 +	 */
1807 +	t->rt_param.job_params.subjob_deadline = get_proper_deadline(t);
1808 +	if (is_early_releasing(t) || is_released(t, litmus_clock()))
1809 +		sched_trace_task_release(t);
1810 +	/* unlink */
1811 +	unlink(t);
1812 +	/* requeue
1813 +	 * But don't requeue a blocking task. */
1814 +	if (is_running(t))
1815 +		cflsplit_job_arrival(t);
1816 +}
1817 +
1818 +static void move_deadline(struct task_struct *t)
1819 +{
1820 +	tsk_rt(t)->job_params.subjob_deadline = get_proper_deadline(t);
1821 +	/* Check if rescheduling needed with lower priority. */
1822 +	unlink(t);
1823 +	cflsplit_job_arrival(t);
1824 +}
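+
+/* A deadline move only advances job_params.subjob_deadline, the value used
+ * for prioritization; the job's actual deadline and remaining budget are
+ * left untouched (no prepare_for_next_period()).  Re-running unlink() and
+ * cflsplit_job_arrival() re-evaluates the task against the ready queue
+ * under its new, typically later, subjob deadline, which may cause it to
+ * be preempted.
+ */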
1825 +
1826 +/* cflsplit_tick - called for every local timer interrupt.
1827 + *
1828 + *                 Checks whether the current task's budget is exhausted and,
1829 + *                 if so, forces a reschedule (or requests one if the task is
1830 + *                 currently non-preemptable).
1831 + */
1832 +static void cflsplit_tick(struct task_struct* t)
1833 +{
1834 +	if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
1835 +		if (!is_np(t)) {
1836 +			/* np tasks will be preempted when they become
1837 +			 * preemptable again
1838 +			 */
1839 +			litmus_reschedule_local();
1840 +			set_will_schedule();
1841 +			TRACE("cflsplit_scheduler_tick: "
1842 +			      "%d is preemptable "
1843 +			      " => FORCE_RESCHED\n", t->pid);
1844 +		} else if (is_user_np(t)) {
1845 +			TRACE("cflsplit_scheduler_tick: "
1846 +			      "%d is non-preemptable, "
1847 +			      "preemption delayed.\n", t->pid);
1848 +			request_exit_np(t);
1849 +		}
1850 +	}
1851 +}
1852 +
1853 +/* Getting schedule() right is a bit tricky. schedule() may not make any
1854 + * assumptions on the state of the current task since it may be called for a
1855 + * number of reasons. The reasons include that a scheduler_tick() determined
1856 + * it to be necessary, that sys_exit_np() was called, that some Linux
1857 + * subsystem determined so, or even (in the worst case) that there is a bug
1858 + * hidden somewhere. Thus, we must take extreme care to determine what the
1859 + * current state is.
1860 + *
1861 + * The CPU could currently be scheduling a task (or not), be linked (or not).
1862 + *
1863 + * The following assertions for the scheduled task could hold:
1864 + *
1865 + *      - !is_running(scheduled)        // the job blocks
1866 + *	- scheduled->timeslice == 0	// the job completed (forcefully)
1867 + *	- is_completed()			// the job completed (by syscall)
1868 + * 	- linked != scheduled		// we need to reschedule (for any reason)
1869 + * 	- is_np(scheduled)		// rescheduling must be delayed,
1870 + *					   sys_exit_np must be requested
1871 + *
1872 + * Any of these can occur together.
1873 + */
1874 +static struct task_struct* cflsplit_schedule(struct task_struct * prev)
1875 +{
1876 +	cpu_entry_t* entry = &__get_cpu_var(cflsplit_cpu_entries);
1877 +	cflsplit_domain_t *cluster = entry->cluster;
1878 +	int out_of_time, sleep, preempt, np, exists, blocks, needs_move;
1879 +	struct task_struct* next = NULL;
1880 +
1881 +#ifdef CONFIG_RELEASE_MASTER
1882 +	/* Bail out early if we are the release master.
1883 +	 * The release master never schedules any real-time tasks.
1884 +	 */
1885 +	if (unlikely(cluster->domain.release_master == entry->cpu)) {
1886 +		sched_state_task_picked();
1887 +		return NULL;
1888 +	}
1889 +#endif
1890 +
1891 +	raw_spin_lock(&cluster->cluster_lock);
1892 +	clear_will_schedule();
1893 +
1894 +	/* sanity checking */
1895 +	BUG_ON(entry->scheduled && entry->scheduled != prev);
1896 +	BUG_ON(entry->scheduled && !is_realtime(prev));
1897 +	BUG_ON(is_realtime(prev) && !entry->scheduled);
1898 +
1899 +	/* (0) Determine state */
1900 +	exists      = entry->scheduled != NULL;
1901 +	blocks      = exists && !is_running(entry->scheduled);
1902 +	out_of_time = exists &&
1903 +				  budget_enforced(entry->scheduled) &&
1904 +				  budget_exhausted(entry->scheduled);
1905 +	needs_move  = exists && needs_deadline_move(entry->scheduled);
1906 +	np 	    = exists && is_np(entry->scheduled);
1907 +	sleep		= exists && is_completed(entry->scheduled);
1908 +	preempt     = entry->scheduled != entry->linked;
1909 +
1910 +#ifdef WANT_ALL_SCHED_EVENTS
1911 +	TRACE_TASK(prev, "invoked cflsplit_schedule.\n");
1912 +#endif
1913 +
1914 +	if (exists)
1915 +		TRACE_TASK(prev,
1916 +			   "blocks:%d out_of_time:%d needs_move: %d np:%d"
1917 +			   " sleep:%d preempt:%d state:%d sig:%d boosted:%d\n",
1918 +			   blocks, out_of_time, needs_move, np, sleep, preempt,
1919 +			   prev->state, signal_pending(prev),
1920 +			   is_priority_boosted(entry->scheduled));
1921 +
1922 +	if (entry->linked && preempt)
1923 +		TRACE_TASK(prev, "will be preempted by %s/%d\n",
1924 +			   entry->linked->comm, entry->linked->pid);
1925 +
1926 +#ifdef CONFIG_SCHED_PGM
1927 +	if (exists) {
1928 +		if (is_pgm_sending(entry->scheduled)) {
1929 +			if (!is_pgm_satisfied(entry->scheduled)) {
1930 +				if (!is_priority_boosted(entry->scheduled)) {
1931 +					TRACE_TASK(entry->scheduled, "is sending PGM tokens and needs boosting.\n");
1932 +					BUG_ON(is_pgm_satisfied(entry->scheduled));
1933 +
1934 +					/* We are either sending tokens or waiting for tokens.
1935 +					   If waiting: Boost priority so we'll be scheduled
1936 +						immediately when needed tokens arrive.
1937 +					   If sending: Boost priority so no one (specifically, our
1938 +						consumers) will preempt us while signalling the token
1939 +						transmission.
1940 +					*/
1941 +					tsk_rt(entry->scheduled)->priority_boosted = 1;
1942 +					tsk_rt(entry->scheduled)->boost_start_time = litmus_clock();
1943 +
1944 +					if (likely(!blocks)) {
1945 +						unlink(entry->scheduled);
1946 +						cflsplit_job_arrival(entry->scheduled);
1947 +						/* we may regain the processor */
1948 +						if (preempt) {
1949 +							preempt = entry->scheduled != entry->linked;
1950 +							if (!preempt) {
1951 +								TRACE_TASK(entry->scheduled, "blocked preemption by lazy boosting.\n");
1952 +							}
1953 +						}
1954 +					}
1955 +				}
1956 +			}
1957 +			else { /* sending is satisfied */
1958 +				tsk_rt(entry->scheduled)->ctrl_page->pgm_sending = 0;
1959 +				tsk_rt(entry->scheduled)->ctrl_page->pgm_satisfied = 0;
1960 +
1961 +				if (is_priority_boosted(entry->scheduled)) {
1962 +					TRACE_TASK(entry->scheduled,
1963 +							"is done sending PGM tokens must relinquish boosting.\n");
1964 +					/* clear boosting */
1965 +					tsk_rt(entry->scheduled)->priority_boosted = 0;
1966 +					if(likely(!blocks)) {
1967 +						/* recheck priority */
1968 +						unlink(entry->scheduled);
1969 +						cflsplit_job_arrival(entry->scheduled);
1970 +						/* we may lose the processor */
1971 +						if (!preempt) {
1972 +							preempt = entry->scheduled != entry->linked;
1973 +							if (preempt) {
1974 +								TRACE_TASK(entry->scheduled, "preempted by lazy unboosting.\n");
1975 +							}
1976 +						}
1977 +					}
1978 +				}
1979 +			}
1980 +		}
1981 +	}
1982 +#endif
1983 +
1984 +	/* If a task blocks we have no choice but to reschedule.
1985 +	 */
1986 +	if (blocks)
1987 +		unlink(entry->scheduled);
1988 +
1989 +	/* Request a sys_exit_np() call if we would like to preempt but cannot.
1990 +	 * We need to make sure to update the link structure anyway in case
1991 +	 * that we are still linked. Multiple calls to request_exit_np() don't
1992 +	 * hurt.
1993 +	 *
1994 +	 * Job deadline moves handled similarly
1995 +	 */
1996 +	if (np && (out_of_time || sleep)) {
1997 +		unlink(entry->scheduled);
1998 +		request_exit_np(entry->scheduled);
1999 +	}
2000 +	else if (np && (needs_move || preempt)) {
2001 +		request_exit_np(entry->scheduled);
2002 +	}
2003 +
2004 +	/* Any task that is preemptable and either exhausts its execution
2005 +	 * budget or wants to sleep completes. We may have to reschedule after
2006 +	 * this. Don't do a job completion if we block (can't have timers running
2007 +	 * for blocked jobs). Preemptions go first for the same reason.
2008 +	 */
2009 +	if (!np && (out_of_time || sleep) && !blocks)
2010 +		job_completion(entry->scheduled, !sleep);
2011 +	else if (!np && needs_move && !blocks) {
2012 +		move_deadline(entry->scheduled);
2013 +	}
2014 +
2015 +	/* Link pending task if we became unlinked.
2016 +	 */
2017 +	if (!entry->linked)
2018 +		link_task_to_cpu(__take_ready(&cluster->domain), entry);
2019 +
2020 +	/* The final scheduling decision. Do we need to switch for some reason?
2021 +	 * If linked is different from scheduled, then select linked as next.
2022 +	 */
2023 +	if ((!np || blocks) &&
2024 +	    entry->linked != entry->scheduled) {
2025 +		/* Schedule a linked job? */
2026 +		if (entry->linked) {
2027 +			entry->linked->rt_param.scheduled_on = entry->cpu;
2028 +			next = entry->linked;
2029 +		}
2030 +		if (entry->scheduled) {
2031 +			/* not gonna be scheduled soon */
2032 +			entry->scheduled->rt_param.scheduled_on = NO_CPU;
2033 +			TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
2034 +		}
2035 +	} else {
2036 +		/* Only override Linux scheduler if we have a real-time task
2037 +		 * scheduled that needs to continue.
2038 +		 */
2039 +		if (exists)
2040 +			next = prev;
2041 +	}
2042 +
2043 +	sched_state_task_picked();
2044 +	raw_spin_unlock(&cluster->cluster_lock);
2045 +
2046 +	if (next) {
2047 +		arm_split_timer(entry, next);
2048 +	}
2049 +	else if (entry->timer_armed) {
2050 +		cancel_split_timer(entry);
2051 +	}
2052 +
2053 +#ifdef WANT_ALL_SCHED_EVENTS
2054 +	TRACE("cflsplit_lock released, next=0x%p\n", next);
2055 +
2056 +	if (next)
2057 +		TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
2058 +	else if (exists && !next)
2059 +		TRACE("becomes idle at %llu.\n", litmus_clock());
2060 +#endif
2061 +
2062 +	return next;
2063 +}
2064 +
2065 +
2066 +/* _finish_switch - we just finished the switch away from prev
2067 + */
2068 +static void cflsplit_finish_switch(struct task_struct *prev)
2069 +{
2070 +	cpu_entry_t* 	entry = &__get_cpu_var(cflsplit_cpu_entries);
2071 +
2072 +	entry->scheduled = is_realtime(current) ? current : NULL;
2073 +#ifdef WANT_ALL_SCHED_EVENTS
2074 +	TRACE_TASK(prev, "switched away from\n");
2075 +#endif
2076 +}
2077 +
2078 +
2079 +static void cflsplit_release_at(struct task_struct *t, lt_t start)
2080 +{
2081 +	release_at(t, start);
2082 +	t->rt_param.job_params.subjob_deadline = get_proper_deadline(t);
2083 +}
2084 +
2085 +
2086 +/*	Prepare a task for running in RT mode
2087 + */
2088 +static void cflsplit_task_new(struct task_struct * t, int on_rq, int is_scheduled)
2089 +{
2090 +	unsigned long 		flags;
2091 +	cpu_entry_t* 		entry;
2092 +	cflsplit_domain_t*		cluster;
2093 +
2094 +	TRACE("gsn edf: task new %d\n", t->pid);
2095 +
2096 +	/* the cluster doesn't change even if t is scheduled */
2097 +	cluster = task_cpu_cluster(t);
2098 +
2099 +	raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
2100 +
2101 +	/* setup job params */
2102 +	cflsplit_release_at(t, litmus_clock());
2103 +
2104 +	if (is_scheduled) {
2105 +		entry = &per_cpu(cflsplit_cpu_entries, task_cpu(t));
2106 +		BUG_ON(entry->scheduled);
2107 +
2108 +#ifdef CONFIG_RELEASE_MASTER
2109 +		if (entry->cpu != cluster->domain.release_master) {
2110 +#endif
2111 +			entry->scheduled = t;
2112 +			tsk_rt(t)->scheduled_on = task_cpu(t);
2113 +#ifdef CONFIG_RELEASE_MASTER
2114 +		} else {
2115 +			/* do not schedule on release master */
2116 +			preempt(entry); /* force resched */
2117 +			tsk_rt(t)->scheduled_on = NO_CPU;
2118 +		}
2119 +#endif
2120 +	} else {
2121 +		t->rt_param.scheduled_on = NO_CPU;
2122 +	}
2123 +	t->rt_param.linked_on          = NO_CPU;
2124 +
2125 +	if (is_running(t))
2126 +		cflsplit_job_arrival(t);
2127 +	raw_spin_unlock_irqrestore(&(cluster->cluster_lock), flags);
2128 +}
2129 +
2130 +static void cflsplit_task_wake_up(struct task_struct *task)
2131 +{
2132 +	unsigned long flags;
2133 +	lt_t now;
2134 +	cflsplit_domain_t *cluster;
2135 +
2136 +	TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
2137 +
2138 +	cluster = task_cpu_cluster(task);
2139 +
2140 +	raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
2141 +	now = litmus_clock();
2142 +	if (is_sporadic(task) && is_tardy(task, now)) {
2143 +		/* new sporadic release */
2144 +		cflsplit_release_at(task, now);
2145 +		sched_trace_task_release(task);
2146 +	}
2147 +	if (is_pgm_waiting(task)) {
2148 +		/* shift out release/deadline, if needed */
2149 +		setup_pgm_release(task);
2150 +	}
2151 +	cflsplit_job_arrival(task);
2152 +	raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
2153 +}
2154 +
2155 +static void cflsplit_task_block(struct task_struct *t)
2156 +{
2157 +	unsigned long flags;
2158 +	cflsplit_domain_t *cluster;
2159 +
2160 +	TRACE_TASK(t, "block at %llu\n", litmus_clock());
2161 +
2162 +	cluster = task_cpu_cluster(t);
2163 +
2164 +	/* unlink if necessary */
2165 +	raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
2166 +	unlink(t);
2167 +	raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
2168 +
2169 +	BUG_ON(!is_realtime(t));
2170 +}
2171 +
2172 +
2173 +static void cflsplit_task_exit(struct task_struct * t)
2174 +{
2175 +	unsigned long flags;
2176 +	cflsplit_domain_t *cluster = task_cpu_cluster(t);
2177 +
2178 +	/* unlink if necessary */
2179 +	raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
2180 +	unlink(t);
2181 +	if (tsk_rt(t)->scheduled_on != NO_CPU) {
2182 +		cpu_entry_t *cpu;
2183 +		cpu = &per_cpu(cflsplit_cpu_entries, tsk_rt(t)->scheduled_on);
2184 +		cpu->scheduled = NULL;
2185 +		tsk_rt(t)->scheduled_on = NO_CPU;
2186 +	}
2187 +	raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
2188 +
2189 +	BUG_ON(!is_realtime(t));
2190 +	TRACE_TASK(t, "RIP\n");
2191 +}
2192 +
2193 +static long cflsplit_admit_task(struct task_struct* tsk)
2194 +{
2195 +	if (remote_cluster(task_cpu(tsk)) != task_cpu_cluster(tsk)) {
2196 +		unsigned int assigned_cpu = get_partition(tsk);
2197 +		unsigned int cur_cpu = task_cpu(tsk);
2198 +		printk(KERN_INFO "cluster mismatch: %d assigned cpu %u but on cpu %u\n",
2199 +			tsk->pid, assigned_cpu, cur_cpu);
2200 +		TRACE_TASK(tsk, "cluster mismatch: assigned cpu %u but on cpu %u\n",
2201 +			assigned_cpu, cur_cpu);
2202 +		return -EINVAL;
2203 +	}
2204 +	return 0;
2205 +}
2206 +
2207 +/* total number of clusters */
2208 +static int num_clusters;
2209 +/* we do not support clusters of different sizes */
2210 +static unsigned int cluster_size;
2211 +
2212 +#ifdef VERBOSE_INIT
2213 +static void print_cluster_topology(cpumask_var_t mask, int cpu)
2214 +{
2215 +	int chk;
2216 +	char buf[255];
2217 +
2218 +	chk = cpulist_scnprintf(buf, 254, mask);
2219 +	buf[chk] = '\0';
2220 +	printk(KERN_INFO "CPU = %d, shared cpu(s) = %s\n", cpu, buf);
2221 +
2222 +}
2223 +#endif
2224 +
2225 +static int clusters_allocated = 0;
2226 +
2227 +static void cleanup_cflsplit(void)
2228 +{
2229 +	int i;
2230 +
2231 +	if (clusters_allocated) {
2232 +		for (i = 0; i < num_clusters; i++) {
2233 +			kfree(cflsplit[i].cpus);
2234 +			kfree(cflsplit[i].heap_node);
2235 +			free_cpumask_var(cflsplit[i].cpu_map);
2236 +		}
2237 +
2238 +		kfree(cflsplit);
2239 +	}
2240 +}
2241 +
2242 +static struct domain_proc_info cflsplit_domain_proc_info;
2243 +static long cflsplit_get_domain_proc_info(struct domain_proc_info **ret)
2244 +{
2245 +	*ret = &cflsplit_domain_proc_info;
2246 +	return 0;
2247 +}
2248 +
2249 +static void cflsplit_setup_domain_proc(void)
2250 +{
2251 +	int i, cpu, domain;
2252 +#ifdef CONFIG_RELEASE_MASTER
2253 +	int release_master = atomic_read(&release_master_cpu);
2254 +	/* skip over the domain with the release master if cluster size is 1 */
2255 +	int skip_domain = (1 == cluster_size && release_master != NO_CPU) ?
2256 +			release_master : NO_CPU;
2257 +#else
2258 +	int release_master = NO_CPU;
2259 +	int skip_domain = NO_CPU;
2260 +#endif
2261 +	int num_rt_cpus = num_online_cpus() - (release_master != NO_CPU);
2262 +	int num_rt_domains = num_clusters - (skip_domain != NO_CPU);
2263 +	struct cd_mapping *map;
2264 +
2265 +	memset(&cflsplit_domain_proc_info, 0, sizeof(cflsplit_domain_proc_info));
2266 +	init_domain_proc_info(&cflsplit_domain_proc_info, num_rt_cpus, num_rt_domains);
2267 +	cflsplit_domain_proc_info.num_cpus = num_rt_cpus;
2268 +	cflsplit_domain_proc_info.num_domains = num_rt_domains;
2269 +
2270 +	for (cpu = 0, i = 0; cpu < num_online_cpus(); ++cpu) {
2271 +		if (cpu == release_master)
2272 +			continue;
2273 +		map = &cflsplit_domain_proc_info.cpu_to_domains[i];
2274 +		/* pointer math to figure out the domain index */
2275 +		domain = remote_cluster(cpu) - cflsplit;
2276 +		map->id = cpu;
2277 +		cpumask_set_cpu(domain, map->mask);
2278 +		++i;
2279 +	}
2280 +
2281 +	for (domain = 0, i = 0; domain < num_clusters; ++domain) {
2282 +		if (domain == skip_domain)
2283 +			continue;
2284 +		map = &cflsplit_domain_proc_info.domain_to_cpus[i];
2285 +		map->id = i;
2286 +		cpumask_copy(map->mask, cflsplit[domain].cpu_map);
2287 +		++i;
2288 +	}
2289 +}
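+
+/* Example: with eight online CPUs, cluster_size == 2 and no release
+ * master, num_clusters == 4 and the exported mapping associates each CPU
+ * with exactly one of the four domains; which CPUs end up together is
+ * determined by the cache topology reported by get_shared_cpu_map() in
+ * cflsplit_activate_plugin() below.
+ */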
2290 +
2291 +static long cflsplit_activate_plugin(void)
2292 +{
2293 +	int i, j, cpu, ccpu, cpu_count;
2294 +	cpu_entry_t *entry;
2295 +
2296 +	cpumask_var_t mask;
2297 +	int chk = 0;
2298 +
2299 +	/* de-allocate old clusters, if any */
2300 +	cleanup_cflsplit();
2301 +
2302 +	printk(KERN_INFO "C-FL-split: Activate Plugin, cluster configuration = %d\n",
2303 +			cluster_config);
2304 +
2305 +	/* need to get cluster_size first */
2306 +	if(!zalloc_cpumask_var(&mask, GFP_ATOMIC))
2307 +		return -ENOMEM;
2308 +
2309 +	if (unlikely(cluster_config == GLOBAL_CLUSTER)) {
2310 +		cluster_size = num_online_cpus();
2311 +	} else {
2312 +		chk = get_shared_cpu_map(mask, 0, cluster_config);
2313 +		if (chk) {
2314 +			/* if chk != 0 then it is the max allowed index */
2315 +			printk(KERN_INFO "C-FL-split: Cluster configuration = %d "
2316 +			       "is not supported on this hardware.\n",
2317 +			       cluster_config);
2318 +			/* User should notice that the configuration failed, so
2319 +			 * let's bail out. */
2320 +			return -EINVAL;
2321 +		}
2322 +
2323 +		cluster_size = cpumask_weight(mask);
2324 +	}
2325 +
2326 +	if ((num_online_cpus() % cluster_size) != 0) {
2327 +		/* this can't be right, some cpus are left out */
2328 +		printk(KERN_ERR "C-FL-split: Trying to group %d cpus in %d!\n",
2329 +				num_online_cpus(), cluster_size);
2330 +		return -1;
2331 +	}
2332 +
2333 +	num_clusters = num_online_cpus() / cluster_size;
2334 +	printk(KERN_INFO "C-FL-split: %d cluster(s) of size = %d\n",
2335 +			num_clusters, cluster_size);
2336 +
2337 +	/* initialize clusters */
2338 +	cflsplit = kmalloc(num_clusters * sizeof(cflsplit_domain_t), GFP_ATOMIC);
2339 +	for (i = 0; i < num_clusters; i++) {
2340 +
2341 +		cflsplit[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t),
2342 +				GFP_ATOMIC);
2343 +		cflsplit[i].heap_node = kmalloc(
2344 +				cluster_size * sizeof(struct bheap_node),
2345 +				GFP_ATOMIC);
2346 +		bheap_init(&(cflsplit[i].cpu_heap));
2347 +		edf_split_domain_init(&(cflsplit[i].domain), NULL,
2348 +				      cflsplit_release_jobs);
2349 +
2350 +		if(!zalloc_cpumask_var(&cflsplit[i].cpu_map, GFP_ATOMIC))
2351 +			return -ENOMEM;
2352 +#ifdef CONFIG_RELEASE_MASTER
2353 +		cflsplit[i].domain.release_master = atomic_read(&release_master_cpu);
2354 +#endif
2355 +	}
2356 +
2357 +	/* cycle through cluster and add cpus to them */
2358 +	for (i = 0; i < num_clusters; i++) {
2359 +
2360 +		for_each_online_cpu(cpu) {
2361 +			/* check if the cpu is already in a cluster */
2362 +			for (j = 0; j < num_clusters; j++)
2363 +				if (cpumask_test_cpu(cpu, cflsplit[j].cpu_map))
2364 +					break;
2365 +			/* if it is in a cluster go to next cpu */
2366 +			if (j < num_clusters &&
2367 +					cpumask_test_cpu(cpu, cflsplit[j].cpu_map))
2368 +				continue;
2369 +
2370 +			/* this cpu isn't in any cluster */
2371 +			/* get the shared cpus */
2372 +			if (unlikely(cluster_config == GLOBAL_CLUSTER))
2373 +				cpumask_copy(mask, cpu_online_mask);
2374 +			else
2375 +				get_shared_cpu_map(mask, cpu, cluster_config);
2376 +
2377 +			cpumask_copy(cflsplit[i].cpu_map, mask);
2378 +#ifdef VERBOSE_INIT
2379 +			print_cluster_topology(mask, cpu);
2380 +#endif
2381 +			/* add cpus to current cluster and init cpu_entry_t */
2382 +			cpu_count = 0;
2383 +			for_each_cpu(ccpu, cflsplit[i].cpu_map) {
2384 +
2385 +				entry = &per_cpu(cflsplit_cpu_entries, ccpu);
2386 +				cflsplit[i].cpus[cpu_count] = entry;
2387 +				atomic_set(&entry->will_schedule, 0);
2388 +				entry->cpu = ccpu;
2389 +				entry->cluster = &cflsplit[i];
2390 +				entry->hn = &(cflsplit[i].heap_node[cpu_count]);
2391 +				hrtimer_init(&entry->split_timer,
2392 +					     CLOCK_MONOTONIC,
2393 +					     HRTIMER_MODE_ABS);
2394 +				entry->split_timer.function = on_split_timeout;
2395 +				bheap_node_init(&entry->hn, entry);
2396 +
2397 +				cpu_count++;
2398 +
2399 +				entry->linked = NULL;
2400 +				entry->scheduled = NULL;
2401 +#ifdef CONFIG_RELEASE_MASTER
2402 +				/* only add CPUs that should schedule jobs */
2403 +				if (entry->cpu != entry->cluster->domain.release_master)
2404 +#endif
2405 +					update_cpu_position(entry);
2406 +			}
2407 +			/* done with this cluster */
2408 +			break;
2409 +		}
2410 +	}
2411 +
2412 +	clusters_allocated = 1;
2413 +	free_cpumask_var(mask);
2414 +
2415 +	cflsplit_setup_domain_proc();
2416 +
2417 +	return 0;
2418 +}
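+
+/* The cluster granularity is chosen before activation by writing to the
+ * plugin's "cluster" proc file (created in init_cflsplit() below):
+ * GLOBAL_CLUSTER collapses all online CPUs into a single cluster, while a
+ * cache-level setting groups CPUs that share the corresponding cache, as
+ * interpreted by get_shared_cpu_map().
+ */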
2419 +
2420 +static long cflsplit_deactivate_plugin(void)
2421 +{
2422 +	destroy_domain_proc_info(&cflsplit_domain_proc_info);
2423 +	return 0;
2424 +}
2425 +
2426 +/*	Plugin object	*/
2427 +static struct sched_plugin cflsplit_plugin __cacheline_aligned_in_smp = {
2428 +	.plugin_name		= "C-FL-split",
2429 +	.finish_switch		= cflsplit_finish_switch,
2430 +	.tick			= cflsplit_tick,
2431 +	.task_new		= cflsplit_task_new,
2432 +	.complete_job		= complete_job,
2433 +	.task_exit		= cflsplit_task_exit,
2434 +	.schedule		= cflsplit_schedule,
2435 +	.release_at		= cflsplit_release_at,
2436 +	.task_wake_up		= cflsplit_task_wake_up,
2437 +	.task_block		= cflsplit_task_block,
2438 +	.admit_task		= cflsplit_admit_task,
2439 +	.activate_plugin	= cflsplit_activate_plugin,
2440 +	.deactivate_plugin	= cflsplit_deactivate_plugin,
2441 +	.get_domain_proc_info	= cflsplit_get_domain_proc_info,
2442 +};
2443 +
2444 +static struct proc_dir_entry *cluster_file = NULL, *cflsplit_dir = NULL;
2445 +
2446 +static int __init init_cflsplit(void)
2447 +{
2448 +	int err, fs;
2449 +
2450 +	err = register_sched_plugin(&cflsplit_plugin);
2451 +	if (!err) {
2452 +		fs = make_plugin_proc_dir(&cflsplit_plugin, &cflsplit_dir);
2453 +		if (!fs)
2454 +			cluster_file = create_cluster_file(cflsplit_dir, &cluster_config);
2455 +		else
2456 +			printk(KERN_ERR "Could not allocate C-FL-split procfs dir.\n");
2457 +	}
2458 +	return err;
2459 +}
2460 +
2461 +static void clean_cflsplit(void)
2462 +{
2463 +	cleanup_cflsplit();
2464 +	if (cluster_file)
2465 +		remove_proc_entry("cluster", cflsplit_dir);
2466 +	if (cflsplit_dir)
2467 +		remove_plugin_proc_dir(&cflsplit_plugin);
2468 +}
2469 +
2470 +module_init(init_cflsplit);
2471 +module_exit(clean_cflsplit);
2472 diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c
2473 index 66ffa3e..207f4b6 100644
2474 --- a/litmus/sched_gsn_edf.c
2475 +++ b/litmus/sched_gsn_edf.c
2476 @@ -28,6 +28,12 @@
2477  #ifdef CONFIG_SCHED_CPU_AFFINITY
2478  #include <litmus/affinity.h>
2479  #endif
2480 +/* to set up domain/cpu mappings */
2481 +#include <litmus/litmus_proc.h>
2482 +
2483 +#ifdef CONFIG_SCHED_PGM
2484 +#include <litmus/pgm.h>
2485 +#endif
2490  
2491  #include <linux/module.h>
2492  
2493 @@ -451,13 +461,71 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev)
2494  	if (exists)
2495  		TRACE_TASK(prev,
2496  			   "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
2497 -			   "state:%d sig:%d\n",
2498 +			   "state:%d sig:%d boosted:%d\n",
2499  			   blocks, out_of_time, np, sleep, preempt,
2500 -			   prev->state, signal_pending(prev));
2501 +			   prev->state, signal_pending(prev),
2502 +			   is_priority_boosted(entry->scheduled));
2503  	if (entry->linked && preempt)
2504  		TRACE_TASK(prev, "will be preempted by %s/%d\n",
2505  			   entry->linked->comm, entry->linked->pid);
2506  
2507 +#ifdef CONFIG_SCHED_PGM
2508 +	if (exists) {
2509 +		if (is_pgm_sending(entry->scheduled)) {
2510 +			if (!is_pgm_satisfied(entry->scheduled)) {
2511 +				if (!is_priority_boosted(entry->scheduled)) {
2512 +					TRACE_TASK(entry->scheduled, "is sending PGM tokens and needs boosting.\n");
2513 +					BUG_ON(is_pgm_satisfied(entry->scheduled));
2514 +
2515 +					/* We are either sending tokens or waiting for tokens.
2516 +					   If waiting: Boost priority so we'll be scheduled
2517 +						immediately when needed tokens arrive.
2518 +					   If sending: Boost priority so no one (specifically, our
2519 +						consumers) will preempt us while signalling the token
2520 +						transmission.
2521 +					*/
2522 +					tsk_rt(entry->scheduled)->priority_boosted = 1;
2523 +					tsk_rt(entry->scheduled)->boost_start_time = litmus_clock();
2524 +
2525 +					if (likely(!blocks)) {
2526 +						unlink(entry->scheduled);
2527 +						gsnedf_job_arrival(entry->scheduled);
2528 +						/* we may regain the processor */
2529 +						if (preempt) {
2530 +							preempt = entry->scheduled != entry->linked;
2531 +							if (!preempt) {
2532 +								TRACE_TASK(entry->scheduled, "blocked preemption by lazy boosting.\n");
2533 +							}
2534 +						}
2535 +					}
2536 +				}
2537 +			}
2538 +			else { /* sending is satisfied */
2539 +				tsk_rt(entry->scheduled)->ctrl_page->pgm_sending = 0;
2540 +				tsk_rt(entry->scheduled)->ctrl_page->pgm_satisfied = 0;
2541 +
2542 +				if (is_priority_boosted(entry->scheduled)) {
2543 +					TRACE_TASK(entry->scheduled,
2544 +							"is done sending PGM tokens must relinquish boosting.\n");
2545 +					/* clear boosting */
2546 +					tsk_rt(entry->scheduled)->priority_boosted = 0;
2547 +					if(likely(!blocks)) {
2548 +						/* recheck priority */
2549 +						unlink(entry->scheduled);
2550 +						gsnedf_job_arrival(entry->scheduled);
2551 +						/* we may lose the processor */
2552 +						if (!preempt) {
2553 +							preempt = entry->scheduled != entry->linked;
2554 +							if (preempt) {
2555 +								TRACE_TASK(entry->scheduled, "preempted by lazy unboosting.\n");
2556 +							}
2557 +						}
2558 +					}
2559 +				}
2560 +			}
2561 +		}
2562 +	}
2563 +#endif
2564  
2565  	/* If a task blocks we have no choice but to reschedule.
2566  	 */
2567 @@ -469,10 +537,13 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev)
2568  	 * that we are still linked. Multiple calls to request_exit_np() don't
2569  	 * hurt.
2570  	 */
2571 -	if (np && (out_of_time || preempt || sleep)) {
2572 +	if (np && (out_of_time || sleep)) {
2573  		unlink(entry->scheduled);
2574  		request_exit_np(entry->scheduled);
2575  	}
2576 +	else if (np && preempt) {
2577 +		request_exit_np(entry->scheduled);
2578 +	}
2579  
2580  	/* Any task that is preemptable and either exhausts its execution
2581  	 * budget or wants to sleep completes. We may have to reschedule after
2582 @@ -503,12 +574,14 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev)
2583  			entry->scheduled->rt_param.scheduled_on = NO_CPU;
2584  			TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
2585  		}
2586 -	} else
2587 +	}
2588 +	else {
2589  		/* Only override Linux scheduler if we have a real-time task
2590  		 * scheduled that needs to continue.
2591  		 */
2592  		if (exists)
2593  			next = prev;
2594 +	}
2595  
2596  	sched_state_task_picked();
2597  
2598 @@ -523,7 +596,6 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev)
2599  		TRACE("becomes idle at %llu.\n", litmus_clock());
2600  #endif
2601  
2602 -
2603  	return next;
2604  }
2605  
2606 @@ -595,6 +667,10 @@ static void gsnedf_task_wake_up(struct task_struct *task)
2607  		release_at(task, now);
2608  		sched_trace_task_release(task);
2609  	}
2610 +	if (is_pgm_waiting(task)) {
2611 +		/* shift out release/deadline, if needed */
2612 +		setup_pgm_release(task);
2613 +	}
2614  	gsnedf_job_arrival(task);
2615  	raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
2616  }
2617 @@ -964,6 +1040,44 @@ static long gsnedf_allocate_lock(struct litmus_lock **lock, int type,
2618  
2619  #endif
2620  
2621 +static struct domain_proc_info gsnedf_domain_proc_info;
2622 +static long gsnedf_get_domain_proc_info(struct domain_proc_info **ret)
2623 +{
2624 +	*ret = &gsnedf_domain_proc_info;
2625 +	return 0;
2626 +}
2627 +
2628 +static void gsnedf_setup_domain_proc(void)
2629 +{
2630 +	int i, cpu;
2631 +	int release_master =
2632 +#ifdef CONFIG_RELEASE_MASTER
2633 +			atomic_read(&release_master_cpu);
2634 +#else
2635 +		NO_CPU;
2636 +#endif
2637 +	int num_rt_cpus = num_online_cpus() - (release_master != NO_CPU);
2638 +	struct cd_mapping *map;
2639 +
2640 +	memset(&gsnedf_domain_proc_info, 0, sizeof(gsnedf_domain_proc_info));
2641 +	init_domain_proc_info(&gsnedf_domain_proc_info, num_rt_cpus, 1);
2642 +	gsnedf_domain_proc_info.num_cpus = num_rt_cpus;
2643 +	gsnedf_domain_proc_info.num_domains = 1;
2644 +
2645 +	gsnedf_domain_proc_info.domain_to_cpus[0].id = 0;
2646 +	for (cpu = 0, i = 0; cpu < num_online_cpus(); ++cpu) {
2647 +		if (cpu == release_master)
2648 +			continue;
2649 +		map = &gsnedf_domain_proc_info.cpu_to_domains[i];
2650 +		map->id = cpu;
2651 +		cpumask_set_cpu(0, map->mask);
2652 +		++i;
2653 +
2654 +		/* add cpu to the domain */
2655 +		cpumask_set_cpu(cpu,
2656 +			gsnedf_domain_proc_info.domain_to_cpus[0].mask);
2657 +	}
2658 +}
2659  
2660  static long gsnedf_activate_plugin(void)
2661  {
2662 @@ -991,6 +1105,15 @@ static long gsnedf_activate_plugin(void)
2663  		}
2664  #endif
2665  	}
2666 +
2667 +	gsnedf_setup_domain_proc();
2668 +
2669 +	return 0;
2670 +}
2671 +
2672 +static long gsnedf_deactivate_plugin(void)
2673 +{
2674 +	destroy_domain_proc_info(&gsnedf_domain_proc_info);
2675  	return 0;
2676  }
2677  
2678 @@ -1007,6 +1130,8 @@ static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = {
2679  	.task_block		= gsnedf_task_block,
2680  	.admit_task		= gsnedf_admit_task,
2681  	.activate_plugin	= gsnedf_activate_plugin,
2682 +	.deactivate_plugin	= gsnedf_deactivate_plugin,
2683 +	.get_domain_proc_info	= gsnedf_get_domain_proc_info,
2684  #ifdef CONFIG_LITMUS_LOCKING
2685  	.allocate_lock		= gsnedf_allocate_lock,
2686  #endif
2687 diff --git a/litmus/sched_pfair.c b/litmus/sched_pfair.c
2688 index efe5e13..2690498 100644
2689 --- a/litmus/sched_pfair.c
2690 +++ b/litmus/sched_pfair.c
2691 @@ -940,6 +940,66 @@ static void cleanup_clusters(void)
2692  	}
2693  }
2694  
2695 +static struct domain_proc_info pfair_domain_proc_info;
2696 +static long pfair_get_domain_proc_info(struct domain_proc_info **ret)
2697 +{
2698 +	*ret = &pfair_domain_proc_info;
2699 +	return 0;
2700 +}
2701 +
2702 +static void pfair_setup_domain_proc(void)
2703 +{
2704 +	int i, cpu, domain;
2705 +#ifdef CONFIG_RELEASE_MASTER
2706 +	int release_master = atomic_read(&release_master_cpu);
2707 +	/* skip over the domain with the release master if cluster size is 1 */
2708 +	int cluster_size = num_online_cpus() / num_pfair_clusters;
2709 +	int skip_domain = (1 == cluster_size && release_master != NO_CPU) ?
2710 +			release_master : NO_CPU;
2711 +#else
2712 +	int release_master = NO_CPU;
2713 +	int skip_domain = NO_CPU;
2714 +#endif
2715 +	int num_rt_cpus = num_online_cpus() - (release_master != NO_CPU);
2716 +	int num_rt_domains = num_pfair_clusters - (skip_domain != NO_CPU);
2717 +	struct cd_mapping *map;
2718 +
2719 +	memset(&pfair_domain_proc_info, 0, sizeof(pfair_domain_proc_info));
2720 +	init_domain_proc_info(&pfair_domain_proc_info, num_rt_cpus, num_pfair_clusters);
2721 +	pfair_domain_proc_info.num_cpus = num_rt_cpus;
2722 +	pfair_domain_proc_info.num_domains = num_rt_domains;
2723 +
2724 +	for (cpu = 0, i = 0; cpu < num_online_cpus(); ++cpu) {
2725 +		if (cpu == release_master)
2726 +			continue;
2727 +		map = &pfair_domain_proc_info.cpu_to_domains[i];
2728 +		/* pointer math to figure out the domain index */
2729 +		domain = cpu_cluster(&per_cpu(pfair_state, cpu)) - pfair_clusters;
2730 +		map->id = cpu;
2731 +		cpumask_set_cpu(domain, map->mask);
2732 +		++i;
2733 +	}
2734 +
2735 +	for (domain = 0, i = 0; domain < num_pfair_clusters; ++domain) {
2736 +		struct pfair_cluster *cluster;
2737 +		struct list_head *pos;
2738 +
2739 +		if (domain == skip_domain)
2740 +			continue;
2741 +
2742 +		cluster = &pfair_clusters[domain];
2743 +		map = &pfair_domain_proc_info.domain_to_cpus[i];
2744 +		map->id = i;
2745 +
2746 +		list_for_each(pos, &cluster->topology.cpus) {
2747 +			cpu = cpu_id(from_cluster_list(pos));
2748 +			if (cpu != release_master)
2749 +				cpumask_set_cpu(cpu, map->mask);
2750 +		}
2751 +		++i;
2752 +	}
2753 +}
2754 +
2755  static long pfair_activate_plugin(void)
2756  {
2757  	int err, i;
2758 @@ -994,6 +1054,8 @@ static long pfair_activate_plugin(void)
2759  
2760  	if (err < 0)
2761  		cleanup_clusters();
2762 +	else
2763 +		pfair_setup_domain_proc();
2764  
2765  	return err;
2766  }
2767 @@ -1001,6 +1063,7 @@ static long pfair_activate_plugin(void)
2768  static long pfair_deactivate_plugin(void)
2769  {
2770  	cleanup_clusters();
2771 +	destroy_domain_proc_info(&pfair_domain_proc_info);
2772  	return 0;
2773  }
2774  
2775 @@ -1018,6 +1081,7 @@ static struct sched_plugin pfair_plugin __cacheline_aligned_in_smp = {
2776  	.complete_job		= complete_job,
2777  	.activate_plugin	= pfair_activate_plugin,
2778  	.deactivate_plugin	= pfair_deactivate_plugin,
2779 +	.get_domain_proc_info	= pfair_get_domain_proc_info,
2780  };
2781  
2782  
2783 diff --git a/litmus/sched_pfp.c b/litmus/sched_pfp.c
2784 index 01ac97d..3a01578 100644
2785 --- a/litmus/sched_pfp.c
2786 +++ b/litmus/sched_pfp.c
2787 @@ -21,6 +21,8 @@
2788  #include <litmus/trace.h>
2789  #include <litmus/budget.h>
2790  
2791 +/* to set up domain/cpu mappings */
2792 +#include <litmus/litmus_proc.h>
2793  #include <linux/uaccess.h>
2794  
2795  
2796 @@ -1681,6 +1683,43 @@ static long pfp_admit_task(struct task_struct* tsk)
2797  		return -EINVAL;
2798  }
2799  
2800 +static struct domain_proc_info pfp_domain_proc_info;
2801 +static long pfp_get_domain_proc_info(struct domain_proc_info **ret)
2802 +{
2803 +	*ret = &pfp_domain_proc_info;
2804 +	return 0;
2805 +}
2806 +
2807 +static void pfp_setup_domain_proc(void)
2808 +{
2809 +	int i, cpu;
2810 +	int release_master =
2811 +#ifdef CONFIG_RELEASE_MASTER
2812 +		atomic_read(&release_master_cpu);
2813 +#else
2814 +		NO_CPU;
2815 +#endif
2816 +	int num_rt_cpus = num_online_cpus() - (release_master != NO_CPU);
2817 +	struct cd_mapping *cpu_map, *domain_map;
2818 +
2819 +	memset(&pfp_domain_proc_info, 0, sizeof(pfp_domain_proc_info));
2820 +	init_domain_proc_info(&pfp_domain_proc_info, num_rt_cpus, num_rt_cpus);
2821 +	pfp_domain_proc_info.num_cpus = num_rt_cpus;
2822 +	pfp_domain_proc_info.num_domains = num_rt_cpus;
2823 +	for (cpu = 0, i = 0; cpu < num_online_cpus(); ++cpu) {
2824 +		if (cpu == release_master)
2825 +			continue;
2826 +		cpu_map = &pfp_domain_proc_info.cpu_to_domains[i];
2827 +		domain_map = &pfp_domain_proc_info.domain_to_cpus[i];
2828 +
2829 +		cpu_map->id = cpu;
2830 +		domain_map->id = i; /* enumerate w/o counting the release master */
2831 +		cpumask_set_cpu(i, cpu_map->mask);
2832 +		cpumask_set_cpu(cpu, domain_map->mask);
2833 +		++i;
2834 +	}
2835 +}
2836 +
2837  static long pfp_activate_plugin(void)
2838  {
2839  #if defined(CONFIG_RELEASE_MASTER) || defined(CONFIG_LITMUS_LOCKING)
2840 @@ -1706,9 +1745,16 @@ static long pfp_activate_plugin(void)
2841  
2842  #endif
2843  
2844 +	pfp_setup_domain_proc();
2845 +
2846  	return 0;
2847  }
2848  
2849 +static long pfp_deactivate_plugin(void)
2850 +{
2851 +	destroy_domain_proc_info(&pfp_domain_proc_info);
2852 +	return 0;
2853 +}
2854  
2855  /*	Plugin object	*/
2856  static struct sched_plugin pfp_plugin __cacheline_aligned_in_smp = {
2857 @@ -1722,6 +1768,8 @@ static struct sched_plugin pfp_plugin __cacheline_aligned_in_smp = {
2858  	.task_block		= pfp_task_block,
2859  	.admit_task		= pfp_admit_task,
2860  	.activate_plugin	= pfp_activate_plugin,
2861 +	.deactivate_plugin	= pfp_deactivate_plugin,
2862 +	.get_domain_proc_info	= pfp_get_domain_proc_info,
2863  #ifdef CONFIG_LITMUS_LOCKING
2864  	.allocate_lock		= pfp_allocate_lock,
2865  	.finish_switch		= pfp_finish_switch,
2866 diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
2867 index c4747e0e..5924896 100644
2868 --- a/litmus/sched_plugin.c
2869 +++ b/litmus/sched_plugin.c
2870 @@ -111,6 +111,12 @@ static long litmus_dummy_deactivate_plugin(void)
2871  	return 0;
2872  }
2873  
2874 +static long litmus_dummy_get_domain_proc_info(struct domain_proc_info **d)
2875 +{
2876 +	*d = NULL;
2877 +	return 0;
2878 +}
2879 +
2880  #ifdef CONFIG_LITMUS_LOCKING
2881  
2882  static long litmus_dummy_allocate_lock(struct litmus_lock **lock, int type,
2883 @@ -137,6 +143,7 @@ struct sched_plugin linux_sched_plugin = {
2884  	.finish_switch = litmus_dummy_finish_switch,
2885  	.activate_plugin = litmus_dummy_activate_plugin,
2886  	.deactivate_plugin = litmus_dummy_deactivate_plugin,
2887 +	.get_domain_proc_info = litmus_dummy_get_domain_proc_info,
2888  #ifdef CONFIG_LITMUS_LOCKING
2889  	.allocate_lock = litmus_dummy_allocate_lock,
2890  #endif
2891 @@ -175,6 +182,7 @@ int register_sched_plugin(struct sched_plugin* plugin)
2892  	CHECK(complete_job);
2893  	CHECK(activate_plugin);
2894  	CHECK(deactivate_plugin);
2895 +	CHECK(get_domain_proc_info);
2896  #ifdef CONFIG_LITMUS_LOCKING
2897  	CHECK(allocate_lock);
2898  #endif
2899 diff --git a/litmus/sched_psn_edf.c b/litmus/sched_psn_edf.c
2900 index 0873dc1..7546dcb 100644
2901 --- a/litmus/sched_psn_edf.c
2902 +++ b/litmus/sched_psn_edf.c
2903 @@ -23,6 +23,9 @@
2904  #include <litmus/sched_trace.h>
2905  #include <litmus/trace.h>
2906  
2907 +/* to set up domain/cpu mappings */
2908 +#include <litmus/litmus_proc.h>
2909 +
2910  typedef struct {
2911  	rt_domain_t 		domain;
2912  	int          		cpu;
2913 @@ -599,6 +602,43 @@ static long psnedf_allocate_lock(struct litmus_lock **lock, int type,
2914  
2915  #endif
2916  
2917 +static struct domain_proc_info psnedf_domain_proc_info;
2918 +static long psnedf_get_domain_proc_info(struct domain_proc_info **ret)
2919 +{
2920 +	*ret = &psnedf_domain_proc_info;
2921 +	return 0;
2922 +}
2923 +
2924 +static void psnedf_setup_domain_proc(void)
2925 +{
2926 +	int i, cpu;
2927 +	int release_master =
2928 +#ifdef CONFIG_RELEASE_MASTER
2929 +		atomic_read(&release_master_cpu);
2930 +#else
2931 +		NO_CPU;
2932 +#endif
2933 +	int num_rt_cpus = num_online_cpus() - (release_master != NO_CPU);
2934 +	struct cd_mapping *cpu_map, *domain_map;
2935 +
2936 +	memset(&psnedf_domain_proc_info, 0, sizeof(psnedf_domain_proc_info));
2937 +	init_domain_proc_info(&psnedf_domain_proc_info, num_rt_cpus, num_rt_cpus);
2938 +	psnedf_domain_proc_info.num_cpus = num_rt_cpus;
2939 +	psnedf_domain_proc_info.num_domains = num_rt_cpus;
2940 +
2941 +	for (cpu = 0, i = 0; cpu < num_online_cpus(); ++cpu) {
2942 +		if (cpu == release_master)
2943 +			continue;
2944 +		cpu_map = &psnedf_domain_proc_info.cpu_to_domains[i];
2945 +		domain_map = &psnedf_domain_proc_info.domain_to_cpus[i];
2946 +
2947 +		cpu_map->id = cpu;
2948 +		domain_map->id = i; /* enumerate w/o counting the release master */
2949 +		cpumask_set_cpu(i, cpu_map->mask);
2950 +		cpumask_set_cpu(cpu, domain_map->mask);
2951 +		++i;
2952 +	}
2953 +}
2954  
2955  static long psnedf_activate_plugin(void)
2956  {
2957 @@ -614,6 +654,14 @@ static long psnedf_activate_plugin(void)
2958  	get_srp_prio = psnedf_get_srp_prio;
2959  #endif
2960  
2961 +	psnedf_setup_domain_proc();
2962 +
2963 +	return 0;
2964 +}
2965 +
2966 +static long psnedf_deactivate_plugin(void)
2967 +{
2968 +	destroy_domain_proc_info(&psnedf_domain_proc_info);
2969  	return 0;
2970  }
2971  
2972 @@ -642,6 +690,8 @@ static struct sched_plugin psn_edf_plugin __cacheline_aligned_in_smp = {
2973  	.task_block		= psnedf_task_block,
2974  	.admit_task		= psnedf_admit_task,
2975  	.activate_plugin	= psnedf_activate_plugin,
2976 +	.deactivate_plugin	= psnedf_deactivate_plugin,
2977 +	.get_domain_proc_info	= psnedf_get_domain_proc_info,
2978  #ifdef CONFIG_LITMUS_LOCKING
2979  	.allocate_lock		= psnedf_allocate_lock,
2980  #endif
2981 diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c
2982 index 933e7e4..ea9e207 100644
2983 --- a/litmus/sched_task_trace.c
2984 +++ b/litmus/sched_task_trace.c
2985 @@ -231,10 +231,32 @@ feather_callback void do_sched_trace_action(unsigned long id,
2986  {
2987  	struct task_struct *t = (struct task_struct*) _task;
2988  	struct st_event_record* rec = get_record(ST_ACTION, t);
2989 -
2990  	if (rec) {
2991  		rec->data.action.when   = now();
2992  		rec->data.action.action = action;
2993  		put_record(rec);
2994  	}
2995  }
2996 +
2997 +feather_callback void do_sched_trace_pgm_param(unsigned long id, unsigned long _task)
2998 +{
2999 +	struct task_struct *t = (struct task_struct*) _task;
3000 +	struct st_event_record* rec = get_record(ST_PGM_PARAM, t);
3001 +	if (rec) {
3002 +		rec->data.pgm_param.node_type = tsk_rt(t)->task_params.pgm_type;
3003 +		rec->data.pgm_param.graph_pid = t->tgid;
3004 +		rec->data.pgm_param.expected_graph_etoe = tsk_rt(t)->task_params.pgm_expected_etoe;
3005 +		put_record(rec);
3006 +	}
3007 +}
3008 +
3009 +feather_callback void do_sched_trace_pgm_release(unsigned long id, unsigned long _task)
3010 +{
3011 +	struct task_struct *t = (struct task_struct*) _task;
3012 +	struct st_event_record* rec = get_record(ST_PGM_RELEASE, t);
3013 +	if (rec) {
3014 +		rec->data.pgm_release.release  = get_release(t);
3015 +		rec->data.pgm_release.deadline = get_deadline(t);
3016 +		put_record(rec);
3017 +	}
3018 +}
3019 -- 
3020 1.7.10.4
