Attachment 'MC2-litmut-rt-imx6-rtss15.patch'

   1 From d6790ee609a62386c2803cbe74b84354af99bb73 Mon Sep 17 00:00:00 2001
   2 From: Bjoern Brandenburg <bbb@mpi-sws.org>
   3 Date: Thu, 20 Jun 2013 12:49:18 +0200
   4 Subject: [PATCH 001/119] Add LITMUS^RT directory
   5 
   6 Hookup litmus/ with kernel and add extra version.
   7 ---
   8  Makefile         | 2 +-
   9  arch/arm/Kconfig | 3 +++
  10  arch/x86/Kconfig | 2 ++
  11  litmus/Kconfig   | 3 +++
  12  litmus/Makefile  | 3 +++
  13  5 files changed, 12 insertions(+), 1 deletion(-)
  14  create mode 100644 litmus/Kconfig
  15  create mode 100644 litmus/Makefile
  16 
  17 diff --git a/Makefile b/Makefile
  18 index 3071428..dd0cb2e 100644
  19 --- a/Makefile
  20 +++ b/Makefile
  21 @@ -733,7 +733,7 @@ export mod_sign_cmd
  22  
  23  
  24  ifeq ($(KBUILD_EXTMOD),)
  25 -core-y		+= kernel/ mm/ fs/ ipc/ security/ crypto/ block/
  26 +core-y		+= kernel/ mm/ fs/ ipc/ security/ crypto/ block/ litmus/
  27  
  28  vmlinux-dirs	:= $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \
  29  		     $(core-y) $(core-m) $(drivers-y) $(drivers-m) \
  30 diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
  31 index ea6ec7e..131ec84 100644
  32 --- a/arch/arm/Kconfig
  33 +++ b/arch/arm/Kconfig
  34 @@ -2269,3 +2269,6 @@ source "crypto/Kconfig"
  35  source "lib/Kconfig"
  36  
  37  source "arch/arm/kvm/Kconfig"
  38 +
  39 +source "litmus/Kconfig"
  40 +
  41 diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
  42 index fe120da..bd67fd1 100644
  43 --- a/arch/x86/Kconfig
  44 +++ b/arch/x86/Kconfig
  45 @@ -2346,3 +2346,5 @@ source "crypto/Kconfig"
  46  source "arch/x86/kvm/Kconfig"
  47  
  48  source "lib/Kconfig"
  49 +
  50 +source "litmus/Kconfig"
  51 diff --git a/litmus/Kconfig b/litmus/Kconfig
  52 new file mode 100644
  53 index 0000000..382b2e4
  54 --- /dev/null
  55 +++ b/litmus/Kconfig
  56 @@ -0,0 +1,3 @@
  57 +menu "LITMUS^RT"
  58 +
  59 +endmenu
  60 diff --git a/litmus/Makefile b/litmus/Makefile
  61 new file mode 100644
  62 index 0000000..f0ed31f
  63 --- /dev/null
  64 +++ b/litmus/Makefile
  65 @@ -0,0 +1,3 @@
  66 +#
  67 +# Makefile for LITMUS^RT
  68 +#
  69 -- 
  70 1.8.1.2
  71 
  72 
  73 From efbaae0016a8bc98cc6d24e17ee242a52b356f17 Mon Sep 17 00:00:00 2001
  74 From: Bjoern Brandenburg <bbb@mpi-sws.org>
  75 Date: Sun, 23 Jun 2013 11:41:27 +0200
  76 Subject: [PATCH 002/119] Feather-Trace: add platform independent
  77  implementation
  78 
  79 This patch adds the simple fallback implementation and creates dummy
  80 hooks in the x86 and ARM Kconfig files.
  81 ---
  82  arch/arm/Kconfig                |   3 +
  83  arch/x86/Kconfig                |   3 +
  84  include/litmus/feather_buffer.h | 118 ++++++++++++++++++++++++++++++++++++++++
  85  include/litmus/feather_trace.h  |  69 +++++++++++++++++++++++
  86  litmus/Kconfig                  |  25 +++++++++
  87  litmus/Makefile                 |   2 +
  88  litmus/ft_event.c               |  43 +++++++++++++++
  89  7 files changed, 263 insertions(+)
  90  create mode 100644 include/litmus/feather_buffer.h
  91  create mode 100644 include/litmus/feather_trace.h
  92  create mode 100644 litmus/ft_event.c
  93 
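The portable fallback keeps a per-event counter in ft_events[] and the ft_event*() macros merely test it before invoking the callback, so a disabled event still costs a load and a branch. The companion feather_buffer.h implements a lock-free ring whose read/write indices are free-running unsigned counters reduced modulo slot_count on every access; this is why init_ft_buffer() rejects any slot_count that does not divide UINT_MAX + 1, i.e., that is not a power of two. A minimal user-space sketch, separate from the patch, illustrating the wrap-around requirement (slot_of() and the sample counts are illustrative only):

/* Why init_ft_buffer() insists that slot_count divides UINT_MAX + 1:
 * the mapping idx -> idx % slot_count must stay consistent when the
 * free-running index wraps from UINT_MAX back to 0.
 */
#include <limits.h>
#include <stdio.h>

static unsigned int slot_of(unsigned int idx, unsigned int slot_count)
{
	return idx % slot_count;
}

int main(void)
{
	unsigned int pow2 = 64;       /* accepted: UINT_MAX % 64 == 63  */
	unsigned int odd  = 100;      /* rejected: UINT_MAX % 100 == 95 */
	unsigned int idx  = UINT_MAX; /* free-running index at the wrap */

	/* Power of two: ..., 63, 0, 1, ... -- the sequence continues
	 * seamlessly.  With 100 slots it jumps from 95 straight to 0,
	 * skipping slots 96..99 and breaking the reader/writer pairing.
	 */
	printf("64 slots : %u -> %u\n", slot_of(idx, pow2), slot_of(idx + 1, pow2));
	printf("100 slots: %u -> %u\n", slot_of(idx, odd), slot_of(idx + 1, odd));
	return 0;
}
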
  94 diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
  95 index 131ec84..ecfd735 100644
  96 --- a/arch/arm/Kconfig
  97 +++ b/arch/arm/Kconfig
  98 @@ -2270,5 +2270,8 @@ source "lib/Kconfig"
  99  
 100  source "arch/arm/kvm/Kconfig"
 101  
 102 +config ARCH_HAS_FEATHER_TRACE
 103 +	def_bool n
 104 +
 105  source "litmus/Kconfig"
 106  
 107 diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
 108 index bd67fd1..0216c93 100644
 109 --- a/arch/x86/Kconfig
 110 +++ b/arch/x86/Kconfig
 111 @@ -2347,4 +2347,7 @@ source "arch/x86/kvm/Kconfig"
 112  
 113  source "lib/Kconfig"
 114  
 115 +config ARCH_HAS_FEATHER_TRACE
 116 +	def_bool n
 117 +
 118  source "litmus/Kconfig"
 119 diff --git a/include/litmus/feather_buffer.h b/include/litmus/feather_buffer.h
 120 new file mode 100644
 121 index 0000000..38de95b
 122 --- /dev/null
 123 +++ b/include/litmus/feather_buffer.h
 124 @@ -0,0 +1,118 @@
 125 +#ifndef _FEATHER_BUFFER_H_
 126 +#define _FEATHER_BUFFER_H_
 127 +
 128 +/* requires UINT_MAX and memcpy */
 129 +
 130 +#define SLOT_FREE	0
 131 +#define	SLOT_BUSY 	1
 132 +#define	SLOT_READY	2
 133 +
 134 +struct ft_buffer {
 135 +	unsigned int	slot_count;
 136 +	unsigned int	slot_size;
 137 +
 138 +	int 		free_count;
 139 +	unsigned int 	write_idx;
 140 +	unsigned int 	read_idx;
 141 +
 142 +	char*		slots;
 143 +	void*		buffer_mem;
 144 +	unsigned int	failed_writes;
 145 +};
 146 +
 147 +static inline int init_ft_buffer(struct ft_buffer*	buf,
 148 +				 unsigned int 		slot_count,
 149 +				 unsigned int 		slot_size,
 150 +				 char*			slots,
 151 +				 void* 			buffer_mem)
 152 +{
 153 +	int i = 0;
 154 +	if (!slot_count || UINT_MAX % slot_count != slot_count - 1) {
 155 +		/* The slot count must divide UINT_MAX + 1 so that when it
 156 +		 * wraps around the index correctly points to 0.
 157 +		 */
 158 +		return 0;
 159 +	} else {
 160 +		buf->slot_count    = slot_count;
 161 +		buf->slot_size     = slot_size;
 162 +		buf->slots         = slots;
 163 +		buf->buffer_mem    = buffer_mem;
 164 +		buf->free_count    = slot_count;
 165 +		buf->write_idx     = 0;
 166 +		buf->read_idx      = 0;
 167 +		buf->failed_writes = 0;
 168 +		for (i = 0; i < slot_count; i++)
 169 +			buf->slots[i] = SLOT_FREE;
 170 +		return 1;
 171 +	}
 172 +}
 173 +
 174 +static inline int ft_buffer_start_write(struct ft_buffer* buf, void **ptr)
 175 +{
 176 +	int free = fetch_and_dec(&buf->free_count);
 177 +	unsigned int idx;
 178 +	if (free <= 0) {
 179 +		fetch_and_inc(&buf->free_count);
 180 +		*ptr = 0;
 181 +		fetch_and_inc(&buf->failed_writes);
 182 +		return 0;
 183 +	} else {
 184 +		idx  = fetch_and_inc((int*) &buf->write_idx) % buf->slot_count;
 185 +		buf->slots[idx] = SLOT_BUSY;
 186 +		*ptr = ((char*) buf->buffer_mem) + idx * buf->slot_size;
 187 +		return 1;
 188 +	}
 189 +}
 190 +
 191 +/* For single writer scenarios, with fewer atomic ops. */
 192 +static inline int ft_buffer_start_single_write(struct ft_buffer* buf, void **ptr)
 193 +{
 194 +	unsigned int idx;
 195 +
 196 +	if (buf->free_count <= 0) {
 197 +		*ptr = 0;
 198 +		/* single writer: no atomicity needed */
 199 +		buf->failed_writes++;
 200 +		return 0;
 201 +	} else {
 202 +		/* free_count is positive, and can only increase since we are
 203 +		 * (by assumption) the only writer accessing the buffer.
 204 +		 */
 205 +
 206 +		idx  = buf->write_idx++ % buf->slot_count;
 207 +		buf->slots[idx] = SLOT_BUSY;
 208 +		*ptr = ((char*) buf->buffer_mem) + idx * buf->slot_size;
 209 +
 210 +		ft_atomic_dec(&buf->free_count);
 211 +		return 1;
 212 +	}
 213 +}
 214 +
 215 +static inline void ft_buffer_finish_write(struct ft_buffer* buf, void *ptr)
 216 +{
 217 +	unsigned int idx = ((char*) ptr - (char*) buf->buffer_mem) / buf->slot_size;
 218 +	buf->slots[idx]  = SLOT_READY;
 219 +}
 220 +
 221 +
 222 +/* exclusive reader access is assumed */
 223 +static inline int ft_buffer_read(struct ft_buffer* buf, void* dest)
 224 +{
 225 +	unsigned int idx;
 226 +	if (buf->free_count == buf->slot_count)
 227 +		/* nothing available */
 228 +		return 0;
 229 +	idx = buf->read_idx % buf->slot_count;
 230 +	if (buf->slots[idx] == SLOT_READY) {
 231 +		memcpy(dest, ((char*) buf->buffer_mem) + idx * buf->slot_size,
 232 +		       buf->slot_size);
 233 +		buf->slots[idx] = SLOT_FREE;
 234 +		buf->read_idx++;
 235 +		fetch_and_inc(&buf->free_count);
 236 +		return 1;
 237 +	} else
 238 +		return 0;
 239 +}
 240 +
 241 +
 242 +#endif
 243 diff --git a/include/litmus/feather_trace.h b/include/litmus/feather_trace.h
 244 new file mode 100644
 245 index 0000000..dbeca46
 246 --- /dev/null
 247 +++ b/include/litmus/feather_trace.h
 248 @@ -0,0 +1,69 @@
 249 +#ifndef _FEATHER_TRACE_H_
 250 +#define _FEATHER_TRACE_H_
 251 +
 252 +#include <asm/atomic.h>
 253 +
 254 +int ft_enable_event(unsigned long id);
 255 +int ft_disable_event(unsigned long id);
 256 +int ft_is_event_enabled(unsigned long id);
 257 +int ft_disable_all_events(void);
 258 +
 259 +/* atomic_* functions are inline anyway */
 260 +static inline int fetch_and_inc(int *val)
 261 +{
 262 +	return atomic_add_return(1, (atomic_t*) val) - 1;
 263 +}
 264 +
 265 +static inline int fetch_and_dec(int *val)
 266 +{
 267 +	return atomic_sub_return(1, (atomic_t*) val) + 1;
 268 +}
 269 +
 270 +static inline void ft_atomic_dec(int *val)
 271 +{
 272 +	atomic_sub(1, (atomic_t*) val);
 273 +}
 274 +
 275 +/* Don't use rewriting implementation if kernel text pages are read-only.
 276 + * Ftrace gets around this by using the identity mapping, but that's more
 277 + * effort than is warranted right now for Feather-Trace.
 278 + * Eventually, it may make sense to replace Feather-Trace with ftrace.
 279 + */
 280 +#if defined(CONFIG_ARCH_HAS_FEATHER_TRACE) && !defined(CONFIG_DEBUG_RODATA)
 281 +
 282 +#include <asm/feather_trace.h>
 283 +
 284 +#else /* !__ARCH_HAS_FEATHER_TRACE */
 285 +
 286 +/* provide default implementation */
 287 +#include <linux/timex.h> /* for get_cycles() */
 288 +
 289 +static inline unsigned long long ft_timestamp(void)
 290 +{
 291 +	return get_cycles();
 292 +}
 293 +
 294 +#define feather_callback
 295 +
 296 +#define MAX_EVENTS 1024
 297 +
 298 +extern int ft_events[MAX_EVENTS];
 299 +
 300 +#define ft_event(id, callback) \
 301 +	if (ft_events[id]) callback();
 302 +
 303 +#define ft_event0(id, callback) \
 304 +	if (ft_events[id]) callback(id);
 305 +
 306 +#define ft_event1(id, callback, param) \
 307 +	if (ft_events[id]) callback(id, param);
 308 +
 309 +#define ft_event2(id, callback, param, param2) \
 310 +	if (ft_events[id]) callback(id, param, param2);
 311 +
 312 +#define ft_event3(id, callback, p, p2, p3) \
 313 +	if (ft_events[id]) callback(id, p, p2, p3);
 314 +
 315 +#endif /* __ARCH_HAS_FEATHER_TRACE */
 316 +
 317 +#endif
 318 diff --git a/litmus/Kconfig b/litmus/Kconfig
 319 index 382b2e4..70ddbad 100644
 320 --- a/litmus/Kconfig
 321 +++ b/litmus/Kconfig
 322 @@ -1,3 +1,28 @@
 323  menu "LITMUS^RT"
 324  
 325 +menu "Tracing"
 326 +
 327 +config FEATHER_TRACE
 328 +	bool "Feather-Trace Infrastructure"
 329 +	default y
 330 +	help
 331 +	  Feather-Trace basic tracing infrastructure. Includes device file
 332 +	  driver and instrumentation point support.
 333 +
 334 +	  There are actually two implementations of Feather-Trace.
 335 +	  1) A slower, but portable, default implementation.
 336 +	  2) Architecture-specific implementations that rewrite kernel .text at runtime.
 337 +
 338 +	  If enabled, Feather-Trace will be based on 2) if available (currently only for x86).
 339 +	  However, if DEBUG_RODATA=y, then Feather-Trace will choose option 1) in any case
 340 +	  to avoid problems with write-protected .text pages.
 341 +
 342 +	  Bottom line: to avoid increased overheads, choose DEBUG_RODATA=n.
 343 +
 344 +	  Note that this option only enables the basic Feather-Trace infrastructure;
 345 +	  you still need to enable SCHED_TASK_TRACE and/or SCHED_OVERHEAD_TRACE to
 346 +	  actually enable any events.
 347 +
 348 +endmenu
 349 +
 350  endmenu
 351 diff --git a/litmus/Makefile b/litmus/Makefile
 352 index f0ed31f..4c6130b 100644
 353 --- a/litmus/Makefile
 354 +++ b/litmus/Makefile
 355 @@ -1,3 +1,5 @@
 356  #
 357  # Makefile for LITMUS^RT
 358  #
 359 +
 360 +obj-$(CONFIG_FEATHER_TRACE) += ft_event.o
 361 diff --git a/litmus/ft_event.c b/litmus/ft_event.c
 362 new file mode 100644
 363 index 0000000..399a07b
 364 --- /dev/null
 365 +++ b/litmus/ft_event.c
 366 @@ -0,0 +1,43 @@
 367 +#include <linux/types.h>
 368 +
 369 +#include <litmus/feather_trace.h>
 370 +
 371 +#if !defined(CONFIG_ARCH_HAS_FEATHER_TRACE) || defined(CONFIG_DEBUG_RODATA)
 372 +/* provide dummy implementation */
 373 +
 374 +int ft_events[MAX_EVENTS];
 375 +
 376 +int ft_enable_event(unsigned long id)
 377 +{
 378 +	if (id < MAX_EVENTS) {
 379 +		ft_events[id]++;
 380 +		return 1;
 381 +	} else
 382 +		return 0;
 383 +}
 384 +
 385 +int ft_disable_event(unsigned long id)
 386 +{
 387 +	if (id < MAX_EVENTS && ft_events[id]) {
 388 +		ft_events[id]--;
 389 +		return 1;
 390 +	} else
 391 +		return 0;
 392 +}
 393 +
 394 +int ft_disable_all_events(void)
 395 +{
 396 +	int i;
 397 +
 398 +	for (i = 0; i < MAX_EVENTS; i++)
 399 +		ft_events[i] = 0;
 400 +
 401 +	return MAX_EVENTS;
 402 +}
 403 +
 404 +int ft_is_event_enabled(unsigned long id)
 405 +{
 406 +	return 	id < MAX_EVENTS && ft_events[id];
 407 +}
 408 +
 409 +#endif
 410 -- 
 411 1.8.1.2
 412 
 413 
 414 From 32f9c06ee90e860a7c6fbe8d27b2c219804bfee2 Mon Sep 17 00:00:00 2001
 415 From: Bjoern Brandenburg <bbb@mpi-sws.org>
 416 Date: Sun, 23 Jun 2013 11:46:23 +0200
 417 Subject: [PATCH 003/119] Feather-Trace: add x86 binary rewriting
 418  implementation
 419 
 420 This patch adds the x86-specific implementation of Feather-Trace
 421 triggers that works by rewriting jump instructions.
 422 ---
 423  arch/x86/Kconfig                        |   2 +-
 424  arch/x86/include/asm/feather_trace.h    |  17 +++++
 425  arch/x86/include/asm/feather_trace_32.h | 115 +++++++++++++++++++++++++++++
 426  arch/x86/include/asm/feather_trace_64.h | 124 ++++++++++++++++++++++++++++++++
 427  arch/x86/kernel/Makefile                |   2 +
 428  arch/x86/kernel/ft_event.c              | 118 ++++++++++++++++++++++++++++++
 429  litmus/Kconfig                          |   1 +
 430  7 files changed, 378 insertions(+), 1 deletion(-)
 431  create mode 100644 arch/x86/include/asm/feather_trace.h
 432  create mode 100644 arch/x86/include/asm/feather_trace_32.h
 433  create mode 100644 arch/x86/include/asm/feather_trace_64.h
 434  create mode 100644 arch/x86/kernel/ft_event.c
 435 
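Each ft_event*() macro in these headers plants a two-byte short jump (opcode 0xeb) over the call sequence and records the event id together with the jump's start and end addresses in the __event_table section. ft_enable_event() overwrites the jump's displacement byte with 0, so execution falls through into the call; ft_disable_event() restores the displacement to end_addr - start_addr - BYTE_JUMP_LEN, so the call is skipped again. A stand-alone user-space model of that toggling, separate from the patch and operating on a fake text buffer (with pointers instead of the long-typed addresses of the real struct trace_event):

#include <stdio.h>
#include <string.h>

#define BYTE_JUMP      0xeb
#define BYTE_JUMP_LEN  0x02

struct trace_event {                    /* simplified __event_table entry */
	long id;
	long count;
	unsigned char *start_addr;      /* address of the jmp instruction */
	unsigned char *end_addr;        /* label 2: just past the call    */
};

int main(void)
{
	unsigned char text[16];         /* stands in for kernel .text     */
	struct trace_event te = { .id = 100, .count = 0,
				  .start_addr = text, .end_addr = text + 7 };

	memset(text, 0x90, sizeof(text));
	text[0] = BYTE_JUMP;
	text[1] = te.end_addr - te.start_addr - BYTE_JUMP_LEN;  /* jmp 2f  */
	printf("disabled: jmp +%d (skips the call)\n", text[1]);

	if (++te.count == 1 && text[0] == BYTE_JUMP)  /* ft_enable_event()  */
		text[1] = 0;
	printf("enabled:  jmp +%d (falls through into the call)\n", text[1]);

	if (--te.count == 0 && text[0] == BYTE_JUMP)  /* ft_disable_event() */
		text[1] = te.end_addr - te.start_addr - BYTE_JUMP_LEN;
	printf("disabled: jmp +%d again\n", text[1]);
	return 0;
}
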
 436 diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
 437 index 0216c93..171cdc9 100644
 438 --- a/arch/x86/Kconfig
 439 +++ b/arch/x86/Kconfig
 440 @@ -2348,6 +2348,6 @@ source "arch/x86/kvm/Kconfig"
 441  source "lib/Kconfig"
 442  
 443  config ARCH_HAS_FEATHER_TRACE
 444 -	def_bool n
 445 +	def_bool y
 446  
 447  source "litmus/Kconfig"
 448 diff --git a/arch/x86/include/asm/feather_trace.h b/arch/x86/include/asm/feather_trace.h
 449 new file mode 100644
 450 index 0000000..4fd3163
 451 --- /dev/null
 452 +++ b/arch/x86/include/asm/feather_trace.h
 453 @@ -0,0 +1,17 @@
 454 +#ifndef _ARCH_FEATHER_TRACE_H
 455 +#define _ARCH_FEATHER_TRACE_H
 456 +
 457 +#include <asm/msr.h>
 458 +
 459 +static inline unsigned long long ft_timestamp(void)
 460 +{
 461 +	return __native_read_tsc();
 462 +}
 463 +
 464 +#ifdef CONFIG_X86_32
 465 +#include "feather_trace_32.h"
 466 +#else
 467 +#include "feather_trace_64.h"
 468 +#endif
 469 +
 470 +#endif
 471 diff --git a/arch/x86/include/asm/feather_trace_32.h b/arch/x86/include/asm/feather_trace_32.h
 472 new file mode 100644
 473 index 0000000..75e81a9
 474 --- /dev/null
 475 +++ b/arch/x86/include/asm/feather_trace_32.h
 476 @@ -0,0 +1,115 @@
 477 +/* Copyright (c) 2007-2012 Björn Brandenburg, <bbb@mpi-sws.org>
 478 + *
 479 + * Permission is hereby granted, free of charge, to any person obtaining
 480 + * a copy of this software and associated documentation files (the
 481 + * "Software"), to deal in the Software without restriction, including
 482 + * without limitation the rights to use, copy, modify, merge, publish,
 483 + * distribute, sublicense, and/or sell copies of the Software, and to
 484 + * permit persons to whom the Software is furnished to do so, subject to
 485 + * the following conditions:
 486 + *
 487 + * The above copyright notice and this permission notice shall be
 488 + * included in all copies or substantial portions of the Software.
 489 + *
 490 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 491 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 492 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 493 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 494 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 495 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 496 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 497 + * SOFTWARE.
 498 + */
 499 +
 500 +/* Do not directly include this file. Include feather_trace.h instead */
 501 +
 502 +#define feather_callback __attribute__((regparm(3)))  __attribute__((used))
 503 +
 504 +/*
 505 + * Make the compiler reload any register that is not saved in a cdecl function
 506 + * call (minus the registers that we explicitly clobber as output registers).
 507 + */
 508 +#define __FT_CLOBBER_LIST0 "memory", "cc", "eax", "edx", "ecx"
 509 +#define __FT_CLOBBER_LIST1 "memory", "cc", "eax", "ecx"
 510 +#define __FT_CLOBBER_LIST2 "memory", "cc", "eax"
 511 +#define __FT_CLOBBER_LIST3 "memory", "cc", "eax"
 512 +
 513 +#define __FT_TMP1(x) "=d" (x)
 514 +#define __FT_ARG1(x) "0" ((long) (x))
 515 +#define __FT_TMP2(x) "=c" (x)
 516 +#define __FT_ARG2(x) "1" ((long) (x))
 517 +
 518 +#define __FT_ARG3(x) "r" ((long) (x))
 519 +
 520 +#define ft_event(id, callback)                                  \
 521 +        __asm__ __volatile__(                                   \
 522 +            "1: jmp 2f                                    \n\t" \
 523 +	    " call " #callback "                          \n\t" \
 524 +            ".section __event_table, \"aw\"               \n\t" \
 525 +            ".long " #id  ", 0, 1b, 2f                    \n\t" \
 526 +            ".previous                                    \n\t" \
 527 +            "2:                                           \n\t" \
 528 +	    : : : __FT_CLOBBER_LIST0)
 529 +
 530 +#define ft_event0(id, callback)                                 \
 531 +        __asm__ __volatile__(                                   \
 532 +            "1: jmp 2f                                    \n\t" \
 533 +            " movl $" #id  ", %%eax                       \n\t" \
 534 +	    " call " #callback "                          \n\t" \
 535 +            ".section __event_table, \"aw\"               \n\t" \
 536 +            ".long " #id  ", 0, 1b, 2f                    \n\t" \
 537 +            ".previous                                    \n\t" \
 538 +            "2:                                           \n\t" \
 539 +	    : : : __FT_CLOBBER_LIST0)
 540 +
 541 +#define ft_event1(id, callback, param)				\
 542 +	do {							\
 543 +		long __ft_tmp1;					\
 544 +        __asm__ __volatile__(                                   \
 545 +            "1: jmp 2f                                    \n\t" \
 546 +            " movl $" #id  ", %%eax                       \n\t" \
 547 +	    " call " #callback "                          \n\t" \
 548 +            ".section __event_table, \"aw\"               \n\t" \
 549 +            ".long " #id  ", 0, 1b, 2f                    \n\t" \
 550 +            ".previous                                    \n\t" \
 551 +            "2:                                           \n\t" \
 552 +	    : __FT_TMP1(__ft_tmp1)				\
 553 +	    : __FT_ARG1(param)					\
 554 +	    : __FT_CLOBBER_LIST1);				\
 555 +	} while (0);
 556 +
 557 +#define ft_event2(id, callback, param, param2)                  \
 558 +	do {							\
 559 +		long __ft_tmp1, __ft_tmp2;			\
 560 +        __asm__ __volatile__(                                   \
 561 +            "1: jmp 2f                                    \n\t" \
 562 +            " movl $" #id  ", %%eax                       \n\t" \
 563 +	    " call " #callback "                          \n\t" \
 564 +            ".section __event_table, \"aw\"               \n\t" \
 565 +            ".long " #id  ", 0, 1b, 2f                    \n\t" \
 566 +            ".previous                                    \n\t" \
 567 +            "2:                                           \n\t" \
 568 +	    : __FT_TMP1(__ft_tmp1), __FT_TMP2(__ft_tmp2)	\
 569 +	    : __FT_ARG1(param), __FT_ARG2(param2)		\
 570 +	    : __FT_CLOBBER_LIST2);				\
 571 +	} while (0);
 572 +
 573 +
 574 +#define ft_event3(id, callback, param, param2, param3)		\
 575 +	do {							\
 576 +		long __ft_tmp1, __ft_tmp2;			\
 577 +        __asm__ __volatile__(                                   \
 578 +            "1: jmp 2f                                    \n\t" \
 579 +	    " subl $4, %%esp                              \n\t" \
 580 +            " movl $" #id  ", %%eax                       \n\t" \
 581 +	    " movl %2, (%%esp)                            \n\t" \
 582 +	    " call " #callback "                          \n\t" \
 583 +	    " addl $4, %%esp                              \n\t" \
 584 +            ".section __event_table, \"aw\"               \n\t" \
 585 +            ".long " #id  ", 0, 1b, 2f                    \n\t" \
 586 +            ".previous                                    \n\t" \
 587 +            "2:                                           \n\t" \
 588 +	    : __FT_TMP1(__ft_tmp1), __FT_TMP2(__ft_tmp2)	\
 589 +	    : __FT_ARG1(param), __FT_ARG2(param2), __FT_ARG3(param3)	\
 590 +	    : __FT_CLOBBER_LIST3);				\
 591 +	} while (0);
 592 diff --git a/arch/x86/include/asm/feather_trace_64.h b/arch/x86/include/asm/feather_trace_64.h
 593 new file mode 100644
 594 index 0000000..5ce49e2
 595 --- /dev/null
 596 +++ b/arch/x86/include/asm/feather_trace_64.h
 597 @@ -0,0 +1,124 @@
 598 +/* Copyright (c) 2010 Andrea Bastoni, <bastoni@cs.unc.edu>
 599 + * Copyright (c) 2012 Björn Brandenburg, <bbb@mpi-sws.org>
 600 + *
 601 + * Permission is hereby granted, free of charge, to any person obtaining
 602 + * a copy of this software and associated documentation files (the
 603 + * "Software"), to deal in the Software without restriction, including
 604 + * without limitation the rights to use, copy, modify, merge, publish,
 605 + * distribute, sublicense, and/or sell copies of the Software, and to
 606 + * permit persons to whom the Software is furnished to do so, subject to
 607 + * the following conditions:
 608 + *
 609 + * The above copyright notice and this permission notice shall be
 610 + * included in all copies or substantial portions of the Software.
 611 + *
 612 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 613 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 614 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 615 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 616 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 617 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 618 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 619 + * SOFTWARE.
 620 + */
 621 +
 622 +/* Do not directly include this file. Include feather_trace.h instead */
 623 +
 624 +/* regparm is the default on x86_64 */
 625 +#define feather_callback  __attribute__((used))
 626 +
 627 +#define __FT_EVENT_TABLE(id,from,to) \
 628 +            ".section __event_table, \"aw\"\n\t" \
 629 +	    ".balign 8\n\t" \
 630 +            ".quad " #id  ", 0, " #from ", " #to " \n\t" \
 631 +            ".previous \n\t"
 632 +
 633 +/*
 634 + * x86_64 caller only owns rbp, rbx, r12-r15;
 635 + * the callee can freely modify the others.
 636 + */
 637 +#define __FT_CLOBBER_LIST0	"memory", "cc", "rdi", "rsi", "rdx", "rcx", \
 638 +			"r8", "r9", "r10", "r11", "rax"
 639 +
 640 +#define __FT_CLOBBER_LIST1	"memory", "cc", "rdi", "rdx", "rcx", \
 641 +			"r8", "r9", "r10", "r11", "rax"
 642 +
 643 +#define __FT_CLOBBER_LIST2	"memory", "cc", "rdi", "rcx", \
 644 +			"r8", "r9", "r10", "r11", "rax"
 645 +
 646 +#define __FT_CLOBBER_LIST3	"memory", "cc", "rdi", \
 647 +			"r8", "r9", "r10", "r11", "rax"
 648 +
 649 +/* The registers RDI, RSI, RDX, RCX, R8 and R9 are used for integer and pointer
 650 + * arguments. */
 651 +
 652 +/* RSI */
 653 +#define __FT_TMP1(x) "=S" (x)
 654 +#define __FT_ARG1(x) "0" ((long) (x))
 655 +
 656 +/* RDX */
 657 +#define __FT_TMP2(x) "=d" (x)
 658 +#define __FT_ARG2(x) "1" ((long) (x))
 659 +
 660 +/* RCX */
 661 +#define __FT_TMP3(x) "=c" (x)
 662 +#define __FT_ARG3(x) "2" ((long) (x))
 663 +
 664 +#define ft_event(id, callback)                                  \
 665 +        __asm__ __volatile__(                                   \
 666 +            "1: jmp 2f                                    \n\t" \
 667 +	    " call " #callback "                          \n\t" \
 668 +            __FT_EVENT_TABLE(id,1b,2f)				\
 669 +            "2:                                           \n\t" \
 670 +        : : : __FT_CLOBBER_LIST0)
 671 +
 672 +#define ft_event0(id, callback)                                 \
 673 +        __asm__ __volatile__(                                   \
 674 +            "1: jmp 2f                                    \n\t" \
 675 +	    " movq $" #id ", %%rdi			  \n\t" \
 676 +	    " call " #callback "                          \n\t" \
 677 +	    __FT_EVENT_TABLE(id,1b,2f)				\
 678 +            "2:                                           \n\t" \
 679 +        : :  : __FT_CLOBBER_LIST0)
 680 +
 681 +#define ft_event1(id, callback, param)                          \
 682 +	do {							\
 683 +		long __ft_tmp1;					\
 684 +	__asm__ __volatile__(                                   \
 685 +	    "1: jmp 2f                                    \n\t" \
 686 +	    " movq $" #id ", %%rdi			  \n\t" \
 687 +	    " call " #callback "                          \n\t" \
 688 +	    __FT_EVENT_TABLE(id,1b,2f)				\
 689 +	    "2:                                           \n\t" \
 690 +	    : __FT_TMP1(__ft_tmp1)				\
 691 +	    : __FT_ARG1(param)					\
 692 +	    : __FT_CLOBBER_LIST1);				\
 693 +	} while (0);
 694 +
 695 +#define ft_event2(id, callback, param, param2)                  \
 696 +	do {							\
 697 +		long __ft_tmp1, __ft_tmp2;			\
 698 +        __asm__ __volatile__(                                   \
 699 +            "1: jmp 2f                                    \n\t" \
 700 +	    " movq $" #id ", %%rdi			  \n\t" \
 701 +	    " call " #callback "                          \n\t" \
 702 +            __FT_EVENT_TABLE(id,1b,2f)				\
 703 +            "2:                                           \n\t" \
 704 +	    : __FT_TMP1(__ft_tmp1), __FT_TMP2(__ft_tmp2)	\
 705 +	    : __FT_ARG1(param), __FT_ARG2(param2)		\
 706 +	    : __FT_CLOBBER_LIST2);				\
 707 +	} while (0);
 708 +
 709 +#define ft_event3(id, callback, param, param2, param3)		\
 710 +	do {							\
 711 +		long __ft_tmp1, __ft_tmp2, __ft_tmp3;		\
 712 +        __asm__ __volatile__(                                   \
 713 +            "1: jmp 2f                                    \n\t" \
 714 +	    " movq $" #id ", %%rdi			  \n\t" \
 715 +	    " call " #callback "                          \n\t" \
 716 +            __FT_EVENT_TABLE(id,1b,2f)				\
 717 +            "2:                                           \n\t" \
 718 +	    : __FT_TMP1(__ft_tmp1), __FT_TMP2(__ft_tmp2), __FT_TMP3(__ft_tmp3) \
 719 +	    : __FT_ARG1(param), __FT_ARG2(param2), __FT_ARG3(param3)	\
 720 +	    : __FT_CLOBBER_LIST3);				\
 721 +	} while (0);
 722 diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
 723 index 7bd3bd3..d38a5a7 100644
 724 --- a/arch/x86/kernel/Makefile
 725 +++ b/arch/x86/kernel/Makefile
 726 @@ -103,6 +103,8 @@ obj-$(CONFIG_UPROBES)			+= uprobes.o
 727  
 728  obj-$(CONFIG_PERF_EVENTS)		+= perf_regs.o
 729  
 730 +obj-$(CONFIG_FEATHER_TRACE)	+= ft_event.o
 731 +
 732  ###
 733  # 64 bit specific files
 734  ifeq ($(CONFIG_X86_64),y)
 735 diff --git a/arch/x86/kernel/ft_event.c b/arch/x86/kernel/ft_event.c
 736 new file mode 100644
 737 index 0000000..37cc332
 738 --- /dev/null
 739 +++ b/arch/x86/kernel/ft_event.c
 740 @@ -0,0 +1,118 @@
 741 +#include <linux/types.h>
 742 +
 743 +#include <litmus/feather_trace.h>
 744 +
 745 +/* the feather trace management functions assume
 746 + * exclusive access to the event table
 747 + */
 748 +
 749 +#ifndef CONFIG_DEBUG_RODATA
 750 +
 751 +#define BYTE_JUMP      0xeb
 752 +#define BYTE_JUMP_LEN  0x02
 753 +
 754 +/* for each event, there is an entry in the event table */
 755 +struct trace_event {
 756 +	long 	id;
 757 +	long	count;
 758 +	long	start_addr;
 759 +	long	end_addr;
 760 +};
 761 +
 762 +extern struct trace_event  __start___event_table[];
 763 +extern struct trace_event  __stop___event_table[];
 764 +
 765 +/* Workaround: if no events are defined, then the event_table section does not
 766 + * exist and the above references cause linker errors. This could probably be
 767 + * fixed by adjusting the linker script, but it is easier to maintain for us if
 768 + * we simply create a dummy symbol in the event table section.
 769 + */
 770 +int __event_table_dummy[0] __attribute__ ((section("__event_table")));
 771 +
 772 +int ft_enable_event(unsigned long id)
 773 +{
 774 +	struct trace_event* te = __start___event_table;
 775 +	int count = 0;
 776 +	char* delta;
 777 +	unsigned char* instr;
 778 +
 779 +	while (te < __stop___event_table) {
 780 +		if (te->id == id && ++te->count == 1) {
 781 +			instr  = (unsigned char*) te->start_addr;
 782 +			/* make sure we don't clobber something wrong */
 783 +			if (*instr == BYTE_JUMP) {
 784 +				delta  = (((unsigned char*) te->start_addr) + 1);
 785 +				*delta = 0;
 786 +			}
 787 +		}
 788 +		if (te->id == id)
 789 +			count++;
 790 +		te++;
 791 +	}
 792 +
 793 +	printk(KERN_DEBUG "ft_enable_event: enabled %d events\n", count);
 794 +	return count;
 795 +}
 796 +
 797 +int ft_disable_event(unsigned long id)
 798 +{
 799 +	struct trace_event* te = __start___event_table;
 800 +	int count = 0;
 801 +	char* delta;
 802 +	unsigned char* instr;
 803 +
 804 +	while (te < __stop___event_table) {
 805 +		if (te->id == id && --te->count == 0) {
 806 +			instr  = (unsigned char*) te->start_addr;
 807 +			if (*instr == BYTE_JUMP) {
 808 +				delta  = (((unsigned char*) te->start_addr) + 1);
 809 +				*delta = te->end_addr - te->start_addr -
 810 +					BYTE_JUMP_LEN;
 811 +			}
 812 +		}
 813 +		if (te->id == id)
 814 +			count++;
 815 +		te++;
 816 +	}
 817 +
 818 +	printk(KERN_DEBUG "ft_disable_event: disabled %d events\n", count);
 819 +	return count;
 820 +}
 821 +
 822 +int ft_disable_all_events(void)
 823 +{
 824 +	struct trace_event* te = __start___event_table;
 825 +	int count = 0;
 826 +	char* delta;
 827 +	unsigned char* instr;
 828 +
 829 +	while (te < __stop___event_table) {
 830 +		if (te->count) {
 831 +			instr  = (unsigned char*) te->start_addr;
 832 +			if (*instr == BYTE_JUMP) {
 833 +				delta  = (((unsigned char*) te->start_addr)
 834 +					  + 1);
 835 +				*delta = te->end_addr - te->start_addr -
 836 +					BYTE_JUMP_LEN;
 837 +				te->count = 0;
 838 +				count++;
 839 +			}
 840 +		}
 841 +		te++;
 842 +	}
 843 +	return count;
 844 +}
 845 +
 846 +int ft_is_event_enabled(unsigned long id)
 847 +{
 848 +	struct trace_event* te = __start___event_table;
 849 +
 850 +	while (te < __stop___event_table) {
 851 +		if (te->id == id)
 852 +			return te->count;
 853 +		te++;
 854 +	}
 855 +	return 0;
 856 +}
 857 +
 858 +#endif
 859 diff --git a/litmus/Kconfig b/litmus/Kconfig
 860 index 70ddbad..7456eb2 100644
 861 --- a/litmus/Kconfig
 862 +++ b/litmus/Kconfig
 863 @@ -4,6 +4,7 @@ menu "Tracing"
 864  
 865  config FEATHER_TRACE
 866  	bool "Feather-Trace Infrastructure"
 867 +	depends on !RELOCATABLE
 868  	default y
 869  	help
 870  	  Feather-Trace basic tracing infrastructure. Includes device file
 871 -- 
 872 1.8.1.2
 873 
 874 
 875 From e46b5c3c4264a15b363502bcb980e3587131d826 Mon Sep 17 00:00:00 2001
 876 From: Bjoern Brandenburg <bbb@mpi-sws.org>
 877 Date: Sun, 23 Jun 2013 12:00:35 +0200
 878 Subject: [PATCH 004/119] Feather-Trace: add generic ftdev device driver
 879 
 880 This patch adds the ftdev device driver, which is used to export
 881 samples collected with Feather-Trace to userspace.
 882 ---
 883  include/litmus/ftdev.h |  58 +++++++
 884  litmus/Makefile        |   2 +-
 885  litmus/ftdev.c         | 439 +++++++++++++++++++++++++++++++++++++++++++++++++
 886  3 files changed, 498 insertions(+), 1 deletion(-)
 887  create mode 100644 include/litmus/ftdev.h
 888  create mode 100644 litmus/ftdev.c
 889 
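Each ftdev minor wraps one ft_buffer: the first open() of a minor invokes the driver's alloc() callback, ioctl() toggles Feather-Trace events (the commands are the plain numbers 0 and 1 from ftdev.h, with the event id as argument, matching the switch in ftdev_ioctl()), and read() streams raw slot-sized records, sleeping briefly whenever events are enabled but no data is ready yet. A user-space sketch of a reader in the style of ftcat, separate from the patch; the device path and event id are illustrative and depend on which driver registers the ftdev:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

#define FTDEV_ENABLE_CMD   0
#define FTDEV_DISABLE_CMD  1

int main(void)
{
	const char *dev = "/dev/litmus/ft_trace0";  /* example minor 0    */
	unsigned long event_id = 100;               /* illustrative id    */
	char buf[4096];                             /* >= one slot_size   */
	ssize_t n;
	int fd;

	fd = open(dev, O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (ioctl(fd, FTDEV_ENABLE_CMD, event_id) < 0)
		perror("ioctl(FTDEV_ENABLE_CMD)");

	n = read(fd, buf, sizeof(buf));  /* may wait while the buffer is empty */
	if (n > 0)
		printf("read %zd bytes of raw trace records\n", n);

	ioctl(fd, FTDEV_DISABLE_CMD, event_id);
	close(fd);  /* last reader: the driver's free() callback runs     */
	return 0;
}
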
 890 diff --git a/include/litmus/ftdev.h b/include/litmus/ftdev.h
 891 new file mode 100644
 892 index 0000000..a566b0b
 893 --- /dev/null
 894 +++ b/include/litmus/ftdev.h
 895 @@ -0,0 +1,58 @@
 896 +#ifndef _LITMUS_FTDEV_H_
 897 +#define	_LITMUS_FTDEV_H_
 898 +
 899 +#include <litmus/feather_trace.h>
 900 +#include <litmus/feather_buffer.h>
 901 +#include <linux/mutex.h>
 902 +#include <linux/cdev.h>
 903 +
 904 +#define FTDEV_ENABLE_CMD 	0
 905 +#define FTDEV_DISABLE_CMD 	1
 906 +#define FTDEV_CALIBRATE		0x1410
 907 +
 908 +struct ftdev;
 909 +
 910 +/* return 0 if buffer can be opened, otherwise -$REASON */
 911 +typedef int  (*ftdev_can_open_t)(struct ftdev* dev, unsigned int buf_no);
 912 +/* return 0 on success, otherwise -$REASON */
 913 +typedef int  (*ftdev_alloc_t)(struct ftdev* dev, unsigned int buf_no);
 914 +typedef void (*ftdev_free_t)(struct ftdev* dev, unsigned int buf_no);
 915 +typedef long (*ftdev_calibrate_t)(struct ftdev* dev, unsigned int buf_no, unsigned long user_arg);
 916 +/* Let devices handle writes from userspace. No synchronization provided. */
 917 +typedef ssize_t (*ftdev_write_t)(struct ft_buffer* buf, size_t len, const char __user *from);
 918 +
 919 +struct ftdev_event;
 920 +
 921 +struct ftdev_minor {
 922 +	struct ft_buffer*	buf;
 923 +	unsigned int		readers;
 924 +	struct mutex		lock;
 925 +	/* FIXME: filter for authorized events */
 926 +	struct ftdev_event*	events;
 927 +	struct device*		device;
 928 +	struct ftdev*		ftdev;
 929 +};
 930 +
 931 +struct ftdev {
 932 +	dev_t			major;
 933 +	struct cdev		cdev;
 934 +	struct class*		class;
 935 +	const char*		name;
 936 +	struct ftdev_minor*	minor;
 937 +	unsigned int		minor_cnt;
 938 +	ftdev_alloc_t		alloc;
 939 +	ftdev_free_t		free;
 940 +	ftdev_can_open_t	can_open;
 941 +	ftdev_write_t		write;
 942 +	ftdev_calibrate_t	calibrate;
 943 +};
 944 +
 945 +struct ft_buffer* alloc_ft_buffer(unsigned int count, size_t size);
 946 +void free_ft_buffer(struct ft_buffer* buf);
 947 +
 948 +int ftdev_init(	struct ftdev* ftdev, struct module* owner,
 949 +		const int minor_cnt, const char* name);
 950 +void ftdev_exit(struct ftdev* ftdev);
 951 +int register_ftdev(struct ftdev* ftdev);
 952 +
 953 +#endif
 954 diff --git a/litmus/Makefile b/litmus/Makefile
 955 index 4c6130b..bca61e6 100644
 956 --- a/litmus/Makefile
 957 +++ b/litmus/Makefile
 958 @@ -2,4 +2,4 @@
 959  # Makefile for LITMUS^RT
 960  #
 961  
 962 -obj-$(CONFIG_FEATHER_TRACE) += ft_event.o
 963 +obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
 964 diff --git a/litmus/ftdev.c b/litmus/ftdev.c
 965 new file mode 100644
 966 index 0000000..13f1d48
 967 --- /dev/null
 968 +++ b/litmus/ftdev.c
 969 @@ -0,0 +1,439 @@
 970 +#include <linux/sched.h>
 971 +#include <linux/fs.h>
 972 +#include <linux/slab.h>
 973 +#include <linux/cdev.h>
 974 +#include <asm/uaccess.h>
 975 +#include <linux/module.h>
 976 +#include <linux/device.h>
 977 +#include <linux/vmalloc.h>
 978 +
 979 +#include <litmus/feather_trace.h>
 980 +#include <litmus/ftdev.h>
 981 +
 982 +struct ft_buffer* alloc_ft_buffer(unsigned int count, size_t size)
 983 +{
 984 +	struct ft_buffer* buf;
 985 +	size_t total = (size + 1) * count;
 986 +	char* mem;
 987 +
 988 +	buf = kmalloc(sizeof(*buf), GFP_KERNEL);
 989 +	if (!buf)
 990 +		return NULL;
 991 +
 992 +
 993 +	mem = vmalloc(total);
 994 +
 995 +	if (!mem) {
 996 +		kfree(buf);
 997 +		return NULL;
 998 +	}
 999 +
1000 +	if (!init_ft_buffer(buf, count, size,
1001 +			    mem + (count * size),  /* markers at the end */
1002 +			    mem)) {                /* buffer objects     */
1003 +		vfree(mem);
1004 +		kfree(buf);
1005 +		return NULL;
1006 +	}
1007 +	return buf;
1008 +}
1009 +
1010 +void free_ft_buffer(struct ft_buffer* buf)
1011 +{
1012 +	if (buf) {
1013 +		vfree(buf->buffer_mem);
1014 +		kfree(buf);
1015 +	}
1016 +}
1017 +
1018 +struct ftdev_event {
1019 +	int id;
1020 +	struct ftdev_event* next;
1021 +};
1022 +
1023 +static int activate(struct ftdev_event** chain, int id)
1024 +{
1025 +	struct ftdev_event* ev = kmalloc(sizeof(*ev), GFP_KERNEL);
1026 +	if (ev) {
1027 +		printk(KERN_INFO
1028 +		       "Enabling feather-trace event %d.\n", (int) id);
1029 +		ft_enable_event(id);
1030 +		ev->id = id;
1031 +		ev->next = *chain;
1032 +		*chain    = ev;
1033 +	}
1034 +	return ev ? 0 : -ENOMEM;
1035 +}
1036 +
1037 +static void deactivate(struct ftdev_event** chain, int id)
1038 +{
1039 +	struct ftdev_event **cur = chain;
1040 +	struct ftdev_event *nxt;
1041 +	while (*cur) {
1042 +		if ((*cur)->id == id) {
1043 +			nxt   = (*cur)->next;
1044 +			kfree(*cur);
1045 +			*cur  = nxt;
1046 +			printk(KERN_INFO
1047 +			       "Disabling feather-trace event %d.\n", (int) id);
1048 +			ft_disable_event(id);
1049 +			break;
1050 +		}
1051 +		cur = &(*cur)->next;
1052 +	}
1053 +}
1054 +
1055 +static int ftdev_open(struct inode *in, struct file *filp)
1056 +{
1057 +	struct ftdev* ftdev;
1058 +	struct ftdev_minor* ftdm;
1059 +	unsigned int buf_idx = iminor(in);
1060 +	int err = 0;
1061 +
1062 +	ftdev = container_of(in->i_cdev, struct ftdev, cdev);
1063 +
1064 +	if (buf_idx >= ftdev->minor_cnt) {
1065 +		err = -ENODEV;
1066 +		goto out;
1067 +	}
1068 +	if (ftdev->can_open && (err = ftdev->can_open(ftdev, buf_idx)))
1069 +		goto out;
1070 +
1071 +	ftdm = ftdev->minor + buf_idx;
1072 +	ftdm->ftdev = ftdev;
1073 +	filp->private_data = ftdm;
1074 +
1075 +	if (mutex_lock_interruptible(&ftdm->lock)) {
1076 +		err = -ERESTARTSYS;
1077 +		goto out;
1078 +	}
1079 +
1080 +	if (!ftdm->readers && ftdev->alloc)
1081 +		err = ftdev->alloc(ftdev, buf_idx);
1082 +	if (0 == err)
1083 +		ftdm->readers++;
1084 +
1085 +	mutex_unlock(&ftdm->lock);
1086 +out:
1087 +	return err;
1088 +}
1089 +
1090 +static int ftdev_release(struct inode *in, struct file *filp)
1091 +{
1092 +	struct ftdev* ftdev;
1093 +	struct ftdev_minor* ftdm;
1094 +	unsigned int buf_idx = iminor(in);
1095 +	int err = 0;
1096 +
1097 +	ftdev = container_of(in->i_cdev, struct ftdev, cdev);
1098 +
1099 +	if (buf_idx >= ftdev->minor_cnt) {
1100 +		err = -ENODEV;
1101 +		goto out;
1102 +	}
1103 +	ftdm = ftdev->minor + buf_idx;
1104 +
1105 +	if (mutex_lock_interruptible(&ftdm->lock)) {
1106 +		err = -ERESTARTSYS;
1107 +		goto out;
1108 +	}
1109 +
1110 +	if (ftdm->readers == 1) {
1111 +		while (ftdm->events)
1112 +			deactivate(&ftdm->events, ftdm->events->id);
1113 +
1114 +		/* wait for any pending events to complete */
1115 +		set_current_state(TASK_UNINTERRUPTIBLE);
1116 +		schedule_timeout(HZ);
1117 +
1118 +		printk(KERN_ALERT "Failed trace writes: %u\n",
1119 +		       ftdm->buf->failed_writes);
1120 +
1121 +		if (ftdev->free)
1122 +			ftdev->free(ftdev, buf_idx);
1123 +	}
1124 +
1125 +	ftdm->readers--;
1126 +	mutex_unlock(&ftdm->lock);
1127 +out:
1128 +	return err;
1129 +}
1130 +
1131 +/* based on ft_buffer_read
1132 + * @returns < 0 : page fault
1133 + *          = 0 : no data available
1134 + *          = 1 : one slot copied
1135 + */
1136 +static int ft_buffer_copy_to_user(struct ft_buffer* buf, char __user *dest)
1137 +{
1138 +	unsigned int idx;
1139 +	int err = 0;
1140 +	if (buf->free_count != buf->slot_count) {
1141 +		/* data available */
1142 +		idx = buf->read_idx % buf->slot_count;
1143 +		if (buf->slots[idx] == SLOT_READY) {
1144 +			err = copy_to_user(dest, ((char*) buf->buffer_mem) +
1145 +					   idx * buf->slot_size,
1146 +					   buf->slot_size);
1147 +			if (err == 0) {
1148 +				/* copy ok */
1149 +				buf->slots[idx] = SLOT_FREE;
1150 +				buf->read_idx++;
1151 +				fetch_and_inc(&buf->free_count);
1152 +				err = 1;
1153 +			}
1154 +		}
1155 +	}
1156 +	return err;
1157 +}
1158 +
1159 +static ssize_t ftdev_read(struct file *filp,
1160 +			  char __user *to, size_t len, loff_t *f_pos)
1161 +{
1162 +	/* 	we ignore f_pos, this is strictly sequential */
1163 +
1164 +	ssize_t err = 0;
1165 +	size_t chunk;
1166 +	int copied;
1167 +	struct ftdev_minor* ftdm = filp->private_data;
1168 +
1169 +	if (mutex_lock_interruptible(&ftdm->lock)) {
1170 +		err = -ERESTARTSYS;
1171 +		goto out;
1172 +	}
1173 +
1174 +
1175 +	chunk = ftdm->buf->slot_size;
1176 +	while (len >= chunk) {
1177 +		copied = ft_buffer_copy_to_user(ftdm->buf, to);
1178 +		if (copied == 1) {
1179 +			len    -= chunk;
1180 +			to     += chunk;
1181 +			err    += chunk;
1182 +	        } else if (err == 0 && copied == 0 && ftdm->events) {
1183 +			/* Only wait if there are any events enabled and only
1184 +			 * if we haven't copied some data yet. We cannot wait
1185 +			 * here with copied data because that data would get
1186 +			 * lost if the task is interrupted (e.g., killed).
1187 +			 */
1188 +			mutex_unlock(&ftdm->lock);
1189 +			set_current_state(TASK_INTERRUPTIBLE);
1190 +
1191 +			schedule_timeout(50);
1192 +
1193 +			if (signal_pending(current)) {
1194 +				if (err == 0)
1195 +					/* nothing read yet, signal problem */
1196 +					err = -ERESTARTSYS;
1197 +				goto out;
1198 +			}
1199 +			if (mutex_lock_interruptible(&ftdm->lock)) {
1200 +				err = -ERESTARTSYS;
1201 +				goto out;
1202 +			}
1203 +		} else if (copied < 0) {
1204 +			/* page fault */
1205 +			err = copied;
1206 +			break;
1207 +		} else
1208 +			/* nothing left to get, return to user space */
1209 +			break;
1210 +	}
1211 +	mutex_unlock(&ftdm->lock);
1212 +out:
1213 +	return err;
1214 +}
1215 +
1216 +static long ftdev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
1217 +{
1218 +	long err = -ENOIOCTLCMD;
1219 +	struct ftdev_minor* ftdm = filp->private_data;
1220 +
1221 +	if (mutex_lock_interruptible(&ftdm->lock)) {
1222 +		err = -ERESTARTSYS;
1223 +		goto out;
1224 +	}
1225 +
1226 +	/* FIXME: check id against list of acceptable events */
1227 +
1228 +	switch (cmd) {
1229 +	case  FTDEV_ENABLE_CMD:
1230 +		if (activate(&ftdm->events, arg))
1231 +			err = -ENOMEM;
1232 +		else
1233 +			err = 0;
1234 +		break;
1235 +
1236 +	case FTDEV_DISABLE_CMD:
1237 +		deactivate(&ftdm->events, arg);
1238 +		err = 0;
1239 +		break;
1240 +
1241 +	case FTDEV_CALIBRATE:
1242 +		if (ftdm->ftdev->calibrate) {
1243 +			err = ftdm->ftdev->calibrate(ftdm->ftdev, iminor(filp->f_dentry->d_inode), arg);
1244 +		}
1245 +		break;
1246 +
1247 +	default:
1248 +		printk(KERN_DEBUG "ftdev: strange ioctl (%u, %lu)\n", cmd, arg);
1249 +	};
1250 +
1251 +	mutex_unlock(&ftdm->lock);
1252 +out:
1253 +	return err;
1254 +}
1255 +
1256 +static ssize_t ftdev_write(struct file *filp, const char __user *from,
1257 +			   size_t len, loff_t *f_pos)
1258 +{
1259 +	struct ftdev_minor* ftdm = filp->private_data;
1260 +	ssize_t err = -EINVAL;
1261 +	struct ftdev* ftdev = ftdm->ftdev;
1262 +
1263 +	/* dispatch write to buffer-specific code, if available */
1264 +	if (ftdev->write)
1265 +		err = ftdev->write(ftdm->buf, len, from);
1266 +
1267 +	return err;
1268 +}
1269 +
1270 +struct file_operations ftdev_fops = {
1271 +	.owner   = THIS_MODULE,
1272 +	.open    = ftdev_open,
1273 +	.release = ftdev_release,
1274 +	.write   = ftdev_write,
1275 +	.read    = ftdev_read,
1276 +	.unlocked_ioctl = ftdev_ioctl,
1277 +};
1278 +
1279 +int ftdev_init(	struct ftdev* ftdev, struct module* owner,
1280 +		const int minor_cnt, const char* name)
1281 +{
1282 +	int i, err;
1283 +
1284 +	BUG_ON(minor_cnt < 1);
1285 +
1286 +	cdev_init(&ftdev->cdev, &ftdev_fops);
1287 +	ftdev->name = name;
1288 +	ftdev->minor_cnt = minor_cnt;
1289 +	ftdev->cdev.owner = owner;
1290 +	ftdev->cdev.ops = &ftdev_fops;
1291 +	ftdev->alloc    = NULL;
1292 +	ftdev->free     = NULL;
1293 +	ftdev->can_open = NULL;
1294 +	ftdev->write	= NULL;
1295 +	ftdev->calibrate = NULL;
1296 +
1297 +	ftdev->minor = kcalloc(ftdev->minor_cnt, sizeof(*ftdev->minor),
1298 +			GFP_KERNEL);
1299 +	if (!ftdev->minor) {
1300 +		printk(KERN_WARNING "ftdev(%s): Could not allocate memory\n",
1301 +			ftdev->name);
1302 +		err = -ENOMEM;
1303 +		goto err_out;
1304 +	}
1305 +
1306 +	for (i = 0; i < ftdev->minor_cnt; i++) {
1307 +		mutex_init(&ftdev->minor[i].lock);
1308 +		ftdev->minor[i].readers = 0;
1309 +		ftdev->minor[i].buf     = NULL;
1310 +		ftdev->minor[i].events  = NULL;
1311 +	}
1312 +
1313 +	ftdev->class = class_create(owner, ftdev->name);
1314 +	if (IS_ERR(ftdev->class)) {
1315 +		err = PTR_ERR(ftdev->class);
1316 +		printk(KERN_WARNING "ftdev(%s): "
1317 +			"Could not create device class.\n", ftdev->name);
1318 +		goto err_dealloc;
1319 +	}
1320 +
1321 +	return 0;
1322 +
1323 +err_dealloc:
1324 +	kfree(ftdev->minor);
1325 +err_out:
1326 +	return err;
1327 +}
1328 +
1329 +/*
1330 + * Destroy minor devices up to, but not including, up_to.
1331 + */
1332 +static void ftdev_device_destroy(struct ftdev* ftdev, unsigned int up_to)
1333 +{
1334 +	dev_t minor_cntr;
1335 +
1336 +	if (up_to < 1)
1337 +		up_to = (ftdev->minor_cnt < 1) ? 0 : ftdev->minor_cnt;
1338 +
1339 +	for (minor_cntr = 0; minor_cntr < up_to; ++minor_cntr)
1340 +		device_destroy(ftdev->class, MKDEV(ftdev->major, minor_cntr));
1341 +}
1342 +
1343 +void ftdev_exit(struct ftdev* ftdev)
1344 +{
1345 +	printk("ftdev(%s): Exiting\n", ftdev->name);
1346 +	ftdev_device_destroy(ftdev, -1);
1347 +	cdev_del(&ftdev->cdev);
1348 +	unregister_chrdev_region(MKDEV(ftdev->major, 0), ftdev->minor_cnt);
1349 +	class_destroy(ftdev->class);
1350 +	kfree(ftdev->minor);
1351 +}
1352 +
1353 +int register_ftdev(struct ftdev* ftdev)
1354 +{
1355 +	struct device **device;
1356 +	dev_t trace_dev_tmp, minor_cntr;
1357 +	int err;
1358 +
1359 +	err = alloc_chrdev_region(&trace_dev_tmp, 0, ftdev->minor_cnt,
1360 +			ftdev->name);
1361 +	if (err) {
1362 +		printk(KERN_WARNING "ftdev(%s): "
1363 +		       "Could not allocate char. device region (%d minors)\n",
1364 +		       ftdev->name, ftdev->minor_cnt);
1365 +		goto err_out;
1366 +	}
1367 +
1368 +	ftdev->major = MAJOR(trace_dev_tmp);
1369 +
1370 +	err = cdev_add(&ftdev->cdev, trace_dev_tmp, ftdev->minor_cnt);
1371 +	if (err) {
1372 +		printk(KERN_WARNING "ftdev(%s): "
1373 +		       "Could not add cdev for major %u with %u minor(s).\n",
1374 +		       ftdev->name, ftdev->major, ftdev->minor_cnt);
1375 +		goto err_unregister;
1376 +	}
1377 +
1378 +	/* create the minor device(s) */
1379 +	for (minor_cntr = 0; minor_cntr < ftdev->minor_cnt; ++minor_cntr)
1380 +	{
1381 +		trace_dev_tmp = MKDEV(ftdev->major, minor_cntr);
1382 +		device = &ftdev->minor[minor_cntr].device;
1383 +
1384 +		*device = device_create(ftdev->class, NULL, trace_dev_tmp, NULL,
1385 +				"litmus/%s%d", ftdev->name, minor_cntr);
1386 +		if (IS_ERR(*device)) {
1387 +			err = PTR_ERR(*device);
1388 +			printk(KERN_WARNING "ftdev(%s): "
1389 +				"Could not create device major/minor number "
1390 +				"%u/%u\n", ftdev->name, ftdev->major,
1391 +				minor_cntr);
1392 +			printk(KERN_WARNING "ftdev(%s): "
1393 +				"will attempt deletion of allocated devices.\n",
1394 +				ftdev->name);
1395 +			goto err_minors;
1396 +		}
1397 +	}
1398 +
1399 +	return 0;
1400 +
1401 +err_minors:
1402 +	ftdev_device_destroy(ftdev, minor_cntr);
1403 +	cdev_del(&ftdev->cdev);
1404 +err_unregister:
1405 +	unregister_chrdev_region(MKDEV(ftdev->major, 0), ftdev->minor_cnt);
1406 +err_out:
1407 +	return err;
1408 +}
1409 -- 
1410 1.8.1.2
1411 
1412 
1413 From 7b317d9036b3fc0280327586ef52dfa6cb6dd250 Mon Sep 17 00:00:00 2001
1414 From: Bjoern Brandenburg <bbb@mpi-sws.org>
1415 Date: Sun, 23 Jun 2013 14:40:52 +0200
1416 Subject: [PATCH 005/119] Feather-Trace: add LITMUS^RT overhead tracing
1417  infrastructure
1418 
1419 This patch adds the main infrastructure for tracing overheads in
1420 LITMUS^RT.  It does not yet introduce any tracepoints into the kernel.
1421 ---
1422  include/litmus/trace.h | 142 +++++++++++++
1423  litmus/Kconfig         |  25 +++
1424  litmus/Makefile        |   1 +
1425  litmus/trace.c         | 562 +++++++++++++++++++++++++++++++++++++++++++++++++
1426  4 files changed, 730 insertions(+)
1427  create mode 100644 include/litmus/trace.h
1428  create mode 100644 litmus/trace.c
1429 
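Overhead samples are 16-byte struct timestamp records (48-bit cycle count, 16-bit PID fragment, sequence number, CPU, event id, plus task-type and IRQ bits), and by the convention documented in trace.h an end event id is always the start event id plus one (e.g., TS_SCHED_START = 100, TS_SCHED_END = 101). A user-space sketch, separate from the patch, of how post-processing pairs such records; the bit-field layout is assumed to match the kernel's for the same compiler and architecture, and the two records are fabricated purely for illustration:

#include <stdint.h>
#include <stdio.h>

struct timestamp {              /* mirrors include/litmus/trace.h */
	uint64_t timestamp:48;
	uint64_t pid:16;
	uint32_t seq_no;
	uint8_t  cpu;
	uint8_t  event;
	uint8_t  task_type:2;
	uint8_t  irq_flag:1;
	uint8_t  irq_count:5;
};

int main(void)
{
	/* fabricated pair: TS_SCHED_START (100) ... TS_SCHED_END (101)  */
	struct timestamp start = { .timestamp = 1000000, .seq_no = 41,
				   .cpu = 0, .event = 100 };
	struct timestamp end   = { .timestamp = 1003500, .seq_no = 42,
				   .cpu = 0, .event = 101 };

	/* Pair a start with the matching end on the same CPU; samples with
	 * irq_flag set were disturbed by interrupts and are typically
	 * discarded when computing overhead statistics.
	 */
	if (end.event == start.event + 1 && end.cpu == start.cpu &&
	    !start.irq_flag && !end.irq_flag)
		printf("scheduling overhead: %llu cycles\n",
		       (unsigned long long)(end.timestamp - start.timestamp));

	printf("record size: %zu bytes\n", sizeof(struct timestamp));
	return 0;
}
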
1430 diff --git a/include/litmus/trace.h b/include/litmus/trace.h
1431 new file mode 100644
1432 index 0000000..6017872
1433 --- /dev/null
1434 +++ b/include/litmus/trace.h
1435 @@ -0,0 +1,142 @@
1436 +#ifndef _SYS_TRACE_H_
1437 +#define	_SYS_TRACE_H_
1438 +
1439 +#ifdef CONFIG_SCHED_OVERHEAD_TRACE
1440 +
1441 +
1442 +#include <litmus/feather_trace.h>
1443 +#include <litmus/feather_buffer.h>
1444 +
1445 +
1446 +/*********************** TIMESTAMPS ************************/
1447 +
1448 +enum task_type_marker {
1449 +	TSK_BE,
1450 +	TSK_RT,
1451 +	TSK_UNKNOWN
1452 +};
1453 +
1454 +struct timestamp {
1455 +	uint64_t		timestamp:48;
1456 +	uint64_t		pid:16;
1457 +	uint32_t		seq_no;
1458 +	uint8_t			cpu;
1459 +	uint8_t			event;
1460 +	uint8_t			task_type:2;
1461 +	uint8_t			irq_flag:1;
1462 +	uint8_t			irq_count:5;
1463 +};
1464 +
1465 +/* tracing callbacks */
1466 +feather_callback void msg_sent(unsigned long event, unsigned long to);
1467 +feather_callback void msg_received(unsigned long event);
1468 +
1469 +#define MSG_TIMESTAMP_SENT(id, to) \
1470 +	ft_event1(id, msg_sent, (unsigned long) to);
1471 +
1472 +#define MSG_TIMESTAMP_RECEIVED(id) \
1473 +	ft_event0(id, msg_received);
1474 +
1475 +feather_callback void save_cpu_timestamp(unsigned long event);
1476 +feather_callback void save_cpu_timestamp_time(unsigned long event, unsigned long time_ptr);
1477 +feather_callback void save_cpu_timestamp_irq(unsigned long event, unsigned long irq_count_ptr);
1478 +feather_callback void save_cpu_timestamp_task(unsigned long event, unsigned long t_ptr);
1479 +feather_callback void save_cpu_timestamp_def(unsigned long event, unsigned long type);
1480 +feather_callback void save_cpu_task_latency(unsigned long event, unsigned long when_ptr);
1481 +
1482 +#define CPU_TIMESTAMP_TIME(id, time_ptr) \
1483 +	ft_event1(id, save_cpu_timestamp_time, (unsigned long) time_ptr)
1484 +
1485 +#define CPU_TIMESTAMP_IRQ(id, irq_count_ptr) \
1486 +	ft_event1(id, save_cpu_timestamp_irq, (unsigned long) irq_count_ptr)
1487 +
1488 +#define CPU_TIMESTAMP(id) ft_event0(id, save_cpu_timestamp)
1489 +
1490 +#define CPU_DTIMESTAMP(id, def)  ft_event1(id, save_cpu_timestamp_def, (unsigned long) def)
1491 +
1492 +#define CPU_TIMESTAMP_CUR(id) CPU_DTIMESTAMP(id, is_realtime(current) ? TSK_RT : TSK_BE)
1493 +
1494 +#define CPU_TTIMESTAMP(id, task) \
1495 +	ft_event1(id, save_cpu_timestamp_task, (unsigned long) task)
1496 +
1497 +#define CPU_LTIMESTAMP(id, task) \
1498 +	ft_event1(id, save_cpu_task_latency, (unsigned long) task)
1499 +
1500 +#else /* !CONFIG_SCHED_OVERHEAD_TRACE */
1501 +
1502 +#define MSG_TIMESTAMP_SENT(id, to)
1503 +#define MSG_TIMESTAMP_RECEIVED(id)
1504 +
1505 +#define CPU_TIMESTAMP_TIME(id, time_ptr)
1506 +#define CPU_TIMESTAMP_IRQ(id, irq_count_ptr)
1507 +#define CPU_TIMESTAMP(id)
1508 +#define CPU_DTIMESTAMP(id, def)
1509 +#define CPU_TIMESTAMP_CUR(id)
1510 +#define CPU_TTIMESTAMP(id, task)
1511 +#define CPU_LTIMESTAMP(id, task)
1512 +
1513 +#endif
1514 +
1515 +
1516 +/* Convention for timestamps
1517 + * =========================
1518 + *
1519 + * In order to process the trace files with a common tool, we use the following
1520 + * convention to measure execution times: The end time id of a code segment is
1521 + * always the next number after the start time event id.
1522 + */
1523 +
1524 +#define __TS_SYSCALL_IN_START(p)	CPU_TIMESTAMP_TIME(10, p)
1525 +#define __TS_SYSCALL_IN_END(p)		CPU_TIMESTAMP_IRQ(11, p)
1526 +
1527 +#define TS_SYSCALL_OUT_START		CPU_TIMESTAMP_CUR(20)
1528 +#define TS_SYSCALL_OUT_END		CPU_TIMESTAMP_CUR(21)
1529 +
1530 +#define TS_LOCK_START			CPU_TIMESTAMP_CUR(30)
1531 +#define TS_LOCK_END			CPU_TIMESTAMP_CUR(31)
1532 +
1533 +#define TS_LOCK_SUSPEND			CPU_TIMESTAMP_CUR(38)
1534 +#define TS_LOCK_RESUME			CPU_TIMESTAMP_CUR(39)
1535 +
1536 +#define TS_UNLOCK_START			CPU_TIMESTAMP_CUR(40)
1537 +#define TS_UNLOCK_END			CPU_TIMESTAMP_CUR(41)
1538 +
1539 +#define TS_SCHED_START			CPU_DTIMESTAMP(100, TSK_UNKNOWN) /* we only
1540 +								      * care
1541 +								      * about
1542 +								      * next */
1543 +#define TS_SCHED_END(t)			CPU_TTIMESTAMP(101, t)
1544 +#define TS_SCHED2_START(t) 		CPU_TTIMESTAMP(102, t)
1545 +#define TS_SCHED2_END(t)       		CPU_TTIMESTAMP(103, t)
1546 +
1547 +#define TS_CXS_START(t)			CPU_TTIMESTAMP(104, t)
1548 +#define TS_CXS_END(t)			CPU_TTIMESTAMP(105, t)
1549 +
1550 +#define TS_RELEASE_START		CPU_DTIMESTAMP(106, TSK_RT)
1551 +#define TS_RELEASE_END			CPU_DTIMESTAMP(107, TSK_RT)
1552 +
1553 +#define TS_TICK_START(t)		CPU_TTIMESTAMP(110, t)
1554 +#define TS_TICK_END(t) 			CPU_TTIMESTAMP(111, t)
1555 +
1556 +#define TS_QUANTUM_BOUNDARY_START	CPU_TIMESTAMP_CUR(112)
1557 +#define TS_QUANTUM_BOUNDARY_END		CPU_TIMESTAMP_CUR(113)
1558 +
1559 +
1560 +#define TS_PLUGIN_SCHED_START		/* TIMESTAMP(120) */  /* currently unused */
1561 +#define TS_PLUGIN_SCHED_END		/* TIMESTAMP(121) */
1562 +
1563 +#define TS_PLUGIN_TICK_START		/* TIMESTAMP(130) */
1564 +#define TS_PLUGIN_TICK_END		/* TIMESTAMP(131) */
1565 +
1566 +#define TS_ENTER_NP_START		CPU_TIMESTAMP(140)
1567 +#define TS_ENTER_NP_END			CPU_TIMESTAMP(141)
1568 +
1569 +#define TS_EXIT_NP_START		CPU_TIMESTAMP(150)
1570 +#define TS_EXIT_NP_END			CPU_TIMESTAMP(151)
1571 +
1572 +#define TS_SEND_RESCHED_START(c)	MSG_TIMESTAMP_SENT(190, c)
1573 +#define TS_SEND_RESCHED_END		MSG_TIMESTAMP_RECEIVED(191)
1574 +
1575 +#define TS_RELEASE_LATENCY(when)	CPU_LTIMESTAMP(208, &(when))
1576 +
1577 +#endif /* !_SYS_TRACE_H_ */
1578 diff --git a/litmus/Kconfig b/litmus/Kconfig
1579 index 7456eb2..0c7e06b 100644
1580 --- a/litmus/Kconfig
1581 +++ b/litmus/Kconfig
1582 @@ -24,6 +24,31 @@ config FEATHER_TRACE
1583  	  you still need to enable SCHED_TASK_TRACE and/or SCHED_OVERHEAD_TRACE to
1584  	  actually enable any events.
1585  
1586 +config SCHED_OVERHEAD_TRACE
1587 +	bool "Record timestamps for overhead measurements"
1588 +	depends on FEATHER_TRACE
1589 +	default y
1590 +	help
1591 +	  Export event stream for overhead tracing.
1592 +	  Say Yes for overhead tracing.
1593 +
1594 +config SCHED_OVERHEAD_TRACE_SHIFT
1595 +       int "Buffer size for Feather-Trace overhead data"
1596 +       depends on SCHED_OVERHEAD_TRACE
1597 +       range 15 32
1598 +       default 22
1599 +       help
1600 +
1601 +         Select the buffer size for the Feather-Trace overhead tracing
1602 +         infrastructure (/dev/litmus/ft_trace0 & ftcat) as a power of two.  The
1603 +         larger the buffer, the less likely the chance of buffer overflows if
1604 +         the ftcat process is starved by real-time activity. In machines with
1605 +         large memories, large buffer sizes are recommended.
1606 +
1607 +	 Examples: 16 =>   2 MB
1608 +		   24 => 512 MB
 1609 +		   26 =>   2 GB
1610 +
1611  endmenu
1612  
1613  endmenu
1614 diff --git a/litmus/Makefile b/litmus/Makefile
1615 index bca61e6..99f90c3 100644
1616 --- a/litmus/Makefile
1617 +++ b/litmus/Makefile
1618 @@ -3,3 +3,4 @@
1619  #
1620  
1621  obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
1622 +obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o
1623 diff --git a/litmus/trace.c b/litmus/trace.c
1624 new file mode 100644
1625 index 0000000..a378623
1626 --- /dev/null
1627 +++ b/litmus/trace.c
1628 @@ -0,0 +1,562 @@
1629 +#include <linux/sched.h>
1630 +#include <linux/module.h>
1631 +#include <linux/uaccess.h>
1632 +
1633 +#include <litmus/ftdev.h>
1634 +#include <litmus/trace.h>
1635 +
1636 +/* dummy definition of is_realtime() */
1637 +#define is_realtime(t) (0)
1638 +
1639 +/******************************************************************************/
1640 +/*                          Allocation                                        */
1641 +/******************************************************************************/
1642 +
1643 +static struct ftdev cpu_overhead_dev;
1644 +static struct ftdev msg_overhead_dev;
1645 +
1646 +#define cpu_trace_ts_buf(cpu) cpu_overhead_dev.minor[(cpu)].buf
1647 +#define msg_trace_ts_buf(cpu) msg_overhead_dev.minor[(cpu)].buf
1648 +
 1649 +DEFINE_PER_CPU(atomic_t, irq_fired_count);
1650 +DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, cpu_irq_fired_count);
1651 +
1652 +static DEFINE_PER_CPU(unsigned int, cpu_ts_seq_no);
1653 +static DEFINE_PER_CPU(unsigned int, msg_ts_seq_no);
1654 +
1655 +static int64_t cycle_offset[NR_CPUS][NR_CPUS];
1656 +
1657 +void ft_irq_fired(void)
1658 +{
1659 +	/* Only called with preemptions disabled.  */
1660 +	atomic_inc(&__get_cpu_var(irq_fired_count));
1661 +	atomic_inc(&__get_cpu_var(cpu_irq_fired_count));
1662 +}
1663 +
1664 +static inline void clear_irq_fired(void)
1665 +{
1666 +	atomic_set(&__raw_get_cpu_var(irq_fired_count), 0);
1667 +}
1668 +
1669 +static inline unsigned int get_and_clear_irq_fired(void)
1670 +{
1671 +	/* This is potentially not atomic  since we might migrate if
1672 +	 * preemptions are not disabled. As a tradeoff between
1673 +	 * accuracy and tracing overheads, this seems acceptable.
1674 +	 * If it proves to be a problem, then one could add a callback
1675 +	 * from the migration code to invalidate irq_fired_count.
1676 +	 */
1677 +	return atomic_xchg(&__raw_get_cpu_var(irq_fired_count), 0);
1678 +}
1679 +
1680 +static inline unsigned int get_and_clear_irq_fired_for_cpu(int cpu)
1681 +{
1682 +	return atomic_xchg(&per_cpu(irq_fired_count, cpu), 0);
1683 +}
1684 +
1685 +static inline void cpu_clear_irq_fired(void)
1686 +{
1687 +	atomic_set(&__raw_get_cpu_var(cpu_irq_fired_count), 0);
1688 +}
1689 +
1690 +static inline unsigned int cpu_get_and_clear_irq_fired(void)
1691 +{
1692 +	return atomic_xchg(&__raw_get_cpu_var(cpu_irq_fired_count), 0);
1693 +}
1694 +
1695 +static inline void save_irq_flags(struct timestamp *ts, unsigned int irq_count)
1696 +{
1697 +	/* Store how many interrupts occurred. */
1698 +	ts->irq_count = irq_count;
1699 +	/* Extra flag because ts->irq_count overflows quickly. */
1700 +	ts->irq_flag  = irq_count > 0;
1701 +}
1702 +
1703 +#define NO_IRQ_COUNT 0
1704 +#define LOCAL_IRQ_COUNT 1
1705 +#define REMOTE_IRQ_COUNT 2
1706 +
1707 +#define DO_NOT_RECORD_TIMESTAMP 0
1708 +#define RECORD_LOCAL_TIMESTAMP 1
1709 +#define RECORD_OFFSET_TIMESTAMP 2
1710 +
1711 +static inline void __write_record(
1712 +	uint8_t event,
1713 +	uint8_t type,
1714 +	uint16_t pid_fragment,
1715 +	unsigned int irq_count,
1716 +	int record_irq,
1717 +	int hide_irq,
1718 +	uint64_t timestamp,
1719 +	int record_timestamp,
1720 +
1721 +	int only_single_writer,
1722 +	int is_cpu_timestamp,
1723 +	int local_cpu,
1724 +	uint8_t other_cpu)
1725 +{
1726 +	unsigned long flags;
1727 +	unsigned int seq_no;
1728 +	struct timestamp *ts;
1729 +	int cpu;
1730 +	struct ft_buffer* buf;
1731 +
1732 +	/* Avoid preemptions while recording the timestamp. This reduces the
1733 +	 * number of "out of order" timestamps in the stream and makes
1734 +	 * post-processing easier. */
1735 +
1736 +	local_irq_save(flags);
1737 +
1738 +	if (local_cpu)
1739 +		cpu = smp_processor_id();
1740 +	else
1741 +		cpu = other_cpu;
1742 +
1743 +	/* resolved during function inlining */
1744 +	if (is_cpu_timestamp) {
1745 +		seq_no = __get_cpu_var(cpu_ts_seq_no)++;
1746 +		buf = cpu_trace_ts_buf(cpu);
1747 +	} else {
1748 +		seq_no = fetch_and_inc((int *) &per_cpu(msg_ts_seq_no, cpu));
1749 +		buf = msg_trace_ts_buf(cpu);
1750 +	}
1751 +
1752 +	/* If buf is non-NULL here, then the buffer cannot be deallocated until
1753 +	 * we turn interrupts on again. This is because free_timestamp_buffer()
1754 +	 * indirectly causes TLB invalidations due to modifications of the
1755 +	 * kernel address space, namely via vfree() in free_ft_buffer(), which
1756 +	 * cannot be processed until we turn on interrupts again.
1757 +	 */
1758 +
1759 +	if (buf &&
1760 +	    (only_single_writer /* resolved during function inlining */
1761 +	     ? ft_buffer_start_single_write(buf, (void**)  &ts)
1762 +	     : ft_buffer_start_write(buf, (void**) &ts))) {
1763 +		ts->event     = event;
1764 +		ts->seq_no    = seq_no;
1765 +
1766 +		ts->task_type = type;
1767 +		ts->pid	      = pid_fragment;
1768 +
1769 +		ts->cpu       = cpu;
1770 +
1771 +		if (record_irq) {
1772 +			if (local_cpu)
1773 +				irq_count = cpu_get_and_clear_irq_fired();
1774 +			else
1775 +				irq_count = get_and_clear_irq_fired_for_cpu(cpu);
1776 +		}
1777 +
1778 +		save_irq_flags(ts, irq_count - hide_irq);
1779 +
1780 +		if (record_timestamp)
1781 +			timestamp = ft_timestamp();
1782 +		if (record_timestamp == RECORD_OFFSET_TIMESTAMP)
1783 +			timestamp += cycle_offset[smp_processor_id()][cpu];
1784 +
1785 +		ts->timestamp = timestamp;
1786 +		ft_buffer_finish_write(buf, ts);
1787 +	}
1788 +
1789 +	local_irq_restore(flags);
1790 +}
1791 +
1792 +
1793 +static inline void write_cpu_timestamp(
1794 +	uint8_t event,
1795 +	uint8_t type,
1796 +	uint16_t pid_fragment,
1797 +	unsigned int irq_count,
1798 +	int record_irq,
1799 +	int hide_irq,
1800 +	uint64_t timestamp,
1801 +	int record_timestamp)
1802 +{
1803 +	__write_record(event, type,
1804 +		       pid_fragment,
1805 +		       irq_count, record_irq, hide_irq,
1806 +		       timestamp, record_timestamp,
1807 +		       1 /* only_single_writer */,
1808 +		       1 /* is_cpu_timestamp */,
1809 +		       1 /* local_cpu */,
1810 +		       0xff /* other_cpu */);
1811 +}
1812 +
1813 +static inline void save_msg_timestamp(
1814 +	uint8_t event,
1815 +	int hide_irq)
1816 +{
1817 +	struct task_struct *t  = current;
1818 +	__write_record(event, is_realtime(t) ? TSK_RT : TSK_BE,
1819 +		       t->pid,
1820 +		       0, LOCAL_IRQ_COUNT, hide_irq,
1821 +		       0, RECORD_LOCAL_TIMESTAMP,
1822 +		       0 /* only_single_writer */,
1823 +		       0 /* is_cpu_timestamp */,
1824 +		       1 /* local_cpu */,
1825 +		       0xff /* other_cpu */);
1826 +}
1827 +
1828 +static inline void save_remote_msg_timestamp(
1829 +	uint8_t event,
1830 +	uint8_t remote_cpu)
1831 +{
1832 +	struct task_struct *t  = current;
1833 +	__write_record(event, is_realtime(t) ? TSK_RT : TSK_BE,
1834 +		       t->pid,
1835 +		       0, REMOTE_IRQ_COUNT, 0,
1836 +		       0, RECORD_OFFSET_TIMESTAMP,
1837 +		       0 /* only_single_writer */,
1838 +		       0 /* is_cpu_timestamp */,
1839 +		       0 /* local_cpu */,
1840 +		       remote_cpu);
1841 +}
1842 +
1843 +feather_callback void save_cpu_timestamp_def(unsigned long event,
1844 +					     unsigned long type)
1845 +{
1846 +	write_cpu_timestamp(event, type,
1847 +			    current->pid,
1848 +			    0, LOCAL_IRQ_COUNT, 0,
1849 +			    0, RECORD_LOCAL_TIMESTAMP);
1850 +}
1851 +
1852 +feather_callback void save_cpu_timestamp_task(unsigned long event,
1853 +					      unsigned long t_ptr)
1854 +{
1855 +	struct task_struct *t = (struct task_struct *) t_ptr;
1856 +	int rt = is_realtime(t);
1857 +
1858 +	write_cpu_timestamp(event, rt ? TSK_RT : TSK_BE,
1859 +			    t->pid,
1860 +			    0, LOCAL_IRQ_COUNT, 0,
1861 +			    0, RECORD_LOCAL_TIMESTAMP);
1862 +}
1863 +
1864 +feather_callback void save_cpu_task_latency(unsigned long event,
1865 +					    unsigned long when_ptr)
1866 +{
1867 +	lt_t now = litmus_clock();
1868 +	lt_t *when = (lt_t*) when_ptr;
1869 +
1870 +	write_cpu_timestamp(event, TSK_RT,
1871 +			    0,
1872 +			    0, LOCAL_IRQ_COUNT, 0,
1873 +			    now - *when, DO_NOT_RECORD_TIMESTAMP);
1874 +}
1875 +
1876 +/* fake timestamp to user-reported time */
1877 +feather_callback void save_cpu_timestamp_time(unsigned long event,
1878 +			 unsigned long ptr)
1879 +{
1880 +	uint64_t* time = (uint64_t*) ptr;
1881 +
1882 +	write_cpu_timestamp(event, is_realtime(current) ? TSK_RT : TSK_BE,
1883 +			    current->pid,
1884 +			    0, LOCAL_IRQ_COUNT, 0,
1885 +			    *time, DO_NOT_RECORD_TIMESTAMP);
1886 +}
1887 +
1888 +/* Record user-reported IRQ count */
1889 +feather_callback void save_cpu_timestamp_irq(unsigned long event,
1890 +			unsigned long irq_counter_ptr)
1891 +{
1892 +	uint64_t* irqs = (uint64_t*) irq_counter_ptr;
1893 +
1894 +	write_cpu_timestamp(event, is_realtime(current) ? TSK_RT : TSK_BE,
1895 +			    current->pid,
1896 +			    *irqs, NO_IRQ_COUNT, 0,
1897 +			    0, RECORD_LOCAL_TIMESTAMP);
1898 +}
1899 +
1900 +
1901 +feather_callback void msg_sent(unsigned long event, unsigned long to)
1902 +{
1903 +	save_remote_msg_timestamp(event, to);
1904 +}
1905 +
1906 +/* Suppresses one IRQ from the irq count. Used by TS_SEND_RESCHED_END, which is
1907 + * called from within an interrupt that is expected. */
1908 +feather_callback void msg_received(unsigned long event)
1909 +{
1910 +	save_msg_timestamp(event, 1);
1911 +}
1912 +
1913 +static void __add_timestamp_user(struct timestamp *pre_recorded)
1914 +{
1915 +	unsigned long flags;
1916 +	unsigned int seq_no;
1917 +	struct timestamp *ts;
1918 +	struct ft_buffer* buf;
1919 +	int cpu;
1920 +
1921 +	local_irq_save(flags);
1922 +
1923 +	cpu = smp_processor_id();
1924 +	buf = cpu_trace_ts_buf(cpu);
1925 +
1926 +	seq_no = __get_cpu_var(cpu_ts_seq_no)++;
1927 +	if (buf && ft_buffer_start_single_write(buf, (void**)  &ts)) {
1928 +		*ts = *pre_recorded;
1929 +		ts->seq_no = seq_no;
1930 +		ts->cpu	   = raw_smp_processor_id();
1931 +		save_irq_flags(ts, get_and_clear_irq_fired());
1932 +		ft_buffer_finish_write(buf, ts);
1933 +	}
1934 +
1935 +	local_irq_restore(flags);
1936 +}
1937 +
1938 +/******************************************************************************/
1939 +/*                        DEVICE FILE DRIVER                                  */
1940 +/******************************************************************************/
1941 +
1942 +struct calibrate_info {
1943 +	atomic_t ready;
1944 +
1945 +	uint64_t cycle_count;
1946 +};
1947 +
1948 +static void calibrate_helper(void *_info)
1949 +{
1950 +	struct calibrate_info *info = _info;
1951 +	/* check in with master */
1952 +	atomic_inc(&info->ready);
1953 +
1954 +	/* wait for master to signal start */
1955 +	while (atomic_read(&info->ready))
1956 +		cpu_relax();
1957 +
1958 +	/* report time stamp */
1959 +	info->cycle_count = ft_timestamp();
1960 +
1961 +	/* tell master that we are done */
1962 +	atomic_inc(&info->ready);
1963 +}
1964 +
1965 +
1966 +static int64_t calibrate_cpu(int cpu)
1967 +{
1968 +	uint64_t cycles;
1969 +	struct calibrate_info info;
1970 +	unsigned long flags;
1971 +	int64_t  delta;
1972 +
1973 +	atomic_set(&info.ready, 0);
1974 +	info.cycle_count = 0;
1975 +	smp_wmb();
1976 +
1977 +	smp_call_function_single(cpu, calibrate_helper, &info, 0);
1978 +
1979 +	/* wait for helper to become active */
1980 +	while (!atomic_read(&info.ready))
1981 +		cpu_relax();
1982 +
1983 +	/* avoid interrupt interference */
1984 +	local_irq_save(flags);
1985 +
1986 +	/* take measurement */
1987 +	atomic_set(&info.ready, 0);
1988 +	smp_wmb();
1989 +	cycles = ft_timestamp();
1990 +
1991 +	/* wait for helper reading */
1992 +	while (!atomic_read(&info.ready))
1993 +		cpu_relax();
1994 +
1995 +	/* positive offset: the other guy is ahead of us */
1996 +	delta  = (int64_t) info.cycle_count;
1997 +	delta -= (int64_t) cycles;
1998 +
1999 +	local_irq_restore(flags);
2000 +
2001 +	return delta;
2002 +}
2003 +
2004 +#define NUM_SAMPLES 10
2005 +
2006 +static long calibrate_tsc_offsets(struct ftdev* ftdev, unsigned int idx,
2007 +				  unsigned long uarg)
2008 +{
2009 +	int cpu, self, i;
2010 +	int64_t delta, sample;
2011 +
2012 +	preempt_disable();
2013 +	self = smp_processor_id();
2014 +
2015 +	if (uarg)
2016 +		printk(KERN_INFO "Feather-Trace: determining TSC offsets for P%d\n", self);
2017 +
2018 +	for_each_online_cpu(cpu)
2019 +		if (cpu != self) {
2020 +			delta = calibrate_cpu(cpu);
2021 +			for (i = 1; i < NUM_SAMPLES; i++) {
2022 +				sample = calibrate_cpu(cpu);
2023 +				delta = sample < delta ? sample : delta;
2024 +			}
2025 +
2026 +			cycle_offset[self][cpu] = delta;
2027 +
2028 +			if (uarg)
2029 +				printk(KERN_INFO "Feather-Trace: TSC offset for P%d->P%d is %lld cycles.\n",
2030 +				       self, cpu, cycle_offset[self][cpu]);
2031 +		}
2032 +
2033 +	preempt_enable();
2034 +	return 0;
2035 +}
2036 +
2037 +#define NO_TIMESTAMPS (2 << CONFIG_SCHED_OVERHEAD_TRACE_SHIFT)
2038 +
2039 +static int alloc_timestamp_buffer(struct ftdev* ftdev, unsigned int idx)
2040 +{
2041 +	unsigned int count = NO_TIMESTAMPS;
2042 +
2043 +	/* An overhead-tracing timestamp should be exactly 16 bytes long. */
2044 +	BUILD_BUG_ON(sizeof(struct timestamp) != 16);
2045 +
2046 +	while (count && !ftdev->minor[idx].buf) {
2047 +		printk("time stamp buffer: trying to allocate %u time stamps for minor=%u.\n", count, idx);
2048 +		ftdev->minor[idx].buf = alloc_ft_buffer(count, sizeof(struct timestamp));
2049 +		count /= 2;
2050 +	}
2051 +	return ftdev->minor[idx].buf ? 0 : -ENOMEM;
2052 +}
2053 +
2054 +static void free_timestamp_buffer(struct ftdev* ftdev, unsigned int idx)
2055 +{
2056 +	struct ft_buffer* to_free = ftdev->minor[idx].buf;
2057 +	ftdev->minor[idx].buf = NULL;
2058 +	/* Ensure all cores see buf == NULL before the mappings are freed. */
2059 +	smp_wmb();
2060 +	free_ft_buffer(to_free);
2061 +}
2062 +
2063 +static ssize_t write_timestamp_from_user(struct ft_buffer* buf, size_t len,
2064 +					 const char __user *from)
2065 +{
2066 +	ssize_t consumed = 0;
2067 +	struct timestamp ts;
2068 +
2069 +	/* don't give us partial timestamps */
2070 +	if (len % sizeof(ts))
2071 +		return -EINVAL;
2072 +
2073 +	while (len >= sizeof(ts)) {
2074 +		if (copy_from_user(&ts, from, sizeof(ts))) {
2075 +			consumed = -EFAULT;
2076 +			goto out;
2077 +		}
2078 +		len  -= sizeof(ts);
2079 +		from += sizeof(ts);
2080 +		consumed += sizeof(ts);
2081 +
2082 +		/* Note: this always adds to the buffer of the CPU-local
2083 +		 * device, not necessarily to the device that the system call
2084 +		 * was invoked on. This is admittedly a bit ugly, but requiring
2085 +		 * tasks to only write to the appropriate device would make
2086 +		 * tracing from userspace under global and clustered scheduling
2087 +		 * exceedingly difficult. Writing to remote buffers would
2088 +		 * require to not use ft_buffer_start_single_write(), which we
2089 +		 * want to do to reduce the number of atomic ops in the common
2090 +		 * case (which is the recording of CPU-local scheduling
2091 +		 * overheads).
2092 +		 */
2093 +		__add_timestamp_user(&ts);
2094 +	}
2095 +
2096 +out:
2097 +	return consumed;
2098 +}
2099 +
2100 +static int __init init_cpu_ft_overhead_trace(void)
2101 +{
2102 +	int err, cpu;
2103 +
2104 +	printk("Initializing Feather-Trace per-cpu overhead tracing device.\n");
2105 +	err = ftdev_init(&cpu_overhead_dev, THIS_MODULE,
2106 +			 num_online_cpus(), "ft_cpu_trace");
2107 +	if (err)
2108 +		goto err_out;
2109 +
2110 +	cpu_overhead_dev.alloc = alloc_timestamp_buffer;
2111 +	cpu_overhead_dev.free  = free_timestamp_buffer;
2112 +	cpu_overhead_dev.write = write_timestamp_from_user;
2113 +
2114 +	err = register_ftdev(&cpu_overhead_dev);
2115 +	if (err)
2116 +		goto err_dealloc;
2117 +
2118 +	for (cpu = 0; cpu < NR_CPUS; cpu++)  {
2119 +		per_cpu(cpu_ts_seq_no, cpu) = 0;
2120 +	}
2121 +
2122 +	return 0;
2123 +
2124 +err_dealloc:
2125 +	ftdev_exit(&cpu_overhead_dev);
2126 +err_out:
2127 +	printk(KERN_WARNING "Could not register per-cpu ft_trace device.\n");
2128 +	return err;
2129 +}
2130 +
2131 +static int __init init_msg_ft_overhead_trace(void)
2132 +{
2133 +	int err, cpu;
2134 +
2135 +	printk("Initializing Feather-Trace per-cpu message overhead tracing device.\n");
2136 +	err = ftdev_init(&msg_overhead_dev, THIS_MODULE,
2137 +			 num_online_cpus(), "ft_msg_trace");
2138 +	if (err)
2139 +		goto err_out;
2140 +
2141 +	msg_overhead_dev.alloc = alloc_timestamp_buffer;
2142 +	msg_overhead_dev.free  = free_timestamp_buffer;
2143 +	msg_overhead_dev.calibrate = calibrate_tsc_offsets;
2144 +
2145 +	err = register_ftdev(&msg_overhead_dev);
2146 +	if (err)
2147 +		goto err_dealloc;
2148 +
2149 +	for (cpu = 0; cpu < NR_CPUS; cpu++)  {
2150 +		per_cpu(msg_ts_seq_no, cpu) = 0;
2151 +	}
2152 +
2153 +	return 0;
2154 +
2155 +err_dealloc:
2156 +	ftdev_exit(&msg_overhead_dev);
2157 +err_out:
2158 +	printk(KERN_WARNING "Could not register message ft_trace device.\n");
2159 +	return err;
2160 +}
2161 +
2162 +
2163 +static int __init init_ft_overhead_trace(void)
2164 +{
2165 +	int err, i, j;
2166 +
2167 +	for (i = 0; i < NR_CPUS; i++)
2168 +		for (j = 0; j < NR_CPUS; j++)
2169 +			cycle_offset[i][j] = 0;
2170 +
2171 +	err = init_cpu_ft_overhead_trace();
2172 +	if (err)
2173 +		return err;
2174 +
2175 +	err = init_msg_ft_overhead_trace();
2176 +	if (err) {
2177 +		ftdev_exit(&cpu_overhead_dev);
2178 +		return err;
2179 +	}
2180 +	return 0;
2181 +}
2182 +
2183 +static void __exit exit_ft_overhead_trace(void)
2184 +{
2185 +	ftdev_exit(&cpu_overhead_dev);
2186 +	ftdev_exit(&msg_overhead_dev);
2187 +}
2188 +
2189 +module_init(init_ft_overhead_trace);
2190 +module_exit(exit_ft_overhead_trace);
2191 -- 
2192 1.8.1.2
2193 
2194 
2195 From 530f3c252277104613501e10cbfa63c09e4ca9c0 Mon Sep 17 00:00:00 2001
2196 From: Bjoern Brandenburg <bbb@mpi-sws.org>
2197 Date: Sun, 23 Jun 2013 14:51:57 +0200
2198 Subject: [PATCH 006/119] Integrate ft_irq_fired() with Linux
2199 
2200 This patch hooks up Feather-Trace's ft_irq_fired() handler with
2201 Linux's interrupt handling infrastructure.
2202 ---
2203  include/linux/hardirq.h    |  4 ++++
2204  include/litmus/trace_irq.h | 14 ++++++++++++++
2205  kernel/sched/core.c        |  5 +++++
2206  kernel/softirq.c           |  3 +++
2207  4 files changed, 26 insertions(+)
2208  create mode 100644 include/litmus/trace_irq.h
2209 
2210 diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
2211 index c1d6555..7ad5fd8 100644
2212 --- a/include/linux/hardirq.h
2213 +++ b/include/linux/hardirq.h
2214 @@ -7,6 +7,8 @@
2215  #include <linux/vtime.h>
2216  #include <asm/hardirq.h>
2217  
2218 +#include <litmus/trace_irq.h>
2219 +
2220  /*
2221   * We put the hardirq and softirq counter into the preemption
2222   * counter. The bitmask has the following meaning:
2223 @@ -154,6 +156,7 @@ extern void rcu_nmi_exit(void);
2224  		account_irq_enter_time(current);	\
2225  		add_preempt_count(HARDIRQ_OFFSET);	\
2226  		trace_hardirq_enter();			\
2227 +		ft_irq_fired();				\
2228  	} while (0)
2229  
2230  /*
2231 @@ -184,6 +187,7 @@ extern void irq_exit(void);
2232  		add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET);	\
2233  		rcu_nmi_enter();				\
2234  		trace_hardirq_enter();				\
2235 +		ft_irq_fired();					\
2236  	} while (0)
2237  
2238  #define nmi_exit()						\
2239 diff --git a/include/litmus/trace_irq.h b/include/litmus/trace_irq.h
2240 new file mode 100644
2241 index 0000000..0d0c042
2242 --- /dev/null
2243 +++ b/include/litmus/trace_irq.h
2244 @@ -0,0 +1,14 @@
2245 +#ifndef _LITMUS_TRACE_IRQ_H_
2246 +#define	_LITMUS_TRACE_IRQ_H_
2247 +
2248 +#ifdef CONFIG_SCHED_OVERHEAD_TRACE
2249 +
2250 +void ft_irq_fired(void);
2251 +
2252 +#else
2253 +
2254 +#define ft_irq_fired() /* nothing to do */
2255 +
2256 +#endif
2257 +
2258 +#endif
2259 diff --git a/kernel/sched/core.c b/kernel/sched/core.c
2260 index b4308d7..7eefaab 100644
2261 --- a/kernel/sched/core.c
2262 +++ b/kernel/sched/core.c
2263 @@ -1408,7 +1408,12 @@ void scheduler_ipi(void)
2264  	if (llist_empty(&this_rq()->wake_list)
2265  			&& !tick_nohz_full_cpu(smp_processor_id())
2266  			&& !got_nohz_idle_kick())
2267 +	{
2268 +		/* If we don't call irq_enter(), we need to trigger the IRQ
2269 +		 * tracing manually. */
2270 +		ft_irq_fired();
2271  		return;
2272 +	}
2273  
2274  	/*
2275  	 * Not all reschedule IPI handlers call irq_enter/irq_exit, since
2276 diff --git a/kernel/softirq.c b/kernel/softirq.c
2277 index 787b3a0..fe8890b 100644
2278 --- a/kernel/softirq.c
2279 +++ b/kernel/softirq.c
2280 @@ -226,6 +226,9 @@ asmlinkage void __do_softirq(void)
2281  	 */
2282  	current->flags &= ~PF_MEMALLOC;
2283  
2284 +	/* Mark Feather-Trace samples as "disturbed". */
2285 +	ft_irq_fired();
2286 +
2287  	pending = local_softirq_pending();
2288  	account_irq_enter_time(current);
2289  
2290 -- 
2291 1.8.1.2
2292 
2293 
2294 From 93919aeeb7b30971603a3d67b3d12091a45e9fbf Mon Sep 17 00:00:00 2001
2295 From: Bjoern Brandenburg <bbb@mpi-sws.org>
2296 Date: Wed, 26 Jun 2013 18:45:30 +0200
2297 Subject: [PATCH 007/119] Add SCHED, SCHED2, TICK, and CXS overhead tracepoints
2298 
2299 This patch integrates into the Linux scheduler those overhead
2300 tracepoints that are compatible with plain vanilla Linux (i.e., not
2301 specific to LITMUS^RT plugins).  This can be used to measure the
2302 overheads of an otherwise unmodified kernel.
2303 ---
2304  kernel/sched/core.c | 20 +++++++++++++++++++-
2305  1 file changed, 19 insertions(+), 1 deletion(-)
2306 
2307 diff --git a/kernel/sched/core.c b/kernel/sched/core.c
2308 index 7eefaab..3a471d6 100644
2309 --- a/kernel/sched/core.c
2310 +++ b/kernel/sched/core.c
2311 @@ -86,6 +86,8 @@
2312  #include "../workqueue_internal.h"
2313  #include "../smpboot.h"
2314  
2315 +#include <litmus/trace.h>
2316 +
2317  #define CREATE_TRACE_POINTS
2318  #include <trace/events/sched.h>
2319  
2320 @@ -2748,6 +2750,8 @@ void scheduler_tick(void)
2321  
2322  	sched_clock_tick();
2323  
2324 +	TS_TICK_START(current);
2325 +
2326  	raw_spin_lock(&rq->lock);
2327  	update_rq_clock(rq);
2328  	update_cpu_load_active(rq);
2329 @@ -2761,6 +2765,8 @@ void scheduler_tick(void)
2330  	trigger_load_balance(rq, cpu);
2331  #endif
2332  	rq_last_tick_reset(rq);
2333 +
2334 +	TS_TICK_END(current);
2335  }
2336  
2337  #ifdef CONFIG_NO_HZ_FULL
2338 @@ -2972,6 +2978,8 @@ need_resched:
2339  	rcu_note_context_switch(cpu);
2340  	prev = rq->curr;
2341  
2342 +	TS_SCHED_START;
2343 +
2344  	schedule_debug(prev);
2345  
2346  	if (sched_feat(HRTICK))
2347 @@ -3024,7 +3032,10 @@ need_resched:
2348  		rq->curr = next;
2349  		++*switch_count;
2350  
2351 +		TS_SCHED_END(next);
2352 +		TS_CXS_START(next);
2353  		context_switch(rq, prev, next); /* unlocks the rq */
2354 +		TS_CXS_END(current);
2355  		/*
2356  		 * The context switch have flipped the stack from under us
2357  		 * and restored the local variables which were saved when
2358 @@ -3033,12 +3044,19 @@ need_resched:
2359  		 */
2360  		cpu = smp_processor_id();
2361  		rq = cpu_rq(cpu);
2362 -	} else
2363 +	} else {
2364 +		TS_SCHED_END(prev);
2365  		raw_spin_unlock_irq(&rq->lock);
2366 +	}
2367 +
2368 +	TS_SCHED2_START(prev);
2369  
2370  	post_schedule(rq);
2371  
2372  	sched_preempt_enable_no_resched();
2373 +
2374 +	TS_SCHED2_END(prev);
2375 +
2376  	if (need_resched())
2377  		goto need_resched;
2378  }
2379 -- 
2380 1.8.1.2
2381 
2382 
2383 From 5ce8d2c4a1fa8e5a4bca25891f2e892ef55fc89f Mon Sep 17 00:00:00 2001
2384 From: Felipe Cerqueira <felipec@mpi-sws.org>
2385 Date: Mon, 11 Feb 2013 18:10:50 +0100
2386 Subject: [PATCH 008/119] Export x86 cache topology
2387 
2388 This patch adds get_shared_cpu_map(), which allows the caller to infer
2389 which CPUs share a cache at a given level.
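
Editor's note: a minimal, hypothetical usage sketch (not part of this
patch). It assumes CONFIG_SYSFS (the declaration is guarded by it); the
helper name example_show_cache_sharing() and the printed message are
placeholders. The intended use in this series is, e.g., to group CPUs
that share a cache into scheduling clusters.

	#include <linux/cpumask.h>
	#include <linux/gfp.h>
	#include <linux/kernel.h>
	#include <asm/processor.h>

	static void example_show_cache_sharing(unsigned int cpu, int index)
	{
		cpumask_var_t mask;
		int other;

		if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
			return;
		/* An out-of-range index is clamped to the last cache leaf and
		 * the clamped value is returned; otherwise 0 is returned. */
		get_shared_cpu_map(mask, cpu, index);
		for_each_cpu(other, mask)
			printk(KERN_INFO "CPU%u shares its level-%d cache with CPU%d\n",
			       cpu, index, other);
		free_cpumask_var(mask);
	}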
2390 ---
2391  arch/x86/include/asm/processor.h      |  4 ++++
2392  arch/x86/kernel/cpu/intel_cacheinfo.c | 17 +++++++++++++++++
2393  2 files changed, 21 insertions(+)
2394 
2395 diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
2396 index 22224b3..254dd2b 100644
2397 --- a/arch/x86/include/asm/processor.h
2398 +++ b/arch/x86/include/asm/processor.h
2399 @@ -173,6 +173,10 @@ void print_cpu_msr(struct cpuinfo_x86 *);
2400  extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
2401  extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
2402  extern void init_amd_cacheinfo(struct cpuinfo_x86 *c);
2403 +#ifdef CONFIG_SYSFS
2404 +extern int get_shared_cpu_map(cpumask_var_t mask,
2405 +			       unsigned int cpu, int index);
2406 +#endif
2407  
2408  extern void detect_extended_topology(struct cpuinfo_x86 *c);
2409  extern void detect_ht(struct cpuinfo_x86 *c);
2410 diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
2411 index 7c6f7d5..033939b 100644
2412 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c
2413 +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
2414 @@ -744,6 +744,23 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
2415  static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
2416  #define CPUID4_INFO_IDX(x, y)	(&((per_cpu(ici_cpuid4_info, x))[y]))
2417  
2418 +/* return the set of CPUs that share the cache at level 'index' with 'cpu' */
2419 +int get_shared_cpu_map(cpumask_var_t mask, unsigned int cpu, int index)
2420 +{
2421 +	int ret = 0;
2422 +	struct _cpuid4_info *this_leaf;
2423 +
2424 +	if (index >= num_cache_leaves) {
2425 +		index = num_cache_leaves - 1;
2426 +		ret = index;
2427 +	}
2428 +
2429 +	this_leaf = CPUID4_INFO_IDX(cpu,index);
2430 +	cpumask_copy(mask, to_cpumask(this_leaf->shared_cpu_map));
2431 +
2432 +	return ret;
2433 +}
2434 +
2435  #ifdef CONFIG_SMP
2436  
2437  static int __cpuinit cache_shared_amd_cpu_map_setup(unsigned int cpu, int index)
2438 -- 
2439 1.8.1.2
2440 
2441 
2442 From 74a89132e046e7a35f16f6eab9c6884679d48f27 Mon Sep 17 00:00:00 2001
2443 From: Bjoern Brandenburg <bbb@mpi-sws.org>
2444 Date: Sun, 23 Jun 2013 15:26:01 +0200
2445 Subject: [PATCH 009/119] Add object list to inodes
2446 
2447 This patch adds a list of arbitrary objects to inodes.
2448 
2449 This is used by LITMUS^RT's locking API to attach lock objects to
2450 inodes (which serve as namespaces in LITMUS^RT's locking API).
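
Editor's note: a purely illustrative sketch (not part of this patch) of
how a later object-namespace layer might use the new fields; 'struct
inode_obj_id' and example_attach_obj() are hypothetical names, and only
inode_obj_id_table is forward-declared by this patch.

	#include <linux/fs.h>
	#include <linux/list.h>
	#include <linux/mutex.h>

	struct inode_obj_id {
		struct list_head list;	/* linked into inode->i_obj_list */
		unsigned int	 id;	/* user-visible object id        */
		void		*obj;	/* e.g., a lock object           */
	};

	static void example_attach_obj(struct inode *inode,
				       struct inode_obj_id *entry)
	{
		mutex_lock(&inode->i_obj_mutex);
		list_add(&entry->list, &inode->i_obj_list);
		mutex_unlock(&inode->i_obj_mutex);
	}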
2451 ---
2452  fs/inode.c         | 2 ++
2453  include/linux/fs.h | 5 +++++
2454  2 files changed, 7 insertions(+)
2455 
2456 diff --git a/fs/inode.c b/fs/inode.c
2457 index 00d5fc3..a80e326 100644
2458 --- a/fs/inode.c
2459 +++ b/fs/inode.c
2460 @@ -370,6 +370,8 @@ void inode_init_once(struct inode *inode)
2461  #ifdef CONFIG_FSNOTIFY
2462  	INIT_HLIST_HEAD(&inode->i_fsnotify_marks);
2463  #endif
2464 +	INIT_LIST_HEAD(&inode->i_obj_list);
2465 +	mutex_init(&inode->i_obj_mutex);
2466  }
2467  EXPORT_SYMBOL(inode_init_once);
2468  
2469 diff --git a/include/linux/fs.h b/include/linux/fs.h
2470 index 65c2be2..8a0ac17 100644
2471 --- a/include/linux/fs.h
2472 +++ b/include/linux/fs.h
2473 @@ -508,6 +508,7 @@ static inline int mapping_writably_mapped(struct address_space *mapping)
2474  
2475  struct posix_acl;
2476  #define ACL_NOT_CACHED ((void *)(-1))
2477 +struct inode_obj_id_table;
2478  
2479  #define IOP_FASTPERM	0x0001
2480  #define IOP_LOOKUP	0x0002
2481 @@ -606,6 +607,10 @@ struct inode {
2482  #ifdef CONFIG_IMA
2483  	atomic_t		i_readcount; /* struct files open RO */
2484  #endif
2485 +
2486 +	struct list_head	i_obj_list;
2487 +	struct mutex		i_obj_mutex;
2488 +
2489  	void			*i_private; /* fs or device private pointer */
2490  };
2491  
2492 -- 
2493 1.8.1.2
2494 
2495 
2496 From 7fb0ac2758b6e277de7bd753fdbe8596048d156c Mon Sep 17 00:00:00 2001
2497 From: Bjoern Brandenburg <bbb@mpi-sws.org>
2498 Date: Sun, 23 Jun 2013 16:29:07 +0200
2499 Subject: [PATCH 010/119] Add TRACE() debug tracing support
2500 
2501 This patch adds the infrastructure for the TRACE() debug macro.
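
Editor's note: a brief usage sketch (not part of this patch). With
CONFIG_SCHED_DEBUG_TRACE enabled, kernel code can log via TRACE() and
TRACE_CUR(); the buffer is drained from userspace by reading the misc
device, e.g. 'cat /dev/litmus/log > my_log_file.txt'. The hook name and
variables below are placeholders, and TRACE_TASK()/TRACE_CUR() also rely
on the rt_param fields added later in this series.

	#include <litmus/debug_trace.h>

	static void example_debug_hook(int cpu, unsigned int queued)
	{
		/* prefixed with a global sequence number and the CPU id */
		TRACE("P%d has %u queued jobs\n", cpu, queued);
		/* additionally prints current->comm, pid, and job number */
		TRACE_CUR("selected for execution\n");
	}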
2502 ---
2503  include/litmus/debug_trace.h |  40 +++++++
2504  kernel/printk.c              |  13 ++-
2505  litmus/Kconfig               |  53 +++++++++
2506  litmus/Makefile              |   1 +
2507  litmus/sched_trace.c         | 251 +++++++++++++++++++++++++++++++++++++++++++
2508  5 files changed, 357 insertions(+), 1 deletion(-)
2509  create mode 100644 include/litmus/debug_trace.h
2510  create mode 100644 litmus/sched_trace.c
2511 
2512 diff --git a/include/litmus/debug_trace.h b/include/litmus/debug_trace.h
2513 new file mode 100644
2514 index 0000000..1266ac6
2515 --- /dev/null
2516 +++ b/include/litmus/debug_trace.h
2517 @@ -0,0 +1,40 @@
2518 +#ifndef LITMUS_DEBUG_TRACE_H
2519 +#define LITMUS_DEBUG_TRACE_H
2520 +
2521 +#ifdef CONFIG_SCHED_DEBUG_TRACE
2522 +void sched_trace_log_message(const char* fmt, ...);
2523 +void dump_trace_buffer(int max);
2524 +#else
2525 +
2526 +#define sched_trace_log_message(fmt, ...)
2527 +
2528 +#endif
2529 +
2530 +extern atomic_t __log_seq_no;
2531 +
2532 +#ifdef CONFIG_SCHED_DEBUG_TRACE_CALLER
2533 +#define TRACE_PREFIX "%d P%d [%s@%s:%d]: "
2534 +#define TRACE_ARGS  atomic_add_return(1, &__log_seq_no),	\
2535 +		raw_smp_processor_id(),				\
2536 +		__FUNCTION__, __FILE__, __LINE__
2537 +#else
2538 +#define TRACE_PREFIX "%d P%d: "
2539 +#define TRACE_ARGS  atomic_add_return(1, &__log_seq_no), \
2540 +		raw_smp_processor_id()
2541 +#endif
2542 +
2543 +#define TRACE(fmt, args...)						\
2544 +	sched_trace_log_message(TRACE_PREFIX fmt,			\
2545 +				TRACE_ARGS,  ## args)
2546 +
2547 +#define TRACE_TASK(t, fmt, args...)			\
2548 +	TRACE("(%s/%d:%d) " fmt,			 \
2549 +	      t ? (t)->comm : "null",			 \
2550 +	      t ? (t)->pid : 0,				 \
2551 +	      t ? (t)->rt_param.job_params.job_no : 0,	 \
2552 +	      ##args)
2553 +
2554 +#define TRACE_CUR(fmt, args...) \
2555 +	TRACE_TASK(current, fmt, ## args)
2556 +
2557 +#endif
2558 diff --git a/kernel/printk.c b/kernel/printk.c
2559 index d37d45c..5616e59 100644
2560 --- a/kernel/printk.c
2561 +++ b/kernel/printk.c
2562 @@ -66,6 +66,13 @@ int console_printk[4] = {
2563  };
2564  
2565  /*
2566 + * divert printk() messages when there is a LITMUS^RT debug listener
2567 + */
2568 +#include <litmus/debug_trace.h>
2569 +int trace_override = 0;
2570 +int trace_recurse  = 0;
2571 +
2572 +/*
2573   * Low level drivers may need that to know if they can schedule in
2574   * their unblank() callback or not. So let's export it.
2575   */
2576 @@ -1552,6 +1559,10 @@ asmlinkage int vprintk_emit(int facility, int level,
2577  	 */
2578  	text_len = vscnprintf(text, sizeof(textbuf), fmt, args);
2579  
2580 +	/* if LITMUS^RT tracer is active divert printk() msgs */
2581 +	if (trace_override && !trace_recurse)
2582 +		TRACE("%s", text);
2583 +
2584  	/* mark and strip a trailing newline */
2585  	if (text_len && text[text_len-1] == '\n') {
2586  		text_len--;
2587 @@ -2478,7 +2489,7 @@ static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = {
2588  void wake_up_klogd(void)
2589  {
2590  	preempt_disable();
2591 -	if (waitqueue_active(&log_wait)) {
2592 +	if (!trace_override && waitqueue_active(&log_wait)) {
2593  		this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP);
2594  		irq_work_queue(&__get_cpu_var(wake_up_klogd_work));
2595  	}
2596 diff --git a/litmus/Kconfig b/litmus/Kconfig
2597 index 0c7e06b..e4624ee 100644
2598 --- a/litmus/Kconfig
2599 +++ b/litmus/Kconfig
2600 @@ -49,6 +49,59 @@ config SCHED_OVERHEAD_TRACE_SHIFT
2601  		   24 => 512 MB
2602  		   26 =>  2G MB
2603  
2604 +config SCHED_DEBUG_TRACE
2605 +	bool "TRACE() debugging"
2606 +	default n
2607 +	help
2608 +	  Include support for sched_trace_log_message(), which is used to
2609 +	  implement TRACE(). If disabled, no TRACE() messages will be included
2610 +	  in the kernel, and no overheads due to debugging statements will be
2611 +	  incurred by the scheduler. Disable if the overhead is not acceptable
2612 +	  (e.g. benchmarking).
2613 +
2614 +	  Say Yes for debugging.
2615 +	  Say No for overhead tracing.
2616 +
2617 +config SCHED_DEBUG_TRACE_SHIFT
2618 +       int "Buffer size for TRACE() buffer"
2619 +       depends on SCHED_DEBUG_TRACE
2620 +       range 14 22
2621 +       default 18
2622 +       help
2623 +
2624 +	Select the amount of memory needed for the TRACE() buffer, as a
2625 +	power of two. The TRACE() buffer is global and statically allocated. If
2626 +	the buffer is too small, there will be holes in the TRACE() log if the
2627 +	buffer-flushing task is starved.
2628 +
2629 +	The default should be sufficient for most systems. Increase the buffer
2630 +	size if the log contains holes. Reduce the buffer size when running on
2631 +	a memory-constrained system.
2632 +
2633 +	Examples: 14 =>  16KB
2634 +		  18 => 256KB
2635 +		  20 =>   1MB
2636 +
2637 +        This buffer is exported to userspace using a misc device as
2638 +        'litmus/log'. On a system with default udev rules, a corresponding
2639 +        character device node should be created at /dev/litmus/log. The buffer
2640 +        can be flushed using cat, e.g., 'cat /dev/litmus/log > my_log_file.txt'.
2641 +
2642 +config SCHED_DEBUG_TRACE_CALLER
2643 +       bool "Include [function@file:line] tag in TRACE() log"
2644 +       depends on SCHED_DEBUG_TRACE
2645 +       default n
2646 +       help
2647 +         With this option enabled, TRACE() prepends
2648 +
2649 +	      "[<function name>@<filename>:<line number>]"
2650 +
2651 +	 to each message in the debug log. Enable this to aid in figuring out
2652 +         what was called in which order. The downside is that it adds a lot of
2653 +         clutter.
2654 +
2655 +	 If unsure, say No.
2656 +
2657  endmenu
2658  
2659  endmenu
2660 diff --git a/litmus/Makefile b/litmus/Makefile
2661 index 99f90c3..07f065f 100644
2662 --- a/litmus/Makefile
2663 +++ b/litmus/Makefile
2664 @@ -3,4 +3,5 @@
2665  #
2666  
2667  obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
2668 +obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o
2669  obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o
2670 diff --git a/litmus/sched_trace.c b/litmus/sched_trace.c
2671 new file mode 100644
2672 index 0000000..426a9dd
2673 --- /dev/null
2674 +++ b/litmus/sched_trace.c
2675 @@ -0,0 +1,251 @@
2676 +/*
2677 + * sched_trace.c -- record scheduling events to a byte stream.
2678 + */
2679 +#include <linux/spinlock.h>
2680 +#include <linux/mutex.h>
2681 +
2682 +#include <linux/fs.h>
2683 +#include <linux/slab.h>
2684 +#include <linux/miscdevice.h>
2685 +#include <asm/uaccess.h>
2686 +#include <linux/module.h>
2687 +#include <linux/sysrq.h>
2688 +#include <linux/sched.h>
2689 +#include <linux/kfifo.h>
2690 +
2691 +atomic_t __log_seq_no = ATOMIC_INIT(0);
2692 +
2693 +#define SCHED_TRACE_NAME "litmus/log"
2694 +
2695 +/* Compute size of TRACE() buffer */
2696 +#define LITMUS_TRACE_BUF_SIZE (1 << CONFIG_SCHED_DEBUG_TRACE_SHIFT)
2697 +
2698 +/* Max length of one read from the buffer */
2699 +#define MAX_READ_LEN (64 * 1024)
2700 +
2701 +/* Max length for one write --- by TRACE() --- to the buffer. This is used to
2702 + * allocate a per-cpu buffer for printf() formatting. */
2703 +#define MSG_SIZE 255
2704 +
2705 +
2706 +static DEFINE_MUTEX(reader_mutex);
2707 +static atomic_t reader_cnt = ATOMIC_INIT(0);
2708 +static DEFINE_KFIFO(debug_buffer, char, LITMUS_TRACE_BUF_SIZE);
2709 +
2710 +
2711 +static DEFINE_RAW_SPINLOCK(log_buffer_lock);
2712 +static DEFINE_PER_CPU(char[MSG_SIZE], fmt_buffer);
2713 +
2714 +/*
2715 + * sched_trace_log_message - Write to the trace buffer (log_buffer)
2716 + *
2717 + * This is the only function accessing the log_buffer from inside the
2718 + * kernel for writing.
2719 + * Concurrent access to sched_trace_log_message must be serialized using
2720 + * log_buffer_lock
2721 + * The maximum length of a formatted message is 255
2722 + */
2723 +void sched_trace_log_message(const char* fmt, ...)
2724 +{
2725 +	unsigned long 	flags;
2726 +	va_list 	args;
2727 +	size_t		len;
2728 +	char*		buf;
2729 +
2730 +	if (!atomic_read(&reader_cnt))
2731 +		/* early exit if nobody is listening */
2732 +		return;
2733 +
2734 +	va_start(args, fmt);
2735 +	local_irq_save(flags);
2736 +
2737 +	/* format message */
2738 +	buf = __get_cpu_var(fmt_buffer);
2739 +	len = vscnprintf(buf, MSG_SIZE, fmt, args);
2740 +
2741 +	raw_spin_lock(&log_buffer_lock);
2742 +	/* Don't copy the trailing null byte, we don't want null bytes in a
2743 +	 * text file.
2744 +	 */
2745 +	kfifo_in(&debug_buffer, buf, len);
2746 +	raw_spin_unlock(&log_buffer_lock);
2747 +
2748 +	local_irq_restore(flags);
2749 +	va_end(args);
2750 +}
2751 +
2752 +
2753 +/*
2754 + * log_read - Read the trace buffer
2755 + *
2756 + * This function is called as a file operation from userspace.
2757 + * Readers can sleep. Access is serialized through reader_mutex
2758 + */
2759 +static ssize_t log_read(struct file *filp,
2760 +			char __user *to, size_t len,
2761 +			loff_t *f_pos)
2762 +{
2763 +	/* we ignore f_pos, this is strictly sequential */
2764 +
2765 +	ssize_t error = -EINVAL;
2766 +	char* mem;
2767 +
2768 +	if (mutex_lock_interruptible(&reader_mutex)) {
2769 +		error = -ERESTARTSYS;
2770 +		goto out;
2771 +	}
2772 +
2773 +	if (len > MAX_READ_LEN)
2774 +		len = MAX_READ_LEN;
2775 +
2776 +	mem = kmalloc(len, GFP_KERNEL);
2777 +	if (!mem) {
2778 +		error = -ENOMEM;
2779 +		goto out_unlock;
2780 +	}
2781 +
2782 +	error = kfifo_out(&debug_buffer, mem, len);
2783 +	while (!error) {
2784 +		set_current_state(TASK_INTERRUPTIBLE);
2785 +		schedule_timeout(110);
2786 +		if (signal_pending(current))
2787 +			error = -ERESTARTSYS;
2788 +		else
2789 +			error = kfifo_out(&debug_buffer, mem, len);
2790 +	}
2791 +
2792 +	if (error > 0 && copy_to_user(to, mem, error))
2793 +		error = -EFAULT;
2794 +
2795 +	kfree(mem);
2796 + out_unlock:
2797 +	mutex_unlock(&reader_mutex);
2798 + out:
2799 +	return error;
2800 +}
2801 +
2802 +/*
2803 + * Enable redirection of printk() messages to the trace buffer.
2804 + * Defined in kernel/printk.c
2805 + */
2806 +extern int trace_override;
2807 +extern int trace_recurse;
2808 +
2809 +/*
2810 + * log_open - open the global log message ring buffer.
2811 + */
2812 +static int log_open(struct inode *in, struct file *filp)
2813 +{
2814 +	int error = -EINVAL;
2815 +
2816 +	if (mutex_lock_interruptible(&reader_mutex)) {
2817 +		error = -ERESTARTSYS;
2818 +		goto out;
2819 +	}
2820 +
2821 +	atomic_inc(&reader_cnt);
2822 +	error = 0;
2823 +
2824 +	printk(KERN_DEBUG
2825 +	       "sched_trace kfifo with buffer starting at: 0x%p\n",
2826 +	       debug_buffer.buf);
2827 +
2828 +	/* override printk() */
2829 +	trace_override++;
2830 +
2831 +	mutex_unlock(&reader_mutex);
2832 + out:
2833 +	return error;
2834 +}
2835 +
2836 +static int log_release(struct inode *in, struct file *filp)
2837 +{
2838 +	int error = -EINVAL;
2839 +
2840 +	if (mutex_lock_interruptible(&reader_mutex)) {
2841 +		error = -ERESTARTSYS;
2842 +		goto out;
2843 +	}
2844 +
2845 +	atomic_dec(&reader_cnt);
2846 +
2847 +	/* release printk() overriding */
2848 +	trace_override--;
2849 +
2850 +	printk(KERN_DEBUG "sched_trace kfifo released\n");
2851 +
2852 +	mutex_unlock(&reader_mutex);
2853 + out:
2854 +	return error;
2855 +}
2856 +
2857 +/*
2858 + * log_fops  - The file operations for accessing the global LITMUS log message
2859 + *             buffer.
2860 + *
2861 + * Except for opening the device file it uses the same operations as trace_fops.
2862 + */
2863 +static struct file_operations log_fops = {
2864 +	.owner   = THIS_MODULE,
2865 +	.open    = log_open,
2866 +	.release = log_release,
2867 +	.read    = log_read,
2868 +};
2869 +
2870 +static struct miscdevice litmus_log_dev = {
2871 +	.name    = SCHED_TRACE_NAME,
2872 +	.minor   = MISC_DYNAMIC_MINOR,
2873 +	.fops    = &log_fops,
2874 +};
2875 +
2876 +#ifdef CONFIG_MAGIC_SYSRQ
2877 +void dump_trace_buffer(int max)
2878 +{
2879 +	char line[80];
2880 +	int len;
2881 +	int count = 0;
2882 +
2883 +	/* potential, but very unlikely, race... */
2884 +	trace_recurse = 1;
2885 +	while ((max == 0 || count++ < max) &&
2886 +	       (len = kfifo_out(&debug_buffer, line, sizeof(line) - 1)) > 0) {
2887 +		line[len] = '\0';
2888 +		printk("%s", line);
2889 +	}
2890 +	trace_recurse = 0;
2891 +}
2892 +
2893 +static void sysrq_dump_trace_buffer(int key)
2894 +{
2895 +	dump_trace_buffer(100);
2896 +}
2897 +
2898 +static struct sysrq_key_op sysrq_dump_trace_buffer_op = {
2899 +	.handler	= sysrq_dump_trace_buffer,
2900 +	.help_msg	= "dump-trace-buffer(Y)",
2901 +	.action_msg	= "writing content of TRACE() buffer",
2902 +};
2903 +#endif
2904 +
2905 +static int __init init_sched_trace(void)
2906 +{
2907 +	printk("Initializing TRACE() device\n");
2908 +
2909 +#ifdef CONFIG_MAGIC_SYSRQ
2910 +	/* offer some debugging help */
2911 +	if (!register_sysrq_key('y', &sysrq_dump_trace_buffer_op))
2912 +		printk("Registered dump-trace-buffer(Y) magic sysrq.\n");
2913 +	else
2914 +		printk("Could not register dump-trace-buffer(Y) magic sysrq.\n");
2915 +#endif
2916 +
2917 +	return misc_register(&litmus_log_dev);
2918 +}
2919 +
2920 +static void __exit exit_sched_trace(void)
2921 +{
2922 +	misc_deregister(&litmus_log_dev);
2923 +}
2924 +
2925 +module_init(init_sched_trace);
2926 +module_exit(exit_sched_trace);
2927 -- 
2928 1.8.1.2
2929 
2930 
2931 From 39587f773d6b64c8f7ab82d7e222de45899cb36f Mon Sep 17 00:00:00 2001
2932 From: Felipe Cerqueira <felipec@mpi-sws.org>
2933 Date: Mon, 11 Feb 2013 16:36:35 +0100
2934 Subject: [PATCH 011/119] Add hrtimer_start_on() support
2935 
2936 This patch adds hrtimer_start_on(), which allows arming timers on
2937 remote CPUs.  This is needed to avoid timer interrupts on "shielded"
2938 CPUs and is also useful for implementing semi-partitioned schedulers.
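
Editor's note: a minimal sketch (not part of this patch) of arming a
timer on a remote CPU with the new interface. The callback and the
static storage for the timer/info pair are placeholders; real users keep
these in their own per-CPU or per-task state.

	#include <linux/hrtimer.h>
	#include <linux/ktime.h>

	static struct hrtimer example_timer;
	static struct hrtimer_start_on_info example_info;

	static enum hrtimer_restart example_fire(struct hrtimer *t)
	{
		return HRTIMER_NORESTART;
	}

	static void example_arm_remote(int cpu, u64 delay_ns)
	{
		ktime_t when = ktime_add_ns(ktime_get(), delay_ns);

		hrtimer_init(&example_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
		example_timer.function = example_fire;
		hrtimer_start_on_info_init(&example_info);
		/* Returns nonzero if 'info' is still queued from an earlier
		 * call and the request was therefore ignored. */
		hrtimer_start_on(cpu, &example_info, &example_timer,
				 when, HRTIMER_MODE_ABS);
	}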
2939 ---
2940  arch/arm/Kconfig                   |  3 ++
2941  arch/x86/Kconfig                   |  3 ++
2942  arch/x86/include/asm/entry_arch.h  |  1 +
2943  arch/x86/include/asm/hw_irq.h      |  3 ++
2944  arch/x86/include/asm/irq_vectors.h |  6 +++
2945  arch/x86/kernel/entry_64.S         |  2 +
2946  arch/x86/kernel/irqinit.c          |  3 ++
2947  arch/x86/kernel/smp.c              | 23 +++++++++
2948  include/linux/hrtimer.h            | 32 +++++++++++++
2949  include/linux/smp.h                |  5 ++
2950  kernel/hrtimer.c                   | 95 ++++++++++++++++++++++++++++++++++++++
2951  11 files changed, 176 insertions(+)
2952 
2953 diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
2954 index ecfd735..81dddd7 100644
2955 --- a/arch/arm/Kconfig
2956 +++ b/arch/arm/Kconfig
2957 @@ -2273,5 +2273,8 @@ source "arch/arm/kvm/Kconfig"
2958  config ARCH_HAS_FEATHER_TRACE
2959  	def_bool n
2960  
2961 +config ARCH_HAS_SEND_PULL_TIMERS
2962 +	def_bool n
2963 +
2964  source "litmus/Kconfig"
2965  
2966 diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
2967 index 171cdc9..b069526 100644
2968 --- a/arch/x86/Kconfig
2969 +++ b/arch/x86/Kconfig
2970 @@ -2350,4 +2350,7 @@ source "lib/Kconfig"
2971  config ARCH_HAS_FEATHER_TRACE
2972  	def_bool y
2973  
2974 +config ARCH_HAS_SEND_PULL_TIMERS
2975 +	def_bool y
2976 +
2977  source "litmus/Kconfig"
2978 diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
2979 index 9bd4eca..3a3c2f1 100644
2980 --- a/arch/x86/include/asm/entry_arch.h
2981 +++ b/arch/x86/include/asm/entry_arch.h
2982 @@ -13,6 +13,7 @@
2983  BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
2984  BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
2985  BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
2986 +BUILD_INTERRUPT(pull_timers_interrupt,PULL_TIMERS_VECTOR)
2987  BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
2988  BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR)
2989  #endif
2990 diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
2991 index 1da97ef..672de93 100644
2992 --- a/arch/x86/include/asm/hw_irq.h
2993 +++ b/arch/x86/include/asm/hw_irq.h
2994 @@ -77,6 +77,8 @@ extern void threshold_interrupt(void);
2995  extern void call_function_interrupt(void);
2996  extern void call_function_single_interrupt(void);
2997  
2998 +extern void pull_timers_interrupt(void);
2999 +
3000  /* IOAPIC */
3001  #define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs))
3002  extern unsigned long io_apic_irqs;
3003 @@ -166,6 +168,7 @@ extern asmlinkage void smp_irq_move_cleanup_interrupt(void);
3004  extern void smp_reschedule_interrupt(struct pt_regs *);
3005  extern void smp_call_function_interrupt(struct pt_regs *);
3006  extern void smp_call_function_single_interrupt(struct pt_regs *);
3007 +extern void smp_pull_timers_interrupt(struct pt_regs *);
3008  #ifdef CONFIG_X86_32
3009  extern void smp_invalidate_interrupt(struct pt_regs *);
3010  #else
3011 diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
3012 index 5702d7e..224116b 100644
3013 --- a/arch/x86/include/asm/irq_vectors.h
3014 +++ b/arch/x86/include/asm/irq_vectors.h
3015 @@ -124,6 +124,12 @@
3016   */
3017  #define LOCAL_TIMER_VECTOR		0xef
3018  
3019 +/*
3020 + * LITMUS^RT pull timers IRQ vector.
3021 + * Make sure it's not used by Linux.
3022 + */
3023 +#define PULL_TIMERS_VECTOR		0xdf
3024 +
3025  #define NR_VECTORS			 256
3026  
3027  #define FPU_IRQ				  13
3028 diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
3029 index 7ac938a..2a54337 100644
3030 --- a/arch/x86/kernel/entry_64.S
3031 +++ b/arch/x86/kernel/entry_64.S
3032 @@ -1183,6 +1183,8 @@ apicinterrupt CALL_FUNCTION_VECTOR \
3033  	call_function_interrupt smp_call_function_interrupt
3034  apicinterrupt RESCHEDULE_VECTOR \
3035  	reschedule_interrupt smp_reschedule_interrupt
3036 +apicinterrupt PULL_TIMERS_VECTOR \
3037 +	pull_timers_interrupt smp_pull_timers_interrupt
3038  #endif
3039  
3040  apicinterrupt ERROR_APIC_VECTOR \
3041 diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
3042 index a2a1fbc..77979d9 100644
3043 --- a/arch/x86/kernel/irqinit.c
3044 +++ b/arch/x86/kernel/irqinit.c
3045 @@ -145,6 +145,9 @@ static void __init smp_intr_init(void)
3046  	alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
3047  			call_function_single_interrupt);
3048  
3049 +	/* IPI for hrtimer pulling on remote cpus */
3050 +	alloc_intr_gate(PULL_TIMERS_VECTOR, pull_timers_interrupt);
3051 +
3052  	/* Low priority IPI to cleanup after moving an irq */
3053  	set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
3054  	set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
3055 diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
3056 index 48d2b7d..a52ef7f 100644
3057 --- a/arch/x86/kernel/smp.c
3058 +++ b/arch/x86/kernel/smp.c
3059 @@ -24,6 +24,8 @@
3060  #include <linux/cpu.h>
3061  #include <linux/gfp.h>
3062  
3063 +#include <litmus/debug_trace.h>
3064 +
3065  #include <asm/mtrr.h>
3066  #include <asm/tlbflush.h>
3067  #include <asm/mmu_context.h>
3068 @@ -163,6 +165,16 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
3069  	return NMI_HANDLED;
3070  }
3071  
3072 +/* trigger timers on remote cpu */
3073 +void smp_send_pull_timers(int cpu)
3074 +{
3075 +	if (unlikely(cpu_is_offline(cpu))) {
3076 +		WARN_ON(1);
3077 +		return;
3078 +	}
3079 +	apic->send_IPI_mask(cpumask_of(cpu), PULL_TIMERS_VECTOR);
3080 +}
3081 +
3082  /*
3083   * this function calls the 'stop' function on all other CPUs in the system.
3084   */
3085 @@ -285,6 +297,17 @@ static int __init nonmi_ipi_setup(char *str)
3086  
3087  __setup("nonmi_ipi", nonmi_ipi_setup);
3088  
3089 +extern void hrtimer_pull(void);
3090 +
3091 +void smp_pull_timers_interrupt(struct pt_regs *regs)
3092 +{
3093 +	ack_APIC_irq();
3094 +	irq_enter();
3095 +	TRACE("pull timer interrupt\n");
3096 +	hrtimer_pull();
3097 +	irq_exit();
3098 +}
3099 +
3100  struct smp_ops smp_ops = {
3101  	.smp_prepare_boot_cpu	= native_smp_prepare_boot_cpu,
3102  	.smp_prepare_cpus	= native_smp_prepare_cpus,
3103 diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
3104 index d19a5c2..93def50 100644
3105 --- a/include/linux/hrtimer.h
3106 +++ b/include/linux/hrtimer.h
3107 @@ -176,6 +176,7 @@ enum  hrtimer_base_type {
3108   * @nr_hangs:		Total number of hrtimer interrupt hangs
3109   * @max_hang_time:	Maximum time spent in hrtimer_interrupt
3110   * @clock_base:		array of clock bases for this cpu
3111 + * @to_pull:		LITMUS^RT list of timers to be pulled on this cpu
3112   */
3113  struct hrtimer_cpu_base {
3114  	raw_spinlock_t			lock;
3115 @@ -191,8 +192,32 @@ struct hrtimer_cpu_base {
3116  	ktime_t				max_hang_time;
3117  #endif
3118  	struct hrtimer_clock_base	clock_base[HRTIMER_MAX_CLOCK_BASES];
3119 +	struct list_head		to_pull;
3120  };
3121  
3122 +#ifdef CONFIG_ARCH_HAS_SEND_PULL_TIMERS
3123 +
3124 +#define HRTIMER_START_ON_INACTIVE	0
3125 +#define HRTIMER_START_ON_QUEUED		1
3126 +
3127 +/*
3128 + * struct hrtimer_start_on_info - save timer info on remote cpu
3129 + * @list:	list of hrtimer_start_on_info on remote cpu (to_pull)
3130 + * @timer:	timer to be triggered on remote cpu
3131 + * @time:	time event
3132 + * @mode:	timer mode
3133 + * @state:	activity flag
3134 + */
3135 +struct hrtimer_start_on_info {
3136 +	struct list_head	list;
3137 +	struct hrtimer		*timer;
3138 +	ktime_t			time;
3139 +	enum hrtimer_mode	mode;
3140 +	atomic_t		state;
3141 +};
3142 +
3143 +#endif
3144 +
3145  static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time)
3146  {
3147  	timer->node.expires = time;
3148 @@ -366,6 +391,13 @@ __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
3149  			 unsigned long delta_ns,
3150  			 const enum hrtimer_mode mode, int wakeup);
3151  
3152 +#ifdef CONFIG_ARCH_HAS_SEND_PULL_TIMERS
3153 +extern void hrtimer_start_on_info_init(struct hrtimer_start_on_info *info);
3154 +extern int hrtimer_start_on(int cpu, struct hrtimer_start_on_info *info,
3155 +			struct hrtimer *timer, ktime_t time,
3156 +			const enum hrtimer_mode mode);
3157 +#endif
3158 +
3159  extern int hrtimer_cancel(struct hrtimer *timer);
3160  extern int hrtimer_try_to_cancel(struct hrtimer *timer);
3161  
3162 diff --git a/include/linux/smp.h b/include/linux/smp.h
3163 index c848876..4f78ea7 100644
3164 --- a/include/linux/smp.h
3165 +++ b/include/linux/smp.h
3166 @@ -84,6 +84,11 @@ int smp_call_function_any(const struct cpumask *mask,
3167  void kick_all_cpus_sync(void);
3168  
3169  /*
3170 + * sends a 'pull timer' event to a remote CPU
3171 + */
3172 +extern void smp_send_pull_timers(int cpu);
3173 +
3174 +/*
3175   * Generic and arch helpers
3176   */
3177  #ifdef CONFIG_USE_GENERIC_SMP_HELPERS
3178 diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
3179 index 2288fbd..c7f0c79 100644
3180 --- a/kernel/hrtimer.c
3181 +++ b/kernel/hrtimer.c
3182 @@ -48,6 +48,8 @@
3183  #include <linux/sched/rt.h>
3184  #include <linux/timer.h>
3185  
3186 +#include <litmus/debug_trace.h>
3187 +
3188  #include <asm/uaccess.h>
3189  
3190  #include <trace/events/timer.h>
3191 @@ -1064,6 +1066,98 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
3192  }
3193  EXPORT_SYMBOL_GPL(hrtimer_start);
3194  
3195 +#if defined(CONFIG_ARCH_HAS_SEND_PULL_TIMERS) && defined(CONFIG_SMP)
3196 +
3197 +/**
3198 + * hrtimer_start_on_info_init - Initialize hrtimer_start_on_info
3199 + */
3200 +void hrtimer_start_on_info_init(struct hrtimer_start_on_info *info)
3201 +{
3202 +	memset(info, 0, sizeof(struct hrtimer_start_on_info));
3203 +	atomic_set(&info->state, HRTIMER_START_ON_INACTIVE);
3204 +}
3205 +
3206 +/**
3207 + *  hrtimer_pull - PULL_TIMERS_VECTOR callback on remote cpu
3208 + */
3209 +void hrtimer_pull(void)
3210 +{
3211 +	struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases);
3212 +	struct hrtimer_start_on_info *info;
3213 +	struct list_head *pos, *safe, list;
3214 +
3215 +	raw_spin_lock(&base->lock);
3216 +	list_replace_init(&base->to_pull, &list);
3217 +	raw_spin_unlock(&base->lock);
3218 +
3219 +	list_for_each_safe(pos, safe, &list) {
3220 +		info = list_entry(pos, struct hrtimer_start_on_info, list);
3221 +		TRACE("pulled timer %p\n", info->timer);
3222 +		list_del(pos);
3223 +		hrtimer_start(info->timer, info->time, info->mode);
3224 +	}
3225 +}
3226 +
3227 +/**
3228 + *  hrtimer_start_on - trigger timer arming on remote cpu
3229 + *  @cpu:	remote cpu
3230 + *  @info:	save timer information for enqueuing on remote cpu
3231 + *  @timer:	timer to be pulled
3232 + *  @time:	expire time
3233 + *  @mode:	timer mode
3234 + */
3235 +int hrtimer_start_on(int cpu, struct hrtimer_start_on_info* info,
3236 +		struct hrtimer *timer, ktime_t time,
3237 +		const enum hrtimer_mode mode)
3238 +{
3239 +	unsigned long flags;
3240 +	struct hrtimer_cpu_base* base;
3241 +	int in_use = 0, was_empty;
3242 +
3243 +	/* serialize access to info through the timer base */
3244 +	lock_hrtimer_base(timer, &flags);
3245 +
3246 +	in_use = (atomic_read(&info->state) != HRTIMER_START_ON_INACTIVE);
3247 +	if (!in_use) {
3248 +		INIT_LIST_HEAD(&info->list);
3249 +		info->timer = timer;
3250 +		info->time  = time;
3251 +		info->mode  = mode;
3252 +		/* mark as in use */
3253 +		atomic_set(&info->state, HRTIMER_START_ON_QUEUED);
3254 +	}
3255 +
3256 +	unlock_hrtimer_base(timer, &flags);
3257 +
3258 +	if (!in_use) {
3259 +		/* initiate pull  */
3260 +		preempt_disable();
3261 +		if (cpu == smp_processor_id()) {
3262 +			/* start timer locally; we may get called
3263 +			 * with rq->lock held, do not wake up anything
3264 +			 */
3265 +			TRACE("hrtimer_start_on: starting on local CPU\n");
3266 +			__hrtimer_start_range_ns(info->timer, info->time,
3267 +						 0, info->mode, 0);
3268 +		} else {
3269 +			TRACE("hrtimer_start_on: pulling to remote CPU\n");
3270 +			base = &per_cpu(hrtimer_bases, cpu);
3271 +			raw_spin_lock_irqsave(&base->lock, flags);
3272 +			was_empty = list_empty(&base->to_pull);
3273 +			list_add(&info->list, &base->to_pull);
3274 +			raw_spin_unlock_irqrestore(&base->lock, flags);
3275 +			if (was_empty)
3276 +				/* only send IPI if no one else
3277 +				 * has done so already
3278 +				 */
3279 +				smp_send_pull_timers(cpu);
3280 +		}
3281 +		preempt_enable();
3282 +	}
3283 +	return in_use;
3284 +}
3285 +
3286 +#endif
3287  
3288  /**
3289   * hrtimer_try_to_cancel - try to deactivate a timer
3290 @@ -1667,6 +1761,7 @@ static void __cpuinit init_hrtimers_cpu(int cpu)
3291  	}
3292  
3293  	hrtimer_init_hres(cpu_base);
3294 +	INIT_LIST_HEAD(&cpu_base->to_pull);
3295  }
3296  
3297  #ifdef CONFIG_HOTPLUG_CPU
3298 -- 
3299 1.8.1.2
3300 
3301 
3302 From 0014dd7899cb1e7109516d70c6db29223b3bdbdd Mon Sep 17 00:00:00 2001
3303 From: Bjoern Brandenburg <bbb@mpi-sws.org>
3304 Date: Tue, 25 Jun 2013 06:22:06 +0200
3305 Subject: [PATCH 012/119] Extend task_struct with rt_param
3306 
3307 This patch adds the PCB extensions required for LITMUS^RT.
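
Editor's note: an illustrative sketch (not part of this patch) of filling
in the new per-task parameters. The helper name and the concrete values
are placeholders; lt_t values are assumed to be in nanoseconds, as
elsewhere in LITMUS^RT.

	#include <litmus/rt_param.h>

	static void example_fill_rt_task(struct rt_task *tp)
	{
		tp->exec_cost         = 10ULL * 1000 * 1000;	/* 10 ms budget */
		tp->period            = 100ULL * 1000 * 1000;	/* 100 ms period */
		tp->relative_deadline = tp->period;		/* implicit deadline */
		tp->phase             = 0;
		tp->cpu               = 0;	/* only used by partitioned plugins */
		tp->priority          = LITMUS_HIGHEST_PRIORITY;
		tp->cls               = RT_CLASS_SOFT;
		tp->budget_policy     = PRECISE_ENFORCEMENT;
		tp->release_policy    = TASK_SPORADIC;
	}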
3308 ---
3309  include/linux/sched.h     |   5 +
3310  include/litmus/rt_param.h | 285 ++++++++++++++++++++++++++++++++++++++++++++++
3311  2 files changed, 290 insertions(+)
3312  create mode 100644 include/litmus/rt_param.h
3313 
3314 diff --git a/include/linux/sched.h b/include/linux/sched.h
3315 index 597c8ab..164bb0d 100644
3316 --- a/include/linux/sched.h
3317 +++ b/include/linux/sched.h
3318 @@ -55,6 +55,8 @@ struct sched_param {
3319  
3320  #include <asm/processor.h>
3321  
3322 +#include <litmus/rt_param.h>
3323 +
3324  struct exec_domain;
3325  struct futex_pi_state;
3326  struct robust_list_head;
3327 @@ -1369,6 +1371,9 @@ struct task_struct {
3328  	int nr_dirtied_pause;
3329  	unsigned long dirty_paused_when; /* start of a write-and-pause period */
3330  
3331 +	/* LITMUS RT parameters and state */
3332 +	struct rt_param rt_param;
3333 +
3334  #ifdef CONFIG_LATENCYTOP
3335  	int latency_record_count;
3336  	struct latency_record latency_record[LT_SAVECOUNT];
3337 diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
3338 new file mode 100644
3339 index 0000000..ce76faa
3340 --- /dev/null
3341 +++ b/include/litmus/rt_param.h
3342 @@ -0,0 +1,285 @@
3343 +/*
3344 + * Definition of the scheduler plugin interface.
3345 + *
3346 + */
3347 +#ifndef _LINUX_RT_PARAM_H_
3348 +#define _LINUX_RT_PARAM_H_
3349 +
3350 +/* Litmus time type. */
3351 +typedef unsigned long long lt_t;
3352 +
3353 +static inline int lt_after(lt_t a, lt_t b)
3354 +{
3355 +	return ((long long) b) - ((long long) a) < 0;
3356 +}
3357 +#define lt_before(a, b) lt_after(b, a)
3358 +
3359 +static inline int lt_after_eq(lt_t a, lt_t b)
3360 +{
3361 +	return ((long long) a) - ((long long) b) >= 0;
3362 +}
3363 +#define lt_before_eq(a, b) lt_after_eq(b, a)
3364 +
3365 +/* different types of clients */
3366 +typedef enum {
3367 +	RT_CLASS_HARD,
3368 +	RT_CLASS_SOFT,
3369 +	RT_CLASS_BEST_EFFORT
3370 +} task_class_t;
3371 +
3372 +typedef enum {
3373 +	NO_ENFORCEMENT,      /* job may overrun unhindered */
3374 +	QUANTUM_ENFORCEMENT, /* budgets are only checked on quantum boundaries */
3375 +	PRECISE_ENFORCEMENT  /* budgets are enforced with hrtimers */
3376 +} budget_policy_t;
3377 +
3378 +/* Release behaviors for jobs. PERIODIC and EARLY jobs
3379 +   must end by calling sys_complete_job() (or equivalent)
3380 +   to set up their next release and deadline. */
3381 +typedef enum {
3382 +	/* Jobs are released sporadically (provided job precedence
3383 +       constraints are met). */
3384 +	TASK_SPORADIC,
3385 +
3386 +	/* Jobs are released periodically (provided job precedence
3387 +       constraints are met). */
3388 +	TASK_PERIODIC,
3389 +
3390 +    /* Jobs are released immediately after meeting precedence
3391 +       constraints. Beware this can peg your CPUs if used in
3392 +       the wrong applications. Only supported by EDF schedulers. */
3393 +	TASK_EARLY
3394 +} release_policy_t;
3395 +
3396 +/* We use the common priority interpretation "lower index == higher priority",
3397 + * which is commonly used in fixed-priority schedulability analysis papers.
3398 + * So, a numerically lower priority value implies higher scheduling priority,
3399 + * with priority 1 being the highest priority. Priority 0 is reserved for
3400 + * priority boosting. LITMUS_MAX_PRIORITY denotes the maximum priority value
3401 + * range.
3402 + */
3403 +
3404 +#define LITMUS_MAX_PRIORITY     512
3405 +#define LITMUS_HIGHEST_PRIORITY   1
3406 +#define LITMUS_LOWEST_PRIORITY    (LITMUS_MAX_PRIORITY - 1)
3407 +
3408 +/* Provide generic comparison macros for userspace,
3409 + * in case that we change this later. */
3410 +#define litmus_higher_fixed_prio(a, b)	(a < b)
3411 +#define litmus_lower_fixed_prio(a, b)	(a > b)
3412 +#define litmus_is_valid_fixed_prio(p)		\
3413 +	((p) >= LITMUS_HIGHEST_PRIORITY &&	\
3414 +	 (p) <= LITMUS_LOWEST_PRIORITY)
3415 +
3416 +struct rt_task {
3417 +	lt_t 		exec_cost;
3418 +	lt_t 		period;
3419 +	lt_t		relative_deadline;
3420 +	lt_t		phase;
3421 +	unsigned int	cpu;
3422 +	unsigned int	priority;
3423 +	task_class_t	cls;
3424 +	budget_policy_t  budget_policy;  /* ignored by pfair */
3425 +	release_policy_t release_policy;
3426 +};
3427 +
3428 +union np_flag {
3429 +	uint64_t raw;
3430 +	struct {
3431 +		/* Is the task currently in a non-preemptive section? */
3432 +		uint64_t flag:31;
3433 +		/* Should the task call into the scheduler? */
3434 +		uint64_t preempt:1;
3435 +	} np;
3436 +};
3437 +
3438 +/* The definition of the data that is shared between the kernel and real-time
3439 + * tasks via a shared page (see litmus/ctrldev.c).
3440 + *
3441 + * WARNING: User space can write to this, so don't trust
3442 + * the correctness of the fields!
3443 + *
3444 + * This serves two purposes: to enable efficient signaling
3445 + * of non-preemptive sections (user->kernel) and
3446 + * delayed preemptions (kernel->user), and to export
3447 + * some real-time relevant statistics such as preemption and
3448 + * migration data to user space. We can't use a device to export
3449 + * statistics because we want to avoid system call overhead when
3450 + * determining preemption/migration overheads.
3451 + */
3452 +struct control_page {
3453 +	/* This flag is used by userspace to communicate non-preemptive
3454 +	 * sections. */
3455 +	volatile union np_flag sched;
3456 +
3457 +	volatile uint64_t irq_count; /* Incremented by the kernel each time an IRQ is
3458 +				      * handled. */
3459 +
3460 +	/* Locking overhead tracing: userspace records here the time stamp
3461 +	 * and IRQ counter prior to starting the system call. */
3462 +	uint64_t ts_syscall_start;  /* Feather-Trace cycles */
3463 +	uint64_t irq_syscall_start; /* Snapshot of irq_count when the syscall
3464 +				     * started. */
3465 +
3466 +	/* to be extended */
3467 +};
3468 +
3469 +/* Expected offsets within the control page. */
3470 +
3471 +#define LITMUS_CP_OFFSET_SCHED		0
3472 +#define LITMUS_CP_OFFSET_IRQ_COUNT	8
3473 +#define LITMUS_CP_OFFSET_TS_SC_START	16
3474 +#define LITMUS_CP_OFFSET_IRQ_SC_START	24
3475 +
3476 +/* don't export internal data structures to user space (liblitmus) */
3477 +#ifdef __KERNEL__
3478 +
3479 +struct _rt_domain;
3480 +struct bheap_node;
3481 +struct release_heap;
3482 +
3483 +struct rt_job {
3484 +	/* Time instant the job was or will be released.  */
3485 +	lt_t	release;
3486 +
3487 +	/* What is the current deadline? */
3488 +	lt_t   	deadline;
3489 +
3490 +	/* How much service has this job received so far? */
3491 +	lt_t	exec_time;
3492 +
3493 +	/* By how much did the prior job miss its deadline?
3494 +	 * Value differs from tardiness in that lateness may
3495 +	 * be negative (when the job finishes before its deadline).
3496 +	 */
3497 +	long long	lateness;
3498 +
3499 +	/* Which job is this? This is used to let user space
3500 +	 * specify which job to wait for, which is important if jobs
3501 +	 * overrun. If we just call sys_sleep_next_period() then we
3502 +	 * will unintentionally miss jobs after an overrun.
3503 +	 *
3504 +	 * Increase this sequence number when a job is released.
3505 +	 */
3506 +	unsigned int    job_no;
3507 +};
3508 +
3509 +struct pfair_param;
3510 +
3511 +/*	RT task parameters for scheduling extensions
3512 + *	These parameters are inherited during clone and therefore must
3513 + *	be explicitly set up before the task set is launched.
3514 + */
3515 +struct rt_param {
3516 +	/* Generic flags available for plugin-internal use. */
3517 +	unsigned int 		flags:8;
3518 +
3519 +	/* do we need to check for srp blocking? */
3520 +	unsigned int		srp_non_recurse:1;
3521 +
3522 +	/* is the task present? (true if it can be scheduled) */
3523 +	unsigned int		present:1;
3524 +
3525 +	/* has the task completed? */
3526 +	unsigned int		completed:1;
3527 +
3528 +#ifdef CONFIG_LITMUS_LOCKING
3529 +	/* Is the task being priority-boosted by a locking protocol? */
3530 +	unsigned int		priority_boosted:1;
3531 +	/* If so, when did this start? */
3532 +	lt_t			boost_start_time;
3533 +
3534 +	/* How many LITMUS^RT locks does the task currently hold/wait for? */
3535 +	unsigned int		num_locks_held;
3536 +	/* How many PCP/SRP locks does the task currently hold/wait for? */
3537 +	unsigned int		num_local_locks_held;
3538 +#endif
3539 +
3540 +	/* user controlled parameters */
3541 +	struct rt_task 		task_params;
3542 +
3543 +	/* timing parameters */
3544 +	struct rt_job 		job_params;
3545 +
3546 +	/* Should the next job be released at some time other than
3547 +	 * just period time units after the last release?
3548 +	 */
3549 +	unsigned int		sporadic_release:1;
3550 +	lt_t			sporadic_release_time;
3551 +
3552 +
3553 +	/* Task representing the current "inherited" task
3554 +	 * priority, assigned by inherit_priority and
3555 +	 * return_priority in the scheduler plugins.
3556 +	 * Could point to self if PI does not result in
3557 +	 * an increased task priority.
3558 +	 */
3559 +	 struct task_struct*	inh_task;
3560 +
3561 +#ifdef CONFIG_NP_SECTION
3562 +	/* For the FMLP under PSN-EDF, it is required to make the task
3563 +	 * non-preemptive from kernel space. In order not to interfere with
3564 +	 * user space, this counter indicates the kernel space np setting.
3565 +	 * kernel_np > 0 => task is non-preemptive
3566 +	 */
3567 +	unsigned int	kernel_np;
3568 +#endif
3569 +
3570 +	/* This field can be used by plugins to store where the task
3571 +	 * is currently scheduled. It is the responsibility of the
3572 +	 * plugin to avoid race conditions.
3573 +	 *
3574 +	 * This is used by GSN-EDF and PFAIR.
3575 +	 */
3576 +	volatile int		scheduled_on;
3577 +
3578 +	/* Is the stack of the task currently in use? This is updated by
3579 +	 * the LITMUS core.
3580 +	 *
3581 +	 * Be careful to avoid deadlocks!
3582 +	 */
3583 +	volatile int		stack_in_use;
3584 +
3585 +	/* This field can be used by plugins to store where the task
3586 +	 * is currently linked. It is the responsibility of the plugin
3587 +	 * to avoid race conditions.
3588 +	 *
3589 +	 * Used by GSN-EDF.
3590 +	 */
3591 +	volatile int		linked_on;
3592 +
3593 +	/* PFAIR/PD^2 state. Allocated on demand. */
3594 +	struct pfair_param*	pfair;
3595 +
3596 +	/* Fields saved before BE->RT transition.
3597 +	 */
3598 +	int old_policy;
3599 +	int old_prio;
3600 +
3601 +	/* ready queue for this task */
3602 +	struct _rt_domain* domain;
3603 +
3604 +	/* heap element for this task
3605 +	 *
3606 +	 * Warning: Don't statically allocate this node. The heap
3607 +	 *          implementation swaps these between tasks, thus after
3608 +	 *          dequeuing from a heap you may end up with a different node
3609 +	 *          then the one you had when enqueuing the task.  For the same
3610 +	 *          than the one you had when enqueuing the task.  For the same
3611 +	 *          other than this pointer (which is updated by the heap
3612 +	 *          implementation).
3613 +	 */
3614 +	struct bheap_node*	heap_node;
3615 +	struct release_heap*	rel_heap;
3616 +
3617 +	/* Used by rt_domain to queue task in release list.
3618 +	 */
3619 +	struct list_head list;
3620 +
3621 +	/* Pointer to the page shared between userspace and kernel. */
3622 +	struct control_page * ctrl_page;
3623 +};
3624 +
3625 +#endif
3626 +
3627 +#endif
3628 -- 
3629 1.8.1.2
3630 
3631 
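The LITMUS_CP_OFFSET_* constants above document the layout of struct control_page as seen from user space (an 8-byte np_flag union followed by three 64-bit fields). A minimal sketch of how the expected offsets could be checked against the actual struct layout, for illustration only and assuming a C11 compiler (this check is not part of the patches):

	#include <stddef.h>
	#include <litmus/rt_param.h>

	/* A mismatch here would break the user/kernel ABI, since user-space
	 * code reads these fields directly from the shared page. */
	_Static_assert(offsetof(struct control_page, sched)
		       == LITMUS_CP_OFFSET_SCHED, "sched offset");
	_Static_assert(offsetof(struct control_page, irq_count)
		       == LITMUS_CP_OFFSET_IRQ_COUNT, "irq_count offset");
	_Static_assert(offsetof(struct control_page, ts_syscall_start)
		       == LITMUS_CP_OFFSET_TS_SC_START, "ts_syscall_start offset");
	_Static_assert(offsetof(struct control_page, irq_syscall_start)
		       == LITMUS_CP_OFFSET_IRQ_SC_START, "irq_syscall_start offset");
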
3632 From c158cd6f9b7928a5ab4a514f9edc044d31f3913b Mon Sep 17 00:00:00 2001
3633 From: Bjoern Brandenburg <bbb@mpi-sws.org>
3634 Date: Tue, 25 Jun 2013 06:31:46 +0200
3635 Subject: [PATCH 013/119] Introduce main LITMUS^RT header
3636 
3637 This patch adds a basic litmus/litmus.h, which is required for the core
3638 LITMUS^RT infrastructure to compile.
3639 ---
3640  include/litmus/litmus.h    | 61 ++++++++++++++++++++++++++++++++++++++++++++++
3641  include/uapi/linux/sched.h |  1 +
3642  2 files changed, 62 insertions(+)
3643  create mode 100644 include/litmus/litmus.h
3644 
3645 diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h
3646 new file mode 100644
3647 index 0000000..c87863c
3648 --- /dev/null
3649 +++ b/include/litmus/litmus.h
3650 @@ -0,0 +1,61 @@
3651 +/*
3652 + * Constant definitions related to
3653 + * scheduling policy.
3654 + */
3655 +
3656 +#ifndef _LINUX_LITMUS_H_
3657 +#define _LINUX_LITMUS_H_
3658 +
3659 +#define is_realtime(t) 		((t)->policy == SCHED_LITMUS)
3660 +
3661 +#define tsk_rt(t)		(&(t)->rt_param)
3662 +
3663 +/*	Realtime utility macros */
3664 +#ifdef CONFIG_LITMUS_LOCKING
3665 +#define is_priority_boosted(t)  (tsk_rt(t)->priority_boosted)
3666 +#define get_boost_start(t)  (tsk_rt(t)->boost_start_time)
3667 +#else
3668 +#define is_priority_boosted(t)  0
3669 +#define get_boost_start(t)      0
3670 +#endif
3671 +
3672 +
3673 +/* task_params macros */
3674 +#define get_exec_cost(t)  	(tsk_rt(t)->task_params.exec_cost)
3675 +#define get_rt_period(t)	(tsk_rt(t)->task_params.period)
3676 +#define get_rt_relative_deadline(t)	(tsk_rt(t)->task_params.relative_deadline)
3677 +#define get_rt_phase(t)		(tsk_rt(t)->task_params.phase)
3678 +#define get_partition(t) 	(tsk_rt(t)->task_params.cpu)
3679 +#define get_priority(t) 	(tsk_rt(t)->task_params.priority)
3680 +#define get_class(t)        (tsk_rt(t)->task_params.cls)
3681 +
3682 +/* job_param macros */
3683 +#define get_exec_time(t)    (tsk_rt(t)->job_params.exec_time)
3684 +#define get_deadline(t)		(tsk_rt(t)->job_params.deadline)
3685 +#define get_release(t)		(tsk_rt(t)->job_params.release)
3686 +#define get_lateness(t)		(tsk_rt(t)->job_params.lateness)
3687 +
3688 +#define is_hrt(t)     		\
3689 +	(tsk_rt(t)->task_params.cls == RT_CLASS_HARD)
3690 +#define is_srt(t)     		\
3691 +	(tsk_rt(t)->task_params.cls == RT_CLASS_SOFT)
3692 +#define is_be(t)      		\
3693 +	(tsk_rt(t)->task_params.cls == RT_CLASS_BEST_EFFORT)
3694 +
3695 +/* Our notion of time within LITMUS: kernel monotonic time. */
3696 +static inline lt_t litmus_clock(void)
3697 +{
3698 +	return ktime_to_ns(ktime_get());
3699 +}
3700 +
3701 +static inline struct control_page* get_control_page(struct task_struct *t)
3702 +{
3703 +	return tsk_rt(t)->ctrl_page;
3704 +}
3705 +
3706 +static inline int has_control_page(struct task_struct* t)
3707 +{
3708 +	return tsk_rt(t)->ctrl_page != NULL;
3709 +}
3710 +
3711 +#endif
3712 diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
3713 index 5a0f945..6a7b1b7 100644
3714 --- a/include/uapi/linux/sched.h
3715 +++ b/include/uapi/linux/sched.h
3716 @@ -39,6 +39,7 @@
3717  #define SCHED_BATCH		3
3718  /* SCHED_ISO: reserved but not implemented yet */
3719  #define SCHED_IDLE		5
3720 +#define SCHED_LITMUS		6
3721  /* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */
3722  #define SCHED_RESET_ON_FORK     0x40000000
3723  
3724 -- 
3725 1.8.1.2
3726 
3727 
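The accessor macros above compose naturally when writing plugin-side code. As an illustration only (these helpers are invented for this sketch and are not part of the patches; lt_t and lt_after() come from rt_param.h):

	#include <litmus/litmus.h>
	#include <litmus/rt_param.h>

	/* Remaining execution budget of the current job: worst-case execution
	 * cost minus the service the job has received so far. */
	static inline lt_t demo_budget_remaining(struct task_struct *t)
	{
		if (get_exec_time(t) < get_exec_cost(t))
			return get_exec_cost(t) - get_exec_time(t);
		else
			return 0; /* budget exhausted */
	}

	/* A job is tardy once the current time is past its absolute deadline. */
	static inline int demo_job_is_tardy(struct task_struct *t)
	{
		return lt_after(litmus_clock(), get_deadline(t));
	}
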
3728 From fd09632251e9988dc2d064b80d69f73a3b50e4e9 Mon Sep 17 00:00:00 2001
3729 From: Bjoern Brandenburg <bbb@mpi-sws.org>
3730 Date: Sun, 23 Jun 2013 14:42:21 +0200
3731 Subject: [PATCH 014/119] Feather-Trace: use proper definition of is_realtime()
3732 
3733 Remove dummy implementation of is_realtime() in trace.c.
3734 ---
3735  litmus/trace.c | 4 +---
3736  1 file changed, 1 insertion(+), 3 deletions(-)
3737 
3738 diff --git a/litmus/trace.c b/litmus/trace.c
3739 index a378623..7eacf2e 100644
3740 --- a/litmus/trace.c
3741 +++ b/litmus/trace.c
3742 @@ -3,11 +3,9 @@
3743  #include <linux/uaccess.h>
3744  
3745  #include <litmus/ftdev.h>
3746 +#include <litmus/litmus.h>
3747  #include <litmus/trace.h>
3748  
3749 -/* dummy definition of is_realtime() */
3750 -#define is_realtime(t) (0)
3751 -
3752  /******************************************************************************/
3753  /*                          Allocation                                        */
3754  /******************************************************************************/
3755 -- 
3756 1.8.1.2
3757 
3758 
3759 From f2c8dedbf8d03df6f0a977f9e8aee41227cd8893 Mon Sep 17 00:00:00 2001
3760 From: Bjoern Brandenburg <bbb@mpi-sws.org>
3761 Date: Sun, 23 Jun 2013 14:43:54 +0200
3762 Subject: [PATCH 015/119] Feather-Trace: write interrupt counts to control page
3763 
3764 This patch exports the interrupt counter to userspace via the control
3765 page.
3766 ---
3767  litmus/trace.c | 3 +++
3768  1 file changed, 3 insertions(+)
3769 
3770 diff --git a/litmus/trace.c b/litmus/trace.c
3771 index 7eacf2e..6f2e295 100644
3772 --- a/litmus/trace.c
3773 +++ b/litmus/trace.c
3774 @@ -29,6 +29,9 @@ void ft_irq_fired(void)
3775  	/* Only called with preemptions disabled.  */
3776  	atomic_inc(&__get_cpu_var(irq_fired_count));
3777  	atomic_inc(&__get_cpu_var(cpu_irq_fired_count));
3778 +
3779 +	if (has_control_page(current))
3780 +		get_control_page(current)->irq_count++;
3781  }
3782  
3783  static inline void clear_irq_fired(void)
3784 -- 
3785 1.8.1.2
3786 
3787 
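With the counter exported, user space can detect whether a measured code section was disturbed by interrupts: sample irq_count before and after the section and discard the measurement if the counter changed. A hedged sketch (illustrative only; how the control page is mapped is handled elsewhere, and read_cycle_counter()/critical_section_under_test() are stand-ins for platform- and benchmark-specific code):

	#include <stdint.h>

	/* ctrl is assumed to point at the task's mapped control page. */
	static int measure_section(volatile struct control_page *ctrl,
				   uint64_t *cycles_out)
	{
		uint64_t irqs_before = ctrl->irq_count;
		uint64_t start = read_cycle_counter();

		critical_section_under_test();

		uint64_t end = read_cycle_counter();
		if (ctrl->irq_count != irqs_before)
			return -1; /* disturbed by at least one IRQ: discard */
		*cycles_out = end - start;
		return 0;
	}
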
3788 From b8512db6218d2c86320c7ec0d07a6efa465e3fcd Mon Sep 17 00:00:00 2001
3789 From: Bjoern Brandenburg <bbb@mpi-sws.org>
3790 Date: Tue, 12 Feb 2013 13:45:47 +0100
3791 Subject: [PATCH 016/119] Add schedule tracing support
3792 
3793 This patch introduces the sched_trace infrastructure, which in
3794 principle allows tracing the generated schedule.  However, this patch
3795 does not yet integrate the callbacks with the kernel.
3796 ---
3797  include/litmus/sched_trace.h | 251 +++++++++++++++++++++++++++++++++++++++++++
3798  litmus/Kconfig               |  34 ++++++
3799  litmus/Makefile              |   1 +
3800  litmus/sched_task_trace.c    | 241 +++++++++++++++++++++++++++++++++++++++++
3801  4 files changed, 527 insertions(+)
3802  create mode 100644 include/litmus/sched_trace.h
3803  create mode 100644 litmus/sched_task_trace.c
3804 
3805 diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h
3806 new file mode 100644
3807 index 0000000..6044d9f
3808 --- /dev/null
3809 +++ b/include/litmus/sched_trace.h
3810 @@ -0,0 +1,251 @@
3811 +/*
3812 + * sched_trace.h -- record scheduler events to a byte stream for offline analysis.
3813 + */
3814 +#ifndef _LINUX_SCHED_TRACE_H_
3815 +#define _LINUX_SCHED_TRACE_H_
3816 +
3817 +/* all times in nanoseconds */
3818 +
3819 +struct st_trace_header {
3820 +	u8	type;		/* Of what type is this record?  */
3821 +	u8	cpu;		/* On which CPU was it recorded? */
3822 +	u16	pid;		/* PID of the task.              */
3823 +	u32	job;		/* The job sequence number.      */
3824 +};
3825 +
3826 +#define ST_NAME_LEN 16
3827 +struct st_name_data {
3828 +	char	cmd[ST_NAME_LEN];/* The name of the executable of this process. */
3829 +};
3830 +
3831 +struct st_param_data {		/* regular params */
3832 +	u32	wcet;
3833 +	u32	period;
3834 +	u32	phase;
3835 +	u8	partition;
3836 +	u8	class;
3837 +	u8	__unused[2];
3838 +};
3839 +
3840 +struct st_release_data {	/* A job was/is going to be released. */
3841 +	u64	release;	/* What's the release time?              */
3842 +	u64	deadline;	/* By when must it finish?		 */
3843 +};
3844 +
3845 +struct st_assigned_data {	/* A job was assigned to a CPU. 	 */
3846 +	u64	when;
3847 +	u8	target;		/* Where should it execute?	         */
3848 +	u8	__unused[7];
3849 +};
3850 +
3851 +struct st_switch_to_data {	/* A process was switched to on a given CPU.   */
3852 +	u64	when;		/* When did this occur?                        */
3853 +	u32	exec_time;	/* Time the current job has executed.          */
3854 +	u8	__unused[4];
3855 +
3856 +};
3857 +
3858 +struct st_switch_away_data {	/* A process was switched away from on a given CPU. */
3859 +	u64	when;
3860 +	u64	exec_time;
3861 +};
3862 +
3863 +struct st_completion_data {	/* A job completed. */
3864 +	u64	when;
3865 +	u8	forced:1; 	/* Set to 1 if the job overran and the kernel advanced
3866 +				 * to the next task automatically; set to 0 otherwise.
3867 +				 */
3868 +	u8	__uflags:7;
3869 +	u8	__unused[7];
3870 +};
3871 +
3872 +struct st_block_data {		/* A task blocks. */
3873 +	u64	when;
3874 +	u64	__unused;
3875 +};
3876 +
3877 +struct st_resume_data {		/* A task resumes. */
3878 +	u64	when;
3879 +	u64	__unused;
3880 +};
3881 +
3882 +struct st_action_data {
3883 +	u64	when;
3884 +	u8	action;
3885 +	u8	__unused[7];
3886 +};
3887 +
3888 +struct st_sys_release_data {
3889 +	u64	when;
3890 +	u64	release;
3891 +};
3892 +
3893 +#define DATA(x) struct st_ ## x ## _data x;
3894 +
3895 +typedef enum {
3896 +	ST_NAME = 1,		/* Start at one, so that we can spot
3897 +				 * uninitialized records. */
3898 +	ST_PARAM,
3899 +	ST_RELEASE,
3900 +	ST_ASSIGNED,
3901 +	ST_SWITCH_TO,
3902 +	ST_SWITCH_AWAY,
3903 +	ST_COMPLETION,
3904 +	ST_BLOCK,
3905 +	ST_RESUME,
3906 +	ST_ACTION,
3907 +	ST_SYS_RELEASE
3908 +} st_event_record_type_t;
3909 +
3910 +struct st_event_record {
3911 +	struct st_trace_header hdr;
3912 +	union {
3913 +		u64 raw[2];
3914 +
3915 +		DATA(name);
3916 +		DATA(param);
3917 +		DATA(release);
3918 +		DATA(assigned);
3919 +		DATA(switch_to);
3920 +		DATA(switch_away);
3921 +		DATA(completion);
3922 +		DATA(block);
3923 +		DATA(resume);
3924 +		DATA(action);
3925 +		DATA(sys_release);
3926 +	} data;
3927 +};
3928 +
3929 +#undef DATA
3930 +
3931 +#ifdef __KERNEL__
3932 +
3933 +#include <linux/sched.h>
3934 +#include <litmus/feather_trace.h>
3935 +
3936 +#ifdef CONFIG_SCHED_TASK_TRACE
3937 +
3938 +#define SCHED_TRACE(id, callback, task) \
3939 +	ft_event1(id, callback, task)
3940 +#define SCHED_TRACE2(id, callback, task, xtra) \
3941 +	ft_event2(id, callback, task, xtra)
3942 +
3943 +/* provide prototypes; needed on sparc64 */
3944 +#ifndef NO_TASK_TRACE_DECLS
3945 +feather_callback void do_sched_trace_task_name(unsigned long id,
3946 +					       struct task_struct* task);
3947 +feather_callback void do_sched_trace_task_param(unsigned long id,
3948 +						struct task_struct* task);
3949 +feather_callback void do_sched_trace_task_release(unsigned long id,
3950 +						  struct task_struct* task);
3951 +feather_callback void do_sched_trace_task_switch_to(unsigned long id,
3952 +						    struct task_struct* task);
3953 +feather_callback void do_sched_trace_task_switch_away(unsigned long id,
3954 +						      struct task_struct* task);
3955 +feather_callback void do_sched_trace_task_completion(unsigned long id,
3956 +						     struct task_struct* task,
3957 +						     unsigned long forced);
3958 +feather_callback void do_sched_trace_task_block(unsigned long id,
3959 +						struct task_struct* task);
3960 +feather_callback void do_sched_trace_task_resume(unsigned long id,
3961 +						 struct task_struct* task);
3962 +feather_callback void do_sched_trace_action(unsigned long id,
3963 +					    struct task_struct* task,
3964 +					    unsigned long action);
3965 +feather_callback void do_sched_trace_sys_release(unsigned long id,
3966 +						 lt_t* start);
3967 +
3968 +#endif
3969 +
3970 +#else
3971 +
3972 +#define SCHED_TRACE(id, callback, task)        /* no tracing */
3973 +#define SCHED_TRACE2(id, callback, task, xtra) /* no tracing */
3974 +
3975 +#endif
3976 +
3977 +#ifdef CONFIG_SCHED_LITMUS_TRACEPOINT
3978 +
3979 +#include <trace/events/litmus.h>
3980 +
3981 +#else
3982 +
3983 +/* Override trace macros to actually do nothing */
3984 +#define trace_litmus_task_param(t)
3985 +#define trace_litmus_task_release(t)
3986 +#define trace_litmus_switch_to(t)
3987 +#define trace_litmus_switch_away(prev)
3988 +#define trace_litmus_task_completion(t, forced)
3989 +#define trace_litmus_task_block(t)
3990 +#define trace_litmus_task_resume(t)
3991 +#define trace_litmus_sys_release(start)
3992 +
3993 +#endif
3994 +
3995 +
3996 +#define SCHED_TRACE_BASE_ID 500
3997 +
3998 +
3999 +#define sched_trace_task_name(t)					\
4000 +	SCHED_TRACE(SCHED_TRACE_BASE_ID + 1,				\
4001 +			do_sched_trace_task_name, t)
4002 +
4003 +#define sched_trace_task_param(t)					\
4004 +	do {								\
4005 +		SCHED_TRACE(SCHED_TRACE_BASE_ID + 2,			\
4006 +				do_sched_trace_task_param, t);		\
4007 +	} while (0)
4008 +
4009 +#define sched_trace_task_release(t)					\
4010 +	do {								\
4011 +		SCHED_TRACE(SCHED_TRACE_BASE_ID + 3,			\
4012 +				do_sched_trace_task_release, t);	\
4013 +	} while (0)
4014 +
4015 +#define sched_trace_task_switch_to(t)					\
4016 +	do {								\
4017 +		SCHED_TRACE(SCHED_TRACE_BASE_ID + 4,			\
4018 +			do_sched_trace_task_switch_to, t);		\
4019 +	} while (0)
4020 +
4021 +#define sched_trace_task_switch_away(t)					\
4022 +	do {								\
4023 +		SCHED_TRACE(SCHED_TRACE_BASE_ID + 5,			\
4024 +			do_sched_trace_task_switch_away, t);		\
4025 +	} while (0)
4026 +
4027 +#define sched_trace_task_completion(t, forced)				\
4028 +	do {								\
4029 +		SCHED_TRACE2(SCHED_TRACE_BASE_ID + 6,			\
4030 +				do_sched_trace_task_completion, t,	\
4031 +				(unsigned long) forced);		\
4032 +	} while (0)
4033 +
4034 +#define sched_trace_task_block(t)					\
4035 +	do {								\
4036 +		SCHED_TRACE(SCHED_TRACE_BASE_ID + 7,			\
4037 +			do_sched_trace_task_block, t);			\
4038 +	} while (0)
4039 +
4040 +#define sched_trace_task_resume(t)					\
4041 +	do {								\
4042 +		SCHED_TRACE(SCHED_TRACE_BASE_ID + 8,			\
4043 +				do_sched_trace_task_resume, t);		\
4044 +	} while (0)
4045 +
4046 +#define sched_trace_action(t, action)					\
4047 +	SCHED_TRACE2(SCHED_TRACE_BASE_ID + 9,				\
4048 +		do_sched_trace_action, t, (unsigned long) action);
4049 +
4050 +/* when is a pointer, it does not need an explicit cast to unsigned long */
4051 +#define sched_trace_sys_release(when)					\
4052 +	do {								\
4053 +		SCHED_TRACE(SCHED_TRACE_BASE_ID + 10,			\
4054 +			do_sched_trace_sys_release, when);		\
4055 +	} while (0)
4056 +
4057 +#define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */
4058 +
4059 +#endif /* __KERNEL__ */
4060 +
4061 +#endif
4062 diff --git a/litmus/Kconfig b/litmus/Kconfig
4063 index e4624ee..19211ac 100644
4064 --- a/litmus/Kconfig
4065 +++ b/litmus/Kconfig
4066 @@ -24,6 +24,40 @@ config FEATHER_TRACE
4067  	  you still need to enable SCHED_TASK_TRACE and/or SCHED_OVERHEAD_TRACE to
4068  	  actually enable any events.
4069  
4070 +config SCHED_TASK_TRACE
4071 +	bool "Trace real-time tasks"
4072 +	depends on FEATHER_TRACE
4073 +	default y
4074 +	help
4075 +	  Include support for the sched_trace_XXX() tracing functions. This
4076 +	  allows the collection of real-time task events such as job
4077 +	  completions, job releases, early completions, etc. This results in a
4078 +	  small overhead in the scheduling code. Disable if the overhead is not
4079 +	  acceptable (e.g., benchmarking).
4080 +
4081 +	  Say Yes for debugging.
4082 +	  Say No for overhead tracing.
4083 +
4084 +config SCHED_TASK_TRACE_SHIFT
4085 +       int "Buffer size for sched_trace_xxx() events"
4086 +       depends on SCHED_TASK_TRACE
4087 +       range 8 13
4088 +       default 9
4089 +       help
4090 +
4091 +         Select the buffer size of sched_trace_xxx() events as a power of two.
4092 +	 These buffers are statically allocated as per-CPU data. Each event
4093 +	 requires 24 bytes storage plus one additional flag byte. Too large
4094 +	 buffers can cause issues with the per-cpu allocator (and waste
4095 +	 memory). Too small buffers can cause scheduling events to be lost. The
4096 +	 "right" size is workload dependent and depends on the number of tasks,
4097 +	 each task's period, each task's number of suspensions, and how often
4098 +	 the buffer is flushed.
4099 +
4100 +	 Examples: 12 =>   4k events
4101 +		   10 =>   1k events
4102 +		    8 =>  512 events
4103 +
4104  config SCHED_OVERHEAD_TRACE
4105  	bool "Record timestamps for overhead measurements"
4106  	depends on FEATHER_TRACE
4107 diff --git a/litmus/Makefile b/litmus/Makefile
4108 index 07f065f..6318f1c 100644
4109 --- a/litmus/Makefile
4110 +++ b/litmus/Makefile
4111 @@ -3,5 +3,6 @@
4112  #
4113  
4114  obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
4115 +obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o
4116  obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o
4117  obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o
4118 diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c
4119 new file mode 100644
4120 index 0000000..5ef8d09
4121 --- /dev/null
4122 +++ b/litmus/sched_task_trace.c
4123 @@ -0,0 +1,241 @@
4124 +/*
4125 + * sched_task_trace.c -- record scheduling events to a byte stream
4126 + */
4127 +
4128 +#define NO_TASK_TRACE_DECLS
4129 +
4130 +#include <linux/module.h>
4131 +#include <linux/sched.h>
4132 +#include <linux/percpu.h>
4133 +
4134 +#include <litmus/ftdev.h>
4135 +#include <litmus/litmus.h>
4136 +
4137 +#include <litmus/sched_trace.h>
4138 +#include <litmus/feather_trace.h>
4139 +#include <litmus/ftdev.h>
4140 +
4141 +
4142 +#define NO_EVENTS		(1 << CONFIG_SCHED_TASK_TRACE_SHIFT)
4143 +
4144 +#define now() litmus_clock()
4145 +
4146 +struct local_buffer {
4147 +	struct st_event_record record[NO_EVENTS];
4148 +	char   flag[NO_EVENTS];
4149 +	struct ft_buffer ftbuf;
4150 +};
4151 +
4152 +DEFINE_PER_CPU(struct local_buffer, st_event_buffer);
4153 +
4154 +static struct ftdev st_dev;
4155 +
4156 +static int st_dev_can_open(struct ftdev *dev, unsigned int cpu)
4157 +{
4158 +	return cpu_online(cpu) ? 0 : -ENODEV;
4159 +}
4160 +
4161 +static int __init init_sched_task_trace(void)
4162 +{
4163 +	struct local_buffer* buf;
4164 +	int i, ok = 0, err;
4165 +	printk("Allocated %u sched_trace_xxx() events per CPU "
4166 +	       "(buffer size: %d bytes)\n",
4167 +	       NO_EVENTS, (int) sizeof(struct local_buffer));
4168 +
4169 +	err = ftdev_init(&st_dev, THIS_MODULE,
4170 +			num_online_cpus(), "sched_trace");
4171 +	if (err)
4172 +		goto err_out;
4173 +
4174 +	for (i = 0; i < st_dev.minor_cnt; i++) {
4175 +		buf = &per_cpu(st_event_buffer, i);
4176 +		ok += init_ft_buffer(&buf->ftbuf, NO_EVENTS,
4177 +				     sizeof(struct st_event_record),
4178 +				     buf->flag,
4179 +				     buf->record);
4180 +		st_dev.minor[i].buf = &buf->ftbuf;
4181 +	}
4182 +	if (ok == st_dev.minor_cnt) {
4183 +		st_dev.can_open = st_dev_can_open;
4184 +		err = register_ftdev(&st_dev);
4185 +		if (err)
4186 +			goto err_dealloc;
4187 +	} else {
4188 +		err = -EINVAL;
4189 +		goto err_dealloc;
4190 +	}
4191 +
4192 +	return 0;
4193 +
4194 +err_dealloc:
4195 +	ftdev_exit(&st_dev);
4196 +err_out:
4197 +	printk(KERN_WARNING "Could not register sched_trace module\n");
4198 +	return err;
4199 +}
4200 +
4201 +static void __exit exit_sched_task_trace(void)
4202 +{
4203 +	ftdev_exit(&st_dev);
4204 +}
4205 +
4206 +module_init(init_sched_task_trace);
4207 +module_exit(exit_sched_task_trace);
4208 +
4209 +
4210 +static inline struct st_event_record* get_record(u8 type, struct task_struct* t)
4211 +{
4212 +	struct st_event_record* rec = NULL;
4213 +	struct local_buffer* buf;
4214 +
4215 +	buf = &get_cpu_var(st_event_buffer);
4216 +	if (ft_buffer_start_write(&buf->ftbuf, (void**) &rec)) {
4217 +		rec->hdr.type = type;
4218 +		rec->hdr.cpu  = smp_processor_id();
4219 +		rec->hdr.pid  = t ? t->pid : 0;
4220 +		rec->hdr.job  = t ? t->rt_param.job_params.job_no : 0;
4221 +	} else {
4222 +		put_cpu_var(st_event_buffer);
4223 +	}
4224 +	/* rec will be NULL if it failed */
4225 +	return rec;
4226 +}
4227 +
4228 +static inline void put_record(struct st_event_record* rec)
4229 +{
4230 +	struct local_buffer* buf;
4231 +	buf = &__get_cpu_var(st_event_buffer);
4232 +	ft_buffer_finish_write(&buf->ftbuf, rec);
4233 +	put_cpu_var(st_event_buffer);
4234 +}
4235 +
4236 +feather_callback void do_sched_trace_task_name(unsigned long id, unsigned long _task)
4237 +{
4238 +	struct task_struct *t = (struct task_struct*) _task;
4239 +	struct st_event_record* rec = get_record(ST_NAME, t);
4240 +	int i;
4241 +	if (rec) {
4242 +		for (i = 0; i < min(TASK_COMM_LEN, ST_NAME_LEN); i++)
4243 +			rec->data.name.cmd[i] = t->comm[i];
4244 +		put_record(rec);
4245 +	}
4246 +}
4247 +
4248 +feather_callback void do_sched_trace_task_param(unsigned long id, unsigned long _task)
4249 +{
4250 +	struct task_struct *t = (struct task_struct*) _task;
4251 +	struct st_event_record* rec = get_record(ST_PARAM, t);
4252 +	if (rec) {
4253 +		rec->data.param.wcet      = get_exec_cost(t);
4254 +		rec->data.param.period    = get_rt_period(t);
4255 +		rec->data.param.phase     = get_rt_phase(t);
4256 +		rec->data.param.partition = get_partition(t);
4257 +		rec->data.param.class     = get_class(t);
4258 +		put_record(rec);
4259 +	}
4260 +}
4261 +
4262 +feather_callback void do_sched_trace_task_release(unsigned long id, unsigned long _task)
4263 +{
4264 +	struct task_struct *t = (struct task_struct*) _task;
4265 +	struct st_event_record* rec = get_record(ST_RELEASE, t);
4266 +	if (rec) {
4267 +		rec->data.release.release  = get_release(t);
4268 +		rec->data.release.deadline = get_deadline(t);
4269 +		put_record(rec);
4270 +	}
4271 +}
4272 +
4273 +/* skipped: st_assigned_data, we don't use it atm */
4274 +
4275 +feather_callback void do_sched_trace_task_switch_to(unsigned long id,
4276 +						    unsigned long _task)
4277 +{
4278 +	struct task_struct *t = (struct task_struct*) _task;
4279 +	struct st_event_record* rec;
4280 +	if (is_realtime(t)) {
4281 +		rec = get_record(ST_SWITCH_TO, t);
4282 +		if (rec) {
4283 +			rec->data.switch_to.when      = now();
4284 +			rec->data.switch_to.exec_time = get_exec_time(t);
4285 +			put_record(rec);
4286 +		}
4287 +	}
4288 +}
4289 +
4290 +feather_callback void do_sched_trace_task_switch_away(unsigned long id,
4291 +						      unsigned long _task)
4292 +{
4293 +	struct task_struct *t = (struct task_struct*) _task;
4294 +	struct st_event_record* rec;
4295 +	if (is_realtime(t)) {
4296 +		rec = get_record(ST_SWITCH_AWAY, t);
4297 +		if (rec) {
4298 +			rec->data.switch_away.when      = now();
4299 +			rec->data.switch_away.exec_time = get_exec_time(t);
4300 +			put_record(rec);
4301 +		}
4302 +	}
4303 +}
4304 +
4305 +feather_callback void do_sched_trace_task_completion(unsigned long id,
4306 +						     unsigned long _task,
4307 +						     unsigned long forced)
4308 +{
4309 +	struct task_struct *t = (struct task_struct*) _task;
4310 +	struct st_event_record* rec = get_record(ST_COMPLETION, t);
4311 +	if (rec) {
4312 +		rec->data.completion.when   = now();
4313 +		rec->data.completion.forced = forced;
4314 +		put_record(rec);
4315 +	}
4316 +}
4317 +
4318 +feather_callback void do_sched_trace_task_block(unsigned long id,
4319 +						unsigned long _task)
4320 +{
4321 +	struct task_struct *t = (struct task_struct*) _task;
4322 +	struct st_event_record* rec = get_record(ST_BLOCK, t);
4323 +	if (rec) {
4324 +		rec->data.block.when      = now();
4325 +		put_record(rec);
4326 +	}
4327 +}
4328 +
4329 +feather_callback void do_sched_trace_task_resume(unsigned long id,
4330 +						 unsigned long _task)
4331 +{
4332 +	struct task_struct *t = (struct task_struct*) _task;
4333 +	struct st_event_record* rec = get_record(ST_RESUME, t);
4334 +	if (rec) {
4335 +		rec->data.resume.when      = now();
4336 +		put_record(rec);
4337 +	}
4338 +}
4339 +
4340 +feather_callback void do_sched_trace_sys_release(unsigned long id,
4341 +						 unsigned long _start)
4342 +{
4343 +	lt_t *start = (lt_t*) _start;
4344 +	struct st_event_record* rec = get_record(ST_SYS_RELEASE, NULL);
4345 +	if (rec) {
4346 +		rec->data.sys_release.when    = now();
4347 +		rec->data.sys_release.release = *start;
4348 +		put_record(rec);
4349 +	}
4350 +}
4351 +
4352 +feather_callback void do_sched_trace_action(unsigned long id,
4353 +					    unsigned long _task,
4354 +					    unsigned long action)
4355 +{
4356 +	struct task_struct *t = (struct task_struct*) _task;
4357 +	struct st_event_record* rec = get_record(ST_ACTION, t);
4358 +
4359 +	if (rec) {
4360 +		rec->data.action.when   = now();
4361 +		rec->data.action.action = action;
4362 +		put_record(rec);
4363 +	}
4364 +}
4365 -- 
4366 1.8.1.2
4367 
4368 
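Each record written by sched_task_trace.c is fixed-size: an 8-byte st_trace_header followed by a 16-byte payload, i.e., 24 bytes per event (matching the Kconfig help text above). An offline reader therefore only has to iterate over the byte stream and dispatch on hdr.type. A rough decoder sketch, for illustration only (in a real tool the struct definitions would be shared with the kernel headers or re-declared with <stdint.h> types, and the input would come from the registered sched_trace ftdev device nodes):

	#include <stdio.h>

	static void decode_stream(FILE *f)
	{
		struct st_event_record rec;

		while (fread(&rec, sizeof(rec), 1, f) == 1) {
			switch (rec.hdr.type) {
			case ST_RELEASE:
				printf("P%d job %u released at %llu, deadline %llu\n",
				       rec.hdr.pid, rec.hdr.job,
				       (unsigned long long) rec.data.release.release,
				       (unsigned long long) rec.data.release.deadline);
				break;
			case ST_COMPLETION:
				printf("P%d job %u completed at %llu (forced: %u)\n",
				       rec.hdr.pid, rec.hdr.job,
				       (unsigned long long) rec.data.completion.when,
				       (unsigned) rec.data.completion.forced);
				break;
			default:
				break; /* other event types omitted in this sketch */
			}
		}
	}
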
4369 From bac300c4dc3c9c0ed4f317514e1f8496ebe10cac Mon Sep 17 00:00:00 2001
4370 From: Bjoern Brandenburg <bbb@mpi-sws.org>
4371 Date: Tue, 12 Feb 2013 16:31:10 +0100
4372 Subject: [PATCH 017/119] Add tracepoint support
4373 
4374 This patch integrates LITMUS^RT's sched_trace_XXX() macros with
4375 Linux's notion of tracepoints. This is useful for visualizing schedules
4376 in kernelshark and similar tools. Historically, LITMUS^RT's
4377 sched_trace predates Linux's tracepoint infrastructure.
4378 ---
4379  include/litmus/sched_trace.h  |   8 ++
4380  include/trace/events/litmus.h | 231 ++++++++++++++++++++++++++++++++++++++++++
4381  litmus/Kconfig                |  18 ++++
4382  litmus/sched_task_trace.c     |   4 +
4383  4 files changed, 261 insertions(+)
4384  create mode 100644 include/trace/events/litmus.h
4385 
4386 diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h
4387 index 6044d9f..82bde82 100644
4388 --- a/include/litmus/sched_trace.h
4389 +++ b/include/litmus/sched_trace.h
4390 @@ -194,24 +194,28 @@ feather_callback void do_sched_trace_sys_release(unsigned long id,
4391  	do {								\
4392  		SCHED_TRACE(SCHED_TRACE_BASE_ID + 2,			\
4393  				do_sched_trace_task_param, t);		\
4394 +		trace_litmus_task_param(t);				\
4395  	} while (0)
4396  
4397  #define sched_trace_task_release(t)					\
4398  	do {								\
4399  		SCHED_TRACE(SCHED_TRACE_BASE_ID + 3,			\
4400  				do_sched_trace_task_release, t);	\
4401 +		trace_litmus_task_release(t);				\
4402  	} while (0)
4403  
4404  #define sched_trace_task_switch_to(t)					\
4405  	do {								\
4406  		SCHED_TRACE(SCHED_TRACE_BASE_ID + 4,			\
4407  			do_sched_trace_task_switch_to, t);		\
4408 +		trace_litmus_switch_to(t);				\
4409  	} while (0)
4410  
4411  #define sched_trace_task_switch_away(t)					\
4412  	do {								\
4413  		SCHED_TRACE(SCHED_TRACE_BASE_ID + 5,			\
4414  			do_sched_trace_task_switch_away, t);		\
4415 +		trace_litmus_switch_away(t);				\
4416  	} while (0)
4417  
4418  #define sched_trace_task_completion(t, forced)				\
4419 @@ -219,18 +223,21 @@ feather_callback void do_sched_trace_sys_release(unsigned long id,
4420  		SCHED_TRACE2(SCHED_TRACE_BASE_ID + 6,			\
4421  				do_sched_trace_task_completion, t,	\
4422  				(unsigned long) forced);		\
4423 +		trace_litmus_task_completion(t, forced);		\
4424  	} while (0)
4425  
4426  #define sched_trace_task_block(t)					\
4427  	do {								\
4428  		SCHED_TRACE(SCHED_TRACE_BASE_ID + 7,			\
4429  			do_sched_trace_task_block, t);			\
4430 +		trace_litmus_task_block(t);				\
4431  	} while (0)
4432  
4433  #define sched_trace_task_resume(t)					\
4434  	do {								\
4435  		SCHED_TRACE(SCHED_TRACE_BASE_ID + 8,			\
4436  				do_sched_trace_task_resume, t);		\
4437 +		trace_litmus_task_resume(t);				\
4438  	} while (0)
4439  
4440  #define sched_trace_action(t, action)					\
4441 @@ -242,6 +249,7 @@ feather_callback void do_sched_trace_sys_release(unsigned long id,
4442  	do {								\
4443  		SCHED_TRACE(SCHED_TRACE_BASE_ID + 10,			\
4444  			do_sched_trace_sys_release, when);		\
4445 +		trace_litmus_sys_release(when);				\
4446  	} while (0)
4447  
4448  #define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */
4449 diff --git a/include/trace/events/litmus.h b/include/trace/events/litmus.h
4450 new file mode 100644
4451 index 0000000..0fffcee
4452 --- /dev/null
4453 +++ b/include/trace/events/litmus.h
4454 @@ -0,0 +1,231 @@
4455 +/*
4456 + * LITMUS^RT kernel style scheduling tracepoints
4457 + */
4458 +#undef TRACE_SYSTEM
4459 +#define TRACE_SYSTEM litmus
4460 +
4461 +#if !defined(_SCHED_TASK_TRACEPOINT_H) || defined(TRACE_HEADER_MULTI_READ)
4462 +#define _SCHED_TASK_TRACEPOINT_H
4463 +
4464 +#include <linux/tracepoint.h>
4465 +
4466 +#include <litmus/litmus.h>
4467 +#include <litmus/rt_param.h>
4468 +
4469 +/*
4470 + * Tracing task admission
4471 + */
4472 +TRACE_EVENT(litmus_task_param,
4473 +
4474 +	TP_PROTO(struct task_struct *t),
4475 +
4476 +	TP_ARGS(t),
4477 +
4478 +	TP_STRUCT__entry(
4479 +		__field( pid_t,		pid	)
4480 +		__field( unsigned int,	job	)
4481 +		__field( lt_t,		wcet	)
4482 +		__field( lt_t,		period	)
4483 +		__field( lt_t,		phase	)
4484 +		__field( int,		partition )
4485 +	),
4486 +
4487 +	TP_fast_assign(
4488 +		__entry->pid	= t ? t->pid : 0;
4489 +		__entry->job	= t ? t->rt_param.job_params.job_no : 0;
4490 +		__entry->wcet	= get_exec_cost(t);
4491 +		__entry->period	= get_rt_period(t);
4492 +		__entry->phase	= get_rt_phase(t);
4493 +		__entry->partition = get_partition(t);
4494 +	),
4495 +
4496 +	TP_printk("period(%d, %Lu).\nwcet(%d, %Lu).\n",
4497 +		__entry->pid, __entry->period,
4498 +		__entry->pid, __entry->wcet)
4499 +);
4500 +
4501 +/*
4502 + * Tracing jobs release
4503 + */
4504 +TRACE_EVENT(litmus_task_release,
4505 +
4506 +	TP_PROTO(struct task_struct *t),
4507 +
4508 +	TP_ARGS(t),
4509 +
4510 +	TP_STRUCT__entry(
4511 +		__field( pid_t,		pid	)
4512 +		__field( unsigned int,	job	)
4513 +		__field( lt_t,		release	)
4514 +		__field( lt_t,		deadline	)
4515 +	),
4516 +
4517 +	TP_fast_assign(
4518 +		__entry->pid	= t ? t->pid : 0;
4519 +		__entry->job	= t ? t->rt_param.job_params.job_no : 0;
4520 +		__entry->release	= get_release(t);
4521 +		__entry->deadline	= get_deadline(t);
4522 +	),
4523 +
4524 +	TP_printk("release(job(%u, %u)): %Lu\ndeadline(job(%u, %u)): %Lu\n",
4525 +			__entry->pid, __entry->job, __entry->release,
4526 +			__entry->pid, __entry->job, __entry->deadline)
4527 +);
4528 +
4529 +/*
4530 + * Tracepoint for switching to new task
4531 + */
4532 +TRACE_EVENT(litmus_switch_to,
4533 +
4534 +	TP_PROTO(struct task_struct *t),
4535 +
4536 +	TP_ARGS(t),
4537 +
4538 +	TP_STRUCT__entry(
4539 +		__field( pid_t,		pid	)
4540 +		__field( unsigned int,	job	)
4541 +		__field( lt_t,		when	)
4542 +		__field( lt_t,		exec_time	)
4543 +	),
4544 +
4545 +	TP_fast_assign(
4546 +		__entry->pid	= is_realtime(t) ? t->pid : 0;
4547 +		__entry->job	= is_realtime(t) ? t->rt_param.job_params.job_no : 0;
4548 +		__entry->when		= litmus_clock();
4549 +		__entry->exec_time	= get_exec_time(t);
4550 +	),
4551 +
4552 +	TP_printk("switch_to(job(%u, %u)): %Lu (exec: %Lu)\n",
4553 +			__entry->pid, __entry->job,
4554 +			__entry->when, __entry->exec_time)
4555 +);
4556 +
4557 +/*
4558 + * Tracepoint for switching away previous task
4559 + */
4560 +TRACE_EVENT(litmus_switch_away,
4561 +
4562 +	TP_PROTO(struct task_struct *t),
4563 +
4564 +	TP_ARGS(t),
4565 +
4566 +	TP_STRUCT__entry(
4567 +		__field( pid_t,		pid	)
4568 +		__field( unsigned int,	job	)
4569 +		__field( lt_t,		when	)
4570 +		__field( lt_t,		exec_time	)
4571 +	),
4572 +
4573 +	TP_fast_assign(
4574 +		__entry->pid	= is_realtime(t) ? t->pid : 0;
4575 +		__entry->job	= is_realtime(t) ? t->rt_param.job_params.job_no : 0;
4576 +		__entry->when		= litmus_clock();
4577 +		__entry->exec_time	= get_exec_time(t);
4578 +	),
4579 +
4580 +	TP_printk("switch_away(job(%u, %u)): %Lu (exec: %Lu)\n",
4581 +			__entry->pid, __entry->job,
4582 +			__entry->when, __entry->exec_time)
4583 +);
4584 +
4585 +/*
4586 + * Tracing jobs completion
4587 + */
4588 +TRACE_EVENT(litmus_task_completion,
4589 +
4590 +	TP_PROTO(struct task_struct *t, unsigned long forced),
4591 +
4592 +	TP_ARGS(t, forced),
4593 +
4594 +	TP_STRUCT__entry(
4595 +		__field( pid_t,		pid	)
4596 +		__field( unsigned int,	job	)
4597 +		__field( lt_t,		when	)
4598 +		__field( unsigned long,	forced	)
4599 +	),
4600 +
4601 +	TP_fast_assign(
4602 +		__entry->pid	= t ? t->pid : 0;
4603 +		__entry->job	= t ? t->rt_param.job_params.job_no : 0;
4604 +		__entry->when	= litmus_clock();
4605 +		__entry->forced	= forced;
4606 +	),
4607 +
4608 +	TP_printk("completed(job(%u, %u)): %Lu (forced: %lu)\n",
4609 +			__entry->pid, __entry->job,
4610 +			__entry->when, __entry->forced)
4611 +);
4612 +
4613 +/*
4614 + * Trace blocking tasks.
4615 + */
4616 +TRACE_EVENT(litmus_task_block,
4617 +
4618 +	TP_PROTO(struct task_struct *t),
4619 +
4620 +	TP_ARGS(t),
4621 +
4622 +	TP_STRUCT__entry(
4623 +		__field( pid_t,		pid	)
4624 +		__field( lt_t,		when	)
4625 +	),
4626 +
4627 +	TP_fast_assign(
4628 +		__entry->pid	= t ? t->pid : 0;
4629 +		__entry->when	= litmus_clock();
4630 +	),
4631 +
4632 +	TP_printk("(%u) blocks: %Lu\n", __entry->pid, __entry->when)
4633 +);
4634 +
4635 +/*
4636 + * Tracing jobs resume
4637 + */
4638 +TRACE_EVENT(litmus_task_resume,
4639 +
4640 +	TP_PROTO(struct task_struct *t),
4641 +
4642 +	TP_ARGS(t),
4643 +
4644 +	TP_STRUCT__entry(
4645 +		__field( pid_t,		pid	)
4646 +		__field( unsigned int,	job	)
4647 +		__field( lt_t,		when	)
4648 +	),
4649 +
4650 +	TP_fast_assign(
4651 +		__entry->pid	= t ? t->pid : 0;
4652 +		__entry->job	= t ? t->rt_param.job_params.job_no : 0;
4653 +		__entry->when	= litmus_clock();
4654 +	),
4655 +
4656 +	TP_printk("resume(job(%u, %u)): %Lu\n",
4657 +			__entry->pid, __entry->job, __entry->when)
4658 +);
4659 +
4660 +/*
4661 + * Trace synchronous release
4662 + */
4663 +TRACE_EVENT(litmus_sys_release,
4664 +
4665 +	TP_PROTO(lt_t *start),
4666 +
4667 +	TP_ARGS(start),
4668 +
4669 +	TP_STRUCT__entry(
4670 +		__field( lt_t,		rel	)
4671 +		__field( lt_t,		when	)
4672 +	),
4673 +
4674 +	TP_fast_assign(
4675 +		__entry->rel	= *start;
4676 +		__entry->when	= litmus_clock();
4677 +	),
4678 +
4679 +	TP_printk("SynRelease(%Lu) at %Lu\n", __entry->rel, __entry->when)
4680 +);
4681 +
4682 +#endif /* _SCHED_TASK_TRACEPOINT_H */
4683 +
4684 +/* Must stay outside the protection */
4685 +#include <trace/define_trace.h>
4686 diff --git a/litmus/Kconfig b/litmus/Kconfig
4687 index 19211ac..5408ef6 100644
4688 --- a/litmus/Kconfig
4689 +++ b/litmus/Kconfig
4690 @@ -58,6 +58,24 @@ config SCHED_TASK_TRACE_SHIFT
4691  		   10 =>   1k events
4692  		    8 =>  512 events
4693  
4694 +config SCHED_LITMUS_TRACEPOINT
4695 +	bool "Enable Event/Tracepoint Tracing for real-time task tracing"
4696 +	depends on TRACEPOINTS
4697 +	default n
4698 +	help
4699 +	  Enable kernel-style events (tracepoints) for Litmus. Litmus events
4700 +	  trace the same functions as the above sched_trace_XXX(), but can
4701 +	  be enabled independently.
4702 +	  Litmus tracepoints can be recorded and analyzed together (single
4703 +	  time reference) with all other kernel tracing events (e.g.,
4704 +	  sched:sched_switch, etc.).
4705 +
4706 +	  This also enables a quick way to visualize schedule traces using
4707 +	  trace-cmd utility and kernelshark visualizer.
4708 +
4709 +	  Say Yes for debugging and visualization purposes.
4710 +	  Say No for overhead tracing.
4711 +
4712  config SCHED_OVERHEAD_TRACE
4713  	bool "Record timestamps for overhead measurements"
4714  	depends on FEATHER_TRACE
4715 diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c
4716 index 5ef8d09..2bdfbbd 100644
4717 --- a/litmus/sched_task_trace.c
4718 +++ b/litmus/sched_task_trace.c
4719 @@ -15,6 +15,10 @@
4720  #include <litmus/feather_trace.h>
4721  #include <litmus/ftdev.h>
4722  
4723 +#ifdef CONFIG_SCHED_LITMUS_TRACEPOINT
4724 +#define CREATE_TRACE_POINTS
4725 +#include <trace/events/litmus.h>
4726 +#endif
4727  
4728  #define NO_EVENTS		(1 << CONFIG_SCHED_TASK_TRACE_SHIFT)
4729  
4730 -- 
4731 1.8.1.2
4732 
4733 
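Because the events above are ordinary kernel tracepoints, they are controlled through the usual tracefs files once CONFIG_SCHED_LITMUS_TRACEPOINT is enabled. A hedged sketch of switching one event on programmatically (illustrative only; the path assumes the conventional debugfs mount point and may differ per system):

	#include <fcntl.h>
	#include <unistd.h>

	static int enable_litmus_release_event(void)
	{
		const char *path = "/sys/kernel/debug/tracing/"
			"events/litmus/litmus_task_release/enable";
		int fd = open(path, O_WRONLY);
		ssize_t ret;

		if (fd < 0)
			return -1;
		ret = write(fd, "1", 1);
		close(fd);
		return ret == 1 ? 0 : -1;
	}

The same events can also be captured with trace-cmd and inspected in kernelshark, which is what the Kconfig help text refers to.
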
4734 From ce3268cf0ebd29c52dd5130f8430f3a3b61b0cec Mon Sep 17 00:00:00 2001
4735 From: Bjoern Brandenburg <bbb@mpi-sws.org>
4736 Date: Tue, 25 Jun 2013 07:32:20 +0200
4737 Subject: [PATCH 018/119] Add object descriptor table to Linux's task_struct
4738 
4739 This table is similar to a file descriptor table. It keeps track of
4740 which "objects" (locks) a real-time task holds a handle to.
4741 ---
4742  include/linux/sched.h | 4 ++++
4743  1 file changed, 4 insertions(+)
4744 
4745 diff --git a/include/linux/sched.h b/include/linux/sched.h
4746 index 164bb0d..cbb3b44 100644
4747 --- a/include/linux/sched.h
4748 +++ b/include/linux/sched.h
4749 @@ -1029,6 +1029,7 @@ struct sched_rt_entity {
4750  
4751  
4752  struct rcu_node;
4753 +struct od_table_entry;
4754  
4755  enum perf_event_task_context {
4756  	perf_invalid_context = -1,
4757 @@ -1374,6 +1375,9 @@ struct task_struct {
4758  	/* LITMUS RT parameters and state */
4759  	struct rt_param rt_param;
4760  
4761 +	/* references to PI semaphores, etc. */
4762 +	struct od_table_entry *od_table;
4763 +
4764  #ifdef CONFIG_LATENCYTOP
4765  	int latency_record_count;
4766  	struct latency_record latency_record[LT_SAVECOUNT];
4767 -- 
4768 1.8.1.2
4769 
4770 
4771 From 134e525e8a1e90e002e5b51bd6fcfc65dd7c1615 Mon Sep 17 00:00:00 2001
4772 From: Bjoern Brandenburg <bbb@mpi-sws.org>
4773 Date: Thu, 27 Jun 2013 09:54:55 +0200
4774 Subject: [PATCH 019/119] Export num_cache_leaves in asm/processor.h
4775 
4776 This is required to make litmus/affinity.c compile.
4777 ---
4778  arch/x86/include/asm/processor.h | 5 +++++
4779  1 file changed, 5 insertions(+)
4780 
4781 diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
4782 index 254dd2b..add9c71 100644
4783 --- a/arch/x86/include/asm/processor.h
4784 +++ b/arch/x86/include/asm/processor.h
4785 @@ -174,6 +174,11 @@ extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
4786  extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
4787  extern void init_amd_cacheinfo(struct cpuinfo_x86 *c);
4788  #ifdef CONFIG_SYSFS
4789 +/* Note: commit 04a1541828ea223169eb44a336bfad8ec0dfb46a hid num_cache_leaves,
4790 + * but LITMUS^RT currently still depends on it.
4791 + * FIXME: port LITMUS^RT's affinity-aware scheduling to use proper interfaces.
4792 + */
4793 +extern unsigned short num_cache_leaves;
4794  extern int get_shared_cpu_map(cpumask_var_t mask,
4795  			       unsigned int cpu, int index);
4796  #endif
4797 -- 
4798 1.8.1.2
4799 
4800 
4801 From beda58fbd93b3cb07becd08696a09437dc95561c Mon Sep 17 00:00:00 2001
4802 From: Bjoern Brandenburg <bbb@mpi-sws.org>
4803 Date: Tue, 25 Jun 2013 07:27:07 +0200
4804 Subject: [PATCH 020/119] Add LITMUS^RT core implementation
4805 
4806 This patch adds the core of LITMUS^RT:
4807 
4808  - library functionality (heaps, rt_domain, prioritization, etc.)
4809  - budget enforcement logic
4810  - job management
4811  - system call backends
4812  - virtual devices (control page, etc.)
4813  - scheduler plugin API (and dummy plugin)
4814 
4815 This code compiles, but is not yet integrated with the rest of Linux.
4816 ---
4817  include/litmus/affinity.h     |  80 ++++++
4818  include/litmus/bheap.h        |  77 +++++
4819  include/litmus/binheap.h      | 205 ++++++++++++++
4820  include/litmus/budget.h       |  36 +++
4821  include/litmus/clustered.h    |  44 +++
4822  include/litmus/edf_common.h   |  25 ++
4823  include/litmus/fdso.h         |  78 ++++++
4824  include/litmus/fp_common.h    | 105 +++++++
4825  include/litmus/fpmath.h       | 147 ++++++++++
4826  include/litmus/jobs.h         |  10 +
4827  include/litmus/litmus.h       | 268 ++++++++++++++++++
4828  include/litmus/litmus_proc.h  |  63 +++++
4829  include/litmus/locking.h      |  28 ++
4830  include/litmus/preempt.h      | 164 +++++++++++
4831  include/litmus/rt_domain.h    | 182 ++++++++++++
4832  include/litmus/rt_param.h     |  12 +-
4833  include/litmus/sched_plugin.h | 128 +++++++++
4834  include/litmus/srp.h          |  28 ++
4835  include/litmus/unistd_32.h    |  21 ++
4836  include/litmus/unistd_64.h    |  33 +++
4837  include/litmus/wait.h         |  57 ++++
4838  kernel/sched/litmus.c         | 340 ++++++++++++++++++++++
4839  litmus/Kconfig                | 193 +++++++++++++
4840  litmus/Makefile               |  20 ++
4841  litmus/affinity.c             |  41 +++
4842  litmus/bheap.c                | 316 +++++++++++++++++++++
4843  litmus/binheap.c              | 387 +++++++++++++++++++++++++
4844  litmus/budget.c               | 116 ++++++++
4845  litmus/clustered.c            | 111 ++++++++
4846  litmus/ctrldev.c              | 160 +++++++++++
4847  litmus/edf_common.c           | 200 +++++++++++++
4848  litmus/fdso.c                 | 308 ++++++++++++++++++++
4849  litmus/fp_common.c            | 119 ++++++++
4850  litmus/jobs.c                 |  77 +++++
4851  litmus/litmus.c               | 639 ++++++++++++++++++++++++++++++++++++++++++
4852  litmus/litmus_proc.c          | 576 +++++++++++++++++++++++++++++++++++++
4853  litmus/locking.c              | 188 +++++++++++++
4854  litmus/preempt.c              | 137 +++++++++
4855  litmus/rt_domain.c            | 353 +++++++++++++++++++++++
4856  litmus/sched_plugin.c         | 238 ++++++++++++++++
4857  litmus/srp.c                  | 313 +++++++++++++++++++++
4858  litmus/sync.c                 | 152 ++++++++++
4859  litmus/uncachedev.c           | 102 +++++++
4860  43 files changed, 6871 insertions(+), 6 deletions(-)
4861  create mode 100644 include/litmus/affinity.h
4862  create mode 100644 include/litmus/bheap.h
4863  create mode 100644 include/litmus/binheap.h
4864  create mode 100644 include/litmus/budget.h
4865  create mode 100644 include/litmus/clustered.h
4866  create mode 100644 include/litmus/edf_common.h
4867  create mode 100644 include/litmus/fdso.h
4868  create mode 100644 include/litmus/fp_common.h
4869  create mode 100644 include/litmus/fpmath.h
4870  create mode 100644 include/litmus/jobs.h
4871  create mode 100644 include/litmus/litmus_proc.h
4872  create mode 100644 include/litmus/locking.h
4873  create mode 100644 include/litmus/preempt.h
4874  create mode 100644 include/litmus/rt_domain.h
4875  create mode 100644 include/litmus/sched_plugin.h
4876  create mode 100644 include/litmus/srp.h
4877  create mode 100644 include/litmus/unistd_32.h
4878  create mode 100644 include/litmus/unistd_64.h
4879  create mode 100644 include/litmus/wait.h
4880  create mode 100644 kernel/sched/litmus.c
4881  create mode 100644 litmus/affinity.c
4882  create mode 100644 litmus/bheap.c
4883  create mode 100644 litmus/binheap.c
4884  create mode 100644 litmus/budget.c
4885  create mode 100644 litmus/clustered.c
4886  create mode 100644 litmus/ctrldev.c
4887  create mode 100644 litmus/edf_common.c
4888  create mode 100644 litmus/fdso.c
4889  create mode 100644 litmus/fp_common.c
4890  create mode 100644 litmus/jobs.c
4891  create mode 100644 litmus/litmus.c
4892  create mode 100644 litmus/litmus_proc.c
4893  create mode 100644 litmus/locking.c
4894  create mode 100644 litmus/preempt.c
4895  create mode 100644 litmus/rt_domain.c
4896  create mode 100644 litmus/sched_plugin.c
4897  create mode 100644 litmus/srp.c
4898  create mode 100644 litmus/sync.c
4899  create mode 100644 litmus/uncachedev.c
4900 
4901 diff --git a/include/litmus/affinity.h b/include/litmus/affinity.h
4902 new file mode 100644
4903 index 0000000..ca2e442
4904 --- /dev/null
4905 +++ b/include/litmus/affinity.h
4906 @@ -0,0 +1,80 @@
4907 +#ifndef __LITMUS_AFFINITY_H
4908 +#define __LITMUS_AFFINITY_H
4909 +
4910 +#include <linux/cpumask.h>
4911 +
4912 +/*
4913 +  L1 (instr) = depth 0
4914 +  L1 (data)  = depth 1
4915 +  L2 = depth 2
4916 +  L3 = depth 3
4917 + */
4918 +#define NUM_CACHE_LEVELS 4
4919 +
4920 +struct neighborhood
4921 +{
4922 +	unsigned int size[NUM_CACHE_LEVELS];
4923 +	cpumask_var_t neighbors[NUM_CACHE_LEVELS];
4924 +};
4925 +
4926 +/* topology info is stored redundantly in a big array for fast lookups */
4927 +extern struct neighborhood neigh_info[NR_CPUS];
4928 +
4929 +void init_topology(void); /* called by Litmus module's _init_litmus() */
4930 +
4931 +/* Works like:
4932 +void get_nearest_available_cpu(
4933 +	cpu_entry_t **nearest,
4934 +	cpu_entry_t *start,
4935 +	cpu_entry_t *entries,
4936 +	int release_master)
4937 +
4938 +Set release_master = NO_CPU for no Release Master.
4939 +
4940 +We use a macro here to exploit the fact that C-EDF and G-EDF
4941 +have similar structures for their cpu_entry_t structs, even though
4942 +they do not share a common base-struct.  The macro allows us to
4943 +avoid code duplication.
4944 +
4945 +TODO: Factor out the job-to-processor linking from C/G-EDF into
4946 +a reusable "processor mapping".  (See B.B.'s RTSS'09 paper &
4947 +dissertation.)
4948 + */
4949 +#define get_nearest_available_cpu(nearest, start, entries, release_master) \
4950 +{ \
4951 +	(nearest) = NULL; \
4952 +	if (!(start)->linked) { \
4953 +		(nearest) = (start); \
4954 +	} else { \
4955 +		int __level; \
4956 +		int __cpu; \
4957 +		int __release_master = ((release_master) == NO_CPU) ? -1 : (release_master); \
4958 +		struct neighborhood *__neighbors = &neigh_info[(start)->cpu]; \
4959 +		\
4960 +		for (__level = 0; (__level < NUM_CACHE_LEVELS) && !(nearest); ++__level) { \
4961 +			if (__neighbors->size[__level] > 1) { \
4962 +				for_each_cpu(__cpu, __neighbors->neighbors[__level]) { \
4963 +					if (__cpu != __release_master) { \
4964 +						cpu_entry_t *__entry = &per_cpu((entries), __cpu); \
4965 +						if (!__entry->linked) { \
4966 +							(nearest) = __entry; \
4967 +							break; \
4968 +						} \
4969 +					} \
4970 +				} \
4971 +			} else if (__neighbors->size[__level] == 0) { \
4972 +				break; \
4973 +			} \
4974 +		} \
4975 +	} \
4976 +	\
4977 +	if ((nearest)) { \
4978 +		TRACE("P%d is closest available CPU to P%d\n", \
4979 +				(nearest)->cpu, (start)->cpu); \
4980 +	} else { \
4981 +		TRACE("Could not find an available CPU close to P%d\n", \
4982 +				(start)->cpu); \
4983 +	} \
4984 +}
4985 +
4986 +#endif
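
To make the calling convention of get_nearest_available_cpu() concrete, a plugin invokes the macro with its own per-CPU state. A minimal sketch, for illustration only (cpu_entry_t, the per-CPU variable, and the helper are invented here; only the two fields the macro relies on are shown, and NO_CPU is defined elsewhere in the LITMUS^RT headers):

	#include <linux/percpu.h>
	#include <litmus/affinity.h>

	/* Hypothetical plugin-side CPU state; real plugins carry more fields. */
	typedef struct {
		int			cpu;
		struct task_struct	*linked;	/* task linked to this CPU, if any */
	} cpu_entry_t;

	static DEFINE_PER_CPU(cpu_entry_t, demo_cpu_entries);

	static cpu_entry_t *pick_cpu_near(cpu_entry_t *preferred)
	{
		cpu_entry_t *target;

		/* Picks 'preferred' if it has no linked task; otherwise walks
		 * outward through the cache neighborhoods of preferred->cpu.
		 * NO_CPU means that no release master is configured. */
		get_nearest_available_cpu(target, preferred,
					  demo_cpu_entries, NO_CPU);
		return target;
	}
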
4987 diff --git a/include/litmus/bheap.h b/include/litmus/bheap.h
4988 new file mode 100644
4989 index 0000000..cf4864a
4990 --- /dev/null
4991 +++ b/include/litmus/bheap.h
4992 @@ -0,0 +1,77 @@
4993 +/* bheaps.h -- Binomial Heaps
4994 + *
4995 + * (c) 2008, 2009 Bjoern Brandenburg
4996 + */
4997 +
4998 +#ifndef BHEAP_H
4999 +#define BHEAP_H
5000 +
5001 +#define NOT_IN_HEAP UINT_MAX
5002 +
5003 +struct bheap_node {
5004 +	struct bheap_node* 	parent;
5005 +	struct bheap_node* 	next;
5006 +	struct bheap_node* 	child;
5007 +
5008 +	unsigned int 		degree;
5009 +	void*			value;
5010 +	struct bheap_node**	ref;
5011 +};
5012 +
5013 +struct bheap {
5014 +	struct bheap_node* 	head;
5015 +	/* We cache the minimum of the heap.
5016 +	 * This speeds up repeated peek operations.
5017 +	 */
5018 +	struct bheap_node*	min;
5019 +};
5020 +
5021 +typedef int (*bheap_prio_t)(struct bheap_node* a, struct bheap_node* b);
5022 +
5023 +void bheap_init(struct bheap* heap);
5024 +void bheap_node_init(struct bheap_node** ref_to_bheap_node_ptr, void* value);
5025 +
5026 +static inline int bheap_node_in_heap(struct bheap_node* h)
5027 +{
5028 +	return h->degree != NOT_IN_HEAP;
5029 +}
5030 +
5031 +static inline int bheap_empty(struct bheap* heap)
5032 +{
5033 +	return heap->head == NULL && heap->min == NULL;
5034 +}
5035 +
5036 +/* insert (and reinitialize) a node into the heap */
5037 +void bheap_insert(bheap_prio_t higher_prio,
5038 +		 struct bheap* heap,
5039 +		 struct bheap_node* node);
5040 +
5041 +/* merge addition into target */
5042 +void bheap_union(bheap_prio_t higher_prio,
5043 +		struct bheap* target,
5044 +		struct bheap* addition);
5045 +
5046 +struct bheap_node* bheap_peek(bheap_prio_t higher_prio,
5047 +			    struct bheap* heap);
5048 +
5049 +struct bheap_node* bheap_take(bheap_prio_t higher_prio,
5050 +			    struct bheap* heap);
5051 +
5052 +void bheap_uncache_min(bheap_prio_t higher_prio, struct bheap* heap);
5053 +int  bheap_decrease(bheap_prio_t higher_prio, struct bheap_node* node);
5054 +
5055 +void bheap_delete(bheap_prio_t higher_prio,
5056 +		 struct bheap* heap,
5057 +		 struct bheap_node* node);
5058 +
5059 +/* allocate from memcache */
5060 +struct bheap_node* bheap_node_alloc(int gfp_flags);
5061 +void bheap_node_free(struct bheap_node* hn);
5062 +
5063 +/* allocate a heap node for value and insert into the heap */
5064 +int bheap_add(bheap_prio_t higher_prio, struct bheap* heap,
5065 +	     void* value, int gfp_flags);
5066 +
5067 +void* bheap_take_del(bheap_prio_t higher_prio,
5068 +		    struct bheap* heap);
5069 +#endif
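
The binomial heap above is used throughout the LITMUS^RT core (e.g., for ready queues); the basic pattern is to supply a priority comparison over bheap_node::value and to let each payload own one heap node. A small usage sketch, for illustration only (struct demo_job and the EDF-style comparison are invented for this example):

	#include <litmus/bheap.h>

	struct demo_job {
		unsigned long long	deadline;
		struct bheap_node	*heap_node;
	};

	/* Earlier deadline == higher priority; returns non-zero if a beats b. */
	static int demo_edf_higher_prio(struct bheap_node *a, struct bheap_node *b)
	{
		struct demo_job *ja = a->value;
		struct demo_job *jb = b->value;
		return ja->deadline < jb->deadline;
	}

	static struct demo_job *demo(struct demo_job *job)
	{
		struct bheap heap;
		struct bheap_node *earliest;

		bheap_init(&heap);

		/* each payload owns one node, allocated from the memcache */
		job->heap_node = bheap_node_alloc(GFP_ATOMIC);
		bheap_node_init(&job->heap_node, job);
		bheap_insert(demo_edf_higher_prio, &heap, job->heap_node);

		earliest = bheap_take(demo_edf_higher_prio, &heap);
		return earliest ? earliest->value : NULL;
	}
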
5070 diff --git a/include/litmus/binheap.h b/include/litmus/binheap.h
5071 new file mode 100644
5072 index 0000000..1cf3647
5073 --- /dev/null
5074 +++ b/include/litmus/binheap.h
5075 @@ -0,0 +1,205 @@
5076 +#ifndef LITMUS_BINARY_HEAP_H
5077 +#define LITMUS_BINARY_HEAP_H
5078 +
5079 +#include <linux/kernel.h>
5080 +
5081 +/**
5082 + * Simple binary heap with add, arbitrary delete, delete_root, and top
5083 + * operations.
5084 + *
5085 + * Style meant to conform with list.h.
5086 + *
5087 + * Motivation: Linux's prio_heap.h is of fixed size. Litmus's binomial
5088 + * heap may be overkill (and perhaps not general enough) for some applications.
5089 + *
5090 + * Note: In order to make node swaps fast, a node inserted with a data pointer
5091 + * may not always hold said data pointer. This is similar to the binomial heap
5092 + * implementation. This does make node deletion tricky since we have to
5093 + * (1) locate the node that holds the data pointer to delete, and (2) locate
5094 + * the node that was originally inserted with said data pointer. These have to be
5095 + * coalesced into a single node before removal (see usage of
5096 + * __binheap_safe_swap()). We have to track node references to accomplish this.
5097 + */
5098 +
5099 +struct binheap_node {
5100 +	void	*data;
5101 +	struct binheap_node *parent;
5102 +	struct binheap_node *left;
5103 +	struct binheap_node *right;
5104 +
5105 +	/* pointer to binheap_node that holds *data for which this binheap_node
5106 +	 * was originally inserted.  (*data "owns" this node)
5107 +	 */
5108 +	struct binheap_node *ref;
5109 +	struct binheap_node **ref_ptr;
5110 +};
5111 +
5112 +/**
5113 + * Signature of comparator function.  Assumed 'less-than' (min-heap).
5114 + * Pass in 'greater-than' for max-heap.
5115 + *
5116 + * TODO: Consider macro-based implementation that allows comparator to be
5117 + * inlined (similar to Linux red/black tree) for greater efficiency.
5118 + */
5119 +typedef int (*binheap_order_t)(struct binheap_node *a,
5120 +				struct binheap_node *b);
5121 +
5122 +
5123 +struct binheap {
5124 +	struct binheap_node *root;
5125 +
5126 +	/* pointer to node to take next inserted child */
5127 +	struct binheap_node *next;
5128 +
5129 +	/* pointer to last node in complete binary tree */
5130 +	struct binheap_node *last;
5131 +
5132 +	/* comparator function pointer */
5133 +	binheap_order_t compare;
5134 +};
5135 +
5136 +
5137 +/* Initialized heap nodes not in a heap have parent
5138 + * set to BINHEAP_POISON.
5139 + */
5140 +#define BINHEAP_POISON	((void*)(0xdeadbeef))
5141 +
5142 +
5143 +/**
5144 + * binheap_entry - get the struct for this heap node.
5145 + *  Only valid when called upon heap nodes other than the root handle.
5146 + * @ptr:	the heap node.
5147 + * @type:	the type of struct pointed to by binheap_node::data.
5148 + * @member:	unused.
5149 + */
5150 +#define binheap_entry(ptr, type, member) \
5151 +((type *)((ptr)->data))
5152 +
5153 +/**
5154 + * binheap_node_container - get the struct that contains this node.
5155 + *  Only valid when called upon heap nodes other than the root handle.
5156 + * @ptr:	the heap node.
5157 + * @type:	the type of struct the node is embedded in.
5158 + * @member:	the name of the binheap_struct within the (type) struct.
5159 + */
5160 +#define binheap_node_container(ptr, type, member) \
5161 +container_of((ptr), type, member)
5162 +
5163 +/**
5164 + * binheap_top_entry - get the struct for the node at the top of the heap.
5165 + *  Only valid when called upon the heap handle node.
5166 + * @ptr:    the special heap-handle node.
5167 + * @type:   the type of the struct the head is embedded in.
5168 + * @member:	the name of the binheap_struct within the (type) struct.
5169 + */
5170 +#define binheap_top_entry(ptr, type, member) \
5171 +binheap_entry((ptr)->root, type, member)
5172 +
5173 +/**
5174 + * binheap_delete_root - remove the root element from the heap.
5175 + * @handle:	 handle to the heap.
5176 + * @type:    the type of the struct the head is embedded in.
5177 + * @member:	 the name of the binheap_struct within the (type) struct.
5178 + */
5179 +#define binheap_delete_root(handle, type, member) \
5180 +__binheap_delete_root((handle), &((type *)((handle)->root->data))->member)
5181 +
5182 +/**
5183 + * binheap_delete - remove an arbitrary element from the heap.
5184 + * @to_delete:  pointer to node to be removed.
5185 + * @handle:	 handle to the heap.
5186 + */
5187 +#define binheap_delete(to_delete, handle) \
5188 +__binheap_delete((to_delete), (handle))
5189 +
5190 +/**
5191 + * binheap_add - insert an element into the heap.
5192 + * @new_node: node to add.
5193 + * @handle:	 handle to the heap.
5194 + * @type:    the type of the struct the head is embedded in.
5195 + * @member:	 the name of the binheap_struct within the (type) struct.
5196 + */
5197 +#define binheap_add(new_node, handle, type, member) \
5198 +__binheap_add((new_node), (handle), container_of((new_node), type, member))
5199 +
5200 +/**
5201 + * binheap_decrease - re-evaluate the position of a node (based upon its
5202 + * original data pointer).
5203 + * @handle: handle to the heap.
5204 + * @orig_node: node that was associated with the data pointer
5205 + *             (whose value has changed) when said pointer was
5206 + *             added to the heap.
5207 + */
5208 +#define binheap_decrease(orig_node, handle) \
5209 +__binheap_decrease((orig_node), (handle))
5210 +
5211 +#define BINHEAP_NODE_INIT() { NULL, BINHEAP_POISON, NULL, NULL, NULL, NULL }
5212 +
5213 +#define BINHEAP_NODE(name) \
5214 +	struct binheap_node name = BINHEAP_NODE_INIT()
5215 +
5216 +
5217 +static inline void INIT_BINHEAP_NODE(struct binheap_node *n)
5218 +{
5219 +	n->data = NULL;
5220 +	n->parent = BINHEAP_POISON;
5221 +	n->left = NULL;
5222 +	n->right = NULL;
5223 +	n->ref = NULL;
5224 +	n->ref_ptr = NULL;
5225 +}
5226 +
5227 +static inline void INIT_BINHEAP_HANDLE(struct binheap *handle,
5228 +				binheap_order_t compare)
5229 +{
5230 +	handle->root = NULL;
5231 +	handle->next = NULL;
5232 +	handle->last = NULL;
5233 +	handle->compare = compare;
5234 +}
5235 +
5236 +/* Returns true if binheap is empty. */
5237 +static inline int binheap_empty(struct binheap *handle)
5238 +{
5239 +	return(handle->root == NULL);
5240 +}
5241 +
5242 +/* Returns true if binheap node is in a heap. */
5243 +static inline int binheap_is_in_heap(struct binheap_node *node)
5244 +{
5245 +	return (node->parent != BINHEAP_POISON);
5246 +}
5247 +
5248 +/* Returns true if binheap node is in given heap. */
5249 +int binheap_is_in_this_heap(struct binheap_node *node, struct binheap* heap);
5250 +
5251 +/* Add a node to a heap */
5252 +void __binheap_add(struct binheap_node *new_node,
5253 +				struct binheap *handle,
5254 +				void *data);
5255 +
5256 +/**
5257 + * Removes the root node from the heap. The node is removed after coalescing
5258 + * the binheap_node with its original data pointer at the root of the tree.
5259 + *
5260 + * The 'last' node in the tree is then swapped up to the root and bubbled
5261 + * down.
5262 + */
5263 +void __binheap_delete_root(struct binheap *handle,
5264 +				struct binheap_node *container);
5265 +
5266 +/**
5267 + * Delete an arbitrary node.  Bubble node to delete up to the root,
5268 + * and then delete the root.
5269 + */
5270 +void __binheap_delete(struct binheap_node *node_to_delete,
5271 +				struct binheap *handle);
5272 +
5273 +/**
5274 + * Bubble up a node whose associated value (key) has decreased.
5275 + */
5276 +void __binheap_decrease(struct binheap_node *orig_node,
5277 +						struct binheap *handle);
5278 +
5279 +
5280 +#endif
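
The binheap macros above expect the binheap_node to be embedded in the element, with the comparator recovering the element via binheap_entry(). A minimal sketch under those assumptions; the demo_item type and demo_* names are invented for illustration:

struct demo_item {
	int key;
	struct binheap_node node;
};

static int demo_less(struct binheap_node *a, struct binheap_node *b)
{
	struct demo_item *ia = binheap_entry(a, struct demo_item, node);
	struct demo_item *ib = binheap_entry(b, struct demo_item, node);
	return ia->key < ib->key;
}

static void demo_binheap(void)
{
	struct binheap heap;
	struct demo_item x = { .key = 7 };

	INIT_BINHEAP_HANDLE(&heap, demo_less);
	INIT_BINHEAP_NODE(&x.node);
	binheap_add(&x.node, &heap, struct demo_item, node);

	if (!binheap_empty(&heap)) {
		struct demo_item *top =
			binheap_top_entry(&heap, struct demo_item, node);
		TRACE("top key: %d\n", top->key);
		binheap_delete_root(&heap, struct demo_item, node);
	}
}
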
5281 diff --git a/include/litmus/budget.h b/include/litmus/budget.h
5282 new file mode 100644
5283 index 0000000..bd2d5c9
5284 --- /dev/null
5285 +++ b/include/litmus/budget.h
5286 @@ -0,0 +1,36 @@
5287 +#ifndef _LITMUS_BUDGET_H_
5288 +#define _LITMUS_BUDGET_H_
5289 +
5290 +/* Update the per-processor enforcement timer (arm/reprogram/cancel) for
5291 + * the next task. */
5292 +void update_enforcement_timer(struct task_struct* t);
5293 +
5294 +inline static int budget_exhausted(struct task_struct* t)
5295 +{
5296 +	return get_exec_time(t) >= get_exec_cost(t);
5297 +}
5298 +
5299 +inline static lt_t budget_remaining(struct task_struct* t)
5300 +{
5301 +	if (!budget_exhausted(t))
5302 +		return get_exec_cost(t) - get_exec_time(t);
5303 +	else
5304 +		/* avoid overflow */
5305 +		return 0;
5306 +}
5307 +
5308 +#define budget_enforced(t) (tsk_rt(t)->task_params.budget_policy != NO_ENFORCEMENT)
5309 +
5310 +#define budget_precisely_enforced(t) (tsk_rt(t)->task_params.budget_policy \
5311 +				      == PRECISE_ENFORCEMENT)
5312 +
5313 +static inline int requeue_preempted_job(struct task_struct* t)
5314 +{
5315 +	/* Add task to ready queue only if not subject to budget enforcement or
5316 +	 * if the job has budget remaining. t may be NULL.
5317 +	 */
5318 +	return t && !is_completed(t) &&
5319 +		(!budget_exhausted(t) || !budget_enforced(t));
5320 +}
5321 +
5322 +#endif
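
A sketch of how a plugin's preemption path might use the budget helpers above; demo_requeue() is a made-up stand-in for the plugin's own ready-queue insertion, not a real function:

static void demo_requeue(struct task_struct *t);	/* hypothetical */

static void demo_job_preempted(struct task_struct *t)
{
	if (requeue_preempted_job(t))
		demo_requeue(t);	/* still has budget, or no enforcement */
	else if (t && budget_enforced(t) && budget_exhausted(t))
		TRACE_TASK(t, "out of budget (%llu ns left), not requeued\n",
			   budget_remaining(t));
}
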
5323 diff --git a/include/litmus/clustered.h b/include/litmus/clustered.h
5324 new file mode 100644
5325 index 0000000..0c18dcb
5326 --- /dev/null
5327 +++ b/include/litmus/clustered.h
5328 @@ -0,0 +1,44 @@
5329 +#ifndef CLUSTERED_H
5330 +#define CLUSTERED_H
5331 +
5332 +/* Which cache level should be used to group CPUs into clusters?
5333 + * GLOBAL_CLUSTER means that all CPUs form a single cluster (just like under
5334 + * global scheduling).
5335 + */
5336 +enum cache_level {
5337 +	GLOBAL_CLUSTER = 0,
5338 +	L1_CLUSTER     = 1,
5339 +	L2_CLUSTER     = 2,
5340 +	L3_CLUSTER     = 3
5341 +};
5342 +
5343 +int parse_cache_level(const char *str, enum cache_level *level);
5344 +const char* cache_level_name(enum cache_level level);
5345 +
5346 +/* expose a cache level in a /proc dir */
5347 +struct proc_dir_entry* create_cluster_file(struct proc_dir_entry* parent,
5348 +					   enum cache_level* level);
5349 +
5350 +
5351 +
5352 +struct scheduling_cluster {
5353 +	unsigned int id;
5354 +	/* list of CPUs that are part of this cluster */
5355 +	struct list_head cpus;
5356 +};
5357 +
5358 +struct cluster_cpu {
5359 +	unsigned int id; /* which CPU is this? */
5360 +	struct list_head cluster_list; /* List of the CPUs in this cluster. */
5361 +	struct scheduling_cluster* cluster; /* The cluster that this CPU belongs to. */
5362 +};
5363 +
5364 +int get_cluster_size(enum cache_level level);
5365 +
5366 +int assign_cpus_to_clusters(enum cache_level level,
5367 +			    struct scheduling_cluster* clusters[],
5368 +			    unsigned int num_clusters,
5369 +			    struct cluster_cpu* cpus[],
5370 +			    unsigned int num_cpus);
5371 +
5372 +#endif
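
A sketch of how a clustered plugin might drive the interface above; allocation of the clusters[]/cpus[] arrays and most error handling are elided, and demo_setup_clusters() is illustrative only:

static int demo_setup_clusters(struct scheduling_cluster *clusters[],
			       struct cluster_cpu *cpus[],
			       unsigned int num_cpus)
{
	int cluster_size = get_cluster_size(L2_CLUSTER);
	unsigned int num_clusters;

	if (cluster_size <= 0)
		return -EINVAL;

	num_clusters = DIV_ROUND_UP(num_cpus, cluster_size);
	/* links each cluster_cpu into its scheduling_cluster's cpus list */
	return assign_cpus_to_clusters(L2_CLUSTER, clusters, num_clusters,
				       cpus, num_cpus);
}
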
5373 diff --git a/include/litmus/edf_common.h b/include/litmus/edf_common.h
5374 new file mode 100644
5375 index 0000000..bbaf22e
5376 --- /dev/null
5377 +++ b/include/litmus/edf_common.h
5378 @@ -0,0 +1,25 @@
5379 +/*
5380 + * EDF common data structures and utility functions shared by all EDF
5381 + * based scheduler plugins
5382 + */
5383 +
5384 +/* CLEANUP: Add comments and make it less messy.
5385 + *
5386 + */
5387 +
5388 +#ifndef __UNC_EDF_COMMON_H__
5389 +#define __UNC_EDF_COMMON_H__
5390 +
5391 +#include <litmus/rt_domain.h>
5392 +
5393 +void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
5394 +		     release_jobs_t release);
5395 +
5396 +int edf_higher_prio(struct task_struct* first,
5397 +		    struct task_struct* second);
5398 +
5399 +int edf_ready_order(struct bheap_node* a, struct bheap_node* b);
5400 +
5401 +int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t);
5402 +
5403 +#endif
5404 diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h
5405 new file mode 100644
5406 index 0000000..fd9b30d
5407 --- /dev/null
5408 +++ b/include/litmus/fdso.h
5409 @@ -0,0 +1,78 @@
5410 +/* fdso.h - file descriptor attached shared objects
5411 + *
5412 + * (c) 2007 B. Brandenburg, LITMUS^RT project
5413 + */
5414 +
5415 +#ifndef _LINUX_FDSO_H_
5416 +#define _LINUX_FDSO_H_
5417 +
5418 +#include <linux/list.h>
5419 +#include <asm/atomic.h>
5420 +
5421 +#include <linux/fs.h>
5422 +#include <linux/slab.h>
5423 +
5424 +#define MAX_OBJECT_DESCRIPTORS 85
5425 +
5426 +typedef enum  {
5427 +	MIN_OBJ_TYPE 	= 0,
5428 +
5429 +	FMLP_SEM	= 0,
5430 +	SRP_SEM		= 1,
5431 +
5432 +	MPCP_SEM	= 2,
5433 +	MPCP_VS_SEM	= 3,
5434 +	DPCP_SEM	= 4,
5435 +	PCP_SEM         = 5,
5436 +
5437 +	DFLP_SEM	= 6,
5438 +
5439 +	MAX_OBJ_TYPE	= 6
5440 +} obj_type_t;
5441 +
5442 +struct inode_obj_id {
5443 +	struct list_head	list;
5444 +	atomic_t		count;
5445 +	struct inode*		inode;
5446 +
5447 +	obj_type_t 		type;
5448 +	void*			obj;
5449 +	unsigned int		id;
5450 +};
5451 +
5452 +struct fdso_ops;
5453 +
5454 +struct od_table_entry {
5455 +	unsigned int		used;
5456 +
5457 +	struct inode_obj_id*	obj;
5458 +	const struct fdso_ops*	class;
5459 +};
5460 +
5461 +struct fdso_ops {
5462 +	int   (*create)(void** obj_ref, obj_type_t type, void* __user);
5463 +	void  (*destroy)(obj_type_t type, void*);
5464 +	int   (*open)	(struct od_table_entry*, void* __user);
5465 +	int   (*close)	(struct od_table_entry*);
5466 +};
5467 +
5468 +/* translate a userspace supplied od into the raw table entry
5469 + * returns NULL if od is invalid
5470 + */
5471 +struct od_table_entry* get_entry_for_od(int od);
5472 +
5473 +/* translate a userspace supplied od into the associated object
5474 + * returns NULL if od is invalid
5475 + */
5476 +static inline void* od_lookup(int od, obj_type_t type)
5477 +{
5478 +	struct od_table_entry* e = get_entry_for_od(od);
5479 +	return e && e->obj->type == type ? e->obj->obj : NULL;
5480 +}
5481 +
5482 +#define lookup_fmlp_sem(od)((struct pi_semaphore*)  od_lookup(od, FMLP_SEM))
5483 +#define lookup_srp_sem(od) ((struct srp_semaphore*) od_lookup(od, SRP_SEM))
5484 +#define lookup_ics(od)     ((struct ics*)           od_lookup(od, ICS_ID))
5485 +
5486 +
5487 +#endif
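
The lookup_* macros above are thin wrappers around od_lookup(); resolving a userspace object descriptor to, e.g., an SRP semaphore (declared in litmus/srp.h later in this series) reduces to a sketch like the following, where demo_lookup_srp() is illustrative only:

static struct srp_semaphore *demo_lookup_srp(int od)
{
	/* NULL if od is invalid or refers to a different object type */
	return (struct srp_semaphore *) od_lookup(od, SRP_SEM);
}
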
5488 diff --git a/include/litmus/fp_common.h b/include/litmus/fp_common.h
5489 new file mode 100644
5490 index 0000000..19356c0
5491 --- /dev/null
5492 +++ b/include/litmus/fp_common.h
5493 @@ -0,0 +1,105 @@
5494 +/* Fixed-priority scheduler support.
5495 + */
5496 +
5497 +#ifndef __FP_COMMON_H__
5498 +#define __FP_COMMON_H__
5499 +
5500 +#include <litmus/rt_domain.h>
5501 +
5502 +#include <asm/bitops.h>
5503 +
5504 +
5505 +void fp_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
5506 +		    release_jobs_t release);
5507 +
5508 +int fp_higher_prio(struct task_struct* first,
5509 +		   struct task_struct* second);
5510 +
5511 +int fp_ready_order(struct bheap_node* a, struct bheap_node* b);
5512 +
5513 +#define FP_PRIO_BIT_WORDS (LITMUS_MAX_PRIORITY / BITS_PER_LONG)
5514 +
5515 +#if (LITMUS_MAX_PRIORITY % BITS_PER_LONG)
5516 +#error LITMUS_MAX_PRIORITY must be a multiple of BITS_PER_LONG
5517 +#endif
5518 +
5519 +/* bitmask-indexed priority queue */
5520 +struct fp_prio_queue {
5521 +	unsigned long	bitmask[FP_PRIO_BIT_WORDS];
5522 +	struct bheap	queue[LITMUS_MAX_PRIORITY];
5523 +};
5524 +
5525 +void fp_prio_queue_init(struct fp_prio_queue* q);
5526 +
5527 +static inline void fpq_set(struct fp_prio_queue* q, unsigned int index)
5528 +{
5529 +	unsigned long *word = q->bitmask + (index / BITS_PER_LONG);
5530 +	__set_bit(index % BITS_PER_LONG, word);
5531 +}
5532 +
5533 +static inline void fpq_clear(struct fp_prio_queue* q, unsigned int index)
5534 +{
5535 +	unsigned long *word = q->bitmask + (index / BITS_PER_LONG);
5536 +	__clear_bit(index % BITS_PER_LONG, word);
5537 +}
5538 +
5539 +static inline unsigned int fpq_find(struct fp_prio_queue* q)
5540 +{
5541 +	int i;
5542 +
5543 +	/* loop optimizer should unroll this */
5544 +	for (i = 0; i < FP_PRIO_BIT_WORDS; i++)
5545 +		if (q->bitmask[i])
5546 +			return __ffs(q->bitmask[i]) + i * BITS_PER_LONG;
5547 +
5548 +	return LITMUS_MAX_PRIORITY; /* nothing found */
5549 +}
5550 +
5551 +static inline void fp_prio_add(struct fp_prio_queue* q, struct task_struct* t, unsigned int index)
5552 +{
5553 +	BUG_ON(index >= LITMUS_MAX_PRIORITY);
5554 +	BUG_ON(bheap_node_in_heap(tsk_rt(t)->heap_node));
5555 +
5556 +	fpq_set(q, index);
5557 +	bheap_insert(fp_ready_order, &q->queue[index], tsk_rt(t)->heap_node);
5558 +}
5559 +
5560 +static inline void fp_prio_remove(struct fp_prio_queue* q, struct task_struct* t, unsigned int index)
5561 +{
5562 +	BUG_ON(!is_queued(t));
5563 +
5564 +	bheap_delete(fp_ready_order, &q->queue[index], tsk_rt(t)->heap_node);
5565 +	if (likely(bheap_empty(&q->queue[index])))
5566 +		fpq_clear(q, index);
5567 +}
5568 +
5569 +static inline struct task_struct* fp_prio_peek(struct fp_prio_queue* q)
5570 +{
5571 +	unsigned int idx = fpq_find(q);
5572 +	struct bheap_node* hn;
5573 +
5574 +	if (idx < LITMUS_MAX_PRIORITY) {
5575 +		hn = bheap_peek(fp_ready_order, &q->queue[idx]);
5576 +		return bheap2task(hn);
5577 +	} else
5578 +		return NULL;
5579 +}
5580 +
5581 +static inline struct task_struct* fp_prio_take(struct fp_prio_queue* q)
5582 +{
5583 +	unsigned int idx = fpq_find(q);
5584 +	struct bheap_node* hn;
5585 +
5586 +	if (idx < LITMUS_MAX_PRIORITY) {
5587 +		hn = bheap_take(fp_ready_order, &q->queue[idx]);
5588 +		if (likely(bheap_empty(&q->queue[idx])))
5589 +			fpq_clear(q, idx);
5590 +		return bheap2task(hn);
5591 +	} else
5592 +		return NULL;
5593 +}
5594 +
5595 +int fp_preemption_needed(struct fp_prio_queue*  q, struct task_struct *t);
5596 +
5597 +
5598 +#endif
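
The queue keeps one bit per priority level: with BITS_PER_LONG == 64, priority index 70 is tracked by bit 70 % 64 == 6 of bitmask[70 / 64 == 1], so fpq_find() only needs to scan FP_PRIO_BIT_WORDS words. A sketch of a fixed-priority release path built on top of it; the scheduled-task argument is illustrative and litmus_reschedule_local() comes from litmus/preempt.h (added below in this patch):

static void demo_fp_release(struct fp_prio_queue *q, struct task_struct *t,
			    struct task_struct *scheduled)
{
	fp_prio_add(q, t, get_priority(t));
	if (fp_preemption_needed(q, scheduled))
		litmus_reschedule_local();
}
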
5599 diff --git a/include/litmus/fpmath.h b/include/litmus/fpmath.h
5600 new file mode 100644
5601 index 0000000..642de98
5602 --- /dev/null
5603 +++ b/include/litmus/fpmath.h
5604 @@ -0,0 +1,147 @@
5605 +#ifndef __FP_MATH_H__
5606 +#define __FP_MATH_H__
5607 +
5608 +#include <linux/math64.h>
5609 +
5610 +#ifndef __KERNEL__
5611 +#include <stdint.h>
5612 +#define abs(x) (((x) < 0) ? -(x) : x)
5613 +#endif
5614 +
5615 +// Use 64-bit because we want to track things at the nanosecond scale.
5616 +// This can lead to very large numbers.
5617 +typedef int64_t fpbuf_t;
5618 +typedef struct
5619 +{
5620 +	fpbuf_t val;
5621 +} fp_t;
5622 +
5623 +#define FP_SHIFT 10
5624 +#define ROUND_BIT (FP_SHIFT - 1)
5625 +
5626 +#define _fp(x) ((fp_t) {x})
5627 +
5628 +#ifdef __KERNEL__
5629 +static const fp_t LITMUS_FP_ZERO = {.val = 0};
5630 +static const fp_t LITMUS_FP_ONE = {.val = (1 << FP_SHIFT)};
5631 +#endif
5632 +
5633 +static inline fp_t FP(fpbuf_t x)
5634 +{
5635 +	return _fp(((fpbuf_t) x) << FP_SHIFT);
5636 +}
5637 +
5638 +/* divide two integers to obtain a fixed point value  */
5639 +static inline fp_t _frac(fpbuf_t a, fpbuf_t b)
5640 +{
5641 +	return _fp(div64_s64(FP(a).val, (b)));
5642 +}
5643 +
5644 +static inline fpbuf_t _point(fp_t x)
5645 +{
5646 +	return (x.val % (1 << FP_SHIFT));
5647 +
5648 +}
5649 +
5650 +#define fp2str(x) x.val
5651 +/*(x.val >> FP_SHIFT), (x.val % (1 << FP_SHIFT)) */
5652 +#define _FP_  "%ld/1024"
5653 +
5654 +static inline fpbuf_t _floor(fp_t x)
5655 +{
5656 +	return x.val >> FP_SHIFT;
5657 +}
5658 +
5659 +/* FIXME: negative rounding */
5660 +static inline fpbuf_t _round(fp_t x)
5661 +{
5662 +	return _floor(x) + ((x.val >> ROUND_BIT) & 1);
5663 +}
5664 +
5665 +/* multiply two fixed point values */
5666 +static inline fp_t _mul(fp_t a, fp_t b)
5667 +{
5668 +	return _fp((a.val * b.val) >> FP_SHIFT);
5669 +}
5670 +
5671 +static inline fp_t _div(fp_t a, fp_t b)
5672 +{
5673 +#if !defined(__KERNEL__) && !defined(unlikely)
5674 +#define unlikely(x) (x)
5675 +#define DO_UNDEF_UNLIKELY
5676 +#endif
5677 +	/* try not to overflow */
5678 +	if (unlikely(  a.val > (2l << ((sizeof(fpbuf_t)*8) - FP_SHIFT)) ))
5679 +		return _fp((a.val / b.val) << FP_SHIFT);
5680 +	else
5681 +		return _fp((a.val << FP_SHIFT) / b.val);
5682 +#ifdef DO_UNDEF_UNLIKELY
5683 +#undef unlikely
5684 +#undef DO_UNDEF_UNLIKELY
5685 +#endif
5686 +}
5687 +
5688 +static inline fp_t _add(fp_t a, fp_t b)
5689 +{
5690 +	return _fp(a.val + b.val);
5691 +}
5692 +
5693 +static inline fp_t _sub(fp_t a, fp_t b)
5694 +{
5695 +	return _fp(a.val - b.val);
5696 +}
5697 +
5698 +static inline fp_t _neg(fp_t x)
5699 +{
5700 +	return _fp(-x.val);
5701 +}
5702 +
5703 +static inline fp_t _abs(fp_t x)
5704 +{
5705 +	return _fp(abs(x.val));
5706 +}
5707 +
5708 +/* works the same as casting float/double to integer */
5709 +static inline fpbuf_t _fp_to_integer(fp_t x)
5710 +{
5711 +	return _floor(_abs(x)) * ((x.val > 0) ? 1 : -1);
5712 +}
5713 +
5714 +static inline fp_t _integer_to_fp(fpbuf_t x)
5715 +{
5716 +	return _frac(x,1);
5717 +}
5718 +
5719 +static inline int _leq(fp_t a, fp_t b)
5720 +{
5721 +	return a.val <= b.val;
5722 +}
5723 +
5724 +static inline int _geq(fp_t a, fp_t b)
5725 +{
5726 +	return a.val >= b.val;
5727 +}
5728 +
5729 +static inline int _lt(fp_t a, fp_t b)
5730 +{
5731 +	return a.val < b.val;
5732 +}
5733 +
5734 +static inline int _gt(fp_t a, fp_t b)
5735 +{
5736 +	return a.val > b.val;
5737 +}
5738 +
5739 +static inline int _eq(fp_t a, fp_t b)
5740 +{
5741 +	return a.val == b.val;
5742 +}
5743 +
5744 +static inline fp_t _max(fp_t a, fp_t b)
5745 +{
5746 +	if (a.val < b.val)
5747 +		return b;
5748 +	else
5749 +		return a;
5750 +}
5751 +#endif
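
With FP_SHIFT == 10 the implied denominator is 1024: _frac(1, 3) stores 341 (about 0.333), _mul(_frac(1, 3), FP(9)) yields a value of 3069, which _fp_to_integer() truncates to 2 while _round() rounds to 3. A small sketch of the typical "scale an integer by a fraction" pattern; demo_scaled_share() is illustrative only:

static fpbuf_t demo_scaled_share(fpbuf_t budget_ns, fpbuf_t num, fpbuf_t den)
{
	fp_t fraction = _frac(num, den);		/* num/den, 10 fractional bits */
	fp_t scaled   = _mul(fraction, FP(budget_ns));	/* budget * num/den */
	return _round(scaled);				/* back to integer ns */
}
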
5752 diff --git a/include/litmus/jobs.h b/include/litmus/jobs.h
5753 new file mode 100644
5754 index 0000000..24771df
5755 --- /dev/null
5756 +++ b/include/litmus/jobs.h
5757 @@ -0,0 +1,10 @@
5758 +#ifndef __LITMUS_JOBS_H__
5759 +#define __LITMUS_JOBS_H__
5760 +
5761 +void prepare_for_next_period(struct task_struct *t);
5762 +void release_at(struct task_struct *t, lt_t start);
5763 +
5764 +long default_wait_for_release_at(lt_t release_time);
5765 +long complete_job(void);
5766 +
5767 +#endif
5768 diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h
5769 index c87863c..0519831 100644
5770 --- a/include/litmus/litmus.h
5771 +++ b/include/litmus/litmus.h
5772 @@ -6,7 +6,49 @@
5773  #ifndef _LINUX_LITMUS_H_
5774  #define _LINUX_LITMUS_H_
5775  
5776 +#include <litmus/debug_trace.h>
5777 +
5778 +#ifdef CONFIG_RELEASE_MASTER
5779 +extern atomic_t release_master_cpu;
5780 +#endif
5781 +
5782 +/* in_list - is a given list_head queued on some list?
5783 + */
5784 +static inline int in_list(struct list_head* list)
5785 +{
5786 +	return !(  /* case 1: deleted */
5787 +		   (list->next == LIST_POISON1 &&
5788 +		    list->prev == LIST_POISON2)
5789 +		 ||
5790 +		   /* case 2: initialized */
5791 +		   (list->next == list &&
5792 +		    list->prev == list)
5793 +		);
5794 +}
5795 +
5796 +struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq);
5797 +
5798 +#define NO_CPU			0xffffffff
5799 +
5800 +void litmus_fork(struct task_struct *tsk);
5801 +void litmus_exec(void);
5802 +/* clean up real-time state of a task */
5803 +void litmus_clear_state(struct task_struct *dead_tsk);
5804 +void exit_litmus(struct task_struct *dead_tsk);
5805 +
5806 +/* Prevent the plugin from being switched out from underneath a code
5807 + * path. Might sleep, so may be called only from non-atomic context. */
5808 +void litmus_plugin_switch_disable(void);
5809 +void litmus_plugin_switch_enable(void);
5810 +
5811 +long litmus_admit_task(struct task_struct *tsk);
5812 +void litmus_exit_task(struct task_struct *tsk);
5813 +void litmus_dealloc(struct task_struct *tsk);
5814 +void litmus_do_exit(struct task_struct *tsk);
5815 +
5816  #define is_realtime(t) 		((t)->policy == SCHED_LITMUS)
5817 +#define rt_transition_pending(t) \
5818 +	((t)->rt_param.transition_pending)
5819  
5820  #define tsk_rt(t)		(&(t)->rt_param)
5821  
5822 @@ -28,6 +70,7 @@
5823  #define get_partition(t) 	(tsk_rt(t)->task_params.cpu)
5824  #define get_priority(t) 	(tsk_rt(t)->task_params.priority)
5825  #define get_class(t)        (tsk_rt(t)->task_params.cls)
5826 +#define get_release_policy(t) (tsk_rt(t)->task_params.release_policy)
5827  
5828  /* job_param macros */
5829  #define get_exec_time(t)    (tsk_rt(t)->job_params.exec_time)
5830 @@ -35,6 +78,15 @@
5831  #define get_release(t)		(tsk_rt(t)->job_params.release)
5832  #define get_lateness(t)		(tsk_rt(t)->job_params.lateness)
5833  
5834 +/* release policy macros */
5835 +#define is_periodic(t)		(get_release_policy(t) == TASK_PERIODIC)
5836 +#define is_sporadic(t)		(get_release_policy(t) == TASK_SPORADIC)
5837 +#ifdef CONFIG_ALLOW_EARLY_RELEASE
5838 +#define is_early_releasing(t)	(get_release_policy(t) == TASK_EARLY)
5839 +#else
5840 +#define is_early_releasing(t)	(0)
5841 +#endif
5842 +
5843  #define is_hrt(t)     		\
5844  	(tsk_rt(t)->task_params.cls == RT_CLASS_HARD)
5845  #define is_srt(t)     		\
5846 @@ -48,6 +100,196 @@ static inline lt_t litmus_clock(void)
5847  	return ktime_to_ns(ktime_get());
5848  }
5849  
5850 +/* A macro to convert from nanoseconds to ktime_t. */
5851 +#define ns_to_ktime(t)		ktime_add_ns(ktime_set(0, 0), t)
5852 +
5853 +#define get_domain(t) (tsk_rt(t)->domain)
5854 +
5855 +/* Honor the flag in the preempt_count variable that is set
5856 + * when scheduling is in progress.
5857 + */
5858 +#define is_running(t) 			\
5859 +	((t)->state == TASK_RUNNING || 	\
5860 +	 task_thread_info(t)->preempt_count & PREEMPT_ACTIVE)
5861 +
5862 +#define is_blocked(t)       \
5863 +	(!is_running(t))
5864 +#define is_released(t, now)	\
5865 +	(lt_before_eq(get_release(t), now))
5866 +#define is_tardy(t, now)    \
5867 +	(lt_before_eq(tsk_rt(t)->job_params.deadline, now))
5868 +
5869 +/* real-time comparison macros */
5870 +#define earlier_deadline(a, b) (lt_before(\
5871 +	(a)->rt_param.job_params.deadline,\
5872 +	(b)->rt_param.job_params.deadline))
5873 +#define earlier_release(a, b)  (lt_before(\
5874 +	(a)->rt_param.job_params.release,\
5875 +	(b)->rt_param.job_params.release))
5876 +
5877 +void preempt_if_preemptable(struct task_struct* t, int on_cpu);
5878 +
5879 +#ifdef CONFIG_LITMUS_LOCKING
5880 +void srp_ceiling_block(void);
5881 +#else
5882 +#define srp_ceiling_block() /* nothing */
5883 +#endif
5884 +
5885 +#define bheap2task(hn) ((struct task_struct*) hn->value)
5886 +
5887 +#ifdef CONFIG_NP_SECTION
5888 +
5889 +static inline int is_kernel_np(struct task_struct *t)
5890 +{
5891 +	return tsk_rt(t)->kernel_np;
5892 +}
5893 +
5894 +static inline int is_user_np(struct task_struct *t)
5895 +{
5896 +	return tsk_rt(t)->ctrl_page ? tsk_rt(t)->ctrl_page->sched.np.flag : 0;
5897 +}
5898 +
5899 +static inline void request_exit_np(struct task_struct *t)
5900 +{
5901 +	if (is_user_np(t)) {
5902 +		/* Set the flag that tells user space to call
5903 +		 * into the kernel at the end of a critical section. */
5904 +		if (likely(tsk_rt(t)->ctrl_page)) {
5905 +			TRACE_TASK(t, "setting delayed_preemption flag\n");
5906 +			tsk_rt(t)->ctrl_page->sched.np.preempt = 1;
5907 +		}
5908 +	}
5909 +}
5910 +
5911 +static inline void make_np(struct task_struct *t)
5912 +{
5913 +	tsk_rt(t)->kernel_np++;
5914 +}
5915 +
5916 +/* Caller should check if preemption is necessary when
5917 + * the function returns 0.
5918 + */
5919 +static inline int take_np(struct task_struct *t)
5920 +{
5921 +	return --tsk_rt(t)->kernel_np;
5922 +}
5923 +
5924 +/* returns 0 if remote CPU needs an IPI to preempt, 1 if no IPI is required */
5925 +static inline int request_exit_np_atomic(struct task_struct *t)
5926 +{
5927 +	union np_flag old, new;
5928 +
5929 +	if (tsk_rt(t)->ctrl_page) {
5930 +		old.raw = tsk_rt(t)->ctrl_page->sched.raw;
5931 +		if (old.np.flag == 0) {
5932 +			/* no longer non-preemptive */
5933 +			return 0;
5934 +		} else if (old.np.preempt) {
5935 +			/* already set, nothing for us to do */
5936 +			return 1;
5937 +		} else {
5938 +			/* non-preemptive and flag not set */
5939 +			new.raw = old.raw;
5940 +			new.np.preempt = 1;
5941 +			/* if we get old back, then we atomically set the flag */
5942 +			return cmpxchg(&tsk_rt(t)->ctrl_page->sched.raw, old.raw, new.raw) == old.raw;
5943 +			/* If we raced with a concurrent change, then so be
5944 +			 * it. Deliver it by IPI.  We don't want an unbounded
5945 +			 * retry loop here since tasks might exploit that to
5946 +			 * keep the kernel busy indefinitely. */
5947 +		}
5948 +	} else
5949 +		return 0;
5950 +}
5951 +
5952 +#else
5953 +
5954 +static inline int is_kernel_np(struct task_struct* t)
5955 +{
5956 +	return 0;
5957 +}
5958 +
5959 +static inline int is_user_np(struct task_struct* t)
5960 +{
5961 +	return 0;
5962 +}
5963 +
5964 +static inline void request_exit_np(struct task_struct *t)
5965 +{
5966 +	/* request_exit_np() shouldn't be called if !CONFIG_NP_SECTION */
5967 +	BUG();
5968 +}
5969 +
5970 +static inline int request_exit_np_atomic(struct task_struct *t)
5971 +{
5972 +	return 0;
5973 +}
5974 +
5975 +#endif
5976 +
5977 +static inline void clear_exit_np(struct task_struct *t)
5978 +{
5979 +	if (likely(tsk_rt(t)->ctrl_page))
5980 +		tsk_rt(t)->ctrl_page->sched.np.preempt = 0;
5981 +}
5982 +
5983 +static inline int is_np(struct task_struct *t)
5984 +{
5985 +#ifdef CONFIG_SCHED_DEBUG_TRACE
5986 +	int kernel, user;
5987 +	kernel = is_kernel_np(t);
5988 +	user   = is_user_np(t);
5989 +	if (kernel || user)
5990 +		TRACE_TASK(t, " is non-preemptive: kernel=%d user=%d\n",
5992 +			   kernel, user);
5993 +	return kernel || user;
5994 +#else
5995 +	return unlikely(is_kernel_np(t) || is_user_np(t));
5996 +#endif
5997 +}
5998 +
5999 +static inline int is_present(struct task_struct* t)
6000 +{
6001 +	return t && tsk_rt(t)->present;
6002 +}
6003 +
6004 +static inline int is_completed(struct task_struct* t)
6005 +{
6006 +	return t && tsk_rt(t)->completed;
6007 +}
6008 +
6009 +
6010 +/* Used to convert ns-specified execution costs and periods into
6011 + * integral quanta equivalents.
6012 + */
6013 +#define LITMUS_QUANTUM_LENGTH_NS (CONFIG_LITMUS_QUANTUM_LENGTH_US * 1000ULL)
6014 +
6015 +/* make the unit explicit */
6016 +typedef unsigned long quanta_t;
6017 +
6018 +enum round {
6019 +	FLOOR,
6020 +	CEIL
6021 +};
6022 +
6023 +static inline quanta_t time2quanta(lt_t time, enum round round)
6024 +{
6025 +	s64  quantum_length = LITMUS_QUANTUM_LENGTH_NS;
6026 +
6027 +	if (do_div(time, quantum_length) && round == CEIL)
6028 +		time++;
6029 +	return (quanta_t) time;
6030 +}
6031 +
6032 +static inline lt_t quanta2time(quanta_t quanta)
6033 +{
6034 +	return quanta * LITMUS_QUANTUM_LENGTH_NS;
6035 +}
6036 +
6037 +/* By how much is cpu staggered behind CPU 0? */
6038 +u64 cpu_stagger_offset(int cpu);
6039 +
6040  static inline struct control_page* get_control_page(struct task_struct *t)
6041  {
6042  	return tsk_rt(t)->ctrl_page;
6043 @@ -58,4 +300,30 @@ static inline int has_control_page(struct task_struct* t)
6044  	return tsk_rt(t)->ctrl_page != NULL;
6045  }
6046  
6047 +
6048 +#ifdef CONFIG_SCHED_OVERHEAD_TRACE
6049 +
6050 +#define TS_SYSCALL_IN_START						\
6051 +	if (has_control_page(current)) {				\
6052 +		__TS_SYSCALL_IN_START(&get_control_page(current)->ts_syscall_start); \
6053 +	}
6054 +
6055 +#define TS_SYSCALL_IN_END						\
6056 +	if (has_control_page(current)) {				\
6057 +		unsigned long flags;					\
6058 +		uint64_t irqs;						\
6059 +		local_irq_save(flags);					\
6060 +		irqs = get_control_page(current)->irq_count -		\
6061 +			get_control_page(current)->irq_syscall_start;	\
6062 +		__TS_SYSCALL_IN_END(&irqs);				\
6063 +		local_irq_restore(flags);				\
6064 +	}
6065 +
6066 +#else
6067 +
6068 +#define TS_SYSCALL_IN_START
6069 +#define TS_SYSCALL_IN_END
6070 +
6071 +#endif
6072 +
6073  #endif
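
The quantum conversion above truncates or rounds up depending on the rounding mode: with a 1000us quantum (LITMUS_QUANTUM_LENGTH_NS == 1,000,000), time2quanta(2500000, FLOOR) is 2, time2quanta(2500000, CEIL) is 3, and quanta2time(3) maps back to 3,000,000 ns. A sketch of the usual rounding choice when converting a budget; demo_budget_in_quanta() is illustrative only:

static quanta_t demo_budget_in_quanta(struct task_struct *t)
{
	/* round up so a task is never granted less than its requested budget */
	return time2quanta(get_exec_cost(t), CEIL);
}
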
6074 diff --git a/include/litmus/litmus_proc.h b/include/litmus/litmus_proc.h
6075 new file mode 100644
6076 index 0000000..a5db24c
6077 --- /dev/null
6078 +++ b/include/litmus/litmus_proc.h
6079 @@ -0,0 +1,63 @@
6080 +#include <litmus/sched_plugin.h>
6081 +#include <linux/proc_fs.h>
6082 +
6083 +int __init init_litmus_proc(void);
6084 +void exit_litmus_proc(void);
6085 +
6086 +struct cd_mapping
6087 +{
6088 +	int id;
6089 +	cpumask_var_t mask;
6090 +	struct proc_dir_entry *proc_file;
6091 +};
6092 +
6093 +struct domain_proc_info
6094 +{
6095 +	int num_cpus;
6096 +	int num_domains;
6097 +
6098 +	struct cd_mapping *cpu_to_domains;
6099 +	struct cd_mapping *domain_to_cpus;
6100 +};
6101 +
6102 +/*
6103 + * On success, returns 0 and sets the pointer to the location of the new
6104 + * proc dir entry, otherwise returns an error code and sets pde to NULL.
6105 + */
6106 +long make_plugin_proc_dir(struct sched_plugin* plugin,
6107 +		struct proc_dir_entry** pde);
6108 +
6109 +/*
6110 + * Plugins should deallocate all child proc directory entries before
6111 + * calling this, to avoid memory leaks.
6112 + */
6113 +void remove_plugin_proc_dir(struct sched_plugin* plugin);
6114 +
6115 +/*
6116 + * Setup the CPU <-> sched domain mappings in proc
6117 + */
6118 +long activate_domain_proc(struct domain_proc_info* map);
6119 +
6120 +/*
6121 + * Remove the CPU <-> sched domain mappings from proc
6122 + */
6123 +long deactivate_domain_proc(void);
6124 +
6125 +/*
6126 + * Alloc memory for the mapping
6127 + * Note: Does not set up proc files. Use make_sched_domain_maps for that.
6128 + */
6129 +long init_domain_proc_info(struct domain_proc_info* map,
6130 +	int num_cpus, int num_domains);
6131 +
6132 +/*
6133 + * Free memory of the mapping
6134 + * Note: Does not clean up proc files. Use deactivate_domain_proc for that.
6135 + */
6136 +void destroy_domain_proc_info(struct domain_proc_info* map);
6137 +
6138 +/* Copy at most ksize-1 bytes from ubuf into kbuf, null-terminate kbuf, and
6139 + * remove a '\n' if present. Returns the number of bytes that were read or
6140 + * -EFAULT. */
6141 +int copy_and_chomp(char *kbuf, unsigned long ksize,
6142 +		   __user const char* ubuf, unsigned long ulength);
6143 diff --git a/include/litmus/locking.h b/include/litmus/locking.h
6144 new file mode 100644
6145 index 0000000..4d7b870
6146 --- /dev/null
6147 +++ b/include/litmus/locking.h
6148 @@ -0,0 +1,28 @@
6149 +#ifndef LITMUS_LOCKING_H
6150 +#define LITMUS_LOCKING_H
6151 +
6152 +struct litmus_lock_ops;
6153 +
6154 +/* Generic base struct for LITMUS^RT userspace semaphores.
6155 + * This structure should be embedded in protocol-specific semaphores.
6156 + */
6157 +struct litmus_lock {
6158 +	struct litmus_lock_ops *ops;
6159 +	int type;
6160 +};
6161 +
6162 +struct litmus_lock_ops {
6163 +	/* Current task tries to obtain / drop a reference to a lock.
6164 +	 * Optional methods, allowed by default. */
6165 +	int (*open)(struct litmus_lock*, void* __user);
6166 +	int (*close)(struct litmus_lock*);
6167 +
6168 +	/* Current task tries to lock/unlock this lock (mandatory methods). */
6169 +	int (*lock)(struct litmus_lock*);
6170 +	int (*unlock)(struct litmus_lock*);
6171 +
6172 +	/* The lock is no longer being referenced (mandatory method). */
6173 +	void (*deallocate)(struct litmus_lock*);
6174 +};
6175 +
6176 +#endif
6177 diff --git a/include/litmus/preempt.h b/include/litmus/preempt.h
6178 new file mode 100644
6179 index 0000000..4fd108a
6180 --- /dev/null
6181 +++ b/include/litmus/preempt.h
6182 @@ -0,0 +1,164 @@
6183 +#ifndef LITMUS_PREEMPT_H
6184 +#define LITMUS_PREEMPT_H
6185 +
6186 +#include <linux/types.h>
6187 +#include <linux/cache.h>
6188 +#include <linux/percpu.h>
6189 +#include <asm/atomic.h>
6190 +
6191 +#include <litmus/debug_trace.h>
6192 +
6193 +DECLARE_PER_CPU_SHARED_ALIGNED(atomic_t, resched_state);
6194 +
6195 +#ifdef CONFIG_PREEMPT_STATE_TRACE
6196 +const char* sched_state_name(int s);
6197 +#define TRACE_STATE(fmt, args...) TRACE("SCHED_STATE " fmt, args)
6198 +#else
6199 +#define TRACE_STATE(fmt, args...) /* ignore */
6200 +#endif
6201 +
6202 +#define VERIFY_SCHED_STATE(x)						\
6203 +	do { int __s = get_sched_state();				\
6204 +		if ((__s & (x)) == 0)					\
6205 +			TRACE_STATE("INVALID s=0x%x (%s) not "		\
6206 +				    "in 0x%x (%s) [%s]\n",		\
6207 +				    __s, sched_state_name(__s),		\
6208 +				    (x), #x, __FUNCTION__);		\
6209 +	} while (0);
6210 +
6211 +#define TRACE_SCHED_STATE_CHANGE(x, y, cpu)				\
6212 +	TRACE_STATE("[P%d] 0x%x (%s) -> 0x%x (%s)\n",			\
6213 +		    cpu,  (x), sched_state_name(x),			\
6214 +		    (y), sched_state_name(y))
6215 +
6216 +
6217 +typedef enum scheduling_state {
6218 +	TASK_SCHEDULED    = (1 << 0),  /* The currently scheduled task is the one that
6219 +					* should be scheduled, and the processor does not
6220 +					* plan to invoke schedule(). */
6221 +	SHOULD_SCHEDULE   = (1 << 1),  /* A remote processor has determined that the
6222 +					* processor should reschedule, but this has not
6223 +					* been communicated yet (IPI still pending). */
6224 +	WILL_SCHEDULE     = (1 << 2),  /* The processor has noticed that it has to
6225 +					* reschedule and will do so shortly. */
6226 +	TASK_PICKED       = (1 << 3),  /* The processor is currently executing schedule(),
6227 +					* has selected a new task to schedule, but has not
6228 +					* yet performed the actual context switch. */
6229 +	PICKED_WRONG_TASK = (1 << 4),  /* The processor has not yet performed the context
6230 +					* switch, but a remote processor has already
6231 +					* determined that a higher-priority task became
6232 +					* eligible after the task was picked. */
6233 +} sched_state_t;
6234 +
6235 +static inline sched_state_t get_sched_state_on(int cpu)
6236 +{
6237 +	return atomic_read(&per_cpu(resched_state, cpu));
6238 +}
6239 +
6240 +static inline sched_state_t get_sched_state(void)
6241 +{
6242 +	return atomic_read(&__get_cpu_var(resched_state));
6243 +}
6244 +
6245 +static inline int is_in_sched_state(int possible_states)
6246 +{
6247 +	return get_sched_state() & possible_states;
6248 +}
6249 +
6250 +static inline int cpu_is_in_sched_state(int cpu, int possible_states)
6251 +{
6252 +	return get_sched_state_on(cpu) & possible_states;
6253 +}
6254 +
6255 +static inline void set_sched_state(sched_state_t s)
6256 +{
6257 +	TRACE_SCHED_STATE_CHANGE(get_sched_state(), s, smp_processor_id());
6258 +	atomic_set(&__get_cpu_var(resched_state), s);
6259 +}
6260 +
6261 +static inline int sched_state_transition(sched_state_t from, sched_state_t to)
6262 +{
6263 +	sched_state_t old_state;
6264 +
6265 +	old_state = atomic_cmpxchg(&__get_cpu_var(resched_state), from, to);
6266 +	if (old_state == from) {
6267 +		TRACE_SCHED_STATE_CHANGE(from, to, smp_processor_id());
6268 +		return 1;
6269 +	} else
6270 +		return 0;
6271 +}
6272 +
6273 +static inline int sched_state_transition_on(int cpu,
6274 +					    sched_state_t from,
6275 +					    sched_state_t to)
6276 +{
6277 +	sched_state_t old_state;
6278 +
6279 +	old_state = atomic_cmpxchg(&per_cpu(resched_state, cpu), from, to);
6280 +	if (old_state == from) {
6281 +		TRACE_SCHED_STATE_CHANGE(from, to, cpu);
6282 +		return 1;
6283 +	} else
6284 +		return 0;
6285 +}
6286 +
6287 +/* Plugins must call this function after they have decided which job to
6288 + * schedule next.  IMPORTANT: this function must be called while still holding
6289 + * the lock that is used to serialize scheduling decisions.
6290 + *
6291 + * (Ideally, we would like to use runqueue locks for this purpose, but that
6292 + * would lead to deadlocks with the migration code.)
6293 + */
6294 +static inline void sched_state_task_picked(void)
6295 +{
6296 +	VERIFY_SCHED_STATE(WILL_SCHEDULE);
6297 +
6298 +	/* WILL_SCHEDULE has only a local transition => simple store is ok */
6299 +	set_sched_state(TASK_PICKED);
6300 +}
6301 +
6302 +static inline void sched_state_entered_schedule(void)
6303 +{
6304 +	/* Update state for the case that we entered schedule() not due to
6305 +	 * set_tsk_need_resched() */
6306 +	set_sched_state(WILL_SCHEDULE);
6307 +}
6308 +
6309 +/* Called by schedule() to check if the scheduling decision is still valid
6310 + * after a context switch. Returns 1 if the CPU needs to reschedule. */
6311 +static inline int sched_state_validate_switch(void)
6312 +{
6313 +	int left_state_ok = 0;
6314 +
6315 +	VERIFY_SCHED_STATE(PICKED_WRONG_TASK | TASK_PICKED);
6316 +
6317 +	if (is_in_sched_state(TASK_PICKED)) {
6318 +		/* Might be good; let's try to transition out of this
6319 +		 * state. This must be done atomically since remote processors
6320 +		 * may try to change the state, too. */
6321 +		left_state_ok = sched_state_transition(TASK_PICKED, TASK_SCHEDULED);
6322 +	}
6323 +
6324 +	if (!left_state_ok) {
6325 +		/* We raced with a higher-priority task arrival => not
6326 +		 * valid. The CPU needs to reschedule. */
6327 +		set_sched_state(WILL_SCHEDULE);
6328 +		return 1;
6329 +	} else
6330 +		return 0;
6331 +}
6332 +
6333 +/* State transition events. See litmus/preempt.c for details. */
6334 +void sched_state_will_schedule(struct task_struct* tsk);
6335 +void sched_state_ipi(void);
6336 +/* Cause a CPU (remote or local) to reschedule. */
6337 +void litmus_reschedule(int cpu);
6338 +void litmus_reschedule_local(void);
6339 +
6340 +#ifdef CONFIG_DEBUG_KERNEL
6341 +void sched_state_plugin_check(void);
6342 +#else
6343 +#define sched_state_plugin_check() /* no check */
6344 +#endif
6345 +
6346 +#endif
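
A sketch of how a plugin's schedule() callback participates in the state machine above; demo_lock and demo_pick_next() are made-up stand-ins for the plugin's own scheduling state:

static DEFINE_RAW_SPINLOCK(demo_lock);
static struct task_struct *demo_pick_next(struct task_struct *prev);	/* plugin-specific, omitted */

static struct task_struct *demo_schedule(struct task_struct *prev)
{
	struct task_struct *next;

	raw_spin_lock(&demo_lock);
	next = demo_pick_next(prev);
	/* must run before the serializing lock is dropped: it moves
	 * WILL_SCHEDULE -> TASK_PICKED so that a later remote wake-up is
	 * caught by sched_state_validate_switch() */
	sched_state_task_picked();
	raw_spin_unlock(&demo_lock);

	return next;
}
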
6347 diff --git a/include/litmus/rt_domain.h b/include/litmus/rt_domain.h
6348 new file mode 100644
6349 index 0000000..ac24929
6350 --- /dev/null
6351 +++ b/include/litmus/rt_domain.h
6352 @@ -0,0 +1,182 @@
6353 +/* CLEANUP: Add comments and make it less messy.
6354 + *
6355 + */
6356 +
6357 +#ifndef __UNC_RT_DOMAIN_H__
6358 +#define __UNC_RT_DOMAIN_H__
6359 +
6360 +#include <litmus/bheap.h>
6361 +
6362 +#define RELEASE_QUEUE_SLOTS 127 /* prime */
6363 +
6364 +struct _rt_domain;
6365 +
6366 +typedef int (*check_resched_needed_t)(struct _rt_domain *rt);
6367 +typedef void (*release_jobs_t)(struct _rt_domain *rt, struct bheap* tasks);
6368 +
6369 +struct release_queue {
6370 +	/* each slot maintains a list of release heaps sorted
6371 +	 * by release time */
6372 +	struct list_head		slot[RELEASE_QUEUE_SLOTS];
6373 +};
6374 +
6375 +typedef struct _rt_domain {
6376 +	/* runnable rt tasks are in here */
6377 +	raw_spinlock_t 			ready_lock;
6378 +	struct bheap	 		ready_queue;
6379 +
6380 +	/* real-time tasks waiting for release are in here */
6381 +	raw_spinlock_t 			release_lock;
6382 +	struct release_queue 		release_queue;
6383 +
6384 +#ifdef CONFIG_RELEASE_MASTER
6385 +	int				release_master;
6386 +#endif
6387 +
6388 +	/* for moving tasks to the release queue */
6389 +	raw_spinlock_t			tobe_lock;
6390 +	struct list_head		tobe_released;
6391 +
6392 +	/* how do we check if we need to kick another CPU? */
6393 +	check_resched_needed_t		check_resched;
6394 +
6395 +	/* how do we release jobs? */
6396 +	release_jobs_t			release_jobs;
6397 +
6398 +	/* how are tasks ordered in the ready queue? */
6399 +	bheap_prio_t			order;
6400 +} rt_domain_t;
6401 +
6402 +struct release_heap {
6403 +	/* list_head for per-time-slot list */
6404 +	struct list_head		list;
6405 +	lt_t				release_time;
6406 +	/* all tasks to be released at release_time */
6407 +	struct bheap			heap;
6408 +	/* used to trigger the release */
6409 +	struct hrtimer			timer;
6410 +
6411 +#ifdef CONFIG_RELEASE_MASTER
6412 +	/* used to delegate releases */
6413 +	struct hrtimer_start_on_info	info;
6414 +#endif
6415 +	/* required for the timer callback */
6416 +	rt_domain_t*			dom;
6417 +};
6418 +
6419 +
6420 +static inline struct task_struct* __next_ready(rt_domain_t* rt)
6421 +{
6422 +	struct bheap_node *hn = bheap_peek(rt->order, &rt->ready_queue);
6423 +	if (hn)
6424 +		return bheap2task(hn);
6425 +	else
6426 +		return NULL;
6427 +}
6428 +
6429 +void rt_domain_init(rt_domain_t *rt, bheap_prio_t order,
6430 +		    check_resched_needed_t check,
6431 +		    release_jobs_t release);
6432 +
6433 +void __add_ready(rt_domain_t* rt, struct task_struct *new);
6434 +void __merge_ready(rt_domain_t* rt, struct bheap *tasks);
6435 +void __add_release(rt_domain_t* rt, struct task_struct *task);
6436 +
6437 +static inline struct task_struct* __take_ready(rt_domain_t* rt)
6438 +{
6439 +	struct bheap_node* hn = bheap_take(rt->order, &rt->ready_queue);
6440 +	if (hn)
6441 +		return bheap2task(hn);
6442 +	else
6443 +		return NULL;
6444 +}
6445 +
6446 +static inline struct task_struct* __peek_ready(rt_domain_t* rt)
6447 +{
6448 +	struct bheap_node* hn = bheap_peek(rt->order, &rt->ready_queue);
6449 +	if (hn)
6450 +		return bheap2task(hn);
6451 +	else
6452 +		return NULL;
6453 +}
6454 +
6455 +static inline int  is_queued(struct task_struct *t)
6456 +{
6457 +	BUG_ON(!tsk_rt(t)->heap_node);
6458 +	return bheap_node_in_heap(tsk_rt(t)->heap_node);
6459 +}
6460 +
6461 +static inline void remove(rt_domain_t* rt, struct task_struct *t)
6462 +{
6463 +	bheap_delete(rt->order, &rt->ready_queue, tsk_rt(t)->heap_node);
6464 +}
6465 +
6466 +static inline void add_ready(rt_domain_t* rt, struct task_struct *new)
6467 +{
6468 +	unsigned long flags;
6469 +	/* we need the lock protecting the ready queue */
6470 +	raw_spin_lock_irqsave(&rt->ready_lock, flags);
6471 +	__add_ready(rt, new);
6472 +	raw_spin_unlock_irqrestore(&rt->ready_lock, flags);
6473 +}
6474 +
6475 +static inline void merge_ready(rt_domain_t* rt, struct bheap* tasks)
6476 +{
6477 +	unsigned long flags;
6478 +	raw_spin_lock_irqsave(&rt->ready_lock, flags);
6479 +	__merge_ready(rt, tasks);
6480 +	raw_spin_unlock_irqrestore(&rt->ready_lock, flags);
6481 +}
6482 +
6483 +static inline struct task_struct* take_ready(rt_domain_t* rt)
6484 +{
6485 +	unsigned long flags;
6486 +	struct task_struct* ret;
6487 +	/* we need the lock protecting the ready queue */
6488 +	raw_spin_lock_irqsave(&rt->ready_lock, flags);
6489 +	ret = __take_ready(rt);
6490 +	raw_spin_unlock_irqrestore(&rt->ready_lock, flags);
6491 +	return ret;
6492 +}
6493 +
6494 +
6495 +static inline void add_release(rt_domain_t* rt, struct task_struct *task)
6496 +{
6497 +	unsigned long flags;
6498 +	raw_spin_lock_irqsave(&rt->tobe_lock, flags);
6499 +	__add_release(rt, task);
6500 +	raw_spin_unlock_irqrestore(&rt->tobe_lock, flags);
6501 +}
6502 +
6503 +#ifdef CONFIG_RELEASE_MASTER
6504 +void __add_release_on(rt_domain_t* rt, struct task_struct *task,
6505 +		      int target_cpu);
6506 +
6507 +static inline void add_release_on(rt_domain_t* rt,
6508 +				  struct task_struct *task,
6509 +				  int target_cpu)
6510 +{
6511 +	unsigned long flags;
6512 +	raw_spin_lock_irqsave(&rt->tobe_lock, flags);
6513 +	__add_release_on(rt, task, target_cpu);
6514 +	raw_spin_unlock_irqrestore(&rt->tobe_lock, flags);
6515 +}
6516 +#endif
6517 +
6518 +static inline int __jobs_pending(rt_domain_t* rt)
6519 +{
6520 +	return !bheap_empty(&rt->ready_queue);
6521 +}
6522 +
6523 +static inline int jobs_pending(rt_domain_t* rt)
6524 +{
6525 +	unsigned long flags;
6526 +	int ret;
6527 +	/* we need the lock protecting the ready queue */
6528 +	raw_spin_lock_irqsave(&rt->ready_lock, flags);
6529 +	ret = !bheap_empty(&rt->ready_queue);
6530 +	raw_spin_unlock_irqrestore(&rt->ready_lock, flags);
6531 +	return ret;
6532 +}
6533 +
6534 +#endif
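
A sketch of how a plugin might instantiate an EDF-ordered domain with the interface above, assuming (as the locking wrappers suggest) that the release_jobs callback is invoked without ready_lock held; demo_scheduled_task and the demo_* functions are illustrative, while edf_ready_order() and edf_preemption_needed() come from litmus/edf_common.h:

static rt_domain_t demo_domain;
static struct task_struct *demo_scheduled_task;	/* hypothetical: currently linked task */

static int demo_check_resched(rt_domain_t *rt)
{
	/* assumed contract: return nonzero iff a reschedule was triggered */
	if (edf_preemption_needed(rt, demo_scheduled_task)) {
		litmus_reschedule_local();
		return 1;
	}
	return 0;
}

static void demo_release_jobs(rt_domain_t *rt, struct bheap *tasks)
{
	/* reuse the locking wrapper from above to merge the released jobs */
	merge_ready(rt, tasks);
}

static void demo_domain_init(void)
{
	rt_domain_init(&demo_domain, edf_ready_order,
		       demo_check_resched, demo_release_jobs);
}
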
6535 diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
6536 index ce76faa..e26535b 100644
6537 --- a/include/litmus/rt_param.h
6538 +++ b/include/litmus/rt_param.h
6539 @@ -84,12 +84,12 @@ struct rt_task {
6540  };
6541  
6542  union np_flag {
6543 -	uint64_t raw;
6544 +	uint32_t raw;
6545  	struct {
6546  		/* Is the task currently in a non-preemptive section? */
6547 -		uint64_t flag:31;
6548 +		uint32_t flag:31;
6549  		/* Should the task call into the scheduler? */
6550 -		uint64_t preempt:1;
6551 +		uint32_t preempt:1;
6552  	} np;
6553  };
6554  
6555 @@ -110,10 +110,10 @@ union np_flag {
6556  struct control_page {
6557  	/* This flag is used by userspace to communicate non-preempive
6558  	 * sections. */
6559 -	volatile union np_flag sched;
6560 +	volatile __attribute__ ((aligned (8))) union np_flag sched;
6561  
6562 -	volatile uint64_t irq_count; /* Incremented by the kernel each time an IRQ is
6563 -				      * handled. */
6564 +	/* Incremented by the kernel each time an IRQ is handled. */
6565 +	volatile __attribute__ ((aligned (8))) uint64_t irq_count;
6566  
6567  	/* Locking overhead tracing: userspace records here the time stamp
6568  	 * and IRQ counter prior to starting the system call. */
6569 diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h
6570 new file mode 100644
6571 index 0000000..0ccccd6
6572 --- /dev/null
6573 +++ b/include/litmus/sched_plugin.h
6574 @@ -0,0 +1,128 @@
6575 +/*
6576 + * Definition of the scheduler plugin interface.
6577 + *
6578 + */
6579 +#ifndef _LINUX_SCHED_PLUGIN_H_
6580 +#define _LINUX_SCHED_PLUGIN_H_
6581 +
6582 +#include <linux/sched.h>
6583 +
6584 +#ifdef CONFIG_LITMUS_LOCKING
6585 +#include <litmus/locking.h>
6586 +#endif
6587 +
6588 +/************************ setup/tear down ********************/
6589 +
6590 +typedef long (*activate_plugin_t) (void);
6591 +typedef long (*deactivate_plugin_t) (void);
6592 +
6593 +struct domain_proc_info;
6594 +typedef long (*get_domain_proc_info_t) (struct domain_proc_info **info);
6595 +
6596 +
6597 +/********************* scheduler invocation ******************/
6598 +/* The main scheduling function, called to select the next task to dispatch. */
6599 +typedef struct task_struct* (*schedule_t)(struct task_struct * prev);
6600 +/* Clean up after the task switch has occurred.
6601 + * This function is called after every (even non-rt) task switch.
6602 + */
6603 +typedef void (*finish_switch_t)(struct task_struct *prev);
6604 +
6605 +
6606 +/********************* task state changes ********************/
6607 +
6608 +/* Called to set up a new real-time task.
6609 + * Release the first job, enqueue, etc.
6610 + * Task may already be running.
6611 + */
6612 +typedef void (*task_new_t) (struct task_struct *task,
6613 +			    int on_rq,
6614 +			    int running);
6615 +
6616 +/* Called to re-introduce a task after blocking.
6617 + * Can potentially be called multiple times.
6618 + */
6619 +typedef void (*task_wake_up_t) (struct task_struct *task);
6620 +/* Called to notify the plugin of a blocking real-time task.
6621 + * It is only called for real-time tasks and before schedule() is called. */
6622 +typedef void (*task_block_t)  (struct task_struct *task);
6623 +/* Called when a real-time task exits or changes to a different scheduling
6624 + * class.
6625 + * Free any allocated resources
6626 + */
6627 +typedef void (*task_exit_t)    (struct task_struct *);
6628 +
6629 +/* task_exit() is called with interrupts disabled and runqueue locks held, and
6630 + * thus cannot block or spin.  task_cleanup() is called sometime later
6631 + * without any locks being held.
6632 + */
6633 +typedef void (*task_cleanup_t)	(struct task_struct *);
6634 +
6635 +#ifdef CONFIG_LITMUS_LOCKING
6636 +/* Called when the current task attempts to create a new lock of a given
6637 + * protocol type. */
6638 +typedef long (*allocate_lock_t) (struct litmus_lock **lock, int type,
6639 +				 void* __user config);
6640 +#endif
6641 +
6642 +
6643 +/********************* sys call backends  ********************/
6644 +/* This function causes the caller to sleep until the next release */
6645 +typedef long (*complete_job_t) (void);
6646 +
6647 +typedef long (*admit_task_t)(struct task_struct* tsk);
6648 +
6649 +typedef long (*wait_for_release_at_t)(lt_t release_time);
6650 +
6651 +/* Informs the plugin when a synchronous release takes place. */
6652 +typedef void (*synchronous_release_at_t)(lt_t time_zero);
6653 +
6654 +/************************ misc routines ***********************/
6655 +
6656 +
6657 +struct sched_plugin {
6658 +	struct list_head	list;
6659 +	/* 	basic info 		*/
6660 +	char 			*plugin_name;
6661 +
6662 +	/*	setup			*/
6663 +	activate_plugin_t	activate_plugin;
6664 +	deactivate_plugin_t	deactivate_plugin;
6665 +	get_domain_proc_info_t	get_domain_proc_info;
6666 +
6667 +	/* 	scheduler invocation 	*/
6668 +	schedule_t 		schedule;
6669 +	finish_switch_t 	finish_switch;
6670 +
6671 +	/*	syscall backend 	*/
6672 +	complete_job_t 		complete_job;
6673 +	wait_for_release_at_t	wait_for_release_at;
6674 +	synchronous_release_at_t synchronous_release_at;
6675 +
6676 +	/*	task state changes 	*/
6677 +	admit_task_t		admit_task;
6678 +
6679 +        task_new_t 		task_new;
6680 +	task_wake_up_t		task_wake_up;
6681 +	task_block_t		task_block;
6682 +
6683 +	task_exit_t 		task_exit;
6684 +	task_cleanup_t		task_cleanup;
6685 +
6686 +#ifdef CONFIG_LITMUS_LOCKING
6687 +	/*	locking protocols	*/
6688 +	allocate_lock_t		allocate_lock;
6689 +#endif
6690 +} __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
6691 +
6692 +
6693 +extern struct sched_plugin *litmus;
6694 +
6695 +int register_sched_plugin(struct sched_plugin* plugin);
6696 +struct sched_plugin* find_sched_plugin(const char* name);
6697 +void print_sched_plugins(struct seq_file *m);
6698 +
6699 +
6700 +extern struct sched_plugin linux_sched_plugin;
6701 +
6702 +#endif
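
A sketch of the smallest plugin the interface above admits, in the spirit of the default Linux plugin declared above: schedule() performs the mandatory sched_state_task_picked() handshake and returns NULL so background Linux tasks keep running. The demo_* names are illustrative; complete_job() is the generic helper declared in litmus/jobs.h, and the remaining callbacks are elided here:

static struct task_struct *demo_schedule(struct task_struct *prev)
{
	sched_state_task_picked();	/* required handshake, see litmus/preempt.h */
	return NULL;			/* no real-time task: let Linux tasks run */
}

static long demo_admit_task(struct task_struct *tsk)
{
	return 0;	/* accept everything; a real plugin validates the rt parameters */
}

static struct sched_plugin demo_plugin = {
	.plugin_name	= "DEMO",
	.schedule	= demo_schedule,
	.admit_task	= demo_admit_task,
	.complete_job	= complete_job,
	/* task_new / task_wake_up / task_block / task_exit elided in this sketch */
};

static int __init demo_plugin_init(void)
{
	return register_sched_plugin(&demo_plugin);
}
module_init(demo_plugin_init);
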
6703 diff --git a/include/litmus/srp.h b/include/litmus/srp.h
6704 new file mode 100644
6705 index 0000000..c9a4552
6706 --- /dev/null
6707 +++ b/include/litmus/srp.h
6708 @@ -0,0 +1,28 @@
6709 +#ifndef LITMUS_SRP_H
6710 +#define LITMUS_SRP_H
6711 +
6712 +struct srp_semaphore;
6713 +
6714 +struct srp_priority {
6715 +	struct list_head	list;
6716 +        unsigned int 		priority;
6717 +	pid_t			pid;
6718 +};
6719 +#define list2prio(l) list_entry(l, struct srp_priority, list)
6720 +
6721 +/* struct for uniprocessor SRP "semaphore" */
6722 +struct srp_semaphore {
6723 +	struct litmus_lock litmus_lock;
6724 +	struct srp_priority ceiling;
6725 +	struct task_struct* owner;
6726 +	int cpu; /* cpu associated with this "semaphore" and resource */
6727 +};
6728 +
6729 +/* map a task to its SRP preemption level priority */
6730 +typedef unsigned int (*srp_prioritization_t)(struct task_struct* t);
6731 +/* Must be updated by each plugin that uses SRP.*/
6732 +extern srp_prioritization_t get_srp_prio;
6733 +
6734 +struct srp_semaphore* allocate_srp_semaphore(void);
6735 +
6736 +#endif
6737 diff --git a/include/litmus/unistd_32.h b/include/litmus/unistd_32.h
6738 new file mode 100644
6739 index 0000000..94264c2
6740 --- /dev/null
6741 +++ b/include/litmus/unistd_32.h
6742 @@ -0,0 +1,21 @@
6743 +/*
6744 + * included from arch/x86/include/asm/unistd_32.h
6745 + *
6746 + * LITMUS^RT syscalls with "relative" numbers
6747 + */
6748 +#define __LSC(x) (__NR_LITMUS + x)
6749 +
6750 +#define __NR_set_rt_task_param	__LSC(0)
6751 +#define __NR_get_rt_task_param	__LSC(1)
6752 +#define __NR_complete_job	__LSC(2)
6753 +#define __NR_od_open		__LSC(3)
6754 +#define __NR_od_close		__LSC(4)
6755 +#define __NR_litmus_lock       	__LSC(5)
6756 +#define __NR_litmus_unlock	__LSC(6)
6757 +#define __NR_query_job_no	__LSC(7)
6758 +#define __NR_wait_for_job_release __LSC(8)
6759 +#define __NR_wait_for_ts_release __LSC(9)
6760 +#define __NR_release_ts		__LSC(10)
6761 +#define __NR_null_call		__LSC(11)
6762 +
6763 +#define NR_litmus_syscalls 12
6764 diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h
6765 new file mode 100644
6766 index 0000000..d5ced0d
6767 --- /dev/null
6768 +++ b/include/litmus/unistd_64.h
6769 @@ -0,0 +1,33 @@
6770 +/*
6771 + * included from arch/x86/include/asm/unistd_64.h
6772 + *
6773 + * LITMUS^RT syscalls with "relative" numbers
6774 + */
6775 +#define __LSC(x) (__NR_LITMUS + x)
6776 +
6777 +#define __NR_set_rt_task_param			__LSC(0)
6778 +__SYSCALL(__NR_set_rt_task_param, sys_set_rt_task_param)
6779 +#define __NR_get_rt_task_param			__LSC(1)
6780 +__SYSCALL(__NR_get_rt_task_param, sys_get_rt_task_param)
6781 +#define __NR_complete_job	  		__LSC(2)
6782 +__SYSCALL(__NR_complete_job, sys_complete_job)
6783 +#define __NR_od_open				__LSC(3)
6784 +__SYSCALL(__NR_od_open, sys_od_open)
6785 +#define __NR_od_close				__LSC(4)
6786 +__SYSCALL(__NR_od_close, sys_od_close)
6787 +#define __NR_litmus_lock	       		__LSC(5)
6788 +__SYSCALL(__NR_litmus_lock, sys_litmus_lock)
6789 +#define __NR_litmus_unlock	       		__LSC(6)
6790 +__SYSCALL(__NR_litmus_unlock, sys_litmus_unlock)
6791 +#define __NR_query_job_no			__LSC(7)
6792 +__SYSCALL(__NR_query_job_no, sys_query_job_no)
6793 +#define __NR_wait_for_job_release		__LSC(8)
6794 +__SYSCALL(__NR_wait_for_job_release, sys_wait_for_job_release)
6795 +#define __NR_wait_for_ts_release		__LSC(9)
6796 +__SYSCALL(__NR_wait_for_ts_release, sys_wait_for_ts_release)
6797 +#define __NR_release_ts				__LSC(10)
6798 +__SYSCALL(__NR_release_ts, sys_release_ts)
6799 +#define __NR_null_call				__LSC(11)
6800 +__SYSCALL(__NR_null_call, sys_null_call)
6801 +
6802 +#define NR_litmus_syscalls 12
6803 diff --git a/include/litmus/wait.h b/include/litmus/wait.h
6804 new file mode 100644
6805 index 0000000..ce1347c
6806 --- /dev/null
6807 +++ b/include/litmus/wait.h
6808 @@ -0,0 +1,57 @@
6809 +#ifndef _LITMUS_WAIT_H_
6810 +#define _LITMUS_WAIT_H_
6811 +
6812 +struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq);
6813 +
6814 +/* wrap a regular wait_queue_t entry */
6815 +struct __prio_wait_queue {
6816 +	wait_queue_t wq;
6817 +
6818 +	/* some priority point */
6819 +	lt_t priority;
6820 +	/* break ties in priority by lower tie_breaker */
6821 +	unsigned int tie_breaker;
6822 +};
6823 +
6824 +typedef struct __prio_wait_queue prio_wait_queue_t;
6825 +
6826 +static inline void init_prio_waitqueue_entry(prio_wait_queue_t *pwq,
6827 +					     struct task_struct* t,
6828 +					     lt_t priority)
6829 +{
6830 +	init_waitqueue_entry(&pwq->wq, t);
6831 +	pwq->priority    = priority;
6832 +	pwq->tie_breaker = 0;
6833 +}
6834 +
6835 +static inline void init_prio_waitqueue_entry_tie(prio_wait_queue_t *pwq,
6836 +						 struct task_struct* t,
6837 +						 lt_t priority,
6838 +						 unsigned int tie_breaker)
6839 +{
6840 +	init_waitqueue_entry(&pwq->wq, t);
6841 +	pwq->priority    = priority;
6842 +	pwq->tie_breaker = tie_breaker;
6843 +}
6844 +
6845 +unsigned int __add_wait_queue_prio_exclusive(
6846 +	wait_queue_head_t* head,
6847 +	prio_wait_queue_t *new);
6848 +
6849 +static inline unsigned int add_wait_queue_prio_exclusive(
6850 +	wait_queue_head_t* head,
6851 +	prio_wait_queue_t *new)
6852 +{
6853 +	unsigned long flags;
6854 +	unsigned int passed;
6855 +
6856 +	spin_lock_irqsave(&head->lock, flags);
6857 +	passed = __add_wait_queue_prio_exclusive(head, new);
6858 +
6859 +	spin_unlock_irqrestore(&head->lock, flags);
6860 +
6861 +	return passed;
6862 +}
6863 +
6864 +
6865 +#endif
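
A sketch of a priority-ordered wait using the helpers above, taking the job's absolute deadline as the priority point and assuming (as the deadline-based protocols later in this series do) that smaller priority points are dequeued first by __waitqueue_remove_first(); synchronization with the state of the contended resource is elided, and demo_wait_in_deadline_order() is illustrative only:

static void demo_wait_in_deadline_order(wait_queue_head_t *wq)
{
	prio_wait_queue_t entry;

	/* earlier deadline == smaller priority point == closer to the head */
	init_prio_waitqueue_entry(&entry, current,
				  tsk_rt(current)->job_params.deadline);
	set_current_state(TASK_UNINTERRUPTIBLE);
	add_wait_queue_prio_exclusive(wq, &entry);

	/* the owner's unlock path wakes and dequeues the head via
	 * __waitqueue_remove_first(), so no explicit removal here */
	schedule();
}
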
6866 diff --git a/kernel/sched/litmus.c b/kernel/sched/litmus.c
6867 new file mode 100644
6868 index 0000000..ad88a14
6869 --- /dev/null
6870 +++ b/kernel/sched/litmus.c
6871 @@ -0,0 +1,340 @@
6872 +/* This file is included from kernel/sched.c */
6873 +
6874 +#include "sched.h"
6875 +
6876 +#include <litmus/trace.h>
6877 +#include <litmus/sched_trace.h>
6878 +
6879 +#include <litmus/litmus.h>
6880 +#include <litmus/budget.h>
6881 +#include <litmus/sched_plugin.h>
6882 +#include <litmus/preempt.h>
6883 +
6884 +static void update_time_litmus(struct rq *rq, struct task_struct *p)
6885 +{
6886 +	u64 delta = rq->clock - p->se.exec_start;
6887 +	if (unlikely((s64)delta < 0))
6888 +		delta = 0;
6889 +	/* per job counter */
6890 +	p->rt_param.job_params.exec_time += delta;
6891 +	/* task counter */
6892 +	p->se.sum_exec_runtime += delta;
6893 +	/* sched_clock() */
6894 +	p->se.exec_start = rq->clock;
6895 +	cpuacct_charge(p, delta);
6896 +}
6897 +
6898 +static void double_rq_lock(struct rq *rq1, struct rq *rq2);
6899 +static void double_rq_unlock(struct rq *rq1, struct rq *rq2);
6900 +
6901 +static struct task_struct *
6902 +litmus_schedule(struct rq *rq, struct task_struct *prev)
6903 +{
6904 +	struct task_struct *next;
6905 +
6906 +#ifdef CONFIG_SMP
6907 +	struct rq* other_rq;
6908 +	long was_running;
6909 +	lt_t _maybe_deadlock = 0;
6910 +#endif
6911 +
6912 +	/* let the plugin schedule */
6913 +	next = litmus->schedule(prev);
6914 +
6915 +	sched_state_plugin_check();
6916 +
6917 +#ifdef CONFIG_SMP
6918 +	/* check if a global plugin pulled a task from a different RQ */
6919 +	if (next && task_rq(next) != rq) {
6920 +		/* we need to migrate the task */
6921 +		other_rq = task_rq(next);
6922 +		TRACE_TASK(next, "migrate from %d\n", other_rq->cpu);
6923 +
6924 +		/* while we drop the lock, the prev task could change its
6925 +		 * state
6926 +		 */
6927 +		was_running = is_running(prev);
6928 +		mb();
6929 +		raw_spin_unlock(&rq->lock);
6930 +
6931 +		/* Don't race with a concurrent switch.  This could deadlock in
6932 +		 * the case of cross or circular migrations.  It's the job of
6933 +		 * the plugin to make sure that doesn't happen.
6934 +		 */
6935 +		TRACE_TASK(next, "stack_in_use=%d\n",
6936 +			   next->rt_param.stack_in_use);
6937 +		if (next->rt_param.stack_in_use != NO_CPU) {
6938 +			TRACE_TASK(next, "waiting to deschedule\n");
6939 +			_maybe_deadlock = litmus_clock();
6940 +		}
6941 +		while (next->rt_param.stack_in_use != NO_CPU) {
6942 +			cpu_relax();
6943 +			mb();
6944 +			if (next->rt_param.stack_in_use == NO_CPU)
6945 +				TRACE_TASK(next,"descheduled. Proceeding.\n");
6946 +
6947 +			if (lt_before(_maybe_deadlock + 1000000000L,
6948 +				      litmus_clock())) {
6949 +				/* We've been spinning for 1s.
6950 +				 * Something can't be right!
6951 +				 * Let's abandon the task and bail out; at least
6952 +				 * we will have debug info instead of a hard
6953 +				 * deadlock.
6954 +				 */
6955 +#ifdef CONFIG_BUG_ON_MIGRATION_DEADLOCK
6956 +				BUG();
6957 +#else
6958 +				TRACE_TASK(next,"stack too long in use. "
6959 +					   "Deadlock?\n");
6960 +				next = NULL;
6961 +
6962 +				/* bail out */
6963 +				raw_spin_lock(&rq->lock);
6964 +				return next;
6965 +#endif
6966 +			}
6967 +		}
6968 +#ifdef  __ARCH_WANT_UNLOCKED_CTXSW
6969 +		if (next->on_cpu)
6970 +			TRACE_TASK(next, "waiting for !oncpu");
6971 +		while (next->on_cpu) {
6972 +			cpu_relax();
6973 +			mb();
6974 +		}
6975 +#endif
6976 +		double_rq_lock(rq, other_rq);
6977 +		mb();
6978 +		if (is_realtime(prev) && is_running(prev) != was_running) {
6979 +			TRACE_TASK(prev,
6980 +				   "state changed while we dropped"
6981 +				   " the lock: is_running=%d, was_running=%d\n",
6982 +				   is_running(prev), was_running);
6983 +			if (is_running(prev) && !was_running) {
6984 +				/* prev task became unblocked
6985 +				 * we need to simulate the normal sequence of
6986 +				 * events for the scheduler plugin.
6987 +				 */
6988 +				litmus->task_block(prev);
6989 +				litmus->task_wake_up(prev);
6990 +			}
6991 +		}
6992 +
6993 +		set_task_cpu(next, smp_processor_id());
6994 +
6995 +		/* DEBUG: now that we have the lock we need to make sure a
6996 +		 *  couple of things still hold:
6997 +		 *  - it is still a real-time task
6998 +		 *  - it is still runnable (could have been stopped)
6999 +		 * If either is violated, then the active plugin is
7000 +		 * doing something wrong.
7001 +		 */
7002 +		if (!is_realtime(next) || !is_running(next)) {
7003 +			/* BAD BAD BAD */
7004 +			TRACE_TASK(next,"BAD: migration invariant FAILED: "
7005 +				   "rt=%d running=%d\n",
7006 +				   is_realtime(next),
7007 +				   is_running(next));
7008 +			/* drop the task */
7009 +			next = NULL;
7010 +		}
7011 +		/* release the other CPU's runqueue, but keep ours */
7012 +		raw_spin_unlock(&other_rq->lock);
7013 +	}
7014 +#endif
7015 +
7016 +	if (next) {
7017 +#ifdef CONFIG_SMP
7018 +		next->rt_param.stack_in_use = rq->cpu;
7019 +#else
7020 +		next->rt_param.stack_in_use = 0;
7021 +#endif
7022 +		update_rq_clock(rq);
7023 +		next->se.exec_start = rq->clock;
7024 +	}
7025 +
7026 +	update_enforcement_timer(next);
7027 +	return next;
7028 +}
7029 +
7030 +static void enqueue_task_litmus(struct rq *rq, struct task_struct *p,
7031 +				int flags)
7032 +{
7033 +	if (flags & ENQUEUE_WAKEUP) {
7034 +		sched_trace_task_resume(p);
7035 +		tsk_rt(p)->present = 1;
7036 +		/* LITMUS^RT plugins need to update the state
7037 +		 * _before_ making it available in global structures.
7038 +		 * Linux gets away with being lazy about the task state
7039 +		 * update. We can't do that, hence we update the task
7040 +		 * state already here.
7041 +		 *
7042 +		 * WARNING: this needs to be re-evaluated when porting
7043 +		 *          to newer kernel versions.
7044 +		 */
7045 +		p->state = TASK_RUNNING;
7046 +		litmus->task_wake_up(p);
7047 +
7048 +		rq->litmus.nr_running++;
7049 +	} else
7050 +		TRACE_TASK(p, "ignoring an enqueue, not a wake up.\n");
7051 +}
7052 +
7053 +static void dequeue_task_litmus(struct rq *rq, struct task_struct *p,
7054 +				int flags)
7055 +{
7056 +	if (flags & DEQUEUE_SLEEP) {
7057 +		litmus->task_block(p);
7058 +		tsk_rt(p)->present = 0;
7059 +		sched_trace_task_block(p);
7060 +
7061 +		rq->litmus.nr_running--;
7062 +	} else
7063 +		TRACE_TASK(p, "ignoring a dequeue, not going to sleep.\n");
7064 +}
7065 +
7066 +static void yield_task_litmus(struct rq *rq)
7067 +{
7068 +	TS_SYSCALL_IN_START;
7069 +	TS_SYSCALL_IN_END;
7070 +
7071 +	BUG_ON(rq->curr != current);
7072 +	/* sched_yield() is called to trigger delayed preemptions.
7073 +	 * Thus, mark the current task as needing to be rescheduled.
7074 +	 * This will cause the scheduler plugin to be invoked, which can
7075 +	 * then determine if a preemption is still required.
7076 +	 */
7077 +	clear_exit_np(current);
7078 +	litmus_reschedule_local();
7079 +
7080 +	TS_SYSCALL_OUT_START;
7081 +}
7082 +
7083 +/* Plugins are responsible for this.
7084 + */
7085 +static void check_preempt_curr_litmus(struct rq *rq, struct task_struct *p, int flags)
7086 +{
7087 +}
7088 +
7089 +static void put_prev_task_litmus(struct rq *rq, struct task_struct *p)
7090 +{
7091 +}
7092 +
7093 +#ifdef CONFIG_SMP
7094 +static void pre_schedule_litmus(struct rq *rq, struct task_struct *prev)
7095 +{
7096 +	update_rq_clock(rq);
7097 +	/* tell update_rq_clock() that we just did that */
7098 +	rq->skip_clock_update = 1;
7099 +	update_time_litmus(rq, prev);
7100 +	if (!is_running(prev))
7101 +		tsk_rt(prev)->present = 0;
7102 +}
7103 +#endif
7104 +
7105 +/* pick_next_task_litmus() - wrapper around litmus_schedule()
7106 + *
7107 + * returns the next task to be scheduled
7108 + */
7109 +static struct task_struct *pick_next_task_litmus(struct rq *rq)
7110 +{
7111 +	/* get the to-be-switched-out task (prev) */
7112 +	struct task_struct *prev = rq->litmus.prev;
7113 +	struct task_struct *next;
7114 +
7115 +	/* if not called from schedule() but from somewhere
7116 +	 * else (e.g., migration), return now!
7117 +	 */
7118 +	if(!rq->litmus.prev)
7119 +		return NULL;
7120 +
7121 +	rq->litmus.prev = NULL;
7122 +
7123 +	TS_PLUGIN_SCHED_START;
7124 +	next = litmus_schedule(rq, prev);
7125 +	TS_PLUGIN_SCHED_END;
7126 +
7127 +	return next;
7128 +}
7129 +
7130 +static void task_tick_litmus(struct rq *rq, struct task_struct *p, int queued)
7131 +{
7132 +	if (is_realtime(p) && !queued) {
7133 +		update_time_litmus(rq, p);
7134 +		/* budget check for QUANTUM_ENFORCEMENT tasks */
7135 +		if (budget_enforced(p) && budget_exhausted(p)) {
7136 +			litmus_reschedule_local();
7137 +		}
7138 +	}
7139 +}
7140 +
7141 +static void switched_to_litmus(struct rq *rq, struct task_struct *p)
7142 +{
7143 +}
7144 +
7145 +static void prio_changed_litmus(struct rq *rq, struct task_struct *p,
7146 +				int oldprio)
7147 +{
7148 +}
7149 +
7150 +unsigned int get_rr_interval_litmus(struct rq *rq, struct task_struct *p)
7151 +{
7152 +	/* return infinity */
7153 +	return 0;
7154 +}
7155 +
7156 +/* This is called when a task becomes a real-time task, either due to a SCHED_*
7157 + * class transition or due to PI mutex inheritance. We don't handle Linux PI
7158 + * mutex inheritance yet (and probably never will). Use LITMUS provided
7159 + * synchronization primitives instead.
7160 + */
7161 +static void set_curr_task_litmus(struct rq *rq)
7162 +{
7163 +	rq->curr->se.exec_start = rq->clock;
7164 +}
7165 +
7166 +
7167 +#ifdef CONFIG_SMP
7168 +/* execve tries to rebalance the task in this scheduling domain.
7169 + * We don't care about the scheduling domain; this can get called from
7170 + * exec, fork, and wakeup.
7171 + */
7172 +static int
7173 +select_task_rq_litmus(struct task_struct *p, int sd_flag, int flags)
7174 +{
7175 +	/* preemption is already disabled.
7176 +	 * We don't want to change cpu here
7177 +	 */
7178 +	return task_cpu(p);
7179 +}
7180 +#endif
7181 +
7182 +const struct sched_class litmus_sched_class = {
7183 +	/* From 34f971f6 the stop/migrate worker threads have a class on
7184 +	 * their own, which is the highest prio class. We don't support
7185 +	 * cpu-hotplug or cpu throttling. This allows LITMUS^RT to use up
7186 +	 * to 1.0 CPU capacity.
7187 +	 */
7188 +	.next			= &rt_sched_class,
7189 +	.enqueue_task		= enqueue_task_litmus,
7190 +	.dequeue_task		= dequeue_task_litmus,
7191 +	.yield_task		= yield_task_litmus,
7192 +
7193 +	.check_preempt_curr	= check_preempt_curr_litmus,
7194 +
7195 +	.pick_next_task		= pick_next_task_litmus,
7196 +	.put_prev_task		= put_prev_task_litmus,
7197 +
7198 +#ifdef CONFIG_SMP
7199 +	.select_task_rq		= select_task_rq_litmus,
7200 +
7201 +	.pre_schedule		= pre_schedule_litmus,
7202 +#endif
7203 +
7204 +	.set_curr_task          = set_curr_task_litmus,
7205 +	.task_tick		= task_tick_litmus,
7206 +
7207 +	.get_rr_interval	= get_rr_interval_litmus,
7208 +
7209 +	.prio_changed		= prio_changed_litmus,
7210 +	.switched_to		= switched_to_litmus,
7211 +};
7212 diff --git a/litmus/Kconfig b/litmus/Kconfig
7213 index 5408ef6..fdf31f3 100644
7214 --- a/litmus/Kconfig
7215 +++ b/litmus/Kconfig
7216 @@ -1,5 +1,184 @@
7217  menu "LITMUS^RT"
7218  
7219 +menu "Scheduling"
7220 +
7221 +config RELEASE_MASTER
7222 +        bool "Release-master Support"
7223 +	depends on ARCH_HAS_SEND_PULL_TIMERS && SMP
7224 +	default n
7225 +	help
7226 +           Allow one processor to act as a dedicated interrupt processor
7227 +           that services all timer interrupts, but that does not schedule
7228 +           real-time tasks. See the RTSS'09 paper for details
7229 +	   (http://www.cs.unc.edu/~anderson/papers.html).
7230 +
7231 +config PREFER_LOCAL_LINKING
7232 +       bool "Link newly arrived tasks locally if possible"
7233 +       depends on SMP
7234 +       default y
7235 +       help
7236 +          In linking-based schedulers such as GSN-EDF, if an idle CPU processes
7237 +	  a job arrival (i.e., when a job resumed or was released), it can
7238 +	  either link the task to itself and schedule it immediately (to avoid
7239 +	  unnecessary scheduling latency) or it can try to link it to the CPU
7240 +	  where it executed previously (to maximize cache affinity, at the
7241 +	  expense of increased latency due to the need to send an IPI).
7242 +
7243 +	  In lightly loaded systems, this option can significantly reduce
7244 +	  scheduling latencies. In heavily loaded systems (where CPUs are
7245 +	  rarely idle), it will likely make little difference.
7246 +
7247 +	  If unsure, say yes.
7248 +
7249 +config LITMUS_QUANTUM_LENGTH_US
7250 +    int "quantum length (in us)"
7251 +    default 1000
7252 +    range 500 10000
7253 +    help 
7254 +      Determine the desired quantum length, in microseconds, which
7255 +      sets the granularity of scheduling in
7256 +      quantum-driven plugins (primarily PFAIR). This parameter does not
7257 +      affect event-driven plugins (such as the EDF-based plugins and P-FP).
7258 +      Default: 1000us = 1ms.
7259 +
7260 +config BUG_ON_MIGRATION_DEADLOCK
7261 +       bool "Panic on suspected migration deadlock"
7262 +       default y
7263 +       help
7264 +          This is a debugging option. The LITMUS^RT migration support code for
7265 +	  global scheduling contains a simple heuristic to detect when the
7266 +	  system deadlocks due to circular stack dependencies.
7267 +
7268 +	  For example, such a deadlock exists if CPU 0 waits for task A's stack
7269 +	  to become available while using task B's stack, and CPU 1 waits for
7270 +	  task B's stack to become available while using task A's stack. Such
7271 +	  a situation can arise in (buggy) global scheduling plugins.
7272 +
7273 +	  With this option enabled, such a scenario will result in a BUG().
7274 +	  You can turn off this option when debugging on real hardware (e.g.,
7275 +	  to rescue traces, etc. that would be hard to get after a panic).
7276 +
7277 +	  Only turn this off if you really know what you are doing. If this
7278 +	  BUG() triggers, the scheduler is broken and turning off this option
7279 +	  won't fix it.
7280 +
7281 +
7282 +endmenu
7283 +
7284 +menu "Real-Time Synchronization"
7285 +
7286 +config NP_SECTION
7287 +        bool "Non-preemptive section support"
7288 +	default y
7289 +	help
7290 +	  Allow tasks to become non-preemptable.
7291 +          Note that plugins still need to explicitly support non-preemptivity.
7292 +          Currently, only the GSN-EDF, PSN-EDF, and P-FP plugins have such support.
7293 +
7294 +	  This is required to support locking protocols such as the FMLP.
7295 +	  If disabled, all tasks will be considered preemptable at all times.
7296 +
7297 +config LITMUS_LOCKING
7298 +        bool "Support for real-time locking protocols"
7299 +	depends on NP_SECTION
7300 +	default y
7301 +	help
7302 +	  Enable LITMUS^RT's multiprocessor real-time locking protocols with
7303 +	  predictable maximum blocking times.
7304 +
7305 +	  Say Yes if you want to include locking protocols such as the FMLP and
7306 +	  Baker's SRP.
7307 +
7308 +endmenu
7309 +
7310 +menu "Performance Enhancements"
7311 +
7312 +config SCHED_CPU_AFFINITY
7313 +	bool "Local Migration Affinity"
7314 +	depends on X86 && SYSFS
7315 +	default y
7316 +	help
7317 +	  Rescheduled tasks prefer CPUs near to their previously used CPU.
7318 +	  This may improve cache performance through possible preservation of
7319 +	  cache affinity, at the expense of (slightly) more involved scheduling
7320 +	  logic.
7321 +
7322 +	  Warning: May make bugs harder to find since tasks may migrate less often.
7323 +
7324 +	  NOTES:
7325 +		* Feature is not utilized by PFair/PD^2.
7326 +
7327 +	  Say Yes if unsure.
7328 +
7329 +config ALLOW_EARLY_RELEASE
7330 +	bool "Allow Early Releasing"
7331 +	default y
7332 +	help
7333 +	  Allow tasks to release jobs early (while still maintaining job
7334 +	  precedence constraints). Only supported by EDF schedulers. Early
7335 +	  releasing must be explicitly requested by real-time tasks via
7336 +	  the task_params passed to sys_set_rt_task_param().
7337 +
7338 +	  Early releasing can improve job response times while maintaining
7339 +	  real-time correctness. However, it can easily peg your CPUs
7340 +	  since tasks never suspend to wait for their next job. As such, early
7341 +	  releasing is really only useful in the context of implementing
7342 +	  bandwidth servers, interrupt handling threads, or short-lived
7343 +	  computations.
7344 +
7345 +	  Beware that early releasing may affect real-time analysis
7346 +	  if using locking protocols or I/O.
7347 +
7348 +	  Say Yes if unsure.
7349 +
7350 +choice
7351 +	prompt "EDF Tie-Break Behavior"
7352 +	default EDF_TIE_BREAK_LATENESS_NORM
7353 +	help
7354 +	  Allows the configuration of tie-breaking behavior when the deadlines
7355 +	  of two EDF-scheduled tasks are equal.
7356 +
7357 +	config EDF_TIE_BREAK_LATENESS
7358 +	bool "Lateness-based Tie Break"
7359 +	help
7360 +	  Break ties between two jobs, A and B, based upon the lateness of their
7361 +	  prior jobs. The job with the greatest lateness has priority. Note that
7362 +	  lateness has a negative value if the prior job finished before its
7363 +	  deadline.
7364 +
7365 +	config EDF_TIE_BREAK_LATENESS_NORM
7366 +	bool "Normalized Lateness-based Tie Break"
7367 +	help
7368 +	  Break ties between two jobs, A and B, based upon the lateness, normalized
7369 +	  by relative deadline, of their prior jobs. The job with the greatest
7370 +	  normalized lateness has priority. Note that lateness has a negative value
7371 +	  if the prior job finished before its deadline.
7372 +
7373 +	  Normalized lateness tie-breaks are likely desirable over non-normalized
7374 +	  tie-breaks if the execution times and/or relative deadlines of tasks in a
7375 +	  task set vary greatly.
7376 +
7377 +	config EDF_TIE_BREAK_HASH
7378 +	bool "Hash-based Tie Breaks"
7379 +	help
7380 +	  Break ties between two jobs, A and B, with equal deadlines by using a
7381 +	  uniform hash; i.e.: hash(A.pid, A.job_num) < hash(B.pid, B.job_num). Job
7382 +	  A has a ~50% chance of winning a given tie-break.
7383 +
7384 +	config EDF_PID_TIE_BREAK
7385 +	bool "PID-based Tie Breaks"
7386 +	help
7387 +	  Break ties based upon OS-assigned thread IDs. Use this option if
7388 +	  it is required by the algorithm's real-time analysis or if per-task
7389 +	  response-time jitter must be minimized.
7390 +
7391 +	  NOTES:
7392 +	    * This tie-breaking method was the default in Litmus 2012.2 and before.
7393 +
7394 +endchoice
7395 +
7396 +endmenu
7397 +
7398  menu "Tracing"
7399  
7400  config FEATHER_TRACE
7401 @@ -154,6 +333,20 @@ config SCHED_DEBUG_TRACE_CALLER
7402  
7403  	 If unsure, say No.
7404  
7405 +config PREEMPT_STATE_TRACE
7406 +       bool "Trace preemption state machine transitions"
7407 +       depends on SCHED_DEBUG_TRACE && DEBUG_KERNEL
7408 +       default n
7409 +       help
7410 +         With this option enabled, each CPU will log when it transitions
7411 +	 states in the preemption state machine. This state machine is
7412 +	 used to determine how to react to IPIs (avoid races with in-flight IPIs).
7413 +
7414 +	 Warning: this creates a lot of information in the debug trace. Only
7415 +	 recommended when you are debugging preemption-related races.
7416 +
7417 +	 If unsure, say No.
7418 +
7419  endmenu
7420  
7421  endmenu
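
Taken together with the defaults stated in the help texts above, an illustrative configuration fragment might end up looking like the following (values are examples only; availability of some options depends on the architecture and on the ARCH_HAS_* support symbols):

CONFIG_PREFER_LOCAL_LINKING=y
CONFIG_LITMUS_QUANTUM_LENGTH_US=1000
CONFIG_BUG_ON_MIGRATION_DEADLOCK=y
CONFIG_NP_SECTION=y
CONFIG_LITMUS_LOCKING=y
CONFIG_SCHED_CPU_AFFINITY=y
CONFIG_ALLOW_EARLY_RELEASE=y
CONFIG_EDF_TIE_BREAK_LATENESS_NORM=y
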
7422 diff --git a/litmus/Makefile b/litmus/Makefile
7423 index 6318f1c..f7ceabc 100644
7424 --- a/litmus/Makefile
7425 +++ b/litmus/Makefile
7426 @@ -2,6 +2,26 @@
7427  # Makefile for LITMUS^RT
7428  #
7429  
7430 +obj-y     = sched_plugin.o litmus.o \
7431 +	    preempt.o \
7432 +	    litmus_proc.o \
7433 +	    budget.o \
7434 +	    clustered.o \
7435 +	    jobs.o \
7436 +	    sync.o \
7437 +	    rt_domain.o \
7438 +	    edf_common.o \
7439 +	    fp_common.o \
7440 +	    fdso.o \
7441 +	    locking.o \
7442 +	    srp.o \
7443 +	    bheap.o \
7444 +	    binheap.o \
7445 +	    ctrldev.o \
7446 +	    uncachedev.o
7447 +
7448 +obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o
7449 +
7450  obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
7451  obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o
7452  obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o
7453 diff --git a/litmus/affinity.c b/litmus/affinity.c
7454 new file mode 100644
7455 index 0000000..a5b437b
7456 --- /dev/null
7457 +++ b/litmus/affinity.c
7458 @@ -0,0 +1,41 @@
7459 +#include <linux/cpu.h>
7460 +
7461 +#include <litmus/affinity.h>
7462 +
7463 +struct neighborhood neigh_info[NR_CPUS];
7464 +
7465 +/* called by _init_litmus() */
7466 +void init_topology(void) {
7467 +	int cpu;
7468 +	int i;
7469 +	int chk;
7470 +	int depth = num_cache_leaves;
7471 +
7472 +	if (depth > NUM_CACHE_LEVELS)
7473 +		depth = NUM_CACHE_LEVELS;
7474 +
7475 +	for_each_online_cpu(cpu) {
7476 +		for (i = 0; i < depth; ++i) {
7477 +			chk = get_shared_cpu_map((struct cpumask *)&neigh_info[cpu].neighbors[i], cpu, i);
7478 +			if (chk) {
7479 +				/* failed */
7480 +				neigh_info[cpu].size[i] = 0;
7481 +			} else {
7482 +				/* size = num bits in mask */
7483 +				neigh_info[cpu].size[i] =
7484 +					cpumask_weight((struct cpumask *)&neigh_info[cpu].neighbors[i]);
7485 +			}
7486 +			printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n",
7487 +							cpu, neigh_info[cpu].size[i], i,
7488 +							*cpumask_bits(neigh_info[cpu].neighbors[i]));
7489 +		}
7490 +
7491 +		/* set data for non-existent levels */
7492 +		for (; i < NUM_CACHE_LEVELS; ++i) {
7493 +			neigh_info[cpu].size[i] = 0;
7494 +
7495 +			printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n",
7496 +						cpu, neigh_info[cpu].size[i], i, 0lu);
7497 +		}
7498 +	}
7499 +}
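
To illustrate how this topology information is meant to be consumed, the sketch below (not part of the patch) shows how a plugin might look for an idle CPU close to a given CPU; it only assumes the neigh_info[] array and NUM_CACHE_LEVELS exposed by <litmus/affinity.h> as used above.

#include <linux/cpumask.h>
#include <litmus/affinity.h>

/* Sketch: return an idle CPU sharing the lowest possible cache level with
 * 'cpu', or -1 if no recorded neighborhood contains an idle CPU. */
static int demo_pick_nearby_cpu(int cpu, const struct cpumask *idle_cpus)
{
	int level, target;

	for (level = 0; level < NUM_CACHE_LEVELS; level++) {
		if (!neigh_info[cpu].size[level])
			continue; /* nothing recorded at this cache level */
		target = cpumask_any_and(idle_cpus,
			(struct cpumask *) &neigh_info[cpu].neighbors[level]);
		if (target < nr_cpu_ids)
			return target;
	}
	return -1;
}
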
7500 diff --git a/litmus/bheap.c b/litmus/bheap.c
7501 new file mode 100644
7502 index 0000000..2707e01
7503 --- /dev/null
7504 +++ b/litmus/bheap.c
7505 @@ -0,0 +1,316 @@
7506 +#include <linux/bug.h>
7507 +#include <linux/kernel.h>
7508 +#include <litmus/bheap.h>
7509 +
7510 +void bheap_init(struct bheap* heap)
7511 +{
7512 +	heap->head = NULL;
7513 +	heap->min  = NULL;
7514 +}
7515 +
7516 +void bheap_node_init(struct bheap_node** _h, void* value)
7517 +{
7518 +	struct bheap_node* h = *_h;
7519 +	h->parent = NULL;
7520 +	h->next   = NULL;
7521 +	h->child  = NULL;
7522 +	h->degree = NOT_IN_HEAP;
7523 +	h->value  = value;
7524 +	h->ref    = _h;
7525 +}
7526 +
7527 +
7528 +/* make child a subtree of root */
7529 +static void __bheap_link(struct bheap_node* root,
7530 +			struct bheap_node* child)
7531 +{
7532 +	child->parent = root;
7533 +	child->next   = root->child;
7534 +	root->child   = child;
7535 +	root->degree++;
7536 +}
7537 +
7538 +/* merge root lists */
7539 +static  struct bheap_node* __bheap_merge(struct bheap_node* a,
7540 +					     struct bheap_node* b)
7541 +{
7542 +	struct bheap_node* head = NULL;
7543 +	struct bheap_node** pos = &head;
7544 +
7545 +	while (a && b) {
7546 +		if (a->degree < b->degree) {
7547 +			*pos = a;
7548 +			a = a->next;
7549 +		} else {
7550 +			*pos = b;
7551 +			b = b->next;
7552 +		}
7553 +		pos = &(*pos)->next;
7554 +	}
7555 +	if (a)
7556 +		*pos = a;
7557 +	else
7558 +		*pos = b;
7559 +	return head;
7560 +}
7561 +
7562 +/* reverse a linked list of nodes. also clears parent pointer */
7563 +static  struct bheap_node* __bheap_reverse(struct bheap_node* h)
7564 +{
7565 +	struct bheap_node* tail = NULL;
7566 +	struct bheap_node* next;
7567 +
7568 +	if (!h)
7569 +		return h;
7570 +
7571 +	h->parent = NULL;
7572 +	while (h->next) {
7573 +		next    = h->next;
7574 +		h->next = tail;
7575 +		tail    = h;
7576 +		h       = next;
7577 +		h->parent = NULL;
7578 +	}
7579 +	h->next = tail;
7580 +	return h;
7581 +}
7582 +
7583 +static  void __bheap_min(bheap_prio_t higher_prio, struct bheap* heap,
7584 +			      struct bheap_node** prev, struct bheap_node** node)
7585 +{
7586 +	struct bheap_node *_prev, *cur;
7587 +	*prev = NULL;
7588 +
7589 +	if (!heap->head) {
7590 +		*node = NULL;
7591 +		return;
7592 +	}
7593 +
7594 +	*node = heap->head;
7595 +	_prev = heap->head;
7596 +	cur   = heap->head->next;
7597 +	while (cur) {
7598 +		if (higher_prio(cur, *node)) {
7599 +			*node = cur;
7600 +			*prev = _prev;
7601 +		}
7602 +		_prev = cur;
7603 +		cur   = cur->next;
7604 +	}
7605 +}
7606 +
7607 +static  void __bheap_union(bheap_prio_t higher_prio, struct bheap* heap,
7608 +				struct bheap_node* h2)
7609 +{
7610 +	struct bheap_node* h1;
7611 +	struct bheap_node *prev, *x, *next;
7612 +	if (!h2)
7613 +		return;
7614 +	h1 = heap->head;
7615 +	if (!h1) {
7616 +		heap->head = h2;
7617 +		return;
7618 +	}
7619 +	h1 = __bheap_merge(h1, h2);
7620 +	prev = NULL;
7621 +	x    = h1;
7622 +	next = x->next;
7623 +	while (next) {
7624 +		if (x->degree != next->degree ||
7625 +		    (next->next && next->next->degree == x->degree)) {
7626 +			/* nothing to do, advance */
7627 +			prev = x;
7628 +			x    = next;
7629 +		} else if (higher_prio(x, next)) {
7630 +			/* x becomes the root of next */
7631 +			x->next = next->next;
7632 +			__bheap_link(x, next);
7633 +		} else {
7634 +			/* next becomes the root of x */
7635 +			if (prev)
7636 +				prev->next = next;
7637 +			else
7638 +				h1 = next;
7639 +			__bheap_link(next, x);
7640 +			x = next;
7641 +		}
7642 +		next = x->next;
7643 +	}
7644 +	heap->head = h1;
7645 +}
7646 +
7647 +static struct bheap_node* __bheap_extract_min(bheap_prio_t higher_prio,
7648 +					    struct bheap* heap)
7649 +{
7650 +	struct bheap_node *prev, *node;
7651 +	__bheap_min(higher_prio, heap, &prev, &node);
7652 +	if (!node)
7653 +		return NULL;
7654 +	if (prev)
7655 +		prev->next = node->next;
7656 +	else
7657 +		heap->head = node->next;
7658 +	__bheap_union(higher_prio, heap, __bheap_reverse(node->child));
7659 +	return node;
7660 +}
7661 +
7662 +/* insert (and reinitialize) a node into the heap */
7663 +void bheap_insert(bheap_prio_t higher_prio, struct bheap* heap,
7664 +		 struct bheap_node* node)
7665 +{
7666 +	struct bheap_node *min;
7667 +	node->child  = NULL;
7668 +	node->parent = NULL;
7669 +	node->next   = NULL;
7670 +	node->degree = 0;
7671 +	if (heap->min && higher_prio(node, heap->min)) {
7672 +		/* swap min cache */
7673 +		min = heap->min;
7674 +		min->child  = NULL;
7675 +		min->parent = NULL;
7676 +		min->next   = NULL;
7677 +		min->degree = 0;
7678 +		__bheap_union(higher_prio, heap, min);
7679 +		heap->min   = node;
7680 +	} else
7681 +		__bheap_union(higher_prio, heap, node);
7682 +}
7683 +
7684 +void bheap_uncache_min(bheap_prio_t higher_prio, struct bheap* heap)
7685 +{
7686 +	struct bheap_node* min;
7687 +	if (heap->min) {
7688 +		min = heap->min;
7689 +		heap->min = NULL;
7690 +		bheap_insert(higher_prio, heap, min);
7691 +	}
7692 +}
7693 +
7694 +/* merge addition into target */
7695 +void bheap_union(bheap_prio_t higher_prio,
7696 +		struct bheap* target, struct bheap* addition)
7697 +{
7698 +	/* first insert any cached minima, if necessary */
7699 +	bheap_uncache_min(higher_prio, target);
7700 +	bheap_uncache_min(higher_prio, addition);
7701 +	__bheap_union(higher_prio, target, addition->head);
7702 +	/* this is a destructive merge */
7703 +	addition->head = NULL;
7704 +}
7705 +
7706 +struct bheap_node* bheap_peek(bheap_prio_t higher_prio,
7707 +			    struct bheap* heap)
7708 +{
7709 +	if (!heap->min)
7710 +		heap->min = __bheap_extract_min(higher_prio, heap);
7711 +	return heap->min;
7712 +}
7713 +
7714 +struct bheap_node* bheap_take(bheap_prio_t higher_prio,
7715 +			    struct bheap* heap)
7716 +{
7717 +	struct bheap_node *node;
7718 +	if (!heap->min)
7719 +		heap->min = __bheap_extract_min(higher_prio, heap);
7720 +	node = heap->min;
7721 +	heap->min = NULL;
7722 +	if (node)
7723 +		node->degree = NOT_IN_HEAP;
7724 +	return node;
7725 +}
7726 +
7727 +int bheap_decrease(bheap_prio_t higher_prio, struct bheap_node* node)
7728 +{
7729 +	struct bheap_node  *parent;
7730 +	struct bheap_node** tmp_ref;
7731 +	void* tmp;
7732 +
7733 +	/* bubble up */
7734 +	parent = node->parent;
7735 +	while (parent && higher_prio(node, parent)) {
7736 +		/* swap parent and node */
7737 +		tmp           = parent->value;
7738 +		parent->value = node->value;
7739 +		node->value   = tmp;
7740 +		/* swap references */
7741 +		*(parent->ref) = node;
7742 +		*(node->ref)   = parent;
7743 +		tmp_ref        = parent->ref;
7744 +		parent->ref    = node->ref;
7745 +		node->ref      = tmp_ref;
7746 +		/* step up */
7747 +		node   = parent;
7748 +		parent = node->parent;
7749 +	}
7750 +
7751 +	return parent != NULL;
7752 +}
7753 +
7754 +void bheap_delete(bheap_prio_t higher_prio, struct bheap* heap,
7755 +		 struct bheap_node* node)
7756 +{
7757 +	struct bheap_node *parent, *prev, *pos;
7758 +	struct bheap_node** tmp_ref;
7759 +	void* tmp;
7760 +
7761 +	if (heap->min != node) {
7762 +		/* bubble up */
7763 +		parent = node->parent;
7764 +		while (parent) {
7765 +			/* swap parent and node */
7766 +			tmp           = parent->value;
7767 +			parent->value = node->value;
7768 +			node->value   = tmp;
7769 +			/* swap references */
7770 +			*(parent->ref) = node;
7771 +			*(node->ref)   = parent;
7772 +			tmp_ref        = parent->ref;
7773 +			parent->ref    = node->ref;
7774 +			node->ref      = tmp_ref;
7775 +			/* step up */
7776 +			node   = parent;
7777 +			parent = node->parent;
7778 +		}
7779 +		/* now delete:
7780 +		 * first find prev */
7781 +		prev = NULL;
7782 +		pos  = heap->head;
7783 +		while (pos != node) {
7784 +			BUG_ON(!pos); /* fell off the list -> deleted from wrong heap */
7785 +			prev = pos;
7786 +			pos  = pos->next;
7787 +		}
7788 +		/* we have prev, now remove node */
7789 +		if (prev)
7790 +			prev->next = node->next;
7791 +		else
7792 +			heap->head = node->next;
7793 +		__bheap_union(higher_prio, heap, __bheap_reverse(node->child));
7794 +	} else
7795 +		heap->min = NULL;
7796 +	node->degree = NOT_IN_HEAP;
7797 +}
7798 +
7799 +/* allocate a heap node for value and insert into the heap */
7800 +int bheap_add(bheap_prio_t higher_prio, struct bheap* heap,
7801 +	     void* value, int gfp_flags)
7802 +{
7803 +	struct bheap_node* hn = bheap_node_alloc(gfp_flags);
7804 +	if (likely(hn)) {
7805 +		bheap_node_init(&hn, value);
7806 +		bheap_insert(higher_prio, heap, hn);
7807 +	}
7808 +	return hn != NULL;
7809 +}
7810 +
7811 +void* bheap_take_del(bheap_prio_t higher_prio,
7812 +		    struct bheap* heap)
7813 +{
7814 +	struct bheap_node* hn = bheap_take(higher_prio, heap);
7815 +	void* ret = NULL;
7816 +	if (hn) {
7817 +		ret = hn->value;
7818 +		bheap_node_free(hn);
7819 +	}
7820 +	return ret;
7821 +}
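
For context (not from the patch), the binomial heap is meant to be used with caller-managed nodes and a comparison callback such as edf_ready_order() in litmus/edf_common.c below. A minimal sketch, glossing over the fact that real users keep the node back-reference ('ref') in a stable location such as the task's rt_param:

#include <litmus/bheap.h>

/* order nodes by the smaller value stored behind 'value' */
static int demo_earlier(struct bheap_node *a, struct bheap_node *b)
{
	return *(unsigned long *) a->value < *(unsigned long *) b->value;
}

static void demo_bheap(void)
{
	struct bheap heap;
	struct bheap_node nodes[2];
	struct bheap_node *n0 = &nodes[0], *n1 = &nodes[1], *min;
	unsigned long d0 = 100, d1 = 50;

	bheap_init(&heap);
	bheap_node_init(&n0, &d0);  /* records &n0 as the node's back-reference */
	bheap_node_init(&n1, &d1);
	bheap_insert(demo_earlier, &heap, n0);
	bheap_insert(demo_earlier, &heap, n1);

	min = bheap_take(demo_earlier, &heap);  /* yields the node holding d1 == 50 */
	(void) min;
}
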
7822 diff --git a/litmus/binheap.c b/litmus/binheap.c
7823 new file mode 100644
7824 index 0000000..d3ab34b
7825 --- /dev/null
7826 +++ b/litmus/binheap.c
7827 @@ -0,0 +1,387 @@
7828 +#include <litmus/binheap.h>
7829 +
7830 +/* Returns true if the root ancestor of node is the root of the given heap. */
7831 +int binheap_is_in_this_heap(struct binheap_node *node,
7832 +	struct binheap* heap)
7833 +{
7834 +	if(!binheap_is_in_heap(node)) {
7835 +		return 0;
7836 +	}
7837 +
7838 +	while(node->parent != NULL) {
7839 +		node = node->parent;
7840 +	}
7841 +
7842 +	return (node == heap->root);
7843 +}
7844 +
7845 +
7846 +/* Update the node reference pointers.  Same logic as Litmus binomial heap. */
7847 +static void __update_ref(struct binheap_node *parent,
7848 +				struct binheap_node *child)
7849 +{
7850 +	*(parent->ref_ptr) = child;
7851 +	*(child->ref_ptr) = parent;
7852 +
7853 +	swap(parent->ref_ptr, child->ref_ptr);
7854 +}
7855 +
7856 +
7857 +/* Swaps data between two nodes. */
7858 +static void __binheap_swap(struct binheap_node *parent,
7859 +				struct binheap_node *child)
7860 +{
7861 +	swap(parent->data, child->data);
7862 +	__update_ref(parent, child);
7863 +}
7864 +
7865 +
7866 +/* Swaps memory and data between two nodes. Actual nodes swap instead of
7867 + * just data.  Needed when we delete nodes from the heap.
7868 + */
7869 +static void __binheap_swap_safe(struct binheap *handle,
7870 +				struct binheap_node *a,
7871 +				struct binheap_node *b)
7872 +{
7873 +	swap(a->data, b->data);
7874 +	__update_ref(a, b);
7875 +
7876 +	if((a->parent != NULL) && (a->parent == b->parent)) {
7877 +		/* special case: shared parent */
7878 +		swap(a->parent->left, a->parent->right);
7879 +	}
7880 +	else {
7881 +		/* Update pointers to swap parents. */
7882 +
7883 +		if(a->parent) {
7884 +			if(a == a->parent->left) {
7885 +				a->parent->left = b;
7886 +			}
7887 +			else {
7888 +				a->parent->right = b;
7889 +			}
7890 +		}
7891 +
7892 +		if(b->parent) {
7893 +			if(b == b->parent->left) {
7894 +				b->parent->left = a;
7895 +			}
7896 +			else {
7897 +				b->parent->right = a;
7898 +			}
7899 +		}
7900 +
7901 +		swap(a->parent, b->parent);
7902 +	}
7903 +
7904 +	/* swap children */
7905 +
7906 +	if(a->left) {
7907 +		a->left->parent = b;
7908 +
7909 +		if(a->right) {
7910 +			a->right->parent = b;
7911 +		}
7912 +	}
7913 +
7914 +	if(b->left) {
7915 +		b->left->parent = a;
7916 +
7917 +		if(b->right) {
7918 +			b->right->parent = a;
7919 +		}
7920 +	}
7921 +
7922 +	swap(a->left, b->left);
7923 +	swap(a->right, b->right);
7924 +
7925 +
7926 +	/* update next/last/root pointers */
7927 +
7928 +	if(a == handle->next) {
7929 +		handle->next = b;
7930 +	}
7931 +	else if(b == handle->next) {
7932 +		handle->next = a;
7933 +	}
7934 +
7935 +	if(a == handle->last) {
7936 +		handle->last = b;
7937 +	}
7938 +	else if(b == handle->last) {
7939 +		handle->last = a;
7940 +	}
7941 +
7942 +	if(a == handle->root) {
7943 +		handle->root = b;
7944 +	}
7945 +	else if(b == handle->root) {
7946 +		handle->root = a;
7947 +	}
7948 +}
7949 +
7950 +
7951 +/**
7952 + * Update the pointer to the last node in the complete binary tree.
7953 + * Called internally after the root node has been deleted.
7954 + */
7955 +static void __binheap_update_last(struct binheap *handle)
7956 +{
7957 +	struct binheap_node *temp = handle->last;
7958 +
7959 +	/* find a "bend" in the tree. */
7960 +	while(temp->parent && (temp == temp->parent->left)) {
7961 +		temp = temp->parent;
7962 +	}
7963 +
7964 +	/* step over to sibling if we're not at root */
7965 +	if(temp->parent != NULL) {
7966 +		temp = temp->parent->left;
7967 +	}
7968 +
7969 +	/* now travel right as far as possible. */
7970 +	while(temp->right != NULL) {
7971 +		temp = temp->right;
7972 +	}
7973 +
7974 +	/* take one step to the left if we're not at the bottom-most level. */
7975 +	if(temp->left != NULL) {
7976 +		temp = temp->left;
7977 +	}
7978 +
7979 +	handle->last = temp;
7980 +}
7981 +
7982 +
7983 +/**
7984 + * Update the pointer to the node that will take the next inserted node.
7985 + * Called internally after a node has been inserted.
7986 + */
7987 +static void __binheap_update_next(struct binheap *handle)
7988 +{
7989 +	struct binheap_node *temp = handle->next;
7990 +
7991 +	/* find a "bend" in the tree. */
7992 +	while(temp->parent && (temp == temp->parent->right)) {
7993 +		temp = temp->parent;
7994 +	}
7995 +
7996 +	/* step over to sibling if we're not at root */
7997 +	if(temp->parent != NULL) {
7998 +		temp = temp->parent->right;
7999 +	}
8000 +
8001 +	/* now travel left as far as possible. */
8002 +	while(temp->left != NULL) {
8003 +		temp = temp->left;
8004 +	}
8005 +
8006 +	handle->next = temp;
8007 +}
8008 +
8009 +
8010 +
8011 +/* bubble node up towards root */
8012 +static void __binheap_bubble_up(struct binheap *handle,
8013 +				struct binheap_node *node)
8014 +{
8015 +	/* let BINHEAP_POISON data bubble to the top */
8016 +
8017 +	while((node->parent != NULL) &&
8018 +		  ((node->data == BINHEAP_POISON) ||
8019 +		   handle->compare(node, node->parent))) {
8020 +			  __binheap_swap(node->parent, node);
8021 +			  node = node->parent;
8022 +	}
8023 +}
8024 +
8025 +
8026 +/* bubble node down, swapping with min-child */
8027 +static void __binheap_bubble_down(struct binheap *handle)
8028 +{
8029 +	struct binheap_node *node = handle->root;
8030 +
8031 +	while(node->left != NULL) {
8032 +		if(node->right && handle->compare(node->right, node->left)) {
8033 +			if(handle->compare(node->right, node)) {
8034 +				__binheap_swap(node, node->right);
8035 +				node = node->right;
8036 +			}
8037 +			else {
8038 +				break;
8039 +			}
8040 +		}
8041 +		else {
8042 +			if(handle->compare(node->left, node)) {
8043 +				__binheap_swap(node, node->left);
8044 +				node = node->left;
8045 +			}
8046 +			else {
8047 +				break;
8048 +			}
8049 +		}
8050 +	}
8051 +}
8052 +
8053 +
8054 +void __binheap_add(struct binheap_node *new_node,
8055 +				struct binheap *handle,
8056 +				void *data)
8057 +{
8058 +	new_node->data = data;
8059 +	new_node->ref = new_node;
8060 +	new_node->ref_ptr = &(new_node->ref);
8061 +
8062 +	if(!binheap_empty(handle)) {
8063 +		/* insert left side first */
8064 +		if(handle->next->left == NULL) {
8065 +			handle->next->left = new_node;
8066 +			new_node->parent = handle->next;
8067 +			new_node->left = NULL;
8068 +			new_node->right = NULL;
8069 +
8070 +			handle->last = new_node;
8071 +
8072 +			__binheap_bubble_up(handle, new_node);
8073 +		}
8074 +		else {
8075 +			/* left occupied. insert right. */
8076 +			handle->next->right = new_node;
8077 +			new_node->parent = handle->next;
8078 +			new_node->left = NULL;
8079 +			new_node->right = NULL;
8080 +
8081 +			handle->last = new_node;
8082 +
8083 +			__binheap_update_next(handle);
8084 +			__binheap_bubble_up(handle, new_node);
8085 +		}
8086 +	}
8087 +	else {
8088 +		/* first node in heap */
8089 +
8090 +		new_node->parent = NULL;
8091 +		new_node->left = NULL;
8092 +		new_node->right = NULL;
8093 +
8094 +		handle->root = new_node;
8095 +		handle->next = new_node;
8096 +		handle->last = new_node;
8097 +	}
8098 +}
8099 +
8100 +
8101 +/**
8102 + * Removes the root node from the heap. The node is removed after coalescing
8103 + * the binheap_node with its original data pointer at the root of the tree.
8104 + *
8105 + * The 'last' node in the tree is then swapped up to the root and bubbled
8106 + * down.
8107 + */
8108 +void __binheap_delete_root(struct binheap *handle,
8109 +				struct binheap_node *container)
8110 +{
8111 +	struct binheap_node *root = handle->root;
8112 +
8113 +	if(root != container) {
8114 +		/* coalesce */
8115 +		__binheap_swap_safe(handle, root, container);
8116 +		root = container;
8117 +	}
8118 +
8119 +	if(handle->last != root) {
8120 +		/* swap 'last' node up to root and bubble it down. */
8121 +
8122 +		struct binheap_node *to_move = handle->last;
8123 +
8124 +		if(to_move->parent != root) {
8125 +			handle->next = to_move->parent;
8126 +
8127 +			if(handle->next->right == to_move) {
8128 +				/* disconnect from parent */
8129 +				to_move->parent->right = NULL;
8130 +				handle->last = handle->next->left;
8131 +			}
8132 +			else {
8133 +				/* find new 'last' before we disconnect */
8134 +				__binheap_update_last(handle);
8135 +
8136 +				/* disconnect from parent */
8137 +				to_move->parent->left = NULL;
8138 +			}
8139 +		}
8140 +		else {
8141 +			/* 'last' is direct child of root */
8142 +
8143 +			handle->next = to_move;
8144 +
8145 +			if(to_move == to_move->parent->right) {
8146 +				to_move->parent->right = NULL;
8147 +				handle->last = to_move->parent->left;
8148 +			}
8149 +			else {
8150 +				to_move->parent->left = NULL;
8151 +				handle->last = to_move;
8152 +			}
8153 +		}
8154 +		to_move->parent = NULL;
8155 +
8156 +		/* reconnect as root.  We can't just swap data ptrs since root node
8157 +		 * may be freed after this function returns.
8158 +		 */
8159 +		to_move->left = root->left;
8160 +		to_move->right = root->right;
8161 +		if(to_move->left != NULL) {
8162 +			to_move->left->parent = to_move;
8163 +		}
8164 +		if(to_move->right != NULL) {
8165 +			to_move->right->parent = to_move;
8166 +		}
8167 +
8168 +		handle->root = to_move;
8169 +
8170 +		/* bubble down */
8171 +		__binheap_bubble_down(handle);
8172 +	}
8173 +	else {
8174 +		/* removing last node in tree */
8175 +		handle->root = NULL;
8176 +		handle->next = NULL;
8177 +		handle->last = NULL;
8178 +	}
8179 +
8180 +	/* mark as removed */
8181 +	container->parent = BINHEAP_POISON;
8182 +}
8183 +
8184 +
8185 +/**
8186 + * Delete an arbitrary node.  Bubble node to delete up to the root,
8187 + * and then delete the root.
8188 + */
8189 +void __binheap_delete(struct binheap_node *node_to_delete,
8190 +				struct binheap *handle)
8191 +{
8192 +	struct binheap_node *target = node_to_delete->ref;
8193 +	void *temp_data = target->data;
8194 +
8195 +	/* temporarily mark data as BINHEAP_POISON so the node bubbles up to the top. */
8196 +	target->data = BINHEAP_POISON;
8197 +
8198 +	__binheap_bubble_up(handle, target);
8199 +	__binheap_delete_root(handle, node_to_delete);
8200 +
8201 +	node_to_delete->data = temp_data;  /* restore node data pointer */
8202 +}
8203 +
8204 +
8205 +/**
8206 + * Bubble up a node whose key has decreased in value (i.e., whose priority has increased).
8207 + */
8208 +void __binheap_decrease(struct binheap_node *orig_node,
8209 +				struct binheap *handle)
8210 +{
8211 +	struct binheap_node *target = orig_node->ref;
8212 +
8213 +	__binheap_bubble_up(handle, target);
8214 +}
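
A rough usage sketch (not from the patch): nodes are embedded in the objects being ordered, and the handle carries the comparison callback. The hand-rolled initializer below stands in for whatever declaration/initialization helpers <litmus/binheap.h> actually provides, which lie outside this hunk; treat the field list as an assumption.

#include <litmus/binheap.h>

struct demo_job {
	unsigned long deadline;
	struct binheap_node node;
};

/* nonzero when 'a' should be ordered above 'b' */
static int demo_earlier(struct binheap_node *a, struct binheap_node *b)
{
	struct demo_job *ja = a->data, *jb = b->data;
	return ja->deadline < jb->deadline;
}

static void demo_binheap(void)
{
	/* assumption: these are the only fields the handle needs */
	struct binheap hdl = { .root = NULL, .next = NULL, .last = NULL,
			       .compare = demo_earlier };
	struct demo_job j1 = { .deadline = 10 }, j2 = { .deadline = 20 };

	__binheap_add(&j1.node, &hdl, &j1);
	__binheap_add(&j2.node, &hdl, &j2);

	/* j1 has the earlier deadline and should now sit at hdl.root */
	__binheap_delete_root(&hdl, &j1.node);
}
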
8215 diff --git a/litmus/budget.c b/litmus/budget.c
8216 new file mode 100644
8217 index 0000000..1ffb8e3
8218 --- /dev/null
8219 +++ b/litmus/budget.c
8220 @@ -0,0 +1,116 @@
8221 +#include <linux/sched.h>
8222 +#include <linux/percpu.h>
8223 +#include <linux/hrtimer.h>
8224 +
8225 +#include <litmus/litmus.h>
8226 +#include <litmus/preempt.h>
8227 +
8228 +#include <litmus/budget.h>
8229 +
8230 +struct enforcement_timer {
8231 +	/* The enforcement timer is used to accurately police
8232 +	 * slice budgets. */
8233 +	struct hrtimer		timer;
8234 +	int			armed;
8235 +};
8236 +
8237 +DEFINE_PER_CPU(struct enforcement_timer, budget_timer);
8238 +
8239 +static enum hrtimer_restart on_enforcement_timeout(struct hrtimer *timer)
8240 +{
8241 +	struct enforcement_timer* et = container_of(timer,
8242 +						    struct enforcement_timer,
8243 +						    timer);
8244 +	unsigned long flags;
8245 +
8246 +	local_irq_save(flags);
8247 +	TRACE("enforcement timer fired.\n");
8248 +	et->armed = 0;
8249 +	/* activate scheduler */
8250 +	litmus_reschedule_local();
8251 +	local_irq_restore(flags);
8252 +
8253 +	return  HRTIMER_NORESTART;
8254 +}
8255 +
8256 +/* assumes called with IRQs off */
8257 +static void cancel_enforcement_timer(struct enforcement_timer* et)
8258 +{
8259 +	int ret;
8260 +
8261 +	TRACE("cancelling enforcement timer.\n");
8262 +
8263 +	/* Since interrupts are disabled and et->armed is only
8264 +	 * modified locally, we do not need any locks.
8265 +	 */
8266 +
8267 +	if (et->armed) {
8268 +		ret = hrtimer_try_to_cancel(&et->timer);
8269 +		/* Should never be inactive. */
8270 +		BUG_ON(ret == 0);
8271 +		/* Should never be running concurrently. */
8272 +		BUG_ON(ret == -1);
8273 +
8274 +		et->armed = 0;
8275 +	}
8276 +}
8277 +
8278 +/* assumes called with IRQs off */
8279 +static void arm_enforcement_timer(struct enforcement_timer* et,
8280 +				  struct task_struct* t)
8281 +{
8282 +	lt_t when_to_fire;
8283 +	TRACE_TASK(t, "arming enforcement timer.\n");
8284 +
8285 +	WARN_ONCE(!hrtimer_is_hres_active(&et->timer),
8286 +		KERN_ERR "WARNING: no high resolution timers available!?\n");
8287 +
8288 +	/* Calling this when there is no budget left for the task
8289 +	 * makes no sense, unless the task is non-preemptive. */
8290 +	BUG_ON(budget_exhausted(t) && (!is_np(t)));
8291 +
8292 +	/* __hrtimer_start_range_ns() cancels the timer
8293 +	 * anyway, so we don't have to check whether it is still armed */
8294 +
8295 +	if (likely(!is_np(t))) {
8296 +		when_to_fire = litmus_clock() + budget_remaining(t);
8297 +		__hrtimer_start_range_ns(&et->timer,
8298 +					 ns_to_ktime(when_to_fire),
8299 +					 0 /* delta */,
8300 +					 HRTIMER_MODE_ABS_PINNED,
8301 +					 0 /* no wakeup */);
8302 +		et->armed = 1;
8303 +	}
8304 +}
8305 +
8306 +
8307 +/* expects to be called with IRQs off */
8308 +void update_enforcement_timer(struct task_struct* t)
8309 +{
8310 +	struct enforcement_timer* et = &__get_cpu_var(budget_timer);
8311 +
8312 +	if (t && budget_precisely_enforced(t)) {
8313 +		/* Make sure we call into the scheduler when this budget
8314 +		 * expires. */
8315 +		arm_enforcement_timer(et, t);
8316 +	} else if (et->armed) {
8317 +		/* Make sure we don't cause unnecessary interrupts. */
8318 +		cancel_enforcement_timer(et);
8319 +	}
8320 +}
8321 +
8322 +
8323 +static int __init init_budget_enforcement(void)
8324 +{
8325 +	int cpu;
8326 +	struct enforcement_timer* et;
8327 +
8328 +	for (cpu = 0; cpu < NR_CPUS; cpu++)  {
8329 +		et = &per_cpu(budget_timer, cpu);
8330 +		hrtimer_init(&et->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
8331 +		et->timer.function = on_enforcement_timeout;
8332 +	}
8333 +	return 0;
8334 +}
8335 +
8336 +module_init(init_budget_enforcement);
8337 diff --git a/litmus/clustered.c b/litmus/clustered.c
8338 new file mode 100644
8339 index 0000000..979fac6
8340 --- /dev/null
8341 +++ b/litmus/clustered.c
8342 @@ -0,0 +1,111 @@
8343 +#include <linux/gfp.h>
8344 +#include <linux/cpumask.h>
8345 +#include <linux/list.h>
8346 +
8347 +#include <litmus/clustered.h>
8348 +
8349 +#if !defined(CONFIG_X86) || !defined(CONFIG_SYSFS)
8350 +/* fake get_shared_cpu_map() on non-x86 architectures */
8351 +
8352 +int get_shared_cpu_map(cpumask_var_t mask, unsigned int cpu, int index)
8353 +{
8354 +	if (index != 1)
8355 +		return 1;
8356 +	else {
8357 +		/* Fake L1: CPU is all by itself. */
8358 +		cpumask_clear(mask);
8359 +		cpumask_set_cpu(cpu, mask);
8360 +		return 0;
8361 +	}
8362 +}
8363 +
8364 +#endif
8365 +
8366 +int get_cluster_size(enum cache_level level)
8367 +{
8368 +	cpumask_var_t mask;
8369 +	int ok;
8370 +	int num_cpus;
8371 +
8372 +	if (level == GLOBAL_CLUSTER)
8373 +		return num_online_cpus();
8374 +	else {
8375 +		if (!zalloc_cpumask_var(&mask, GFP_ATOMIC))
8376 +			return -ENOMEM;
8377 +		/* assumes CPU 0 is representative of all CPUs */
8378 +		ok = get_shared_cpu_map(mask, 0, level);
8379 +		/* ok == 0 means we got the map; otherwise it's an invalid cache level */
8380 +		if (ok == 0)
8381 +			num_cpus = cpumask_weight(mask);
8382 +		free_cpumask_var(mask);
8383 +
8384 +		if (ok == 0)
8385 +			return num_cpus;
8386 +		else
8387 +			return -EINVAL;
8388 +	}
8389 +}
8390 +
8391 +int assign_cpus_to_clusters(enum cache_level level,
8392 +			    struct scheduling_cluster* clusters[],
8393 +			    unsigned int num_clusters,
8394 +			    struct cluster_cpu* cpus[],
8395 +			    unsigned int num_cpus)
8396 +{
8397 +	cpumask_var_t mask;
8398 +	unsigned int i, free_cluster = 0, low_cpu;
8399 +	int err = 0;
8400 +
8401 +	if (!zalloc_cpumask_var(&mask, GFP_ATOMIC))
8402 +		return -ENOMEM;
8403 +
8404 +	/* clear cluster pointers */
8405 +	for (i = 0; i < num_cpus; i++) {
8406 +		cpus[i]->id      = i;
8407 +		cpus[i]->cluster = NULL;
8408 +	}
8409 +
8410 +	/* initialize clusters */
8411 +	for (i = 0; i < num_clusters; i++) {
8412 +		clusters[i]->id = i;
8413 +		INIT_LIST_HEAD(&clusters[i]->cpus);
8414 +	}
8415 +
8416 +	/* Assign each CPU. Two assumptions are made:
8417 +	 * 1) The index of a cpu in cpus corresponds to its processor id (i.e., the index in a cpu mask).
8418 +	 * 2) All cpus that belong to some cluster are online.
8419 +	 */
8420 +	for_each_online_cpu(i) {
8421 +		/* get lowest-id CPU in cluster */
8422 +		if (level != GLOBAL_CLUSTER) {
8423 +			err = get_shared_cpu_map(mask, cpus[i]->id, level);
8424 +			if (err != 0) {
8425 +				/* ugh... wrong cache level? Either caller screwed up
8426 +				 * or the CPU topology is weird. */
8427 +				printk(KERN_ERR "Could not set up clusters for L%d sharing (max: L%d).\n",
8428 +				       level, err);
8429 +				err = -EINVAL;
8430 +				goto out;
8431 +			}
8432 +			low_cpu = cpumask_first(mask);
8433 +		} else
8434 +			low_cpu = 0;
8435 +		if (low_cpu == i) {
8436 +			/* caller must provide an appropriate number of clusters */
8437 +			BUG_ON(free_cluster >= num_clusters);
8438 +
8439 +			/* create new cluster */
8440 +			cpus[i]->cluster = clusters[free_cluster++];
8441 +		} else {
8442 +			/* low_cpu points to the right cluster
8443 +			 * Assumption: low_cpu is actually online and was processed earlier. */
8444 +			cpus[i]->cluster = cpus[low_cpu]->cluster;
8445 +		}
8446 +		/* enqueue in cpus list */
8447 +		list_add_tail(&cpus[i]->cluster_list, &cpus[i]->cluster->cpus);
8448 +		printk(KERN_INFO "Assigning CPU%u to cluster %u.\n", i, cpus[i]->cluster->id);
8449 +	}
8450 +out:
8451 +	free_cpumask_var(mask);
8452 +	return err;
8453 +}
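
As an illustration of the intended call sequence (not from the patch; allocation-failure handling and the plugin-specific contents of struct scheduling_cluster and struct cluster_cpu are elided), a clustered plugin's activation path might look roughly like this:

#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <litmus/clustered.h>

static int demo_setup_clusters(enum cache_level level)
{
	struct scheduling_cluster **clusters;
	struct cluster_cpu **cpus;
	int cluster_size, num_clusters, num_cpus, i, err;

	cluster_size = get_cluster_size(level);
	if (cluster_size <= 0)
		return -EINVAL;

	num_cpus = num_online_cpus();
	num_clusters = num_cpus / cluster_size;

	/* allocation failures are not checked in this sketch */
	clusters = kmalloc(num_clusters * sizeof(*clusters), GFP_KERNEL);
	cpus = kmalloc(num_cpus * sizeof(*cpus), GFP_KERNEL);
	for (i = 0; i < num_clusters; i++)
		clusters[i] = kzalloc(sizeof(**clusters), GFP_KERNEL);
	for (i = 0; i < num_cpus; i++)
		cpus[i] = kzalloc(sizeof(**cpus), GFP_KERNEL);

	err = assign_cpus_to_clusters(level, clusters, num_clusters,
				      cpus, num_cpus);
	/* on success, each cpus[i]->cluster now points into clusters[] */
	return err;
}
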
8454 diff --git a/litmus/ctrldev.c b/litmus/ctrldev.c
8455 new file mode 100644
8456 index 0000000..877f278
8457 --- /dev/null
8458 +++ b/litmus/ctrldev.c
8459 @@ -0,0 +1,160 @@
8460 +#include <linux/sched.h>
8461 +#include <linux/mm.h>
8462 +#include <linux/fs.h>
8463 +#include <linux/miscdevice.h>
8464 +#include <linux/module.h>
8465 +
8466 +#include <litmus/litmus.h>
8467 +
8468 +/* only one page for now, but we might want to add a RO version at some point */
8469 +
8470 +#define CTRL_NAME        "litmus/ctrl"
8471 +
8472 +/* allocate t->rt_param.ctrl_page*/
8473 +static int alloc_ctrl_page(struct task_struct *t)
8474 +{
8475 +	int err = 0;
8476 +
8477 +	/* only allocate if the task doesn't have one yet */
8478 +	if (!tsk_rt(t)->ctrl_page) {
8479 +		tsk_rt(t)->ctrl_page = (void*) get_zeroed_page(GFP_KERNEL);
8480 +		if (!tsk_rt(t)->ctrl_page)
8481 +			err = -ENOMEM;
8482 +		/* will get de-allocated in task teardown */
8483 +		TRACE_TASK(t, "%s ctrl_page = %p\n", __FUNCTION__,
8484 +			   tsk_rt(t)->ctrl_page);
8485 +	}
8486 +	return err;
8487 +}
8488 +
8489 +static int map_ctrl_page(struct task_struct *t, struct vm_area_struct* vma)
8490 +{
8491 +	int err;
8492 +
8493 +	struct page* ctrl = virt_to_page(tsk_rt(t)->ctrl_page);
8494 +
8495 +	TRACE_CUR(CTRL_NAME
8496 +		  ": mapping %p (pfn:%lx) to 0x%lx (prot:%lx)\n",
8497 +		  tsk_rt(t)->ctrl_page,page_to_pfn(ctrl), vma->vm_start,
8498 +		  vma->vm_page_prot);
8499 +
8500 +	/* Map it into the vma. */
8501 +	err = vm_insert_page(vma, vma->vm_start, ctrl);
8502 +
8503 +	if (err)
8504 +		TRACE_CUR(CTRL_NAME ": vm_insert_page() failed (%d)\n", err);
8505 +
8506 +	return err;
8507 +}
8508 +
8509 +static void litmus_ctrl_vm_close(struct vm_area_struct* vma)
8510 +{
8511 +	TRACE_CUR("%s flags=0x%x prot=0x%x\n", __FUNCTION__,
8512 +		  vma->vm_flags, vma->vm_page_prot);
8513 +
8514 +	TRACE_CUR(CTRL_NAME
8515 +		  ": %p:%p vma:%p vma->vm_private_data:%p closed.\n",
8516 +		  (void*) vma->vm_start, (void*) vma->vm_end, vma,
8517 +		  vma->vm_private_data);
8518 +}
8519 +
8520 +static int litmus_ctrl_vm_fault(struct vm_area_struct* vma,
8521 +				      struct vm_fault* vmf)
8522 +{
8523 +	TRACE_CUR("%s flags=0x%x (off:%ld)\n", __FUNCTION__,
8524 +		  vma->vm_flags, vmf->pgoff);
8525 +
8526 +	/* This function should never be called, since all pages should have
8527 +	 * been mapped by mmap() already. */
8528 +	WARN_ONCE(1, "Page faults should be impossible in the control page\n");
8529 +
8530 +	return VM_FAULT_SIGBUS;
8531 +}
8532 +
8533 +static struct vm_operations_struct litmus_ctrl_vm_ops = {
8534 +	.close = litmus_ctrl_vm_close,
8535 +	.fault = litmus_ctrl_vm_fault,
8536 +};
8537 +
8538 +static int litmus_ctrl_mmap(struct file* filp, struct vm_area_struct* vma)
8539 +{
8540 +	int err = 0;
8541 +
8542 +	/* first make sure mapper knows what he's doing */
8543 +
8544 +	/* you can only get one page */
8545 +	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
8546 +		return -EINVAL;
8547 +
8548 +	/* you can only map the "first" page */
8549 +	if (vma->vm_pgoff != 0)
8550 +		return -EINVAL;
8551 +
8552 +	/* you can't share it with anyone */
8553 +	if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
8554 +		return -EINVAL;
8555 +
8556 +	vma->vm_ops = &litmus_ctrl_vm_ops;
8557 +	/* This mapping should not be kept across forks,
8558 +	 * cannot be expanded, and is not a "normal" page. */
8559 +	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_READ | VM_WRITE;
8560 +
8561 +	/* We don't want the first write access to trigger a "minor" page fault
8562 +	 * to mark the page as dirty.  This is transient, private memory, we
8563 +	 * don't care if it was touched or not. PAGE_SHARED means RW access, but
8564 +	 * not execute, and avoids copy-on-write behavior.
8565 +	 * See protection_map in mmap.c.  */
8566 +	vma->vm_page_prot = PAGE_SHARED;
8567 +
8568 +	err = alloc_ctrl_page(current);
8569 +	if (!err)
8570 +		err = map_ctrl_page(current, vma);
8571 +
8572 +	TRACE_CUR("%s flags=0x%x prot=0x%lx\n",
8573 +		  __FUNCTION__, vma->vm_flags, vma->vm_page_prot);
8574 +
8575 +	return err;
8576 +}
8577 +
8578 +static struct file_operations litmus_ctrl_fops = {
8579 +	.owner = THIS_MODULE,
8580 +	.mmap  = litmus_ctrl_mmap,
8581 +};
8582 +
8583 +static struct miscdevice litmus_ctrl_dev = {
8584 +	.name  = CTRL_NAME,
8585 +	.minor = MISC_DYNAMIC_MINOR,
8586 +	.fops  = &litmus_ctrl_fops,
8587 +};
8588 +
8589 +static int __init init_litmus_ctrl_dev(void)
8590 +{
8591 +	int err;
8592 +
8593 +	BUILD_BUG_ON(sizeof(struct control_page) > PAGE_SIZE);
8594 +
8595 +	BUILD_BUG_ON(sizeof(union np_flag) != sizeof(uint32_t));
8596 +
8597 +	BUILD_BUG_ON(offsetof(struct control_page, sched.raw)
8598 +		     != LITMUS_CP_OFFSET_SCHED);
8599 +	BUILD_BUG_ON(offsetof(struct control_page, irq_count)
8600 +		     != LITMUS_CP_OFFSET_IRQ_COUNT);
8601 +	BUILD_BUG_ON(offsetof(struct control_page, ts_syscall_start)
8602 +		     != LITMUS_CP_OFFSET_TS_SC_START);
8603 +	BUILD_BUG_ON(offsetof(struct control_page, irq_syscall_start)
8604 +		     != LITMUS_CP_OFFSET_IRQ_SC_START);
8605 +
8606 +	printk("Initializing LITMUS^RT control device.\n");
8607 +	err = misc_register(&litmus_ctrl_dev);
8608 +	if (err)
8609 +		printk("Could not allocate %s device (%d).\n", CTRL_NAME, err);
8610 +	return err;
8611 +}
8612 +
8613 +static void __exit exit_litmus_ctrl_dev(void)
8614 +{
8615 +	misc_deregister(&litmus_ctrl_dev);
8616 +}
8617 +
8618 +module_init(init_litmus_ctrl_dev);
8619 +module_exit(exit_litmus_ctrl_dev);
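
To make the mapping constraints enforced above concrete, here is a hypothetical userspace sketch (not part of the patch, and normally hidden behind liblitmus). It assumes the misc device appears as /dev/litmus/ctrl and treats the page as opaque memory, since struct control_page is defined outside this hunk.

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	void *ctrl;
	int fd = open("/dev/litmus/ctrl", O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* exactly one page, offset 0, not shared -- mirrors the checks in
	 * litmus_ctrl_mmap() above */
	ctrl = mmap(NULL, page, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
	if (ctrl == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	printf("control page mapped at %p\n", ctrl);
	munmap(ctrl, page);
	close(fd);
	return 0;
}
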
8620 diff --git a/litmus/edf_common.c b/litmus/edf_common.c
8621 new file mode 100644
8622 index 0000000..5aca293
8623 --- /dev/null
8624 +++ b/litmus/edf_common.c
8625 @@ -0,0 +1,200 @@
8626 +/*
8627 + * kernel/edf_common.c
8628 + *
8629 + * Common functions for EDF based scheduler.
8630 + */
8631 +
8632 +#include <linux/percpu.h>
8633 +#include <linux/sched.h>
8634 +#include <linux/list.h>
8635 +
8636 +#include <litmus/litmus.h>
8637 +#include <litmus/sched_plugin.h>
8638 +#include <litmus/sched_trace.h>
8639 +
8640 +#include <litmus/edf_common.h>
8641 +
8642 +#ifdef CONFIG_EDF_TIE_BREAK_LATENESS_NORM
8643 +#include <litmus/fpmath.h>
8644 +#endif
8645 +
8646 +#ifdef CONFIG_EDF_TIE_BREAK_HASH
8647 +#include <linux/hash.h>
8648 +static inline long edf_hash(struct task_struct *t)
8649 +{
8650 +	/* pid is 32 bits, so normally we would shove that into the
8651 +	 * upper 32-bits and put the job number in the bottom
8652 +	 * and hash the 64-bit number with hash_64(). Sadly,
8653 +	 * in testing, hash_64() doesn't distribute keys where the
8654 +	 * upper bits are close together (as would be the case with
8655 +	 * pids) and job numbers are equal (as would be the case with
8656 +	 * synchronous task sets with all relative deadlines equal).
8657 +	 *
8658 +	 * A 2006 Linux patch proposed the following solution
8659 +	 * (but for some reason it wasn't accepted...).
8660 +	 *
8661 +	 * At least this workaround works for 32-bit systems as well.
8662 +	 */
8663 +	return hash_32(hash_32((u32)tsk_rt(t)->job_params.job_no, 32) ^ t->pid, 32);
8664 +}
8665 +#endif
8666 +
8667 +
8668 +/* edf_higher_prio -  returns true if first has a higher EDF priority
8669 + *                    than second. Deadline ties are broken by PID.
8670 + *                    than second. Deadline ties are broken by the configured tie-break, falling back to PID.
8671 + * both first and second may be NULL
8672 + */
8673 +int edf_higher_prio(struct task_struct* first,
8674 +		    struct task_struct* second)
8675 +{
8676 +	struct task_struct *first_task = first;
8677 +	struct task_struct *second_task = second;
8678 +
8679 +	/* There is no point in comparing a task to itself. */
8680 +	if (first && first == second) {
8681 +		TRACE_TASK(first,
8682 +			   "WARNING: pointless edf priority comparison.\n");
8683 +		return 0;
8684 +	}
8685 +
8686 +
8687 +	/* check for NULL tasks */
8688 +	if (!first || !second)
8689 +		return first && !second;
8690 +
8691 +#ifdef CONFIG_LITMUS_LOCKING
8692 +
8693 +	/* Check for inherited priorities. Change task
8694 +	 * used for comparison in such a case.
8695 +	 */
8696 +	if (unlikely(first->rt_param.inh_task))
8697 +		first_task = first->rt_param.inh_task;
8698 +	if (unlikely(second->rt_param.inh_task))
8699 +		second_task = second->rt_param.inh_task;
8700 +
8701 +	/* Check for priority boosting. Tie-break by start of boosting.
8702 +	 */
8703 +	if (unlikely(is_priority_boosted(first_task))) {
8704 +		/* first_task is boosted, how about second_task? */
8705 +		if (!is_priority_boosted(second_task) ||
8706 +		    lt_before(get_boost_start(first_task),
8707 +			      get_boost_start(second_task)))
8708 +			return 1;
8709 +		else
8710 +			return 0;
8711 +	} else if (unlikely(is_priority_boosted(second_task)))
8712 +		/* second_task is boosted, first is not*/
8713 +		return 0;
8714 +
8715 +#endif
8716 +
8717 +	if (earlier_deadline(first_task, second_task)) {
8718 +		return 1;
8719 +	}
8720 +	else if (get_deadline(first_task) == get_deadline(second_task)) {
8721 +		/* Need to tie break. Each method must set pid_break to 1 if the tie
8722 +		 * persists (fall through to the PID tie-break), or to 0 if second_task wins.
8723 +		 */
8724 +		int pid_break;
8725 +
8726 +
8727 +#if defined(CONFIG_EDF_TIE_BREAK_LATENESS)
8728 +		/* Tie break by lateness. Jobs with greater lateness get
8729 +		 * priority. This should spread tardiness across all tasks,
8730 +		 * especially in task sets where all tasks have the same
8731 +		 * period and relative deadlines.
8732 +		 */
8733 +		if (get_lateness(first_task) > get_lateness(second_task)) {
8734 +			return 1;
8735 +		}
8736 +		pid_break = (get_lateness(first_task) == get_lateness(second_task));
8737 +
8738 +
8739 +#elif defined(CONFIG_EDF_TIE_BREAK_LATENESS_NORM)
8740 +		/* Tie break by lateness, normalized by relative deadline. Jobs with
8741 +		 * greater normalized lateness get priority.
8742 +		 *
8743 +		 * Note: Considered using the algebraically equivalent
8744 +		 *	lateness(first)*relative_deadline(second) >
8745 +		 *	    lateness(second)*relative_deadline(first)
8746 +		 * to avoid fixed-point math, but values are prone to overflow if inputs
8747 +		 * are on the order of several seconds, even in 64-bit.
8748 +		 */
8749 +		fp_t fnorm = _frac(get_lateness(first_task),
8750 +						   get_rt_relative_deadline(first_task));
8751 +		fp_t snorm = _frac(get_lateness(second_task),
8752 +						   get_rt_relative_deadline(second_task));
8753 +		if (_gt(fnorm, snorm)) {
8754 +			return 1;
8755 +		}
8756 +		pid_break = _eq(fnorm, snorm);
8757 +
8758 +
8759 +#elif defined(CONFIG_EDF_TIE_BREAK_HASH)
8760 +		/* Tie break by comparing hashes of the (pid, job#) tuple.  There should be
8761 +		 * a 50% chance that first_task has a higher priority than second_task.
8762 +		 */
8763 +		long fhash = edf_hash(first_task);
8764 +		long shash = edf_hash(second_task);
8765 +		if (fhash < shash) {
8766 +			return 1;
8767 +		}
8768 +		pid_break = (fhash == shash);
8769 +#else
8770 +
8771 +
8772 +		/* CONFIG_EDF_PID_TIE_BREAK */
8773 +		pid_break = 1; /* fall through to tie-break by PID */
8774 +#endif
8775 +
8776 +		/* Tie break by pid */
8777 +		if(pid_break) {
8778 +			if (first_task->pid < second_task->pid) {
8779 +				return 1;
8780 +			}
8781 +			else if (first_task->pid == second_task->pid) {
8782 +				/* If the PIDs are the same then the task with the
8783 +				 * inherited priority wins.
8784 +				 */
8785 +				if (!second->rt_param.inh_task) {
8786 +					return 1;
8787 +				}
8788 +			}
8789 +		}
8790 +	}
8791 +	return 0; /* fall-through. prio(second_task) > prio(first_task) */
8792 +}
8793 +
8794 +int edf_ready_order(struct bheap_node* a, struct bheap_node* b)
8795 +{
8796 +	return edf_higher_prio(bheap2task(a), bheap2task(b));
8797 +}
8798 +
8799 +void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
8800 +		      release_jobs_t release)
8801 +{
8802 +	rt_domain_init(rt,  edf_ready_order, resched, release);
8803 +}
8804 +
8805 +/* need_to_preempt - check whether the task t needs to be preempted
8806 + *                   call only with irqs disabled and with ready_lock acquired
8807 + *                   THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT!
8808 + */
8809 +int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t)
8810 +{
8811 +	/* we need the read lock for edf_ready_queue */
8812 +	/* no need to preempt if there is nothing pending */
8813 +	if (!__jobs_pending(rt))
8814 +		return 0;
8815 +	/* we need to reschedule if t doesn't exist */
8816 +	if (!t)
8817 +		return 1;
8818 +
8819 +	/* NOTE: We cannot check for non-preemptibility since we
8820 +	 *       don't know what address space we're currently in.
8821 +	 */
8822 +
8823 +	/* make sure to get non-rt stuff out of the way */
8824 +	return !is_realtime(t) || edf_higher_prio(__next_ready(rt), t);
8825 +}
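For illustration only (not part of the patch), the following is a minimal user-space sketch of the core comparison rule implemented by edf_higher_prio() above: an earlier absolute deadline wins, and deadline ties are broken by lower PID. The struct and field names here are made up for the example; the kernel code operates on struct task_struct and its rt_param fields instead.

#include <stdio.h>

struct fake_task {
	int pid;
	unsigned long long deadline; /* absolute deadline in ns */
};

/* returns 1 if a has higher EDF priority than b */
static int edf_higher_prio_sketch(const struct fake_task *a,
				  const struct fake_task *b)
{
	if (a->deadline != b->deadline)
		return a->deadline < b->deadline; /* earlier deadline wins */
	return a->pid < b->pid;                   /* PID tie-break */
}

int main(void)
{
	struct fake_task t1 = { .pid = 101, .deadline = 5000000 };
	struct fake_task t2 = { .pid =  42, .deadline = 5000000 };

	printf("t1 over t2: %d\n", edf_higher_prio_sketch(&t1, &t2)); /* 0 */
	printf("t2 over t1: %d\n", edf_higher_prio_sketch(&t2, &t1)); /* 1 */
	return 0;
}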
8826 diff --git a/litmus/fdso.c b/litmus/fdso.c
8827 new file mode 100644
8828 index 0000000..31d7028
8829 --- /dev/null
8830 +++ b/litmus/fdso.c
8831 @@ -0,0 +1,308 @@
8832 +/* fdso.c - file descriptor attached shared objects
8833 + *
8834 + * (c) 2007 B. Brandenburg, LITMUS^RT project
8835 + *
8836 + * Notes:
8837 + *   - objects descriptor (OD) tables are not cloned during a fork.
8838 + *   - objects are created on-demand, and freed after the last reference
8839 + *     is dropped.
8840 + *   - for now, object types are hard coded.
8841 + *   - As long as we have live objects, we keep a reference to the inode.
8842 + */
8843 +
8844 +#include <linux/errno.h>
8845 +#include <linux/sched.h>
8846 +#include <linux/mutex.h>
8847 +#include <linux/file.h>
8848 +#include <asm/uaccess.h>
8849 +
8850 +#include <litmus/fdso.h>
8851 +
8852 +extern struct fdso_ops generic_lock_ops;
8853 +
8854 +static const struct fdso_ops* fdso_ops[] = {
8855 +	&generic_lock_ops, /* FMLP_SEM */
8856 +	&generic_lock_ops, /* SRP_SEM */
8857 +	&generic_lock_ops, /* MPCP_SEM */
8858 +	&generic_lock_ops, /* MPCP_VS_SEM */
8859 +	&generic_lock_ops, /* DPCP_SEM */
8860 +	&generic_lock_ops, /* PCP_SEM */
8861 +	&generic_lock_ops, /* DFLP_SEM */
8862 +};
8863 +
8864 +static int fdso_create(void** obj_ref, obj_type_t type, void* __user config)
8865 +{
8866 +	BUILD_BUG_ON(ARRAY_SIZE(fdso_ops) != MAX_OBJ_TYPE + 1);
8867 +
8868 +	if (fdso_ops[type]->create)
8869 +		return fdso_ops[type]->create(obj_ref, type, config);
8870 +	else
8871 +		return -EINVAL;
8872 +}
8873 +
8874 +static void fdso_destroy(obj_type_t type, void* obj)
8875 +{
8876 +	fdso_ops[type]->destroy(type, obj);
8877 +}
8878 +
8879 +static int fdso_open(struct od_table_entry* entry, void* __user config)
8880 +{
8881 +	if (fdso_ops[entry->obj->type]->open)
8882 +		return fdso_ops[entry->obj->type]->open(entry, config);
8883 +	else
8884 +		return 0;
8885 +}
8886 +
8887 +static int fdso_close(struct od_table_entry* entry)
8888 +{
8889 +	if (fdso_ops[entry->obj->type]->close)
8890 +		return fdso_ops[entry->obj->type]->close(entry);
8891 +	else
8892 +		return 0;
8893 +}
8894 +
8895 +/* inode must be locked already */
8896 +static int alloc_inode_obj(struct inode_obj_id** obj_ref,
8897 +			   struct inode* inode,
8898 +			   obj_type_t type,
8899 +			   unsigned int id,
8900 +			   void* __user config)
8901 +{
8902 +	struct inode_obj_id* obj;
8903 +	void* raw_obj;
8904 +	int err;
8905 +
8906 +	obj = kmalloc(sizeof(*obj), GFP_KERNEL);
8907 +	if (!obj) {
8908 +		return -ENOMEM;
8909 +	}
8910 +
8911 +	err = fdso_create(&raw_obj, type, config);
8912 +	if (err != 0) {
8913 +		kfree(obj);
8914 +		return err;
8915 +	}
8916 +
8917 +	INIT_LIST_HEAD(&obj->list);
8918 +	atomic_set(&obj->count, 1);
8919 +	obj->type  = type;
8920 +	obj->id    = id;
8921 +	obj->obj   = raw_obj;
8922 +	obj->inode = inode;
8923 +
8924 +	list_add(&obj->list, &inode->i_obj_list);
8925 +	atomic_inc(&inode->i_count);
8926 +
8927 +	printk(KERN_DEBUG "alloc_inode_obj(%p, %d, %d): object created\n", inode, type, id);
8928 +
8929 +	*obj_ref = obj;
8930 +	return 0;
8931 +}
8932 +
8933 +/* inode must be locked already */
8934 +static struct inode_obj_id* get_inode_obj(struct inode* inode,
8935 +					  obj_type_t type,
8936 +					  unsigned int id)
8937 +{
8938 +	struct list_head* pos;
8939 +	struct inode_obj_id* obj = NULL;
8940 +
8941 +	list_for_each(pos, &inode->i_obj_list) {
8942 +		obj = list_entry(pos, struct inode_obj_id, list);
8943 +		if (obj->id == id && obj->type == type) {
8944 +			atomic_inc(&obj->count);
8945 +			return obj;
8946 +		}
8947 +	}
8948 +	printk(KERN_DEBUG "get_inode_obj(%p, %d, %d): couldn't find object\n", inode, type, id);
8949 +	return NULL;
8950 +}
8951 +
8952 +
8953 +static void put_inode_obj(struct inode_obj_id* obj)
8954 +{
8955 +	struct inode* inode;
8956 +	int let_go = 0;
8957 +
8958 +	inode = obj->inode;
8959 +	if (atomic_dec_and_test(&obj->count)) {
8960 +
8961 +		mutex_lock(&inode->i_obj_mutex);
8962 +		/* no new references can be obtained */
8963 +		if (!atomic_read(&obj->count)) {
8964 +			list_del(&obj->list);
8965 +			fdso_destroy(obj->type, obj->obj);
8966 +			kfree(obj);
8967 +			let_go = 1;
8968 +		}
8969 +		mutex_unlock(&inode->i_obj_mutex);
8970 +		if (let_go)
8971 +			iput(inode);
8972 +	}
8973 +}
8974 +
8975 +static struct od_table_entry*  get_od_entry(struct task_struct* t)
8976 +{
8977 +	struct od_table_entry* table;
8978 +	int i;
8979 +
8980 +
8981 +	table = t->od_table;
8982 +	if (!table) {
8983 +		table = kzalloc(sizeof(*table) * MAX_OBJECT_DESCRIPTORS,
8984 +				GFP_KERNEL);
8985 +		t->od_table = table;
8986 +	}
8987 +
8988 +	for (i = 0; table &&  i < MAX_OBJECT_DESCRIPTORS; i++)
8989 +		if (!table[i].used) {
8990 +			table[i].used = 1;
8991 +			return table + i;
8992 +		}
8993 +	return NULL;
8994 +}
8995 +
8996 +static int put_od_entry(struct od_table_entry* od)
8997 +{
8998 +	put_inode_obj(od->obj);
8999 +	od->used = 0;
9000 +	return 0;
9001 +}
9002 +
9003 +static long close_od_entry(struct od_table_entry *od)
9004 +{
9005 +	long ret;
9006 +
9007 +	/* Give the class a chance to reject the close. */
9008 +	ret = fdso_close(od);
9009 +	if (ret == 0)
9010 +		ret = put_od_entry(od);
9011 +
9012 +	return ret;
9013 +}
9014 +
9015 +void exit_od_table(struct task_struct* t)
9016 +{
9017 +	int i;
9018 +
9019 +	if (t->od_table) {
9020 +		for (i = 0; i < MAX_OBJECT_DESCRIPTORS; i++)
9021 +			if (t->od_table[i].used)
9022 +				close_od_entry(t->od_table + i);
9023 +		kfree(t->od_table);
9024 +		t->od_table = NULL;
9025 +	}
9026 +}
9027 +
9028 +static int do_sys_od_open(struct file* file, obj_type_t type, int id,
9029 +			  void* __user config)
9030 +{
9031 +	int idx = 0, err = 0;
9032 +	struct inode* inode;
9033 +	struct inode_obj_id* obj = NULL;
9034 +	struct od_table_entry* entry;
9035 +
9036 +	inode = file->f_dentry->d_inode;
9037 +
9038 +	entry = get_od_entry(current);
9039 +	if (!entry)
9040 +		return -ENOMEM;
9041 +
9042 +	mutex_lock(&inode->i_obj_mutex);
9043 +	obj = get_inode_obj(inode, type, id);
9044 +	if (!obj)
9045 +		err = alloc_inode_obj(&obj, inode, type, id, config);
9046 +	if (err != 0) {
9047 +		obj = NULL;
9048 +		idx = err;
9049 +		entry->used = 0;
9050 +	} else {
9051 +		entry->obj   = obj;
9052 +		entry->class = fdso_ops[type];
9053 +		idx = entry - current->od_table;
9054 +	}
9055 +
9056 +	mutex_unlock(&inode->i_obj_mutex);
9057 +
9058 +	/* open only if creation succeeded */
9059 +	if (!err)
9060 +		err = fdso_open(entry, config);
9061 +	if (err < 0) {
9062 +		/* The class rejected the open call.
9063 +		 * We need to clean up and tell user space.
9064 +		 */
9065 +		if (obj)
9066 +			put_od_entry(entry);
9067 +		idx = err;
9068 +	}
9069 +
9070 +	return idx;
9071 +}
9072 +
9073 +
9074 +struct od_table_entry* get_entry_for_od(int od)
9075 +{
9076 +	struct task_struct *t = current;
9077 +
9078 +	if (!t->od_table)
9079 +		return NULL;
9080 +	if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS)
9081 +		return NULL;
9082 +	if (!t->od_table[od].used)
9083 +		return NULL;
9084 +	return t->od_table + od;
9085 +}
9086 +
9087 +
9088 +asmlinkage long sys_od_open(int fd, int type, int obj_id, void* __user config)
9089 +{
9090 +	int ret = 0;
9091 +	struct file*  file;
9092 +
9093 +	/*
9094 +	   1) get file from fd, get inode from file
9095 +	   2) lock inode
9096 +	   3) try to lookup object
9097 +	   4) if not present create and enqueue object, inc inode refcnt
9098 +	   5) increment refcnt of object
9099 +	   6) alloc od_table_entry, setup ptrs
9100 +	   7) unlock inode
9101 +	   8) return offset in od_table as OD
9102 +	 */
9103 +
9104 +	if (type < MIN_OBJ_TYPE || type > MAX_OBJ_TYPE) {
9105 +		ret = -EINVAL;
9106 +		goto out;
9107 +	}
9108 +
9109 +	file = fget(fd);
9110 +	if (!file) {
9111 +		ret = -EBADF;
9112 +		goto out;
9113 +	}
9114 +
9115 +	ret = do_sys_od_open(file, type, obj_id, config);
9116 +
9117 +	fput(file);
9118 +
9119 +out:
9120 +	return ret;
9121 +}
9122 +
9123 +
9124 +asmlinkage long sys_od_close(int od)
9125 +{
9126 +	int ret = -EINVAL;
9127 +	struct task_struct *t = current;
9128 +
9129 +	if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS)
9130 +		return ret;
9131 +
9132 +	if (!t->od_table || !t->od_table[od].used)
9133 +		return ret;
9134 +
9135 +
9136 +	ret = close_od_entry(t->od_table + od);
9137 +
9138 +	return ret;
9139 +}
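As a rough illustration of the object-descriptor table used above (get_od_entry() hands out the first unused slot, and its index is what sys_od_open() returns to user space), here is a small user-space sketch with invented names; it mirrors only the slot-allocation logic, not the inode-backed reference counting.

#include <stdio.h>
#include <string.h>

#define MAX_OBJECT_DESCRIPTORS 32

struct od_entry_sketch {
	int used;
	void *obj;
};

/* claim the first free slot and return its index (the "OD"), or -1 if full */
static int od_open_sketch(struct od_entry_sketch *table, void *obj)
{
	int i;
	for (i = 0; i < MAX_OBJECT_DESCRIPTORS; i++)
		if (!table[i].used) {
			table[i].used = 1;
			table[i].obj = obj;
			return i;
		}
	return -1;
}

int main(void)
{
	struct od_entry_sketch table[MAX_OBJECT_DESCRIPTORS];
	int lock_a = 1, lock_b = 2;

	memset(table, 0, sizeof(table));
	printf("od for lock_a: %d\n", od_open_sketch(table, &lock_a)); /* 0 */
	printf("od for lock_b: %d\n", od_open_sketch(table, &lock_b)); /* 1 */
	return 0;
}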
9140 diff --git a/litmus/fp_common.c b/litmus/fp_common.c
9141 new file mode 100644
9142 index 0000000..964a472
9143 --- /dev/null
9144 +++ b/litmus/fp_common.c
9145 @@ -0,0 +1,119 @@
9146 +/*
9147 + * litmus/fp_common.c
9148 + *
9149 + * Common functions for fixed-priority scheduler.
9150 + */
9151 +
9152 +#include <linux/percpu.h>
9153 +#include <linux/sched.h>
9154 +#include <linux/list.h>
9155 +
9156 +#include <litmus/litmus.h>
9157 +#include <litmus/sched_plugin.h>
9158 +#include <litmus/sched_trace.h>
9159 +
9160 +#include <litmus/fp_common.h>
9161 +
9162 +/* fp_higher_prio -  returns true if first has a higher static priority
9163 + *                   than second. Ties are broken by PID.
9164 + *
9165 + * both first and second may be NULL
9166 + */
9167 +int fp_higher_prio(struct task_struct* first,
9168 +		   struct task_struct* second)
9169 +{
9170 +	struct task_struct *first_task = first;
9171 +	struct task_struct *second_task = second;
9172 +
9173 +	/* There is no point in comparing a task to itself. */
9174 +	if (unlikely(first && first == second)) {
9175 +		TRACE_TASK(first,
9176 +			   "WARNING: pointless FP priority comparison.\n");
9177 +		return 0;
9178 +	}
9179 +
9180 +
9181 +	/* check for NULL tasks */
9182 +	if (!first || !second)
9183 +		return first && !second;
9184 +
9185 +	if (!is_realtime(second_task))
9186 +		return 1;
9187 +
9188 +#ifdef CONFIG_LITMUS_LOCKING
9189 +
9190 +	/* Check for inherited priorities. Change task
9191 +	 * used for comparison in such a case.
9192 +	 */
9193 +	if (unlikely(first->rt_param.inh_task))
9194 +		first_task = first->rt_param.inh_task;
9195 +	if (unlikely(second->rt_param.inh_task))
9196 +		second_task = second->rt_param.inh_task;
9197 +
9198 +	/* Check for priority boosting. Tie-break by start of boosting.
9199 +	 */
9200 +	if (unlikely(is_priority_boosted(first_task))) {
9201 +		/* first_task is boosted, how about second_task? */
9202 +		if (is_priority_boosted(second_task))
9203 +			/* break by priority point */
9204 +			return lt_before(get_boost_start(first_task),
9205 +					 get_boost_start(second_task));
9206 +		else
9207 +			/* priority boosting wins. */
9208 +			return 1;
9209 +	} else if (unlikely(is_priority_boosted(second_task)))
9210 +		/* second_task is boosted, first is not */
9211 +		return 0;
9212 +
9213 +#endif
9214 +
9215 +	/* Comparisons to itself are not expected; priority inheritance
9216 +	 * should also not cause this to happen. */
9217 +	BUG_ON(first_task == second_task);
9218 +
9219 +	if (get_priority(first_task) < get_priority(second_task))
9220 +		return 1;
9221 +	else if (get_priority(first_task) == get_priority(second_task))
9222 +		/* Break by PID. */
9223 +		return first_task->pid < second_task->pid;
9224 +	else
9225 +		return 0;
9226 +}
9227 +
9228 +int fp_ready_order(struct bheap_node* a, struct bheap_node* b)
9229 +{
9230 +	return fp_higher_prio(bheap2task(a), bheap2task(b));
9231 +}
9232 +
9233 +void fp_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
9234 +		    release_jobs_t release)
9235 +{
9236 +	rt_domain_init(rt,  fp_ready_order, resched, release);
9237 +}
9238 +
9239 +/* need_to_preempt - check whether the task t needs to be preempted
9240 + */
9241 +int fp_preemption_needed(struct fp_prio_queue *q, struct task_struct *t)
9242 +{
9243 +	struct task_struct *pending;
9244 +
9245 +	pending = fp_prio_peek(q);
9246 +
9247 +	if (!pending)
9248 +		return 0;
9249 +	if (!t)
9250 +		return 1;
9251 +
9252 +	/* make sure to get non-rt stuff out of the way */
9253 +	return !is_realtime(t) || fp_higher_prio(pending, t);
9254 +}
9255 +
9256 +void fp_prio_queue_init(struct fp_prio_queue* q)
9257 +{
9258 +	int i;
9259 +
9260 +	for (i = 0; i < FP_PRIO_BIT_WORDS; i++)
9261 +		q->bitmask[i] = 0;
9262 +	for (i = 0; i < LITMUS_MAX_PRIORITY; i++)
9263 +		bheap_init(&q->queue[i]);
9264 +}
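For reference, a minimal user-space sketch of the fixed-priority comparison implemented by fp_higher_prio() above: a numerically smaller priority value wins, and equal priorities are broken by lower PID. The types below are illustrative stand-ins, not the kernel structures.

#include <stdio.h>

struct fake_task {
	int pid;
	unsigned int priority; /* smaller value = higher priority */
};

static int fp_higher_prio_sketch(const struct fake_task *a,
				 const struct fake_task *b)
{
	if (a->priority != b->priority)
		return a->priority < b->priority;
	return a->pid < b->pid; /* PID tie-break */
}

int main(void)
{
	struct fake_task high = { .pid = 7, .priority = 1 };
	struct fake_task low  = { .pid = 3, .priority = 5 };

	printf("%d\n", fp_higher_prio_sketch(&high, &low)); /* 1 */
	printf("%d\n", fp_higher_prio_sketch(&low, &high)); /* 0 */
	return 0;
}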
9265 diff --git a/litmus/jobs.c b/litmus/jobs.c
9266 new file mode 100644
9267 index 0000000..2d9f8aa
9268 --- /dev/null
9269 +++ b/litmus/jobs.c
9270 @@ -0,0 +1,77 @@
9271 +/* litmus/jobs.c - common job control code
9272 + */
9273 +
9274 +#include <linux/sched.h>
9275 +
9276 +#include <litmus/litmus.h>
9277 +#include <litmus/jobs.h>
9278 +
9279 +static inline void setup_release(struct task_struct *t, lt_t release)
9280 +{
9281 +	/* prepare next release */
9282 +	t->rt_param.job_params.release = release;
9283 +	t->rt_param.job_params.deadline = release + get_rt_relative_deadline(t);
9284 +	t->rt_param.job_params.exec_time = 0;
9285 +
9286 +	/* update job sequence number */
9287 +	t->rt_param.job_params.job_no++;
9288 +}
9289 +
9290 +void prepare_for_next_period(struct task_struct *t)
9291 +{
9292 +	BUG_ON(!t);
9293 +
9294 +	/* Record lateness before we set up the next job's
9295 +	 * release and deadline. Lateness may be negative.
9296 +	 */
9297 +	t->rt_param.job_params.lateness =
9298 +		(long long)litmus_clock() -
9299 +		(long long)t->rt_param.job_params.deadline;
9300 +
9301 +	if (tsk_rt(t)->sporadic_release) {
9302 +		TRACE_TASK(t, "sporadic release at %llu\n",
9303 +			   tsk_rt(t)->sporadic_release_time);
9304 +		/* sporadic release */
9305 +		setup_release(t, tsk_rt(t)->sporadic_release_time);
9306 +		tsk_rt(t)->sporadic_release = 0;
9307 +	} else {
9308 +		/* periodic release => add period */
9309 +		setup_release(t, get_release(t) + get_rt_period(t));
9310 +	}
9311 +}
9312 +
9313 +void release_at(struct task_struct *t, lt_t start)
9314 +{
9315 +	BUG_ON(!t);
9316 +	setup_release(t, start);
9317 +	tsk_rt(t)->completed = 0;
9318 +}
9319 +
9320 +long default_wait_for_release_at(lt_t release_time)
9321 +{
9322 +	struct task_struct *t = current;
9323 +	unsigned long flags;
9324 +
9325 +	local_irq_save(flags);
9326 +	tsk_rt(t)->sporadic_release_time = release_time;
9327 +	smp_wmb();
9328 +	tsk_rt(t)->sporadic_release = 1;
9329 +	local_irq_restore(flags);
9330 +
9331 +	return complete_job();
9332 +}
9333 +
9334 +
9335 +/*
9336 + *	Deactivate current task until the beginning of the next period.
9337 + */
9338 +long complete_job(void)
9339 +{
9340 +	/* Mark that we do not execute anymore */
9341 +	tsk_rt(current)->completed = 1;
9342 +	/* Call schedule(); this will return when a new job arrives.
9343 +	 * It also takes care of preparing for the next release.
9344 +	 */
9345 +	schedule();
9346 +	return 0;
9347 +}
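The job bookkeeping above follows a simple arithmetic pattern: each job's deadline is its release time plus the task's relative deadline, and lateness is the completion time minus the previous deadline (negative if the job finished early). The user-space sketch below mirrors that arithmetic with illustrative names and hard-coded nanosecond values; it is not the kernel code.

#include <stdio.h>

struct fake_job {
	unsigned long long release;
	unsigned long long deadline;
	unsigned int job_no;
	long long lateness;
};

static void setup_release_sketch(struct fake_job *j,
				 unsigned long long release,
				 unsigned long long rel_deadline)
{
	j->release  = release;
	j->deadline = release + rel_deadline;
	j->job_no++;
}

int main(void)
{
	const unsigned long long period = 10000000, rel_deadline = 10000000;
	struct fake_job j = { .release = 0, .deadline = rel_deadline, .job_no = 1 };
	unsigned long long now = 12000000; /* pretend completion time */

	/* lateness of the job that just completed (may be negative) */
	j.lateness = (long long)now - (long long)j.deadline;
	/* periodic release: add one period to the previous release */
	setup_release_sketch(&j, j.release + period, rel_deadline);

	printf("lateness=%lld next release=%llu deadline=%llu job_no=%u\n",
	       j.lateness, j.release, j.deadline, j.job_no);
	return 0;
}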
9348 diff --git a/litmus/litmus.c b/litmus/litmus.c
9349 new file mode 100644
9350 index 0000000..9c419cd
9351 --- /dev/null
9352 +++ b/litmus/litmus.c
9353 @@ -0,0 +1,639 @@
9354 +/*
9355 + * litmus.c -- Implementation of the LITMUS syscalls,
9356 + *             the LITMUS initialization code,
9357 + *             and the procfs interface.
9358 + */
9359 +#include <asm/uaccess.h>
9360 +#include <linux/uaccess.h>
9361 +#include <linux/sysrq.h>
9362 +#include <linux/sched.h>
9363 +#include <linux/module.h>
9364 +#include <linux/slab.h>
9365 +#include <linux/reboot.h>
9366 +#include <linux/stop_machine.h>
9367 +#include <linux/sched/rt.h>
9368 +#include <linux/rwsem.h>
9369 +
9370 +#include <litmus/litmus.h>
9371 +#include <litmus/bheap.h>
9372 +#include <litmus/trace.h>
9373 +#include <litmus/rt_domain.h>
9374 +#include <litmus/litmus_proc.h>
9375 +#include <litmus/sched_trace.h>
9376 +
9377 +#ifdef CONFIG_SCHED_CPU_AFFINITY
9378 +#include <litmus/affinity.h>
9379 +#endif
9380 +
9381 +/* Number of RT tasks that exist in the system */
9382 +atomic_t rt_task_count 		= ATOMIC_INIT(0);
9383 +
9384 +#ifdef CONFIG_RELEASE_MASTER
9385 +/* current master CPU for handling timer IRQs */
9386 +atomic_t release_master_cpu = ATOMIC_INIT(NO_CPU);
9387 +#endif
9388 +
9389 +static struct kmem_cache * bheap_node_cache;
9390 +extern struct kmem_cache * release_heap_cache;
9391 +
9392 +struct bheap_node* bheap_node_alloc(int gfp_flags)
9393 +{
9394 +	return kmem_cache_alloc(bheap_node_cache, gfp_flags);
9395 +}
9396 +
9397 +void bheap_node_free(struct bheap_node* hn)
9398 +{
9399 +	kmem_cache_free(bheap_node_cache, hn);
9400 +}
9401 +
9402 +struct release_heap* release_heap_alloc(int gfp_flags);
9403 +void release_heap_free(struct release_heap* rh);
9404 +
9405 +/**
9406 + * Get the quantum alignment as a cmdline option.
9407 + * Default is staggered quanta, as this results in lower overheads.
9408 + */
9409 +static bool aligned_quanta = 0;
9410 +module_param(aligned_quanta, bool, 0644);
9411 +
9412 +u64 cpu_stagger_offset(int cpu)
9413 +{
9414 +	u64 offset = 0;
9415 +
9416 +	if (!aligned_quanta) {
9417 +		offset = LITMUS_QUANTUM_LENGTH_NS;
9418 +		do_div(offset, num_possible_cpus());
9419 +		offset *= cpu;
9420 +	}
9421 +	return offset;
9422 +}
9423 +
9424 +/*
9425 + * sys_set_rt_task_param
9426 + * @pid: PID of the task whose scheduling parameters are to be changed
9427 + * @param: New real-time extension parameters such as the execution cost and
9428 + *         period
9429 + * Syscall for setting a task's RT extension params
9430 + * Returns EINVAL  if pid is negative, param is NULL, or the period,
9431 + *                 execution cost, class, or budget policy is invalid
9432 + *	   EFAULT  if copying param from user space fails
9433 + *	   ESRCH   if pid does not correspond to a valid task
9434 + *	   EBUSY   if the task is already a real-time task
9435 + *	   0       if success
9436 + *
9437 + * Only non-real-time tasks may be configured with this system call
9438 + * to avoid races with the scheduler. In practice, this means that a
9439 + * task's parameters must be set _before_ calling sys_prepare_rt_task()
9440 + *
9441 + * find_task_by_vpid() assumes that we are in the same namespace of the
9442 + * target.
9443 + */
9444 +asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param)
9445 +{
9446 +	struct rt_task tp;
9447 +	struct task_struct *target;
9448 +	int retval = -EINVAL;
9449 +
9450 +	printk("Setting up rt task parameters for process %d.\n", pid);
9451 +
9452 +	if (pid < 0 || param == 0) {
9453 +		goto out;
9454 +	}
9455 +	if (copy_from_user(&tp, param, sizeof(tp))) {
9456 +		retval = -EFAULT;
9457 +		goto out;
9458 +	}
9459 +
9460 +	/* Task search and manipulation must be protected */
9461 +	read_lock_irq(&tasklist_lock);
9462 +	if (!(target = find_task_by_vpid(pid))) {
9463 +		retval = -ESRCH;
9464 +		goto out_unlock;
9465 +	}
9466 +
9467 +	if (is_realtime(target)) {
9468 +		/* The task is already a real-time task.
9469 +		 * We cannot allow parameter changes at this point.
9470 +		 */
9471 +		retval = -EBUSY;
9472 +		goto out_unlock;
9473 +	}
9474 +
9475 +	/* set relative deadline to be implicit if left unspecified */
9476 +	if (tp.relative_deadline == 0)
9477 +		tp.relative_deadline = tp.period;
9478 +
9479 +	if (tp.exec_cost <= 0)
9480 +		goto out_unlock;
9481 +	if (tp.period <= 0)
9482 +		goto out_unlock;
9483 +	if (min(tp.relative_deadline, tp.period) < tp.exec_cost) /*density check*/
9484 +	{
9485 +		printk(KERN_INFO "litmus: real-time task %d rejected "
9486 +		       "because task density > 1.0\n", pid);
9487 +		goto out_unlock;
9488 +	}
9489 +	if (tp.cls != RT_CLASS_HARD &&
9490 +	    tp.cls != RT_CLASS_SOFT &&
9491 +	    tp.cls != RT_CLASS_BEST_EFFORT)
9492 +	{
9493 +		printk(KERN_INFO "litmus: real-time task %d rejected "
9494 +				 "because its class is invalid\n", pid);
9495 +		goto out_unlock;
9496 +	}
9497 +	if (tp.budget_policy != NO_ENFORCEMENT &&
9498 +	    tp.budget_policy != QUANTUM_ENFORCEMENT &&
9499 +	    tp.budget_policy != PRECISE_ENFORCEMENT)
9500 +	{
9501 +		printk(KERN_INFO "litmus: real-time task %d rejected "
9502 +		       "because unsupported budget enforcement policy "
9503 +		       "specified (%d)\n",
9504 +		       pid, tp.budget_policy);
9505 +		goto out_unlock;
9506 +	}
9507 +
9508 +	target->rt_param.task_params = tp;
9509 +
9510 +	retval = 0;
9511 +      out_unlock:
9512 +	read_unlock_irq(&tasklist_lock);
9513 +      out:
9514 +	return retval;
9515 +}
9516 +
9517 +/*
9518 + * Getter of a task's RT params
9519 + *   returns EINVAL if param is NULL or pid is negative
9520 + *   returns ESRCH  if pid does not correspond to a valid task
9521 + *   returns EFAULT if copying of parameters has failed.
9522 + *
9523 + *   find_task_by_vpid() assumes that we are in the same namespace of the
9524 + *   target.
9525 + */
9526 +asmlinkage long sys_get_rt_task_param(pid_t pid, struct rt_task __user * param)
9527 +{
9528 +	int retval = -EINVAL;
9529 +	struct task_struct *source;
9530 +	struct rt_task lp;
9531 +	if (param == 0 || pid < 0)
9532 +		goto out;
9533 +	read_lock(&tasklist_lock);
9534 +	if (!(source = find_task_by_vpid(pid))) {
9535 +		retval = -ESRCH;
9536 +		goto out_unlock;
9537 +	}
9538 +	lp = source->rt_param.task_params;
9539 +	read_unlock(&tasklist_lock);
9540 +	/* Do copying outside the lock */
9541 +	retval =
9542 +	    copy_to_user(param, &lp, sizeof(lp)) ? -EFAULT : 0;
9543 +	return retval;
9544 +      out_unlock:
9545 +	read_unlock(&tasklist_lock);
9546 +      out:
9547 +	return retval;
9548 +
9549 +}
9550 +
9551 +/*
9552 + *	This is the crucial function for periodic task implementation,
9553 + *	It checks if a task is periodic, checks if such kind of sleep
9554 + *	is permitted and calls plugin-specific sleep, which puts the
9555 + *	task into a wait array.
9556 + *	returns 0 on successful wakeup
9557 + *	returns EPERM if current conditions do not permit such sleep
9558 + *	returns EINVAL if current task is not able to go to sleep
9559 + */
9560 +asmlinkage long sys_complete_job(void)
9561 +{
9562 +	int retval = -EPERM;
9563 +	if (!is_realtime(current)) {
9564 +		retval = -EINVAL;
9565 +		goto out;
9566 +	}
9567 +	/* Task with negative or zero period cannot sleep */
9568 +	if (get_rt_period(current) <= 0) {
9569 +		retval = -EINVAL;
9570 +		goto out;
9571 +	}
9572 +	/* The plugin has to put the task into an
9573 +	 * appropriate queue and call schedule
9574 +	 */
9575 +	retval = litmus->complete_job();
9576 +      out:
9577 +	return retval;
9578 +}
9579 +
9580 +/*	This is an "improved" version of sys_complete_job that
9581 + *      addresses the problem of unintentionally missing a job after
9582 + *      an overrun.
9583 + *
9584 + *	returns 0 on successful wakeup
9585 + *	returns EPERM if current conditions do not permit such sleep
9586 + *	returns EINVAL if current task is not able to go to sleep
9587 + */
9588 +asmlinkage long sys_wait_for_job_release(unsigned int job)
9589 +{
9590 +	int retval = -EPERM;
9591 +	if (!is_realtime(current)) {
9592 +		retval = -EINVAL;
9593 +		goto out;
9594 +	}
9595 +
9596 +	/* Task with negative or zero period cannot sleep */
9597 +	if (get_rt_period(current) <= 0) {
9598 +		retval = -EINVAL;
9599 +		goto out;
9600 +	}
9601 +
9602 +	retval = 0;
9603 +
9604 +	/* first wait until we have "reached" the desired job
9605 +	 *
9606 +	 * This implementation has at least two problems:
9607 +	 *
9608 +	 * 1) It doesn't gracefully handle the wrap around of
9609 +	 *    job_no. Since LITMUS is a prototype, this is not much
9610 +	 *    of a problem right now.
9611 +	 *
9612 +	 * 2) It is theoretically racy if a job release occurs
9613 +	 *    between checking job_no and calling sleep_next_period().
9614 +	 *    A proper solution would require adding another callback
9615 +	 *    in the plugin structure and testing the condition with
9616 +	 *    interrupts disabled.
9617 +	 *
9618 +	 * FIXME: At least problem 2 should be taken care of eventually.
9619 +	 */
9620 +	while (!retval && job > current->rt_param.job_params.job_no)
9621 +		/* If the last job overran then job <= job_no and we
9622 +		 * don't send the task to sleep.
9623 +		 */
9624 +		retval = litmus->complete_job();
9625 +      out:
9626 +	return retval;
9627 +}
9628 +
9629 +/*	This is a helper syscall to query the current job sequence number.
9630 + *
9631 + *	returns 0 on successful query
9632 + *	returns EPERM if task is not a real-time task.
9633 + *      returns EFAULT if &job is not a valid pointer.
9634 + */
9635 +asmlinkage long sys_query_job_no(unsigned int __user *job)
9636 +{
9637 +	int retval = -EPERM;
9638 +	if (is_realtime(current))
9639 +		retval = put_user(current->rt_param.job_params.job_no, job);
9640 +
9641 +	return retval;
9642 +}
9643 +
9644 +/* sys_null_call() is only used for determining raw system call
9645 + * overheads (kernel entry, kernel exit). It has no useful side effects.
9646 + * If ts is non-NULL, then the current Feather-Trace time is recorded.
9647 + */
9648 +asmlinkage long sys_null_call(cycles_t __user *ts)
9649 +{
9650 +	long ret = 0;
9651 +	cycles_t now;
9652 +
9653 +	if (ts) {
9654 +		now = get_cycles();
9655 +		ret = put_user(now, ts);
9656 +	}
9657 +
9658 +	return ret;
9659 +}
9660 +
9661 +/* p is a real-time task. Re-init its state as a best-effort task. */
9662 +static void reinit_litmus_state(struct task_struct* p, int restore)
9663 +{
9664 +	struct rt_task  user_config = {};
9665 +	void*  ctrl_page     = NULL;
9666 +
9667 +	if (restore) {
9668 +		/* Save the user-space provided configuration data
9669 +		 * and the allocated control page. */
9670 +		user_config = p->rt_param.task_params;
9671 +		ctrl_page   = p->rt_param.ctrl_page;
9672 +	}
9673 +
9674 +	/* We probably should not be inheriting any task's priority
9675 +	 * at this point in time.
9676 +	 */
9677 +	WARN_ON(p->rt_param.inh_task);
9678 +
9679 +	/* Cleanup everything else. */
9680 +	memset(&p->rt_param, 0, sizeof(p->rt_param));
9681 +
9682 +	/* Restore preserved fields. */
9683 +	if (restore) {
9684 +		p->rt_param.task_params = user_config;
9685 +		p->rt_param.ctrl_page   = ctrl_page;
9686 +	}
9687 +}
9688 +
9689 +long litmus_admit_task(struct task_struct* tsk)
9690 +{
9691 +	long retval = 0;
9692 +
9693 +	BUG_ON(is_realtime(tsk));
9694 +
9695 +	tsk_rt(tsk)->heap_node = NULL;
9696 +	tsk_rt(tsk)->rel_heap = NULL;
9697 +
9698 +	if (get_rt_relative_deadline(tsk) == 0 ||
9699 +	    get_exec_cost(tsk) >
9700 +			min(get_rt_relative_deadline(tsk), get_rt_period(tsk)) ) {
9701 +		TRACE_TASK(tsk,
9702 +			"litmus admit: invalid task parameters "
9703 +			"(e = %lu, p = %lu, d = %lu)\n",
9704 +			get_exec_cost(tsk), get_rt_period(tsk),
9705 +			get_rt_relative_deadline(tsk));
9706 +		retval = -EINVAL;
9707 +		goto out;
9708 +	}
9709 +
9710 +	INIT_LIST_HEAD(&tsk_rt(tsk)->list);
9711 +
9712 +	/* allocate heap node for this task */
9713 +	tsk_rt(tsk)->heap_node = bheap_node_alloc(GFP_ATOMIC);
9714 +	tsk_rt(tsk)->rel_heap = release_heap_alloc(GFP_ATOMIC);
9715 +
9716 +	if (!tsk_rt(tsk)->heap_node || !tsk_rt(tsk)->rel_heap) {
9717 +		printk(KERN_WARNING "litmus: no more heap node memory!?\n");
9718 +
9719 +		retval = -ENOMEM;
9720 +		goto out;
9721 +	} else {
9722 +		bheap_node_init(&tsk_rt(tsk)->heap_node, tsk);
9723 +	}
9724 +
9725 +	preempt_disable();
9726 +
9727 +	retval = litmus->admit_task(tsk);
9728 +
9729 +	if (!retval) {
9730 +		sched_trace_task_name(tsk);
9731 +		sched_trace_task_param(tsk);
9732 +		atomic_inc(&rt_task_count);
9733 +	}
9734 +
9735 +	preempt_enable();
9736 +
9737 +out:
9738 +	if (retval) {
9739 +		if (tsk_rt(tsk)->heap_node)
9740 +			bheap_node_free(tsk_rt(tsk)->heap_node);
9741 +		if (tsk_rt(tsk)->rel_heap)
9742 +			release_heap_free(tsk_rt(tsk)->rel_heap);
9743 +	}
9744 +	return retval;
9745 +}
9746 +
9747 +void litmus_clear_state(struct task_struct* tsk)
9748 +{
9749 +	BUG_ON(bheap_node_in_heap(tsk_rt(tsk)->heap_node));
9750 +	bheap_node_free(tsk_rt(tsk)->heap_node);
9751 +	release_heap_free(tsk_rt(tsk)->rel_heap);
9752 +
9753 +	atomic_dec(&rt_task_count);
9754 +	reinit_litmus_state(tsk, 1);
9755 +}
9756 +
9757 +/* called from sched_setscheduler() */
9758 +void litmus_exit_task(struct task_struct* tsk)
9759 +{
9760 +	if (is_realtime(tsk)) {
9761 +		sched_trace_task_completion(tsk, 1);
9762 +
9763 +		litmus->task_exit(tsk);
9764 +	}
9765 +}
9766 +
9767 +static DECLARE_RWSEM(plugin_switch_mutex);
9768 +
9769 +void litmus_plugin_switch_disable(void)
9770 +{
9771 +	down_read(&plugin_switch_mutex);
9772 +}
9773 +
9774 +void litmus_plugin_switch_enable(void)
9775 +{
9776 +	up_read(&plugin_switch_mutex);
9777 +}
9778 +
9779 +static int do_plugin_switch(void *_plugin)
9780 +{
9781 +	int ret;
9782 +	struct sched_plugin* plugin = _plugin;
9783 +
9784 +	/* don't switch if there are active real-time tasks */
9785 +	if (atomic_read(&rt_task_count) == 0) {
9786 +		ret = litmus->deactivate_plugin();
9787 +		if (0 != ret)
9788 +			goto out;
9789 +		ret = plugin->activate_plugin();
9790 +		if (0 != ret) {
9791 +			printk(KERN_INFO "Can't activate %s (%d).\n",
9792 +			       plugin->plugin_name, ret);
9793 +			plugin = &linux_sched_plugin;
9794 +		}
9795 +
9796 +		printk(KERN_INFO "Switching to LITMUS^RT plugin %s.\n", plugin->plugin_name);
9797 +		litmus = plugin;
9798 +	} else
9799 +		ret = -EBUSY;
9800 +out:
9801 +	return ret;
9802 +}
9803 +
9804 +/* Switching a plugin in use is tricky.
9805 + * We must make sure that no real-time tasks exist
9806 + * (and that none is created in parallel) and that the plugin is not
9807 + * currently in use on any processor (in theory).
9808 + */
9809 +int switch_sched_plugin(struct sched_plugin* plugin)
9810 +{
9811 +	int err;
9812 +	struct domain_proc_info* domain_info;
9813 +
9814 +	BUG_ON(!plugin);
9815 +
9816 +	if (atomic_read(&rt_task_count) == 0) {
9817 +		down_write(&plugin_switch_mutex);
9818 +
9819 +		deactivate_domain_proc();
9820 +
9821 +		err =  stop_machine(do_plugin_switch, plugin, NULL);
9822 +
9823 +		if(!litmus->get_domain_proc_info(&domain_info))
9824 +			activate_domain_proc(domain_info);
9825 +
9826 +		up_write(&plugin_switch_mutex);
9827 +		return err;
9828 +	} else
9829 +		return -EBUSY;
9830 +}
9831 +
9832 +/* Called upon fork.
9833 + * p is the newly forked task.
9834 + */
9835 +void litmus_fork(struct task_struct* p)
9836 +{
9837 +	if (is_realtime(p)) {
9838 +		/* clean out any litmus related state, don't preserve anything */
9839 +		reinit_litmus_state(p, 0);
9840 +		/* Don't let the child be a real-time task.  */
9841 +		p->sched_reset_on_fork = 1;
9842 +	} else
9843 +		/* non-rt tasks might have ctrl_page set */
9844 +		tsk_rt(p)->ctrl_page = NULL;
9845 +
9846 +	/* od tables are never inherited across a fork */
9847 +	p->od_table = NULL;
9848 +}
9849 +
9850 +/* Called upon execve().
9851 + * current is doing the exec.
9852 + * Don't let address space specific stuff leak.
9853 + */
9854 +void litmus_exec(void)
9855 +{
9856 +	struct task_struct* p = current;
9857 +
9858 +	if (is_realtime(p)) {
9859 +		WARN_ON(p->rt_param.inh_task);
9860 +		if (tsk_rt(p)->ctrl_page) {
9861 +			free_page((unsigned long) tsk_rt(p)->ctrl_page);
9862 +			tsk_rt(p)->ctrl_page = NULL;
9863 +		}
9864 +	}
9865 +}
9866 +
9867 +/* Called when dead_tsk is being deallocated
9868 + */
9869 +void exit_litmus(struct task_struct *dead_tsk)
9870 +{
9871 +	/* We also allow non-RT tasks to
9872 +	 * allocate control pages to allow
9873 +	 * measurements with non-RT tasks.
9874 +	 * So check if we need to free the page
9875 +	 * in any case.
9876 +	 */
9877 +	if (tsk_rt(dead_tsk)->ctrl_page) {
9878 +		TRACE_TASK(dead_tsk,
9879 +			   "freeing ctrl_page %p\n",
9880 +			   tsk_rt(dead_tsk)->ctrl_page);
9881 +		free_page((unsigned long) tsk_rt(dead_tsk)->ctrl_page);
9882 +	}
9883 +
9884 +	/* Tasks should not be real-time tasks any longer at this point. */
9885 +	BUG_ON(is_realtime(dead_tsk));
9886 +}
9887 +
9888 +void litmus_do_exit(struct task_struct *exiting_tsk)
9889 +{
9890 +	/* This task called do_exit(), but is still a real-time task. To avoid
9891 +	 * complications later, we force it to be a non-real-time task now. */
9892 +
9893 +	struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
9894 +
9895 +	TRACE_TASK(exiting_tsk, "exiting, demoted to SCHED_FIFO\n");
9896 +	sched_setscheduler_nocheck(exiting_tsk, SCHED_FIFO, &param);
9897 +}
9898 +
9899 +void litmus_dealloc(struct task_struct *tsk)
9900 +{
9901 +	/* tsk is no longer a real-time task */
9902 +	TRACE_TASK(tsk, "Deallocating real-time task data\n");
9903 +	litmus->task_cleanup(tsk);
9904 +	litmus_clear_state(tsk);
9905 +}
9906 +
9907 +#ifdef CONFIG_MAGIC_SYSRQ
9908 +int sys_kill(int pid, int sig);
9909 +
9910 +static void sysrq_handle_kill_rt_tasks(int key)
9911 +{
9912 +	struct task_struct *t;
9913 +	read_lock(&tasklist_lock);
9914 +	for_each_process(t) {
9915 +		if (is_realtime(t)) {
9916 +			sys_kill(t->pid, SIGKILL);
9917 +		}
9918 +	}
9919 +	read_unlock(&tasklist_lock);
9920 +}
9921 +
9922 +static struct sysrq_key_op sysrq_kill_rt_tasks_op = {
9923 +	.handler	= sysrq_handle_kill_rt_tasks,
9924 +	.help_msg	= "quit-rt-tasks(X)",
9925 +	.action_msg	= "sent SIGKILL to all LITMUS^RT real-time tasks",
9926 +};
9927 +#endif
9928 +
9929 +extern struct sched_plugin linux_sched_plugin;
9930 +
9931 +static int litmus_shutdown_nb(struct notifier_block *unused1,
9932 +				unsigned long unused2, void *unused3)
9933 +{
9934 +	/* Attempt to switch back to regular Linux scheduling.
9935 +	 * Forces the active plugin to clean up.
9936 +	 */
9937 +	if (litmus != &linux_sched_plugin) {
9938 +		int ret = switch_sched_plugin(&linux_sched_plugin);
9939 +		if (ret) {
9940 +			printk("Auto-shutdown of active Litmus plugin failed.\n");
9941 +		}
9942 +	}
9943 +	return NOTIFY_DONE;
9944 +}
9945 +
9946 +static struct notifier_block shutdown_notifier = {
9947 +	.notifier_call = litmus_shutdown_nb,
9948 +};
9949 +
9950 +static int __init _init_litmus(void)
9951 +{
9952 +	/*      Common initializers,
9953 +	 *      mode change lock is used to enforce single mode change
9954 +	 *      operation.
9955 +	 */
9956 +	printk("Starting LITMUS^RT kernel\n");
9957 +
9958 +	register_sched_plugin(&linux_sched_plugin);
9959 +
9960 +	bheap_node_cache    = KMEM_CACHE(bheap_node, SLAB_PANIC);
9961 +	release_heap_cache = KMEM_CACHE(release_heap, SLAB_PANIC);
9962 +
9963 +#ifdef CONFIG_MAGIC_SYSRQ
9964 +	/* offer some debugging help */
9965 +	if (!register_sysrq_key('x', &sysrq_kill_rt_tasks_op))
9966 +		printk("Registered kill rt tasks magic sysrq.\n");
9967 +	else
9968 +		printk("Could not register kill rt tasks magic sysrq.\n");
9969 +#endif
9970 +
9971 +	init_litmus_proc();
9972 +
9973 +#ifdef CONFIG_SCHED_CPU_AFFINITY
9974 +	init_topology();
9975 +#endif
9976 +
9977 +	register_reboot_notifier(&shutdown_notifier);
9978 +
9979 +	return 0;
9980 +}
9981 +
9982 +static void _exit_litmus(void)
9983 +{
9984 +	unregister_reboot_notifier(&shutdown_notifier);
9985 +
9986 +	exit_litmus_proc();
9987 +	kmem_cache_destroy(bheap_node_cache);
9988 +	kmem_cache_destroy(release_heap_cache);
9989 +}
9990 +
9991 +module_init(_init_litmus);
9992 +module_exit(_exit_litmus);
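The admission path above enforces two parameter rules: an unspecified relative deadline defaults to the period (implicit deadlines), and tasks whose execution cost does not fit into min(relative deadline, period), i.e. tasks with density greater than 1, are rejected. The following user-space sketch, with invented struct and function names, mirrors just those checks for illustration.

#include <stdio.h>

struct fake_params {
	unsigned long long exec_cost;
	unsigned long long period;
	unsigned long long relative_deadline;
};

/* returns 1 if the parameters would pass the sanity checks, 0 otherwise */
static int params_ok_sketch(struct fake_params *tp)
{
	unsigned long long min_dp;

	if (tp->relative_deadline == 0)
		tp->relative_deadline = tp->period; /* implicit deadline */
	if (tp->exec_cost == 0 || tp->period == 0)
		return 0;
	/* density check: e must fit into min(d, p) */
	min_dp = tp->relative_deadline < tp->period ? tp->relative_deadline
						    : tp->period;
	return min_dp >= tp->exec_cost;
}

int main(void)
{
	struct fake_params ok  = { .exec_cost = 2, .period = 10, .relative_deadline = 0 };
	struct fake_params bad = { .exec_cost = 8, .period = 10, .relative_deadline = 5 };

	printf("ok accepted: %d\n", params_ok_sketch(&ok));   /* 1 */
	printf("bad accepted: %d\n", params_ok_sketch(&bad)); /* 0 */
	return 0;
}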
9993 diff --git a/litmus/litmus_proc.c b/litmus/litmus_proc.c
9994 new file mode 100644
9995 index 0000000..4db3fe2
9996 --- /dev/null
9997 +++ b/litmus/litmus_proc.c
9998 @@ -0,0 +1,576 @@
9999 +/*
10000 + * litmus_proc.c -- Implementation of the /proc/litmus directory tree.
10001 + */
10002 +
10003 +#include <linux/sched.h>
10004 +#include <linux/slab.h>
10005 +#include <linux/uaccess.h>
10006 +#include <linux/seq_file.h>
10007 +
10008 +#include <litmus/litmus.h>
10009 +#include <litmus/litmus_proc.h>
10010 +
10011 +#include <litmus/clustered.h>
10012 +
10013 +/* in litmus/litmus.c */
10014 +extern atomic_t rt_task_count;
10015 +
10016 +static struct proc_dir_entry *litmus_dir = NULL,
10017 +	*curr_file = NULL,
10018 +	*stat_file = NULL,
10019 +	*plugs_dir = NULL,
10020 +#ifdef CONFIG_RELEASE_MASTER
10021 +	*release_master_file = NULL,
10022 +#endif
10023 +	*plugs_file = NULL,
10024 +	*domains_dir = NULL,
10025 +	*cpus_dir = NULL;
10026 +
10027 +
10028 +/* in litmus/sync.c */
10029 +int count_tasks_waiting_for_release(void);
10030 +
10031 +static int litmus_stats_proc_show(struct seq_file *m, void *v)
10032 +{
10033 +        seq_printf(m,
10034 +		   "real-time tasks   = %d\n"
10035 +		   "ready for release = %d\n",
10036 +		   atomic_read(&rt_task_count),
10037 +		   count_tasks_waiting_for_release());
10038 +	return 0;
10039 +}
10040 +
10041 +static int litmus_stats_proc_open(struct inode *inode, struct file *file)
10042 +{
10043 +	return single_open(file, litmus_stats_proc_show, PDE_DATA(inode));
10044 +}
10045 +
10046 +static const struct file_operations litmus_stats_proc_fops = {
10047 +	.open		= litmus_stats_proc_open,
10048 +	.read		= seq_read,
10049 +	.llseek		= seq_lseek,
10050 +	.release	= single_release,
10051 +};
10052 +
10053 +
10054 +static int litmus_loaded_proc_show(struct seq_file *m, void *v)
10055 +{
10056 +	print_sched_plugins(m);
10057 +	return 0;
10058 +}
10059 +
10060 +static int litmus_loaded_proc_open(struct inode *inode, struct file *file)
10061 +{
10062 +	return single_open(file, litmus_loaded_proc_show, PDE_DATA(inode));
10063 +}
10064 +
10065 +static const struct file_operations litmus_loaded_proc_fops = {
10066 +	.open		= litmus_loaded_proc_open,
10067 +	.read		= seq_read,
10068 +	.llseek		= seq_lseek,
10069 +	.release	= single_release,
10070 +};
10071 +
10072 +
10073 +
10074 +
10075 +/* in litmus/litmus.c */
10076 +int switch_sched_plugin(struct sched_plugin*);
10077 +
10078 +static ssize_t litmus_active_proc_write(struct file *file,
10079 +					const char __user *buffer, size_t count,
10080 +					loff_t *ppos)
10081 +{
10082 +	char name[65];
10083 +	struct sched_plugin* found;
10084 +	ssize_t ret = -EINVAL;
10085 +	int err;
10086 +
10087 +
10088 +	ret = copy_and_chomp(name, sizeof(name), buffer, count);
10089 +	if (ret < 0)
10090 +		return ret;
10091 +
10092 +	found = find_sched_plugin(name);
10093 +
10094 +	if (found) {
10095 +		err = switch_sched_plugin(found);
10096 +		if (err) {
10097 +			printk(KERN_INFO "Could not switch plugin: %d\n", err);
10098 +			ret = err;
10099 +		}
10100 +	} else {
10101 +		printk(KERN_INFO "Plugin '%s' is unknown.\n", name);
10102 +		ret = -ESRCH;
10103 +	}
10104 +
10105 +	return ret;
10106 +}
10107 +
10108 +static int litmus_active_proc_show(struct seq_file *m, void *v)
10109 +{
10110 +	seq_printf(m, "%s\n", litmus->plugin_name);
10111 +	return 0;
10112 +}
10113 +
10114 +static int litmus_active_proc_open(struct inode *inode, struct file *file)
10115 +{
10116 +	return single_open(file, litmus_active_proc_show, PDE_DATA(inode));
10117 +}
10118 +
10119 +static const struct file_operations litmus_active_proc_fops = {
10120 +	.open		= litmus_active_proc_open,
10121 +	.read		= seq_read,
10122 +	.llseek		= seq_lseek,
10123 +	.release	= single_release,
10124 +	.write		= litmus_active_proc_write,
10125 +};
10126 +
10127 +
10128 +#ifdef CONFIG_RELEASE_MASTER
10129 +static ssize_t litmus_release_master_proc_write(
10130 +	struct file *file,
10131 +	const char __user *buffer, size_t count,
10132 +	loff_t *ppos)
10133 +{
10134 +	int cpu, err, online = 0;
10135 +	char msg[64];
10136 +	ssize_t len;
10137 +
10138 +	len = copy_and_chomp(msg, sizeof(msg), buffer, count);
10139 +
10140 +	if (len < 0)
10141 +		return len;
10142 +
10143 +	if (strcmp(msg, "NO_CPU") == 0)
10144 +		atomic_set(&release_master_cpu, NO_CPU);
10145 +	else {
10146 +		err = sscanf(msg, "%d", &cpu);
10147 +		if (err == 1 && cpu >= 0 && (online = cpu_online(cpu))) {
10148 +			atomic_set(&release_master_cpu, cpu);
10149 +		} else {
10150 +			TRACE("invalid release master: '%s' "
10151 +			      "(err:%d cpu:%d online:%d)\n",
10152 +			      msg, err, cpu, online);
10153 +			len = -EINVAL;
10154 +		}
10155 +	}
10156 +	return len;
10157 +}
10158 +
10159 +static int litmus_release_master_proc_show(struct seq_file *m, void *v)
10160 +{
10161 +	int master;
10162 +	master = atomic_read(&release_master_cpu);
10163 +	if (master == NO_CPU)
10164 +		seq_printf(m, "NO_CPU\n");
10165 +	else
10166 +		seq_printf(m, "%d\n", master);
10167 +	return 0;
10168 +}
10169 +
10170 +static int litmus_release_master_proc_open(struct inode *inode, struct file *file)
10171 +{
10172 +	return single_open(file, litmus_release_master_proc_show, PDE_DATA(inode));
10173 +}
10174 +
10175 +static const struct file_operations litmus_release_master_proc_fops = {
10176 +	.open		= litmus_release_master_proc_open,
10177 +	.read		= seq_read,
10178 +	.llseek		= seq_lseek,
10179 +	.release	= single_release,
10180 +	.write		= litmus_release_master_proc_write,
10181 +};
10182 +#endif
10183 +
10184 +int __init init_litmus_proc(void)
10185 +{
10186 +	litmus_dir = proc_mkdir("litmus", NULL);
10187 +	if (!litmus_dir) {
10188 +		printk(KERN_ERR "Could not allocate LITMUS^RT procfs entry.\n");
10189 +		return -ENOMEM;
10190 +	}
10191 +
10192 +	curr_file = proc_create("active_plugin", 0644, litmus_dir,
10193 +				&litmus_active_proc_fops);
10194 +
10195 +	if (!curr_file) {
10196 +		printk(KERN_ERR "Could not allocate active_plugin "
10197 +		       "procfs entry.\n");
10198 +		return -ENOMEM;
10199 +	}
10200 +
10201 +#ifdef CONFIG_RELEASE_MASTER
10202 +	release_master_file = proc_create("release_master", 0644, litmus_dir,
10203 +					  &litmus_release_master_proc_fops);
10204 +	if (!release_master_file) {
10205 +		printk(KERN_ERR "Could not allocate release_master "
10206 +		       "procfs entry.\n");
10207 +		return -ENOMEM;
10208 +	}
10209 +#endif
10210 +
10211 +	stat_file = proc_create("stats", 0444, litmus_dir, &litmus_stats_proc_fops);
10212 +
10213 +	plugs_dir = proc_mkdir("plugins", litmus_dir);
10214 +	if (!plugs_dir){
10215 +		printk(KERN_ERR "Could not allocate plugins directory "
10216 +				"procfs entry.\n");
10217 +		return -ENOMEM;
10218 +	}
10219 +
10220 +	plugs_file = proc_create("loaded", 0444, plugs_dir,
10221 +				 &litmus_loaded_proc_fops);
10222 +
10223 +	domains_dir = proc_mkdir("domains", litmus_dir);
10224 +	if (!domains_dir) {
10225 +		printk(KERN_ERR "Could not allocate domains directory "
10226 +				"procfs entry.\n");
10227 +		return -ENOMEM;
10228 +	}
10229 +
10230 +	cpus_dir = proc_mkdir("cpus", litmus_dir);
10231 +	if (!cpus_dir) {
10232 +		printk(KERN_ERR "Could not allocate cpus directory "
10233 +				"procfs entry.\n");
10234 +		return -ENOMEM;
10235 +	}
10236 +
10237 +	return 0;
10238 +}
10239 +
10240 +void exit_litmus_proc(void)
10241 +{
10242 +	if (cpus_dir || domains_dir) {
10243 +		deactivate_domain_proc();
10244 +		if (cpus_dir)
10245 +			remove_proc_entry("cpus", litmus_dir);
10246 +		if (domains_dir)
10247 +			remove_proc_entry("domains", litmus_dir);
10248 +	}
10249 +	if (plugs_file)
10250 +		remove_proc_entry("loaded", plugs_dir);
10251 +	if (plugs_dir)
10252 +		remove_proc_entry("plugins", litmus_dir);
10253 +	if (stat_file)
10254 +		remove_proc_entry("stats", litmus_dir);
10255 +	if (curr_file)
10256 +		remove_proc_entry("active_plugin", litmus_dir);
10257 +#ifdef CONFIG_RELEASE_MASTER
10258 +	if (release_master_file)
10259 +		remove_proc_entry("release_master", litmus_dir);
10260 +#endif
10261 +	if (litmus_dir)
10262 +		remove_proc_entry("litmus", NULL);
10263 +}
10264 +
10265 +long make_plugin_proc_dir(struct sched_plugin* plugin,
10266 +		struct proc_dir_entry** pde_in)
10267 +{
10268 +	struct proc_dir_entry *pde_new = NULL;
10269 +	long rv;
10270 +
10271 +	if (!plugin || !plugin->plugin_name){
10272 +		printk(KERN_ERR "Invalid plugin struct passed to %s.\n",
10273 +				__func__);
10274 +		rv = -EINVAL;
10275 +		goto out_no_pde;
10276 +	}
10277 +
10278 +	if (!plugs_dir){
10279 +		printk(KERN_ERR "Could not make plugin sub-directory, because "
10280 +				"/proc/litmus/plugins does not exist.\n");
10281 +		rv = -ENOENT;
10282 +		goto out_no_pde;
10283 +	}
10284 +
10285 +	pde_new = proc_mkdir(plugin->plugin_name, plugs_dir);
10286 +	if (!pde_new){
10287 +		printk(KERN_ERR "Could not make plugin sub-directory: "
10288 +				"out of memory?\n");
10289 +		rv = -ENOMEM;
10290 +		goto out_no_pde;
10291 +	}
10292 +
10293 +	rv = 0;
10294 +	*pde_in = pde_new;
10295 +	goto out_ok;
10296 +
10297 +out_no_pde:
10298 +	*pde_in = NULL;
10299 +out_ok:
10300 +	return rv;
10301 +}
10302 +
10303 +void remove_plugin_proc_dir(struct sched_plugin* plugin)
10304 +{
10305 +	if (!plugin || !plugin->plugin_name){
10306 +		printk(KERN_ERR "Invalid plugin struct passed to %s.\n",
10307 +				__func__);
10308 +		return;
10309 +	}
10310 +	remove_proc_entry(plugin->plugin_name, plugs_dir);
10311 +}
10312 +
10313 +
10314 +
10315 +/* misc. I/O helper functions */
10316 +
10317 +int copy_and_chomp(char *kbuf, unsigned long ksize,
10318 +		   __user const char* ubuf, unsigned long ulength)
10319 +{
10320 +	/* caller must provide buffer space */
10321 +	BUG_ON(!ksize);
10322 +
10323 +	ksize--; /* leave space for null byte */
10324 +
10325 +	if (ksize > ulength)
10326 +		ksize = ulength;
10327 +
10328 +	if(copy_from_user(kbuf, ubuf, ksize))
10329 +		return -EFAULT;
10330 +
10331 +	kbuf[ksize] = '\0';
10332 +
10333 +	/* chomp kbuf */
10334 +	if (ksize > 0 && kbuf[ksize - 1] == '\n')
10335 +		kbuf[ksize - 1] = '\0';
10336 +
10337 +	return ksize;
10338 +}
10339 +
10340 +/* helper functions for clustered plugins */
10341 +static const char* cache_level_names[] = {
10342 +	"ALL",
10343 +	"L1",
10344 +	"L2",
10345 +	"L3",
10346 +};
10347 +
10348 +int parse_cache_level(const char *cache_name, enum cache_level *level)
10349 +{
10350 +	int err = -EINVAL;
10351 +	int i;
10352 +	/* do a quick and dirty comparison to find the cluster size */
10353 +	for (i = GLOBAL_CLUSTER; i <= L3_CLUSTER; i++)
10354 +		if (!strcmp(cache_name, cache_level_names[i])) {
10355 +			*level = (enum cache_level) i;
10356 +			err = 0;
10357 +			break;
10358 +		}
10359 +	return err;
10360 +}
10361 +
10362 +const char* cache_level_name(enum cache_level level)
10363 +{
10364 +	int idx = level;
10365 +
10366 +	if (idx >= GLOBAL_CLUSTER && idx <= L3_CLUSTER)
10367 +		return cache_level_names[idx];
10368 +	else
10369 +		return "INVALID";
10370 +}
10371 +
10372 +
10373 +
10374 +
10375 +/* proc file interface to configure the cluster size */
10376 +
10377 +static ssize_t litmus_cluster_proc_write(struct file *file,
10378 +					const char __user *buffer, size_t count,
10379 +					loff_t *ppos)
10380 +{
10381 +	enum cache_level *level = (enum cache_level *) PDE_DATA(file_inode(file));
10382 +	ssize_t len;
10383 +	char cache_name[8];
10384 +
10385 +	len = copy_and_chomp(cache_name, sizeof(cache_name), buffer, count);
10386 +
10387 +	if (len > 0 && parse_cache_level(cache_name, level)) {
10388 +		printk(KERN_INFO "Cluster '%s' is unknown.\n", cache_name);
10389 +		len = -EINVAL;
10390 +	}
10391 +
10392 +	return len;
10393 +}
10394 +
10395 +static int litmus_cluster_proc_show(struct seq_file *m, void *v)
10396 +{
10397 +	enum cache_level *level = (enum cache_level *)  m->private;
10398 +
10399 +	seq_printf(m, "%s\n", cache_level_name(*level));
10400 +	return 0;
10401 +}
10402 +
10403 +static int litmus_cluster_proc_open(struct inode *inode, struct file *file)
10404 +{
10405 +	return single_open(file, litmus_cluster_proc_show, PDE_DATA(inode));
10406 +}
10407 +
10408 +static const struct file_operations litmus_cluster_proc_fops = {
10409 +	.open		= litmus_cluster_proc_open,
10410 +	.read		= seq_read,
10411 +	.llseek		= seq_lseek,
10412 +	.release	= single_release,
10413 +	.write		= litmus_cluster_proc_write,
10414 +};
10415 +
10416 +struct proc_dir_entry* create_cluster_file(struct proc_dir_entry* parent,
10417 +					   enum cache_level* level)
10418 +{
10419 +	struct proc_dir_entry* cluster_file;
10420 +
10421 +
10422 +	cluster_file = proc_create_data("cluster", 0644, parent,
10423 +					&litmus_cluster_proc_fops,
10424 +					(void *) level);
10425 +	if (!cluster_file) {
10426 +		printk(KERN_ERR
10427 +		       "Could not create cluster procfs entry.\n");
10428 +	}
10429 +	return cluster_file;
10430 +}
10431 +
10432 +static struct domain_proc_info* active_mapping = NULL;
10433 +
10434 +static int litmus_mapping_proc_show(struct seq_file *m, void *v)
10435 +{
10436 +	struct cd_mapping *mapping = (struct cd_mapping*) m->private;
10437 +	char buf[256];
10438 +
10439 +	if(!mapping)
10440 +		return 0;
10441 +
10442 +	cpumask_scnprintf(buf, sizeof(buf), mapping->mask);
10443 +	buf[255] = '\0'; /* just in case... */
10444 +	seq_printf(m, "%s\n", buf);
10445 +	return 0;
10446 +}
10447 +
10448 +static int litmus_mapping_proc_open(struct inode *inode, struct file *file)
10449 +{
10450 +	return single_open(file, litmus_mapping_proc_show, PDE_DATA(inode));
10451 +}
10452 +
10453 +static const struct file_operations litmus_domain_proc_fops = {
10454 +	.open		= litmus_mapping_proc_open,
10455 +	.read		= seq_read,
10456 +	.llseek 	= seq_lseek,
10457 +	.release 	= single_release,
10458 +};
10459 +
10460 +long activate_domain_proc(struct domain_proc_info* map)
10461 +{
10462 +	int i;
10463 +	char name[8];
10464 +
10465 +	if (!map)
10466 +		return -EINVAL;
10467 +	if (cpus_dir == NULL || domains_dir == NULL)
10468 +		return -EINVAL;
10469 +
10470 +	if (active_mapping)
10471 +		deactivate_domain_proc();
10472 +
10473 +	active_mapping = map;
10474 +
10475 +	for (i = 0; i < map->num_cpus; ++i) {
10476 +		struct cd_mapping* m = &map->cpu_to_domains[i];
10477 +		snprintf(name, sizeof(name), "%d", m->id);
10478 +		m->proc_file = proc_create_data(name, 0444, cpus_dir,
10479 +			&litmus_domain_proc_fops, (void*)m);
10480 +	}
10481 +
10482 +	for (i = 0; i < map->num_domains; ++i) {
10483 +		struct cd_mapping* m = &map->domain_to_cpus[i];
10484 +		snprintf(name, sizeof(name), "%d", m->id);
10485 +		m->proc_file = proc_create_data(name, 0444, domains_dir,
10486 +			&litmus_domain_proc_fops, (void*)m);
10487 +	}
10488 +
10489 +	return 0;
10490 +}
10491 +
10492 +long deactivate_domain_proc()
10493 +{
10494 +	int i;
10495 +	char name[65];
10496 +
10497 +	struct domain_proc_info* map = active_mapping;
10498 +
10499 +	if (!map)
10500 +		return -EINVAL;
10501 +
10502 +	for (i = 0; i < map->num_cpus; ++i) {
10503 +		struct cd_mapping* m = &map->cpu_to_domains[i];
10504 +		snprintf(name, sizeof(name), "%d", m->id);
10505 +		remove_proc_entry(name, cpus_dir);
10506 +		m->proc_file = NULL;
10507 +	}
10508 +	for (i = 0; i < map->num_domains; ++i) {
10509 +		struct cd_mapping* m = &map->domain_to_cpus[i];
10510 +		snprintf(name, sizeof(name), "%d", m->id);
10511 +		remove_proc_entry(name, domains_dir);
10512 +		m->proc_file = NULL;
10513 +	}
10514 +
10515 +	active_mapping = NULL;
10516 +
10517 +	return 0;
10518 +}
10519 +
10520 +long init_domain_proc_info(struct domain_proc_info* m,
10521 +				int num_cpus, int num_domains)
10522 +{
10523 +	int i;
10524 +	int num_alloced_cpu_masks = 0;
10525 +	int num_alloced_domain_masks = 0;
10526 +
10527 +	m->cpu_to_domains =
10528 +		kmalloc(sizeof(*(m->cpu_to_domains))*num_cpus,
10529 +			GFP_ATOMIC);
10530 +	if(!m->cpu_to_domains)
10531 +		goto failure;
10532 +
10533 +	m->domain_to_cpus =
10534 +		kmalloc(sizeof(*(m->domain_to_cpus))*num_domains,
10535 +			GFP_ATOMIC);
10536 +	if(!m->domain_to_cpus)
10537 +		goto failure;
10538 +
10539 +	for(i = 0; i < num_cpus; ++i) {
10540 +		if(!zalloc_cpumask_var(&m->cpu_to_domains[i].mask, GFP_ATOMIC))
10541 +			goto failure;
10542 +		++num_alloced_cpu_masks;
10543 +	}
10544 +	for(i = 0; i < num_domains; ++i) {
10545 +		if(!zalloc_cpumask_var(&m->domain_to_cpus[i].mask, GFP_ATOMIC))
10546 +			goto failure;
10547 +		++num_alloced_domain_masks;
10548 +	}
10549 +
10550 +	return 0;
10551 +
10552 +failure:
10553 +	for(i = 0; i < num_alloced_cpu_masks; ++i)
10554 +		free_cpumask_var(m->cpu_to_domains[i].mask);
10555 +	for(i = 0; i < num_alloced_domain_masks; ++i)
10556 +		free_cpumask_var(m->domain_to_cpus[i].mask);
10557 +	if(m->cpu_to_domains)
10558 +		kfree(m->cpu_to_domains);
10559 +	if(m->domain_to_cpus)
10560 +		kfree(m->domain_to_cpus);
10561 +	return -ENOMEM;
10562 +}
10563 +
10564 +void destroy_domain_proc_info(struct domain_proc_info* m)
10565 +{
10566 +	int i;
10567 +	for(i = 0; i < m->num_cpus; ++i)
10568 +		free_cpumask_var(m->cpu_to_domains[i].mask);
10569 +	for(i = 0; i < m->num_domains; ++i)
10570 +		free_cpumask_var(m->domain_to_cpus[i].mask);
10571 +	kfree(m->cpu_to_domains);
10572 +	kfree(m->domain_to_cpus);
10573 +	memset(m, 0, sizeof(*m));
10574 +}
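The copy_and_chomp() helper above copies at most ksize-1 bytes from user space, NUL-terminates the result, and strips a single trailing newline, which is what makes "echo PLUGIN > /proc/litmus/active_plugin" work. A small user-space sketch of the same behavior follows; memcpy() stands in for copy_from_user(), and the plugin name is only an example input.

#include <stdio.h>
#include <string.h>

static int copy_and_chomp_sketch(char *kbuf, size_t ksize,
				 const char *ubuf, size_t ulength)
{
	if (ksize == 0)
		return -1;
	ksize--;                   /* leave space for the NUL byte */
	if (ksize > ulength)
		ksize = ulength;
	memcpy(kbuf, ubuf, ksize); /* stands in for copy_from_user() */
	kbuf[ksize] = '\0';
	/* chomp a single trailing newline */
	if (ksize > 0 && kbuf[ksize - 1] == '\n')
		kbuf[ksize - 1] = '\0';
	return (int)ksize;
}

int main(void)
{
	char name[65];
	const char *input = "GSN-EDF\n";

	copy_and_chomp_sketch(name, sizeof(name), input, strlen(input));
	printf("plugin name: '%s'\n", name); /* prints: plugin name: 'GSN-EDF' */
	return 0;
}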
10575 diff --git a/litmus/locking.c b/litmus/locking.c
10576 new file mode 100644
10577 index 0000000..43d9aec
10578 --- /dev/null
10579 +++ b/litmus/locking.c
10580 @@ -0,0 +1,188 @@
10581 +#include <linux/sched.h>
10582 +#include <litmus/litmus.h>
10583 +#include <litmus/fdso.h>
10584 +
10585 +#ifdef CONFIG_LITMUS_LOCKING
10586 +
10587 +#include <linux/sched.h>
10588 +#include <litmus/litmus.h>
10589 +#include <litmus/sched_plugin.h>
10590 +#include <litmus/trace.h>
10591 +#include <litmus/wait.h>
10592 +
10593 +static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg);
10594 +static int open_generic_lock(struct od_table_entry* entry, void* __user arg);
10595 +static int close_generic_lock(struct od_table_entry* entry);
10596 +static void destroy_generic_lock(obj_type_t type, void* sem);
10597 +
10598 +struct fdso_ops generic_lock_ops = {
10599 +	.create  = create_generic_lock,
10600 +	.open    = open_generic_lock,
10601 +	.close   = close_generic_lock,
10602 +	.destroy = destroy_generic_lock
10603 +};
10604 +
10605 +static inline bool is_lock(struct od_table_entry* entry)
10606 +{
10607 +	return entry->class == &generic_lock_ops;
10608 +}
10609 +
10610 +static inline struct litmus_lock* get_lock(struct od_table_entry* entry)
10611 +{
10612 +	BUG_ON(!is_lock(entry));
10613 +	return (struct litmus_lock*) entry->obj->obj;
10614 +}
10615 +
10616 +static  int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg)
10617 +{
10618 +	struct litmus_lock* lock;
10619 +	int err;
10620 +
10621 +	err = litmus->allocate_lock(&lock, type, arg);
10622 +	if (err == 0)
10623 +		*obj_ref = lock;
10624 +	return err;
10625 +}
10626 +
10627 +static int open_generic_lock(struct od_table_entry* entry, void* __user arg)
10628 +{
10629 +	struct litmus_lock* lock = get_lock(entry);
10630 +	if (lock->ops->open)
10631 +		return lock->ops->open(lock, arg);
10632 +	else
10633 +		return 0; /* default: any task can open it */
10634 +}
10635 +
10636 +static int close_generic_lock(struct od_table_entry* entry)
10637 +{
10638 +	struct litmus_lock* lock = get_lock(entry);
10639 +	if (lock->ops->close)
10640 +		return lock->ops->close(lock);
10641 +	else
10642 +		return 0; /* default: closing succeeds */
10643 +}
10644 +
10645 +static void destroy_generic_lock(obj_type_t type, void* obj)
10646 +{
10647 +	struct litmus_lock* lock = (struct litmus_lock*) obj;
10648 +	lock->ops->deallocate(lock);
10649 +}
10650 +
10651 +asmlinkage long sys_litmus_lock(int lock_od)
10652 +{
10653 +	long err = -EINVAL;
10654 +	struct od_table_entry* entry;
10655 +	struct litmus_lock* l;
10656 +
10657 +	TS_SYSCALL_IN_START;
10658 +
10659 +	TS_SYSCALL_IN_END;
10660 +
10661 +	TS_LOCK_START;
10662 +
10663 +	entry = get_entry_for_od(lock_od);
10664 +	if (entry && is_lock(entry)) {
10665 +		l = get_lock(entry);
10666 +		TRACE_CUR("attempts to lock 0x%p\n", l);
10667 +		err = l->ops->lock(l);
10668 +	}
10669 +
10670 +	/* Note: task may have been suspended or preempted in between!  Take
10671 +	 * this into account when computing overheads. */
10672 +	TS_LOCK_END;
10673 +
10674 +	TS_SYSCALL_OUT_START;
10675 +
10676 +	return err;
10677 +}
10678 +
10679 +asmlinkage long sys_litmus_unlock(int lock_od)
10680 +{
10681 +	long err = -EINVAL;
10682 +	struct od_table_entry* entry;
10683 +	struct litmus_lock* l;
10684 +
10685 +	TS_SYSCALL_IN_START;
10686 +
10687 +	TS_SYSCALL_IN_END;
10688 +
10689 +	TS_UNLOCK_START;
10690 +
10691 +	entry = get_entry_for_od(lock_od);
10692 +	if (entry && is_lock(entry)) {
10693 +		l = get_lock(entry);
10694 +		TRACE_CUR("attempts to unlock 0x%p\n", l);
10695 +		err = l->ops->unlock(l);
10696 +	}
10697 +
10698 +	/* Note: task may have been preempted in between!  Take this into
10699 +	 * account when computing overheads. */
10700 +	TS_UNLOCK_END;
10701 +
10702 +	TS_SYSCALL_OUT_START;
10703 +
10704 +	return err;
10705 +}
10706 +
10707 +struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq)
10708 +{
10709 +	wait_queue_t* q;
10710 +	struct task_struct* t = NULL;
10711 +
10712 +	if (waitqueue_active(wq)) {
10713 +		q = list_entry(wq->task_list.next,
10714 +			       wait_queue_t, task_list);
10715 +		t = (struct task_struct*) q->private;
10716 +		__remove_wait_queue(wq, q);
10717 +	}
10718 +	return(t);
10719 +}
10720 +
10721 +unsigned int __add_wait_queue_prio_exclusive(
10722 +	wait_queue_head_t* head,
10723 +	prio_wait_queue_t *new)
10724 +{
10725 +	struct list_head *pos;
10726 +	unsigned int passed = 0;
10727 +
10728 +	new->wq.flags |= WQ_FLAG_EXCLUSIVE;
10729 +
10730 +	/* find a spot where the new entry is less than the next */
10731 +	list_for_each(pos, &head->task_list) {
10732 +		prio_wait_queue_t* queued = list_entry(pos, prio_wait_queue_t,
10733 +						       wq.task_list);
10734 +
10735 +		if (unlikely(lt_before(new->priority, queued->priority) ||
10736 +			     (new->priority == queued->priority &&
10737 +			      new->tie_breaker < queued->tie_breaker))) {
10738 +			/* pos is not less than new, thus insert here */
10739 +			__list_add(&new->wq.task_list, pos->prev, pos);
10740 +			goto out;
10741 +		}
10742 +		passed++;
10743 +	}
10744 +
10745 +	/* If we get to this point, either the list is empty or every
10746 +	 * queued element is less than new.
10747 +	 * Let's add new to the end. */
10748 +	list_add_tail(&new->wq.task_list, &head->task_list);
10749 +out:
10750 +	return passed;
10751 +}
10752 +
10753 +
10754 +#else
10755 +
10756 +struct fdso_ops generic_lock_ops = {};
10757 +
10758 +asmlinkage long sys_litmus_lock(int sem_od)
10759 +{
10760 +	return -ENOSYS;
10761 +}
10762 +
10763 +asmlinkage long sys_litmus_unlock(int sem_od)
10764 +{
10765 +	return -ENOSYS;
10766 +}
10767 +
10768 +#endif
10769 diff --git a/litmus/preempt.c b/litmus/preempt.c
10770 new file mode 100644
10771 index 0000000..6be2f26
10772 --- /dev/null
10773 +++ b/litmus/preempt.c
10774 @@ -0,0 +1,137 @@
10775 +#include <linux/sched.h>
10776 +
10777 +#include <litmus/litmus.h>
10778 +#include <litmus/preempt.h>
10779 +#include <litmus/trace.h>
10780 +
10781 +/* The rescheduling state of each processor.
10782 + */
10783 +DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, resched_state);
10784 +
10785 +void sched_state_will_schedule(struct task_struct* tsk)
10786 +{
10787 +	/* Litmus hack: we only care about processor-local invocations of
10788 +	 * set_tsk_need_resched(). We can't reliably set the flag remotely
10789 +	 * since it might race with other updates to the scheduling state.  We
10790 +	 * can't rely on the runqueue lock protecting updates to the sched
10791 +	 * state since processors do not acquire the runqueue locks for all
10792 +	 * updates to the sched state (to avoid acquiring two runqueue locks at
10793 +	 * the same time). Further, if tsk is residing on a remote processor,
10794 +	 * then that processor doesn't actually know yet that it is going to
10795 +	 * reschedule; it still must receive an IPI (unless a local invocation
10796 +	 * races).
10797 +	 */
10798 +	if (likely(task_cpu(tsk) == smp_processor_id())) {
10799 +		VERIFY_SCHED_STATE(TASK_SCHEDULED | SHOULD_SCHEDULE | TASK_PICKED | WILL_SCHEDULE);
10800 +		if (is_in_sched_state(TASK_PICKED | PICKED_WRONG_TASK))
10801 +			set_sched_state(PICKED_WRONG_TASK);
10802 +		else
10803 +			set_sched_state(WILL_SCHEDULE);
10804 +	} else
10805 +		/* Litmus tasks should never be subject to a remote
10806 +		 * set_tsk_need_resched(). */
10807 +		BUG_ON(is_realtime(tsk));
10808 +#ifdef CONFIG_PREEMPT_STATE_TRACE
10809 +	TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n",
10810 +		   __builtin_return_address(0));
10811 +#endif
10812 +}
10813 +
10814 +/* Called by the IPI handler after another CPU called smp_send_resched(). */
10815 +void sched_state_ipi(void)
10816 +{
10817 +	/* If the IPI was slow, we might be in any state right now. The IPI is
10818 +	 * only meaningful if we are in SHOULD_SCHEDULE. */
10819 +	if (is_in_sched_state(SHOULD_SCHEDULE)) {
10820 +		/* Cause scheduler to be invoked.
10821 +		 * This will cause a transition to WILL_SCHEDULE. */
10822 +		set_tsk_need_resched(current);
10823 +		TRACE_STATE("IPI -> set_tsk_need_resched(%s/%d)\n",
10824 +			    current->comm, current->pid);
10825 +		TS_SEND_RESCHED_END;
10826 +	} else {
10827 +		/* ignore */
10828 +		TRACE_STATE("ignoring IPI in state %x (%s)\n",
10829 +			    get_sched_state(),
10830 +			    sched_state_name(get_sched_state()));
10831 +	}
10832 +}
10833 +
10834 +/* Called by plugins to cause a CPU to reschedule. IMPORTANT: the caller must
10835 + * hold the lock that is used to serialize scheduling decisions. */
10836 +void litmus_reschedule(int cpu)
10837 +{
10838 +	int picked_transition_ok = 0;
10839 +	int scheduled_transition_ok = 0;
10840 +
10841 +	/* The (remote) CPU could be in any state. */
10842 +
10843 +	/* The critical states are TASK_PICKED and TASK_SCHEDULED, as the CPU
10844 +	 * is not aware of the need to reschedule at this point. */
10845 +
10846 +	/* is a context switch in progress? */
10847 +	if (cpu_is_in_sched_state(cpu, TASK_PICKED))
10848 +		picked_transition_ok = sched_state_transition_on(
10849 +			cpu, TASK_PICKED, PICKED_WRONG_TASK);
10850 +
10851 +	if (!picked_transition_ok &&
10852 +	    cpu_is_in_sched_state(cpu, TASK_SCHEDULED)) {
10853 +		/* We either raced with the end of the context switch, or the
10854 +		 * CPU was in TASK_SCHEDULED anyway. */
10855 +		scheduled_transition_ok = sched_state_transition_on(
10856 +			cpu, TASK_SCHEDULED, SHOULD_SCHEDULE);
10857 +	}
10858 +
10859 +	/* If the CPU was in state TASK_SCHEDULED, then we need to cause the
10860 +	 * scheduler to be invoked. */
10861 +	if (scheduled_transition_ok) {
10862 +		if (smp_processor_id() == cpu)
10863 +			set_tsk_need_resched(current);
10864 +		else {
10865 +			TS_SEND_RESCHED_START(cpu);
10866 +			smp_send_reschedule(cpu);
10867 +		}
10868 +	}
10869 +
10870 +	TRACE_STATE("%s picked-ok:%d sched-ok:%d\n",
10871 +		    __FUNCTION__,
10872 +		    picked_transition_ok,
10873 +		    scheduled_transition_ok);
10874 +}
10875 +
10876 +void litmus_reschedule_local(void)
10877 +{
10878 +	if (is_in_sched_state(TASK_PICKED))
10879 +		set_sched_state(PICKED_WRONG_TASK);
10880 +	else if (is_in_sched_state(TASK_SCHEDULED | SHOULD_SCHEDULE)) {
10881 +		set_sched_state(WILL_SCHEDULE);
10882 +		set_tsk_need_resched(current);
10883 +	}
10884 +}
10885 +
10886 +#ifdef CONFIG_DEBUG_KERNEL
10887 +
10888 +void sched_state_plugin_check(void)
10889 +{
10890 +	if (!is_in_sched_state(TASK_PICKED | PICKED_WRONG_TASK)) {
10891 +		TRACE("!!!! plugin did not call sched_state_task_picked()! "
10892 +		      "Calling sched_state_task_picked() is mandatory---fix this.\n");
10893 +		set_sched_state(TASK_PICKED);
10894 +	}
10895 +}
10896 +
10897 +#define NAME_CHECK(x) case x:  return #x
10898 +const char* sched_state_name(int s)
10899 +{
10900 +	switch (s) {
10901 +		NAME_CHECK(TASK_SCHEDULED);
10902 +		NAME_CHECK(SHOULD_SCHEDULE);
10903 +		NAME_CHECK(WILL_SCHEDULE);
10904 +		NAME_CHECK(TASK_PICKED);
10905 +		NAME_CHECK(PICKED_WRONG_TASK);
10906 +	default:
10907 +		return "UNKNOWN";
10908 +	};
10909 +}
10910 +
10911 +#endif
10912 diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c
10913 new file mode 100644
10914 index 0000000..e5dec0b
10915 --- /dev/null
10916 +++ b/litmus/rt_domain.c
10917 @@ -0,0 +1,353 @@
10918 +/*
10919 + * litmus/rt_domain.c
10920 + *
10921 + * LITMUS real-time infrastructure. This file contains the
10922 + * functions that manipulate RT domains. RT domains are an abstraction
10923 + * of a ready queue and a release queue.
10924 + */
10925 +
10926 +#include <linux/percpu.h>
10927 +#include <linux/sched.h>
10928 +#include <linux/list.h>
10929 +#include <linux/slab.h>
10930 +
10931 +#include <litmus/litmus.h>
10932 +#include <litmus/sched_plugin.h>
10933 +#include <litmus/sched_trace.h>
10934 +
10935 +#include <litmus/rt_domain.h>
10936 +
10937 +#include <litmus/trace.h>
10938 +
10939 +#include <litmus/bheap.h>
10940 +
10941 +/* Uncomment when debugging timer races... */
10942 +#if 0
10943 +#define VTRACE_TASK TRACE_TASK
10944 +#define VTRACE TRACE
10945 +#else
10946 +#define VTRACE_TASK(t, fmt, args...) /* shut up */
10947 +#define VTRACE(fmt, args...) /* be quiet already */
10948 +#endif
10949 +
10950 +static int dummy_resched(rt_domain_t *rt)
10951 +{
10952 +	return 0;
10953 +}
10954 +
10955 +static int dummy_order(struct bheap_node* a, struct bheap_node* b)
10956 +{
10957 +	return 0;
10958 +}
10959 +
10960 +/* default implementation: use default lock */
10961 +static void default_release_jobs(rt_domain_t* rt, struct bheap* tasks)
10962 +{
10963 +	merge_ready(rt, tasks);
10964 +}
10965 +
10966 +static unsigned int time2slot(lt_t time)
10967 +{
10968 +	return (unsigned int) time2quanta(time, FLOOR) % RELEASE_QUEUE_SLOTS;
10969 +}
10970 +
10971 +static enum hrtimer_restart on_release_timer(struct hrtimer *timer)
10972 +{
10973 +	unsigned long flags;
10974 +	struct release_heap* rh;
10975 +	rh = container_of(timer, struct release_heap, timer);
10976 +
10977 +	TS_RELEASE_LATENCY(rh->release_time);
10978 +
10979 +	VTRACE("on_release_timer(0x%p) starts.\n", timer);
10980 +
10981 +	TS_RELEASE_START;
10982 +
10983 +
10984 +	raw_spin_lock_irqsave(&rh->dom->release_lock, flags);
10985 +	VTRACE("CB has the release_lock 0x%p\n", &rh->dom->release_lock);
10986 +	/* remove from release queue */
10987 +	list_del(&rh->list);
10988 +	raw_spin_unlock_irqrestore(&rh->dom->release_lock, flags);
10989 +	VTRACE("CB returned release_lock 0x%p\n", &rh->dom->release_lock);
10990 +
10991 +	/* call release callback */
10992 +	rh->dom->release_jobs(rh->dom, &rh->heap);
10993 +	/* WARNING: rh can be referenced from other CPUs from now on. */
10994 +
10995 +	TS_RELEASE_END;
10996 +
10997 +	VTRACE("on_release_timer(0x%p) ends.\n", timer);
10998 +
10999 +	return  HRTIMER_NORESTART;
11000 +}
11001 +
11002 +/* allocated in litmus.c */
11003 +struct kmem_cache * release_heap_cache;
11004 +
11005 +struct release_heap* release_heap_alloc(int gfp_flags)
11006 +{
11007 +	struct release_heap* rh;
11008 +	rh = kmem_cache_alloc(release_heap_cache, gfp_flags);
11009 +	if (rh) {
11010 +		/* initialize timer */
11011 +		hrtimer_init(&rh->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
11012 +		rh->timer.function = on_release_timer;
11013 +	}
11014 +	return rh;
11015 +}
11016 +
11017 +void release_heap_free(struct release_heap* rh)
11018 +{
11019 +	/* make sure timer is no longer in use */
11020 +	hrtimer_cancel(&rh->timer);
11021 +	kmem_cache_free(release_heap_cache, rh);
11022 +}
11023 +
11024 +/* Caller must hold release lock.
11025 + * Will return heap for given time. If no such heap exists prior to
11026 + * the invocation it will be created.
11027 + */
11028 +static struct release_heap* get_release_heap(rt_domain_t *rt,
11029 +					     struct task_struct* t,
11030 +					     int use_task_heap)
11031 +{
11032 +	struct list_head* pos;
11033 +	struct release_heap* heap = NULL;
11034 +	struct release_heap* rh;
11035 +	lt_t release_time = get_release(t);
11036 +	unsigned int slot = time2slot(release_time);
11037 +
11038 +	/* initialize pos for the case that the list is empty */
11039 +	pos = rt->release_queue.slot[slot].next;
11040 +	list_for_each(pos, &rt->release_queue.slot[slot]) {
11041 +		rh = list_entry(pos, struct release_heap, list);
11042 +		if (release_time == rh->release_time) {
11043 +			/* perfect match -- this happens on hyperperiod
11044 +			 * boundaries
11045 +			 */
11046 +			heap = rh;
11047 +			break;
11048 +		} else if (lt_before(release_time, rh->release_time)) {
11049 +			/* we need to insert a new node since rh is
11050 +			 * already in the future
11051 +			 */
11052 +			break;
11053 +		}
11054 +	}
11055 +	if (!heap && use_task_heap) {
11056 +		/* use pre-allocated release heap */
11057 +		rh = tsk_rt(t)->rel_heap;
11058 +
11059 +		rh->dom = rt;
11060 +		rh->release_time = release_time;
11061 +
11062 +		/* add to release queue */
11063 +		list_add(&rh->list, pos->prev);
11064 +		heap = rh;
11065 +	}
11066 +	return heap;
11067 +}
11068 +
11069 +static void reinit_release_heap(struct task_struct* t)
11070 +{
11071 +	struct release_heap* rh;
11072 +
11073 +	/* use pre-allocated release heap */
11074 +	rh = tsk_rt(t)->rel_heap;
11075 +
11076 +	/* Make sure it is safe to use.  The timer callback could still
11077 +	 * be executing on another CPU; hrtimer_cancel() will wait
11078 +	 * until the timer callback has completed.  However, under no
11079 +	 * circumstances should the timer be active (= yet to be
11080 +	 * triggered).
11081 +	 *
11082 +	 * WARNING: If the CPU still holds the release_lock at this point,
11083 +	 *          deadlock may occur!
11084 +	 */
11085 +	BUG_ON(hrtimer_cancel(&rh->timer));
11086 +
11087 +	/* initialize */
11088 +	bheap_init(&rh->heap);
11089 +#ifdef CONFIG_RELEASE_MASTER
11090 +	atomic_set(&rh->info.state, HRTIMER_START_ON_INACTIVE);
11091 +#endif
11092 +}
11093 +/* arm_release_timer() - start local release timer or trigger
11094 + *     remote timer (pull timer)
11095 + *
11096 + * Called by add_release() with:
11097 + * - tobe_lock taken
11098 + * - IRQ disabled
11099 + */
11100 +#ifdef CONFIG_RELEASE_MASTER
11101 +#define arm_release_timer(t) arm_release_timer_on((t), NO_CPU)
11102 +static void arm_release_timer_on(rt_domain_t *_rt , int target_cpu)
11103 +#else
11104 +static void arm_release_timer(rt_domain_t *_rt)
11105 +#endif
11106 +{
11107 +	rt_domain_t *rt = _rt;
11108 +	struct list_head list;
11109 +	struct list_head *pos, *safe;
11110 +	struct task_struct* t;
11111 +	struct release_heap* rh;
11112 +
11113 +	VTRACE("arm_release_timer() at %llu\n", litmus_clock());
11114 +	list_replace_init(&rt->tobe_released, &list);
11115 +
11116 +	list_for_each_safe(pos, safe, &list) {
11117 +		/* pick task off the work list */
11118 +		t = list_entry(pos, struct task_struct, rt_param.list);
11119 +		sched_trace_task_release(t);
11120 +		list_del(pos);
11121 +
11122 +		/* put into release heap while holding release_lock */
11123 +		raw_spin_lock(&rt->release_lock);
11124 +		VTRACE_TASK(t, "I have the release_lock 0x%p\n", &rt->release_lock);
11125 +
11126 +		rh = get_release_heap(rt, t, 0);
11127 +		if (!rh) {
11128 +			/* need to use our own, but drop lock first */
11129 +			raw_spin_unlock(&rt->release_lock);
11130 +			VTRACE_TASK(t, "Dropped release_lock 0x%p\n",
11131 +				    &rt->release_lock);
11132 +
11133 +			reinit_release_heap(t);
11134 +			VTRACE_TASK(t, "release_heap ready\n");
11135 +
11136 +			raw_spin_lock(&rt->release_lock);
11137 +			VTRACE_TASK(t, "Re-acquired release_lock 0x%p\n",
11138 +				    &rt->release_lock);
11139 +
11140 +			rh = get_release_heap(rt, t, 1);
11141 +		}
11142 +		bheap_insert(rt->order, &rh->heap, tsk_rt(t)->heap_node);
11143 +		VTRACE_TASK(t, "arm_release_timer(): added to release heap\n");
11144 +
11145 +		raw_spin_unlock(&rt->release_lock);
11146 +		VTRACE_TASK(t, "Returned the release_lock 0x%p\n", &rt->release_lock);
11147 +
11148 +		/* To avoid arming the timer multiple times, we only let the
11149 +		 * owner do the arming (which is the "first" task to reference
11150 +		 * this release_heap anyway).
11151 +		 */
11152 +		if (rh == tsk_rt(t)->rel_heap) {
11153 +			VTRACE_TASK(t, "arming timer 0x%p\n", &rh->timer);
11154 +
11155 +			if (!hrtimer_is_hres_active(&rh->timer)) {
11156 +				TRACE_TASK(t, "WARNING: no hires timer!!!\n");
11157 +			}
11158 +
11159 +			/* we cannot arm the timer using hrtimer_start()
11160 +			 * as it may deadlock on rq->lock
11161 +			 *
11162 +			 * PINNED mode is ok on both local and remote CPU
11163 +			 */
11164 +#ifdef CONFIG_RELEASE_MASTER
11165 +			if (rt->release_master == NO_CPU &&
11166 +			    target_cpu == NO_CPU)
11167 +#endif
11168 +				__hrtimer_start_range_ns(&rh->timer,
11169 +						ns_to_ktime(rh->release_time),
11170 +						0, HRTIMER_MODE_ABS_PINNED, 0);
11171 +#ifdef CONFIG_RELEASE_MASTER
11172 +			else
11173 +				hrtimer_start_on(
11174 +					/* target_cpu overrides release master */
11175 +					(target_cpu != NO_CPU ?
11176 +					 target_cpu : rt->release_master),
11177 +					&rh->info, &rh->timer,
11178 +					ns_to_ktime(rh->release_time),
11179 +					HRTIMER_MODE_ABS_PINNED);
11180 +#endif
11181 +		} else
11182 +			VTRACE_TASK(t, "0x%p is not my timer\n", &rh->timer);
11183 +	}
11184 +}
11185 +
11186 +void rt_domain_init(rt_domain_t *rt,
11187 +		    bheap_prio_t order,
11188 +		    check_resched_needed_t check,
11189 +		    release_jobs_t release
11190 +		   )
11191 +{
11192 +	int i;
11193 +
11194 +	BUG_ON(!rt);
11195 +	if (!check)
11196 +		check = dummy_resched;
11197 +	if (!release)
11198 +		release = default_release_jobs;
11199 +	if (!order)
11200 +		order = dummy_order;
11201 +
11202 +#ifdef CONFIG_RELEASE_MASTER
11203 +	rt->release_master = NO_CPU;
11204 +#endif
11205 +
11206 +	bheap_init(&rt->ready_queue);
11207 +	INIT_LIST_HEAD(&rt->tobe_released);
11208 +	for (i = 0; i < RELEASE_QUEUE_SLOTS; i++)
11209 +		INIT_LIST_HEAD(&rt->release_queue.slot[i]);
11210 +
11211 +	raw_spin_lock_init(&rt->ready_lock);
11212 +	raw_spin_lock_init(&rt->release_lock);
11213 +	raw_spin_lock_init(&rt->tobe_lock);
11214 +
11215 +	rt->check_resched 	= check;
11216 +	rt->release_jobs	= release;
11217 +	rt->order		= order;
11218 +}
11219 +
11220 +/* add_ready - add a real-time task to the rt ready queue. It must be runnable.
11221 + * @new:       the newly released task
11222 + */
11223 +void __add_ready(rt_domain_t* rt, struct task_struct *new)
11224 +{
11225 +	TRACE("rt: adding %s/%d (%llu, %llu, %llu) rel=%llu "
11226 +		"to ready queue at %llu\n",
11227 +		new->comm, new->pid,
11228 +		get_exec_cost(new), get_rt_period(new), get_rt_relative_deadline(new),
11229 +		get_release(new), litmus_clock());
11230 +
11231 +	BUG_ON(bheap_node_in_heap(tsk_rt(new)->heap_node));
11232 +
11233 +	bheap_insert(rt->order, &rt->ready_queue, tsk_rt(new)->heap_node);
11234 +	rt->check_resched(rt);
11235 +}
11236 +
11237 +/* merge_ready - Add a sorted set of tasks to the rt ready queue. They must be runnable.
11238 + * @tasks      - the newly released tasks
11239 + */
11240 +void __merge_ready(rt_domain_t* rt, struct bheap* tasks)
11241 +{
11242 +	bheap_union(rt->order, &rt->ready_queue, tasks);
11243 +	rt->check_resched(rt);
11244 +}
11245 +
11246 +
11247 +#ifdef CONFIG_RELEASE_MASTER
11248 +void __add_release_on(rt_domain_t* rt, struct task_struct *task,
11249 +		      int target_cpu)
11250 +{
11251 +	TRACE_TASK(task, "add_release_on(), rel=%llu, target=%d\n",
11252 +		   get_release(task), target_cpu);
11253 +	list_add(&tsk_rt(task)->list, &rt->tobe_released);
11254 +	task->rt_param.domain = rt;
11255 +
11256 +	arm_release_timer_on(rt, target_cpu);
11257 +}
11258 +#endif
11259 +
11260 +/* add_release - add a real-time task to the rt release queue.
11261 + * @task:        the sleeping task
11262 + */
11263 +void __add_release(rt_domain_t* rt, struct task_struct *task)
11264 +{
11265 +	TRACE_TASK(task, "add_release(), rel=%llu\n", get_release(task));
11266 +	list_add(&tsk_rt(task)->list, &rt->tobe_released);
11267 +	task->rt_param.domain = rt;
11268 +
11269 +	arm_release_timer(rt);
11270 +}
11271 diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
11272 new file mode 100644
11273 index 0000000..edd91e9
11274 --- /dev/null
11275 +++ b/litmus/sched_plugin.c
11276 @@ -0,0 +1,238 @@
11277 +/* sched_plugin.c -- core infrastructure for the scheduler plugin system
11278 + *
11279 + * This file includes the initialization of the plugin system, the no-op Linux
11280 + * scheduler plugin, some dummy functions, and some helper functions.
11281 + */
11282 +
11283 +#include <linux/list.h>
11284 +#include <linux/spinlock.h>
11285 +#include <linux/sched.h>
11286 +#include <linux/seq_file.h>
11287 +
11288 +#include <litmus/litmus.h>
11289 +#include <litmus/sched_plugin.h>
11290 +#include <litmus/preempt.h>
11291 +#include <litmus/jobs.h>
11292 +
11293 +/*
11294 + * Generic function to trigger preemption on either local or remote cpu
11295 + * from scheduler plugins. The key feature is that this function is
11296 + * non-preemptive section aware and does not invoke the scheduler / send
11297 + * IPIs if the to-be-preempted task is actually non-preemptive.
11298 + */
11299 +void preempt_if_preemptable(struct task_struct* t, int cpu)
11300 +{
11301 +	/* t is the real-time task executing on processor 'cpu'. If t is NULL,
11302 +	 * then 'cpu' is currently scheduling background work.
11303 +	 */
11304 +
11305 +	int reschedule = 0;
11306 +
11307 +	if (!t)
11308 +		/* move non-real-time task out of the way */
11309 +		reschedule = 1;
11310 +	else {
11311 +		if (smp_processor_id() == cpu) {
11312 +			/* local CPU case */
11313 +			/* check if we need to poke userspace */
11314 +			if (is_user_np(t))
11315 +				/* Yes, poke it. This doesn't have to be atomic since
11316 +				 * the task is definitely not executing. */
11317 +				request_exit_np(t);
11318 +			else if (!is_kernel_np(t))
11319 +				/* only if we are allowed to preempt the
11320 +				 * currently-executing task */
11321 +				reschedule = 1;
11322 +		} else {
11323 +			/* Remote CPU case.  Only notify if it's not a kernel
11324 +			 * NP section and if we didn't set the userspace
11325 +			 * flag. */
11326 +			reschedule = !(is_kernel_np(t) || request_exit_np_atomic(t));
11327 +		}
11328 +	}
11329 +	if (likely(reschedule))
11330 +		litmus_reschedule(cpu);
11331 +}
11332 +
11333 +
11334 +/*************************************************************
11335 + *                   Dummy plugin functions                  *
11336 + *************************************************************/
11337 +
11338 +static void litmus_dummy_finish_switch(struct task_struct * prev)
11339 +{
11340 +}
11341 +
11342 +static struct task_struct* litmus_dummy_schedule(struct task_struct * prev)
11343 +{
11344 +	sched_state_task_picked();
11345 +	return NULL;
11346 +}
11347 +
11348 +static long litmus_dummy_admit_task(struct task_struct* tsk)
11349 +{
11350 +	printk(KERN_CRIT "LITMUS^RT: Linux plugin rejects %s/%d.\n",
11351 +		tsk->comm, tsk->pid);
11352 +	return -EINVAL;
11353 +}
11354 +
11355 +static void litmus_dummy_task_new(struct task_struct *t, int on_rq, int running)
11356 +{
11357 +}
11358 +
11359 +static void litmus_dummy_task_wake_up(struct task_struct *task)
11360 +{
11361 +}
11362 +
11363 +static void litmus_dummy_task_block(struct task_struct *task)
11364 +{
11365 +}
11366 +
11367 +static void litmus_dummy_task_exit(struct task_struct *task)
11368 +{
11369 +}
11370 +
11371 +static void litmus_dummy_task_cleanup(struct task_struct *task)
11372 +{
11373 +}
11374 +
11375 +static long litmus_dummy_complete_job(void)
11376 +{
11377 +	return -ENOSYS;
11378 +}
11379 +
11380 +static long litmus_dummy_activate_plugin(void)
11381 +{
11382 +	return 0;
11383 +}
11384 +
11385 +static long litmus_dummy_deactivate_plugin(void)
11386 +{
11387 +	return 0;
11388 +}
11389 +
11390 +static long litmus_dummy_get_domain_proc_info(struct domain_proc_info **d)
11391 +{
11392 +	*d = NULL;
11393 +	return 0;
11394 +}
11395 +
11396 +static void litmus_dummy_synchronous_release_at(lt_t time_zero)
11397 +{
11398 +	/* ignore */
11399 +}
11400 +
11401 +#ifdef CONFIG_LITMUS_LOCKING
11402 +
11403 +static long litmus_dummy_allocate_lock(struct litmus_lock **lock, int type,
11404 +				       void* __user config)
11405 +{
11406 +	return -ENXIO;
11407 +}
11408 +
11409 +#endif
11410 +
11411 +
11412 +/* The default scheduler plugin. It doesn't do anything and lets Linux do its
11413 + * job.
11414 + */
11415 +struct sched_plugin linux_sched_plugin = {
11416 +	.plugin_name = "Linux",
11417 +	.task_new   = litmus_dummy_task_new,
11418 +	.task_exit = litmus_dummy_task_exit,
11419 +	.task_wake_up = litmus_dummy_task_wake_up,
11420 +	.task_block = litmus_dummy_task_block,
11421 +	.complete_job = litmus_dummy_complete_job,
11422 +	.schedule = litmus_dummy_schedule,
11423 +	.finish_switch = litmus_dummy_finish_switch,
11424 +	.activate_plugin = litmus_dummy_activate_plugin,
11425 +	.deactivate_plugin = litmus_dummy_deactivate_plugin,
11426 +	.get_domain_proc_info = litmus_dummy_get_domain_proc_info,
11427 +	.synchronous_release_at = litmus_dummy_synchronous_release_at,
11428 +#ifdef CONFIG_LITMUS_LOCKING
11429 +	.allocate_lock = litmus_dummy_allocate_lock,
11430 +#endif
11431 +	.admit_task = litmus_dummy_admit_task
11432 +};
11433 +
11434 +/*
11435 + *	The reference to the current plugin that is used to schedule tasks within
11436 + *	the system. It stores references to the actual function implementations.
11437 + *	It should be initialized by calling "init_***_plugin()".
11438 + */
11439 +struct sched_plugin *litmus = &linux_sched_plugin;
11440 +
11441 +/* the list of registered scheduling plugins */
11442 +static LIST_HEAD(sched_plugins);
11443 +static DEFINE_RAW_SPINLOCK(sched_plugins_lock);
11444 +
11445 +#define CHECK(func) {\
11446 +	if (!plugin->func) \
11447 +		plugin->func = litmus_dummy_ ## func;}
11448 +
11449 +/* FIXME: get reference to module  */
11450 +int register_sched_plugin(struct sched_plugin* plugin)
11451 +{
11452 +	printk(KERN_INFO "Registering LITMUS^RT plugin %s.\n",
11453 +	       plugin->plugin_name);
11454 +
11455 +	/* make sure we don't trip over null pointers later */
11456 +	CHECK(finish_switch);
11457 +	CHECK(schedule);
11458 +	CHECK(task_wake_up);
11459 +	CHECK(task_exit);
11460 +	CHECK(task_cleanup);
11461 +	CHECK(task_block);
11462 +	CHECK(task_new);
11463 +	CHECK(complete_job);
11464 +	CHECK(activate_plugin);
11465 +	CHECK(deactivate_plugin);
11466 +	CHECK(get_domain_proc_info);
11467 +#ifdef CONFIG_LITMUS_LOCKING
11468 +	CHECK(allocate_lock);
11469 +#endif
11470 +	CHECK(admit_task);
11471 +	CHECK(synchronous_release_at);
11472 +
11473 +	if (!plugin->wait_for_release_at)
11474 +		plugin->wait_for_release_at = default_wait_for_release_at;
11475 +
11476 +	raw_spin_lock(&sched_plugins_lock);
11477 +	list_add(&plugin->list, &sched_plugins);
11478 +	raw_spin_unlock(&sched_plugins_lock);
11479 +
11480 +	return 0;
11481 +}
11482 +
11483 +
11484 +/* FIXME: reference counting, etc. */
11485 +struct sched_plugin* find_sched_plugin(const char* name)
11486 +{
11487 +	struct list_head *pos;
11488 +	struct sched_plugin *plugin;
11489 +
11490 +	raw_spin_lock(&sched_plugins_lock);
11491 +	list_for_each(pos, &sched_plugins) {
11492 +		plugin = list_entry(pos, struct sched_plugin, list);
11493 +		if (!strcmp(plugin->plugin_name, name))
11494 +		    goto out_unlock;
11495 +	}
11496 +	plugin = NULL;
11497 +
11498 +out_unlock:
11499 +	raw_spin_unlock(&sched_plugins_lock);
11500 +	return plugin;
11501 +}
11502 +
11503 +void print_sched_plugins(struct seq_file *m)
11504 +{
11505 +	struct list_head *pos;
11506 +	struct sched_plugin *plugin;
11507 +
11508 +	raw_spin_lock(&sched_plugins_lock);
11509 +	list_for_each(pos, &sched_plugins) {
11510 +		plugin = list_entry(pos, struct sched_plugin, list);
11511 +		seq_printf(m, "%s\n", plugin->plugin_name);
11512 +	}
11513 +	raw_spin_unlock(&sched_plugins_lock);
11514 +}
11515 diff --git a/litmus/srp.c b/litmus/srp.c
11516 new file mode 100644
11517 index 0000000..e4e3811
11518 --- /dev/null
11519 +++ b/litmus/srp.c
11520 @@ -0,0 +1,313 @@
11521 +/* ************************************************************************** */
11522 +/*                          STACK RESOURCE POLICY                             */
11523 +/* ************************************************************************** */
11524 +
11525 +#include <asm/atomic.h>
11526 +#include <linux/sched.h>
11527 +#include <linux/wait.h>
11528 +
11529 +#include <litmus/litmus.h>
11530 +#include <litmus/sched_plugin.h>
11531 +#include <litmus/fdso.h>
11532 +#include <litmus/trace.h>
11533 +
11534 +
11535 +#ifdef CONFIG_LITMUS_LOCKING
11536 +
11537 +#include <litmus/srp.h>
11538 +
11539 +srp_prioritization_t get_srp_prio;
11540 +
11541 +struct srp {
11542 +	struct list_head	ceiling;
11543 +	wait_queue_head_t	ceiling_blocked;
11544 +};
11545 +#define system_ceiling(srp) list2prio(srp->ceiling.next)
11546 +#define ceiling2sem(c) container_of(c, struct srp_semaphore, ceiling)
11547 +
11548 +#define UNDEF_SEM -2
11549 +
11550 +atomic_t srp_objects_in_use = ATOMIC_INIT(0);
11551 +
11552 +DEFINE_PER_CPU(struct srp, srp);
11553 +
11554 +/* Initialize SRP semaphores at boot time. */
11555 +static int __init srp_init(void)
11556 +{
11557 +	int i;
11558 +
11559 +	printk("Initializing SRP per-CPU ceilings...");
11560 +	for (i = 0; i < NR_CPUS; i++) {
11561 +		init_waitqueue_head(&per_cpu(srp, i).ceiling_blocked);
11562 +		INIT_LIST_HEAD(&per_cpu(srp, i).ceiling);
11563 +	}
11564 +	printk(" done!\n");
11565 +
11566 +	return 0;
11567 +}
11568 +module_init(srp_init);
11569 +
11570 +/* SRP task priority comparison function. Smaller numeric values have higher
11571 + * priority, tie-break is PID. Special case: priority == 0 <=> no priority
11572 + */
11573 +static int srp_higher_prio(struct srp_priority* first,
11574 +			   struct srp_priority* second)
11575 +{
11576 +	if (!first->priority)
11577 +		return 0;
11578 +	else
11579 +		return  !second->priority ||
11580 +			first->priority < second->priority || (
11581 +			first->priority == second->priority &&
11582 +			first->pid < second->pid);
11583 +}
11584 +
11585 +
11586 +static int srp_exceeds_ceiling(struct task_struct* first,
11587 +			       struct srp* srp)
11588 +{
11589 +	struct srp_priority prio;
11590 +
11591 +	if (list_empty(&srp->ceiling))
11592 +		return 1;
11593 +	else {
11594 +		prio.pid = first->pid;
11595 +		prio.priority = get_srp_prio(first);
11596 +		return srp_higher_prio(&prio, system_ceiling(srp)) ||
11597 +			ceiling2sem(system_ceiling(srp))->owner == first;
11598 +	}
11599 +}
11600 +
11601 +static void srp_add_prio(struct srp* srp, struct srp_priority* prio)
11602 +{
11603 +	struct list_head *pos;
11604 +	if (in_list(&prio->list)) {
11605 +		printk(KERN_CRIT "WARNING: SRP violation detected, prio is already in "
11606 +		       "ceiling list! cpu=%d, srp=%p\n", smp_processor_id(), ceiling2sem(prio));
11607 +		return;
11608 +	}
11609 +	list_for_each(pos, &srp->ceiling)
11610 +		if (unlikely(srp_higher_prio(prio, list2prio(pos)))) {
11611 +			__list_add(&prio->list, pos->prev, pos);
11612 +			return;
11613 +		}
11614 +
11615 +	list_add_tail(&prio->list, &srp->ceiling);
11616 +}
11617 +
11618 +
11619 +static int lock_srp_semaphore(struct litmus_lock* l)
11620 +{
11621 +	struct task_struct* t = current;
11622 +	struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
11623 +
11624 +	if (!is_realtime(t))
11625 +		return -EPERM;
11626 +
11627 +	/* prevent acquisition of local locks in global critical sections */
11628 +	if (tsk_rt(t)->num_locks_held)
11629 +		return -EBUSY;
11630 +
11631 +	preempt_disable();
11632 +
11633 +	/* Update ceiling. */
11634 +	srp_add_prio(&__get_cpu_var(srp), &sem->ceiling);
11635 +
11636 +	/* SRP invariant: all resources available */
11637 +	BUG_ON(sem->owner != NULL);
11638 +
11639 +	sem->owner = t;
11640 +	TRACE_CUR("acquired srp 0x%p\n", sem);
11641 +
11642 +	tsk_rt(t)->num_local_locks_held++;
11643 +
11644 +	preempt_enable();
11645 +
11646 +	return 0;
11647 +}
11648 +
11649 +static int unlock_srp_semaphore(struct litmus_lock* l)
11650 +{
11651 +	struct task_struct* t = current;
11652 +	struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
11653 +	int err = 0;
11654 +
11655 +	preempt_disable();
11656 +
11657 +	if (sem->owner != t) {
11658 +		err = -EINVAL;
11659 +	} else {
11660 +		/* The current owner should be executing on the correct CPU.
11661 +		 *
11662 +		 * FIXME: if the owner transitioned out of RT mode or is
11663 +		 * exiting, then it might have already been migrated away by
11664 +		 * the best-effort scheduler and we just have to deal with
11665 +		 * it. This is currently not supported. */
11666 +		BUG_ON(sem->cpu != smp_processor_id());
11667 +
11668 +		/* Determine new system priority ceiling for this CPU. */
11669 +		BUG_ON(!in_list(&sem->ceiling.list));
11670 +
11671 +		list_del(&sem->ceiling.list);
11672 +		sem->owner = NULL;
11673 +
11674 +		/* Wake tasks on this CPU, if they exceed current ceiling. */
11675 +		TRACE_CUR("released srp 0x%p\n", sem);
11676 +		wake_up_all(&__get_cpu_var(srp).ceiling_blocked);
11677 +
11678 +		tsk_rt(t)->num_local_locks_held--;
11679 +	}
11680 +
11681 +	preempt_enable();
11682 +	return err;
11683 +}
11684 +
11685 +static int open_srp_semaphore(struct litmus_lock* l, void* __user arg)
11686 +{
11687 +	struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
11688 +	int err = 0;
11689 +	struct task_struct* t = current;
11690 +	struct srp_priority t_prio;
11691 +
11692 +	if (!is_realtime(t))
11693 +		return -EPERM;
11694 +
11695 +	TRACE_CUR("opening SRP semaphore %p, cpu=%d\n", sem, sem->cpu);
11696 +
11697 +	preempt_disable();
11698 +
11699 +	if (sem->owner != NULL)
11700 +		err = -EBUSY;
11701 +
11702 +	if (err == 0) {
11703 +		if (sem->cpu == UNDEF_SEM)
11704 +			sem->cpu = get_partition(t);
11705 +		else if (sem->cpu != get_partition(t))
11706 +			err = -EPERM;
11707 +	}
11708 +
11709 +	if (err == 0) {
11710 +		t_prio.priority = get_srp_prio(t);
11711 +		t_prio.pid      = t->pid;
11712 +		if (srp_higher_prio(&t_prio, &sem->ceiling)) {
11713 +			sem->ceiling.priority = t_prio.priority;
11714 +			sem->ceiling.pid      = t_prio.pid;
11715 +		}
11716 +	}
11717 +
11718 +	preempt_enable();
11719 +
11720 +	return err;
11721 +}
11722 +
11723 +static int close_srp_semaphore(struct litmus_lock* l)
11724 +{
11725 +	struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
11726 +	int err = 0;
11727 +
11728 +	preempt_disable();
11729 +
11730 +	if (sem->owner == current)
11731 +		unlock_srp_semaphore(l);
11732 +
11733 +	preempt_enable();
11734 +
11735 +	return err;
11736 +}
11737 +
11738 +static void deallocate_srp_semaphore(struct litmus_lock* l)
11739 +{
11740 +	struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
11741 +	atomic_dec(&srp_objects_in_use);
11742 +	kfree(sem);
11743 +}
11744 +
11745 +static struct litmus_lock_ops srp_lock_ops = {
11746 +	.open   = open_srp_semaphore,
11747 +	.close  = close_srp_semaphore,
11748 +	.lock   = lock_srp_semaphore,
11749 +	.unlock = unlock_srp_semaphore,
11750 +	.deallocate = deallocate_srp_semaphore,
11751 +};
11752 +
11753 +struct srp_semaphore* allocate_srp_semaphore(void)
11754 +{
11755 +	struct srp_semaphore* sem;
11756 +
11757 +	sem = kmalloc(sizeof(*sem), GFP_KERNEL);
11758 +	if (!sem)
11759 +		return NULL;
11760 +
11761 +	INIT_LIST_HEAD(&sem->ceiling.list);
11762 +	sem->ceiling.priority = 0;
11763 +	sem->cpu     = UNDEF_SEM;
11764 +	sem->owner   = NULL;
11765 +
11766 +	sem->litmus_lock.ops = &srp_lock_ops;
11767 +
11768 +	atomic_inc(&srp_objects_in_use);
11769 +	return sem;
11770 +}
11771 +
11772 +static int srp_wake_up(wait_queue_t *wait, unsigned mode, int sync,
11773 +		       void *key)
11774 +{
11775 +	int cpu = smp_processor_id();
11776 +	struct task_struct *tsk = wait->private;
11777 +	if (cpu != get_partition(tsk))
11778 +		TRACE_TASK(tsk, "srp_wake_up on wrong cpu, partition is %d\n",
11779 +			   get_partition(tsk));
11780 +	else if (srp_exceeds_ceiling(tsk, &__get_cpu_var(srp)))
11781 +		return default_wake_function(wait, mode, sync, key);
11782 +	return 0;
11783 +}
11784 +
11785 +static void do_ceiling_block(struct task_struct *tsk)
11786 +{
11787 +	wait_queue_t wait = {
11788 +		.private   = tsk,
11789 +		.func      = srp_wake_up,
11790 +		.task_list = {NULL, NULL}
11791 +	};
11792 +
11793 +	tsk->state = TASK_UNINTERRUPTIBLE;
11794 +	add_wait_queue(&__get_cpu_var(srp).ceiling_blocked, &wait);
11795 +	tsk->rt_param.srp_non_recurse = 1;
11796 +	preempt_enable_no_resched();
11797 +	schedule();
11798 +	preempt_disable();
11799 +	tsk->rt_param.srp_non_recurse = 0;
11800 +	remove_wait_queue(&__get_cpu_var(srp).ceiling_blocked, &wait);
11801 +}
11802 +
11803 +/* Wait for current task priority to exceed system-wide priority ceiling.
11804 + * FIXME: the hotpath should be inline.
11805 + */
11806 +void srp_ceiling_block(void)
11807 +{
11808 +	struct task_struct *tsk = current;
11809 +
11810 +	/* Only applies to real-time tasks, but optimize for RT tasks. */
11811 +	if (unlikely(!is_realtime(tsk)))
11812 +		return;
11813 +
11814 +	/* Avoid recursive ceiling blocking. */
11815 +	if (unlikely(tsk->rt_param.srp_non_recurse))
11816 +		return;
11817 +
11818 +	/* Bail out early if there aren't any SRP resources around. */
11819 +	if (likely(!atomic_read(&srp_objects_in_use)))
11820 +		return;
11821 +
11822 +	preempt_disable();
11823 +	if (!srp_exceeds_ceiling(tsk, &__get_cpu_var(srp))) {
11824 +		TRACE_CUR("is priority ceiling blocked.\n");
11825 +		while (!srp_exceeds_ceiling(tsk, &__get_cpu_var(srp)))
11826 +			do_ceiling_block(tsk);
11827 +		TRACE_CUR("finally exceeds system ceiling.\n");
11828 +	} else
11829 +		TRACE_CUR("is not priority ceiling blocked\n");
11830 +	preempt_enable();
11831 +}
11832 +
11833 +#endif
11834 diff --git a/litmus/sync.c b/litmus/sync.c
11835 new file mode 100644
11836 index 0000000..5d18060
11837 --- /dev/null
11838 +++ b/litmus/sync.c
11839 @@ -0,0 +1,152 @@
11840 +/* litmus/sync.c - Support for synchronous and asynchronous task system releases.
11841 + *
11842 + *
11843 + */
11844 +
11845 +#include <asm/atomic.h>
11846 +#include <asm/uaccess.h>
11847 +#include <linux/spinlock.h>
11848 +#include <linux/list.h>
11849 +#include <linux/sched.h>
11850 +#include <linux/completion.h>
11851 +
11852 +#include <litmus/litmus.h>
11853 +#include <litmus/sched_plugin.h>
11854 +#include <litmus/jobs.h>
11855 +
11856 +#include <litmus/sched_trace.h>
11857 +
11858 +struct ts_release_wait {
11859 +	struct list_head list;
11860 +	struct completion completion;
11861 +	lt_t ts_release_time;
11862 +};
11863 +
11864 +#define DECLARE_TS_RELEASE_WAIT(symb)					\
11865 +	struct ts_release_wait symb =					\
11866 +	{								\
11867 +		LIST_HEAD_INIT(symb.list),				\
11868 +		COMPLETION_INITIALIZER_ONSTACK(symb.completion),	\
11869 +		0							\
11870 +	}
11871 +
11872 +static LIST_HEAD(task_release_list);
11873 +static DEFINE_MUTEX(task_release_lock);
11874 +
11875 +static long do_wait_for_ts_release(void)
11876 +{
11877 +	DECLARE_TS_RELEASE_WAIT(wait);
11878 +
11879 +	long ret = -ERESTARTSYS;
11880 +
11881 +	if (mutex_lock_interruptible(&task_release_lock))
11882 +		goto out;
11883 +
11884 +	list_add(&wait.list, &task_release_list);
11885 +
11886 +	mutex_unlock(&task_release_lock);
11887 +
11888 +	/* We are enqueued, now we wait for someone to wake us up. */
11889 +	ret = wait_for_completion_interruptible(&wait.completion);
11890 +
11891 +	if (!ret) {
11892 +		/* Completion succeeded, setup release time. */
11893 +		ret = litmus->wait_for_release_at(
11894 +			wait.ts_release_time + get_rt_phase(current));
11895 +	} else {
11896 +		/* We were interrupted, must cleanup list. */
11897 +		mutex_lock(&task_release_lock);
11898 +		if (!wait.completion.done)
11899 +			list_del(&wait.list);
11900 +		mutex_unlock(&task_release_lock);
11901 +	}
11902 +
11903 +out:
11904 +	return ret;
11905 +}
11906 +
11907 +int count_tasks_waiting_for_release(void)
11908 +{
11909 +	int task_count = 0;
11910 +	struct list_head *pos;
11911 +
11912 +	mutex_lock(&task_release_lock);
11913 +
11914 +	list_for_each(pos, &task_release_list) {
11915 +		task_count++;
11916 +	}
11917 +
11918 +	mutex_unlock(&task_release_lock);
11919 +
11920 +
11921 +	return task_count;
11922 +}
11923 +
11924 +static long do_release_ts(lt_t start)
11925 +{
11926 +	long  task_count = 0;
11927 +
11928 +	struct list_head	*pos, *safe;
11929 +	struct ts_release_wait	*wait;
11930 +
11931 +	if (mutex_lock_interruptible(&task_release_lock)) {
11932 +		task_count = -ERESTARTSYS;
11933 +		goto out;
11934 +	}
11935 +
11936 +	TRACE("<<<<<< synchronous task system release >>>>>>\n");
11937 +	sched_trace_sys_release(&start);
11938 +	litmus->synchronous_release_at(start);
11939 +
11940 +	task_count = 0;
11941 +	list_for_each_safe(pos, safe, &task_release_list) {
11942 +		wait = (struct ts_release_wait*)
11943 +			list_entry(pos, struct ts_release_wait, list);
11944 +
11945 +		task_count++;
11946 +		wait->ts_release_time = start;
11947 +		complete(&wait->completion);
11948 +	}
11949 +
11950 +	/* clear stale list */
11951 +	INIT_LIST_HEAD(&task_release_list);
11952 +
11953 +	mutex_unlock(&task_release_lock);
11954 +
11955 +out:
11956 +	return task_count;
11957 +}
11958 +
11959 +
11960 +asmlinkage long sys_wait_for_ts_release(void)
11961 +{
11962 +	long ret = -EPERM;
11963 +	struct task_struct *t = current;
11964 +
11965 +	if (is_realtime(t))
11966 +		ret = do_wait_for_ts_release();
11967 +
11968 +	return ret;
11969 +}
11970 +
11971 +#define ONE_MS 1000000
11972 +
11973 +asmlinkage long sys_release_ts(lt_t __user *__delay)
11974 +{
11975 +	long ret;
11976 +	lt_t delay;
11977 +	lt_t start_time;
11978 +
11979 +	/* FIXME: check capabilities... */
11980 +
11981 +	ret = copy_from_user(&delay, __delay, sizeof(delay));
11982 +	if (ret == 0) {
11983 +		/* round up to next larger integral millisecond */
11984 +		start_time = litmus_clock();
11985 +		do_div(start_time, ONE_MS);
11986 +		start_time *= ONE_MS;
11987 +		ret = do_release_ts(start_time + delay);
11988 +	}
11989 +
11990 +	return ret;
11991 +}
11992 diff --git a/litmus/uncachedev.c b/litmus/uncachedev.c
11993 new file mode 100644
11994 index 0000000..06a6a7c
11995 --- /dev/null
11996 +++ b/litmus/uncachedev.c
11997 @@ -0,0 +1,102 @@
11998 +#include <linux/sched.h>
11999 +#include <linux/kernel.h>
12000 +#include <linux/mm.h>
12001 +#include <linux/fs.h>
12002 +#include <linux/errno.h>
12003 +#include <linux/highmem.h>
12004 +#include <asm/page.h>
12005 +#include <linux/miscdevice.h>
12006 +#include <linux/module.h>
12007 +
12008 +#include <litmus/litmus.h>
12009 +
12010 +/* device for allocating pages not cached by the CPU */
12011 +
12012 +#define UNCACHE_NAME        "litmus/uncache"
12013 +
12014 +void litmus_uncache_vm_open(struct vm_area_struct *vma)
12015 +{
12016 +}
12017 +
12018 +void litmus_uncache_vm_close(struct vm_area_struct *vma)
12019 +{
12020 +}
12021 +
12022 +int litmus_uncache_vm_fault(struct vm_area_struct* vma,
12023 +							struct vm_fault* vmf)
12024 +{
12025 +	/* modeled after SG DMA video4linux, but without DMA. */
12026 +	/* (see drivers/media/video/videobuf-dma-sg.c) */
12027 +	struct page *page;
12028 +
12029 +	page = alloc_page(GFP_USER);
12030 +	if (!page)
12031 +		return VM_FAULT_OOM;
12032 +
12033 +	clear_user_highpage(page, (unsigned long)vmf->virtual_address);
12034 +	vmf->page = page;
12035 +
12036 +	return 0;
12037 +}
12038 +
12039 +static struct vm_operations_struct litmus_uncache_vm_ops = {
12040 +	.open = litmus_uncache_vm_open,
12041 +	.close = litmus_uncache_vm_close,
12042 +	.fault = litmus_uncache_vm_fault,
12043 +};
12044 +
12045 +static int litmus_uncache_mmap(struct file* filp, struct vm_area_struct* vma)
12046 +{
12047 +	/* first make sure mapper knows what he's doing */
12048 +
12049 +	/* you can only map the "first" page */
12050 +	if (vma->vm_pgoff != 0)
12051 +		return -EINVAL;
12052 +
12053 +	/* you can't share it with anyone */
12054 +	if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
12055 +		return -EINVAL;
12056 +
12057 +	/* cannot be expanded, and is not a "normal" page. */
12058 +	vma->vm_flags |= VM_DONTEXPAND;
12059 +
12060 +	/* noncached pages are not explicitly locked in memory (for now). */
12061 +	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
12062 +
12063 +	vma->vm_ops = &litmus_uncache_vm_ops;
12064 +
12065 +	return 0;
12066 +}
12067 +
12068 +static struct file_operations litmus_uncache_fops = {
12069 +	.owner = THIS_MODULE,
12070 +	.mmap  = litmus_uncache_mmap,
12071 +};
12072 +
12073 +static struct miscdevice litmus_uncache_dev = {
12074 +	.name  = UNCACHE_NAME,
12075 +	.minor = MISC_DYNAMIC_MINOR,
12076 +	.fops  = &litmus_uncache_fops,
12077 +	/* pages are not locked, so there is no reason why
12078 +	   anyone should not be able to allocate uncached pages. */
12079 +	.mode  = (S_IRUGO | S_IWUGO),
12080 +};
12081 +
12082 +static int __init init_litmus_uncache_dev(void)
12083 +{
12084 +	int err;
12085 +
12086 +	printk("Initializing LITMUS^RT uncache device.\n");
12087 +	err = misc_register(&litmus_uncache_dev);
12088 +	if (err)
12089 +		printk("Could not allocate %s device (%d).\n", UNCACHE_NAME, err);
12090 +	return err;
12091 +}
12092 +
12093 +static void __exit exit_litmus_uncache_dev(void)
12094 +{
12095 +	misc_deregister(&litmus_uncache_dev);
12096 +}
12097 +
12098 +module_init(init_litmus_uncache_dev);
12099 +module_exit(exit_litmus_uncache_dev);
12100 -- 
12101 1.8.1.2
12102 
12103 
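For orientation, the following skeleton illustrates how a scheduler plugin hooks into the interface introduced above. It is a sketch only: the "DEMO" plugin, its callbacks, and the exact header set are placeholders, and any callback left unset is filled in with the litmus_dummy_* defaults by register_sched_plugin() via the CHECK() macro in litmus/sched_plugin.c.

    /* Illustrative only: a do-nothing plugin registering with the
     * infrastructure from litmus/sched_plugin.c. */
    #include <linux/module.h>
    #include <litmus/sched_plugin.h>
    #include <litmus/preempt.h>

    static struct task_struct* demo_schedule(struct task_struct *prev)
    {
    	/* A real plugin would pick the next real-time task here;
    	 * returning NULL defers to the Linux scheduler. */
    	sched_state_task_picked();
    	return NULL;
    }

    static long demo_admit_task(struct task_struct *tsk)
    {
    	/* A real plugin would validate the task's rt_param here. */
    	return 0;
    }

    static struct sched_plugin demo_plugin = {
    	.plugin_name = "DEMO",
    	.schedule    = demo_schedule,
    	.admit_task  = demo_admit_task,
    	/* all other callbacks fall back to litmus_dummy_* defaults */
    };

    static int __init init_demo_plugin(void)
    {
    	return register_sched_plugin(&demo_plugin);
    }
    module_init(init_demo_plugin);
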
12104 From 469c9f7ad36d105b31f478855f9c45ff376d3582 Mon Sep 17 00:00:00 2001
12105 From: Bjoern Brandenburg <bbb@mpi-sws.org>
12106 Date: Tue, 12 Feb 2013 18:57:35 +0100
12107 Subject: [PATCH 021/119] Add LITMUS^RT syscalls for ARM
12108 
12109 ---
12110  arch/arm/include/asm/unistd.h      |  3 ++-
12111  arch/arm/include/uapi/asm/unistd.h |  3 +++
12112  arch/arm/kernel/calls.S            | 13 +++++++++++++
12113  3 files changed, 18 insertions(+), 1 deletion(-)
12114 
12115 diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
12116 index 141baa3..8b26b32 100644
12117 --- a/arch/arm/include/asm/unistd.h
12118 +++ b/arch/arm/include/asm/unistd.h
12119 @@ -15,7 +15,8 @@
12120  
12121  #include <uapi/asm/unistd.h>
12122  
12123 -#define __NR_syscalls  (380)
12124 +#define __NR_syscalls  (380 + NR_litmus_syscalls)
12125 +
12126  #define __ARM_NR_cmpxchg		(__ARM_NR_BASE+0x00fff0)
12127  
12128  #define __ARCH_WANT_STAT64
12129 diff --git a/arch/arm/include/uapi/asm/unistd.h b/arch/arm/include/uapi/asm/unistd.h
12130 index af33b44..1a767bf 100644
12131 --- a/arch/arm/include/uapi/asm/unistd.h
12132 +++ b/arch/arm/include/uapi/asm/unistd.h
12133 @@ -407,6 +407,9 @@
12134  #define __NR_kcmp			(__NR_SYSCALL_BASE+378)
12135  #define __NR_finit_module		(__NR_SYSCALL_BASE+379)
12136  
12137 +#define __NR_LITMUS (__NR_SYSCALL_BASE+380)
12138 +#include <litmus/unistd_32.h>
12139 +
12140  /*
12141   * This may need to be greater than __NR_last_syscall+1 in order to
12142   * account for the padding in the syscall table
12143 diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
12144 index c6ca7e3..2da776a 100644
12145 --- a/arch/arm/kernel/calls.S
12146 +++ b/arch/arm/kernel/calls.S
12147 @@ -389,6 +389,19 @@
12148  		CALL(sys_process_vm_writev)
12149  		CALL(sys_kcmp)
12150  		CALL(sys_finit_module)
12151 +/* 380 */	CALL(sys_set_rt_task_param)
12152 +		CALL(sys_get_rt_task_param)
12153 +		CALL(sys_complete_job)
12154 +		CALL(sys_od_open)
12155 +		CALL(sys_od_close)
12156 +/* 385 */	CALL(sys_litmus_lock)
12157 +		CALL(sys_litmus_unlock)
12158 +		CALL(sys_query_job_no)
12159 +		CALL(sys_wait_for_job_release)
12160 +		CALL(sys_wait_for_ts_release)
12161 +/* 390 */	CALL(sys_release_ts)
12162 +		CALL(sys_null_call)
12163 +
12164  #ifndef syscalls_counted
12165  .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
12166  #define syscalls_counted
12167 -- 
12168 1.8.1.2
12169 
12170 
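With the EABI syscall base of 0, the table above places sys_wait_for_ts_release at 389 and sys_release_ts at 390; their implementations appear in litmus/sync.c earlier in the series. The sketch below shows how user space could drive a synchronous task-system release with raw syscall(2) invocations. liblitmus normally wraps these calls, and the raw numbers as well as the nanosecond interpretation of the delay argument are assumptions of this sketch.

    /* Hypothetical user-space sketch for ARM EABI (__NR_SYSCALL_BASE == 0). */
    #define _GNU_SOURCE
    #include <stdio.h>
    #include <unistd.h>

    #define NR_wait_for_ts_release 389
    #define NR_release_ts          390

    int main(int argc, char **argv)
    {
    	if (argc > 1) {
    		/* "release master": release all waiting tasks in 1 s */
    		unsigned long long delay_ns = 1000000000ULL;
    		long released = syscall(NR_release_ts, &delay_ns);
    		printf("released %ld waiting tasks\n", released);
    	} else {
    		/* Must already be a LITMUS^RT real-time task; otherwise
    		 * sys_wait_for_ts_release() returns -EPERM (see litmus/sync.c). */
    		long ret = syscall(NR_wait_for_ts_release);
    		printf("wait_for_ts_release returned %ld\n", ret);
    	}
    	return 0;
    }
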
12171 From 0072c939b32ecc73a15dc2cc28185f4324d28536 Mon Sep 17 00:00:00 2001
12172 From: Bjoern Brandenburg <bbb@mpi-sws.org>
12173 Date: Tue, 12 Feb 2013 19:01:01 +0100
12174 Subject: [PATCH 022/119] Add LITMUS^RT syscalls for x86
12175 
12176 ---
12177  arch/x86/syscalls/syscall_32.tbl | 13 +++++++++++++
12178  arch/x86/syscalls/syscall_64.tbl | 13 +++++++++++++
12179  2 files changed, 26 insertions(+)
12180 
12181 diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
12182 index aabfb83..ffe39dd 100644
12183 --- a/arch/x86/syscalls/syscall_32.tbl
12184 +++ b/arch/x86/syscalls/syscall_32.tbl
12185 @@ -357,3 +357,16 @@
12186  348	i386	process_vm_writev	sys_process_vm_writev		compat_sys_process_vm_writev
12187  349	i386	kcmp			sys_kcmp
12188  350	i386	finit_module		sys_finit_module
12189 +
12190 +351	i386	set_rt_task_param	sys_set_rt_task_param
12191 +352	i386	get_rt_task_param	sys_get_rt_task_param
12192 +353	i386	complete_job		sys_complete_job
12193 +354	i386	od_open			sys_od_open
12194 +355	i386	od_close		sys_od_close
12195 +356	i386	litmus_lock		sys_litmus_lock
12196 +357	i386	litmus_unlock		sys_litmus_unlock
12197 +358	i386	query_job_no		sys_query_job_no
12198 +359	i386	wait_for_job_release	sys_wait_for_job_release
12199 +360	i386	wait_for_ts_release	sys_wait_for_ts_release
12200 +361	i386	release_ts		sys_release_ts
12201 +362	i386	null_call		sys_null_call
12202 diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
12203 index 38ae65d..cde714e 100644
12204 --- a/arch/x86/syscalls/syscall_64.tbl
12205 +++ b/arch/x86/syscalls/syscall_64.tbl
12206 @@ -321,6 +321,19 @@
12207  312	common	kcmp			sys_kcmp
12208  313	common	finit_module		sys_finit_module
12209  
12210 +351	common	set_rt_task_param	sys_set_rt_task_param
12211 +352	common	get_rt_task_param	sys_get_rt_task_param
12212 +353	common	complete_job		sys_complete_job
12213 +354	common	od_open			sys_od_open
12214 +355	common	od_close		sys_od_close
12215 +356	common	litmus_lock		sys_litmus_lock
12216 +357	common	litmus_unlock		sys_litmus_unlock
12217 +358	common	query_job_no		sys_query_job_no
12218 +359	common	wait_for_job_release	sys_wait_for_job_release
12219 +360	common	wait_for_ts_release	sys_wait_for_ts_release
12220 +361	common	release_ts		sys_release_ts
12221 +362	common	null_call		sys_null_call
12222 +
12223  #
12224  # x32-specific system call numbers start at 512 to avoid cache impact
12225  # for native 64-bit operation.
12226 -- 
12227 1.8.1.2
12228 
12229 
12230 From a1b5e6c8f816c6c28dfbce7e16d3977f96e380f2 Mon Sep 17 00:00:00 2001
12231 From: Bjoern Brandenburg <bbb@mpi-sws.org>
12232 Date: Sun, 21 Jul 2013 13:51:38 +0200
12233 Subject: [PATCH 023/119] Move trace point definition to litmus/litmus.c
12234 
12235 If !CONFIG_SCHED_TASK_TRACE, but CONFIG_SCHED_LITMUS_TRACEPOINT, then
12236 we still need to define the tracepoint structures.
12237 
12238 This patch should be integrated with the earlier sched_task_trace.c
12239 patches during one of the next major rebasing efforts.
12240 ---
12241  litmus/litmus.c           | 5 +++++
12242  litmus/sched_task_trace.c | 5 -----
12243  2 files changed, 5 insertions(+), 5 deletions(-)
12244 
12245 diff --git a/litmus/litmus.c b/litmus/litmus.c
12246 index 9c419cd..a061343 100644
12247 --- a/litmus/litmus.c
12248 +++ b/litmus/litmus.c
12249 @@ -25,6 +25,11 @@
12250  #include <litmus/affinity.h>
12251  #endif
12252  
12253 +#ifdef CONFIG_SCHED_LITMUS_TRACEPOINT
12254 +#define CREATE_TRACE_POINTS
12255 +#include <trace/events/litmus.h>
12256 +#endif
12257 +
12258  /* Number of RT tasks that exist in the system */
12259  atomic_t rt_task_count 		= ATOMIC_INIT(0);
12260  
12261 diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c
12262 index 2bdfbbd..933e7e4 100644
12263 --- a/litmus/sched_task_trace.c
12264 +++ b/litmus/sched_task_trace.c
12265 @@ -15,11 +15,6 @@
12266  #include <litmus/feather_trace.h>
12267  #include <litmus/ftdev.h>
12268  
12269 -#ifdef CONFIG_SCHED_LITMUS_TRACEPOINT
12270 -#define CREATE_TRACE_POINTS
12271 -#include <trace/events/litmus.h>
12272 -#endif
12273 -
12274  #define NO_EVENTS		(1 << CONFIG_SCHED_TASK_TRACE_SHIFT)
12275  
12276  #define now() litmus_clock()
12277 -- 
12278 1.8.1.2
12279 
12280 
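The constraint behind this move is the standard tracepoint idiom: many translation units include the trace header to obtain the inline trace_*() stubs, but exactly one unit must define CREATE_TRACE_POINTS before including it so that the tracepoint definitions are emitted once. A generic sketch of the pattern follows; the "demo" event is hypothetical and does not reflect the actual contents of trace/events/litmus.h.

    /* hypothetical header: include/trace/events/demo.h */
    #undef TRACE_SYSTEM
    #define TRACE_SYSTEM demo

    #if !defined(_TRACE_DEMO_H) || defined(TRACE_HEADER_MULTI_READ)
    #define _TRACE_DEMO_H

    #include <linux/tracepoint.h>

    TRACE_EVENT(demo_event,
    	TP_PROTO(unsigned int value),
    	TP_ARGS(value),
    	TP_STRUCT__entry(
    		__field(unsigned int, value)
    	),
    	TP_fast_assign(
    		__entry->value = value;
    	),
    	TP_printk("value=%u", __entry->value)
    );

    #endif /* _TRACE_DEMO_H */

    /* this part must stay outside the include guard */
    #include <trace/define_trace.h>

    /* In exactly one .c file (for the LITMUS^RT events: litmus/litmus.c after
     * this patch) the definitions are instantiated: */
    #define CREATE_TRACE_POINTS
    #include <trace/events/demo.h>

    /* Every other file just includes the header and calls trace_demo_event(x). */
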
12281 From 751073bc996ac77c219c2031282ee7ce0f473af5 Mon Sep 17 00:00:00 2001
12282 From: Glenn Elliott <gelliott@cs.unc.edu>
12283 Date: Mon, 16 Sep 2013 17:26:56 -0400
12284 Subject: [PATCH 024/119] Record LITMUS^RT timestamp in ftrace records
12285 
12286 Patch updates ftrace.h to record a litmus_clock() time stamp
12287 in ftrace records.
12288 ---
12289  include/trace/ftrace.h | 7 +++++--
12290  1 file changed, 5 insertions(+), 2 deletions(-)
12291 
12292 diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
12293 index 66dba42..7571012 100644
12294 --- a/include/trace/ftrace.h
12295 +++ b/include/trace/ftrace.h
12296 @@ -18,6 +18,9 @@
12297  
12298  #include <linux/ftrace_event.h>
12299  
12300 +/* for litmus_clock() */
12301 +#include <litmus/litmus.h>
12302 +
12303  /*
12304   * DECLARE_EVENT_CLASS can be used to add a generic function
12305   * handlers for events. That is, if all events have the same
12306 @@ -54,7 +57,7 @@
12307  #define __string(item, src) __dynamic_array(char, item, -1)
12308  
12309  #undef TP_STRUCT__entry
12310 -#define TP_STRUCT__entry(args...) args
12311 +#define TP_STRUCT__entry(args...) args __field( unsigned long long, __rt_ts )
12312  
12313  #undef DECLARE_EVENT_CLASS
12314  #define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print)	\
12315 @@ -502,7 +505,7 @@ static inline notrace int ftrace_get_offsets_##call(			\
12316  	strcpy(__get_str(dst), (src) ? (const char *)(src) : "(null)");
12317  
12318  #undef TP_fast_assign
12319 -#define TP_fast_assign(args...) args
12320 +#define TP_fast_assign(args...) args; __entry->__rt_ts = litmus_clock();
12321  
12322  #undef TP_perf_assign
12323  #define TP_perf_assign(args...)
12324 -- 
12325 1.8.1.2
12326 
12327 
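Because TP_STRUCT__entry() and TP_fast_assign() are redefined, every event declared through this header gains one extra field and one extra assignment. For an event declared with a single __field(int, value), the generated record conceptually becomes the following; this is a simplified sketch, not the literal macro output.

    /* Conceptual result for an event whose declaration contains
     *   TP_STRUCT__entry( __field(int, value) )
     *   TP_fast_assign(   __entry->value = value; )
     */
    struct ftrace_raw_demo_event {
    	struct trace_entry	ent;
    	int			value;
    	unsigned long long	__rt_ts;   /* appended by the new TP_STRUCT__entry */
    	char			__data[0];
    };

    /* ...while the assignment stage now executes: */
    __entry->value   = value;
    __entry->__rt_ts = litmus_clock();     /* appended by the new TP_fast_assign */
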
12328 From d40f1cc129917e9478d48658073e46462968b973 Mon Sep 17 00:00:00 2001
12329 From: Bjoern Brandenburg <bbb@mpi-sws.org>
12330 Date: Tue, 25 Jun 2013 07:30:56 +0200
12331 Subject: [PATCH 025/119] Integrate preemption state machine with Linux
12332  scheduler
12333 
12334 Track when a processor is going to schedule "soon".
12335 ---
12336  arch/arm/kernel/smp.c |  4 ++++
12337  arch/x86/kernel/smp.c |  6 ++++++
12338  include/linux/sched.h |  2 ++
12339  kernel/sched/core.c   | 21 ++++++++++++++++++++-
12340  4 files changed, 32 insertions(+), 1 deletion(-)
12341 
12342 diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
12343 index 5919eb4..1a945e2 100644
12344 --- a/arch/arm/kernel/smp.c
12345 +++ b/arch/arm/kernel/smp.c
12346 @@ -46,6 +46,8 @@
12347  #include <asm/virt.h>
12348  #include <asm/mach/arch.h>
12349  
12350 +#include <litmus/preempt.h>
12351 +
12352  /*
12353   * as from 2.5, kernels no longer have an init_tasks structure
12354   * so we need some other way of telling a new secondary core
12355 @@ -617,6 +619,8 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
12356  #endif
12357  
12358  	case IPI_RESCHEDULE:
12359 +		/* LITMUS^RT: take action based on scheduler state */
12360 +		sched_state_ipi();
12361  		scheduler_ipi();
12362  		break;
12363  
12364 diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
12365 index a52ef7f..becf5c3 100644
12366 --- a/arch/x86/kernel/smp.c
12367 +++ b/arch/x86/kernel/smp.c
12368 @@ -24,6 +24,7 @@
12369  #include <linux/cpu.h>
12370  #include <linux/gfp.h>
12371  
12372 +#include <litmus/preempt.h>
12373  #include <litmus/debug_trace.h>
12374  
12375  #include <asm/mtrr.h>
12376 @@ -269,6 +270,11 @@ void smp_reschedule_interrupt(struct pt_regs *regs)
12377  	/*
12378  	 * KVM uses this interrupt to force a cpu out of guest mode
12379  	 */
12380 +
12381 +	/* LITMUS^RT: this IPI might need to trigger the sched state machine.
12382 +	 * Starting from 3.0, scheduler_ipi() actually does something.  This may
12383 +	 * increase IPI latencies compared with previous versions. */
12384 +	sched_state_ipi();
12385  }
12386  
12387  void smp_call_function_interrupt(struct pt_regs *regs)
12388 diff --git a/include/linux/sched.h b/include/linux/sched.h
12389 index cbb3b44..5dc3e5b 100644
12390 --- a/include/linux/sched.h
12391 +++ b/include/linux/sched.h
12392 @@ -56,6 +56,7 @@ struct sched_param {
12393  #include <asm/processor.h>
12394  
12395  #include <litmus/rt_param.h>
12396 +#include <litmus/preempt.h>
12397  
12398  struct exec_domain;
12399  struct futex_pi_state;
12400 @@ -2375,6 +2376,7 @@ static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag)
12401  static inline void set_tsk_need_resched(struct task_struct *tsk)
12402  {
12403  	set_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
12404 +	sched_state_will_schedule(tsk);
12405  }
12406  
12407  static inline void clear_tsk_need_resched(struct task_struct *tsk)
12408 diff --git a/kernel/sched/core.c b/kernel/sched/core.c
12409 index 3a471d6..17992b2 100644
12410 --- a/kernel/sched/core.c
12411 +++ b/kernel/sched/core.c
12412 @@ -1966,8 +1966,12 @@ static inline void post_schedule(struct rq *rq)
12413  asmlinkage void schedule_tail(struct task_struct *prev)
12414  	__releases(rq->lock)
12415  {
12416 -	struct rq *rq = this_rq();
12417 +	struct rq *rq;
12418 +
12419  
12420 +	preempt_disable();
12421 +
12422 +	rq = this_rq();
12423  	finish_task_switch(rq, prev);
12424  
12425  	/*
12426 @@ -1976,6 +1980,11 @@ asmlinkage void schedule_tail(struct task_struct *prev)
12427  	 */
12428  	post_schedule(rq);
12429  
12430 +	if (sched_state_validate_switch())
12431 +		litmus_reschedule_local();
12432 +
12433 +	preempt_enable();
12434 +
12435  #ifdef __ARCH_WANT_UNLOCKED_CTXSW
12436  	/* In this case, finish_task_switch does not reenable preemption */
12437  	preempt_enable();
12438 @@ -2973,11 +2982,16 @@ static void __sched __schedule(void)
12439  
12440  need_resched:
12441  	preempt_disable();
12442 +	sched_state_entered_schedule();
12443  	cpu = smp_processor_id();
12444  	rq = cpu_rq(cpu);
12445  	rcu_note_context_switch(cpu);
12446  	prev = rq->curr;
12447  
12448 +	/* LITMUS^RT: quickly re-evaluate the scheduling decision
12449 +	 * if the previous one is no longer valid after context switch.
12450 +	 */
12451 +litmus_need_resched_nonpreemptible:
12452  	TS_SCHED_START;
12453  
12454  	schedule_debug(prev);
12455 @@ -3053,6 +3067,11 @@ need_resched:
12456  
12457  	post_schedule(rq);
12458  
12459 +	if (sched_state_validate_switch()) {
12460 +		TS_SCHED2_END(prev);
12461 +		goto litmus_need_resched_nonpreemptible;
12462 +	}
12463 +
12464  	sched_preempt_enable_no_resched();
12465  
12466  	TS_SCHED2_END(prev);
12467 -- 
12468 1.8.1.2
12469 
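Note on patch 025 (sketch, not part of the patch series): the hooks wired up above cooperate roughly as follows. The function below is a hypothetical simplification of the retry loop added to __schedule(); the actual state machine lives in litmus/preempt.c, which this excerpt does not show.

#include <litmus/preempt.h>

/* Control flow established by this patch (summary):
 *
 *   remote CPU:  set_tsk_need_resched(p)  calls sched_state_will_schedule(p)
 *                reschedule IPI           calls sched_state_ipi() on the target
 *   local CPU:   __schedule()             calls sched_state_entered_schedule()
 *                the plugin picks a task  and calls sched_state_task_picked()
 *                after the switch         sched_state_validate_switch() checks
 *                                         whether the decision went stale
 */
static void schedule_retry_sketch(void)	/* hypothetical illustration */
{
	sched_state_entered_schedule();
	do {
		/* ...pick the next task and context-switch, as in __schedule()... */
	} while (sched_state_validate_switch());	/* redo the pick if stale */
}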
12470 
12471 From 1042b270f038a2c654d93aa3fd8b9ae9abe542d9 Mon Sep 17 00:00:00 2001
12472 From: Bjoern Brandenburg <bbb@mpi-sws.org>
12473 Date: Wed, 10 Jul 2013 18:34:34 +0200
12474 Subject: [PATCH 026/119] Call sched_state_task_picked() from
12475  pick_next_task_stop()
12476 
12477 Otherwise, the scheduler state machine becomes confused (and goes into
12478 a rescheduling loop) when stop-machine is triggered.
12479 ---
12480  kernel/sched/stop_task.c | 8 ++++++++
12481  1 file changed, 8 insertions(+)
12482 
12483 diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
12484 index da5eb5b..6835d31 100644
12485 --- a/kernel/sched/stop_task.c
12486 +++ b/kernel/sched/stop_task.c
12487 @@ -1,5 +1,7 @@
12488  #include "sched.h"
12489  
12490 +#include <litmus/preempt.h>
12491 +
12492  /*
12493   * stop-task scheduling class.
12494   *
12495 @@ -29,6 +31,12 @@ static struct task_struct *pick_next_task_stop(struct rq *rq)
12496  
12497  	if (stop && stop->on_rq) {
12498  		stop->se.exec_start = rq->clock_task;
12499 +		/* Let the LITMUS^RT scheduler state machine know
12500 +		 * that a task was picked. This is needed because the
12501 +		 * LITMUS^RT scheduling plugin will not be called
12502 +		 * if the stop-task class picks a task.
12503 +		 */
12504 +		sched_state_task_picked();
12505  		return stop;
12506  	}
12507  
12508 -- 
12509 1.8.1.2
12510 
12511 
12512 From 87c71e1c704021c7381821a6c654096db4f07b20 Mon Sep 17 00:00:00 2001
12513 From: Bjoern Brandenburg <bbb@mpi-sws.org>
12514 Date: Tue, 12 Feb 2013 17:45:17 +0100
12515 Subject: [PATCH 027/119] Hook into fork(), exec(), and exit()
12516 
12517 Allow LITMUS^RT to do some work when a process is created or
12518 terminated.
12519 ---
12520  fs/exec.c     | 3 +++
12521  kernel/exit.c | 4 ++++
12522  kernel/fork.c | 9 +++++++++
12523  3 files changed, 16 insertions(+)
12524 
12525 diff --git a/fs/exec.c b/fs/exec.c
12526 index bb60cda..d84259a 100644
12527 --- a/fs/exec.c
12528 +++ b/fs/exec.c
12529 @@ -56,6 +56,8 @@
12530  #include <linux/oom.h>
12531  #include <linux/compat.h>
12532  
12533 +#include <litmus/litmus.h>
12534 +
12535  #include <asm/uaccess.h>
12536  #include <asm/mmu_context.h>
12537  #include <asm/tlb.h>
12538 @@ -1506,6 +1508,7 @@ static int do_execve_common(const char *filename,
12539  		goto out_unmark;
12540  
12541  	sched_exec();
12542 +	litmus_exec();
12543  
12544  	bprm->file = file;
12545  	bprm->filename = filename;
12546 diff --git a/kernel/exit.c b/kernel/exit.c
12547 index 7bb73f9..ab36666 100644
12548 --- a/kernel/exit.c
12549 +++ b/kernel/exit.c
12550 @@ -59,6 +59,8 @@
12551  #include <asm/pgtable.h>
12552  #include <asm/mmu_context.h>
12553  
12554 +extern void exit_od_table(struct task_struct *t);
12555 +
12556  static void exit_mm(struct task_struct * tsk);
12557  
12558  static void __unhash_process(struct task_struct *p, bool group_dead)
12559 @@ -781,6 +783,8 @@ void do_exit(long code)
12560  		tty_audit_exit();
12561  	audit_free(tsk);
12562  
12563 +	exit_od_table(tsk);
12564 +
12565  	tsk->exit_code = code;
12566  	taskstats_exit(tsk, group_dead);
12567  
12568 diff --git a/kernel/fork.c b/kernel/fork.c
12569 index ff7be9d..b8aa56b 100644
12570 --- a/kernel/fork.c
12571 +++ b/kernel/fork.c
12572 @@ -81,6 +81,9 @@
12573  
12574  #include <trace/events/sched.h>
12575  
12576 +#include <litmus/litmus.h>
12577 +#include <litmus/sched_plugin.h>
12578 +
12579  #define CREATE_TRACE_POINTS
12580  #include <trace/events/task.h>
12581  
12582 @@ -238,6 +241,9 @@ void __put_task_struct(struct task_struct *tsk)
12583  	WARN_ON(tsk == current);
12584  
12585  	security_task_free(tsk);
12586 +
12587 +	exit_litmus(tsk);
12588 +
12589  	exit_creds(tsk);
12590  	delayacct_tsk_free(tsk);
12591  	put_signal_struct(tsk->signal);
12592 @@ -312,6 +318,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
12593  
12594  	tsk->stack = ti;
12595  
12596 +	/* Don't let the new task be a real-time task. */
12597 +	litmus_fork(tsk);
12598 +
12599  	setup_thread_stack(tsk, orig);
12600  	clear_user_return_notifier(tsk);
12601  	clear_tsk_need_resched(tsk);
12602 -- 
12603 1.8.1.2
12604 
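Note on patch 027 (summary, not part of the patch series): the hook bodies referenced above live under litmus/ and are not shown here; the sketch below records where they end up being invoked.

/* Process lifecycle vs. LITMUS^RT hooks after this patch:
 *
 *   dup_task_struct()    calls litmus_fork(tsk)    so a new task starts as best-effort
 *   do_execve_common()   calls litmus_exec()       to adjust state across exec
 *   do_exit()            calls exit_od_table(tsk)  to drop the object-descriptor table
 *   __put_task_struct()  calls exit_litmus(tsk)    for final LITMUS^RT cleanup
 */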
12605 
12606 From e44cd07ea0123cac05852b00f3c9d514a8999933 Mon Sep 17 00:00:00 2001
12607 From: Bjoern Brandenburg <bbb@mpi-sws.org>
12608 Date: Sat, 8 Jun 2013 18:22:35 +0200
12609 Subject: [PATCH 028/119] Augment rt_task() with is_realtime()
12610 
12611 Whenever the kernel checks for rt_task() to avoid delaying real-time
12612 tasks, we want it to also not delay LITMUS^RT tasks.  Hence, most
12613 calls to rt_task() should be matched by an equivalent call to
12614 is_realtime().
12615 
12616 Notably, this affects the implementations of select() and nanosleep(),
12617 which use timer_slack_ns when setting up timers for non-real-time
12618 tasks.
12619 ---
12620  fs/select.c         | 4 +++-
12621  kernel/hrtimer.c    | 3 ++-
12622  kernel/mutex.c      | 5 ++++-
12623  mm/page-writeback.c | 6 ++++--
12624  mm/page_alloc.c     | 5 ++++-
12625  5 files changed, 17 insertions(+), 6 deletions(-)
12626 
12627 diff --git a/fs/select.c b/fs/select.c
12628 index 8c1c96c..f53b3e4 100644
12629 --- a/fs/select.c
12630 +++ b/fs/select.c
12631 @@ -28,6 +28,8 @@
12632  #include <linux/hrtimer.h>
12633  #include <linux/sched/rt.h>
12634  
12635 +#include <litmus/litmus.h> /* for is_realtime() */
12636 +
12637  #include <asm/uaccess.h>
12638  
12639  
12640 @@ -77,7 +79,7 @@ long select_estimate_accuracy(struct timespec *tv)
12641  	 * Realtime tasks get a slack of 0 for obvious reasons.
12642  	 */
12643  
12644 -	if (rt_task(current))
12645 +	if (rt_task(current) || is_realtime(current))
12646  		return 0;
12647  
12648  	ktime_get_ts(&now);
12649 diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
12650 index c7f0c79..60b6329 100644
12651 --- a/kernel/hrtimer.c
12652 +++ b/kernel/hrtimer.c
12653 @@ -49,6 +49,7 @@
12654  #include <linux/timer.h>
12655  
12656  #include <litmus/debug_trace.h>
12657 +#include <litmus/litmus.h>
12658  
12659  #include <asm/uaccess.h>
12660  
12661 @@ -1701,7 +1702,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
12662  	unsigned long slack;
12663  
12664  	slack = current->timer_slack_ns;
12665 -	if (rt_task(current))
12666 +	if (rt_task(current) || is_realtime(current))
12667  		slack = 0;
12668  
12669  	hrtimer_init_on_stack(&t.timer, clockid, mode);
12670 diff --git a/kernel/mutex.c b/kernel/mutex.c
12671 index ad53a66..a60d05e 100644
12672 --- a/kernel/mutex.c
12673 +++ b/kernel/mutex.c
12674 @@ -25,6 +25,8 @@
12675  #include <linux/interrupt.h>
12676  #include <linux/debug_locks.h>
12677  
12678 +#include <litmus/litmus.h>
12679 +
12680  /*
12681   * In the DEBUG case we are using the "NULL fastpath" for mutexes,
12682   * which forces all calls into the slowpath:
12683 @@ -325,7 +327,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
12684  		 * we're an RT task that will live-lock because we won't let
12685  		 * the owner complete.
12686  		 */
12687 -		if (!owner && (need_resched() || rt_task(task)))
12688 +		if (!owner && (need_resched() ||
12689 +			       rt_task(task) || is_realtime(task)))
12690  			break;
12691  
12692  		/*
12693 diff --git a/mm/page-writeback.c b/mm/page-writeback.c
12694 index 73cbc5d..1f0073b 100644
12695 --- a/mm/page-writeback.c
12696 +++ b/mm/page-writeback.c
12697 @@ -38,6 +38,8 @@
12698  #include <linux/sched/rt.h>
12699  #include <trace/events/writeback.h>
12700  
12701 +#include <litmus/litmus.h> /* for is_realtime() */
12702 +
12703  /*
12704   * Sleep at most 200ms at a time in balance_dirty_pages().
12705   */
12706 @@ -300,7 +302,7 @@ void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
12707  	if (background >= dirty)
12708  		background = dirty / 2;
12709  	tsk = current;
12710 -	if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) {
12711 +	if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk) || is_realtime(tsk)) {
12712  		background += background / 4;
12713  		dirty += dirty / 4;
12714  	}
12715 @@ -328,7 +330,7 @@ static unsigned long zone_dirty_limit(struct zone *zone)
12716  	else
12717  		dirty = vm_dirty_ratio * zone_memory / 100;
12718  
12719 -	if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk))
12720 +	if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk) || is_realtime(tsk))
12721  		dirty += dirty / 4;
12722  
12723  	return dirty;
12724 diff --git a/mm/page_alloc.c b/mm/page_alloc.c
12725 index 2ee0fd3..6529939 100644
12726 --- a/mm/page_alloc.c
12727 +++ b/mm/page_alloc.c
12728 @@ -61,6 +61,8 @@
12729  #include <linux/hugetlb.h>
12730  #include <linux/sched/rt.h>
12731  
12732 +#include <litmus/litmus.h> /* for is_realtime() */
12733 +
12734  #include <asm/tlbflush.h>
12735  #include <asm/div64.h>
12736  #include "internal.h"
12737 @@ -2362,7 +2364,8 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
12738  		 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
12739  		 */
12740  		alloc_flags &= ~ALLOC_CPUSET;
12741 -	} else if (unlikely(rt_task(current)) && !in_interrupt())
12742 +	} else if (unlikely(rt_task(current) || is_realtime(current))
12743 +		   && !in_interrupt())
12744  		alloc_flags |= ALLOC_HARDER;
12745  
12746  	if (likely(!(gfp_mask & __GFP_NOMEMALLOC))) {
12747 -- 
12748 1.8.1.2
12749 
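Note on patch 028 (sketch, not part of the patch series): all hunks above apply the same two-predicate test. The wrapper below is hypothetical and only captures the recurring pattern; the patch itself open-codes the check at every call site.

#include <linux/sched.h>
#include <linux/sched/rt.h>	/* rt_task() */
#include <litmus/litmus.h>	/* is_realtime() */

/* Hypothetical helper: give a task real-time treatment if either the stock
 * POSIX RT classes or the LITMUS^RT scheduling class owns it. */
static inline bool task_gets_rt_treatment(struct task_struct *t)
{
	return rt_task(t) || is_realtime(t);
}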
12750 
12751 From bb196a3537cf825b18aa46cdf962fb9422bc2a8f Mon Sep 17 00:00:00 2001
12752 From: Bjoern Brandenburg <bbb@mpi-sws.org>
12753 Date: Mon, 1 Jul 2013 22:06:19 +0200
12754 Subject: [PATCH 029/119] Hookup sched_trace_XXX() tracing in Linux scheduler
12755 
12756 This patch adds context switch tracing to the main Linux scheduler.
12757 ---
12758  kernel/sched/core.c | 5 +++++
12759  1 file changed, 5 insertions(+)
12760 
12761 diff --git a/kernel/sched/core.c b/kernel/sched/core.c
12762 index 17992b2..3d37e2a 100644
12763 --- a/kernel/sched/core.c
12764 +++ b/kernel/sched/core.c
12765 @@ -87,6 +87,7 @@
12766  #include "../smpboot.h"
12767  
12768  #include <litmus/trace.h>
12769 +#include <litmus/sched_trace.h>
12770  
12771  #define CREATE_TRACE_POINTS
12772  #include <trace/events/sched.h>
12773 @@ -1974,6 +1975,8 @@ asmlinkage void schedule_tail(struct task_struct *prev)
12774  	rq = this_rq();
12775  	finish_task_switch(rq, prev);
12776  
12777 +	sched_trace_task_switch_to(current);
12778 +
12779  	/*
12780  	 * FIXME: do we need to worry about rq being invalidated by the
12781  	 * task_switch?
12782 @@ -2993,6 +2996,7 @@ need_resched:
12783  	 */
12784  litmus_need_resched_nonpreemptible:
12785  	TS_SCHED_START;
12786 +	sched_trace_task_switch_away(prev);
12787  
12788  	schedule_debug(prev);
12789  
12790 @@ -3064,6 +3068,7 @@ litmus_need_resched_nonpreemptible:
12791  	}
12792  
12793  	TS_SCHED2_START(prev);
12794 +	sched_trace_task_switch_to(current);
12795  
12796  	post_schedule(rq);
12797  
12798 -- 
12799 1.8.1.2
12800 
12801 
12802 From b23a712f8a488189cee3ef373d878c39f4a1dab4 Mon Sep 17 00:00:00 2001
12803 From: Bjoern Brandenburg <bbb@mpi-sws.org>
12804 Date: Mon, 1 Jul 2013 22:15:48 +0200
12805 Subject: [PATCH 030/119] Integrate SRP ceiling blocking callback with Linux
12806  scheduler
12807 
12808 Check whether a suspension is required at the end of schedule().
12809 ---
12810  kernel/sched/core.c | 3 +++
12811  1 file changed, 3 insertions(+)
12812 
12813 diff --git a/kernel/sched/core.c b/kernel/sched/core.c
12814 index 3d37e2a..403aa9e 100644
12815 --- a/kernel/sched/core.c
12816 +++ b/kernel/sched/core.c
12817 @@ -86,6 +86,7 @@
12818  #include "../workqueue_internal.h"
12819  #include "../smpboot.h"
12820  
12821 +#include <litmus/litmus.h>
12822  #include <litmus/trace.h>
12823  #include <litmus/sched_trace.h>
12824  
12825 @@ -3083,6 +3084,8 @@ litmus_need_resched_nonpreemptible:
12826  
12827  	if (need_resched())
12828  		goto need_resched;
12829 +
12830 +	srp_ceiling_block();
12831  }
12832  
12833  static inline void sched_submit_work(struct task_struct *tsk)
12834 -- 
12835 1.8.1.2
12836 
12837 
12838 From 55d08a9fb5f361e38a8b21dc601e9037b0a3a98d Mon Sep 17 00:00:00 2001
12839 From: Bjoern Brandenburg <bbb@mpi-sws.org>
12840 Date: Mon, 1 Jul 2013 22:30:19 +0200
12841 Subject: [PATCH 031/119] Introduce LITMUS^RT runqueue dummy into struct rq
12842 
12843 ---
12844  kernel/sched/sched.h | 6 ++++++
12845  1 file changed, 6 insertions(+)
12846 
12847 diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
12848 index dfa31d5..62f508b 100644
12849 --- a/kernel/sched/sched.h
12850 +++ b/kernel/sched/sched.h
12851 @@ -358,6 +358,11 @@ struct rt_rq {
12852  #endif
12853  };
12854  
12855 +struct litmus_rq {
12856 +	unsigned long nr_running;
12857 +	struct task_struct *prev;
12858 +};
12859 +
12860  #ifdef CONFIG_SMP
12861  
12862  /*
12863 @@ -422,6 +427,7 @@ struct rq {
12864  
12865  	struct cfs_rq cfs;
12866  	struct rt_rq rt;
12867 +	struct litmus_rq litmus;
12868  
12869  #ifdef CONFIG_FAIR_GROUP_SCHED
12870  	/* list of leaf cfs_rq on this cpu: */
12871 -- 
12872 1.8.1.2
12873 
12874 
12875 From 4d892c962033fe3b959ce4cffe68f0f27304a436 Mon Sep 17 00:00:00 2001
12876 From: Bjoern Brandenburg <bbb@mpi-sws.org>
12877 Date: Mon, 1 Jul 2013 22:31:47 +0200
12878 Subject: [PATCH 032/119] Add LITMUS^RT scheduling class in
12879  kernel/sched/Makefile
12880 
12881 ---
12882  kernel/sched/Makefile | 4 ++++
12883  1 file changed, 4 insertions(+)
12884 
12885 diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
12886 index deaf90e..7002348 100644
12887 --- a/kernel/sched/Makefile
12888 +++ b/kernel/sched/Makefile
12889 @@ -17,3 +17,7 @@ obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
12890  obj-$(CONFIG_SCHEDSTATS) += stats.o
12891  obj-$(CONFIG_SCHED_DEBUG) += debug.o
12892  obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o
12893 +
12894 +
12895 +# LITMUS^RT scheduling class
12896 +obj-y += litmus.o
12897 \ No newline at end of file
12898 -- 
12899 1.8.1.2
12900 
12901 
12902 From 7c23bd3e9b9744e746222f3f4b77f4cf7978c746 Mon Sep 17 00:00:00 2001
12903 From: Bjoern Brandenburg <bbb@mpi-sws.org>
12904 Date: Mon, 1 Jul 2013 22:38:20 +0200
12905 Subject: [PATCH 033/119] Make LITMUS^RT scheduling class the highest-priority
12906  scheduling class
12907 
12908 Needs to be above stop_sched_class for legacy reasons; the main
12909 plugins were developed before stop_sched_class was introduced and
12910 assume that they are the highest-priority scheduling class.
12911 ---
12912  kernel/sched/litmus.c | 2 +-
12913  kernel/sched/sched.h  | 3 ++-
12914  2 files changed, 3 insertions(+), 2 deletions(-)
12915 
12916 diff --git a/kernel/sched/litmus.c b/kernel/sched/litmus.c
12917 index ad88a14..b84361f 100644
12918 --- a/kernel/sched/litmus.c
12919 +++ b/kernel/sched/litmus.c
12920 @@ -314,7 +314,7 @@ const struct sched_class litmus_sched_class = {
12921  	 * cpu-hotplug or cpu throttling. Allows Litmus to use up to 1.0
12922  	 * CPU capacity.
12923  	 */
12924 -	.next			= &rt_sched_class,
12925 +	.next			= &stop_sched_class,
12926  	.enqueue_task		= enqueue_task_litmus,
12927  	.dequeue_task		= dequeue_task_litmus,
12928  	.yield_task		= yield_task_litmus,
12929 diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
12930 index 62f508b..91b0641 100644
12931 --- a/kernel/sched/sched.h
12932 +++ b/kernel/sched/sched.h
12933 @@ -1017,11 +1017,12 @@ struct sched_class {
12934  #endif
12935  };
12936  
12937 -#define sched_class_highest (&stop_sched_class)
12938 +#define sched_class_highest (&litmus_sched_class)
12939  #define for_each_class(class) \
12940     for (class = sched_class_highest; class; class = class->next)
12941  
12942  extern const struct sched_class stop_sched_class;
12943 +extern const struct sched_class litmus_sched_class;
12944  extern const struct sched_class rt_sched_class;
12945  extern const struct sched_class fair_sched_class;
12946  extern const struct sched_class idle_sched_class;
12947 -- 
12948 1.8.1.2
12949 
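Note on patch 033 (illustration, not part of the patch series): with sched_class_highest redefined, the singly linked class list walked by for_each_class() now starts at the LITMUS^RT class.

/* Resulting pick order in pick_next_task(), following each class's .next:
 *
 *   litmus_sched_class -> stop_sched_class -> rt_sched_class
 *       -> fair_sched_class -> idle_sched_class -> NULL
 *
 * i.e. the LITMUS^RT plugin is consulted before every stock scheduling class.
 */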
12950 
12951 From 685b84e0e122117b1c38e06bc24378d757967545 Mon Sep 17 00:00:00 2001
12952 From: Bjoern Brandenburg <bbb@mpi-sws.org>
12953 Date: Mon, 1 Jul 2013 22:50:05 +0200
12954 Subject: [PATCH 034/119] Integrate LITMUS^RT with try_to_wake_up() path
12955 
12956 ---
12957  kernel/sched/core.c | 20 +++++++++++++++++++-
12958  1 file changed, 19 insertions(+), 1 deletion(-)
12959 
12960 diff --git a/kernel/sched/core.c b/kernel/sched/core.c
12961 index 403aa9e..210d42d 100644
12962 --- a/kernel/sched/core.c
12963 +++ b/kernel/sched/core.c
12964 @@ -1463,7 +1463,12 @@ static void ttwu_queue(struct task_struct *p, int cpu)
12965  	struct rq *rq = cpu_rq(cpu);
12966  
12967  #if defined(CONFIG_SMP)
12968 -	if (sched_feat(TTWU_QUEUE) && !cpus_share_cache(smp_processor_id(), cpu)) {
12969 +	/*
12970 +	 * LITMUS^RT: whether to send an IPI to the remote CPU is plugin
12971 +	 * specific.
12972 +	 */
12973 +	if (!is_realtime(p) &&
12974 +	    sched_feat(TTWU_QUEUE) && !cpus_share_cache(smp_processor_id(), cpu)) {
12975  		sched_clock_cpu(cpu); /* sync clocks x-cpu */
12976  		ttwu_queue_remote(p, cpu);
12977  		return;
12978 @@ -1496,6 +1501,9 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
12979  	unsigned long flags;
12980  	int cpu, success = 0;
12981  
12982 +	if (is_realtime(p))
12983 +		TRACE_TASK(p, "try_to_wake_up() state:%d\n", p->state);
12984 +
12985  	/*
12986  	 * If we are going to wake up a thread waiting for CONDITION we
12987  	 * need to ensure that CONDITION=1 done by the caller can not be
12988 @@ -1525,6 +1533,12 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
12989  	 */
12990  	smp_rmb();
12991  
12992 +	/* LITMUS^RT: once the task can be safely referenced by this
12993 +	 * CPU, don't mess with Linux load balancing stuff.
12994 +	 */
12995 +	if (is_realtime(p))
12996 +		goto litmus_out_activate;
12997 +
12998  	p->sched_contributes_to_load = !!task_contributes_to_load(p);
12999  	p->state = TASK_WAKING;
13000  
13001 @@ -1536,12 +1550,16 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
13002  		wake_flags |= WF_MIGRATED;
13003  		set_task_cpu(p, cpu);
13004  	}
13005 +
13006 +litmus_out_activate:
13007  #endif /* CONFIG_SMP */
13008  
13009  	ttwu_queue(p, cpu);
13010  stat:
13011  	ttwu_stat(p, cpu, wake_flags);
13012  out:
13013 +	if (is_realtime(p))
13014 +		TRACE_TASK(p, "try_to_wake_up() done state:%d\n", p->state);
13015  	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
13016  
13017  	return success;
13018 -- 
13019 1.8.1.2
13020 
13021 
13022 From d2d9bd3ce3b2af5edb9ac2a5b01fc6db4589c885 Mon Sep 17 00:00:00 2001
13023 From: Bjoern Brandenburg <bbb@mpi-sws.org>
13024 Date: Mon, 1 Jul 2013 23:45:08 +0200
13025 Subject: [PATCH 035/119] Disable cut-to-CFS optimization in Linux scheduler
13026 
13027 Global plugins require that the plugin be called even if there
13028 currently is no real-time task executing on the local core.
13029 ---
13030  kernel/sched/core.c | 9 ++++++++-
13031  1 file changed, 8 insertions(+), 1 deletion(-)
13032 
13033 diff --git a/kernel/sched/core.c b/kernel/sched/core.c
13034 index 210d42d..d54b6d6 100644
13035 --- a/kernel/sched/core.c
13036 +++ b/kernel/sched/core.c
13037 @@ -2942,12 +2942,19 @@ pick_next_task(struct rq *rq)
13038  	/*
13039  	 * Optimization: we know that if all tasks are in
13040  	 * the fair class we can call that function directly:
13041 -	 */
13042 +
13043 +	 * NOT IN LITMUS^RT!
13044 +
13045 +	 * This breaks many assumptions in the plugins.
13046 +	 * Do not uncomment without thinking long and hard
13047 +	 * about how this affects global plugins such as GSN-EDF.
13048 +
13049  	if (likely(rq->nr_running == rq->cfs.h_nr_running)) {
13050  		p = fair_sched_class.pick_next_task(rq);
13051  		if (likely(p))
13052  			return p;
13053  	}
13054 +	*/
13055  
13056  	for_each_class(class) {
13057  		p = class->pick_next_task(rq);
13058 -- 
13059 1.8.1.2
13060 
13061 
13062 From 5d0d1599bdcd8e35b3f23777234e0c9243fd4498 Mon Sep 17 00:00:00 2001
13063 From: Bjoern Brandenburg <bbb@mpi-sws.org>
13064 Date: Mon, 1 Jul 2013 23:46:56 +0200
13065 Subject: [PATCH 036/119] Integrate LITMUS^RT scheduling class with
13066  sched_setscheduler
13067 
13068 ---
13069  kernel/sched/core.c | 34 ++++++++++++++++++++++++++++++++--
13070  1 file changed, 32 insertions(+), 2 deletions(-)
13071 
13072 diff --git a/kernel/sched/core.c b/kernel/sched/core.c
13073 index d54b6d6..afc134d 100644
13074 --- a/kernel/sched/core.c
13075 +++ b/kernel/sched/core.c
13076 @@ -89,6 +89,7 @@
13077  #include <litmus/litmus.h>
13078  #include <litmus/trace.h>
13079  #include <litmus/sched_trace.h>
13080 +#include <litmus/sched_plugin.h>
13081  
13082  #define CREATE_TRACE_POINTS
13083  #include <trace/events/sched.h>
13084 @@ -3909,7 +3910,9 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
13085  	p->normal_prio = normal_prio(p);
13086  	/* we are holding p->pi_lock already */
13087  	p->prio = rt_mutex_getprio(p);
13088 -	if (rt_prio(p->prio))
13089 +	if (p->policy == SCHED_LITMUS)
13090 +		p->sched_class = &litmus_sched_class;
13091 +	else if (rt_prio(p->prio))
13092  		p->sched_class = &rt_sched_class;
13093  	else
13094  		p->sched_class = &fair_sched_class;
13095 @@ -3940,6 +3943,7 @@ static int __sched_setscheduler(struct task_struct *p, int policy,
13096  	const struct sched_class *prev_class;
13097  	struct rq *rq;
13098  	int reset_on_fork;
13099 +	int litmus_task = 0;
13100  
13101  	/* may grab non-irq protected spin_locks */
13102  	BUG_ON(in_interrupt());
13103 @@ -3954,7 +3958,7 @@ recheck:
13104  
13105  		if (policy != SCHED_FIFO && policy != SCHED_RR &&
13106  				policy != SCHED_NORMAL && policy != SCHED_BATCH &&
13107 -				policy != SCHED_IDLE)
13108 +				policy != SCHED_IDLE && policy != SCHED_LITMUS)
13109  			return -EINVAL;
13110  	}
13111  
13112 @@ -3969,6 +3973,8 @@ recheck:
13113  		return -EINVAL;
13114  	if (rt_policy(policy) != (param->sched_priority != 0))
13115  		return -EINVAL;
13116 +	if (policy == SCHED_LITMUS && policy == p->policy)
13117 +		return -EINVAL;
13118  
13119  	/*
13120  	 * Allow unprivileged RT tasks to decrease priority:
13121 @@ -4012,6 +4018,12 @@ recheck:
13122  			return retval;
13123  	}
13124  
13125 +	if (policy == SCHED_LITMUS) {
13126 +		retval = litmus_admit_task(p);
13127 +		if (retval)
13128 +			return retval;
13129 +	}
13130 +
13131  	/*
13132  	 * make sure no PI-waiters arrive (or leave) while we are
13133  	 * changing the priority of the task:
13134 @@ -4068,10 +4080,25 @@ recheck:
13135  
13136  	p->sched_reset_on_fork = reset_on_fork;
13137  
13138 +	if (p->policy == SCHED_LITMUS) {
13139 +		litmus_exit_task(p);
13140 +		litmus_task = 1;
13141 +	}
13142 +
13143  	oldprio = p->prio;
13144  	prev_class = p->sched_class;
13145  	__setscheduler(rq, p, policy, param->sched_priority);
13146  
13147 +	if (policy == SCHED_LITMUS) {
13148 +#ifdef CONFIG_SMP
13149 +		p->rt_param.stack_in_use = running ? rq->cpu : NO_CPU;
13150 +#else
13151 +		p->rt_param.stack_in_use = running ? 0 : NO_CPU;
13152 +#endif
13153 +		p->rt_param.present = running;
13154 +		litmus->task_new(p, on_rq, running);
13155 +	}
13156 +
13157  	if (running)
13158  		p->sched_class->set_curr_task(rq);
13159  	if (on_rq)
13160 @@ -4082,6 +4109,9 @@ recheck:
13161  
13162  	rt_mutex_adjust_pi(p);
13163  
13164 +	if (litmus_task)
13165 +		litmus_dealloc(p);
13166 +
13167  	return 0;
13168  }
13169  
13170 -- 
13171 1.8.1.2
13172 
13173 
13174 From c728e26abc70530c62b794b017e6135ecd8df8f0 Mon Sep 17 00:00:00 2001
13175 From: Bjoern Brandenburg <bbb@mpi-sws.org>
13176 Date: Mon, 1 Jul 2013 23:48:22 +0200
13177 Subject: [PATCH 037/119] Block sched_setaffinity() for SCHED_LITMUS tasks
13178 
13179 ---
13180  kernel/sched/core.c | 4 ++--
13181  1 file changed, 2 insertions(+), 2 deletions(-)
13182 
13183 diff --git a/kernel/sched/core.c b/kernel/sched/core.c
13184 index afc134d..eb32fd0 100644
13185 --- a/kernel/sched/core.c
13186 +++ b/kernel/sched/core.c
13187 @@ -4269,10 +4269,10 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
13188  	rcu_read_lock();
13189  
13190  	p = find_process_by_pid(pid);
13191 -	if (!p) {
13192 +	if (!p || is_realtime(p)) {
13193  		rcu_read_unlock();
13194  		put_online_cpus();
13195 -		return -ESRCH;
13196 +		return p ? -EPERM : -ESRCH;
13197  	}
13198  
13199  	/* Prevent p going away */
13200 -- 
13201 1.8.1.2
13202 
13203 
13204 From 4fd5fc39aeed8941623be74fcadc3742055f459d Mon Sep 17 00:00:00 2001
13205 From: Bjoern Brandenburg <bbb@mpi-sws.org>
13206 Date: Mon, 1 Jul 2013 23:51:06 +0200
13207 Subject: [PATCH 038/119] Reset SCHED_LITMUS scheduling class on fork
13208 
13209 ---
13210  kernel/sched/core.c | 2 +-
13211  1 file changed, 1 insertion(+), 1 deletion(-)
13212 
13213 diff --git a/kernel/sched/core.c b/kernel/sched/core.c
13214 index eb32fd0..8a421e9 100644
13215 --- a/kernel/sched/core.c
13216 +++ b/kernel/sched/core.c
13217 @@ -1721,7 +1721,7 @@ void sched_fork(struct task_struct *p)
13218  	 * Revert to default priority/policy on fork if requested.
13219  	 */
13220  	if (unlikely(p->sched_reset_on_fork)) {
13221 -		if (task_has_rt_policy(p)) {
13222 +		if (task_has_rt_policy(p) || p->policy == SCHED_LITMUS) {
13223  			p->policy = SCHED_NORMAL;
13224  			p->static_prio = NICE_TO_PRIO(0);
13225  			p->rt_priority = 0;
13226 -- 
13227 1.8.1.2
13228 
13229 
13230 From e07b27e05418f939cb559e96733aa2e96de71592 Mon Sep 17 00:00:00 2001
13231 From: Bjoern Brandenburg <bbb@mpi-sws.org>
13232 Date: Mon, 1 Jul 2013 23:52:14 +0200
13233 Subject: [PATCH 039/119] Hook into finish_switch()
13234 
13235 To keep track of stack usage and to notify the plugin, if necessary.
13236 ---
13237  kernel/sched/core.c | 2 ++
13238  1 file changed, 2 insertions(+)
13239 
13240 diff --git a/kernel/sched/core.c b/kernel/sched/core.c
13241 index 8a421e9..b50b3c2 100644
13242 --- a/kernel/sched/core.c
13243 +++ b/kernel/sched/core.c
13244 @@ -1925,6 +1925,8 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
13245  	prev_state = prev->state;
13246  	vtime_task_switch(prev);
13247  	finish_arch_switch(prev);
13248 +	litmus->finish_switch(prev);
13249 +	prev->rt_param.stack_in_use = NO_CPU;
13250  	perf_event_task_sched_in(prev, current);
13251  	finish_lock_switch(rq, prev);
13252  	finish_arch_post_lock_switch();
13253 -- 
13254 1.8.1.2
13255 
13256 
13257 From f40461bc0f6f2b7d76e6c24aef758862e3cbd0c1 Mon Sep 17 00:00:00 2001
13258 From: Bjoern Brandenburg <bbb@mpi-sws.org>
13259 Date: Mon, 1 Jul 2013 23:55:09 +0200
13260 Subject: [PATCH 040/119] Cache 'prev' in LITMUS^RT runqueue
13261 
13262 LITMUS^RT plugins like to know who 'prev' is. pick_next_task() doesn't
12263 expose that info, so we just cache prev in the runqueue. Could probably
13264 be replaced by looking at 'current' instead.
13265 ---
13266  kernel/sched/core.c | 8 ++++++++
13267  1 file changed, 8 insertions(+)
13268 
13269 diff --git a/kernel/sched/core.c b/kernel/sched/core.c
13270 index b50b3c2..ad6ba36 100644
13271 --- a/kernel/sched/core.c
13272 +++ b/kernel/sched/core.c
13273 @@ -1953,6 +1953,14 @@ static inline void pre_schedule(struct rq *rq, struct task_struct *prev)
13274  {
13275  	if (prev->sched_class->pre_schedule)
13276  		prev->sched_class->pre_schedule(rq, prev);
13277 +
13278 +       /* LITMUS^RT not very clean hack: we need to save the prev task as our
13279 +        * scheduling decision rely on it (as we drop the rq lock something in
13280 +        * prev can change...); there is no way to escape this hack apart from
13281 +        * modifying pick_nex_task(rq, _prev_) or falling back on the previous
13282 +        * solution of decoupling scheduling decisions.
13283 +        */
13284 +	rq->litmus.prev = prev;
13285  }
13286  
13287  /* rq->lock is NOT held, but preemption is disabled */
13288 -- 
13289 1.8.1.2
13290 
13291 
13292 From 0aea465f37878c328da2361607b4670725be1139 Mon Sep 17 00:00:00 2001
13293 From: Bjoern Brandenburg <bbb@mpi-sws.org>
13294 Date: Mon, 1 Jul 2013 23:55:58 +0200
13295 Subject: [PATCH 041/119] Don't trigger load balancer in scheduler tick for
13296  LITMUS^RT
13297 
13298 ---
13299  kernel/sched/core.c | 3 ++-
13300  1 file changed, 2 insertions(+), 1 deletion(-)
13301 
13302 diff --git a/kernel/sched/core.c b/kernel/sched/core.c
13303 index ad6ba36..1be8a35 100644
13304 --- a/kernel/sched/core.c
13305 +++ b/kernel/sched/core.c
13306 @@ -2804,7 +2804,8 @@ void scheduler_tick(void)
13307  
13308  #ifdef CONFIG_SMP
13309  	rq->idle_balance = idle_cpu(cpu);
13310 -	trigger_load_balance(rq, cpu);
13311 +	if (!is_realtime(current))
13312 +		trigger_load_balance(rq, cpu);
13313  #endif
13314  	rq_last_tick_reset(rq);
13315  
13316 -- 
13317 1.8.1.2
13318 
13319 
13320 From e801a3104619e8dbee8e1fed05402f160d8f790d Mon Sep 17 00:00:00 2001
13321 From: Bjoern Brandenburg <bbb@mpi-sws.org>
13322 Date: Mon, 1 Jul 2013 23:56:31 +0200
13323 Subject: [PATCH 042/119] Hook into rt scheduling class to protect LITMUS^RT
13324  tasks
13325 
13326 The rt scheduling class thinks it's the highest-priority scheduling
13327 class around. In LITMUS^RT, it is not. Don't go preempting remote cores
13328 that run SCHED_LITMUS tasks.
13329 ---
13330  kernel/sched/rt.c | 9 +++++++--
13331  1 file changed, 7 insertions(+), 2 deletions(-)
13332 
13333 diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
13334 index 15334e6..dbe21ae 100644
13335 --- a/kernel/sched/rt.c
13336 +++ b/kernel/sched/rt.c
13337 @@ -7,6 +7,8 @@
13338  
13339  #include <linux/slab.h>
13340  
13341 +#include <litmus/litmus.h>
13342 +
13343  int sched_rr_timeslice = RR_TIMESLICE;
13344  
13345  static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
13346 @@ -436,7 +438,9 @@ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
13347  	if (rt_rq->rt_nr_running) {
13348  		if (rt_se && !on_rt_rq(rt_se))
13349  			enqueue_rt_entity(rt_se, false);
13350 -		if (rt_rq->highest_prio.curr < curr->prio)
13351 +		if (rt_rq->highest_prio.curr < curr->prio
13352 +		    /* Don't subject LITMUS^RT tasks to remote reschedules. */
13353 +		    && !is_realtime(curr))
13354  			resched_task(curr);
13355  	}
13356  }
13357 @@ -530,7 +534,8 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
13358  
13359  static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
13360  {
13361 -	if (rt_rq->rt_nr_running)
13362 +	if (rt_rq->rt_nr_running
13363 +	    && !is_realtime(rq_of_rt_rq(rt_rq)->curr))
13364  		resched_task(rq_of_rt_rq(rt_rq)->curr);
13365  }
13366  
13367 -- 
13368 1.8.1.2
13369 
13370 
13371 From 2c85fa0767ce6355759868583a9eeedb1feb384f Mon Sep 17 00:00:00 2001
13372 From: Bjoern Brandenburg <bbb@mpi-sws.org>
13373 Date: Thu, 25 Jul 2013 22:56:55 +0200
13374 Subject: [PATCH 043/119] Don't call set_tsk_need_resched() on remote LITMUS^RT
13375  task
13376 
13377 This patch fixes a BUG_ON() in litmus/preempt.c:33 reported by Felipe
13378 Cerqueira & Manohar Vanga.
13379 ---
13380  kernel/sched/core.c | 9 +++++++--
13381  1 file changed, 7 insertions(+), 2 deletions(-)
13382 
13383 diff --git a/kernel/sched/core.c b/kernel/sched/core.c
13384 index 1be8a35..6bced0e 100644
13385 --- a/kernel/sched/core.c
13386 +++ b/kernel/sched/core.c
13387 @@ -526,9 +526,14 @@ void resched_task(struct task_struct *p)
13388  	if (test_tsk_need_resched(p))
13389  		return;
13390  
13391 -	set_tsk_need_resched(p);
13392 -
13393  	cpu = task_cpu(p);
13394 +
13395 +	/* Cannot call set_tsk_need_resched() on LITMUS^RT task
13396 +	 * on remote core. Only policy plugins may do this via
13397 +	 * litmus_reschedule(). */
13398 +	if (!is_realtime(p) || cpu == smp_processor_id())
13399 +		set_tsk_need_resched(p);
13400 +
13401  	if (cpu == smp_processor_id())
13402  		return;
13403  
13404 -- 
13405 1.8.1.2
13406 
13407 
13408 From 232ab01e5abd3bf3b494f46d558898122d57f6d8 Mon Sep 17 00:00:00 2001
13409 From: Bjoern Brandenburg <bbb@mpi-sws.org>
13410 Date: Thu, 25 Jul 2013 23:00:29 +0200
13411 Subject: [PATCH 044/119] Protect LITMUS^RT tasks from re-nicing
13412 
13413 Assigning a nice value to LITMUS^RT tasks is meaningless. Bail out
13414 early.
13415 ---
13416  kernel/sched/core.c | 2 +-
13417  1 file changed, 1 insertion(+), 1 deletion(-)
13418 
13419 diff --git a/kernel/sched/core.c b/kernel/sched/core.c
13420 index 6bced0e..46ceebb 100644
13421 --- a/kernel/sched/core.c
13422 +++ b/kernel/sched/core.c
13423 @@ -3770,7 +3770,7 @@ void set_user_nice(struct task_struct *p, long nice)
13424  	 * it wont have any effect on scheduling until the task is
13425  	 * SCHED_FIFO/SCHED_RR:
13426  	 */
13427 -	if (task_has_rt_policy(p)) {
13428 +	if (task_has_rt_policy(p) || is_realtime(p)) {
13429  		p->static_prio = NICE_TO_PRIO(nice);
13430  		goto out_unlock;
13431  	}
13432 -- 
13433 1.8.1.2
13434 
13435 
13436 From d14164377790b4c15fd1fa4665329f6e6febe78f Mon Sep 17 00:00:00 2001
13437 From: Bjoern Brandenburg <bbb@mpi-sws.org>
13438 Date: Mon, 2 Jun 2014 14:22:02 +0200
13439 Subject: [PATCH 045/119] Hook into kernel/exit.c to force exiting RT tasks
13440  into best-effort mode
13441 
13442 ---
13443  kernel/exit.c | 11 +++++++++++
13444  1 file changed, 11 insertions(+)
13445 
13446 diff --git a/kernel/exit.c b/kernel/exit.c
13447 index ab36666..2fc678f 100644
13448 --- a/kernel/exit.c
13449 +++ b/kernel/exit.c
13450 @@ -59,6 +59,8 @@
13451  #include <asm/pgtable.h>
13452  #include <asm/mmu_context.h>
13453  
13454 +#include <litmus/litmus.h>
13455 +
13456  extern void exit_od_table(struct task_struct *t);
13457  
13458  static void exit_mm(struct task_struct * tsk);
13459 @@ -720,6 +722,15 @@ void do_exit(long code)
13460  	if (unlikely(!tsk->pid))
13461  		panic("Attempted to kill the idle task!");
13462  
13463 +	if (unlikely(is_realtime(tsk))) {
13464 +		/* We would like the task to be polite
13465 +		 * and transition out of RT mode first.
13466 +		 * Let's give it a little help.
13467 +		 */
13468 +		litmus_do_exit(tsk);
13469 +		BUG_ON(is_realtime(tsk));
13470 +	}
13471 +
13472  	/*
13473  	 * If do_exit is called because this processes oopsed, it's possible
13474  	 * that get_fs() was left as KERNEL_DS, so reset it to USER_DS before
13475 -- 
13476 1.8.1.2
13477 
13478 
13479 From ad5164aa251591c954a084a51aaa866c1380e7b3 Mon Sep 17 00:00:00 2001
13480 From: Bjoern Brandenburg <bbb@mpi-sws.org>
13481 Date: Tue, 12 Feb 2013 19:15:27 +0100
13482 Subject: [PATCH 046/119] Add PSN-EDF scheduler plugin
13483 
13484 ---
13485  litmus/Makefile        |   4 +-
13486  litmus/sched_psn_edf.c | 689 +++++++++++++++++++++++++++++++++++++++++++++++++
13487  2 files changed, 692 insertions(+), 1 deletion(-)
13488  create mode 100644 litmus/sched_psn_edf.c
13489 
13490 diff --git a/litmus/Makefile b/litmus/Makefile
13491 index f7ceabc..0db695e 100644
13492 --- a/litmus/Makefile
13493 +++ b/litmus/Makefile
13494 @@ -18,7 +18,9 @@ obj-y     = sched_plugin.o litmus.o \
13495  	    bheap.o \
13496  	    binheap.o \
13497  	    ctrldev.o \
13498 -	    uncachedev.o
13499 +	    uncachedev.o \
13500 +	    sched_psn_edf.o
13501 +
13502  
13503  obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o
13504  
13505 diff --git a/litmus/sched_psn_edf.c b/litmus/sched_psn_edf.c
13506 new file mode 100644
13507 index 0000000..dd042db
13508 --- /dev/null
13509 +++ b/litmus/sched_psn_edf.c
13510 @@ -0,0 +1,689 @@
13511 +/*
13512 + * litmus/sched_psn_edf.c
13513 + *
13514 + * Implementation of the PSN-EDF scheduler plugin.
13515 + * Based on kern/sched_part_edf.c and kern/sched_gsn_edf.c.
13516 + *
13517 + * Suspensions and non-preemptable sections are supported.
13518 + * Priority inheritance is not supported.
13519 + */
13520 +
13521 +#include <linux/percpu.h>
13522 +#include <linux/sched.h>
13523 +#include <linux/list.h>
13524 +#include <linux/spinlock.h>
13525 +#include <linux/module.h>
13526 +
13527 +#include <litmus/litmus.h>
13528 +#include <litmus/jobs.h>
13529 +#include <litmus/preempt.h>
13530 +#include <litmus/budget.h>
13531 +#include <litmus/sched_plugin.h>
13532 +#include <litmus/edf_common.h>
13533 +#include <litmus/sched_trace.h>
13534 +#include <litmus/trace.h>
13535 +
13536 +/* to set up domain/cpu mappings */
13537 +#include <litmus/litmus_proc.h>
13538 +
13539 +typedef struct {
13540 +	rt_domain_t 		domain;
13541 +	int          		cpu;
13542 +	struct task_struct* 	scheduled; /* only RT tasks */
13543 +/*
13544 + * scheduling lock slock
13545 + * protects the domain and serializes scheduling decisions
13546 + */
13547 +#define slock domain.ready_lock
13548 +
13549 +} psnedf_domain_t;
13550 +
13551 +DEFINE_PER_CPU(psnedf_domain_t, psnedf_domains);
13552 +
13553 +#define local_edf		(&__get_cpu_var(psnedf_domains).domain)
13554 +#define local_pedf		(&__get_cpu_var(psnedf_domains))
13555 +#define remote_edf(cpu)		(&per_cpu(psnedf_domains, cpu).domain)
13556 +#define remote_pedf(cpu)	(&per_cpu(psnedf_domains, cpu))
13557 +#define task_edf(task)		remote_edf(get_partition(task))
13558 +#define task_pedf(task)		remote_pedf(get_partition(task))
13559 +
13560 +
13561 +static void psnedf_domain_init(psnedf_domain_t* pedf,
13562 +			       check_resched_needed_t check,
13563 +			       release_jobs_t release,
13564 +			       int cpu)
13565 +{
13566 +	edf_domain_init(&pedf->domain, check, release);
13567 +	pedf->cpu      		= cpu;
13568 +	pedf->scheduled		= NULL;
13569 +}
13570 +
13571 +static void requeue(struct task_struct* t, rt_domain_t *edf)
13572 +{
13573 +	if (t->state != TASK_RUNNING)
13574 +		TRACE_TASK(t, "requeue: !TASK_RUNNING\n");
13575 +
13576 +	tsk_rt(t)->completed = 0;
13577 +	if (is_early_releasing(t) || is_released(t, litmus_clock()))
13578 +		__add_ready(edf, t);
13579 +	else
13580 +		add_release(edf, t); /* it has got to wait */
13581 +}
13582 +
13583 +/* we assume the lock is being held */
13584 +static void preempt(psnedf_domain_t *pedf)
13585 +{
13586 +	preempt_if_preemptable(pedf->scheduled, pedf->cpu);
13587 +}
13588 +
13589 +#ifdef CONFIG_LITMUS_LOCKING
13590 +
13591 +static void boost_priority(struct task_struct* t)
13592 +{
13593 +	unsigned long		flags;
13594 +	psnedf_domain_t* 	pedf = task_pedf(t);
13595 +	lt_t			now;
13596 +
13597 +	raw_spin_lock_irqsave(&pedf->slock, flags);
13598 +	now = litmus_clock();
13599 +
13600 +	TRACE_TASK(t, "priority boosted at %llu\n", now);
13601 +
13602 +	tsk_rt(t)->priority_boosted = 1;
13603 +	tsk_rt(t)->boost_start_time = now;
13604 +
13605 +	if (pedf->scheduled != t) {
13606 +		/* holder may be queued: first stop queue changes */
13607 +		raw_spin_lock(&pedf->domain.release_lock);
13608 +		if (is_queued(t) &&
13609 +		    /* If it is queued, then we need to re-order. */
13610 +		    bheap_decrease(edf_ready_order, tsk_rt(t)->heap_node) &&
13611 +		    /* If we bubbled to the top, then we need to check for preemptions. */
13612 +		    edf_preemption_needed(&pedf->domain, pedf->scheduled))
13613 +				preempt(pedf);
13614 +		raw_spin_unlock(&pedf->domain.release_lock);
13615 +	} /* else: nothing to do since the job is not queued while scheduled */
13616 +
13617 +	raw_spin_unlock_irqrestore(&pedf->slock, flags);
13618 +}
13619 +
13620 +static void unboost_priority(struct task_struct* t)
13621 +{
13622 +	unsigned long		flags;
13623 +	psnedf_domain_t* 	pedf = task_pedf(t);
13624 +	lt_t			now;
13625 +
13626 +	raw_spin_lock_irqsave(&pedf->slock, flags);
13627 +	now = litmus_clock();
13628 +
13629 +	/* Assumption: this only happens when the job is scheduled.
13630 +	 * Exception: If t transitioned to non-real-time mode, we no longer
13631 +	 * care about it. */
13632 +	BUG_ON(pedf->scheduled != t && is_realtime(t));
13633 +
13634 +	TRACE_TASK(t, "priority restored at %llu\n", now);
13635 +
13636 +	tsk_rt(t)->priority_boosted = 0;
13637 +	tsk_rt(t)->boost_start_time = 0;
13638 +
13639 +	/* check if this changes anything */
13640 +	if (edf_preemption_needed(&pedf->domain, pedf->scheduled))
13641 +		preempt(pedf);
13642 +
13643 +	raw_spin_unlock_irqrestore(&pedf->slock, flags);
13644 +}
13645 +
13646 +#endif
13647 +
13648 +static int psnedf_preempt_check(psnedf_domain_t *pedf)
13649 +{
13650 +	if (edf_preemption_needed(&pedf->domain, pedf->scheduled)) {
13651 +		preempt(pedf);
13652 +		return 1;
13653 +	} else
13654 +		return 0;
13655 +}
13656 +
13657 +/* This check is trivial in partitioned systems as we only have to consider
13658 + * the CPU of the partition.
13659 + */
13660 +static int psnedf_check_resched(rt_domain_t *edf)
13661 +{
13662 +	psnedf_domain_t *pedf = container_of(edf, psnedf_domain_t, domain);
13663 +
13664 +	/* because this is a callback from rt_domain_t we already hold
13665 +	 * the necessary lock for the ready queue
13666 +	 */
13667 +	return psnedf_preempt_check(pedf);
13668 +}
13669 +
13670 +static void job_completion(struct task_struct* t, int forced)
13671 +{
13672 +	sched_trace_task_completion(t,forced);
13673 +	TRACE_TASK(t, "job_completion().\n");
13674 +
13675 +	tsk_rt(t)->completed = 0;
13676 +	prepare_for_next_period(t);
13677 +}
13678 +
13679 +static struct task_struct* psnedf_schedule(struct task_struct * prev)
13680 +{
13681 +	psnedf_domain_t* 	pedf = local_pedf;
13682 +	rt_domain_t*		edf  = &pedf->domain;
13683 +	struct task_struct*	next;
13684 +
13685 +	int 			out_of_time, sleep, preempt,
13686 +				np, exists, blocks, resched;
13687 +
13688 +	raw_spin_lock(&pedf->slock);
13689 +
13690 +	/* sanity checking
13691 +	 * differently from gedf, when a task exits (dead)
13692 +	 * pedf->schedule may be null and prev _is_ realtime
13693 +	 * pedf->scheduled may be NULL and prev _is_ realtime
13694 +	BUG_ON(pedf->scheduled && pedf->scheduled != prev);
13695 +	BUG_ON(pedf->scheduled && !is_realtime(prev));
13696 +
13697 +	/* (0) Determine state */
13698 +	exists      = pedf->scheduled != NULL;
13699 +	blocks      = exists && !is_running(pedf->scheduled);
13700 +	out_of_time = exists &&
13701 +				  budget_enforced(pedf->scheduled) &&
13702 +				  budget_exhausted(pedf->scheduled);
13703 +	np 	    = exists && is_np(pedf->scheduled);
13704 +	sleep	    = exists && is_completed(pedf->scheduled);
13705 +	preempt     = edf_preemption_needed(edf, prev);
13706 +
13707 +	/* If we need to preempt do so.
13708 +	 * The following checks set resched to 1 in case of special
13709 +	 * circumstances.
13710 +	 */
13711 +	resched = preempt;
13712 +
13713 +	/* If a task blocks we have no choice but to reschedule.
13714 +	 */
13715 +	if (blocks)
13716 +		resched = 1;
13717 +
13718 +	/* Request a sys_exit_np() call if we would like to preempt but cannot.
13719 +	 * Multiple calls to request_exit_np() don't hurt.
13720 +	 */
13721 +	if (np && (out_of_time || preempt || sleep))
13722 +		request_exit_np(pedf->scheduled);
13723 +
13724 +	/* Any task that is preemptable and either exhausts its execution
13725 +	 * budget or wants to sleep completes. We may have to reschedule after
13726 +	 * this.
13727 +	 */
13728 +	if (!np && (out_of_time || sleep) && !blocks) {
13729 +		job_completion(pedf->scheduled, !sleep);
13730 +		resched = 1;
13731 +	}
13732 +
13733 +	/* The final scheduling decision. Do we need to switch for some reason?
13734 +	 * Switch if we are in RT mode and have no task or if we need to
13735 +	 * resched.
13736 +	 */
13737 +	next = NULL;
13738 +	if ((!np || blocks) && (resched || !exists)) {
13739 +		/* When preempting a task that does not block, then
13740 +		 * re-insert it into either the ready queue or the
13741 +		 * release queue (if it completed). requeue() picks
13742 +		 * the appropriate queue.
13743 +		 */
13744 +		if (pedf->scheduled && !blocks)
13745 +			requeue(pedf->scheduled, edf);
13746 +		next = __take_ready(edf);
13747 +	} else
13748 +		/* Only override Linux scheduler if we have a real-time task
13749 +		 * scheduled that needs to continue.
13750 +		 */
13751 +		if (exists)
13752 +			next = prev;
13753 +
13754 +	if (next) {
13755 +		TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
13756 +	} else {
13757 +		TRACE("becoming idle at %llu\n", litmus_clock());
13758 +	}
13759 +
13760 +	pedf->scheduled = next;
13761 +	sched_state_task_picked();
13762 +	raw_spin_unlock(&pedf->slock);
13763 +
13764 +	return next;
13765 +}
13766 +
13767 +
13768 +/*	Prepare a task for running in RT mode
13769 + */
13770 +static void psnedf_task_new(struct task_struct * t, int on_rq, int is_scheduled)
13771 +{
13772 +	rt_domain_t* 		edf  = task_edf(t);
13773 +	psnedf_domain_t* 	pedf = task_pedf(t);
13774 +	unsigned long		flags;
13775 +
13776 +	TRACE_TASK(t, "psn edf: task new, cpu = %d\n",
13777 +		   t->rt_param.task_params.cpu);
13778 +
13779 +	/* setup job parameters */
13780 +	release_at(t, litmus_clock());
13781 +
13782 +	/* The task should be running in the queue, otherwise signal
13783 +	 * code will try to wake it up with fatal consequences.
13784 +	 */
13785 +	raw_spin_lock_irqsave(&pedf->slock, flags);
13786 +	if (is_scheduled) {
13787 +		/* there shouldn't be anything else scheduled at the time */
13788 +		BUG_ON(pedf->scheduled);
13789 +		pedf->scheduled = t;
13790 +	} else {
13791 +		/* !is_scheduled means it is not scheduled right now, but it
13792 +		 * does not mean that it is suspended. If it is not suspended,
13793 +		 * it still needs to be requeued. If it is suspended, there is
13794 +		 * nothing that we need to do as it will be handled by the
13795 +		 * wake_up() handler. */
13796 +		if (is_running(t)) {
13797 +			requeue(t, edf);
13798 +			/* maybe we have to reschedule */
13799 +			psnedf_preempt_check(pedf);
13800 +		}
13801 +	}
13802 +	raw_spin_unlock_irqrestore(&pedf->slock, flags);
13803 +}
13804 +
13805 +static void psnedf_task_wake_up(struct task_struct *task)
13806 +{
13807 +	unsigned long		flags;
13808 +	psnedf_domain_t* 	pedf = task_pedf(task);
13809 +	rt_domain_t* 		edf  = task_edf(task);
13810 +	lt_t			now;
13811 +
13812 +	TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
13813 +	raw_spin_lock_irqsave(&pedf->slock, flags);
13814 +	BUG_ON(is_queued(task));
13815 +	now = litmus_clock();
13816 +	if (is_sporadic(task) && is_tardy(task, now)
13817 +#ifdef CONFIG_LITMUS_LOCKING
13818 +	/* We need to take suspensions because of semaphores into
13819 +	 * account! If a job resumes after being suspended due to acquiring
13820 +	 * a semaphore, it should never be treated as a new job release.
13821 +	 */
13822 +	    && !is_priority_boosted(task)
13823 +#endif
13824 +		) {
13825 +		/* new sporadic release */
13826 +		release_at(task, now);
13827 +		sched_trace_task_release(task);
13828 +	}
13829 +
13830 +	/* Only add to ready queue if it is not the currently-scheduled
13831 +	 * task. This could be the case if a task was woken up concurrently
13832 +	 * on a remote CPU before the executing CPU got around to actually
13833 +	 * de-scheduling the task, i.e., wake_up() raced with schedule()
13834 +	 * and won.
13835 +	 */
13836 +	if (pedf->scheduled != task) {
13837 +		requeue(task, edf);
13838 +		psnedf_preempt_check(pedf);
13839 +	}
13840 +
13841 +	raw_spin_unlock_irqrestore(&pedf->slock, flags);
13842 +	TRACE_TASK(task, "wake up done\n");
13843 +}
13844 +
13845 +static void psnedf_task_block(struct task_struct *t)
13846 +{
13847 +	/* only running tasks can block, thus t is in no queue */
13848 +	TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state);
13849 +
13850 +	BUG_ON(!is_realtime(t));
13851 +	BUG_ON(is_queued(t));
13852 +}
13853 +
13854 +static void psnedf_task_exit(struct task_struct * t)
13855 +{
13856 +	unsigned long flags;
13857 +	psnedf_domain_t* 	pedf = task_pedf(t);
13858 +	rt_domain_t*		edf;
13859 +
13860 +	raw_spin_lock_irqsave(&pedf->slock, flags);
13861 +	if (is_queued(t)) {
13862 +		/* dequeue */
13863 +		edf  = task_edf(t);
13864 +		remove(edf, t);
13865 +	}
13866 +	if (pedf->scheduled == t)
13867 +		pedf->scheduled = NULL;
13868 +
13869 +	TRACE_TASK(t, "RIP, now reschedule\n");
13870 +
13871 +	preempt(pedf);
13872 +	raw_spin_unlock_irqrestore(&pedf->slock, flags);
13873 +}
13874 +
13875 +#ifdef CONFIG_LITMUS_LOCKING
13876 +
13877 +#include <litmus/fdso.h>
13878 +#include <litmus/srp.h>
13879 +
13880 +/* ******************** SRP support ************************ */
13881 +
13882 +static unsigned int psnedf_get_srp_prio(struct task_struct* t)
13883 +{
13884 +	return get_rt_relative_deadline(t);
13885 +}
13886 +
13887 +/* ******************** FMLP support ********************** */
13888 +
13889 +/* struct for semaphore with priority boosting */
13890 +struct fmlp_semaphore {
13891 +	struct litmus_lock litmus_lock;
13892 +
13893 +	/* current resource holder */
13894 +	struct task_struct *owner;
13895 +
13896 +	/* FIFO queue of waiting tasks */
13897 +	wait_queue_head_t wait;
13898 +};
13899 +
13900 +static inline struct fmlp_semaphore* fmlp_from_lock(struct litmus_lock* lock)
13901 +{
13902 +	return container_of(lock, struct fmlp_semaphore, litmus_lock);
13903 +}
13904 +int psnedf_fmlp_lock(struct litmus_lock* l)
13905 +{
13906 +	struct task_struct* t = current;
13907 +	struct fmlp_semaphore *sem = fmlp_from_lock(l);
13908 +	wait_queue_t wait;
13909 +	unsigned long flags;
13910 +
13911 +	if (!is_realtime(t))
13912 +		return -EPERM;
13913 +
13914 +	/* prevent nested lock acquisition --- not supported by FMLP */
13915 +	if (tsk_rt(t)->num_locks_held ||
13916 +	    tsk_rt(t)->num_local_locks_held)
13917 +		return -EBUSY;
13918 +
13919 +	spin_lock_irqsave(&sem->wait.lock, flags);
13920 +
13921 +	if (sem->owner) {
13922 +		/* resource is not free => must suspend and wait */
13923 +
13924 +		init_waitqueue_entry(&wait, t);
13925 +
13926 +		/* FIXME: interruptible would be nice some day */
13927 +		set_task_state(t, TASK_UNINTERRUPTIBLE);
13928 +
13929 +		__add_wait_queue_tail_exclusive(&sem->wait, &wait);
13930 +
13931 +		TS_LOCK_SUSPEND;
13932 +
13933 +		/* release lock before sleeping */
13934 +		spin_unlock_irqrestore(&sem->wait.lock, flags);
13935 +
13936 +		/* We depend on the FIFO order.  Thus, we don't need to recheck
13937 +		 * when we wake up; we are guaranteed to have the lock since
13938 +		 * there is only one wake up per release.
13939 +		 */
13940 +
13941 +		schedule();
13942 +
13943 +		TS_LOCK_RESUME;
13944 +
13945 +		/* Since we hold the lock, no other task will change
13946 +		 * ->owner. We can thus check it without acquiring the spin
13947 +		 * lock. */
13948 +		BUG_ON(sem->owner != t);
13949 +	} else {
13950 +		/* it's ours now */
13951 +		sem->owner = t;
13952 +
13953 +		/* mark the task as priority-boosted. */
13954 +		boost_priority(t);
13955 +
13956 +		spin_unlock_irqrestore(&sem->wait.lock, flags);
13957 +	}
13958 +
13959 +	tsk_rt(t)->num_locks_held++;
13960 +
13961 +	return 0;
13962 +}
13963 +
13964 +int psnedf_fmlp_unlock(struct litmus_lock* l)
13965 +{
13966 +	struct task_struct *t = current, *next;
13967 +	struct fmlp_semaphore *sem = fmlp_from_lock(l);
13968 +	unsigned long flags;
13969 +	int err = 0;
13970 +
13971 +	spin_lock_irqsave(&sem->wait.lock, flags);
13972 +
13973 +	if (sem->owner != t) {
13974 +		err = -EINVAL;
13975 +		goto out;
13976 +	}
13977 +
13978 +	tsk_rt(t)->num_locks_held--;
13979 +
13980 +	/* we lose the benefit of priority boosting */
13981 +
13982 +	unboost_priority(t);
13983 +
13984 +	/* check if there are jobs waiting for this resource */
13985 +	next = __waitqueue_remove_first(&sem->wait);
13986 +	if (next) {
13987 +		/* boost next job */
13988 +		boost_priority(next);
13989 +
13990 +		/* next becomes the resource holder */
13991 +		sem->owner = next;
13992 +
13993 +		/* wake up next */
13994 +		wake_up_process(next);
13995 +	} else
13996 +		/* resource becomes available */
13997 +		sem->owner = NULL;
13998 +
13999 +out:
14000 +	spin_unlock_irqrestore(&sem->wait.lock, flags);
14001 +	return err;
14002 +}
14003 +
14004 +int psnedf_fmlp_close(struct litmus_lock* l)
14005 +{
14006 +	struct task_struct *t = current;
14007 +	struct fmlp_semaphore *sem = fmlp_from_lock(l);
14008 +	unsigned long flags;
14009 +
14010 +	int owner;
14011 +
14012 +	spin_lock_irqsave(&sem->wait.lock, flags);
14013 +
14014 +	owner = sem->owner == t;
14015 +
14016 +	spin_unlock_irqrestore(&sem->wait.lock, flags);
14017 +
14018 +	if (owner)
14019 +		psnedf_fmlp_unlock(l);
14020 +
14021 +	return 0;
14022 +}
14023 +
14024 +void psnedf_fmlp_free(struct litmus_lock* lock)
14025 +{
14026 +	kfree(fmlp_from_lock(lock));
14027 +}
14028 +
14029 +static struct litmus_lock_ops psnedf_fmlp_lock_ops = {
14030 +	.close  = psnedf_fmlp_close,
14031 +	.lock   = psnedf_fmlp_lock,
14032 +	.unlock = psnedf_fmlp_unlock,
14033 +	.deallocate = psnedf_fmlp_free,
14034 +};
14035 +
14036 +static struct litmus_lock* psnedf_new_fmlp(void)
14037 +{
14038 +	struct fmlp_semaphore* sem;
14039 +
14040 +	sem = kmalloc(sizeof(*sem), GFP_KERNEL);
14041 +	if (!sem)
14042 +		return NULL;
14043 +
14044 +	sem->owner   = NULL;
14045 +	init_waitqueue_head(&sem->wait);
14046 +	sem->litmus_lock.ops = &psnedf_fmlp_lock_ops;
14047 +
14048 +	return &sem->litmus_lock;
14049 +}
14050 +
14051 +/* **** lock constructor **** */
14052 +
14053 +
14054 +static long psnedf_allocate_lock(struct litmus_lock **lock, int type,
14055 +				 void* __user unused)
14056 +{
14057 +	int err = -ENXIO;
14058 +	struct srp_semaphore* srp;
14059 +
14060 +	/* PSN-EDF currently supports the SRP for local resources and the FMLP
14061 +	 * for global resources. */
14062 +	switch (type) {
14063 +	case FMLP_SEM:
14064 +		/* Flexible Multiprocessor Locking Protocol */
14065 +		*lock = psnedf_new_fmlp();
14066 +		if (*lock)
14067 +			err = 0;
14068 +		else
14069 +			err = -ENOMEM;
14070 +		break;
14071 +
14072 +	case SRP_SEM:
14073 +		/* Baker's Stack Resource Policy */
14074 +		srp = allocate_srp_semaphore();
14075 +		if (srp) {
14076 +			*lock = &srp->litmus_lock;
14077 +			err = 0;
14078 +		} else
14079 +			err = -ENOMEM;
14080 +		break;
14081 +	};
14082 +
14083 +	return err;
14084 +}
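+
+/* Note: this allocator is not invoked directly by tasks; it is reached through
+ * the FDSO layer when user space opens a lock object (e.g., via liblitmus'
+ * open_fmlp_sem()/open_srp_sem() wrappers), and the resulting ops are then
+ * called from the litmus_lock()/litmus_unlock() system calls.
+ */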
14085 +
14086 +#endif
14087 +
14088 +static struct domain_proc_info psnedf_domain_proc_info;
14089 +static long psnedf_get_domain_proc_info(struct domain_proc_info **ret)
14090 +{
14091 +	*ret = &psnedf_domain_proc_info;
14092 +	return 0;
14093 +}
14094 +
14095 +static void psnedf_setup_domain_proc(void)
14096 +{
14097 +	int i, cpu;
14098 +	int release_master =
14099 +#ifdef CONFIG_RELEASE_MASTER
14100 +		atomic_read(&release_master_cpu);
14101 +#else
14102 +		NO_CPU;
14103 +#endif
14104 +	int num_rt_cpus = num_online_cpus() - (release_master != NO_CPU);
14105 +	struct cd_mapping *cpu_map, *domain_map;
14106 +
14107 +	memset(&psnedf_domain_proc_info, 0, sizeof(psnedf_domain_proc_info));
14108 +	init_domain_proc_info(&psnedf_domain_proc_info, num_rt_cpus, num_rt_cpus);
14109 +	psnedf_domain_proc_info.num_cpus = num_rt_cpus;
14110 +	psnedf_domain_proc_info.num_domains = num_rt_cpus;
14111 +
14112 +	for (cpu = 0, i = 0; cpu < num_online_cpus(); ++cpu) {
14113 +		if (cpu == release_master)
14114 +			continue;
14115 +		cpu_map = &psnedf_domain_proc_info.cpu_to_domains[i];
14116 +		domain_map = &psnedf_domain_proc_info.domain_to_cpus[i];
14117 +
14118 +		cpu_map->id = cpu;
14119 +		domain_map->id = i; /* enumerate w/o counting the release master */
14120 +		cpumask_set_cpu(i, cpu_map->mask);
14121 +		cpumask_set_cpu(cpu, domain_map->mask);
14122 +		++i;
14123 +	}
14124 +}
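+
+/* Example of the resulting mapping (assuming 4 online CPUs): without a release
+ * master, each CPU i is reported as its own domain i; with, say, CPU 0 acting
+ * as release master, CPUs 1, 2, and 3 are reported as domains 0, 1, and 2.
+ */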
14125 +
14126 +static long psnedf_activate_plugin(void)
14127 +{
14128 +#ifdef CONFIG_RELEASE_MASTER
14129 +	int cpu;
14130 +
14131 +	for_each_online_cpu(cpu) {
14132 +		remote_edf(cpu)->release_master = atomic_read(&release_master_cpu);
14133 +	}
14134 +#endif
14135 +
14136 +#ifdef CONFIG_LITMUS_LOCKING
14137 +	get_srp_prio = psnedf_get_srp_prio;
14138 +#endif
14139 +
14140 +	psnedf_setup_domain_proc();
14141 +
14142 +	return 0;
14143 +}
14144 +
14145 +static long psnedf_deactivate_plugin(void)
14146 +{
14147 +	destroy_domain_proc_info(&psnedf_domain_proc_info);
14148 +	return 0;
14149 +}
14150 +
14151 +static long psnedf_admit_task(struct task_struct* tsk)
14152 +{
14153 +	if (task_cpu(tsk) == tsk->rt_param.task_params.cpu
14154 +#ifdef CONFIG_RELEASE_MASTER
14155 +	    /* don't allow tasks on release master CPU */
14156 +	     && task_cpu(tsk) != remote_edf(task_cpu(tsk))->release_master
14157 +#endif
14158 +		)
14159 +		return 0;
14160 +	else
14161 +		return -EINVAL;
14162 +}
14163 +
14164 +/*	Plugin object	*/
14165 +static struct sched_plugin psn_edf_plugin __cacheline_aligned_in_smp = {
14166 +	.plugin_name		= "PSN-EDF",
14167 +	.task_new		= psnedf_task_new,
14168 +	.complete_job		= complete_job,
14169 +	.task_exit		= psnedf_task_exit,
14170 +	.schedule		= psnedf_schedule,
14171 +	.task_wake_up		= psnedf_task_wake_up,
14172 +	.task_block		= psnedf_task_block,
14173 +	.admit_task		= psnedf_admit_task,
14174 +	.activate_plugin	= psnedf_activate_plugin,
14175 +	.deactivate_plugin	= psnedf_deactivate_plugin,
14176 +	.get_domain_proc_info	= psnedf_get_domain_proc_info,
14177 +#ifdef CONFIG_LITMUS_LOCKING
14178 +	.allocate_lock		= psnedf_allocate_lock,
14179 +#endif
14180 +};
14181 +
14182 +
14183 +static int __init init_psn_edf(void)
14184 +{
14185 +	int i;
14186 +
14187 +	/* We do not really want to support cpu hotplug, do we? ;)
14188 +	 * However, if we are so crazy to do so,
14189 +	 * we cannot use num_online_cpus()
14190 +	 */
14191 +	for (i = 0; i < num_online_cpus(); i++) {
14192 +		psnedf_domain_init(remote_pedf(i),
14193 +				   psnedf_check_resched,
14194 +				   NULL, i);
14195 +	}
14196 +	return register_sched_plugin(&psn_edf_plugin);
14197 +}
14198 +
14199 +module_init(init_psn_edf);
14200 -- 
14201 1.8.1.2
14202 
14203 
14204 From 8fcdf62f4db13de12ae638c8e7e3535858fb8d95 Mon Sep 17 00:00:00 2001
14205 From: Bjoern Brandenburg <bbb@mpi-sws.org>
14206 Date: Tue, 12 Feb 2013 19:16:03 +0100
14207 Subject: [PATCH 047/119] Add GSN-EDF scheduler plugin
14208 
14209 ---
14210  litmus/Makefile        |    1 +
14211  litmus/sched_gsn_edf.c | 1069 ++++++++++++++++++++++++++++++++++++++++++++++++
14212  2 files changed, 1070 insertions(+)
14213  create mode 100644 litmus/sched_gsn_edf.c
14214 
14215 diff --git a/litmus/Makefile b/litmus/Makefile
14216 index 0db695e..c01ce3e 100644
14217 --- a/litmus/Makefile
14218 +++ b/litmus/Makefile
14219 @@ -19,6 +19,7 @@ obj-y     = sched_plugin.o litmus.o \
14220  	    binheap.o \
14221  	    ctrldev.o \
14222  	    uncachedev.o \
14223 +	    sched_gsn_edf.o \
14224  	    sched_psn_edf.o
14225  
14226  
14227 diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c
14228 new file mode 100644
14229 index 0000000..9307d0b
14230 --- /dev/null
14231 +++ b/litmus/sched_gsn_edf.c
14232 @@ -0,0 +1,1069 @@
14233 +/*
14234 + * litmus/sched_gsn_edf.c
14235 + *
14236 + * Implementation of the GSN-EDF scheduling algorithm.
14237 + *
14238 + * This version uses the simple approach and serializes all scheduling
14239 + * decisions by the use of a queue lock. This is probably not the
14240 + * best way to do it, but it should suffice for now.
14241 + */
14242 +
14243 +#include <linux/spinlock.h>
14244 +#include <linux/percpu.h>
14245 +#include <linux/sched.h>
14246 +#include <linux/slab.h>
14247 +
14248 +#include <litmus/litmus.h>
14249 +#include <litmus/jobs.h>
14250 +#include <litmus/sched_plugin.h>
14251 +#include <litmus/edf_common.h>
14252 +#include <litmus/sched_trace.h>
14253 +#include <litmus/trace.h>
14254 +
14255 +#include <litmus/preempt.h>
14256 +#include <litmus/budget.h>
14257 +
14258 +#include <litmus/bheap.h>
14259 +
14260 +#ifdef CONFIG_SCHED_CPU_AFFINITY
14261 +#include <litmus/affinity.h>
14262 +#endif
14263 +
14264 +/* to set up domain/cpu mappings */
14265 +#include <litmus/litmus_proc.h>
14266 +
14267 +#include <linux/module.h>
14268 +
14269 +/* Overview of GSN-EDF operations.
14270 + *
14271 + * For a detailed explanation of GSN-EDF have a look at the FMLP paper. This
14272 + * description only covers how the individual operations are implemented in
14273 + * LITMUS.
14274 + *
14275 + * link_task_to_cpu(T, cpu) 	- Low-level operation to update the linkage
14276 + *                                structure (NOT the actually scheduled
14277 + *                                task). If there is another linked task To
14278 + *                                already it will set To->linked_on = NO_CPU
14279 + *                                (thereby removing its association with this
14280 + *                                CPU). However, it will not requeue the
14281 + *                                previously linked task (if any). It will set
14282 + *                                T's state to not completed and check whether
14283 + *                                it is already running somewhere else. If T
14284 + *                                is scheduled somewhere else it will link
14285 + *                                it to that CPU instead (and pull the linked
14286 + *                                task to cpu). T may be NULL.
14287 + *
14288 + * unlink(T)			- Unlink removes T from all scheduler data
14289 + *                                structures. If it is linked to some CPU it
14290 + *                                will link NULL to that CPU. If it is
14291 + *                                currently queued in the gsnedf queue it will
14292 + *                                be removed from the rt_domain. It is safe to
14293 + *                                call unlink(T) if T is not linked. T may not
14294 + *                                be NULL.
14295 + *
14296 + * requeue(T)			- Requeue will insert T into the appropriate
14297 + *                                queue. If the system is in real-time mode and
14298 + *                                T is already released, it will go into the
14299 + *                                ready queue. If the system is not in
14300 + *                                real-time mode, T will go into the
14301 + *                                release queue. If T's release time is in the
14302 + *                                future, it will go into the release
14303 + *                                queue. That means that T's release time/job
14304 + *                                no/etc. has to be updated before requeue(T) is
14305 + *                                called. It is not safe to call requeue(T)
14306 + *                                when T is already queued. T may not be NULL.
14307 + *
14308 + * gsnedf_job_arrival(T)	- This is the catch all function when T enters
14309 + *                                the system after either a suspension or at a
14310 + *                                job release. It will queue T (which means it
14311 + *                                is not safe to call gsnedf_job_arrival(T) if
14312 + *                                T is already queued) and then check whether a
14313 + *                                preemption is necessary. If a preemption is
14314 + *                                necessary it will update the linkage
14315 + *                                accordingly and cause scheduled to be called
14316 + *                                (either with an IPI or need_resched). It is
14317 + *                                safe to call gsnedf_job_arrival(T) if T's
14318 + *                                next job has not been actually released yet
14319 + *                                (release time in the future). T will be put
14320 + *                                on the release queue in that case.
14321 + *
14322 + * job_completion(T)		- Take care of everything that needs to be done
14323 + *                                to prepare T for its next release and place
14324 + *                                it in the right queue with
14325 + *                                gsnedf_job_arrival().
14326 + *
14327 + *
14328 + * When we know that T is linked to a CPU, then link_task_to_cpu(NULL, CPU) is
14329 + * equivalent to unlink(T). Note that if you unlink a task from a CPU, none of
14330 + * these functions will automatically promote a pending task from the ready
14331 + * queue to the now-idle CPU. This is the job of the calling function (by means
14332 + * of __take_ready).
14333 + */
14334 +
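+/* Simplified sketch of how the above operations compose on a job arrival
+ * (cf. gsnedf_job_arrival() and check_for_preemptions() further below):
+ *
+ *	requeue(T);                          // ready or release queue
+ *	last = lowest_prio_cpu();            // CPU with lowest-prio linked task
+ *	if (edf_preemption_needed(&gsnedf, last->linked)) {
+ *		link_task_to_cpu(__take_ready(&gsnedf), last);
+ *		preempt(last);               // IPI/need_resched as appropriate
+ *	}
+ */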
14335 +
14336 +/* cpu_entry_t - maintain the linked and scheduled state
14337 + */
14338 +typedef struct  {
14339 +	int 			cpu;
14340 +	struct task_struct*	linked;		/* only RT tasks */
14341 +	struct task_struct*	scheduled;	/* only RT tasks */
14342 +	struct bheap_node*	hn;
14343 +} cpu_entry_t;
14344 +DEFINE_PER_CPU(cpu_entry_t, gsnedf_cpu_entries);
14345 +
14346 +cpu_entry_t* gsnedf_cpus[NR_CPUS];
14347 +
14348 +/* the cpus queue themselves according to priority in here */
14349 +static struct bheap_node gsnedf_heap_node[NR_CPUS];
14350 +static struct bheap      gsnedf_cpu_heap;
14351 +
14352 +static rt_domain_t gsnedf;
14353 +#define gsnedf_lock (gsnedf.ready_lock)
14354 +
14355 +
14356 +/* Uncomment this if you want to see all scheduling decisions in the
14357 + * TRACE() log.
14358 +#define WANT_ALL_SCHED_EVENTS
14359 + */
14360 +
14361 +static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
14362 +{
14363 +	cpu_entry_t *a, *b;
14364 +	a = _a->value;
14365 +	b = _b->value;
14366 +	/* Note that a and b are inverted: we want the lowest-priority CPU at
14367 +	 * the top of the heap.
14368 +	 */
14369 +	return edf_higher_prio(b->linked, a->linked);
14370 +}
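+
+/* Example: if CPU A's linked job has deadline 10 and CPU B's has deadline 20,
+ * then B is the "lower-priority" CPU and ends up at the top of the heap, so
+ * lowest_prio_cpu() returns B; an idle CPU (linked == NULL) always wins.
+ */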
14371 +
14372 +/* update_cpu_position - Move the cpu entry to the correct place to maintain
14373 + *                       order in the cpu queue. Caller must hold gsnedf lock.
14374 + */
14375 +static void update_cpu_position(cpu_entry_t *entry)
14376 +{
14377 +	if (likely(bheap_node_in_heap(entry->hn)))
14378 +		bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn);
14379 +	bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn);
14380 +}
14381 +
14382 +/* caller must hold gsnedf lock */
14383 +static cpu_entry_t* lowest_prio_cpu(void)
14384 +{
14385 +	struct bheap_node* hn;
14386 +	hn = bheap_peek(cpu_lower_prio, &gsnedf_cpu_heap);
14387 +	return hn->value;
14388 +}
14389 +
14390 +
14391 +/* link_task_to_cpu - Update the link of a CPU.
14392 + *                    Handles the case where the to-be-linked task is already
14393 + *                    scheduled on a different CPU.
14394 + */
14395 +static noinline void link_task_to_cpu(struct task_struct* linked,
14396 +				      cpu_entry_t *entry)
14397 +{
14398 +	cpu_entry_t *sched;
14399 +	struct task_struct* tmp;
14400 +	int on_cpu;
14401 +
14402 +	BUG_ON(linked && !is_realtime(linked));
14403 +
14404 +	/* Currently linked task is set to be unlinked. */
14405 +	if (entry->linked) {
14406 +		entry->linked->rt_param.linked_on = NO_CPU;
14407 +	}
14408 +
14409 +	/* Link new task to CPU. */
14410 +	if (linked) {
14411 +		/* handle the case where the task is already scheduled somewhere else */
14412 +		on_cpu = linked->rt_param.scheduled_on;
14413 +		if (on_cpu != NO_CPU) {
14414 +			sched = &per_cpu(gsnedf_cpu_entries, on_cpu);
14415 +			/* this should only happen if not linked already */
14416 +			BUG_ON(sched->linked == linked);
14417 +
14418 +			/* If we are already scheduled on the CPU to which we
14419 +			 * wanted to link, we don't need to do the swap --
14420 +			 * we just link ourselves to the CPU and depend on
14421 +			 * the caller to get things right.
14422 +			 */
14423 +			if (entry != sched) {
14424 +				TRACE_TASK(linked,
14425 +					   "already scheduled on %d, updating link.\n",
14426 +					   sched->cpu);
14427 +				tmp = sched->linked;
14428 +				linked->rt_param.linked_on = sched->cpu;
14429 +				sched->linked = linked;
14430 +				update_cpu_position(sched);
14431 +				linked = tmp;
14432 +			}
14433 +		}
14434 +		if (linked) /* might be NULL due to swap */
14435 +			linked->rt_param.linked_on = entry->cpu;
14436 +	}
14437 +	entry->linked = linked;
14438 +#ifdef WANT_ALL_SCHED_EVENTS
14439 +	if (linked)
14440 +		TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
14441 +	else
14442 +		TRACE("NULL linked to %d.\n", entry->cpu);
14443 +#endif
14444 +	update_cpu_position(entry);
14445 +}
14446 +
14447 +/* unlink - Make sure a task is not linked any longer to an entry
14448 + *          where it was linked before. Must hold gsnedf_lock.
14449 + */
14450 +static noinline void unlink(struct task_struct* t)
14451 +{
14452 +	cpu_entry_t *entry;
14453 +
14454 +	if (t->rt_param.linked_on != NO_CPU) {
14455 +		/* unlink */
14456 +		entry = &per_cpu(gsnedf_cpu_entries, t->rt_param.linked_on);
14457 +		t->rt_param.linked_on = NO_CPU;
14458 +		link_task_to_cpu(NULL, entry);
14459 +	} else if (is_queued(t)) {
14460 +		/* This is an interesting situation: t is scheduled,
14461 +		 * but was just recently unlinked.  It cannot be
14462 +		 * linked anywhere else (because then it would have
14463 +		 * been relinked to this CPU), thus it must be in some
14464 +		 * queue. We must remove it from the list in this
14465 +		 * case.
14466 +		 */
14467 +		remove(&gsnedf, t);
14468 +	}
14469 +}
14470 +
14471 +
14472 +/* preempt - force a CPU to reschedule
14473 + */
14474 +static void preempt(cpu_entry_t *entry)
14475 +{
14476 +	preempt_if_preemptable(entry->scheduled, entry->cpu);
14477 +}
14478 +
14479 +/* requeue - Put an unlinked task into gsn-edf domain.
14480 + *           Caller must hold gsnedf_lock.
14481 + */
14482 +static noinline void requeue(struct task_struct* task)
14483 +{
14484 +	BUG_ON(!task);
14485 +	/* sanity check before insertion */
14486 +	BUG_ON(is_queued(task));
14487 +
14488 +	if (is_early_releasing(task) || is_released(task, litmus_clock()))
14489 +		__add_ready(&gsnedf, task);
14490 +	else {
14491 +		/* it has got to wait */
14492 +		add_release(&gsnedf, task);
14493 +	}
14494 +}
14495 +
14496 +#ifdef CONFIG_SCHED_CPU_AFFINITY
14497 +static cpu_entry_t* gsnedf_get_nearest_available_cpu(cpu_entry_t *start)
14498 +{
14499 +	cpu_entry_t *affinity;
14500 +
14501 +	get_nearest_available_cpu(affinity, start, gsnedf_cpu_entries,
14502 +#ifdef CONFIG_RELEASE_MASTER
14503 +			gsnedf.release_master
14504 +#else
14505 +			NO_CPU
14506 +#endif
14507 +			);
14508 +
14509 +	return(affinity);
14510 +}
14511 +#endif
14512 +
14513 +/* check for any necessary preemptions */
14514 +static void check_for_preemptions(void)
14515 +{
14516 +	struct task_struct *task;
14517 +	cpu_entry_t *last;
14518 +
14519 +
14520 +#ifdef CONFIG_PREFER_LOCAL_LINKING
14521 +	cpu_entry_t *local;
14522 +
14523 +	/* Before linking to other CPUs, check first whether the local CPU is
14524 +	 * idle. */
14525 +	local = &__get_cpu_var(gsnedf_cpu_entries);
14526 +	task  = __peek_ready(&gsnedf);
14527 +
14528 +	if (task && !local->linked
14529 +#ifdef CONFIG_RELEASE_MASTER
14530 +	    && likely(local->cpu != gsnedf.release_master)
14531 +#endif
14532 +		) {
14533 +		task = __take_ready(&gsnedf);
14534 +		TRACE_TASK(task, "linking to local CPU %d to avoid IPI\n", local->cpu);
14535 +		link_task_to_cpu(task, local);
14536 +		preempt(local);
14537 +	}
14538 +#endif
14539 +
14540 +	for (last = lowest_prio_cpu();
14541 +	     edf_preemption_needed(&gsnedf, last->linked);
14542 +	     last = lowest_prio_cpu()) {
14543 +		/* preemption necessary */
14544 +		task = __take_ready(&gsnedf);
14545 +		TRACE("check_for_preemptions: attempting to link task %d to %d\n",
14546 +		      task->pid, last->cpu);
14547 +
14548 +#ifdef CONFIG_SCHED_CPU_AFFINITY
14549 +		{
14550 +			cpu_entry_t *affinity =
14551 +					gsnedf_get_nearest_available_cpu(
14552 +						&per_cpu(gsnedf_cpu_entries, task_cpu(task)));
14553 +			if (affinity)
14554 +				last = affinity;
14555 +			else if (requeue_preempted_job(last->linked))
14556 +				requeue(last->linked);
14557 +		}
14558 +#else
14559 +		if (requeue_preempted_job(last->linked))
14560 +			requeue(last->linked);
14561 +#endif
14562 +
14563 +		link_task_to_cpu(task, last);
14564 +		preempt(last);
14565 +	}
14566 +}
14567 +
14568 +/* gsnedf_job_arrival: task is either resumed or released */
14569 +static noinline void gsnedf_job_arrival(struct task_struct* task)
14570 +{
14571 +	BUG_ON(!task);
14572 +
14573 +	requeue(task);
14574 +	check_for_preemptions();
14575 +}
14576 +
14577 +static void gsnedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
14578 +{
14579 +	unsigned long flags;
14580 +
14581 +	raw_spin_lock_irqsave(&gsnedf_lock, flags);
14582 +
14583 +	__merge_ready(rt, tasks);
14584 +	check_for_preemptions();
14585 +
14586 +	raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
14587 +}
14588 +
14589 +/* caller holds gsnedf_lock */
14590 +static noinline void job_completion(struct task_struct *t, int forced)
14591 +{
14592 +	BUG_ON(!t);
14593 +
14594 +	sched_trace_task_completion(t, forced);
14595 +
14596 +	TRACE_TASK(t, "job_completion().\n");
14597 +
14598 +	/* set flags */
14599 +	tsk_rt(t)->completed = 0;
14600 +	/* prepare for next period */
14601 +	prepare_for_next_period(t);
14602 +	if (is_early_releasing(t) || is_released(t, litmus_clock()))
14603 +		sched_trace_task_release(t);
14604 +	/* unlink */
14605 +	unlink(t);
14606 +	/* requeue
14607 +	 * But don't requeue a blocking task. */
14608 +	if (is_running(t))
14609 +		gsnedf_job_arrival(t);
14610 +}
14611 +
14612 +/* Getting schedule() right is a bit tricky. schedule() may not make any
14613 + * assumptions on the state of the current task since it may be called for a
14614 + * number of reasons. It may be called because scheduler_tick() determined
14615 + * that a reschedule was necessary, because sys_exit_np() was called, because
14616 + * some Linux subsystem determined so, or even (in the worst case) because a bug is
14617 + * hidden somewhere. Thus, we must take extreme care to determine what the
14618 + * current state is.
14619 + *
14620 + * The CPU could currently be scheduling a task (or not), be linked (or not).
14621 + *
14622 + * The following assertions for the scheduled task could hold:
14623 + *
14624 + *      - !is_running(scheduled)        // the job blocks
14625 + *	- scheduled->timeslice == 0	// the job completed (forcefully)
14626 + *	- is_completed()		// the job completed (by syscall)
14627 + * 	- linked != scheduled		// we need to reschedule (for any reason)
14628 + * 	- is_np(scheduled)		// rescheduling must be delayed,
14629 + *					   sys_exit_np must be requested
14630 + *
14631 + * Any of these can occur together.
14632 + */
14633 +static struct task_struct* gsnedf_schedule(struct task_struct * prev)
14634 +{
14635 +	cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
14636 +	int out_of_time, sleep, preempt, np, exists, blocks;
14637 +	struct task_struct* next = NULL;
14638 +
14639 +#ifdef CONFIG_RELEASE_MASTER
14640 +	/* Bail out early if we are the release master.
14641 +	 * The release master never schedules any real-time tasks.
14642 +	 */
14643 +	if (unlikely(gsnedf.release_master == entry->cpu)) {
14644 +		sched_state_task_picked();
14645 +		return NULL;
14646 +	}
14647 +#endif
14648 +
14649 +	raw_spin_lock(&gsnedf_lock);
14650 +
14651 +	/* sanity checking */
14652 +	BUG_ON(entry->scheduled && entry->scheduled != prev);
14653 +	BUG_ON(entry->scheduled && !is_realtime(prev));
14654 +	BUG_ON(is_realtime(prev) && !entry->scheduled);
14655 +
14656 +	/* (0) Determine state */
14657 +	exists      = entry->scheduled != NULL;
14658 +	blocks      = exists && !is_running(entry->scheduled);
14659 +	out_of_time = exists && budget_enforced(entry->scheduled)
14660 +		&& budget_exhausted(entry->scheduled);
14661 +	np 	    = exists && is_np(entry->scheduled);
14662 +	sleep	    = exists && is_completed(entry->scheduled);
14663 +	preempt     = entry->scheduled != entry->linked;
14664 +
14665 +#ifdef WANT_ALL_SCHED_EVENTS
14666 +	TRACE_TASK(prev, "invoked gsnedf_schedule.\n");
14667 +#endif
14668 +
14669 +	if (exists)
14670 +		TRACE_TASK(prev,
14671 +			   "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
14672 +			   "state:%d sig:%d\n",
14673 +			   blocks, out_of_time, np, sleep, preempt,
14674 +			   prev->state, signal_pending(prev));
14675 +	if (entry->linked && preempt)
14676 +		TRACE_TASK(prev, "will be preempted by %s/%d\n",
14677 +			   entry->linked->comm, entry->linked->pid);
14678 +
14679 +
14680 +	/* If a task blocks we have no choice but to reschedule.
14681 +	 */
14682 +	if (blocks)
14683 +		unlink(entry->scheduled);
14684 +
14685 +	/* Request a sys_exit_np() call if we would like to preempt but cannot.
14686 +	 * We need to make sure to update the link structure anyway in case
14687 +	 * that we are still linked. Multiple calls to request_exit_np() don't
14688 +	 * hurt.
14689 +	 */
14690 +	if (np && (out_of_time || preempt || sleep)) {
14691 +		unlink(entry->scheduled);
14692 +		request_exit_np(entry->scheduled);
14693 +	}
14694 +
14695 +	/* Any task that is preemptable and either exhausts its execution
14696 +	 * budget or wants to sleep completes. We may have to reschedule after
14697 +	 * this. Don't do a job completion if we block (can't have timers running
14698 +	 * for blocked jobs).
14699 +	 */
14700 +	if (!np && (out_of_time || sleep) && !blocks)
14701 +		job_completion(entry->scheduled, !sleep);
14702 +
14703 +	/* Link pending task if we became unlinked.
14704 +	 */
14705 +	if (!entry->linked)
14706 +		link_task_to_cpu(__take_ready(&gsnedf), entry);
14707 +
14708 +	/* The final scheduling decision. Do we need to switch for some reason?
14709 +	 * If linked is different from scheduled, then select linked as next.
14710 +	 */
14711 +	if ((!np || blocks) &&
14712 +	    entry->linked != entry->scheduled) {
14713 +		/* Schedule a linked job? */
14714 +		if (entry->linked) {
14715 +			entry->linked->rt_param.scheduled_on = entry->cpu;
14716 +			next = entry->linked;
14717 +			TRACE_TASK(next, "scheduled_on = P%d\n", smp_processor_id());
14718 +		}
14719 +		if (entry->scheduled) {
14720 +			/* not gonna be scheduled soon */
14721 +			entry->scheduled->rt_param.scheduled_on = NO_CPU;
14722 +			TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
14723 +		}
14724 +	} else
14725 +		/* Only override Linux scheduler if we have a real-time task
14726 +		 * scheduled that needs to continue.
14727 +		 */
14728 +		if (exists)
14729 +			next = prev;
14730 +
14731 +	sched_state_task_picked();
14732 +
14733 +	raw_spin_unlock(&gsnedf_lock);
14734 +
14735 +#ifdef WANT_ALL_SCHED_EVENTS
14736 +	TRACE("gsnedf_lock released, next=0x%p\n", next);
14737 +
14738 +	if (next)
14739 +		TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
14740 +	else if (exists && !next)
14741 +		TRACE("becomes idle at %llu.\n", litmus_clock());
14742 +#endif
14743 +
14744 +
14745 +	return next;
14746 +}
14747 +
14748 +
14749 +/* _finish_switch - we just finished the switch away from prev
14750 + */
14751 +static void gsnedf_finish_switch(struct task_struct *prev)
14752 +{
14753 +	cpu_entry_t* 	entry = &__get_cpu_var(gsnedf_cpu_entries);
14754 +
14755 +	entry->scheduled = is_realtime(current) ? current : NULL;
14756 +#ifdef WANT_ALL_SCHED_EVENTS
14757 +	TRACE_TASK(prev, "switched away from\n");
14758 +#endif
14759 +}
14760 +
14761 +
14762 +/*	Prepare a task for running in RT mode
14763 + */
14764 +static void gsnedf_task_new(struct task_struct * t, int on_rq, int is_scheduled)
14765 +{
14766 +	unsigned long 		flags;
14767 +	cpu_entry_t* 		entry;
14768 +
14769 +	TRACE("gsn edf: task new %d\n", t->pid);
14770 +
14771 +	raw_spin_lock_irqsave(&gsnedf_lock, flags);
14772 +
14773 +	/* setup job params */
14774 +	release_at(t, litmus_clock());
14775 +
14776 +	if (is_scheduled) {
14777 +		entry = &per_cpu(gsnedf_cpu_entries, task_cpu(t));
14778 +		BUG_ON(entry->scheduled);
14779 +
14780 +#ifdef CONFIG_RELEASE_MASTER
14781 +		if (entry->cpu != gsnedf.release_master) {
14782 +#endif
14783 +			entry->scheduled = t;
14784 +			tsk_rt(t)->scheduled_on = task_cpu(t);
14785 +#ifdef CONFIG_RELEASE_MASTER
14786 +		} else {
14787 +			/* do not schedule on release master */
14788 +			preempt(entry); /* force resched */
14789 +			tsk_rt(t)->scheduled_on = NO_CPU;
14790 +		}
14791 +#endif
14792 +	} else {
14793 +		t->rt_param.scheduled_on = NO_CPU;
14794 +	}
14795 +	t->rt_param.linked_on          = NO_CPU;
14796 +
14797 +	if (is_running(t))
14798 +		gsnedf_job_arrival(t);
14799 +	raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
14800 +}
14801 +
14802 +static void gsnedf_task_wake_up(struct task_struct *task)
14803 +{
14804 +	unsigned long flags;
14805 +	lt_t now;
14806 +
14807 +	TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
14808 +
14809 +	raw_spin_lock_irqsave(&gsnedf_lock, flags);
14810 +	now = litmus_clock();
14811 +	if (is_sporadic(task) && is_tardy(task, now)) {
14812 +		/* new sporadic release */
14813 +		release_at(task, now);
14814 +		sched_trace_task_release(task);
14815 +	}
14816 +	gsnedf_job_arrival(task);
14817 +	raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
14818 +}
14819 +
14820 +static void gsnedf_task_block(struct task_struct *t)
14821 +{
14822 +	unsigned long flags;
14823 +
14824 +	TRACE_TASK(t, "block at %llu\n", litmus_clock());
14825 +
14826 +	/* unlink if necessary */
14827 +	raw_spin_lock_irqsave(&gsnedf_lock, flags);
14828 +	unlink(t);
14829 +	raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
14830 +
14831 +	BUG_ON(!is_realtime(t));
14832 +}
14833 +
14834 +
14835 +static void gsnedf_task_exit(struct task_struct * t)
14836 +{
14837 +	unsigned long flags;
14838 +
14839 +	/* unlink if necessary */
14840 +	raw_spin_lock_irqsave(&gsnedf_lock, flags);
14841 +	unlink(t);
14842 +	if (tsk_rt(t)->scheduled_on != NO_CPU) {
14843 +		gsnedf_cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL;
14844 +		tsk_rt(t)->scheduled_on = NO_CPU;
14845 +	}
14846 +	raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
14847 +
14848 +	BUG_ON(!is_realtime(t));
14849 +	TRACE_TASK(t, "RIP\n");
14850 +}
14851 +
14852 +
14853 +static long gsnedf_admit_task(struct task_struct* tsk)
14854 +{
14855 +	return 0;
14856 +}
14857 +
14858 +#ifdef CONFIG_LITMUS_LOCKING
14859 +
14860 +#include <litmus/fdso.h>
14861 +
14862 +/* called with IRQs off */
14863 +static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
14864 +{
14865 +	int linked_on;
14866 +	int check_preempt = 0;
14867 +
14868 +	raw_spin_lock(&gsnedf_lock);
14869 +
14870 +	TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid);
14871 +	tsk_rt(t)->inh_task = prio_inh;
14872 +
14873 +	linked_on  = tsk_rt(t)->linked_on;
14874 +
14875 +	/* If it is scheduled, then we need to reorder the CPU heap. */
14876 +	if (linked_on != NO_CPU) {
14877 +		TRACE_TASK(t, "%s: linked  on %d\n",
14878 +			   __FUNCTION__, linked_on);
14879 +		/* Holder is scheduled; need to re-order CPUs.
14880 +		 * We can't use heap_decrease() here since
14881 +		 * the cpu_heap is ordered in reverse direction, so
14882 +		 * it is actually an increase. */
14883 +		bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap,
14884 +			    gsnedf_cpus[linked_on]->hn);
14885 +		bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap,
14886 +			    gsnedf_cpus[linked_on]->hn);
14887 +	} else {
14888 +		/* holder may be queued: first stop queue changes */
14889 +		raw_spin_lock(&gsnedf.release_lock);
14890 +		if (is_queued(t)) {
14891 +			TRACE_TASK(t, "%s: is queued\n",
14892 +				   __FUNCTION__);
14893 +			/* We need to update the position of the holder in
14894 +			 * some heap. Note that this could be a release heap
14895 +			 * if budget enforcement is used and this job overran. */
14896 +			check_preempt =
14897 +				!bheap_decrease(edf_ready_order,
14898 +					       tsk_rt(t)->heap_node);
14899 +		} else {
14900 +			/* Nothing to do: if it is not queued and not linked
14901 +			 * then it is either sleeping or currently being moved
14902 +			 * by other code (e.g., a timer interrupt handler) that
14903 +			 * will use the correct priority when enqueuing the
14904 +			 * task. */
14905 +			TRACE_TASK(t, "%s: is NOT queued => Done.\n",
14906 +				   __FUNCTION__);
14907 +		}
14908 +		raw_spin_unlock(&gsnedf.release_lock);
14909 +
14910 +		/* If holder was enqueued in a release heap, then the following
14911 +		 * preemption check is pointless, but we can't easily detect
14912 +		 * that case. If you want to fix this, then consider that
14913 +		 * simply adding a state flag requires O(n) time to update when
14914 +		 * releasing n tasks, which conflicts with the goal to have
14915 +		 * O(log n) merges. */
14916 +		if (check_preempt) {
14917 +			/* heap_decrease() hit the top level of the heap: make
14918 +			 * sure preemption checks get the right task, not the
14919 +			 * potentially stale cache. */
14920 +			bheap_uncache_min(edf_ready_order,
14921 +					 &gsnedf.ready_queue);
14922 +			check_for_preemptions();
14923 +		}
14924 +	}
14925 +
14926 +	raw_spin_unlock(&gsnedf_lock);
14927 +}
14928 +
14929 +/* called with IRQs off */
14930 +static void clear_priority_inheritance(struct task_struct* t)
14931 +{
14932 +	raw_spin_lock(&gsnedf_lock);
14933 +
14934 +	/* A job only stops inheriting a priority when it releases a
14935 +	 * resource. Thus we can make the following assumption.*/
14936 +	BUG_ON(tsk_rt(t)->scheduled_on == NO_CPU);
14937 +
14938 +	TRACE_TASK(t, "priority restored\n");
14939 +	tsk_rt(t)->inh_task = NULL;
14940 +
14941 +	/* Check if rescheduling is necessary. We can't use heap_decrease()
14942 +	 * since the priority was effectively lowered. */
14943 +	unlink(t);
14944 +	gsnedf_job_arrival(t);
14945 +
14946 +	raw_spin_unlock(&gsnedf_lock);
14947 +}
14948 +
14949 +
14950 +/* ******************** FMLP support ********************** */
14951 +
14952 +/* struct for semaphore with priority inheritance */
14953 +struct fmlp_semaphore {
14954 +	struct litmus_lock litmus_lock;
14955 +
14956 +	/* current resource holder */
14957 +	struct task_struct *owner;
14958 +
14959 +	/* highest-priority waiter */
14960 +	struct task_struct *hp_waiter;
14961 +
14962 +	/* FIFO queue of waiting tasks */
14963 +	wait_queue_head_t wait;
14964 +};
14965 +
14966 +static inline struct fmlp_semaphore* fmlp_from_lock(struct litmus_lock* lock)
14967 +{
14968 +	return container_of(lock, struct fmlp_semaphore, litmus_lock);
14969 +}
14970 +
14971 +/* caller is responsible for locking */
14972 +struct task_struct* find_hp_waiter(struct fmlp_semaphore *sem,
14973 +				   struct task_struct* skip)
14974 +{
14975 +	struct list_head	*pos;
14976 +	struct task_struct 	*queued, *found = NULL;
14977 +
14978 +	list_for_each(pos, &sem->wait.task_list) {
14979 +		queued  = (struct task_struct*) list_entry(pos, wait_queue_t,
14980 +							   task_list)->private;
14981 +
14982 +		/* Compare task prios, find high prio task. */
14983 +		if (queued != skip && edf_higher_prio(queued, found))
14984 +			found = queued;
14985 +	}
14986 +	return found;
14987 +}
14988 +
14989 +int gsnedf_fmlp_lock(struct litmus_lock* l)
14990 +{
14991 +	struct task_struct* t = current;
14992 +	struct fmlp_semaphore *sem = fmlp_from_lock(l);
14993 +	wait_queue_t wait;
14994 +	unsigned long flags;
14995 +
14996 +	if (!is_realtime(t))
14997 +		return -EPERM;
14998 +
14999 +	/* prevent nested lock acquisition --- not supported by FMLP */
15000 +	if (tsk_rt(t)->num_locks_held)
15001 +		return -EBUSY;
15002 +
15003 +	spin_lock_irqsave(&sem->wait.lock, flags);
15004 +
15005 +	if (sem->owner) {
15006 +		/* resource is not free => must suspend and wait */
15007 +
15008 +		init_waitqueue_entry(&wait, t);
15009 +
15010 +		/* FIXME: interruptible would be nice some day */
15011 +		set_task_state(t, TASK_UNINTERRUPTIBLE);
15012 +
15013 +		__add_wait_queue_tail_exclusive(&sem->wait, &wait);
15014 +
15015 +		/* check if we need to activate priority inheritance */
15016 +		if (edf_higher_prio(t, sem->hp_waiter)) {
15017 +			sem->hp_waiter = t;
15018 +			if (edf_higher_prio(t, sem->owner))
15019 +				set_priority_inheritance(sem->owner, sem->hp_waiter);
15020 +		}
15021 +
15022 +		TS_LOCK_SUSPEND;
15023 +
15024 +		/* release lock before sleeping */
15025 +		spin_unlock_irqrestore(&sem->wait.lock, flags);
15026 +
15027 +		/* We depend on the FIFO order.  Thus, we don't need to recheck
15028 +		 * when we wake up; we are guaranteed to have the lock since
15029 +		 * there is only one wake up per release.
15030 +		 */
15031 +
15032 +		schedule();
15033 +
15034 +		TS_LOCK_RESUME;
15035 +
15036 +		/* Since we hold the lock, no other task will change
15037 +		 * ->owner. We can thus check it without acquiring the spin
15038 +		 * lock. */
15039 +		BUG_ON(sem->owner != t);
15040 +	} else {
15041 +		/* it's ours now */
15042 +		sem->owner = t;
15043 +
15044 +		spin_unlock_irqrestore(&sem->wait.lock, flags);
15045 +	}
15046 +
15047 +	tsk_rt(t)->num_locks_held++;
15048 +
15049 +	return 0;
15050 +}
15051 +
15052 +int gsnedf_fmlp_unlock(struct litmus_lock* l)
15053 +{
15054 +	struct task_struct *t = current, *next;
15055 +	struct fmlp_semaphore *sem = fmlp_from_lock(l);
15056 +	unsigned long flags;
15057 +	int err = 0;
15058 +
15059 +	spin_lock_irqsave(&sem->wait.lock, flags);
15060 +
15061 +	if (sem->owner != t) {
15062 +		err = -EINVAL;
15063 +		goto out;
15064 +	}
15065 +
15066 +	tsk_rt(t)->num_locks_held--;
15067 +
15068 +	/* check if there are jobs waiting for this resource */
15069 +	next = __waitqueue_remove_first(&sem->wait);
15070 +	if (next) {
15071 +		/* next becomes the resource holder */
15072 +		sem->owner = next;
15073 +		TRACE_CUR("lock ownership passed to %s/%d\n", next->comm, next->pid);
15074 +
15075 +		/* determine new hp_waiter if necessary */
15076 +		if (next == sem->hp_waiter) {
15077 +			TRACE_TASK(next, "was highest-prio waiter\n");
15078 +			/* next has the highest priority --- it doesn't need to
15079 +			 * inherit.  However, we need to make sure that the
15080 +			 * next-highest priority in the queue is reflected in
15081 +			 * hp_waiter. */
15082 +			sem->hp_waiter = find_hp_waiter(sem, next);
15083 +			if (sem->hp_waiter)
15084 +				TRACE_TASK(sem->hp_waiter, "is new highest-prio waiter\n");
15085 +			else
15086 +				TRACE("no further waiters\n");
15087 +		} else {
15088 +			/* Well, if next is not the highest-priority waiter,
15089 +			 * then it ought to inherit the highest-priority
15090 +			 * waiter's priority. */
15091 +			set_priority_inheritance(next, sem->hp_waiter);
15092 +		}
15093 +
15094 +		/* wake up next */
15095 +		wake_up_process(next);
15096 +	} else
15097 +		/* becomes available */
15098 +		sem->owner = NULL;
15099 +
15100 +	/* we lose the benefit of priority inheritance (if any) */
15101 +	if (tsk_rt(t)->inh_task)
15102 +		clear_priority_inheritance(t);
15103 +
15104 +out:
15105 +	spin_unlock_irqrestore(&sem->wait.lock, flags);
15106 +
15107 +	return err;
15108 +}
15109 +
15110 +int gsnedf_fmlp_close(struct litmus_lock* l)
15111 +{
15112 +	struct task_struct *t = current;
15113 +	struct fmlp_semaphore *sem = fmlp_from_lock(l);
15114 +	unsigned long flags;
15115 +
15116 +	int owner;
15117 +
15118 +	spin_lock_irqsave(&sem->wait.lock, flags);
15119 +
15120 +	owner = sem->owner == t;
15121 +
15122 +	spin_unlock_irqrestore(&sem->wait.lock, flags);
15123 +
15124 +	if (owner)
15125 +		gsnedf_fmlp_unlock(l);
15126 +
15127 +	return 0;
15128 +}
15129 +
15130 +void gsnedf_fmlp_free(struct litmus_lock* lock)
15131 +{
15132 +	kfree(fmlp_from_lock(lock));
15133 +}
15134 +
15135 +static struct litmus_lock_ops gsnedf_fmlp_lock_ops = {
15136 +	.close  = gsnedf_fmlp_close,
15137 +	.lock   = gsnedf_fmlp_lock,
15138 +	.unlock = gsnedf_fmlp_unlock,
15139 +	.deallocate = gsnedf_fmlp_free,
15140 +};
15141 +
15142 +static struct litmus_lock* gsnedf_new_fmlp(void)
15143 +{
15144 +	struct fmlp_semaphore* sem;
15145 +
15146 +	sem = kmalloc(sizeof(*sem), GFP_KERNEL);
15147 +	if (!sem)
15148 +		return NULL;
15149 +
15150 +	sem->owner   = NULL;
15151 +	sem->hp_waiter = NULL;
15152 +	init_waitqueue_head(&sem->wait);
15153 +	sem->litmus_lock.ops = &gsnedf_fmlp_lock_ops;
15154 +
15155 +	return &sem->litmus_lock;
15156 +}
15157 +
15158 +/* **** lock constructor **** */
15159 +
15160 +
15161 +static long gsnedf_allocate_lock(struct litmus_lock **lock, int type,
15162 +				 void* __user unused)
15163 +{
15164 +	int err = -ENXIO;
15165 +
15166 +	/* GSN-EDF currently only supports the FMLP for global resources. */
15167 +	switch (type) {
15168 +
15169 +	case FMLP_SEM:
15170 +		/* Flexible Multiprocessor Locking Protocol */
15171 +		*lock = gsnedf_new_fmlp();
15172 +		if (*lock)
15173 +			err = 0;
15174 +		else
15175 +			err = -ENOMEM;
15176 +		break;
15177 +
15178 +	};
15179 +
15180 +	return err;
15181 +}
15182 +
15183 +#endif
15184 +
15185 +static struct domain_proc_info gsnedf_domain_proc_info;
15186 +static long gsnedf_get_domain_proc_info(struct domain_proc_info **ret)
15187 +{
15188 +	*ret = &gsnedf_domain_proc_info;
15189 +	return 0;
15190 +}
15191 +
15192 +static void gsnedf_setup_domain_proc(void)
15193 +{
15194 +	int i, cpu;
15195 +	int release_master =
15196 +#ifdef CONFIG_RELEASE_MASTER
15197 +			atomic_read(&release_master_cpu);
15198 +#else
15199 +		NO_CPU;
15200 +#endif
15201 +	int num_rt_cpus = num_online_cpus() - (release_master != NO_CPU);
15202 +	struct cd_mapping *map;
15203 +
15204 +	memset(&gsnedf_domain_proc_info, 0, sizeof(gsnedf_domain_proc_info));
15205 +	init_domain_proc_info(&gsnedf_domain_proc_info, num_rt_cpus, 1);
15206 +	gsnedf_domain_proc_info.num_cpus = num_rt_cpus;
15207 +	gsnedf_domain_proc_info.num_domains = 1;
15208 +
15209 +	gsnedf_domain_proc_info.domain_to_cpus[0].id = 0;
15210 +	for (cpu = 0, i = 0; cpu < num_online_cpus(); ++cpu) {
15211 +		if (cpu == release_master)
15212 +			continue;
15213 +		map = &gsnedf_domain_proc_info.cpu_to_domains[i];
15214 +		map->id = cpu;
15215 +		cpumask_set_cpu(0, map->mask);
15216 +		++i;
15217 +
15218 +		/* add cpu to the domain */
15219 +		cpumask_set_cpu(cpu,
15220 +			gsnedf_domain_proc_info.domain_to_cpus[0].mask);
15221 +	}
15222 +}
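+
+/* Example: with 4 online CPUs and no release master, all four CPUs are
+ * reported as members of the single global domain 0; a configured release
+ * master is simply left out of the mapping.
+ */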
15223 +
15224 +static long gsnedf_activate_plugin(void)
15225 +{
15226 +	int cpu;
15227 +	cpu_entry_t *entry;
15228 +
15229 +	bheap_init(&gsnedf_cpu_heap);
15230 +#ifdef CONFIG_RELEASE_MASTER
15231 +	gsnedf.release_master = atomic_read(&release_master_cpu);
15232 +#endif
15233 +
15234 +	for_each_online_cpu(cpu) {
15235 +		entry = &per_cpu(gsnedf_cpu_entries, cpu);
15236 +		bheap_node_init(&entry->hn, entry);
15237 +		entry->linked    = NULL;
15238 +		entry->scheduled = NULL;
15239 +#ifdef CONFIG_RELEASE_MASTER
15240 +		if (cpu != gsnedf.release_master) {
15241 +#endif
15242 +			TRACE("GSN-EDF: Initializing CPU #%d.\n", cpu);
15243 +			update_cpu_position(entry);
15244 +#ifdef CONFIG_RELEASE_MASTER
15245 +		} else {
15246 +			TRACE("GSN-EDF: CPU %d is release master.\n", cpu);
15247 +		}
15248 +#endif
15249 +	}
15250 +
15251 +	gsnedf_setup_domain_proc();
15252 +
15253 +	return 0;
15254 +}
15255 +
15256 +static long gsnedf_deactivate_plugin(void)
15257 +{
15258 +	destroy_domain_proc_info(&gsnedf_domain_proc_info);
15259 +	return 0;
15260 +}
15261 +
15262 +/*	Plugin object	*/
15263 +static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = {
15264 +	.plugin_name		= "GSN-EDF",
15265 +	.finish_switch		= gsnedf_finish_switch,
15266 +	.task_new		= gsnedf_task_new,
15267 +	.complete_job		= complete_job,
15268 +	.task_exit		= gsnedf_task_exit,
15269 +	.schedule		= gsnedf_schedule,
15270 +	.task_wake_up		= gsnedf_task_wake_up,
15271 +	.task_block		= gsnedf_task_block,
15272 +	.admit_task		= gsnedf_admit_task,
15273 +	.activate_plugin	= gsnedf_activate_plugin,
15274 +	.deactivate_plugin	= gsnedf_deactivate_plugin,
15275 +	.get_domain_proc_info	= gsnedf_get_domain_proc_info,
15276 +#ifdef CONFIG_LITMUS_LOCKING
15277 +	.allocate_lock		= gsnedf_allocate_lock,
15278 +#endif
15279 +};
15280 +
15281 +
15282 +static int __init init_gsn_edf(void)
15283 +{
15284 +	int cpu;
15285 +	cpu_entry_t *entry;
15286 +
15287 +	bheap_init(&gsnedf_cpu_heap);
15288 +	/* initialize CPU state */
15289 +	for (cpu = 0; cpu < NR_CPUS; cpu++)  {
15290 +		entry = &per_cpu(gsnedf_cpu_entries, cpu);
15291 +		gsnedf_cpus[cpu] = entry;
15292 +		entry->cpu 	 = cpu;
15293 +		entry->hn        = &gsnedf_heap_node[cpu];
15294 +		bheap_node_init(&entry->hn, entry);
15295 +	}
15296 +	edf_domain_init(&gsnedf, NULL, gsnedf_release_jobs);
15297 +	return register_sched_plugin(&gsn_edf_plugin);
15298 +}
15299 +
15300 +
15301 +module_init(init_gsn_edf);
15302 -- 
15303 1.8.1.2
15304 
15305 
15306 From 7cf2307d2c200a960c9e54839ba2134730adda52 Mon Sep 17 00:00:00 2001
15307 From: Bjoern Brandenburg <bbb@mpi-sws.org>
15308 Date: Tue, 12 Feb 2013 19:17:08 +0100
15309 Subject: [PATCH 048/119] Add P-FP scheduler plugin
15310 
15311 ---
15312  litmus/Makefile    |    4 +-
15313  litmus/fp_common.c |   17 +-
15314  litmus/sched_pfp.c | 2013 ++++++++++++++++++++++++++++++++++++++++++++++++++++
15315  3 files changed, 2028 insertions(+), 6 deletions(-)
15316  create mode 100644 litmus/sched_pfp.c
15317 
15318 diff --git a/litmus/Makefile b/litmus/Makefile
15319 index c01ce3e..2d2e0a5 100644
15320 --- a/litmus/Makefile
15321 +++ b/litmus/Makefile
15322 @@ -20,7 +20,9 @@ obj-y     = sched_plugin.o litmus.o \
15323  	    ctrldev.o \
15324  	    uncachedev.o \
15325  	    sched_gsn_edf.o \
15326 -	    sched_psn_edf.o
15327 +	    sched_psn_edf.o \
15328 +	    sched_pfp.o
15329 +
15330  
15331  
15332  obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o
15333 diff --git a/litmus/fp_common.c b/litmus/fp_common.c
15334 index 964a472..ff0f30a 100644
15335 --- a/litmus/fp_common.c
15336 +++ b/litmus/fp_common.c
15337 @@ -32,7 +32,6 @@ int fp_higher_prio(struct task_struct* first,
15338  		return 0;
15339  	}
15340  
15341 -
15342  	/* check for NULL tasks */
15343  	if (!first || !second)
15344  		return first && !second;
15345 @@ -50,6 +49,15 @@ int fp_higher_prio(struct task_struct* first,
15346  	if (unlikely(second->rt_param.inh_task))
15347  		second_task = second->rt_param.inh_task;
15348  
15349 +	/* Comparisons of a task to itself are only possible with
15350 +	 * priority inheritance, when an svc_preempt interrupt occurs just
15351 +	 * before scheduling (and everything that could follow in the
15352 +	 * ready queue). Always favour the original job, as that one will just
15353 +	 * suspend itself to resolve this.
15354 +	 */
15355 +	if(first_task == second_task)
15356 +		return first_task == first;
15357 +
15358  	/* Check for priority boosting. Tie-break by start of boosting.
15359  	 */
15360  	if (unlikely(is_priority_boosted(first_task))) {
15361 @@ -65,11 +73,10 @@ int fp_higher_prio(struct task_struct* first,
15362  		/* second_task is boosted, first is not*/
15363  		return 0;
15364  
15365 -#endif
15366 -
15367 -	/* Comparisons to itself are not expected; priority inheritance
15368 -	 * should also not cause this to happen. */
15369 +#else
15370 +	/* No locks, no priority inheritance, no comparisons to itself */
15371  	BUG_ON(first_task == second_task);
15372 +#endif
15373  
15374  	if (get_priority(first_task) < get_priority(second_task))
15375  		return 1;
15376 diff --git a/litmus/sched_pfp.c b/litmus/sched_pfp.c
15377 new file mode 100644
15378 index 0000000..af7de76
15379 --- /dev/null
15380 +++ b/litmus/sched_pfp.c
15381 @@ -0,0 +1,2013 @@
15382 +/*
15383 + * litmus/sched_pfp.c
15384 + *
15385 + * Implementation of partitioned fixed-priority scheduling.
15386 + * Based on PSN-EDF.
15387 + */
15388 +
15389 +#include <linux/percpu.h>
15390 +#include <linux/sched.h>
15391 +#include <linux/list.h>
15392 +#include <linux/spinlock.h>
15393 +#include <linux/module.h>
15394 +
15395 +#include <litmus/litmus.h>
15396 +#include <litmus/wait.h>
15397 +#include <litmus/jobs.h>
15398 +#include <litmus/preempt.h>
15399 +#include <litmus/fp_common.h>
15400 +#include <litmus/sched_plugin.h>
15401 +#include <litmus/sched_trace.h>
15402 +#include <litmus/trace.h>
15403 +#include <litmus/budget.h>
15404 +
15405 +/* to set up domain/cpu mappings */
15406 +#include <litmus/litmus_proc.h>
15407 +#include <linux/uaccess.h>
15408 +
15409 +
15410 +typedef struct {
15411 +	rt_domain_t 		domain;
15412 +	struct fp_prio_queue	ready_queue;
15413 +	int          		cpu;
15414 +	struct task_struct* 	scheduled; /* only RT tasks */
15415 +/*
15416 + * scheduling lock slock
15417 + * protects the domain and serializes scheduling decisions
15418 + */
15419 +#define slock domain.ready_lock
15420 +
15421 +} pfp_domain_t;
15422 +
15423 +DEFINE_PER_CPU(pfp_domain_t, pfp_domains);
15424 +
15425 +pfp_domain_t* pfp_doms[NR_CPUS];
15426 +
15427 +#define local_pfp		(&__get_cpu_var(pfp_domains))
15428 +#define remote_dom(cpu)		(&per_cpu(pfp_domains, cpu).domain)
15429 +#define remote_pfp(cpu)	(&per_cpu(pfp_domains, cpu))
15430 +#define task_dom(task)		remote_dom(get_partition(task))
15431 +#define task_pfp(task)		remote_pfp(get_partition(task))
15432 +
15433 +
15434 +#ifdef CONFIG_LITMUS_LOCKING
15435 +DEFINE_PER_CPU(uint64_t,fmlp_timestamp);
15436 +#endif
15437 +
15438 +/* we assume the lock is being held */
15439 +static void preempt(pfp_domain_t *pfp)
15440 +{
15441 +	preempt_if_preemptable(pfp->scheduled, pfp->cpu);
15442 +}
15443 +
15444 +static unsigned int priority_index(struct task_struct* t)
15445 +{
15446 +#ifdef CONFIG_LITMUS_LOCKING
15447 +	if (unlikely(t->rt_param.inh_task))
15448 +		/* use effective priority */
15449 +		t = t->rt_param.inh_task;
15450 +
15451 +	if (is_priority_boosted(t)) {
15452 +		/* zero is reserved for priority-boosted tasks */
15453 +		return 0;
15454 +	} else
15455 +#endif
15456 +		return get_priority(t);
15457 +}
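+
+/* Example: a task with get_priority(t) == 5 is queued at index 5 of the
+ * fp_prio_queue; while it is priority-boosted (e.g., while holding a boosted
+ * lock such as the FMLP) it is queued at the reserved index 0 instead, and a
+ * task with an inherited priority is queued at the inherited task's index.
+ */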
15458 +
15459 +static void pfp_release_jobs(rt_domain_t* rt, struct bheap* tasks)
15460 +{
15461 +	pfp_domain_t *pfp = container_of(rt, pfp_domain_t, domain);
15462 +	unsigned long flags;
15463 +	struct task_struct* t;
15464 +	struct bheap_node* hn;
15465 +
15466 +	raw_spin_lock_irqsave(&pfp->slock, flags);
15467 +
15468 +	while (!bheap_empty(tasks)) {
15469 +		hn = bheap_take(fp_ready_order, tasks);
15470 +		t = bheap2task(hn);
15471 +		TRACE_TASK(t, "released (part:%d prio:%d)\n",
15472 +			   get_partition(t), get_priority(t));
15473 +		fp_prio_add(&pfp->ready_queue, t, priority_index(t));
15474 +	}
15475 +
15476 +	/* do we need to preempt? */
15477 +	if (fp_higher_prio(fp_prio_peek(&pfp->ready_queue), pfp->scheduled)) {
15478 +		TRACE_CUR("preempted by new release\n");
15479 +		preempt(pfp);
15480 +	}
15481 +
15482 +	raw_spin_unlock_irqrestore(&pfp->slock, flags);
15483 +}
15484 +
15485 +static void pfp_preempt_check(pfp_domain_t *pfp)
15486 +{
15487 +	if (fp_higher_prio(fp_prio_peek(&pfp->ready_queue), pfp->scheduled))
15488 +		preempt(pfp);
15489 +}
15490 +
15491 +static void pfp_domain_init(pfp_domain_t* pfp,
15492 +			       int cpu)
15493 +{
15494 +	fp_domain_init(&pfp->domain, NULL, pfp_release_jobs);
15495 +	pfp->cpu      		= cpu;
15496 +	pfp->scheduled		= NULL;
15497 +	fp_prio_queue_init(&pfp->ready_queue);
15498 +}
15499 +
15500 +static void requeue(struct task_struct* t, pfp_domain_t *pfp)
15501 +{
15502 +	BUG_ON(!is_running(t));
15503 +
15504 +	tsk_rt(t)->completed = 0;
15505 +	if (is_released(t, litmus_clock()))
15506 +		fp_prio_add(&pfp->ready_queue, t, priority_index(t));
15507 +	else
15508 +		add_release(&pfp->domain, t); /* it has got to wait */
15509 +}
15510 +
15511 +static void job_completion(struct task_struct* t, int forced)
15512 +{
15513 +	sched_trace_task_completion(t,forced);
15514 +	TRACE_TASK(t, "job_completion().\n");
15515 +
15516 +	tsk_rt(t)->completed = 0;
15517 +	prepare_for_next_period(t);
15518 +	if (is_released(t, litmus_clock()))
15519 +		sched_trace_task_release(t);
15520 +}
15521 +
15522 +static struct task_struct* pfp_schedule(struct task_struct * prev)
15523 +{
15524 +	pfp_domain_t* 	pfp = local_pfp;
15525 +	struct task_struct*	next;
15526 +
15527 +	int out_of_time, sleep, preempt, np, exists, blocks, resched, migrate;
15528 +
15529 +	raw_spin_lock(&pfp->slock);
15530 +
15531 +	/* sanity checking
15532 +	 * unlike under G-EDF, when a task exits (dies),
15533 +	 * pfp->scheduled may be NULL and prev _is_ a real-time task
15534 +	 */
15535 +	BUG_ON(pfp->scheduled && pfp->scheduled != prev);
15536 +	BUG_ON(pfp->scheduled && !is_realtime(prev));
15537 +
15538 +	/* (0) Determine state */
15539 +	exists      = pfp->scheduled != NULL;
15540 +	blocks      = exists && !is_running(pfp->scheduled);
15541 +	out_of_time = exists &&
15542 +				  budget_enforced(pfp->scheduled) &&
15543 +				  budget_exhausted(pfp->scheduled);
15544 +	np 	    = exists && is_np(pfp->scheduled);
15545 +	sleep	    = exists && is_completed(pfp->scheduled);
15546 +	migrate     = exists && get_partition(pfp->scheduled) != pfp->cpu;
15547 +	preempt     = !blocks && (migrate || fp_preemption_needed(&pfp->ready_queue, prev));
15548 +
15549 +	/* If we need to preempt do so.
15550 +	 * The following checks set resched to 1 in case of special
15551 +	 * circumstances.
15552 +	 */
15553 +	resched = preempt;
15554 +
15555 +	/* If a task blocks we have no choice but to reschedule.
15556 +	 */
15557 +	if (blocks)
15558 +		resched = 1;
15559 +
15560 +	/* Request a sys_exit_np() call if we would like to preempt but cannot.
15561 +	 * Multiple calls to request_exit_np() don't hurt.
15562 +	 */
15563 +	if (np && (out_of_time || preempt || sleep))
15564 +		request_exit_np(pfp->scheduled);
15565 +
15566 +	/* Any task that is preemptable and either exhausts its execution
15567 +	 * budget or wants to sleep completes. We may have to reschedule after
15568 +	 * this.
15569 +	 */
15570 +	if (!np && (out_of_time || sleep) && !blocks && !migrate) {
15571 +		job_completion(pfp->scheduled, !sleep);
15572 +		resched = 1;
15573 +	}
15574 +
15575 +	/* The final scheduling decision. Do we need to switch for some reason?
15576 +	 * Switch if we are in RT mode and have no task or if we need to
15577 +	 * resched.
15578 +	 */
15579 +	next = NULL;
15580 +	if ((!np || blocks) && (resched || !exists)) {
15581 +		/* When preempting a task that does not block, then
15582 +		 * re-insert it into either the ready queue or the
15583 +		 * release queue (if it completed). requeue() picks
15584 +		 * the appropriate queue.
15585 +		 */
15586 +		if (pfp->scheduled && !blocks  && !migrate)
15587 +			requeue(pfp->scheduled, pfp);
15588 +		next = fp_prio_take(&pfp->ready_queue);
15589 +		if (next == prev) {
15590 +			struct task_struct *t = fp_prio_peek(&pfp->ready_queue);
15591 +			TRACE_TASK(next, "next==prev sleep=%d oot=%d np=%d preempt=%d migrate=%d "
15592 +				   "boost=%d empty=%d prio-idx=%u prio=%u\n",
15593 +				   sleep, out_of_time, np, preempt, migrate,
15594 +				   is_priority_boosted(next),
15595 +				   t == NULL,
15596 +				   priority_index(next),
15597 +				   get_priority(next));
15598 +			if (t)
15599 +				TRACE_TASK(t, "waiter boost=%d prio-idx=%u prio=%u\n",
15600 +					   is_priority_boosted(t),
15601 +					   priority_index(t),
15602 +					   get_priority(t));
15603 +		}
15604 +		/* If preempt is set, we should not see the same task again. */
15605 +		BUG_ON(preempt && next == prev);
15606 +		/* Similarly, if preempt is set, then next may not be NULL,
15607 +		 * unless it's a migration. */
15608 +		BUG_ON(preempt && !migrate && next == NULL);
15609 +	} else
15610 +		/* Only override Linux scheduler if we have a real-time task
15611 +		 * scheduled that needs to continue.
15612 +		 */
15613 +		if (exists)
15614 +			next = prev;
15615 +
15616 +	if (next) {
15617 +		TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
15618 +	} else {
15619 +		TRACE("becoming idle at %llu\n", litmus_clock());
15620 +	}
15621 +
15622 +	pfp->scheduled = next;
15623 +	sched_state_task_picked();
15624 +	raw_spin_unlock(&pfp->slock);
15625 +
15626 +	return next;
15627 +}
15628 +
15629 +#ifdef CONFIG_LITMUS_LOCKING
15630 +
15631 +/* prev is no longer scheduled --- see if it needs to migrate */
15632 +static void pfp_finish_switch(struct task_struct *prev)
15633 +{
15634 +	pfp_domain_t *to;
15635 +
15636 +	if (is_realtime(prev) &&
15637 +	    is_running(prev) &&
15638 +	    get_partition(prev) != smp_processor_id()) {
15639 +		TRACE_TASK(prev, "needs to migrate from P%d to P%d\n",
15640 +			   smp_processor_id(), get_partition(prev));
15641 +
15642 +		to = task_pfp(prev);
15643 +
15644 +		raw_spin_lock(&to->slock);
15645 +
15646 +		TRACE_TASK(prev, "adding to queue on P%d\n", to->cpu);
15647 +		requeue(prev, to);
15648 +		if (fp_preemption_needed(&to->ready_queue, to->scheduled))
15649 +			preempt(to);
15650 +
15651 +		raw_spin_unlock(&to->slock);
15652 +
15653 +	}
15654 +}
15655 +
15656 +#endif
15657 +
15658 +/*	Prepare a task for running in RT mode
15659 + */
15660 +static void pfp_task_new(struct task_struct * t, int on_rq, int is_scheduled)
15661 +{
15662 +	pfp_domain_t* 	pfp = task_pfp(t);
15663 +	unsigned long		flags;
15664 +
15665 +	TRACE_TASK(t, "P-FP: task new, cpu = %d\n",
15666 +		   t->rt_param.task_params.cpu);
15667 +
15668 +	/* setup job parameters */
15669 +	release_at(t, litmus_clock());
15670 +
15671 +	raw_spin_lock_irqsave(&pfp->slock, flags);
15672 +	if (is_scheduled) {
15673 +		/* there shouldn't be anything else running at the time */
15674 +		BUG_ON(pfp->scheduled);
15675 +		pfp->scheduled = t;
15676 +	} else if (is_running(t)) {
15677 +		requeue(t, pfp);
15678 +		/* maybe we have to reschedule */
15679 +		pfp_preempt_check(pfp);
15680 +	}
15681 +	raw_spin_unlock_irqrestore(&pfp->slock, flags);
15682 +}
15683 +
15684 +static void pfp_task_wake_up(struct task_struct *task)
15685 +{
15686 +	unsigned long		flags;
15687 +	pfp_domain_t*		pfp = task_pfp(task);
15688 +	lt_t			now;
15689 +
15690 +	TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
15691 +	raw_spin_lock_irqsave(&pfp->slock, flags);
15692 +
15693 +#ifdef CONFIG_LITMUS_LOCKING
15694 +	/* Should only be queued when processing a fake wake-up due to a
15695 +	 * migration-related state change. */
15696 +	if (unlikely(is_queued(task))) {
15697 +		TRACE_TASK(task, "WARNING: waking task still queued. Is this right?\n");
15698 +		goto out_unlock;
15699 +	}
15700 +#else
15701 +	BUG_ON(is_queued(task));
15702 +#endif
15703 +	now = litmus_clock();
15704 +	if (is_sporadic(task) && is_tardy(task, now)
15705 +#ifdef CONFIG_LITMUS_LOCKING
15706 +	/* We need to take suspensions because of semaphores into
15707 +	 * account! If a job resumes after being suspended due to acquiring
15708 +	 * a semaphore, it should never be treated as a new job release.
15709 +	 */
15710 +	    && !is_priority_boosted(task)
15711 +#endif
15712 +		) {
15713 +		/* new sporadic release */
15714 +		release_at(task, now);
15715 +		sched_trace_task_release(task);
15716 +	}
15717 +
15718 +	/* Only add to ready queue if it is not the currently-scheduled
15719 +	 * task. This could be the case if a task was woken up concurrently
15720 +	 * on a remote CPU before the executing CPU got around to actually
15721 +	 * de-scheduling the task, i.e., wake_up() raced with schedule()
15722 +	 * and won. Also, don't requeue if it is still queued, which can
15723 +	 * happen under the DPCP due to wake-ups racing with migrations.
15724 +	 */
15725 +	if (pfp->scheduled != task) {
15726 +		requeue(task, pfp);
15727 +		pfp_preempt_check(pfp);
15728 +	}
15729 +
15730 +#ifdef CONFIG_LITMUS_LOCKING
15731 +out_unlock:
15732 +#endif
15733 +	raw_spin_unlock_irqrestore(&pfp->slock, flags);
15734 +	TRACE_TASK(task, "wake up done\n");
15735 +}
15736 +
15737 +static void pfp_task_block(struct task_struct *t)
15738 +{
15739 +	/* only running tasks can block, thus t is in no queue */
15740 +	TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state);
15741 +
15742 +	BUG_ON(!is_realtime(t));
15743 +
15744 +	/* If this task blocked normally, it shouldn't be queued. The exception is
15745 +	 * if this is a simulated block()/wakeup() pair from the pull-migration code path.
15746 +	 * This should only happen if the DPCP is being used.
15747 +	 */
15748 +#ifdef CONFIG_LITMUS_LOCKING
15749 +	if (unlikely(is_queued(t)))
15750 +		TRACE_TASK(t, "WARNING: blocking task still queued. Is this right?\n");
15751 +#else
15752 +	BUG_ON(is_queued(t));
15753 +#endif
15754 +}
15755 +
15756 +static void pfp_task_exit(struct task_struct * t)
15757 +{
15758 +	unsigned long flags;
15759 +	pfp_domain_t* 	pfp = task_pfp(t);
15760 +	rt_domain_t*		dom;
15761 +
15762 +	raw_spin_lock_irqsave(&pfp->slock, flags);
15763 +	if (is_queued(t)) {
15764 +		BUG(); /* This currently doesn't work. */
15765 +		/* dequeue */
15766 +		dom  = task_dom(t);
15767 +		remove(dom, t);
15768 +	}
15769 +	if (pfp->scheduled == t) {
15770 +		pfp->scheduled = NULL;
15771 +		preempt(pfp);
15772 +	}
15773 +	TRACE_TASK(t, "RIP, now reschedule\n");
15774 +
15775 +	raw_spin_unlock_irqrestore(&pfp->slock, flags);
15776 +}
15777 +
15778 +#ifdef CONFIG_LITMUS_LOCKING
15779 +
15780 +#include <litmus/fdso.h>
15781 +#include <litmus/srp.h>
15782 +
15783 +static void fp_dequeue(pfp_domain_t* pfp, struct task_struct* t)
15784 +{
15785 +	BUG_ON(pfp->scheduled == t && is_queued(t));
15786 +	if (is_queued(t))
15787 +		fp_prio_remove(&pfp->ready_queue, t, priority_index(t));
15788 +}
15789 +
15790 +static void fp_set_prio_inh(pfp_domain_t* pfp, struct task_struct* t,
15791 +			    struct task_struct* prio_inh)
15792 +{
15793 +	int requeue;
15794 +
15795 +	if (!t || t->rt_param.inh_task == prio_inh) {
15796 +		/* no update  required */
15797 +		if (t)
15798 +			TRACE_TASK(t, "no prio-inh update required\n");
15799 +		return;
15800 +	}
15801 +
15802 +	requeue = is_queued(t);
15803 +	TRACE_TASK(t, "prio-inh: is_queued:%d\n", requeue);
15804 +
15805 +	if (requeue)
15806 +		/* first remove */
15807 +		fp_dequeue(pfp, t);
15808 +
15809 +	t->rt_param.inh_task = prio_inh;
15810 +
15811 +	if (requeue)
15812 +		/* add again to the right queue */
15813 +		fp_prio_add(&pfp->ready_queue, t, priority_index(t));
15814 +}
15815 +
15816 +static int effective_agent_priority(int prio)
15817 +{
15818 +	/* make sure agents have higher priority */
15819 +	return prio - LITMUS_MAX_PRIORITY;
15820 +}
15821 +
15822 +static lt_t prio_point(int eprio)
15823 +{
15824 +	/* make sure we have non-negative prio points */
15825 +	return eprio + LITMUS_MAX_PRIORITY;
15826 +}
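+
+/* Worked example (illustrative only; the concrete value of LITMUS_MAX_PRIORITY
+ * is assumed here, not taken from this patch): suppose LITMUS_MAX_PRIORITY is
+ * 512 and lower numbers denote higher priority.  A task with base priority 7
+ * then gets agent priority 7 - 512 = -505, which is numerically smaller than
+ * any regular priority in [0, 511] and therefore beats all non-agent tasks.
+ * prio_point(-505) = 7 maps the agent priority back to a non-negative value
+ * so that it can be stored in an unsigned lt_t priority point. */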
15827 +
15828 +static void boost_priority(struct task_struct* t, lt_t priority_point)
15829 +{
15830 +	unsigned long		flags;
15831 +	pfp_domain_t* 	pfp = task_pfp(t);
15832 +
15833 +	raw_spin_lock_irqsave(&pfp->slock, flags);
15834 +
15835 +
15836 +	TRACE_TASK(t, "priority boosted at %llu\n", litmus_clock());
15837 +
15838 +	tsk_rt(t)->priority_boosted = 1;
15839 +	/* tie-break by protocol-specific priority point */
15840 +	tsk_rt(t)->boost_start_time = priority_point;
15841 +
15842 +	/* Priority boosting currently only takes effect for already-scheduled
15843 +	 * tasks. This is sufficient since priority boosting only kicks in as
15844 +	 * part of lock acquisitions. */
15845 +	BUG_ON(pfp->scheduled != t);
15846 +
15847 +	raw_spin_unlock_irqrestore(&pfp->slock, flags);
15848 +}
15849 +
15850 +static void unboost_priority(struct task_struct* t)
15851 +{
15852 +	unsigned long		flags;
15853 +	pfp_domain_t* 	pfp = task_pfp(t);
15854 +
15855 +	raw_spin_lock_irqsave(&pfp->slock, flags);
15856 +
15857 +	/* Assumption: this only happens when the job is scheduled.
15858 +	 * Exception: If t transitioned to non-real-time mode, we no longer
15859 +	 * care about it. */
15860 +	BUG_ON(pfp->scheduled != t && is_realtime(t));
15861 +
15862 +	TRACE_TASK(t, "priority restored at %llu\n", litmus_clock());
15863 +
15864 +	tsk_rt(t)->priority_boosted = 0;
15865 +	tsk_rt(t)->boost_start_time = 0;
15866 +
15867 +	/* check if this changes anything */
15868 +	if (fp_preemption_needed(&pfp->ready_queue, pfp->scheduled))
15869 +		preempt(pfp);
15870 +
15871 +	raw_spin_unlock_irqrestore(&pfp->slock, flags);
15872 +}
15873 +
15874 +/* ******************** SRP support ************************ */
15875 +
15876 +static unsigned int pfp_get_srp_prio(struct task_struct* t)
15877 +{
15878 +	return get_priority(t);
15879 +}
15880 +
15881 +/* ******************** FMLP support ********************** */
15882 +
15883 +struct fmlp_semaphore {
15884 +	struct litmus_lock litmus_lock;
15885 +
15886 +	/* current resource holder */
15887 +	struct task_struct *owner;
15888 +
15889 +	/* FIFO queue of waiting tasks */
15890 +	wait_queue_head_t wait;
15891 +};
15892 +
15893 +static inline struct fmlp_semaphore* fmlp_from_lock(struct litmus_lock* lock)
15894 +{
15895 +	return container_of(lock, struct fmlp_semaphore, litmus_lock);
15896 +}
15897 +
15898 +static inline lt_t
15899 +fmlp_clock(void)
15900 +{
15901 +	return (lt_t) __get_cpu_var(fmlp_timestamp)++;
15902 +}
15903 +
15904 +int pfp_fmlp_lock(struct litmus_lock* l)
15905 +{
15906 +	struct task_struct* t = current;
15907 +	struct fmlp_semaphore *sem = fmlp_from_lock(l);
15908 +	wait_queue_t wait;
15909 +	unsigned long flags;
15910 +	lt_t time_of_request;
15911 +
15912 +	if (!is_realtime(t))
15913 +		return -EPERM;
15914 +
15915 +	/* prevent nested lock acquisition --- not supported by FMLP */
15916 +	if (tsk_rt(t)->num_locks_held ||
15917 +	    tsk_rt(t)->num_local_locks_held)
15918 +		return -EBUSY;
15919 +
15920 +	spin_lock_irqsave(&sem->wait.lock, flags);
15921 +
15922 +	/* tie-break by this point in time */
15923 +	time_of_request = fmlp_clock();
15924 +
15925 +	/* Priority-boost ourself *before* we suspend so that
15926 +	 * our priority is boosted when we resume. */
15927 +	boost_priority(t, time_of_request);
15928 +
15929 +	if (sem->owner) {
15930 +		/* resource is not free => must suspend and wait */
15931 +
15932 +		init_waitqueue_entry(&wait, t);
15933 +
15934 +		/* FIXME: interruptible would be nice some day */
15935 +		set_task_state(t, TASK_UNINTERRUPTIBLE);
15936 +
15937 +		__add_wait_queue_tail_exclusive(&sem->wait, &wait);
15938 +
15939 +		TS_LOCK_SUSPEND;
15940 +
15941 +		/* release lock before sleeping */
15942 +		spin_unlock_irqrestore(&sem->wait.lock, flags);
15943 +
15944 +		/* We depend on the FIFO order.  Thus, we don't need to recheck
15945 +		 * when we wake up; we are guaranteed to have the lock since
15946 +		 * there is only one wake up per release.
15947 +		 */
15948 +
15949 +		schedule();
15950 +
15951 +		TS_LOCK_RESUME;
15952 +
15953 +		/* Since we hold the lock, no other task will change
15954 +		 * ->owner. We can thus check it without acquiring the spin
15955 +		 * lock. */
15956 +		BUG_ON(sem->owner != t);
15957 +	} else {
15958 +		/* it's ours now */
15959 +		sem->owner = t;
15960 +
15961 +		spin_unlock_irqrestore(&sem->wait.lock, flags);
15962 +	}
15963 +
15964 +	tsk_rt(t)->num_locks_held++;
15965 +
15966 +	return 0;
15967 +}
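+
+/* Illustration (a sketch of the intended behavior): waiters are served in
+ * FIFO order of their arrival at sem->wait.  The fmlp_clock() timestamp
+ * recorded above is used only as the priority-boosting tie-break on each
+ * partition, so that among boosted jobs the earlier request wins; while a
+ * job waits for or holds the semaphore, its boosted priority prevents
+ * preemption by non-boosted jobs on its partition. */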
15968 +
15969 +int pfp_fmlp_unlock(struct litmus_lock* l)
15970 +{
15971 +	struct task_struct *t = current, *next = NULL;
15972 +	struct fmlp_semaphore *sem = fmlp_from_lock(l);
15973 +	unsigned long flags;
15974 +	int err = 0;
15975 +
15976 +	preempt_disable();
15977 +
15978 +	spin_lock_irqsave(&sem->wait.lock, flags);
15979 +
15980 +	if (sem->owner != t) {
15981 +		err = -EINVAL;
15982 +		goto out;
15983 +	}
15984 +
15985 +	tsk_rt(t)->num_locks_held--;
15986 +
15987 +	/* we lose the benefit of priority boosting */
15988 +
15989 +	unboost_priority(t);
15990 +
15991 +	/* check if there are jobs waiting for this resource */
15992 +	next = __waitqueue_remove_first(&sem->wait);
15993 +	sem->owner = next;
15994 +
15995 +out:
15996 +	spin_unlock_irqrestore(&sem->wait.lock, flags);
15997 +
15998 +	/* Wake up next. The waiting job is already priority-boosted. */
15999 +	if (next) {
16000 +		wake_up_process(next);
16001 +	}
16002 +
16003 +	preempt_enable();
16004 +
16005 +	return err;
16006 +}
16007 +
16008 +int pfp_fmlp_close(struct litmus_lock* l)
16009 +{
16010 +	struct task_struct *t = current;
16011 +	struct fmlp_semaphore *sem = fmlp_from_lock(l);
16012 +	unsigned long flags;
16013 +
16014 +	int owner;
16015 +
16016 +	spin_lock_irqsave(&sem->wait.lock, flags);
16017 +
16018 +	owner = sem->owner == t;
16019 +
16020 +	spin_unlock_irqrestore(&sem->wait.lock, flags);
16021 +
16022 +	if (owner)
16023 +		pfp_fmlp_unlock(l);
16024 +
16025 +	return 0;
16026 +}
16027 +
16028 +void pfp_fmlp_free(struct litmus_lock* lock)
16029 +{
16030 +	kfree(fmlp_from_lock(lock));
16031 +}
16032 +
16033 +static struct litmus_lock_ops pfp_fmlp_lock_ops = {
16034 +	.close  = pfp_fmlp_close,
16035 +	.lock   = pfp_fmlp_lock,
16036 +	.unlock = pfp_fmlp_unlock,
16037 +	.deallocate = pfp_fmlp_free,
16038 +};
16039 +
16040 +static struct litmus_lock* pfp_new_fmlp(void)
16041 +{
16042 +	struct fmlp_semaphore* sem;
16043 +
16044 +	sem = kmalloc(sizeof(*sem), GFP_KERNEL);
16045 +	if (!sem)
16046 +		return NULL;
16047 +
16048 +	sem->owner   = NULL;
16049 +	init_waitqueue_head(&sem->wait);
16050 +	sem->litmus_lock.ops = &pfp_fmlp_lock_ops;
16051 +
16052 +	return &sem->litmus_lock;
16053 +}
16054 +
16055 +/* ******************** MPCP support ********************** */
16056 +
16057 +struct mpcp_semaphore {
16058 +	struct litmus_lock litmus_lock;
16059 +
16060 +	/* current resource holder */
16061 +	struct task_struct *owner;
16062 +
16063 +	/* priority queue of waiting tasks */
16064 +	wait_queue_head_t wait;
16065 +
16066 +	/* priority ceiling per cpu */
16067 +	unsigned int prio_ceiling[NR_CPUS];
16068 +
16069 +	/* should jobs spin "virtually" for this resource? */
16070 +	int vspin;
16071 +};
16072 +
16073 +#define OMEGA_CEILING UINT_MAX
16074 +
16075 +/* Since jobs spin "virtually" while waiting to acquire a lock,
16076 + * they must first acquire a local per-CPU resource.
16077 + */
16078 +static DEFINE_PER_CPU(wait_queue_head_t, mpcpvs_vspin_wait);
16079 +static DEFINE_PER_CPU(struct task_struct*, mpcpvs_vspin);
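+
+/* Example of the intended semantics (illustrative): if two jobs on the same
+ * processor both issue MPCP requests, only the one holding the per-CPU
+ * mpcpvs_vspin token may actually contend for the global semaphore; the
+ * other suspends in mpcpvs_vspin_wait, ordered by priority.  When the holder
+ * calls mpcp_vspin_exit(), the highest-priority waiter is woken and retries,
+ * which emulates the "virtual spinning" of the MPCP-VS variant. */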
16080 +
16081 +/* called with preemptions off <=> no local modifications */
16082 +static void mpcp_vspin_enter(void)
16083 +{
16084 +	struct task_struct* t = current;
16085 +
16086 +	while (1) {
16087 +		if (__get_cpu_var(mpcpvs_vspin) == NULL) {
16088 +			/* good, we get to issue our request */
16089 +			__get_cpu_var(mpcpvs_vspin) = t;
16090 +			break;
16091 +		} else {
16092 +			/* some job is spinning => enqueue in request queue */
16093 +			prio_wait_queue_t wait;
16094 +			wait_queue_head_t* vspin = &__get_cpu_var(mpcpvs_vspin_wait);
16095 +			unsigned long flags;
16096 +
16097 +			/* ordered by regular priority */
16098 +			init_prio_waitqueue_entry(&wait, t, prio_point(get_priority(t)));
16099 +
16100 +			spin_lock_irqsave(&vspin->lock, flags);
16101 +
16102 +			set_task_state(t, TASK_UNINTERRUPTIBLE);
16103 +
16104 +			__add_wait_queue_prio_exclusive(vspin, &wait);
16105 +
16106 +			spin_unlock_irqrestore(&vspin->lock, flags);
16107 +
16108 +			TS_LOCK_SUSPEND;
16109 +
16110 +			preempt_enable_no_resched();
16111 +
16112 +			schedule();
16113 +
16114 +			preempt_disable();
16115 +
16116 +			TS_LOCK_RESUME;
16117 +			/* Recheck if we got it --- some higher-priority process might
16118 +			 * have swooped in. */
16119 +		}
16120 +	}
16121 +	/* ok, now it is ours */
16122 +}
16123 +
16124 +/* called with preemptions off */
16125 +static void mpcp_vspin_exit(void)
16126 +{
16127 +	struct task_struct* t = current, *next;
16128 +	unsigned long flags;
16129 +	wait_queue_head_t* vspin = &__get_cpu_var(mpcpvs_vspin_wait);
16130 +
16131 +	BUG_ON(__get_cpu_var(mpcpvs_vspin) != t);
16132 +
16133 +	/* no spinning job */
16134 +	__get_cpu_var(mpcpvs_vspin) = NULL;
16135 +
16136 +	/* see if anyone is waiting for us to stop "spinning" */
16137 +	spin_lock_irqsave(&vspin->lock, flags);
16138 +	next = __waitqueue_remove_first(vspin);
16139 +
16140 +	if (next)
16141 +		wake_up_process(next);
16142 +
16143 +	spin_unlock_irqrestore(&vspin->lock, flags);
16144 +}
16145 +
16146 +static inline struct mpcp_semaphore* mpcp_from_lock(struct litmus_lock* lock)
16147 +{
16148 +	return container_of(lock, struct mpcp_semaphore, litmus_lock);
16149 +}
16150 +
16151 +int pfp_mpcp_lock(struct litmus_lock* l)
16152 +{
16153 +	struct task_struct* t = current;
16154 +	struct mpcp_semaphore *sem = mpcp_from_lock(l);
16155 +	prio_wait_queue_t wait;
16156 +	unsigned long flags;
16157 +
16158 +	if (!is_realtime(t))
16159 +		return -EPERM;
16160 +
16161 +	/* prevent nested lock acquisition */
16162 +	if (tsk_rt(t)->num_locks_held ||
16163 +	    tsk_rt(t)->num_local_locks_held)
16164 +		return -EBUSY;
16165 +
16166 +	preempt_disable();
16167 +
16168 +	if (sem->vspin)
16169 +		mpcp_vspin_enter();
16170 +
16171 +	/* Priority-boost ourself *before* we suspend so that
16172 +	 * our priority is boosted when we resume. Use the priority
16173 +	 * ceiling for the local partition. */
16174 +	boost_priority(t, sem->prio_ceiling[get_partition(t)]);
16175 +
16176 +	spin_lock_irqsave(&sem->wait.lock, flags);
16177 +
16178 +	preempt_enable_no_resched();
16179 +
16180 +	if (sem->owner) {
16181 +		/* resource is not free => must suspend and wait */
16182 +
16183 +		/* ordered by regular priority */
16184 +		init_prio_waitqueue_entry(&wait, t, prio_point(get_priority(t)));
16185 +
16186 +		/* FIXME: interruptible would be nice some day */
16187 +		set_task_state(t, TASK_UNINTERRUPTIBLE);
16188 +
16189 +		__add_wait_queue_prio_exclusive(&sem->wait, &wait);
16190 +
16191 +		TS_LOCK_SUSPEND;
16192 +
16193 +		/* release lock before sleeping */
16194 +		spin_unlock_irqrestore(&sem->wait.lock, flags);
16195 +
16196 +		/* We depend on the FIFO order.  Thus, we don't need to recheck
16197 +		 * when we wake up; we are guaranteed to have the lock since
16198 +		 * there is only one wake up per release.
16199 +		 */
16200 +
16201 +		schedule();
16202 +
16203 +		TS_LOCK_RESUME;
16204 +
16205 +		/* Since we hold the lock, no other task will change
16206 +		 * ->owner. We can thus check it without acquiring the spin
16207 +		 * lock. */
16208 +		BUG_ON(sem->owner != t);
16209 +	} else {
16210 +		/* it's ours now */
16211 +		sem->owner = t;
16212 +
16213 +		spin_unlock_irqrestore(&sem->wait.lock, flags);
16214 +	}
16215 +
16216 +	tsk_rt(t)->num_locks_held++;
16217 +
16218 +	return 0;
16219 +}
16220 +
16221 +int pfp_mpcp_unlock(struct litmus_lock* l)
16222 +{
16223 +	struct task_struct *t = current, *next = NULL;
16224 +	struct mpcp_semaphore *sem = mpcp_from_lock(l);
16225 +	unsigned long flags;
16226 +	int err = 0;
16227 +
16228 +	preempt_disable();
16229 +
16230 +	spin_lock_irqsave(&sem->wait.lock, flags);
16231 +
16232 +	if (sem->owner != t) {
16233 +		err = -EINVAL;
16234 +		goto out;
16235 +	}
16236 +
16237 +	tsk_rt(t)->num_locks_held--;
16238 +
16239 +	/* we lose the benefit of priority boosting */
16240 +	unboost_priority(t);
16241 +
16242 +	/* check if there are jobs waiting for this resource */
16243 +	next = __waitqueue_remove_first(&sem->wait);
16244 +	sem->owner = next;
16245 +
16246 +out:
16247 +	spin_unlock_irqrestore(&sem->wait.lock, flags);
16248 +
16249 +	/* Wake up next. The waiting job is already priority-boosted. */
16250 +	if (next) {
16251 +		wake_up_process(next);
16252 +	}
16253 +
16254 +	if (sem->vspin && err == 0) {
16255 +		mpcp_vspin_exit();
16256 +	}
16257 +
16258 +	preempt_enable();
16259 +
16260 +	return err;
16261 +}
16262 +
16263 +int pfp_mpcp_open(struct litmus_lock* l, void* config)
16264 +{
16265 +	struct task_struct *t = current;
16266 +	int cpu, local_cpu;
16267 +	struct mpcp_semaphore *sem = mpcp_from_lock(l);
16268 +	unsigned long flags;
16269 +
16270 +	if (!is_realtime(t))
16271 +		/* we need to know the real-time priority */
16272 +		return -EPERM;
16273 +
16274 +	local_cpu = get_partition(t);
16275 +
16276 +	spin_lock_irqsave(&sem->wait.lock, flags);
16277 +	for (cpu = 0; cpu < NR_CPUS; cpu++) {
16278 +		if (cpu != local_cpu) {
16279 +			sem->prio_ceiling[cpu] = min(sem->prio_ceiling[cpu],
16280 +						     get_priority(t));
16281 +			TRACE_CUR("priority ceiling for sem %p is now %d on cpu %d\n",
16282 +				  sem, sem->prio_ceiling[cpu], cpu);
16283 +		}
16284 +	}
16285 +	spin_unlock_irqrestore(&sem->wait.lock, flags);
16286 +
16287 +	return 0;
16288 +}
16289 +
16290 +int pfp_mpcp_close(struct litmus_lock* l)
16291 +{
16292 +	struct task_struct *t = current;
16293 +	struct mpcp_semaphore *sem = mpcp_from_lock(l);
16294 +	unsigned long flags;
16295 +
16296 +	int owner;
16297 +
16298 +	spin_lock_irqsave(&sem->wait.lock, flags);
16299 +
16300 +	owner = sem->owner == t;
16301 +
16302 +	spin_unlock_irqrestore(&sem->wait.lock, flags);
16303 +
16304 +	if (owner)
16305 +		pfp_mpcp_unlock(l);
16306 +
16307 +	return 0;
16308 +}
16309 +
16310 +void pfp_mpcp_free(struct litmus_lock* lock)
16311 +{
16312 +	kfree(mpcp_from_lock(lock));
16313 +}
16314 +
16315 +static struct litmus_lock_ops pfp_mpcp_lock_ops = {
16316 +	.close  = pfp_mpcp_close,
16317 +	.lock   = pfp_mpcp_lock,
16318 +	.open	= pfp_mpcp_open,
16319 +	.unlock = pfp_mpcp_unlock,
16320 +	.deallocate = pfp_mpcp_free,
16321 +};
16322 +
16323 +static struct litmus_lock* pfp_new_mpcp(int vspin)
16324 +{
16325 +	struct mpcp_semaphore* sem;
16326 +	int cpu;
16327 +
16328 +	sem = kmalloc(sizeof(*sem), GFP_KERNEL);
16329 +	if (!sem)
16330 +		return NULL;
16331 +
16332 +	sem->owner   = NULL;
16333 +	init_waitqueue_head(&sem->wait);
16334 +	sem->litmus_lock.ops = &pfp_mpcp_lock_ops;
16335 +
16336 +	for (cpu = 0; cpu < NR_CPUS; cpu++)
16337 +		sem->prio_ceiling[cpu] = OMEGA_CEILING;
16338 +
16339 +	/* mark as virtual spinning */
16340 +	sem->vspin = vspin;
16341 +
16342 +	return &sem->litmus_lock;
16343 +}
16344 +
16345 +
16346 +/* ******************** PCP support ********************** */
16347 +
16348 +
16349 +struct pcp_semaphore {
16350 +	struct litmus_lock litmus_lock;
16351 +
16352 +	struct list_head ceiling;
16353 +
16354 +	/* current resource holder */
16355 +	struct task_struct *owner;
16356 +
16357 +	/* priority ceiling --- can be negative due to DPCP support */
16358 +	int prio_ceiling;
16359 +
16360 +	/* on which processor is this PCP semaphore allocated? */
16361 +	int on_cpu;
16362 +};
16363 +
16364 +static inline struct pcp_semaphore* pcp_from_lock(struct litmus_lock* lock)
16365 +{
16366 +	return container_of(lock, struct pcp_semaphore, litmus_lock);
16367 +}
16368 +
16369 +
16370 +struct pcp_state {
16371 +	struct list_head system_ceiling;
16372 +
16373 +	/* highest-priority waiting task */
16374 +	struct task_struct* hp_waiter;
16375 +
16376 +	/* list of jobs waiting to get past the system ceiling */
16377 +	wait_queue_head_t ceiling_blocked;
16378 +};
16379 +
16380 +static void pcp_init_state(struct pcp_state* s)
16381 +{
16382 +	INIT_LIST_HEAD(&s->system_ceiling);
16383 +	s->hp_waiter = NULL;
16384 +	init_waitqueue_head(&s->ceiling_blocked);
16385 +}
16386 +
16387 +static DEFINE_PER_CPU(struct pcp_state, pcp_state);
16388 +
16389 +/* assumes preemptions are off */
16390 +static struct pcp_semaphore* pcp_get_ceiling(void)
16391 +{
16392 +	struct list_head* top = &__get_cpu_var(pcp_state).system_ceiling;
16393 +	return list_first_entry_or_null(top, struct pcp_semaphore, ceiling);
16394 +}
16395 +
16396 +/* assumes preempt off */
16397 +static void pcp_add_ceiling(struct pcp_semaphore* sem)
16398 +{
16399 +	struct list_head *pos;
16400 +	struct list_head *in_use = &__get_cpu_var(pcp_state).system_ceiling;
16401 +	struct pcp_semaphore* held;
16402 +
16403 +	BUG_ON(sem->on_cpu != smp_processor_id());
16404 +	BUG_ON(in_list(&sem->ceiling));
16405 +
16406 +	list_for_each(pos, in_use) {
16407 +		held = list_entry(pos, struct pcp_semaphore, ceiling);
16408 +		if (held->prio_ceiling >= sem->prio_ceiling) {
16409 +			__list_add(&sem->ceiling, pos->prev, pos);
16410 +			return;
16411 +		}
16412 +	}
16413 +
16414 +	/* we hit the end of the list */
16415 +
16416 +	list_add_tail(&sem->ceiling, in_use);
16417 +}
16418 +
16419 +/* assumes preempt off */
16420 +static int pcp_exceeds_ceiling(struct pcp_semaphore* ceiling,
16421 +			      struct task_struct* task,
16422 +			      int effective_prio)
16423 +{
16424 +	return ceiling == NULL ||
16425 +		ceiling->prio_ceiling > effective_prio ||
16426 +		ceiling->owner == task;
16427 +}
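+
+/* Worked example (illustrative; ceilings are compared numerically and lower
+ * values denote higher priority): with a current system ceiling of 5, a job
+ * whose effective priority is 3 passes the test above, whereas a job with
+ * effective priority 5 or 7 is ceiling-blocked unless it owns the semaphore
+ * that set the ceiling.  This is the classic PCP admission rule, which
+ * bounds each job to at most one ceiling-blocking per activation and rules
+ * out deadlock among local resource requests. */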
16428 +
16429 +/* assumes preempt off */
16430 +static void pcp_priority_inheritance(void)
16431 +{
16432 +	unsigned long	flags;
16433 +	pfp_domain_t* 	pfp = local_pfp;
16434 +
16435 +	struct pcp_semaphore* ceiling = pcp_get_ceiling();
16436 +	struct task_struct *blocker, *blocked;
16437 +
16438 +	blocker = ceiling ?  ceiling->owner : NULL;
16439 +	blocked = __get_cpu_var(pcp_state).hp_waiter;
16440 +
16441 +	raw_spin_lock_irqsave(&pfp->slock, flags);
16442 +
16443 +	/* Current is no longer inheriting anything by default.  This should be
16444 +	 * the currently scheduled job, and hence not currently queued.
16445 +	 * Special case: if current stopped being a real-time task, it will no longer
16446 +	 * be registered as pfp->scheduled. */
16447 +	BUG_ON(current != pfp->scheduled && is_realtime(current));
16448 +
16449 +	fp_set_prio_inh(pfp, current, NULL);
16450 +	fp_set_prio_inh(pfp, blocked, NULL);
16451 +	fp_set_prio_inh(pfp, blocker, NULL);
16452 +
16453 +	/* Let blocking job inherit priority of blocked job, if required. */
16454 +	if (blocker && blocked &&
16455 +	    fp_higher_prio(blocked, blocker)) {
16456 +		TRACE_TASK(blocker, "PCP inherits from %s/%d (prio %u -> %u) \n",
16457 +			   blocked->comm, blocked->pid,
16458 +			   get_priority(blocker), get_priority(blocked));
16459 +		fp_set_prio_inh(pfp, blocker, blocked);
16460 +	}
16461 +
16462 +	/* Check if anything changed. If the blocked job is current, then it is
16463 +	 * just blocking and hence is going to call the scheduler anyway. */
16464 +	if (blocked != current &&
16465 +	    fp_higher_prio(fp_prio_peek(&pfp->ready_queue), pfp->scheduled))
16466 +		preempt(pfp);
16467 +
16468 +	raw_spin_unlock_irqrestore(&pfp->slock, flags);
16469 +}
16470 +
16471 +/* called with preemptions off */
16472 +static void pcp_raise_ceiling(struct pcp_semaphore* sem,
16473 +			      int effective_prio)
16474 +{
16475 +	struct task_struct* t = current;
16476 +	struct pcp_semaphore* ceiling;
16477 +	prio_wait_queue_t wait;
16478 +	unsigned int waiting_higher_prio;
16479 +
16480 +	while (1) {
16481 +		ceiling = pcp_get_ceiling();
16482 +		if (pcp_exceeds_ceiling(ceiling, t, effective_prio))
16483 +			break;
16484 +
16485 +		TRACE_CUR("PCP ceiling-blocked, wanted sem %p, but %s/%d has the ceiling \n",
16486 +			  sem, ceiling->owner->comm, ceiling->owner->pid);
16487 +
16488 +		/* we need to wait until the ceiling is lowered */
16489 +
16490 +		/* enqueue in priority order */
16491 +		init_prio_waitqueue_entry(&wait, t, effective_prio);
16492 +		set_task_state(t, TASK_UNINTERRUPTIBLE);
16493 +		waiting_higher_prio = add_wait_queue_prio_exclusive(
16494 +			&__get_cpu_var(pcp_state).ceiling_blocked, &wait);
16495 +
16496 +		if (waiting_higher_prio == 0) {
16497 +			TRACE_CUR("PCP new highest-prio waiter => prio inheritance\n");
16498 +
16499 +			/* we are the new highest-priority waiting job
16500 +			 * => update inheritance */
16501 +			__get_cpu_var(pcp_state).hp_waiter = t;
16502 +			pcp_priority_inheritance();
16503 +		}
16504 +
16505 +		TS_LOCK_SUSPEND;
16506 +
16507 +		preempt_enable_no_resched();
16508 +		schedule();
16509 +		preempt_disable();
16510 +
16511 +		/* pcp_resume_unblocked() removed us from wait queue */
16512 +
16513 +		TS_LOCK_RESUME;
16514 +	}
16515 +
16516 +	TRACE_CUR("PCP got the ceiling and sem %p\n", sem);
16517 +
16518 +	/* We are good to go. The semaphore should be available. */
16519 +	BUG_ON(sem->owner != NULL);
16520 +
16521 +	sem->owner = t;
16522 +
16523 +	pcp_add_ceiling(sem);
16524 +}
16525 +
16526 +static void pcp_resume_unblocked(void)
16527 +{
16528 +	wait_queue_head_t *blocked =  &__get_cpu_var(pcp_state).ceiling_blocked;
16529 +	unsigned long flags;
16530 +	prio_wait_queue_t* q;
16531 +	struct task_struct* t = NULL;
16532 +
16533 +	struct pcp_semaphore* ceiling = pcp_get_ceiling();
16534 +
16535 +	spin_lock_irqsave(&blocked->lock, flags);
16536 +
16537 +	while (waitqueue_active(blocked)) {
16538 +		/* check first == highest-priority waiting job */
16539 +		q = list_entry(blocked->task_list.next,
16540 +			       prio_wait_queue_t, wq.task_list);
16541 +		t = (struct task_struct*) q->wq.private;
16542 +
16543 +		/* can it proceed now? => let it go */
16544 +		if (pcp_exceeds_ceiling(ceiling, t, q->priority)) {
16545 +		    __remove_wait_queue(blocked, &q->wq);
16546 +		    wake_up_process(t);
16547 +		} else {
16548 +			/* We are done. Update highest-priority waiter. */
16549 +			__get_cpu_var(pcp_state).hp_waiter = t;
16550 +			goto out;
16551 +		}
16552 +	}
16553 +	/* If we get here, then there are no more waiting
16554 +	 * jobs. */
16555 +	__get_cpu_var(pcp_state).hp_waiter = NULL;
16556 +out:
16557 +	spin_unlock_irqrestore(&blocked->lock, flags);
16558 +}
16559 +
16560 +/* assumes preempt off */
16561 +static void pcp_lower_ceiling(struct pcp_semaphore* sem)
16562 +{
16563 +	BUG_ON(!in_list(&sem->ceiling));
16564 +	BUG_ON(sem->owner != current);
16565 +	BUG_ON(sem->on_cpu != smp_processor_id());
16566 +
16567 +	/* remove from ceiling list */
16568 +	list_del(&sem->ceiling);
16569 +
16570 +	/* release */
16571 +	sem->owner = NULL;
16572 +
16573 +	TRACE_CUR("PCP released sem %p\n", sem);
16574 +
16575 +	/* Wake up all ceiling-blocked jobs that now pass the ceiling. */
16576 +	pcp_resume_unblocked();
16577 +
16578 +	pcp_priority_inheritance();
16579 +}
16580 +
16581 +static void pcp_update_prio_ceiling(struct pcp_semaphore* sem,
16582 +				    int effective_prio)
16583 +{
16584 +	/* This needs to be synchronized on something.
16585 +	 * Might as well use the waitqueue lock of the target processor.
16586 +	 * We assume this happens only before the task set starts execution
16587 +	 * (i.e., during initialization), but it may happen on multiple
16588 +	 * processors at the same time.
16589 +	 */
16590 +	unsigned long flags;
16591 +
16592 +	struct pcp_state* s = &per_cpu(pcp_state, sem->on_cpu);
16593 +
16594 +	spin_lock_irqsave(&s->ceiling_blocked.lock, flags);
16595 +
16596 +	sem->prio_ceiling = min(sem->prio_ceiling, effective_prio);
16597 +
16598 +	spin_unlock_irqrestore(&s->ceiling_blocked.lock, flags);
16599 +}
16600 +
16601 +static void pcp_init_semaphore(struct pcp_semaphore* sem, int cpu)
16602 +{
16603 +	sem->owner   = NULL;
16604 +	INIT_LIST_HEAD(&sem->ceiling);
16605 +	sem->prio_ceiling = INT_MAX;
16606 +	sem->on_cpu = cpu;
16607 +}
16608 +
16609 +int pfp_pcp_lock(struct litmus_lock* l)
16610 +{
16611 +	struct task_struct* t = current;
16612 +	struct pcp_semaphore *sem = pcp_from_lock(l);
16613 +
16614 +	/* The regular PCP uses the regular task priorities, not agent
16615 +	 * priorities. */
16616 +	int eprio = get_priority(t);
16617 +	int from  = get_partition(t);
16618 +	int to    = sem->on_cpu;
16619 +
16620 +	if (!is_realtime(t) || from != to)
16621 +		return -EPERM;
16622 +
16623 +	/* prevent nested lock acquisition in global critical section */
16624 +	if (tsk_rt(t)->num_locks_held)
16625 +		return -EBUSY;
16626 +
16627 +	preempt_disable();
16628 +
16629 +	pcp_raise_ceiling(sem, eprio);
16630 +
16631 +	preempt_enable();
16632 +
16633 +	tsk_rt(t)->num_local_locks_held++;
16634 +
16635 +	return 0;
16636 +}
16637 +
16638 +int pfp_pcp_unlock(struct litmus_lock* l)
16639 +{
16640 +	struct task_struct *t = current;
16641 +	struct pcp_semaphore *sem = pcp_from_lock(l);
16642 +
16643 +	int err = 0;
16644 +
16645 +	preempt_disable();
16646 +
16647 +	if (sem->owner != t) {
16648 +		err = -EINVAL;
16649 +		goto out;
16650 +	}
16651 +
16652 +	/* The current owner should be executing on the correct CPU.
16653 +	 *
16654 +	 * FIXME: if the owner transitioned out of RT mode or is exiting, then
16655 +	 * it might have already been migrated away by the best-effort
16656 +	 * scheduler and we just have to deal with it. This is currently not
16657 +	 * supported. */
16658 +	BUG_ON(sem->on_cpu != smp_processor_id());
16659 +
16660 +	tsk_rt(t)->num_local_locks_held--;
16661 +
16662 +	/* give it back */
16663 +	pcp_lower_ceiling(sem);
16664 +
16665 +out:
16666 +	preempt_enable();
16667 +
16668 +	return err;
16669 +}
16670 +
16671 +int pfp_pcp_open(struct litmus_lock* l, void* __user config)
16672 +{
16673 +	struct task_struct *t = current;
16674 +	struct pcp_semaphore *sem = pcp_from_lock(l);
16675 +
16676 +	int cpu, eprio;
16677 +
16678 +	if (!is_realtime(t))
16679 +		/* we need to know the real-time priority */
16680 +		return -EPERM;
16681 +
16682 +	if (!config)
16683 +		cpu = get_partition(t);
16684 +	else if (get_user(cpu, (int*) config))
16685 +		return -EFAULT;
16686 +
16687 +	/* make sure the resource location matches */
16688 +	if (cpu != sem->on_cpu)
16689 +		return -EINVAL;
16690 +
16691 +	/* The regular PCP uses regular task priorities, not agent
16692 +	 * priorities. */
16693 +	eprio = get_priority(t);
16694 +
16695 +	pcp_update_prio_ceiling(sem, eprio);
16696 +
16697 +	return 0;
16698 +}
16699 +
16700 +int pfp_pcp_close(struct litmus_lock* l)
16701 +{
16702 +	struct task_struct *t = current;
16703 +	struct pcp_semaphore *sem = pcp_from_lock(l);
16704 +
16705 +	int owner = 0;
16706 +
16707 +	preempt_disable();
16708 +
16709 +	if (sem->on_cpu == smp_processor_id())
16710 +		owner = sem->owner == t;
16711 +
16712 +	preempt_enable();
16713 +
16714 +	if (owner)
16715 +		pfp_pcp_unlock(l);
16716 +
16717 +	return 0;
16718 +}
16719 +
16720 +void pfp_pcp_free(struct litmus_lock* lock)
16721 +{
16722 +	kfree(pcp_from_lock(lock));
16723 +}
16724 +
16725 +
16726 +static struct litmus_lock_ops pfp_pcp_lock_ops = {
16727 +	.close  = pfp_pcp_close,
16728 +	.lock   = pfp_pcp_lock,
16729 +	.open	= pfp_pcp_open,
16730 +	.unlock = pfp_pcp_unlock,
16731 +	.deallocate = pfp_pcp_free,
16732 +};
16733 +
16734 +
16735 +static struct litmus_lock* pfp_new_pcp(int on_cpu)
16736 +{
16737 +	struct pcp_semaphore* sem;
16738 +
16739 +	sem = kmalloc(sizeof(*sem), GFP_KERNEL);
16740 +	if (!sem)
16741 +		return NULL;
16742 +
16743 +	sem->litmus_lock.ops = &pfp_pcp_lock_ops;
16744 +	pcp_init_semaphore(sem, on_cpu);
16745 +
16746 +	return &sem->litmus_lock;
16747 +}
16748 +
16749 +/* ******************** DPCP support ********************** */
16750 +
16751 +struct dpcp_semaphore {
16752 +	struct litmus_lock litmus_lock;
16753 +	struct pcp_semaphore  pcp;
16754 +	int owner_cpu;
16755 +};
16756 +
16757 +static inline struct dpcp_semaphore* dpcp_from_lock(struct litmus_lock* lock)
16758 +{
16759 +	return container_of(lock, struct dpcp_semaphore, litmus_lock);
16760 +}
16761 +
16762 +/* called with preemptions disabled */
16763 +static void pfp_migrate_to(int target_cpu)
16764 +{
16765 +	struct task_struct* t = current;
16766 +	pfp_domain_t *from;
16767 +
16768 +	if (get_partition(t) == target_cpu)
16769 +		return;
16770 +
16771 +	/* make sure target_cpu makes sense */
16772 +	BUG_ON(!cpu_online(target_cpu));
16773 +
16774 +	local_irq_disable();
16775 +
16776 +	from = task_pfp(t);
16777 +	raw_spin_lock(&from->slock);
16778 +
16779 +	/* The scheduled task should not be in any ready or release queue.  Check
16780 +	 * this while holding the lock to avoid races with RT-mode transitions. */
16781 +	BUG_ON(is_realtime(t) && is_queued(t));
16782 +
16783 +	/* switch partitions */
16784 +	tsk_rt(t)->task_params.cpu = target_cpu;
16785 +
16786 +	raw_spin_unlock(&from->slock);
16787 +
16788 +	/* Don't trace scheduler costs as part of
16789 +	 * locking overhead. Scheduling costs are accounted for
16790 +	 * explicitly. */
16791 +	TS_LOCK_SUSPEND;
16792 +
16793 +	local_irq_enable();
16794 +	preempt_enable_no_resched();
16795 +
16796 +	/* deschedule to be migrated */
16797 +	schedule();
16798 +
16799 +	/* we are now on the target processor */
16800 +	preempt_disable();
16801 +
16802 +	/* start recording costs again */
16803 +	TS_LOCK_RESUME;
16804 +
16805 +	BUG_ON(smp_processor_id() != target_cpu && is_realtime(t));
16806 +}
16807 +
16808 +int pfp_dpcp_lock(struct litmus_lock* l)
16809 +{
16810 +	struct task_struct* t = current;
16811 +	struct dpcp_semaphore *sem = dpcp_from_lock(l);
16812 +	int eprio = effective_agent_priority(get_priority(t));
16813 +	int from  = get_partition(t);
16814 +	int to    = sem->pcp.on_cpu;
16815 +
16816 +	if (!is_realtime(t))
16817 +		return -EPERM;
16818 +
16819 +	/* prevent nested lock acquisition */
16820 +	if (tsk_rt(t)->num_locks_held ||
16821 +	    tsk_rt(t)->num_local_locks_held)
16822 +		return -EBUSY;
16823 +
16824 +	preempt_disable();
16825 +
16826 +	/* Priority-boost ourself *before* we suspend so that
16827 +	 * our priority is boosted when we resume. */
16828 +
16829 +	boost_priority(t, get_priority(t));
16830 +
16831 +	pfp_migrate_to(to);
16832 +
16833 +	pcp_raise_ceiling(&sem->pcp, eprio);
16834 +
16835 +	/* yep, we got it => execute request */
16836 +	sem->owner_cpu = from;
16837 +
16838 +	preempt_enable();
16839 +
16840 +	tsk_rt(t)->num_locks_held++;
16841 +
16842 +	return 0;
16843 +}
16844 +
16845 +int pfp_dpcp_unlock(struct litmus_lock* l)
16846 +{
16847 +	struct task_struct *t = current;
16848 +	struct dpcp_semaphore *sem = dpcp_from_lock(l);
16849 +	int err = 0;
16850 +	int home;
16851 +
16852 +	preempt_disable();
16853 +
16854 +	if (sem->pcp.owner != t) {
16855 +		err = -EINVAL;
16856 +		goto out;
16857 +	}
16858 +
16859 +	/* The current owner should be executing on the correct CPU.
16860 +	 *
16861 +	 * FIXME: if the owner transitioned out of RT mode or is exiting, then
16862 +	 * it might have already been migrated away by the best-effort
16863 +	 * scheduler and we just have to deal with it. This is currently not
16864 +	 * supported. */
16865 +	BUG_ON(sem->pcp.on_cpu != smp_processor_id());
16866 +
16867 +	tsk_rt(t)->num_locks_held--;
16868 +
16869 +	home = sem->owner_cpu;
16870 +
16871 +	/* give it back */
16872 +	pcp_lower_ceiling(&sem->pcp);
16873 +
16874 +	/* we lose the benefit of priority boosting */
16875 +	unboost_priority(t);
16876 +
16877 +	pfp_migrate_to(home);
16878 +
16879 +out:
16880 +	preempt_enable();
16881 +
16882 +	return err;
16883 +}
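+
+/* Summary of the DPCP request path implemented above: a requesting job
+ * boosts itself on its home partition, migrates to the processor to which
+ * the resource is assigned (sem->pcp.on_cpu), competes there for the local
+ * PCP ceiling using its agent priority, executes the critical section
+ * remotely, and on unlock lowers the ceiling, drops the boost, and migrates
+ * back to the home partition recorded in sem->owner_cpu. */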
16884 +
16885 +int pfp_dpcp_open(struct litmus_lock* l, void* __user config)
16886 +{
16887 +	struct task_struct *t = current;
16888 +	struct dpcp_semaphore *sem = dpcp_from_lock(l);
16889 +	int cpu, eprio;
16890 +
16891 +	if (!is_realtime(t))
16892 +		/* we need to know the real-time priority */
16893 +		return -EPERM;
16894 +
16895 +	if (get_user(cpu, (int*) config))
16896 +		return -EFAULT;
16897 +
16898 +	/* make sure the resource location matches */
16899 +	if (cpu != sem->pcp.on_cpu)
16900 +		return -EINVAL;
16901 +
16902 +	eprio = effective_agent_priority(get_priority(t));
16903 +
16904 +	pcp_update_prio_ceiling(&sem->pcp, eprio);
16905 +
16906 +	return 0;
16907 +}
16908 +
16909 +int pfp_dpcp_close(struct litmus_lock* l)
16910 +{
16911 +	struct task_struct *t = current;
16912 +	struct dpcp_semaphore *sem = dpcp_from_lock(l);
16913 +	int owner = 0;
16914 +
16915 +	preempt_disable();
16916 +
16917 +	if (sem->pcp.on_cpu == smp_processor_id())
16918 +		owner = sem->pcp.owner == t;
16919 +
16920 +	preempt_enable();
16921 +
16922 +	if (owner)
16923 +		pfp_dpcp_unlock(l);
16924 +
16925 +	return 0;
16926 +}
16927 +
16928 +void pfp_dpcp_free(struct litmus_lock* lock)
16929 +{
16930 +	kfree(dpcp_from_lock(lock));
16931 +}
16932 +
16933 +static struct litmus_lock_ops pfp_dpcp_lock_ops = {
16934 +	.close  = pfp_dpcp_close,
16935 +	.lock   = pfp_dpcp_lock,
16936 +	.open	= pfp_dpcp_open,
16937 +	.unlock = pfp_dpcp_unlock,
16938 +	.deallocate = pfp_dpcp_free,
16939 +};
16940 +
16941 +static struct litmus_lock* pfp_new_dpcp(int on_cpu)
16942 +{
16943 +	struct dpcp_semaphore* sem;
16944 +
16945 +	sem = kmalloc(sizeof(*sem), GFP_KERNEL);
16946 +	if (!sem)
16947 +		return NULL;
16948 +
16949 +	sem->litmus_lock.ops = &pfp_dpcp_lock_ops;
16950 +	sem->owner_cpu = NO_CPU;
16951 +	pcp_init_semaphore(&sem->pcp, on_cpu);
16952 +
16953 +	return &sem->litmus_lock;
16954 +}
16955 +
16956 +
16957 +/* ******************** DFLP support ********************** */
16958 +
16959 +struct dflp_semaphore {
16960 +	struct litmus_lock litmus_lock;
16961 +
16962 +	/* current resource holder */
16963 +	struct task_struct *owner;
16964 +	int owner_cpu;
16965 +
16966 +	/* FIFO queue of waiting tasks */
16967 +	wait_queue_head_t wait;
16968 +
16969 +	/* where is the resource assigned to */
16970 +	int on_cpu;
16971 +};
16972 +
16973 +static inline struct dflp_semaphore* dflp_from_lock(struct litmus_lock* lock)
16974 +{
16975 +	return container_of(lock, struct dflp_semaphore, litmus_lock);
16976 +}
16977 +
16978 +int pfp_dflp_lock(struct litmus_lock* l)
16979 +{
16980 +	struct task_struct* t = current;
16981 +	struct dflp_semaphore *sem = dflp_from_lock(l);
16982 +	int from  = get_partition(t);
16983 +	int to    = sem->on_cpu;
16984 +	unsigned long flags;
16985 +	wait_queue_t wait;
16986 +	lt_t time_of_request;
16987 +
16988 +	if (!is_realtime(t))
16989 +		return -EPERM;
16990 +
16991 +	/* prevent nested lock acquisition */
16992 +	if (tsk_rt(t)->num_locks_held ||
16993 +	    tsk_rt(t)->num_local_locks_held)
16994 +		return -EBUSY;
16995 +
16996 +	preempt_disable();
16997 +
16998 +	/* tie-break by this point in time */
16999 +	time_of_request = litmus_clock();
17000 +
17001 +	/* Priority-boost ourself *before* we suspend so that
17002 +	 * our priority is boosted when we resume. */
17003 +	boost_priority(t, time_of_request);
17004 +
17005 +	pfp_migrate_to(to);
17006 +
17007 +	/* Now on the right CPU, preemptions still disabled. */
17008 +
17009 +	spin_lock_irqsave(&sem->wait.lock, flags);
17010 +
17011 +	if (sem->owner) {
17012 +		/* resource is not free => must suspend and wait */
17013 +
17014 +		init_waitqueue_entry(&wait, t);
17015 +
17016 +		/* FIXME: interruptible would be nice some day */
17017 +		set_task_state(t, TASK_UNINTERRUPTIBLE);
17018 +
17019 +		__add_wait_queue_tail_exclusive(&sem->wait, &wait);
17020 +
17021 +		TS_LOCK_SUSPEND;
17022 +
17023 +		/* release lock before sleeping */
17024 +		spin_unlock_irqrestore(&sem->wait.lock, flags);
17025 +
17026 +		/* We depend on the FIFO order.  Thus, we don't need to recheck
17027 +		 * when we wake up; we are guaranteed to have the lock since
17028 +		 * there is only one wake up per release.
17029 +		 */
17030 +
17031 +		preempt_enable_no_resched();
17032 +
17033 +		schedule();
17034 +
17035 +		preempt_disable();
17036 +
17037 +		TS_LOCK_RESUME;
17038 +
17039 +		/* Since we hold the lock, no other task will change
17040 +		 * ->owner. We can thus check it without acquiring the spin
17041 +		 * lock. */
17042 +		BUG_ON(sem->owner != t);
17043 +	} else {
17044 +		/* it's ours now */
17045 +		sem->owner = t;
17046 +
17047 +		spin_unlock_irqrestore(&sem->wait.lock, flags);
17048 +	}
17049 +
17050 +	sem->owner_cpu = from;
17051 +
17052 +	preempt_enable();
17053 +
17054 +	tsk_rt(t)->num_locks_held++;
17055 +
17056 +	return 0;
17057 +}
17058 +
17059 +int pfp_dflp_unlock(struct litmus_lock* l)
17060 +{
17061 +	struct task_struct *t = current, *next;
17062 +	struct dflp_semaphore *sem = dflp_from_lock(l);
17063 +	int err = 0;
17064 +	int home;
17065 +	unsigned long flags;
17066 +
17067 +	preempt_disable();
17068 +
17069 +	spin_lock_irqsave(&sem->wait.lock, flags);
17070 +
17071 +	if (sem->owner != t) {
17072 +		err = -EINVAL;
17073 +		spin_unlock_irqrestore(&sem->wait.lock, flags);
17074 +		goto out;
17075 +	}
17076 +
17077 +	/* check if there are jobs waiting for this resource */
17078 +	next = __waitqueue_remove_first(&sem->wait);
17079 +	if (next) {
17080 +		/* next becomes the resource holder */
17081 +		sem->owner = next;
17082 +
17083 +		/* Wake up next. The waiting job is already priority-boosted. */
17084 +		wake_up_process(next);
17085 +	} else
17086 +		/* resource becomes available */
17087 +		sem->owner = NULL;
17088 +
17089 +	tsk_rt(t)->num_locks_held--;
17090 +
17091 +	home = sem->owner_cpu;
17092 +
17093 +	spin_unlock_irqrestore(&sem->wait.lock, flags);
17094 +
17095 +	/* we lose the benefit of priority boosting */
17096 +	unboost_priority(t);
17097 +
17098 +	pfp_migrate_to(home);
17099 +
17100 +out:
17101 +	preempt_enable();
17102 +
17103 +	return err;
17104 +}
17105 +
17106 +int pfp_dflp_open(struct litmus_lock* l, void* __user config)
17107 +{
17108 +	struct dflp_semaphore *sem = dflp_from_lock(l);
17109 +	int cpu;
17110 +
17111 +	if (get_user(cpu, (int*) config))
17112 +		return -EFAULT;
17113 +
17114 +	/* make sure the resource location matches */
17115 +	if (cpu != sem->on_cpu)
17116 +		return -EINVAL;
17117 +
17118 +	return 0;
17119 +}
17120 +
17121 +int pfp_dflp_close(struct litmus_lock* l)
17122 +{
17123 +	struct task_struct *t = current;
17124 +	struct dflp_semaphore *sem = dflp_from_lock(l);
17125 +	int owner = 0;
17126 +
17127 +	preempt_disable();
17128 +
17129 +	if (sem->on_cpu == smp_processor_id())
17130 +		owner = sem->owner == t;
17131 +
17132 +	preempt_enable();
17133 +
17134 +	if (owner)
17135 +		pfp_dflp_unlock(l);
17136 +
17137 +	return 0;
17138 +}
17139 +
17140 +void pfp_dflp_free(struct litmus_lock* lock)
17141 +{
17142 +	kfree(dflp_from_lock(lock));
17143 +}
17144 +
17145 +static struct litmus_lock_ops pfp_dflp_lock_ops = {
17146 +	.close  = pfp_dflp_close,
17147 +	.lock   = pfp_dflp_lock,
17148 +	.open	= pfp_dflp_open,
17149 +	.unlock = pfp_dflp_unlock,
17150 +	.deallocate = pfp_dflp_free,
17151 +};
17152 +
17153 +static struct litmus_lock* pfp_new_dflp(int on_cpu)
17154 +{
17155 +	struct dflp_semaphore* sem;
17156 +
17157 +	sem = kmalloc(sizeof(*sem), GFP_KERNEL);
17158 +	if (!sem)
17159 +		return NULL;
17160 +
17161 +	sem->litmus_lock.ops = &pfp_dflp_lock_ops;
17162 +	sem->owner_cpu = NO_CPU;
17163 +	sem->owner   = NULL;
17164 +	sem->on_cpu  = on_cpu;
17165 +	init_waitqueue_head(&sem->wait);
17166 +
17167 +	return &sem->litmus_lock;
17168 +}
17169 +
17170 +
17171 +/* **** lock constructor **** */
17172 +
17173 +
17174 +static long pfp_allocate_lock(struct litmus_lock **lock, int type,
17175 +				 void* __user config)
17176 +{
17177 +	int err = -ENXIO, cpu;
17178 +	struct srp_semaphore* srp;
17179 +
17180 +	/* P-FP supports the SRP and the PCP for local resources and the
17181 +	 * FMLP, MPCP, DPCP, and DFLP for global resources. */
17182 +	switch (type) {
17183 +	case FMLP_SEM:
17184 +		/* FIFO Mutex Locking Protocol */
17185 +		*lock = pfp_new_fmlp();
17186 +		if (*lock)
17187 +			err = 0;
17188 +		else
17189 +			err = -ENOMEM;
17190 +		break;
17191 +
17192 +	case MPCP_SEM:
17193 +		/* Multiprocessor Priority Ceiling Protocol */
17194 +		*lock = pfp_new_mpcp(0);
17195 +		if (*lock)
17196 +			err = 0;
17197 +		else
17198 +			err = -ENOMEM;
17199 +		break;
17200 +
17201 +	case MPCP_VS_SEM:
17202 +		/* Multiprocessor Priority Ceiling Protocol with virtual spinning */
17203 +		*lock = pfp_new_mpcp(1);
17204 +		if (*lock)
17205 +			err = 0;
17206 +		else
17207 +			err = -ENOMEM;
17208 +		break;
17209 +
17210 +	case DPCP_SEM:
17211 +		/* Distributed Priority Ceiling Protocol */
17212 +		if (get_user(cpu, (int*) config))
17213 +			return -EFAULT;
17214 +
17215 +		if (!cpu_online(cpu))
17216 +			return -EINVAL;
17217 +
17218 +		*lock = pfp_new_dpcp(cpu);
17219 +		if (*lock)
17220 +			err = 0;
17221 +		else
17222 +			err = -ENOMEM;
17223 +		break;
17224 +
17225 +	case DFLP_SEM:
17226 +		/* Distributed FIFO Locking Protocol */
17227 +		if (get_user(cpu, (int*) config))
17228 +			return -EFAULT;
17229 +
17230 +		if (!cpu_online(cpu))
17231 +			return -EINVAL;
17232 +
17233 +		*lock = pfp_new_dflp(cpu);
17234 +		if (*lock)
17235 +			err = 0;
17236 +		else
17237 +			err = -ENOMEM;
17238 +		break;
17239 +
17240 +	case SRP_SEM:
17241 +		/* Baker's Stack Resource Policy */
17242 +		srp = allocate_srp_semaphore();
17243 +		if (srp) {
17244 +			*lock = &srp->litmus_lock;
17245 +			err = 0;
17246 +		} else
17247 +			err = -ENOMEM;
17248 +		break;
17249 +
17250 +        case PCP_SEM:
17251 +		/* Priority Ceiling Protocol */
17252 +		if (!config)
17253 +			cpu = get_partition(current);
17254 +		else if (get_user(cpu, (int*) config))
17255 +			return -EFAULT;
17256 +
17257 +		if (!cpu_online(cpu))
17258 +			return -EINVAL;
17259 +
17260 +		*lock = pfp_new_pcp(cpu);
17261 +		if (*lock)
17262 +			err = 0;
17263 +		else
17264 +			err = -ENOMEM;
17265 +		break;
17266 +	};
17267 +
17268 +	return err;
17269 +}
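+
+/* Illustrative user-space sketch (helper names are hypothetical; only
+ * litmus_lock()/litmus_unlock() are assumed to be provided by liblitmus, and
+ * the exact open helper depends on the liblitmus version in use):
+ *
+ *	int od = open_lock_od(fd, DPCP_SEM, resource_id, &cpu);  // hypothetical
+ *	litmus_lock(od);      // reaches pfp_dpcp_lock() via the ->lock() op
+ *	// ... critical section ...
+ *	litmus_unlock(od);    // reaches pfp_dpcp_unlock()
+ *
+ * The config pointer handed to pfp_allocate_lock() and to the ->open()
+ * callbacks carries the CPU assignment for the PCP, DPCP, and DFLP variants.
+ */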
17270 +
17271 +#endif
17272 +
17273 +static long pfp_admit_task(struct task_struct* tsk)
17274 +{
17275 +	if (task_cpu(tsk) == tsk->rt_param.task_params.cpu &&
17276 +#ifdef CONFIG_RELEASE_MASTER
17277 +	    /* don't allow tasks on release master CPU */
17278 +	    task_cpu(tsk) != remote_dom(task_cpu(tsk))->release_master &&
17279 +#endif
17280 +	    litmus_is_valid_fixed_prio(get_priority(tsk)))
17281 +		return 0;
17282 +	else
17283 +		return -EINVAL;
17284 +}
17285 +
17286 +static struct domain_proc_info pfp_domain_proc_info;
17287 +static long pfp_get_domain_proc_info(struct domain_proc_info **ret)
17288 +{
17289 +	*ret = &pfp_domain_proc_info;
17290 +	return 0;
17291 +}
17292 +
17293 +static void pfp_setup_domain_proc(void)
17294 +{
17295 +	int i, cpu;
17296 +	int release_master =
17297 +#ifdef CONFIG_RELEASE_MASTER
17298 +		atomic_read(&release_master_cpu);
17299 +#else
17300 +		NO_CPU;
17301 +#endif
17302 +	int num_rt_cpus = num_online_cpus() - (release_master != NO_CPU);
17303 +	struct cd_mapping *cpu_map, *domain_map;
17304 +
17305 +	memset(&pfp_domain_proc_info, 0, sizeof(pfp_domain_proc_info));
17306 +	init_domain_proc_info(&pfp_domain_proc_info, num_rt_cpus, num_rt_cpus);
17307 +	pfp_domain_proc_info.num_cpus = num_rt_cpus;
17308 +	pfp_domain_proc_info.num_domains = num_rt_cpus;
17309 +	for (cpu = 0, i = 0; cpu < num_online_cpus(); ++cpu) {
17310 +		if (cpu == release_master)
17311 +			continue;
17312 +		cpu_map = &pfp_domain_proc_info.cpu_to_domains[i];
17313 +		domain_map = &pfp_domain_proc_info.domain_to_cpus[i];
17314 +
17315 +		cpu_map->id = cpu;
17316 +		domain_map->id = i; /* enumerate w/o counting the release master */
17317 +		cpumask_set_cpu(i, cpu_map->mask);
17318 +		cpumask_set_cpu(cpu, domain_map->mask);
17319 +		++i;
17320 +	}
17321 +}
17322 +
17323 +static long pfp_activate_plugin(void)
17324 +{
17325 +#if defined(CONFIG_RELEASE_MASTER) || defined(CONFIG_LITMUS_LOCKING)
17326 +	int cpu;
17327 +#endif
17328 +
17329 +#ifdef CONFIG_RELEASE_MASTER
17330 +	for_each_online_cpu(cpu) {
17331 +		remote_dom(cpu)->release_master = atomic_read(&release_master_cpu);
17332 +	}
17333 +#endif
17334 +
17335 +#ifdef CONFIG_LITMUS_LOCKING
17336 +	get_srp_prio = pfp_get_srp_prio;
17337 +
17338 +	for_each_online_cpu(cpu) {
17339 +		init_waitqueue_head(&per_cpu(mpcpvs_vspin_wait, cpu));
17340 +		per_cpu(mpcpvs_vspin, cpu) = NULL;
17341 +
17342 +		pcp_init_state(&per_cpu(pcp_state, cpu));
17343 +		pfp_doms[cpu] = remote_pfp(cpu);
17344 +		per_cpu(fmlp_timestamp,cpu) = 0;
17345 +	}
17346 +
17347 +#endif
17348 +
17349 +	pfp_setup_domain_proc();
17350 +
17351 +	return 0;
17352 +}
17353 +
17354 +static long pfp_deactivate_plugin(void)
17355 +{
17356 +	destroy_domain_proc_info(&pfp_domain_proc_info);
17357 +	return 0;
17358 +}
17359 +
17360 +/*	Plugin object	*/
17361 +static struct sched_plugin pfp_plugin __cacheline_aligned_in_smp = {
17362 +	.plugin_name		= "P-FP",
17363 +	.task_new		= pfp_task_new,
17364 +	.complete_job		= complete_job,
17365 +	.task_exit		= pfp_task_exit,
17366 +	.schedule		= pfp_schedule,
17367 +	.task_wake_up		= pfp_task_wake_up,
17368 +	.task_block		= pfp_task_block,
17369 +	.admit_task		= pfp_admit_task,
17370 +	.activate_plugin	= pfp_activate_plugin,
17371 +	.deactivate_plugin	= pfp_deactivate_plugin,
17372 +	.get_domain_proc_info	= pfp_get_domain_proc_info,
17373 +#ifdef CONFIG_LITMUS_LOCKING
17374 +	.allocate_lock		= pfp_allocate_lock,
17375 +	.finish_switch		= pfp_finish_switch,
17376 +#endif
17377 +};
17378 +
17379 +
17380 +static int __init init_pfp(void)
17381 +{
17382 +	int i;
17383 +
17384 +	/* We do not really want to support CPU hotplug, do we? ;)
17385 +	 * However, if we were so crazy as to do so,
17386 +	 * we could not use num_online_cpus().
17387 +	 */
17388 +	for (i = 0; i < num_online_cpus(); i++) {
17389 +		pfp_domain_init(remote_pfp(i), i);
17390 +	}
17391 +	return register_sched_plugin(&pfp_plugin);
17392 +}
17393 +
17394 +module_init(init_pfp);
17395 -- 
17396 1.8.1.2
17397 
17398 
17399 From dcd52da5373b0afb556b0d4fb006568dc44f2ba0 Mon Sep 17 00:00:00 2001
17400 From: Bjoern Brandenburg <bbb@mpi-sws.org>
17401 Date: Tue, 12 Feb 2013 19:18:11 +0100
17402 Subject: [PATCH 049/119] Add C-EDF scheduler plugin
17403 
17404 ---
17405  litmus/Kconfig      |  10 +
17406  litmus/Makefile     |   2 +-
17407  litmus/sched_cedf.c | 903 ++++++++++++++++++++++++++++++++++++++++++++++++++++
17408  3 files changed, 914 insertions(+), 1 deletion(-)
17409  create mode 100644 litmus/sched_cedf.c
17410 
17411 diff --git a/litmus/Kconfig b/litmus/Kconfig
17412 index fdf31f3..38d9e43 100644
17413 --- a/litmus/Kconfig
17414 +++ b/litmus/Kconfig
17415 @@ -2,6 +2,16 @@ menu "LITMUS^RT"
17416  
17417  menu "Scheduling"
17418  
17419 +config PLUGIN_CEDF
17420 +        bool "Clustered-EDF"
17421 +	depends on X86 && SYSFS
17422 +        default y
17423 +        help
17424 +          Include the Clustered EDF (C-EDF) plugin in the kernel.
17425 +          This is appropriate for large platforms with shared caches.
17426 +          On smaller platforms (e.g., ARM PB11MPCore), using C-EDF
17427 +          makes little sense since there aren't any shared caches.
17428 +
17429  config RELEASE_MASTER
17430          bool "Release-master Support"
17431  	depends on ARCH_HAS_SEND_PULL_TIMERS && SMP
17432 diff --git a/litmus/Makefile b/litmus/Makefile
17433 index 2d2e0a5..8110a5a 100644
17434 --- a/litmus/Makefile
17435 +++ b/litmus/Makefile
17436 @@ -23,7 +23,7 @@ obj-y     = sched_plugin.o litmus.o \
17437  	    sched_psn_edf.o \
17438  	    sched_pfp.o
17439  
17440 -
17441 +obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
17442  
17443  obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o
17444  
17445 diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
17446 new file mode 100644
17447 index 0000000..b1454c0
17448 --- /dev/null
17449 +++ b/litmus/sched_cedf.c
17450 @@ -0,0 +1,903 @@
17451 +/*
17452 + * litmus/sched_cedf.c
17453 + *
17454 + * Implementation of the C-EDF scheduling algorithm.
17455 + *
17456 + * This implementation is based on G-EDF:
17457 + * - CPUs are clustered around L2 or L3 caches.
17458 + * - Clusters topology is automatically detected (this is arch dependent
17459 + * - The cluster topology is automatically detected (this is arch-dependent
17460 + *   and currently works only on x86 --- and only with modern
17461 + *   CPUs that export cpuid4 information).
17462 + * - The plugin _does not_ attempt to put tasks in the right cluster, i.e.,
17463 + *   the programmer needs to be aware of the topology to place tasks
17464 + *   in the desired cluster.
17465 + * - Default clustering is around the L2 cache (cache index = 2);
17466 + *   supported cluster levels are: L1 (private cache: like P-EDF), L2, L3,
17467 + *   ALL (all online CPUs are placed in a single cluster).
17468 + *   For details on functions, take a look at sched_gsn_edf.c
17469 + *
17470 + * Currently, we do not support changes in the number of online CPUs.
17471 + * If num_online_cpus() changes dynamically, the plugin is broken.
17472 + *
17473 + * This version uses the simple approach and serializes all scheduling
17474 + * decisions by the use of a queue lock. This is probably not the
17475 + * best way to do it, but it should suffice for now.
17476 + */
17477 +
17478 +#include <linux/spinlock.h>
17479 +#include <linux/percpu.h>
17480 +#include <linux/sched.h>
17481 +#include <linux/slab.h>
17482 +
17483 +#include <linux/module.h>
17484 +
17485 +#include <litmus/litmus.h>
17486 +#include <litmus/jobs.h>
17487 +#include <litmus/preempt.h>
17488 +#include <litmus/budget.h>
17489 +#include <litmus/sched_plugin.h>
17490 +#include <litmus/edf_common.h>
17491 +#include <litmus/sched_trace.h>
17492 +
17493 +#include <litmus/clustered.h>
17494 +
17495 +#include <litmus/bheap.h>
17496 +
17497 +#ifdef CONFIG_SCHED_CPU_AFFINITY
17498 +#include <litmus/affinity.h>
17499 +#endif
17500 +
17501 +/* to configure the cluster size */
17502 +#include <litmus/litmus_proc.h>
17503 +#include <linux/uaccess.h>
17504 +
17505 +/* Reference configuration variable. Determines which cache level is used to
17506 + * group CPUs into clusters.  GLOBAL_CLUSTER, which is the default, means that
17507 + * all CPUs form a single cluster (just like GSN-EDF).
17508 + */
17509 +static enum cache_level cluster_config = GLOBAL_CLUSTER;
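+
+/* Note (inferred from the surrounding code, not stated explicitly in this
+ * hunk): cluster_config is intended to be set before the plugin is
+ * activated, presumably through the litmus_proc interface included above;
+ * the per-cluster cedf_domain_t structures are allocated at activation
+ * time, so that is when the chosen cache level actually takes effect. */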
17510 +
17511 +struct clusterdomain;
17512 +
17513 +/* cpu_entry_t - maintain the linked and scheduled state
17514 + *
17515 + * A cpu also contains a pointer to the cedf_domain_t cluster
17516 + * that owns it (struct clusterdomain*)
17517 + */
17518 +typedef struct  {
17519 +	int 			cpu;
17520 +	struct clusterdomain*	cluster;	/* owning cluster */
17521 +	struct task_struct*	linked;		/* only RT tasks */
17522 +	struct task_struct*	scheduled;	/* only RT tasks */
17523 +	atomic_t		will_schedule;	/* prevent unneeded IPIs */
17524 +	struct bheap_node*	hn;
17525 +} cpu_entry_t;
17526 +
17527 +/* one cpu_entry_t per CPU */
17528 +DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries);
17529 +
17530 +#define set_will_schedule() \
17531 +	(atomic_set(&__get_cpu_var(cedf_cpu_entries).will_schedule, 1))
17532 +#define clear_will_schedule() \
17533 +	(atomic_set(&__get_cpu_var(cedf_cpu_entries).will_schedule, 0))
17534 +#define test_will_schedule(cpu) \
17535 +	(atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule))
17536 +
17537 +/*
17538 + * In C-EDF there is a cedf domain _per_ cluster
17539 + * The number of clusters is dynamically determined according to the
17540 + * total number of CPUs and the cluster size.
17541 + */
17542 +typedef struct clusterdomain {
17543 +	/* rt_domain for this cluster */
17544 +	rt_domain_t	domain;
17545 +	/* cpus in this cluster */
17546 +	cpu_entry_t*	*cpus;
17547 +	/* map of this cluster cpus */
17548 +	cpumask_var_t	cpu_map;
17549 +	/* the CPUs queue themselves according to priority in this heap */
17550 +	struct bheap_node *heap_node;
17551 +	struct bheap      cpu_heap;
17552 +	/* lock for this cluster */
17553 +#define cluster_lock domain.ready_lock
17554 +} cedf_domain_t;
17555 +
17556 +/* a cedf_domain per cluster; allocation is done at init/activation time */
17557 +cedf_domain_t *cedf;
17558 +
17559 +#define remote_cluster(cpu)	((cedf_domain_t *) per_cpu(cedf_cpu_entries, cpu).cluster)
17560 +#define task_cpu_cluster(task)	remote_cluster(get_partition(task))
17561 +
17562 +/* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling
17563 + * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose
17564 + * information during the initialization of the plugin (e.g., topology)
17565 +#define WANT_ALL_SCHED_EVENTS
17566 + */
17567 +#define VERBOSE_INIT
17568 +
17569 +static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
17570 +{
17571 +	cpu_entry_t *a, *b;
17572 +	a = _a->value;
17573 +	b = _b->value;
17574 +	/* Note that a and b are inverted: we want the lowest-priority CPU at
17575 +	 * the top of the heap.
17576 +	 */
17577 +	return edf_higher_prio(b->linked, a->linked);
17578 +}
17579 +
17580 +/* update_cpu_position - Move the cpu entry to the correct place to maintain
17581 + *                       order in the cpu queue. Caller must hold cedf lock.
17582 + */
17583 +static void update_cpu_position(cpu_entry_t *entry)
17584 +{
17585 +	cedf_domain_t *cluster = entry->cluster;
17586 +
17587 +	if (likely(bheap_node_in_heap(entry->hn)))
17588 +		bheap_delete(cpu_lower_prio,
17589 +				&cluster->cpu_heap,
17590 +				entry->hn);
17591 +
17592 +	bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn);
17593 +}
17594 +
17595 +/* caller must hold cedf lock */
17596 +static cpu_entry_t* lowest_prio_cpu(cedf_domain_t *cluster)
17597 +{
17598 +	struct bheap_node* hn;
17599 +	hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap);
17600 +	return hn->value;
17601 +}
17602 +
17603 +
17604 +/* link_task_to_cpu - Update the link of a CPU.
17605 + *                    Handles the case where the to-be-linked task is already
17606 + *                    scheduled on a different CPU.
17607 + */
17608 +static noinline void link_task_to_cpu(struct task_struct* linked,
17609 +				      cpu_entry_t *entry)
17610 +{
17611 +	cpu_entry_t *sched;
17612 +	struct task_struct* tmp;
17613 +	int on_cpu;
17614 +
17615 +	BUG_ON(linked && !is_realtime(linked));
17616 +
17617 +	/* Currently linked task is set to be unlinked. */
17618 +	if (entry->linked) {
17619 +		entry->linked->rt_param.linked_on = NO_CPU;
17620 +	}
17621 +
17622 +	/* Link new task to CPU. */
17623 +	if (linked) {
17624 +		/* handle the case where the task is already scheduled somewhere! */
17625 +		on_cpu = linked->rt_param.scheduled_on;
17626 +		if (on_cpu != NO_CPU) {
17627 +			sched = &per_cpu(cedf_cpu_entries, on_cpu);
17628 +			/* this should only happen if not linked already */
17629 +			BUG_ON(sched->linked == linked);
17630 +
17631 +			/* If we are already scheduled on the CPU to which we
17632 +			 * wanted to link, we don't need to do the swap --
17633 +			 * we just link ourselves to the CPU and depend on
17634 +			 * the caller to get things right.
17635 +			 */
17636 +			if (entry != sched) {
17637 +				TRACE_TASK(linked,
17638 +					   "already scheduled on %d, updating link.\n",
17639 +					   sched->cpu);
17640 +				tmp = sched->linked;
17641 +				linked->rt_param.linked_on = sched->cpu;
17642 +				sched->linked = linked;
17643 +				update_cpu_position(sched);
17644 +				linked = tmp;
17645 +			}
17646 +		}
17647 +		if (linked) /* might be NULL due to swap */
17648 +			linked->rt_param.linked_on = entry->cpu;
17649 +	}
17650 +	entry->linked = linked;
17651 +#ifdef WANT_ALL_SCHED_EVENTS
17652 +	if (linked)
17653 +		TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
17654 +	else
17655 +		TRACE("NULL linked to %d.\n", entry->cpu);
17656 +#endif
17657 +	update_cpu_position(entry);
17658 +}
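+/* Sketch of the swap case above (hypothetical scenario): suppose task T
+ * should be linked to CPU0 but is still scheduled on CPU1.  Rather than
+ * forcing an immediate migration, link_task_to_cpu() links T to CPU1 and
+ * re-links whatever CPU1 had been linked to (if anything) to CPU0; both
+ * CPUs then converge on their links the next time cedf_schedule() runs.
+ */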
17659 +
17660 +/* unlink - Make sure a task is not linked any longer to an entry
17661 + *          where it was linked before. Must hold cedf_lock.
17662 + */
17663 +static noinline void unlink(struct task_struct* t)
17664 +{
17665 +	cpu_entry_t *entry;
17666 +
17667 +	if (t->rt_param.linked_on != NO_CPU) {
17668 +		/* unlink */
17669 +		entry = &per_cpu(cedf_cpu_entries, t->rt_param.linked_on);
17670 +		t->rt_param.linked_on = NO_CPU;
17671 +		link_task_to_cpu(NULL, entry);
17672 +	} else if (is_queued(t)) {
17673 +		/* This is an interesting situation: t is scheduled,
17674 +		 * but was just recently unlinked.  It cannot be
17675 +		 * linked anywhere else (because then it would have
17676 +		 * been relinked to this CPU), thus it must be in some
17677 +		 * queue. We must remove it from the list in this
17678 +		 * case.
17679 +		 *
17680 +		 * in the C-EDF case it should be somewhere in the queue of
17681 +		 * its cluster's domain, which we can obtain via
17682 +		 * task_cpu_cluster().
17683 +		 */
17684 +		remove(&(task_cpu_cluster(t))->domain, t);
17685 +	}
17686 +}
17687 +
17688 +
17689 +/* preempt - force a CPU to reschedule
17690 + */
17691 +static void preempt(cpu_entry_t *entry)
17692 +{
17693 +	preempt_if_preemptable(entry->scheduled, entry->cpu);
17694 +}
17695 +
17696 +/* requeue - Put an unlinked task into its cluster's C-EDF domain.
17697 + *           Caller must hold cedf_lock.
17698 + */
17699 +static noinline void requeue(struct task_struct* task)
17700 +{
17701 +	cedf_domain_t *cluster = task_cpu_cluster(task);
17702 +	BUG_ON(!task);
17703 +	/* sanity check before insertion */
17704 +	BUG_ON(is_queued(task));
17705 +
17706 +	if (is_early_releasing(task) || is_released(task, litmus_clock()))
17707 +		__add_ready(&cluster->domain, task);
17708 +	else {
17709 +		/* it has got to wait */
17710 +		add_release(&cluster->domain, task);
17711 +	}
17712 +}
17713 +
17714 +#ifdef CONFIG_SCHED_CPU_AFFINITY
17715 +static cpu_entry_t* cedf_get_nearest_available_cpu(
17716 +				cedf_domain_t *cluster, cpu_entry_t *start)
17717 +{
17718 +	cpu_entry_t *affinity;
17719 +
17720 +	get_nearest_available_cpu(affinity, start, cedf_cpu_entries,
17721 +#ifdef CONFIG_RELEASE_MASTER
17722 +		cluster->domain.release_master
17723 +#else
17724 +		NO_CPU
17725 +#endif
17726 +		);
17727 +
17728 +	/* make sure CPU is in our cluster */
17729 +	if (affinity && cpu_isset(affinity->cpu, *cluster->cpu_map))
17730 +		return(affinity);
17731 +	else
17732 +		return(NULL);
17733 +}
17734 +#endif
17735 +
17736 +
17737 +/* check for any necessary preemptions */
17738 +static void check_for_preemptions(cedf_domain_t *cluster)
17739 +{
17740 +	struct task_struct *task;
17741 +	cpu_entry_t *last;
17742 +
17743 +#ifdef CONFIG_PREFER_LOCAL_LINKING
17744 +	cpu_entry_t *local;
17745 +
17746 +	/* Before linking to other CPUs, check first whether the local CPU is
17747 +	 * idle. */
17748 +	local = &__get_cpu_var(cedf_cpu_entries);
17749 +	task  = __peek_ready(&cluster->domain);
17750 +
17751 +	if (task && !local->linked
17752 +#ifdef CONFIG_RELEASE_MASTER
17753 +	    && likely(local->cpu != cluster->domain.release_master)
17754 +#endif
17755 +		) {
17756 +		task = __take_ready(&cluster->domain);
17757 +		TRACE_TASK(task, "linking to local CPU %d to avoid IPI\n", local->cpu);
17758 +		link_task_to_cpu(task, local);
17759 +		preempt(local);
17760 +	}
17761 +#endif
17762 +
17763 +
17764 +	for(last = lowest_prio_cpu(cluster);
17765 +	    edf_preemption_needed(&cluster->domain, last->linked);
17766 +	    last = lowest_prio_cpu(cluster)) {
17767 +		/* preemption necessary */
17768 +		task = __take_ready(&cluster->domain);
17769 +		TRACE("check_for_preemptions: attempting to link task %d to %d\n",
17770 +		      task->pid, last->cpu);
17771 +#ifdef CONFIG_SCHED_CPU_AFFINITY
17772 +		{
17773 +			cpu_entry_t *affinity =
17774 +					cedf_get_nearest_available_cpu(cluster,
17775 +						&per_cpu(cedf_cpu_entries, task_cpu(task)));
17776 +			if(affinity)
17777 +				last = affinity;
17778 +			else if(requeue_preempted_job(last->linked))
17779 +				requeue(last->linked);
17780 +		}
17781 +#else
17782 +		if (requeue_preempted_job(last->linked))
17783 +			requeue(last->linked);
17784 +#endif
17785 +		link_task_to_cpu(task, last);
17786 +		preempt(last);
17787 +	}
17788 +}
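+/* Worked example (hypothetical two-CPU cluster): the ready queue holds
+ * jobs with deadlines {5, 9}, and the CPUs are linked to jobs with
+ * deadlines 20 and 12.  The loop links the deadline-5 job in place of the
+ * deadline-20 job (which is requeued), then the deadline-9 job in place
+ * of the deadline-12 job; the next check finds the lowest-priority CPU
+ * linked to an earlier deadline than the ready-queue head, so the loop
+ * exits.
+ */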
17789 +
17790 +/* cedf_job_arrival: task is either resumed or released */
17791 +static noinline void cedf_job_arrival(struct task_struct* task)
17792 +{
17793 +	cedf_domain_t *cluster = task_cpu_cluster(task);
17794 +	BUG_ON(!task);
17795 +
17796 +	requeue(task);
17797 +	check_for_preemptions(cluster);
17798 +}
17799 +
17800 +static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
17801 +{
17802 +	cedf_domain_t* cluster = container_of(rt, cedf_domain_t, domain);
17803 +	unsigned long flags;
17804 +
17805 +	raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
17806 +
17807 +	__merge_ready(&cluster->domain, tasks);
17808 +	check_for_preemptions(cluster);
17809 +
17810 +	raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
17811 +}
17812 +
17813 +/* caller holds cedf_lock */
17814 +static noinline void job_completion(struct task_struct *t, int forced)
17815 +{
17816 +	BUG_ON(!t);
17817 +
17818 +	sched_trace_task_completion(t, forced);
17819 +
17820 +	TRACE_TASK(t, "job_completion().\n");
17821 +
17822 +	/* set flags */
17823 +	tsk_rt(t)->completed = 0;
17824 +	/* prepare for next period */
17825 +	prepare_for_next_period(t);
17826 +	if (is_early_releasing(t) || is_released(t, litmus_clock()))
17827 +		sched_trace_task_release(t);
17828 +	/* unlink */
17829 +	unlink(t);
17830 +	/* requeue
17831 +	 * But don't requeue a blocking task. */
17832 +	if (is_running(t))
17833 +		cedf_job_arrival(t);
17834 +}
17835 +
17836 +/* Getting schedule() right is a bit tricky. schedule() may not make any
17837 + * assumptions on the state of the current task since it may be called for a
17838 + * number of reasons. The reasons include a scheduler_tick() determined that it
17839 + * was necessary, because sys_exit_np() was called, because some Linux
17840 + * subsystem determined so, or even (in the worst case) because there is a bug
17841 + * hidden somewhere. Thus, we must take extreme care to determine what the
17842 + * current state is.
17843 + *
17844 + * The CPU could currently be scheduling a task (or not), be linked (or not).
17845 + *
17846 + * The following assertions for the scheduled task could hold:
17847 + *
17848 + *      - !is_running(scheduled)        // the job blocks
17849 + *	- scheduled->timeslice == 0	// the job completed (forcefully)
17850 + *	- is_completed()		// the job completed (by syscall)
17851 + * 	- linked != scheduled		// we need to reschedule (for any reason)
17852 + * 	- is_np(scheduled)		// rescheduling must be delayed,
17853 + *					   sys_exit_np must be requested
17854 + *
17855 + * Any of these can occur together.
17856 + */
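+/* For instance (hypothetical case), a preemptable job that signals
+ * completion via sys_complete_job() arrives here with exists = 1,
+ * blocks = 0, np = 0, sleep = 1: job_completion() prepares its next
+ * period, the job is unlinked and possibly requeued, and a newly linked
+ * job (if different) is returned as next.  A job that merely suspends
+ * yields blocks = 1, in which case it is only unlinked and no completion
+ * is processed.
+ */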
17857 +static struct task_struct* cedf_schedule(struct task_struct * prev)
17858 +{
17859 +	cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries);
17860 +	cedf_domain_t *cluster = entry->cluster;
17861 +	int out_of_time, sleep, preempt, np, exists, blocks;
17862 +	struct task_struct* next = NULL;
17863 +
17864 +#ifdef CONFIG_RELEASE_MASTER
17865 +	/* Bail out early if we are the release master.
17866 +	 * The release master never schedules any real-time tasks.
17867 +	 */
17868 +	if (unlikely(cluster->domain.release_master == entry->cpu)) {
17869 +		sched_state_task_picked();
17870 +		return NULL;
17871 +	}
17872 +#endif
17873 +
17874 +	raw_spin_lock(&cluster->cluster_lock);
17875 +	clear_will_schedule();
17876 +
17877 +	/* sanity checking */
17878 +	BUG_ON(entry->scheduled && entry->scheduled != prev);
17879 +	BUG_ON(entry->scheduled && !is_realtime(prev));
17880 +	BUG_ON(is_realtime(prev) && !entry->scheduled);
17881 +
17882 +	/* (0) Determine state */
17883 +	exists      = entry->scheduled != NULL;
17884 +	blocks      = exists && !is_running(entry->scheduled);
17885 +	out_of_time = exists &&
17886 +				  budget_enforced(entry->scheduled) &&
17887 +				  budget_exhausted(entry->scheduled);
17888 +	np 	    = exists && is_np(entry->scheduled);
17889 +	sleep	    = exists && is_completed(entry->scheduled);
17890 +	preempt     = entry->scheduled != entry->linked;
17891 +
17892 +#ifdef WANT_ALL_SCHED_EVENTS
17893 +	TRACE_TASK(prev, "invoked cedf_schedule.\n");
17894 +#endif
17895 +
17896 +	if (exists)
17897 +		TRACE_TASK(prev,
17898 +			   "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
17899 +			   "state:%d sig:%d\n",
17900 +			   blocks, out_of_time, np, sleep, preempt,
17901 +			   prev->state, signal_pending(prev));
17902 +	if (entry->linked && preempt)
17903 +		TRACE_TASK(prev, "will be preempted by %s/%d\n",
17904 +			   entry->linked->comm, entry->linked->pid);
17905 +
17906 +
17907 +	/* If a task blocks we have no choice but to reschedule.
17908 +	 */
17909 +	if (blocks)
17910 +		unlink(entry->scheduled);
17911 +
17912 +	/* Request a sys_exit_np() call if we would like to preempt but cannot.
17913 +	 * We need to make sure to update the link structure anyway in case
17914 +	 * that we are still linked. Multiple calls to request_exit_np() don't
17915 +	 * hurt.
17916 +	 */
17917 +	if (np && (out_of_time || preempt || sleep)) {
17918 +		unlink(entry->scheduled);
17919 +		request_exit_np(entry->scheduled);
17920 +	}
17921 +
17922 +	/* Any task that is preemptable and either exhausts its execution
17923 +	 * budget or wants to sleep completes. We may have to reschedule after
17924 +	 * this. Don't do a job completion if we block (can't have timers running
17925 +	 * for blocked jobs).
17926 +	 */
17927 +	if (!np && (out_of_time || sleep) && !blocks)
17928 +		job_completion(entry->scheduled, !sleep);
17929 +
17930 +	/* Link pending task if we became unlinked.
17931 +	 */
17932 +	if (!entry->linked)
17933 +		link_task_to_cpu(__take_ready(&cluster->domain), entry);
17934 +
17935 +	/* The final scheduling decision. Do we need to switch for some reason?
17936 +	 * If linked is different from scheduled, then select linked as next.
17937 +	 */
17938 +	if ((!np || blocks) &&
17939 +	    entry->linked != entry->scheduled) {
17940 +		/* Schedule a linked job? */
17941 +		if (entry->linked) {
17942 +			entry->linked->rt_param.scheduled_on = entry->cpu;
17943 +			next = entry->linked;
17944 +		}
17945 +		if (entry->scheduled) {
17946 +			/* not gonna be scheduled soon */
17947 +			entry->scheduled->rt_param.scheduled_on = NO_CPU;
17948 +			TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
17949 +		}
17950 +	} else
17951 +		/* Only override Linux scheduler if we have a real-time task
17952 +		 * scheduled that needs to continue.
17953 +		 */
17954 +		if (exists)
17955 +			next = prev;
17956 +
17957 +	sched_state_task_picked();
17958 +	raw_spin_unlock(&cluster->cluster_lock);
17959 +
17960 +#ifdef WANT_ALL_SCHED_EVENTS
17961 +	TRACE("cedf_lock released, next=0x%p\n", next);
17962 +
17963 +	if (next)
17964 +		TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
17965 +	else if (exists && !next)
17966 +		TRACE("becomes idle at %llu.\n", litmus_clock());
17967 +#endif
17968 +
17969 +
17970 +	return next;
17971 +}
17972 +
17973 +
17974 +/* _finish_switch - we just finished the switch away from prev
17975 + */
17976 +static void cedf_finish_switch(struct task_struct *prev)
17977 +{
17978 +	cpu_entry_t* 	entry = &__get_cpu_var(cedf_cpu_entries);
17979 +
17980 +	entry->scheduled = is_realtime(current) ? current : NULL;
17981 +#ifdef WANT_ALL_SCHED_EVENTS
17982 +	TRACE_TASK(prev, "switched away from\n");
17983 +#endif
17984 +}
17985 +
17986 +
17987 +/*	Prepare a task for running in RT mode
17988 + */
17989 +static void cedf_task_new(struct task_struct * t, int on_rq, int is_scheduled)
17990 +{
17991 +	unsigned long 		flags;
17992 +	cpu_entry_t* 		entry;
17993 +	cedf_domain_t*		cluster;
17994 +
17995 +	TRACE("C-EDF: task new %d\n", t->pid);
17996 +
17997 +	/* the cluster doesn't change even if t is scheduled */
17998 +	cluster = task_cpu_cluster(t);
17999 +
18000 +	raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
18001 +
18002 +	/* setup job params */
18003 +	release_at(t, litmus_clock());
18004 +
18005 +	if (is_scheduled) {
18006 +		entry = &per_cpu(cedf_cpu_entries, task_cpu(t));
18007 +		BUG_ON(entry->scheduled);
18008 +
18009 +#ifdef CONFIG_RELEASE_MASTER
18010 +		if (entry->cpu != cluster->domain.release_master) {
18011 +#endif
18012 +			entry->scheduled = t;
18013 +			tsk_rt(t)->scheduled_on = task_cpu(t);
18014 +#ifdef CONFIG_RELEASE_MASTER
18015 +		} else {
18016 +			/* do not schedule on release master */
18017 +			preempt(entry); /* force resched */
18018 +			tsk_rt(t)->scheduled_on = NO_CPU;
18019 +		}
18020 +#endif
18021 +	} else {
18022 +		t->rt_param.scheduled_on = NO_CPU;
18023 +	}
18024 +	t->rt_param.linked_on          = NO_CPU;
18025 +
18026 +	if (is_running(t))
18027 +		cedf_job_arrival(t);
18028 +	raw_spin_unlock_irqrestore(&(cluster->cluster_lock), flags);
18029 +}
18030 +
18031 +static void cedf_task_wake_up(struct task_struct *task)
18032 +{
18033 +	unsigned long flags;
18034 +	lt_t now;
18035 +	cedf_domain_t *cluster;
18036 +
18037 +	TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
18038 +
18039 +	cluster = task_cpu_cluster(task);
18040 +
18041 +	raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
18042 +	now = litmus_clock();
18043 +	if (is_sporadic(task) && is_tardy(task, now)) {
18044 +		/* new sporadic release */
18045 +		release_at(task, now);
18046 +		sched_trace_task_release(task);
18047 +	}
18048 +	cedf_job_arrival(task);
18049 +	raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
18050 +}
18051 +
18052 +static void cedf_task_block(struct task_struct *t)
18053 +{
18054 +	unsigned long flags;
18055 +	cedf_domain_t *cluster;
18056 +
18057 +	TRACE_TASK(t, "block at %llu\n", litmus_clock());
18058 +
18059 +	cluster = task_cpu_cluster(t);
18060 +
18061 +	/* unlink if necessary */
18062 +	raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
18063 +	unlink(t);
18064 +	raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
18065 +
18066 +	BUG_ON(!is_realtime(t));
18067 +}
18068 +
18069 +
18070 +static void cedf_task_exit(struct task_struct * t)
18071 +{
18072 +	unsigned long flags;
18073 +	cedf_domain_t *cluster = task_cpu_cluster(t);
18074 +
18075 +	/* unlink if necessary */
18076 +	raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
18077 +	unlink(t);
18078 +	if (tsk_rt(t)->scheduled_on != NO_CPU) {
18079 +		cpu_entry_t *cpu;
18080 +		cpu = &per_cpu(cedf_cpu_entries, tsk_rt(t)->scheduled_on);
18081 +		cpu->scheduled = NULL;
18082 +		tsk_rt(t)->scheduled_on = NO_CPU;
18083 +	}
18084 +	raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
18085 +
18086 +	BUG_ON(!is_realtime(t));
18087 +	TRACE_TASK(t, "RIP\n");
18088 +}
18089 +
18090 +static long cedf_admit_task(struct task_struct* tsk)
18091 +{
18092 +	return (remote_cluster(task_cpu(tsk)) == task_cpu_cluster(tsk)) ?
18093 +			0 : -EINVAL;
18094 +}
18095 +
18096 +/* total number of clusters */
18097 +static int num_clusters;
18098 +/* we do not support clusters of different sizes */
18099 +static unsigned int cluster_size;
18100 +
18101 +#ifdef VERBOSE_INIT
18102 +static void print_cluster_topology(cpumask_var_t mask, int cpu)
18103 +{
18104 +	int chk;
18105 +	char buf[255];
18106 +
18107 +	chk = cpulist_scnprintf(buf, 254, mask);
18108 +	buf[chk] = '\0';
18109 +	printk(KERN_INFO "CPU = %d, shared cpu(s) = %s\n", cpu, buf);
18110 +
18111 +}
18112 +#endif
18113 +
18114 +static int clusters_allocated = 0;
18115 +
18116 +static void cleanup_cedf(void)
18117 +{
18118 +	int i;
18119 +
18120 +	if (clusters_allocated) {
18121 +		for (i = 0; i < num_clusters; i++) {
18122 +			kfree(cedf[i].cpus);
18123 +			kfree(cedf[i].heap_node);
18124 +			free_cpumask_var(cedf[i].cpu_map);
18125 +		}
18126 +
18127 +		kfree(cedf);
18128 +	}
18129 +}
18130 +
18131 +static struct domain_proc_info cedf_domain_proc_info;
18132 +static long cedf_get_domain_proc_info(struct domain_proc_info **ret)
18133 +{
18134 +	*ret = &cedf_domain_proc_info;
18135 +	return 0;
18136 +}
18137 +
18138 +static void cedf_setup_domain_proc(void)
18139 +{
18140 +	int i, cpu, domain;
18141 +#ifdef CONFIG_RELEASE_MASTER
18142 +	int release_master = atomic_read(&release_master_cpu);
18143 +	/* skip over the domain with the release master if cluster size is 1 */
18144 +	int skip_domain = (1 == cluster_size && release_master != NO_CPU) ?
18145 +			release_master : NO_CPU;
18146 +#else
18147 +	int release_master = NO_CPU;
18148 +	int skip_domain = NO_CPU;
18149 +#endif
18150 +	int num_rt_cpus = num_online_cpus() - (release_master != NO_CPU);
18151 +	int num_rt_domains = num_clusters - (skip_domain != NO_CPU);
18152 +	struct cd_mapping *map;
18153 +
18154 +	memset(&cedf_domain_proc_info, 0, sizeof(cedf_domain_proc_info));
18155 +	init_domain_proc_info(&cedf_domain_proc_info, num_rt_cpus, num_rt_domains);
18156 +	cedf_domain_proc_info.num_cpus = num_rt_cpus;
18157 +	cedf_domain_proc_info.num_domains = num_rt_domains;
18158 +
18159 +	for (cpu = 0, i = 0; cpu < num_online_cpus(); ++cpu) {
18160 +		if (cpu == release_master)
18161 +			continue;
18162 +		map = &cedf_domain_proc_info.cpu_to_domains[i];
18163 +		/* pointer math to figure out the domain index */
18164 +		domain = remote_cluster(cpu) - cedf;
18165 +		map->id = cpu;
18166 +		cpumask_set_cpu(domain, map->mask);
18167 +		++i;
18168 +	}
18169 +
18170 +	for (domain = 0, i = 0; domain < num_clusters; ++domain) {
18171 +		if (domain == skip_domain)
18172 +			continue;
18173 +		map = &cedf_domain_proc_info.domain_to_cpus[i];
18174 +		map->id = i;
18175 +		cpumask_copy(map->mask, cedf[domain].cpu_map);
18176 +		++i;
18177 +	}
18178 +}
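+/* Illustration (assumed quad-core system split into two clusters of size
+ * 2, no release master): cpu_to_domains maps CPUs {0,1} to domain 0 and
+ * CPUs {2,3} to domain 1, while domain_to_cpus exports the inverse
+ * mapping, i.e., domain 0 -> {0,1} and domain 1 -> {2,3}.  These tables
+ * back the LITMUS^RT domain /proc interface so that user space can
+ * discover the cluster layout.
+ */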
18179 +
18180 +static long cedf_activate_plugin(void)
18181 +{
18182 +	int i, j, cpu, ccpu, cpu_count;
18183 +	cpu_entry_t *entry;
18184 +
18185 +	cpumask_var_t mask;
18186 +	int chk = 0;
18187 +
18188 +	/* de-allocate old clusters, if any */
18189 +	cleanup_cedf();
18190 +
18191 +	printk(KERN_INFO "C-EDF: Activate Plugin, cluster configuration = %d\n",
18192 +			cluster_config);
18193 +
18194 +	/* need to get cluster_size first */
18195 +	if(!zalloc_cpumask_var(&mask, GFP_ATOMIC))
18196 +		return -ENOMEM;
18197 +
18198 +	if (cluster_config == GLOBAL_CLUSTER) {
18199 +		cluster_size = num_online_cpus();
18200 +	} else {
18201 +		chk = get_shared_cpu_map(mask, 0, cluster_config);
18202 +		if (chk) {
18203 +			/* if chk != 0 then it is the max allowed index */
18204 +			printk(KERN_INFO "C-EDF: Cluster configuration = %d "
18205 +			       "is not supported on this hardware.\n",
18206 +			       cluster_config);
18207 +			/* User should notice that the configuration failed, so
18208 +			 * let's bail out. */
18209 +			return -EINVAL;
18210 +		}
18211 +
18212 +		cluster_size = cpumask_weight(mask);
18213 +	}
18214 +
18215 +	if ((num_online_cpus() % cluster_size) != 0) {
18216 +		/* this can't be right, some cpus are left out */
18217 +		printk(KERN_ERR "C-EDF: Trying to group %d cpus in %d!\n",
18218 +				num_online_cpus(), cluster_size);
18219 +		return -1;
18220 +	}
18221 +
18222 +	num_clusters = num_online_cpus() / cluster_size;
18223 +	printk(KERN_INFO "C-EDF: %d cluster(s) of size = %d\n",
18224 +			num_clusters, cluster_size);
18225 +
18226 +	/* initialize clusters */
18227 +	cedf = kmalloc(num_clusters * sizeof(cedf_domain_t), GFP_ATOMIC);
18228 +	for (i = 0; i < num_clusters; i++) {
18229 +
18230 +		cedf[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t),
18231 +				GFP_ATOMIC);
18232 +		cedf[i].heap_node = kmalloc(
18233 +				cluster_size * sizeof(struct bheap_node),
18234 +				GFP_ATOMIC);
18235 +		bheap_init(&(cedf[i].cpu_heap));
18236 +		edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs);
18237 +
18238 +		if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC))
18239 +			return -ENOMEM;
18240 +#ifdef CONFIG_RELEASE_MASTER
18241 +		cedf[i].domain.release_master = atomic_read(&release_master_cpu);
18242 +#endif
18243 +	}
18244 +
18245 +	/* cycle through the clusters and add CPUs to them */
18246 +	for (i = 0; i < num_clusters; i++) {
18247 +
18248 +		for_each_online_cpu(cpu) {
18249 +			/* check if the cpu is already in a cluster */
18250 +			for (j = 0; j < num_clusters; j++)
18251 +				if (cpumask_test_cpu(cpu, cedf[j].cpu_map))
18252 +					break;
18253 +			/* if it is in a cluster go to next cpu */
18254 +			if (j < num_clusters &&
18255 +					cpumask_test_cpu(cpu, cedf[j].cpu_map))
18256 +				continue;
18257 +
18258 +			/* this cpu isn't in any cluster */
18259 +			/* get the shared cpus */
18260 +			if (unlikely(cluster_config == GLOBAL_CLUSTER))
18261 +				cpumask_copy(mask, cpu_online_mask);
18262 +			else
18263 +				get_shared_cpu_map(mask, cpu, cluster_config);
18264 +
18265 +			cpumask_copy(cedf[i].cpu_map, mask);
18266 +#ifdef VERBOSE_INIT
18267 +			print_cluster_topology(mask, cpu);
18268 +#endif
18269 +			/* add cpus to current cluster and init cpu_entry_t */
18270 +			cpu_count = 0;
18271 +			for_each_cpu(ccpu, cedf[i].cpu_map) {
18272 +
18273 +				entry = &per_cpu(cedf_cpu_entries, ccpu);
18274 +				cedf[i].cpus[cpu_count] = entry;
18275 +				atomic_set(&entry->will_schedule, 0);
18276 +				entry->cpu = ccpu;
18277 +				entry->cluster = &cedf[i];
18278 +				entry->hn = &(cedf[i].heap_node[cpu_count]);
18279 +				bheap_node_init(&entry->hn, entry);
18280 +
18281 +				cpu_count++;
18282 +
18283 +				entry->linked = NULL;
18284 +				entry->scheduled = NULL;
18285 +#ifdef CONFIG_RELEASE_MASTER
18286 +				/* only add CPUs that should schedule jobs */
18287 +				if (entry->cpu != entry->cluster->domain.release_master)
18288 +#endif
18289 +					update_cpu_position(entry);
18290 +			}
18291 +			/* done with this cluster */
18292 +			break;
18293 +		}
18294 +	}
18295 +
18296 +	clusters_allocated = 1;
18297 +	free_cpumask_var(mask);
18298 +
18299 +	cedf_setup_domain_proc();
18300 +
18301 +	return 0;
18302 +}
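+/* Example configuration (hypothetical quad-core i.MX6-style platform with
+ * a single shared L2 cache): with L2-level clustering selected via the
+ * plugin's cluster proc file, get_shared_cpu_map() returns {0,1,2,3} for
+ * CPU 0, so cluster_size = 4 and num_clusters = 1 (equivalent to GSN-EDF).
+ * With L1-level clustering each CPU forms its own cluster (effectively
+ * partitioned EDF), and GLOBAL_CLUSTER always yields a single cluster
+ * spanning all online CPUs.
+ */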
18303 +
18304 +static long cedf_deactivate_plugin(void)
18305 +{
18306 +	destroy_domain_proc_info(&cedf_domain_proc_info);
18307 +	return 0;
18308 +}
18309 +
18310 +/*	Plugin object	*/
18311 +static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = {
18312 +	.plugin_name		= "C-EDF",
18313 +	.finish_switch		= cedf_finish_switch,
18314 +	.task_new		= cedf_task_new,
18315 +	.complete_job		= complete_job,
18316 +	.task_exit		= cedf_task_exit,
18317 +	.schedule		= cedf_schedule,
18318 +	.task_wake_up		= cedf_task_wake_up,
18319 +	.task_block		= cedf_task_block,
18320 +	.admit_task		= cedf_admit_task,
18321 +	.activate_plugin	= cedf_activate_plugin,
18322 +	.deactivate_plugin	= cedf_deactivate_plugin,
18323 +	.get_domain_proc_info	= cedf_get_domain_proc_info,
18324 +};
18325 +
18326 +static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL;
18327 +
18328 +static int __init init_cedf(void)
18329 +{
18330 +	int err, fs;
18331 +
18332 +	err = register_sched_plugin(&cedf_plugin);
18333 +	if (!err) {
18334 +		fs = make_plugin_proc_dir(&cedf_plugin, &cedf_dir);
18335 +		if (!fs)
18336 +			cluster_file = create_cluster_file(cedf_dir, &cluster_config);
18337 +		else
18338 +			printk(KERN_ERR "Could not allocate C-EDF procfs dir.\n");
18339 +	}
18340 +	return err;
18341 +}
18342 +
18343 +static void clean_cedf(void)
18344 +{
18345 +	cleanup_cedf();
18346 +	if (cluster_file)
18347 +		remove_proc_entry("cluster", cedf_dir);
18348 +	if (cedf_dir)
18349 +		remove_plugin_proc_dir(&cedf_plugin);
18350 +}
18351 +
18352 +module_init(init_cedf);
18353 +module_exit(clean_cedf);
18354 -- 
18355 1.8.1.2
18356 
18357 
18358 From 3e37b4b502634d6598bbc45d89fef854a2a13ae6 Mon Sep 17 00:00:00 2001
18359 From: Bjoern Brandenburg <bbb@mpi-sws.org>
18360 Date: Tue, 12 Feb 2013 19:21:11 +0100
18361 Subject: [PATCH 050/119] Add PD^2 scheduler plugin
18362 
18363 ---
18364  litmus/Kconfig       |   13 +
18365  litmus/Makefile      |    1 +
18366  litmus/sched_pfair.c | 1165 ++++++++++++++++++++++++++++++++++++++++++++++++++
18367  3 files changed, 1179 insertions(+)
18368  create mode 100644 litmus/sched_pfair.c
18369 
18370 diff --git a/litmus/Kconfig b/litmus/Kconfig
18371 index 38d9e43..babb43d 100644
18372 --- a/litmus/Kconfig
18373 +++ b/litmus/Kconfig
18374 @@ -12,6 +12,19 @@ config PLUGIN_CEDF
18375            On smaller platforms (e.g., ARM PB11MPCore), using C-EDF
18376            makes little sense since there aren't any shared caches.
18377  
18378 +config PLUGIN_PFAIR
18379 +	bool "PFAIR"
18380 +	default y
18381 +	help
18382 +	  Include the PFAIR plugin (i.e., the PD^2 scheduler) in the kernel.
18383 +	  The PFAIR plugin requires high resolution timers (for staggered
18384 +	  quanta) and also requires HZ_PERIODIC (i.e., periodic timer ticks
18385 +	  even if a processor is idle, as quanta could be missed otherwise).
18386 +	  Further, the PFAIR plugin uses the system tick and thus requires
18387 +	  HZ=1000 to achieve reasonable granularity.
18388 +
18389 +	  If unsure, say Yes.
18390 +
18391  config RELEASE_MASTER
18392          bool "Release-master Support"
18393  	depends on ARCH_HAS_SEND_PULL_TIMERS && SMP
18394 diff --git a/litmus/Makefile b/litmus/Makefile
18395 index 8110a5a..84b173a 100644
18396 --- a/litmus/Makefile
18397 +++ b/litmus/Makefile
18398 @@ -24,6 +24,7 @@ obj-y     = sched_plugin.o litmus.o \
18399  	    sched_pfp.o
18400  
18401  obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
18402 +obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o
18403  
18404  obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o
18405  
18406 diff --git a/litmus/sched_pfair.c b/litmus/sched_pfair.c
18407 new file mode 100644
18408 index 0000000..91f1e08
18409 --- /dev/null
18410 +++ b/litmus/sched_pfair.c
18411 @@ -0,0 +1,1165 @@
18412 +/*
18413 + * kernel/sched_pfair.c
18414 + *
18415 + * Implementation of the PD^2 pfair scheduling algorithm. This
18416 + * implementation realizes "early releasing," i.e., it is work-conserving.
18417 + *
18418 + */
18419 +
18420 +#include <asm/div64.h>
18421 +#include <linux/delay.h>
18422 +#include <linux/module.h>
18423 +#include <linux/spinlock.h>
18424 +#include <linux/percpu.h>
18425 +#include <linux/sched.h>
18426 +#include <linux/list.h>
18427 +#include <linux/slab.h>
18428 +
18429 +#include <litmus/litmus.h>
18430 +#include <litmus/jobs.h>
18431 +#include <litmus/preempt.h>
18432 +#include <litmus/rt_domain.h>
18433 +#include <litmus/sched_plugin.h>
18434 +#include <litmus/sched_trace.h>
18435 +#include <litmus/trace.h>
18436 +
18437 +#include <litmus/bheap.h>
18438 +
18439 +/* to configure the cluster size */
18440 +#include <litmus/litmus_proc.h>
18441 +
18442 +#include <litmus/clustered.h>
18443 +
18444 +static enum cache_level pfair_cluster_level = GLOBAL_CLUSTER;
18445 +
18446 +struct subtask {
18447 +	/* measured in quanta relative to job release */
18448 +	quanta_t release;
18449 +	quanta_t deadline;
18450 +	quanta_t overlap; /* called "b bit" by PD^2 */
18451 +	quanta_t group_deadline;
18452 +};
18453 +
18454 +struct pfair_param   {
18455 +	quanta_t	quanta;       /* number of subtasks */
18456 +	quanta_t	cur;          /* index of current subtask */
18457 +
18458 +	quanta_t	release;      /* in quanta */
18459 +	quanta_t	period;       /* in quanta */
18460 +
18461 +	quanta_t	last_quantum; /* when scheduled last */
18462 +	int		last_cpu;     /* where scheduled last */
18463 +
18464 +	struct pfair_cluster* cluster; /* where this task is scheduled */
18465 +
18466 +	struct subtask subtasks[0];   /* allocate together with pfair_param */
18467 +};
18468 +
18469 +#define tsk_pfair(tsk) ((tsk)->rt_param.pfair)
18470 +
18471 +struct pfair_state {
18472 +	struct cluster_cpu topology;
18473 +
18474 +	struct hrtimer quantum_timer;
18475 +
18476 +	volatile quanta_t cur_tick;    /* updated by the CPU that is advancing
18477 +				        * the time */
18478 +	volatile quanta_t local_tick;  /* What tick is the local CPU currently
18479 +				        * executing? Updated only by the local
18480 +				        * CPU. In QEMU, this may lag behind the
18481 +				        * current tick. In a real system, with
18482 +				        * proper timers and aligned quanta,
18483 +				        * that should only be the case for a
18484 +				        * very short time after the time
18485 +				        * advanced. With staggered quanta, it
18486 +				        * will lag for the duration of the
18487 +				        * offset.
18488 +					*/
18489 +
18490 +	struct task_struct* linked;    /* the task that should be executing */
18491 +	struct task_struct* local;     /* the local copy of linked          */
18492 +	struct task_struct* scheduled; /* what is actually scheduled        */
18493 +
18494 +	lt_t offset;			/* stagger offset */
18495 +	unsigned int missed_updates;
18496 +	unsigned int missed_quanta;
18497 +};
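+/* Note on staggered quanta (illustrative numbers): with a 1 ms quantum
+ * and four CPUs, stagger offsets of 0, 250, 500, and 750 us spread the
+ * quantum-boundary processing across the quantum instead of firing all
+ * four quantum_timers simultaneously; local_tick on a given CPU then
+ * trails cur_tick by at most its stagger offset.
+ */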
18498 +
18499 +struct pfair_cluster {
18500 +	struct scheduling_cluster topology;
18501 +
18502 +	/* The "global" time in this cluster. */
18503 +	quanta_t pfair_time; /* the "official" PFAIR clock */
18504 +
18505 +	/* The ready queue for this cluster. */
18506 +	rt_domain_t pfair;
18507 +
18508 +	/* The set of jobs that should have their release enacted at the next
18509 +	 * quantum boundary.
18510 +	 */
18511 +	struct bheap release_queue;
18512 +	raw_spinlock_t release_lock;
18513 +};
18514 +
18515 +#define FLAGS_NEED_REQUEUE 0x1
18516 +
18517 +static inline struct pfair_cluster* cpu_cluster(struct pfair_state* state)
18518 +{
18519 +	return container_of(state->topology.cluster, struct pfair_cluster, topology);
18520 +}
18521 +
18522 +static inline int cpu_id(struct pfair_state* state)
18523 +{
18524 +	return state->topology.id;
18525 +}
18526 +
18527 +static inline struct pfair_state* from_cluster_list(struct list_head* pos)
18528 +{
18529 +	return list_entry(pos, struct pfair_state, topology.cluster_list);
18530 +}
18531 +
18532 +static inline struct pfair_cluster* from_domain(rt_domain_t* rt)
18533 +{
18534 +	return container_of(rt, struct pfair_cluster, pfair);
18535 +}
18536 +
18537 +static inline raw_spinlock_t* cluster_lock(struct pfair_cluster* cluster)
18538 +{
18539 +	/* The ready_lock is used to serialize all scheduling events. */
18540 +	return &cluster->pfair.ready_lock;
18541 +}
18542 +
18543 +static inline raw_spinlock_t* cpu_lock(struct pfair_state* state)
18544 +{
18545 +	return cluster_lock(cpu_cluster(state));
18546 +}
18547 +
18548 +DEFINE_PER_CPU(struct pfair_state, pfair_state);
18549 +struct pfair_state* *pstate; /* short cut */
18550 +
18551 +static struct pfair_cluster* pfair_clusters;
18552 +static int num_pfair_clusters;
18553 +
18554 +/* Enable for lots of trace info.
18555 + * #define PFAIR_DEBUG
18556 + */
18557 +
18558 +#ifdef PFAIR_DEBUG
18559 +#define PTRACE_TASK(t, f, args...)  TRACE_TASK(t, f, ## args)
18560 +#define PTRACE(f, args...) TRACE(f, ## args)
18561 +#else
18562 +#define PTRACE_TASK(t, f, args...)
18563 +#define PTRACE(f, args...)
18564 +#endif
18565 +
18566 +/* gcc will inline all of these accessor functions... */
18567 +static struct subtask* cur_subtask(struct task_struct* t)
18568 +{
18569 +	return tsk_pfair(t)->subtasks + tsk_pfair(t)->cur;
18570 +}
18571 +
18572 +static quanta_t cur_deadline(struct task_struct* t)
18573 +{
18574 +	return cur_subtask(t)->deadline +  tsk_pfair(t)->release;
18575 +}
18576 +
18577 +static quanta_t cur_release(struct task_struct* t)
18578 +{
18579 +	/* This is early releasing: only the release of the first subtask
18580 +	 * counts. */
18581 +	return tsk_pfair(t)->release;
18582 +}
18583 +
18584 +static quanta_t cur_overlap(struct task_struct* t)
18585 +{
18586 +	return cur_subtask(t)->overlap;
18587 +}
18588 +
18589 +static quanta_t cur_group_deadline(struct task_struct* t)
18590 +{
18591 +	quanta_t gdl = cur_subtask(t)->group_deadline;
18592 +	if (gdl)
18593 +		return gdl + tsk_pfair(t)->release;
18594 +	else
18595 +		return gdl;
18596 +}
18597 +
18598 +
18599 +static int pfair_higher_prio(struct task_struct* first,
18600 +			     struct task_struct* second)
18601 +{
18602 +	return  /* first task must exist */
18603 +		first && (
18604 +		/* Does the second task exist and is it a real-time task?  If
18605 +		 * not, the first task (which is a RT task) has higher
18606 +		 * priority.
18607 +		 */
18608 +		!second || !is_realtime(second)  ||
18609 +
18610 +		/* Is the (subtask) deadline of the first task earlier?
18611 +		 * Then it has higher priority.
18612 +		 */
18613 +		time_before(cur_deadline(first), cur_deadline(second)) ||
18614 +
18615 +		/* Do we have a deadline tie?
18616 +		 * Then break by B-bit.
18617 +		 */
18618 +		(cur_deadline(first) == cur_deadline(second) &&
18619 +		 (cur_overlap(first) > cur_overlap(second) ||
18620 +
18621 +		/* Do we have a B-bit tie?
18622 +		 * Then break by group deadline.
18623 +		 */
18624 +		(cur_overlap(first) == cur_overlap(second) &&
18625 +		 (time_after(cur_group_deadline(first),
18626 +			     cur_group_deadline(second)) ||
18627 +
18628 +		/* Do we have a group deadline tie?
18629 +		 * Then break by PID; PIDs are unique.
18630 +		 */
18631 +		(cur_group_deadline(first) ==
18632 +		 cur_group_deadline(second) &&
18633 +		 first->pid < second->pid))))));
18634 +}
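+/* Tie-breaking example (hypothetical subtask parameters): two subtasks
+ * both have absolute deadline 7.  If their b-bits are 1 and 0, the
+ * b-bit-1 subtask wins.  If both b-bits are 1, the subtask with the later
+ * group deadline wins.  If the group deadlines are also equal, the lower
+ * PID wins, so the resulting order is always total.
+ */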
18635 +
18636 +int pfair_ready_order(struct bheap_node* a, struct bheap_node* b)
18637 +{
18638 +	return pfair_higher_prio(bheap2task(a), bheap2task(b));
18639 +}
18640 +
18641 +static void pfair_release_jobs(rt_domain_t* rt, struct bheap* tasks)
18642 +{
18643 +	struct pfair_cluster* cluster = from_domain(rt);
18644 +	unsigned long flags;
18645 +
18646 +	raw_spin_lock_irqsave(&cluster->release_lock, flags);
18647 +
18648 +	bheap_union(pfair_ready_order, &cluster->release_queue, tasks);
18649 +
18650 +	raw_spin_unlock_irqrestore(&cluster->release_lock, flags);
18651 +}
18652 +
18653 +static void prepare_release(struct task_struct* t, quanta_t at)
18654 +{
18655 +	tsk_pfair(t)->release    = at;
18656 +	tsk_pfair(t)->cur        = 0;
18657 +}
18658 +
18659 +/* pull released tasks from the release queue */
18660 +static void poll_releases(struct pfair_cluster* cluster)
18661 +{
18662 +	raw_spin_lock(&cluster->release_lock);
18663 +	__merge_ready(&cluster->pfair, &cluster->release_queue);
18664 +	raw_spin_unlock(&cluster->release_lock);
18665 +}
18666 +
18667 +static void check_preempt(struct task_struct* t)
18668 +{
18669 +	int cpu = NO_CPU;
18670 +	if (tsk_rt(t)->linked_on != tsk_rt(t)->scheduled_on &&
18671 +	    is_present(t)) {
18672 +		/* the task can be scheduled and
18673 +		 * is not scheduled where it ought to be scheduled
18674 +		 */
18675 +		cpu = tsk_rt(t)->linked_on != NO_CPU ?
18676 +			tsk_rt(t)->linked_on         :
18677 +			tsk_rt(t)->scheduled_on;
18678 +		PTRACE_TASK(t, "linked_on:%d, scheduled_on:%d\n",
18679 +			   tsk_rt(t)->linked_on, tsk_rt(t)->scheduled_on);
18680 +		/* preempt */
18681 +		litmus_reschedule(cpu);
18682 +	}
18683 +}
18684 +
18685 +/* caller must hold pfair.ready_lock */
18686 +static void drop_all_references(struct task_struct *t)
18687 +{
18688 +        int cpu;
18689 +        struct pfair_state* s;
18690 +	struct pfair_cluster* cluster;
18691 +        if (bheap_node_in_heap(tsk_rt(t)->heap_node)) {
18692 +                /* It must be in the ready queue; drop references isn't called
18693 +		 * when the job is in a release queue. */
18694 +		cluster = tsk_pfair(t)->cluster;
18695 +                bheap_delete(pfair_ready_order, &cluster->pfair.ready_queue,
18696 +                            tsk_rt(t)->heap_node);
18697 +        }
18698 +        for (cpu = 0; cpu < num_online_cpus(); cpu++) {
18699 +                s = &per_cpu(pfair_state, cpu);
18700 +                if (s->linked == t)
18701 +                        s->linked = NULL;
18702 +                if (s->local  == t)
18703 +                        s->local  = NULL;
18704 +                if (s->scheduled  == t)
18705 +                        s->scheduled = NULL;
18706 +        }
18707 +	/* make sure we don't have a stale linked_on field */
18708 +	tsk_rt(t)->linked_on = NO_CPU;
18709 +}
18710 +
18711 +static void pfair_prepare_next_period(struct task_struct* t)
18712 +{
18713 +	struct pfair_param* p = tsk_pfair(t);
18714 +
18715 +	prepare_for_next_period(t);
18716 +	tsk_rt(t)->completed = 0;
18717 +	p->release = time2quanta(get_release(t), CEIL);
18718 +}
18719 +
18720 +/* returns 1 if the task needs to go the release queue */
18721 +static int advance_subtask(quanta_t time, struct task_struct* t, int cpu)
18722 +{
18723 +	struct pfair_param* p = tsk_pfair(t);
18724 +	int to_relq;
18725 +	p->cur = (p->cur + 1) % p->quanta;
18726 +	if (!p->cur) {
18727 +		if (is_present(t)) {
18728 +			/* The job overran; we start a new budget allocation. */
18729 +			pfair_prepare_next_period(t);
18730 +		} else {
18731 +			/* remove task from system until it wakes */
18732 +			drop_all_references(t);
18733 +			tsk_rt(t)->flags |= FLAGS_NEED_REQUEUE;
18734 +			TRACE_TASK(t, "on %d advanced to subtask %lu (not present)\n",
18735 +				   cpu, p->cur);
18736 +			return 0;
18737 +		}
18738 +	}
18739 +	to_relq = time_after(cur_release(t), time);
18740 +	TRACE_TASK(t, "on %d advanced to subtask %lu -> to_relq=%d (cur_release:%lu time:%lu)\n",
18741 +		   cpu, p->cur, to_relq, cur_release(t), time);
18742 +	return to_relq;
18743 +}
18744 +
18745 +static void advance_subtasks(struct pfair_cluster *cluster, quanta_t time)
18746 +{
18747 +	struct task_struct* l;
18748 +	struct pfair_param* p;
18749 +	struct list_head* pos;
18750 +	struct pfair_state* cpu;
18751 +
18752 +	list_for_each(pos, &cluster->topology.cpus) {
18753 +		cpu = from_cluster_list(pos);
18754 +		l = cpu->linked;
18755 +		cpu->missed_updates += cpu->linked != cpu->local;
18756 +		if (l) {
18757 +			p = tsk_pfair(l);
18758 +			p->last_quantum = time;
18759 +			p->last_cpu     =  cpu_id(cpu);
18760 +			if (advance_subtask(time, l, cpu_id(cpu))) {
18761 +				//cpu->linked = NULL;
18762 +				PTRACE_TASK(l, "should go to release queue. "
18763 +					    "scheduled_on=%d present=%d\n",
18764 +					    tsk_rt(l)->scheduled_on,
18765 +					    tsk_rt(l)->present);
18766 +			}
18767 +		}
18768 +	}
18769 +}
18770 +
18771 +static int target_cpu(quanta_t time, struct task_struct* t, int default_cpu)
18772 +{
18773 +	int cpu;
18774 +	if (tsk_rt(t)->scheduled_on != NO_CPU) {
18775 +		/* always observe scheduled_on linkage */
18776 +		default_cpu = tsk_rt(t)->scheduled_on;
18777 +	} else if (tsk_pfair(t)->last_quantum == time - 1) {
18778 +		/* back2back quanta */
18779 +		/* Only observe last_quantum if no scheduled_on is in the way.
18780 +		 * This should only kick in if a CPU missed quanta, and that
18781 +		 * *should* only happen in QEMU.
18782 +		 */
18783 +		cpu = tsk_pfair(t)->last_cpu;
18784 +		if (!pstate[cpu]->linked ||
18785 +		    tsk_rt(pstate[cpu]->linked)->scheduled_on != cpu) {
18786 +			default_cpu = cpu;
18787 +		}
18788 +	}
18789 +	return default_cpu;
18790 +}
18791 +
18792 +/* returns one if linking was redirected */
18793 +static int pfair_link(quanta_t time, int cpu,
18794 +		      struct task_struct* t)
18795 +{
18796 +	int target = target_cpu(time, t, cpu);
18797 +	struct task_struct* prev  = pstate[cpu]->linked;
18798 +	struct task_struct* other;
18799 +	struct pfair_cluster* cluster = cpu_cluster(pstate[cpu]);
18800 +
18801 +	if (target != cpu) {
18802 +		BUG_ON(pstate[target]->topology.cluster != pstate[cpu]->topology.cluster);
18803 +		other = pstate[target]->linked;
18804 +		pstate[target]->linked = t;
18805 +		tsk_rt(t)->linked_on   = target;
18806 +		if (!other)
18807 +			/* linked ok, but reschedule this CPU */
18808 +			return 1;
18809 +		if (target < cpu) {
18810 +			/* link other to cpu instead */
18811 +			tsk_rt(other)->linked_on = cpu;
18812 +			pstate[cpu]->linked      = other;
18813 +			if (prev) {
18814 +				/* prev got pushed back into the ready queue */
18815 +				tsk_rt(prev)->linked_on = NO_CPU;
18816 +				__add_ready(&cluster->pfair, prev);
18817 +			}
18818 +			/* we are done with this cpu */
18819 +			return 0;
18820 +		} else {
18821 +			/* re-add other; its original CPU was not considered yet */
18822 +			tsk_rt(other)->linked_on = NO_CPU;
18823 +			__add_ready(&cluster->pfair, other);
18824 +			/* reschedule this CPU */
18825 +			return 1;
18826 +		}
18827 +	} else {
18828 +		pstate[cpu]->linked  = t;
18829 +		tsk_rt(t)->linked_on = cpu;
18830 +		if (prev) {
18831 +			/* prev got pushed back into the ready queue */
18832 +			tsk_rt(prev)->linked_on = NO_CPU;
18833 +			__add_ready(&cluster->pfair, prev);
18834 +		}
18835 +		/* we are done with this CPU */
18836 +		return 0;
18837 +	}
18838 +}
18839 +
18840 +static void schedule_subtasks(struct pfair_cluster *cluster, quanta_t time)
18841 +{
18842 +	int retry;
18843 +	struct list_head *pos;
18844 +	struct pfair_state *cpu_state;
18845 +
18846 +	list_for_each(pos, &cluster->topology.cpus) {
18847 +		cpu_state = from_cluster_list(pos);
18848 +		retry = 1;
18849 +#ifdef CONFIG_RELEASE_MASTER
18850 +		/* skip release master */
18851 +		if (cluster->pfair.release_master == cpu_id(cpu_state))
18852 +			continue;
18853 +#endif
18854 +		while (retry) {
18855 +			if (pfair_higher_prio(__peek_ready(&cluster->pfair),
18856 +					      cpu_state->linked))
18857 +				retry = pfair_link(time, cpu_id(cpu_state),
18858 +						   __take_ready(&cluster->pfair));
18859 +			else
18860 +				retry = 0;
18861 +		}
18862 +	}
18863 +}
18864 +
18865 +static void schedule_next_quantum(struct pfair_cluster *cluster, quanta_t time)
18866 +{
18867 +	struct pfair_state *cpu;
18868 +	struct list_head* pos;
18869 +
18870 +	/* called with interrupts disabled */
18871 +	PTRACE("--- Q %lu at %llu PRE-SPIN\n",
18872 +	       time, litmus_clock());
18873 +	raw_spin_lock(cluster_lock(cluster));
18874 +	PTRACE("<<< Q %lu at %llu\n",
18875 +	       time, litmus_clock());
18876 +
18877 +	sched_trace_quantum_boundary();
18878 +
18879 +	advance_subtasks(cluster, time);
18880 +	poll_releases(cluster);
18881 +	schedule_subtasks(cluster, time);
18882 +
18883 +	list_for_each(pos, &cluster->topology.cpus) {
18884 +		cpu = from_cluster_list(pos);
18885 +		if (cpu->linked)
18886 +			PTRACE_TASK(cpu->linked,
18887 +				    " linked on %d.\n", cpu_id(cpu));
18888 +		else
18889 +			PTRACE("(null) linked on %d.\n", cpu_id(cpu));
18890 +	}
18891 +	/* We are done. Advance time. */
18892 +	mb();
18893 +	list_for_each(pos, &cluster->topology.cpus) {
18894 +		cpu = from_cluster_list(pos);
18895 +		if (cpu->local_tick != cpu->cur_tick) {
18896 +			TRACE("BAD Quantum not acked on %d "
18897 +			      "(l:%lu c:%lu p:%lu)\n",
18898 +			      cpu_id(cpu),
18899 +			      cpu->local_tick,
18900 +			      cpu->cur_tick,
18901 +			      cluster->pfair_time);
18902 +			cpu->missed_quanta++;
18903 +		}
18904 +		cpu->cur_tick = time;
18905 +	}
18906 +	PTRACE(">>> Q %lu at %llu\n",
18907 +	       time, litmus_clock());
18908 +	raw_spin_unlock(cluster_lock(cluster));
18909 +}
18910 +
18911 +static noinline void wait_for_quantum(quanta_t q, struct pfair_state* state)
18912 +{
18913 +	quanta_t loc;
18914 +
18915 +	goto first; /* skip mb() on first iteration */
18916 +	do {
18917 +		cpu_relax();
18918 +		mb();
18919 +	first:	loc = state->cur_tick;
18920 +		/* FIXME: what if loc > cur? */
18921 +	} while (time_before(loc, q));
18922 +	PTRACE("observed cur_tick:%lu >= q:%lu\n",
18923 +	       loc, q);
18924 +}
18925 +
18926 +static quanta_t current_quantum(struct pfair_state* state)
18927 +{
18928 +	lt_t t = litmus_clock() - state->offset;
18929 +	return time2quanta(t, FLOOR);
18930 +}
18931 +
18932 +static void catchup_quanta(quanta_t from, quanta_t target,
18933 +			   struct pfair_state* state)
18934 +{
18935 +	quanta_t cur = from, time;
18936 +	TRACE("+++< BAD catching up quanta from %lu to %lu\n",
18937 +	      from, target);
18938 +	while (time_before(cur, target)) {
18939 +		wait_for_quantum(cur, state);
18940 +		cur++;
18941 +		time = cmpxchg(&cpu_cluster(state)->pfair_time,
18942 +			       cur - 1,   /* expected */
18943 +			       cur        /* next     */
18944 +			);
18945 +		if (time == cur - 1)
18946 +			schedule_next_quantum(cpu_cluster(state), cur);
18947 +	}
18948 +	TRACE("+++> catching up done\n");
18949 +}
18950 +
18951 +/* pfair_tick - this function is called for every local timer
18952 + *                         interrupt.
18953 + */
18954 +static void pfair_tick(struct task_struct* t)
18955 +{
18956 +	struct pfair_state* state = &__get_cpu_var(pfair_state);
18957 +	quanta_t time, cur;
18958 +	int retry = 10;
18959 +
18960 +	do {
18961 +		cur  = current_quantum(state);
18962 +		PTRACE("q %lu at %llu\n", cur, litmus_clock());
18963 +
18964 +		/* Attempt to advance time. First CPU to get here
18965 +		 * will prepare the next quantum.
18966 +		 */
18967 +		time = cmpxchg(&cpu_cluster(state)->pfair_time,
18968 +			       cur - 1,   /* expected */
18969 +			       cur        /* next     */
18970 +			);
18971 +		if (time == cur - 1) {
18972 +			/* exchange succeeded */
18973 +			wait_for_quantum(cur - 1, state);
18974 +			schedule_next_quantum(cpu_cluster(state), cur);
18975 +			retry = 0;
18976 +		} else if (time_before(time, cur - 1)) {
18977 +			/* the whole system missed a tick !? */
18978 +			catchup_quanta(time, cur, state);
18979 +			retry--;
18980 +		} else if (time_after(time, cur)) {
18981 +			/* our timer lagging behind!? */
18982 +			TRACE("BAD pfair_time:%lu > cur:%lu\n", time, cur);
18983 +			retry--;
18984 +		} else {
18985 +			/* Some other CPU already started scheduling
18986 +			 * this quantum. Let it do its job and then update.
18987 +			 */
18988 +			retry = 0;
18989 +		}
18990 +	} while (retry);
18991 +
18992 +	/* Spin locally until time advances. */
18993 +	wait_for_quantum(cur, state);
18994 +
18995 +	/* copy assignment */
18996 +	/* FIXME: what if we race with a future update? Corrupted state? */
18997 +	state->local      = state->linked;
18998 +	/* signal that we are done */
18999 +	mb();
19000 +	state->local_tick = state->cur_tick;
19001 +
19002 +	if (state->local != current
19003 +	    && (is_realtime(current) || is_present(state->local)))
19004 +		litmus_reschedule_local();
19005 +}
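+/* Quantum-advance protocol in brief (assumed two-CPU race): if
+ * pfair_time == 41 and both CPUs compute cur == 42, only one
+ * cmpxchg(&pfair_time, 41, 42) succeeds; that CPU runs
+ * schedule_next_quantum(), while the loser observes time == 42 (neither
+ * before cur - 1 nor after cur), stops retrying, and simply spins in
+ * wait_for_quantum() until cur_tick reaches 42 before adopting its new
+ * assignment.
+ */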
19006 +
19007 +/* Custom scheduling tick: called on each quantum boundary. */
19008 +static enum hrtimer_restart on_quantum_boundary(struct hrtimer *timer)
19009 +{
19010 +	TS_QUANTUM_BOUNDARY_START;
19011 +
19012 +	pfair_tick(current);
19013 +	hrtimer_add_expires_ns(timer, LITMUS_QUANTUM_LENGTH_NS);
19014 +
19015 +	TS_QUANTUM_BOUNDARY_END;
19016 +	return  HRTIMER_RESTART;
19017 +}
19018 +
19019 +static int safe_to_schedule(struct task_struct* t, int cpu)
19020 +{
19021 +	int where = tsk_rt(t)->scheduled_on;
19022 +	if (where != NO_CPU && where != cpu) {
19023 +		TRACE_TASK(t, "BAD: can't be scheduled on %d, "
19024 +			   "scheduled already on %d.\n", cpu, where);
19025 +		return 0;
19026 +	} else
19027 +		return is_present(t) && !is_completed(t);
19028 +}
19029 +
19030 +static struct task_struct* pfair_schedule(struct task_struct * prev)
19031 +{
19032 +	struct pfair_state* state = &__get_cpu_var(pfair_state);
19033 +	struct pfair_cluster* cluster = cpu_cluster(state);
19034 +	int blocks, completion, out_of_time;
19035 +	struct task_struct* next = NULL;
19036 +
19037 +#ifdef CONFIG_RELEASE_MASTER
19038 +	/* Bail out early if we are the release master.
19039 +	 * The release master never schedules any real-time tasks.
19040 +	 */
19041 +	if (unlikely(cluster->pfair.release_master == cpu_id(state))) {
19042 +		sched_state_task_picked();
19043 +		return NULL;
19044 +	}
19045 +#endif
19046 +
19047 +	raw_spin_lock(cpu_lock(state));
19048 +
19049 +	blocks      = is_realtime(prev) && !is_running(prev);
19050 +	completion  = is_realtime(prev) && is_completed(prev);
19051 +	out_of_time = is_realtime(prev) && time_after(cur_release(prev),
19052 +						      state->local_tick);
19053 +
19054 +	if (is_realtime(prev))
19055 +	    PTRACE_TASK(prev, "blocks:%d completion:%d out_of_time:%d\n",
19056 +			blocks, completion, out_of_time);
19057 +
19058 +	if (completion) {
19059 +		sched_trace_task_completion(prev, 0);
19060 +		pfair_prepare_next_period(prev);
19061 +		prepare_release(prev, cur_release(prev));
19062 +	}
19063 +
19064 +	if (!blocks && (completion || out_of_time)) {
19065 +		drop_all_references(prev);
19066 +		sched_trace_task_release(prev);
19067 +		add_release(&cluster->pfair, prev);
19068 +	}
19069 +
19070 +	if (state->local && safe_to_schedule(state->local, cpu_id(state)))
19071 +		next = state->local;
19072 +
19073 +	if (prev != next) {
19074 +		tsk_rt(prev)->scheduled_on = NO_CPU;
19075 +		if (next)
19076 +			tsk_rt(next)->scheduled_on = cpu_id(state);
19077 +	}
19078 +	sched_state_task_picked();
19079 +	raw_spin_unlock(cpu_lock(state));
19080 +
19081 +	if (next)
19082 +		TRACE_TASK(next, "scheduled rel=%lu at %lu (%llu)\n",
19083 +			   tsk_pfair(next)->release, cpu_cluster(state)->pfair_time, litmus_clock());
19084 +	else if (is_realtime(prev))
19085 +		TRACE("Becomes idle at %lu (%llu)\n", cpu_cluster(state)->pfair_time, litmus_clock());
19086 +
19087 +	if (unlikely(!hrtimer_active(&state->quantum_timer))) {
19088 +		TRACE("activating quantum timer start=%llu\n",
19089 +			hrtimer_get_expires(&state->quantum_timer));
19090 +		hrtimer_start(&state->quantum_timer,
19091 +			hrtimer_get_expires(&state->quantum_timer),
19092 +			HRTIMER_MODE_ABS_PINNED);
19093 +	}
19094 +
19095 +	return next;
19096 +}
19097 +
19098 +static void pfair_task_new(struct task_struct * t, int on_rq, int is_scheduled)
19099 +{
19100 +	unsigned long flags;
19101 +	struct pfair_cluster* cluster;
19102 +
19103 +	TRACE("pfair: task new %d state:%d\n", t->pid, t->state);
19104 +
19105 +	cluster = tsk_pfair(t)->cluster;
19106 +
19107 +	raw_spin_lock_irqsave(cluster_lock(cluster), flags);
19108 +
19109 +	prepare_release(t, cluster->pfair_time + 1);
19110 +
19111 +	t->rt_param.scheduled_on = NO_CPU;
19112 +	t->rt_param.linked_on    = NO_CPU;
19113 +
19114 +	if (is_scheduled) {
19115 +#ifdef CONFIG_RELEASE_MASTER
19116 +		if (task_cpu(t) != cluster->pfair.release_master)
19117 +#endif
19118 +			t->rt_param.scheduled_on = task_cpu(t);
19119 +	}
19120 +
19121 +	if (is_running(t)) {
19122 +		tsk_rt(t)->present = 1;
19123 +		__add_ready(&cluster->pfair, t);
19124 +	} else {
19125 +		tsk_rt(t)->present = 0;
19126 +		tsk_rt(t)->flags |= FLAGS_NEED_REQUEUE;
19127 +	}
19128 +
19129 +	check_preempt(t);
19130 +
19131 +	raw_spin_unlock_irqrestore(cluster_lock(cluster), flags);
19132 +}
19133 +
19134 +static void pfair_task_wake_up(struct task_struct *t)
19135 +{
19136 +	unsigned long flags;
19137 +	lt_t now;
19138 +	struct pfair_cluster* cluster;
19139 +
19140 +	cluster = tsk_pfair(t)->cluster;
19141 +
19142 +	TRACE_TASK(t, "wakes at %llu, release=%lu, pfair_time:%lu\n",
19143 +		   litmus_clock(), cur_release(t), cluster->pfair_time);
19144 +
19145 +	raw_spin_lock_irqsave(cluster_lock(cluster), flags);
19146 +
19147 +	/* If a task blocks and wakes before its next job release,
19148 +	 * then it may resume if it is currently linked somewhere
19149 +	 * (as if it never blocked at all). Otherwise, we have a
19150 +	 * new sporadic job release.
19151 +	 */
19152 +	now = litmus_clock();
19153 +	if (is_tardy(t, now)) {
19154 +		TRACE_TASK(t, "sporadic release!\n");
19155 +		release_at(t, now);
19156 +		prepare_release(t, time2quanta(now, CEIL));
19157 +		sched_trace_task_release(t);
19158 +	}
19159 +
19160 +	/* only add to ready queue if the task isn't still linked somewhere */
19161 +	if (tsk_rt(t)->flags & FLAGS_NEED_REQUEUE) {
19162 +		tsk_rt(t)->flags &= ~FLAGS_NEED_REQUEUE;
19163 +		TRACE_TASK(t, "requeueing required\n");
19164 +		tsk_rt(t)->completed = 0;
19165 +		__add_ready(&cluster->pfair, t);
19166 +	}
19167 +
19168 +	check_preempt(t);
19169 +
19170 +	raw_spin_unlock_irqrestore(cluster_lock(cluster), flags);
19171 +	TRACE_TASK(t, "wake up done at %llu\n", litmus_clock());
19172 +}
19173 +
19174 +static void pfair_task_block(struct task_struct *t)
19175 +{
19176 +	BUG_ON(!is_realtime(t));
19177 +	TRACE_TASK(t, "blocks at %llu, state:%d\n",
19178 +		   litmus_clock(), t->state);
19179 +}
19180 +
19181 +static void pfair_task_exit(struct task_struct * t)
19182 +{
19183 +	unsigned long flags;
19184 +	struct pfair_cluster *cluster;
19185 +
19186 +	BUG_ON(!is_realtime(t));
19187 +
19188 +	cluster = tsk_pfair(t)->cluster;
19189 +
19190 +	/* Remove the task from the release or ready queue, and ensure
19191 +	 * that it is not the scheduled task for ANY CPU. We
19192 +	 * do this blanket check because occasionally, when
19193 +	 * tasks exit while blocked, the task_cpu of the task
19194 +	 * might not be the same as the CPU that the PFAIR scheduler
19195 +	 * has chosen for it.
19196 +	 */
19197 +	raw_spin_lock_irqsave(cluster_lock(cluster), flags);
19198 +
19199 +	TRACE_TASK(t, "RIP, state:%d\n", t->state);
19200 +	drop_all_references(t);
19201 +
19202 +	raw_spin_unlock_irqrestore(cluster_lock(cluster), flags);
19203 +
19204 +	kfree(t->rt_param.pfair);
19205 +	t->rt_param.pfair = NULL;
19206 +}
19207 +
19208 +static void init_subtask(struct subtask* sub, unsigned long i,
19209 +			 lt_t quanta, lt_t period)
19210 +{
19211 +	/* since i is zero-based, the formulas are shifted by one */
19212 +	lt_t tmp;
19213 +
19214 +	/* release */
19215 +	tmp = period * i;
19216 +	do_div(tmp, quanta); /* floor */
19217 +	sub->release = (quanta_t) tmp;
19218 +
19219 +	/* deadline */
19220 +	tmp = period * (i + 1);
19221 +	if (do_div(tmp, quanta)) /* ceil */
19222 +		tmp++;
19223 +	sub->deadline = (quanta_t) tmp;
19224 +
19225 +	/* next release */
19226 +	tmp = period * (i + 1);
19227 +	do_div(tmp, quanta); /* floor */
19228 +	sub->overlap =  sub->deadline - (quanta_t) tmp;
19229 +
19230 +	/* Group deadline.
19231 +	 * Based on the formula given in Uma's thesis.
19232 +	 */
19233 +	if (2 * quanta >= period) {
19234 +		/* heavy */
19235 +		tmp = (sub->deadline - (i + 1)) * period;
19236 +		if (period > quanta &&
19237 +		    do_div(tmp, (period - quanta))) /* ceil */
19238 +			tmp++;
19239 +		sub->group_deadline = (quanta_t) tmp;
19240 +	} else
19241 +		sub->group_deadline = 0;
19242 +}
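+/* Worked example of the PD^2 parameters computed above (weight 3/5, i.e.,
+ * quanta = 3, period = 5; "heavy" since 2*3 >= 5):
+ *   subtask 0: release 0, deadline 2, b-bit 1, group deadline 3
+ *   subtask 1: release 1, deadline 4, b-bit 1, group deadline 5
+ *   subtask 2: release 3, deadline 5, b-bit 0, group deadline 5
+ * All values are in quanta relative to the job release, matching the
+ * output format of dump_subtasks().
+ */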
19243 +
19244 +static void dump_subtasks(struct task_struct* t)
19245 +{
19246 +	unsigned long i;
19247 +	for (i = 0; i < t->rt_param.pfair->quanta; i++)
19248 +		TRACE_TASK(t, "SUBTASK %lu: rel=%lu dl=%lu bbit:%lu gdl:%lu\n",
19249 +			   i + 1,
19250 +			   t->rt_param.pfair->subtasks[i].release,
19251 +			   t->rt_param.pfair->subtasks[i].deadline,
19252 +			   t->rt_param.pfair->subtasks[i].overlap,
19253 +			   t->rt_param.pfair->subtasks[i].group_deadline);
19254 +}
19255 +
19256 +static long pfair_admit_task(struct task_struct* t)
19257 +{
19258 +	lt_t quanta;
19259 +	lt_t period;
19260 +	s64  quantum_length = LITMUS_QUANTUM_LENGTH_NS;
19261 +	struct pfair_param* param;
19262 +	unsigned long i;
19263 +
19264 +	/* first check that the task is in the right cluster */
19265 +	if (cpu_cluster(pstate[tsk_rt(t)->task_params.cpu]) !=
19266 +	    cpu_cluster(pstate[task_cpu(t)]))
19267 +		return -EINVAL;
19268 +
19269 +	if (get_rt_period(t) != get_rt_relative_deadline(t)) {
19270 +		printk(KERN_INFO "%s: Admission rejected. "
19271 +			"Only implicit deadlines are currently supported.\n",
19272 +			litmus->plugin_name);
19273 +		return -EINVAL;
19274 +	}
19275 +
19276 +	/* Pfair is a quantum-based method, so the time unit of
19277 +	 * interest is the scheduling quantum. Convert all times
19278 +	 * to quanta.
19279 +	 * (Ceiling of the exec cost, floor of the period.)
19280 +	 */
19281 +
19282 +	quanta = get_exec_cost(t);
19283 +	period = get_rt_period(t);
19284 +
19285 +	quanta = time2quanta(get_exec_cost(t), CEIL);
19286 +
19287 +	if (do_div(period, quantum_length))
19288 +		printk(KERN_WARNING
19289 +		       "The period of %s/%d is not a multiple of %llu.\n",
19290 +		       t->comm, t->pid, (unsigned long long) quantum_length);
19291 +
19292 +	if (quanta == period) {
19293 +		/* special case: task has weight 1.0 */
19294 +		printk(KERN_INFO
19295 +		       "Admitting weight 1.0 task. (%s/%d, %llu, %llu).\n",
19296 +		       t->comm, t->pid, quanta, period);
19297 +		quanta = 1;
19298 +		period = 1;
19299 +	}
19300 +
19301 +	param = kmalloc(sizeof(*param) +
19302 +			quanta * sizeof(struct subtask), GFP_ATOMIC);
19303 +
19304 +	if (!param)
19305 +		return -ENOMEM;
19306 +
19307 +	param->quanta  = quanta;
19308 +	param->cur     = 0;
19309 +	param->release = 0;
19310 +	param->period  = period;
19311 +
19312 +	param->cluster = cpu_cluster(pstate[tsk_rt(t)->task_params.cpu]);
19313 +
19314 +	for (i = 0; i < quanta; i++)
19315 +		init_subtask(param->subtasks + i, i, quanta, period);
19316 +
19317 +	if (t->rt_param.pfair)
19318 +		/* get rid of stale allocation */
19319 +		kfree(t->rt_param.pfair);
19320 +
19321 +	t->rt_param.pfair = param;
19322 +
19323 +	/* spew out some debug info */
19324 +	dump_subtasks(t);
19325 +
19326 +	return 0;
19327 +}
19328 +
19329 +static void pfair_init_cluster(struct pfair_cluster* cluster)
19330 +{
19331 +	rt_domain_init(&cluster->pfair, pfair_ready_order, NULL, pfair_release_jobs);
19332 +	bheap_init(&cluster->release_queue);
19333 +	raw_spin_lock_init(&cluster->release_lock);
19334 +	INIT_LIST_HEAD(&cluster->topology.cpus);
19335 +}
19336 +
19337 +static void cleanup_clusters(void)
19338 +{
19339 +	int i;
19340 +
19341 +	if (num_pfair_clusters)
19342 +		kfree(pfair_clusters);
19343 +	pfair_clusters = NULL;
19344 +	num_pfair_clusters = 0;
19345 +
19346 +	/* avoid stale pointers */
19347 +	for (i = 0; i < num_online_cpus(); i++) {
19348 +		pstate[i]->topology.cluster = NULL;
19349 +		printk(KERN_INFO "P%d missed %u updates and %u quanta.\n", cpu_id(pstate[i]),
19350 +		       pstate[i]->missed_updates, pstate[i]->missed_quanta);
19351 +	}
19352 +}
19353 +
19354 +static struct domain_proc_info pfair_domain_proc_info;
19355 +static long pfair_get_domain_proc_info(struct domain_proc_info **ret)
19356 +{
19357 +	*ret = &pfair_domain_proc_info;
19358 +	return 0;
19359 +}
19360 +
19361 +static void pfair_setup_domain_proc(void)
19362 +{
19363 +	int i, cpu, domain;
19364 +#ifdef CONFIG_RELEASE_MASTER
19365 +	int release_master = atomic_read(&release_master_cpu);
19366 +	/* skip over the domain with the release master if cluster size is 1 */
19367 +	int cluster_size = num_online_cpus() / num_pfair_clusters;
19368 +	int skip_domain = (1 == cluster_size && release_master != NO_CPU) ?
19369 +			release_master : NO_CPU;
19370 +#else
19371 +	int release_master = NO_CPU;
19372 +	int skip_domain = NO_CPU;
19373 +#endif
19374 +	int num_rt_cpus = num_online_cpus() - (release_master != NO_CPU);
19375 +	int num_rt_domains = num_pfair_clusters - (skip_domain != NO_CPU);
19376 +	struct cd_mapping *map;
19377 +
19378 +	memset(&pfair_domain_proc_info, 0, sizeof(pfair_domain_proc_info));
19379 +	init_domain_proc_info(&pfair_domain_proc_info, num_rt_cpus, num_pfair_clusters);
19380 +	pfair_domain_proc_info.num_cpus = num_rt_cpus;
19381 +	pfair_domain_proc_info.num_domains = num_rt_domains;
19382 +
19383 +	for (cpu = 0, i = 0; cpu < num_online_cpus(); ++cpu) {
19384 +		if (cpu == release_master)
19385 +			continue;
19386 +		map = &pfair_domain_proc_info.cpu_to_domains[i];
19387 +		/* pointer math to figure out the domain index */
19388 +		domain = cpu_cluster(&per_cpu(pfair_state, cpu)) - pfair_clusters;
19389 +		map->id = cpu;
19390 +		cpumask_set_cpu(domain, map->mask);
19391 +		++i;
19392 +	}
19393 +
19394 +	for (domain = 0, i = 0; domain < num_pfair_clusters; ++domain) {
19395 +		struct pfair_cluster *cluster;
19396 +		struct list_head *pos;
19397 +
19398 +		if (domain == skip_domain)
19399 +			continue;
19400 +
19401 +		cluster = &pfair_clusters[domain];
19402 +		map = &pfair_domain_proc_info.domain_to_cpus[i];
19403 +		map->id = i;
19404 +
19405 +		list_for_each(pos, &cluster->topology.cpus) {
19406 +			cpu = cpu_id(from_cluster_list(pos));
19407 +			if (cpu != release_master)
19408 +				cpumask_set_cpu(cpu, map->mask);
19409 +		}
19410 +		++i;
19411 +	}
19412 +}
19413 +
19414 +static long pfair_activate_plugin(void)
19415 +{
19416 +	int err, i;
19417 +	struct pfair_state* state;
19418 +	struct pfair_cluster* cluster;
19419 +	quanta_t now, start;
19420 +	int cluster_size;
19421 +	struct cluster_cpu* cpus[NR_CPUS];
19422 +	struct scheduling_cluster* clust[NR_CPUS];
19423 +	lt_t quantum_timer_start;
19424 +
19425 +	cluster_size = get_cluster_size(pfair_cluster_level);
19426 +
19427 +	if (cluster_size <= 0 || num_online_cpus() % cluster_size != 0)
19428 +		return -EINVAL;
19429 +
19430 +	num_pfair_clusters = num_online_cpus() / cluster_size;
19431 +
19432 +	pfair_clusters = kzalloc(num_pfair_clusters * sizeof(struct pfair_cluster), GFP_ATOMIC);
19433 +	if (!pfair_clusters) {
19434 +		num_pfair_clusters = 0;
19435 +		printk(KERN_ERR "Could not allocate Pfair clusters!\n");
19436 +		return -ENOMEM;
19437 +	}
19438 +
19439 +	state = &__get_cpu_var(pfair_state);
19440 +	now   = current_quantum(state);
19441 +	start = now + 50;
19442 +	quantum_timer_start = quanta2time(start);
19443 +	TRACE("Activating PFAIR at %llu (q=%lu), first tick at %llu (q=%lu)\n",
19444 +		litmus_clock(),
19445 +		now,
19446 +		quantum_timer_start,
19447 +		time2quanta(quantum_timer_start, CEIL));
19448 +
19449 +	for (i = 0; i < num_pfair_clusters; i++) {
19450 +		cluster = &pfair_clusters[i];
19451 +		pfair_init_cluster(cluster);
19452 +		cluster->pfair_time = start;
19453 +		clust[i] = &cluster->topology;
19454 +#ifdef CONFIG_RELEASE_MASTER
19455 +		cluster->pfair.release_master = atomic_read(&release_master_cpu);
19456 +#endif
19457 +	}
19458 +
19459 +	for_each_online_cpu(i) {
19460 +		state = &per_cpu(pfair_state, i);
19461 +		state->cur_tick   = start;
19462 +		state->local_tick = start;
19463 +		state->missed_quanta = 0;
19464 +		state->missed_updates = 0;
19465 +		state->offset     = cpu_stagger_offset(i);
19466 +		hrtimer_set_expires(&state->quantum_timer,
19467 +			ns_to_ktime(quantum_timer_start + state->offset));
19468 +		printk(KERN_INFO "cpus[%d] set; offset=%llu; %d online CPUs\n", i, state->offset, num_online_cpus());
19469 +		cpus[i] = &state->topology;
19470 +		/* force rescheduling to start quantum timer */
19471 +		litmus_reschedule(i);
19472 +
19473 +		WARN_ONCE(!hrtimer_is_hres_active(&state->quantum_timer),
19474 +			"no high-resolution timers available!?\n");
19475 +	}
19476 +
19477 +	err = assign_cpus_to_clusters(pfair_cluster_level, clust, num_pfair_clusters,
19478 +				      cpus, num_online_cpus());
19479 +
19480 +	if (err < 0)
19481 +		cleanup_clusters();
19482 +	else
19483 +		pfair_setup_domain_proc();
19484 +
19485 +	return err;
19486 +}
19487 +
19488 +static long pfair_deactivate_plugin(void)
19489 +{
19490 +	int cpu;
19491 +	struct pfair_state* state;
19492 +
19493 +	for_each_online_cpu(cpu) {
19494 +		state = &per_cpu(pfair_state, cpu);
19495 +		TRACE("stopping quantum timer on CPU%d\n", cpu);
19496 +		hrtimer_cancel(&state->quantum_timer);
19497 +	}
19498 +	cleanup_clusters();
19499 +	destroy_domain_proc_info(&pfair_domain_proc_info);
19500 +	return 0;
19501 +}
19502 +
19503 +/*	Plugin object	*/
19504 +static struct sched_plugin pfair_plugin __cacheline_aligned_in_smp = {
19505 +	.plugin_name		= "PFAIR",
19506 +	.task_new		= pfair_task_new,
19507 +	.task_exit		= pfair_task_exit,
19508 +	.schedule		= pfair_schedule,
19509 +	.task_wake_up		= pfair_task_wake_up,
19510 +	.task_block		= pfair_task_block,
19511 +	.admit_task		= pfair_admit_task,
19512 +	.complete_job		= complete_job,
19513 +	.activate_plugin	= pfair_activate_plugin,
19514 +	.deactivate_plugin	= pfair_deactivate_plugin,
19515 +	.get_domain_proc_info	= pfair_get_domain_proc_info,
19516 +};
19517 +
19518 +
19519 +static struct proc_dir_entry *cluster_file = NULL, *pfair_dir = NULL;
19520 +
19521 +static int __init init_pfair(void)
19522 +{
19523 +	int cpu, err, fs;
19524 +	struct pfair_state *state;
19525 +
19526 +	/*
19527 +	 * Initialize the per-CPU shortcut array for the pfair state.
19528 +	 * This is racy if a CPU is removed while we run, and becomes
19529 +	 * stale if CPUs are added or removed later, but CPU hotplug
19530 +	 * is not supported at the moment anyway.
19531 +	 */
19532 +	pstate = kmalloc(sizeof(struct pfair_state*) * num_online_cpus(), GFP_KERNEL);
19533 +
19534 +	/* initialize CPU state */
19535 +	for (cpu = 0; cpu < num_online_cpus(); cpu++)  {
19536 +		state = &per_cpu(pfair_state, cpu);
19537 +		hrtimer_init(&state->quantum_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
19538 +		state->quantum_timer.function = on_quantum_boundary;
19539 +		state->topology.id = cpu;
19540 +		state->cur_tick   = 0;
19541 +		state->local_tick = 0;
19542 +		state->linked     = NULL;
19543 +		state->local      = NULL;
19544 +		state->scheduled  = NULL;
19545 +		state->missed_quanta = 0;
19546 +		state->offset     = cpu_stagger_offset(cpu);
19547 +		pstate[cpu] = state;
19548 +	}
19549 +
19550 +	pfair_clusters = NULL;
19551 +	num_pfair_clusters = 0;
19552 +
19553 +	err = register_sched_plugin(&pfair_plugin);
19554 +	if (!err) {
19555 +		fs = make_plugin_proc_dir(&pfair_plugin, &pfair_dir);
19556 +		if (!fs)
19557 +			cluster_file = create_cluster_file(pfair_dir, &pfair_cluster_level);
19558 +		else
19559 +			printk(KERN_ERR "Could not allocate PFAIR procfs dir.\n");
19560 +	}
19561 +
19562 +	return err;
19563 +}
19564 +
19565 +static void __exit clean_pfair(void)
19566 +{
19567 +	kfree(pstate);
19568 +
19569 +	if (cluster_file)
19570 +		remove_proc_entry("cluster", pfair_dir);
19571 +	if (pfair_dir)
19572 +		remove_plugin_proc_dir(&pfair_plugin);
19573 +}
19574 +
19575 +module_init(init_pfair);
19576 +module_exit(clean_pfair);
19577 -- 
19578 1.8.1.2
19579 
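For reference, the subtask parameters computed by init_subtask() in the patch above are the standard Pfair/PD^2 per-subtask values (pseudo-release, pseudo-deadline, b-bit, and group deadline). The following stand-alone sketch (not part of the patch series) restates the same arithmetic with plain integer division in place of do_div(), together with a small worked example; weight-1 tasks (p == c) are collapsed to c = p = 1 by pfair_admit_task() before init_subtask() is ever called, so they are not treated specially here.

/* Stand-alone sketch of the per-subtask values computed by init_subtask():
 * c = execution cost in quanta, p = period in quanta, i = zero-based
 * subtask index.
 */
#include <stdio.h>

struct subtask_params {
	unsigned long release, deadline, b_bit, group_deadline;
};

static struct subtask_params pfair_subtask(unsigned long i,
					   unsigned long c, unsigned long p)
{
	struct subtask_params s;

	s.release  = (i * p) / c;                    /* floor(i * p / c)      */
	s.deadline = ((i + 1) * p + c - 1) / c;      /* ceil((i + 1) * p / c) */
	s.b_bit    = s.deadline - ((i + 1) * p) / c; /* "overlap" in the patch */
	if (2 * c >= p && p > c)                     /* heavy task, weight >= 1/2 */
		s.group_deadline =
			((s.deadline - (i + 1)) * p + (p - c - 1)) / (p - c);
	else
		s.group_deadline = 0;
	return s;
}

int main(void)
{
	/* worked example: weight 2/3, i.e., c = 2 quanta every p = 3 quanta */
	unsigned long i;

	for (i = 0; i < 2; i++) {
		struct subtask_params s = pfair_subtask(i, 2, 3);
		printf("subtask %lu: rel=%lu dl=%lu b=%lu gdl=%lu\n",
		       i, s.release, s.deadline, s.b_bit, s.group_deadline);
	}
	return 0;
}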
19580 
19581 From 5b564e918add09d778ae347e9fdd005a36f8e879 Mon Sep 17 00:00:00 2001
19582 From: Bjoern Brandenburg <bbb@mpi-sws.org>
19583 Date: Thu, 12 Jun 2014 14:05:51 +0200
19584 Subject: [PATCH 051/119] PFAIR: set release time of new tasks
19585 
19586 Without a proper release time, the job will be considered "lagging
19587 behind" for quite a while, which breaks the period enforcement. This
19588 bug manifested only in the absence of a synchronous release (which sets
19589 a proper release time).
19590 
19591 This patch simply sets the beginning of the next quantum as the
19592 release time of the first job of a newly added task.
19593 ---
19594  litmus/sched_pfair.c | 1 +
19595  1 file changed, 1 insertion(+)
19596 
19597 diff --git a/litmus/sched_pfair.c b/litmus/sched_pfair.c
19598 index 91f1e08..54fa36c 100644
19599 --- a/litmus/sched_pfair.c
19600 +++ b/litmus/sched_pfair.c
19601 @@ -696,6 +696,7 @@ static void pfair_task_new(struct task_struct * t, int on_rq, int is_scheduled)
19602  	raw_spin_lock_irqsave(cluster_lock(cluster), flags);
19603  
19604  	prepare_release(t, cluster->pfair_time + 1);
19605 +	release_at(t, quanta2time(cur_release(t)));
19606  
19607  	t->rt_param.scheduled_on = NO_CPU;
19608  	t->rt_param.linked_on    = NO_CPU;
19609 -- 
19610 1.8.1.2
19611 
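The fix above maps the first subtask's quantum-based release (cur_release(t), a quantum index) back to a wall-clock release time via quanta2time(). A minimal sketch of the assumed conversion, with an illustrative quantum length (the kernel uses the configured LITMUS_QUANTUM_LENGTH_NS, not the value below):

/* Sketch only: quanta2time() assumed to scale a quantum index by the
 * configured quantum length in nanoseconds; 1 ms is an illustrative
 * assumption, not a value taken from the patch. */
typedef unsigned long long lt_t;
typedef unsigned long quanta_t;

#define QUANTUM_LENGTH_NS 1000000ULL	/* assumed 1 ms quantum */

static inline lt_t quanta2time_sketch(quanta_t q)
{
	return (lt_t)q * QUANTUM_LENGTH_NS;
}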
19612 
19613 From 5c2112a210e8654d96e3f4c0395f1a326f28666f Mon Sep 17 00:00:00 2001
19614 From: Namhoon Kim <namhoonk@cs.unc.edu>
19615 Date: Mon, 3 Nov 2014 21:52:24 -0500
19616 Subject: [PATCH 052/119] ARM timer support
19617 
19618 ---
19619  include/litmus/clock.h         | 48 ++++++++++++++++++++++++++++++++++++++
19620  include/litmus/feather_trace.h | 12 ++++++++++
19621  litmus/litmus.c                | 53 +++++++++++++++++++++++++++++++++++++++++-
19622  3 files changed, 112 insertions(+), 1 deletion(-)
19623  create mode 100644 include/litmus/clock.h
19624 
19625 diff --git a/include/litmus/clock.h b/include/litmus/clock.h
19626 new file mode 100644
19627 index 0000000..f8de7a3
19628 --- /dev/null
19629 +++ b/include/litmus/clock.h
19630 @@ -0,0 +1,48 @@
19631 +#ifndef _LITMUS_CLOCK_H_
19632 +#define _LITMUS_CLOCK_H_
19633 +
19634 +#if defined(CONFIG_EXYNOS_MCT)
19635 +
19636 +/*
19637 + * Only used if we are using the EXYNOS MCT clock.
19638 + */
19639 +
19640 +#include <linux/clocksource.h>
19641 +extern struct clocksource mct_frc;
19642 +
19643 +static inline cycles_t mct_frc_read(void)
19644 +{
19645 +	cycle_t cycles = mct_frc.read(&mct_frc);
19646 +	return cycles;
19647 +}
19648 +
19649 +static inline s64 litmus_cycles_to_ns(cycles_t cycles)
19650 +{
19651 +	return clocksource_cyc2ns(cycles, mct_frc.mult, mct_frc.shift);
19652 +}
19653 +
19654 +#define litmus_get_cycles mct_frc_read
19655 +
19656 +#elif defined(CONFIG_CPU_V7) && !defined(CONFIG_HW_PERF_EVENTS)
19657 +
19658 +#include <asm/timex.h>
19659 +
19660 +static inline cycles_t v7_get_cycles (void)
19661 +{
19662 +	u32 value;
19663 +	/* read CCNT register */
19664 +	asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(value));
19665 +	return value;
19666 +}
19667 +
19668 +#define litmus_get_cycles v7_get_cycles
19669 +
19670 +#else
19671 +#include <asm/timex.h>
19672 +
19673 +#define litmus_get_cycles get_cycles
19674 +
19675 +#endif
19676 +
19677 +#endif
19678 +
19679 diff --git a/include/litmus/feather_trace.h b/include/litmus/feather_trace.h
19680 index dbeca46..cc4396e 100644
19681 --- a/include/litmus/feather_trace.h
19682 +++ b/include/litmus/feather_trace.h
19683 @@ -38,11 +38,23 @@ static inline void ft_atomic_dec(int *val)
19684  /* provide default implementation */
19685  #include <linux/timex.h> /* for get_cycles() */
19686  
19687 +#if defined(CONFIG_CPU_V7) && !defined(CONFIG_HW_PERF_EVENTS)	
19688 +
19689 +#include <litmus/clock.h> /* for litmus_get_cycles() */
19690 +static inline unsigned long long ft_timestamp(void)
19691 +{
19692 +	return (unsigned long long)litmus_get_cycles();
19693 +}
19694 +
19695 +#else
19696 +
19697  static inline unsigned long long ft_timestamp(void)
19698  {
19699  	return get_cycles();
19700  }
19701  
19702 +#endif
19703 +
19704  #define feather_callback
19705  
19706  #define MAX_EVENTS 1024
19707 diff --git a/litmus/litmus.c b/litmus/litmus.c
19708 index a061343..14b1031 100644
19709 --- a/litmus/litmus.c
19710 +++ b/litmus/litmus.c
19711 @@ -20,6 +20,9 @@
19712  #include <litmus/rt_domain.h>
19713  #include <litmus/litmus_proc.h>
19714  #include <litmus/sched_trace.h>
19715 +#include <litmus/clock.h>
19716 +
19717 +#include <asm/cacheflush.h>
19718  
19719  #ifdef CONFIG_SCHED_CPU_AFFINITY
19720  #include <litmus/affinity.h>
19721 @@ -303,9 +306,11 @@ asmlinkage long sys_null_call(cycles_t __user *ts)
19722  	cycles_t now;
19723  
19724  	if (ts) {
19725 -		now = get_cycles();
19726 +		now = litmus_get_cycles();
19727  		ret = put_user(now, ts);
19728  	}
19729 +	else
19730 +		flush_cache_all();
19731  
19732  	return ret;
19733  }
19734 @@ -599,6 +604,48 @@ static struct notifier_block shutdown_notifier = {
19735  	.notifier_call = litmus_shutdown_nb,
19736  };
19737  
19738 +#if defined(CONFIG_CPU_V7) && !defined(CONFIG_HW_PERF_EVENTS)
19739 +static void __init litmus_enable_perfcounters_v7(void *_ignore)
19740 +{
19741 +	u32 enable_val = 0;
19742 +
19743 +	/* disable performance monitoring */
19744 +	asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (0x00000006));
19745 +
19746 +	/* disable all events */
19747 +	asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (0xffffffff));
19748 +
19749 +	/* write 1 to enable user-mode access to the performance counter */
19750 +	asm volatile("mcr p15, 0, %0, c9, c14, 0" : : "r" (1));
19751 +
19752 +	/* disable counter overflow interrupts (just in case) */
19753 +	asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (0x8000000f));
19754 +
19755 +	/* select event zero */
19756 +	asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (0));
19757 +
19758 +	/* program the selected counter (0) to count CPU cycles (event 0x11) */
19759 +	asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (0x00000011));
19760 +
19761 +	enable_val |= 1;	/* bit 0 (E): enable all counters */
19762 +	enable_val |= 2;	/* bit 1 (P): reset all event counters to zero */
19763 +	enable_val |= 4;	/* bit 2 (C): reset the cycle counter to zero */
19764 +	//enable_val |= 8;	/* enable "by 64" divider for CCNT. */
19765 +	
19766 +	/* performance monitor control register: enable all counters */
19767 +	asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(enable_val));
19768 +
19769 +	/* enable the cycle counter and event counter 0 */
19770 +	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r"(0x80000001));
19771 +}
19772 +
19773 +static void __init litmus_enable_perfcounters(void)
19774 +{
19775 +	litmus_enable_perfcounters_v7(NULL);
19776 +	smp_call_function(litmus_enable_perfcounters_v7, NULL, 0);
19777 +}
19778 +#endif
19779 +
19780  static int __init _init_litmus(void)
19781  {
19782  	/*      Common initializers,
19783 @@ -628,6 +675,10 @@ static int __init _init_litmus(void)
19784  
19785  	register_reboot_notifier(&shutdown_notifier);
19786  
19787 +#if defined(CONFIG_CPU_V7) && !defined(CONFIG_HW_PERF_EVENTS)	
19788 +	litmus_enable_perfcounters();
19789 +#endif
19790 +	
19791  	return 0;
19792  }
19793  
19794 -- 
19795 1.8.1.2
19796 
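For reference, litmus_cycles_to_ns() in the patch above defers to the generic clocksource conversion, which scales the raw cycle count by the clocksource's mult/shift calibration pair. A simplified stand-alone sketch of that conversion (the in-kernel helper may additionally guard against overflow):

/* Simplified sketch: ns is approximately (cycles * mult) >> shift, where
 * mult and shift are the calibration constants of the clocksource. */
#include <stdint.h>

static inline int64_t cyc2ns_sketch(uint64_t cycles, uint32_t mult, uint32_t shift)
{
	return (int64_t)((cycles * mult) >> shift);
}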
19797 
19798 From d0cc5b0897b74201fe1ca363ce1d980b5dbefff5 Mon Sep 17 00:00:00 2001
19799 From: Namhoon Kim <namhoonk@cs.unc.edu>
19800 Date: Mon, 3 Nov 2014 21:53:47 -0500
19801 Subject: [PATCH 053/119] Added cache /proc
19802 
19803 ---
19804  arch/arm/mach-imx/Makefile  |   4 +-
19805  arch/arm/mm/cache-l2x0.c    |   6 ++
19806  include/litmus/cache_proc.h |  10 ++
19807  litmus/cache_proc.c         | 245 ++++++++++++++++++++++++++++++++++++++++++++
19808  4 files changed, 263 insertions(+), 2 deletions(-)
19809  create mode 100644 include/litmus/cache_proc.h
19810  create mode 100644 litmus/cache_proc.c
19811 
19812 diff --git a/arch/arm/mach-imx/Makefile b/arch/arm/mach-imx/Makefile
19813 index 2536699..02d0208 100644
19814 --- a/arch/arm/mach-imx/Makefile
19815 +++ b/arch/arm/mach-imx/Makefile
19816 @@ -107,13 +107,13 @@ obj-$(CONFIG_SOC_IMX6SX) += clk-imx6sx.o mach-imx6sx.o
19817  AFLAGS_suspend-imx6.o :=-Wa,-march=armv7-a
19818  obj-$(CONFIG_PM) += pm-imx6.o headsmp.o suspend-imx6.o
19819  
19820 -ifeq ($(CONFIG_ARM_IMX6_CPUFREQ),y)
19821 +#ifeq ($(CONFIG_ARM_IMX6_CPUFREQ),y)
19822  obj-y += busfreq-imx6.o
19823  obj-$(CONFIG_SOC_IMX6Q) += ddr3_freq_imx6.o busfreq_ddr3.o
19824  obj-$(CONFIG_SOC_IMX6SL) += lpddr2_freq_imx6.o busfreq_lpddr2.o imx6sl_wfi.o
19825  obj-$(CONFIG_SOC_IMX6SX) += ddr3_freq_imx6sx.o lpddr2_freq_imx6sx.o
19826  
19827 -endif
19828 +#endif
19829  
19830  
19831  # i.MX5 based machines
19832 diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
19833 index c465fac..5efe6b6e 100644
19834 --- a/arch/arm/mm/cache-l2x0.c
19835 +++ b/arch/arm/mm/cache-l2x0.c
19836 @@ -27,6 +27,8 @@
19837  #include <asm/hardware/cache-l2x0.h>
19838  #include "cache-aurora-l2.h"
19839  
19840 +#include <litmus/cache_proc.h>
19841 +
19842  #define CACHE_LINE_SIZE		32
19843  
19844  static void __iomem *l2x0_base;
19845 @@ -393,6 +395,8 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
19846  		l2x0_unlock(cache_id);
19847  
19848  		/* l2x0 controller is disabled */
19849 +		//aux |= (1 << 12);
19850 +		//printk("AUX BIT = %08x\n", aux);
19851  		writel_relaxed(aux, l2x0_base + L2X0_AUX_CTRL);
19852  
19853  		l2x0_inv_all();
19854 @@ -420,6 +424,8 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
19855  	printk(KERN_INFO "%s cache controller enabled\n", type);
19856  	printk(KERN_INFO "l2x0: %d ways, CACHE_ID 0x%08x, AUX_CTRL 0x%08x, Cache size: %d B\n",
19857  			ways, cache_id, aux, l2x0_size);
19858 +
19859 +	litmus_setup_lockdown(l2x0_base, cache_id);
19860  }
19861  
19862  #ifdef CONFIG_OF
19863 diff --git a/include/litmus/cache_proc.h b/include/litmus/cache_proc.h
19864 new file mode 100644
19865 index 0000000..a7a740e
19866 --- /dev/null
19867 +++ b/include/litmus/cache_proc.h
19868 @@ -0,0 +1,10 @@
19869 +#ifndef LITMUS_CACHE_PROC_H
19870 +#define LITMUS_CACHE_PROC_H
19871 +
19872 +#ifdef __KERNEL__
19873 +
19874 +void litmus_setup_lockdown(void __iomem*, u32);
19875 +
19876 +#endif
19877 +
19878 +#endif
19879 diff --git a/litmus/cache_proc.c b/litmus/cache_proc.c
19880 new file mode 100644
19881 index 0000000..4425bfb
19882 --- /dev/null
19883 +++ b/litmus/cache_proc.c
19884 @@ -0,0 +1,245 @@
19885 +#include <linux/init.h>
19886 +#include <linux/types.h>
19887 +#include <linux/kernel.h>
19888 +#include <linux/module.h>
19889 +#include <linux/sysctl.h>
19890 +#include <linux/slab.h>
19891 +#include <linux/io.h>
19892 +#include <linux/mutex.h>
19893 +
19894 +#include <litmus/litmus_proc.h>
19895 +#include <litmus/sched_trace.h>
19896 +
19897 +#include <asm/hardware/cache-l2x0.h>
19898 +#include <asm/cacheflush.h>
19899 +
19900 +#define UNLOCK_ALL	0x00000000 /* allocation in any way */
19901 +#define LOCK_ALL        (~UNLOCK_ALL)
19902 +#define MAX_NR_WAYS	16
19903 +
19904 +u32 nr_unlocked_way[MAX_NR_WAYS+1]  = {
19905 +	0xFFFFFFFF, /* all ways are locked. usable = 0 */
19906 +	0xFFFFFFFE, /* way 0 unlocked. usable = 1 */
19907 +	0xFFFFFFFC,
19908 +	0xFFFFFFF8,
19909 +	0xFFFFFFF0,
19910 +	0xFFFFFFE0,
19911 +	0xFFFFFFC0,
19912 +	0xFFFFFF80,
19913 +	0xFFFFFF00,
19914 +	0xFFFFFE00,
19915 +	0xFFFFFC00,
19916 +	0xFFFFF800,
19917 +	0xFFFFF000,
19918 +	0xFFFFE000,
19919 +	0xFFFFC000,
19920 +	0xFFFF8000,
19921 +	0xFFFF0000, /* ways 0-15 unlocked. usable = 16 */
19922 +};
19923 +
19924 +static void __iomem *cache_base;
19925 +static void __iomem *lockreg_d;
19926 +static void __iomem *lockreg_i;
19927 +
19928 +static u32 cache_id;
19929 +
19930 +struct mutex actlr_mutex;
19931 +struct mutex l2x0_prefetch_mutex;
19932 +struct mutex lockdown_proc;
19933 +
19934 +static int min_usable_ways = 0;
19935 +static int max_usable_ways = 16;
19936 +static int zero = 0;
19937 +static int one = 1;
19938 +
19939 +#define ld_d_reg(cpu) ({ int __cpu = cpu; \
19940 +			void __iomem *__v = cache_base + L2X0_LOCKDOWN_WAY_D_BASE + \
19941 +			__cpu * L2X0_LOCKDOWN_STRIDE; __v; })
19942 +#define ld_i_reg(cpu) ({ int __cpu = cpu; \
19943 +			void __iomem *__v = cache_base + L2X0_LOCKDOWN_WAY_I_BASE + \
19944 +			__cpu * L2X0_LOCKDOWN_STRIDE; __v; })
19945 +
19946 +int l2_usable_ways;
19947 +int lock_all;
19948 +int nr_lockregs;
19949 +
19950 +static void print_lockdown_registers(void)
19951 +{
19952 +	int i;
19953 +
19954 +	for (i = 0; i < nr_lockregs; i++) {
19955 +		printk("Lockdown Data CPU %2d: 0x%8x\n",
19956 +				i, readl_relaxed(ld_d_reg(i)));
19957 +		printk("Lockdown Inst CPU %2d: 0x%8x\n",
19958 +				i, readl_relaxed(ld_i_reg(i)));
19959 +	}
19960 +}
19961 +
19962 +static void test_lockdown(void *ignore)
19963 +{
19964 +	int i;
19965 +
19966 +	printk("Start lockdown test on CPU %d.\n", smp_processor_id());
19967 +
19968 +	for (i = 0; i < nr_lockregs; i++) {
19969 +		printk("CPU %2d data reg: 0x%8p\n", i, ld_d_reg(i));
19970 +		printk("CPU %2d inst reg: 0x%8p\n", i, ld_i_reg(i));
19971 +	}
19972 +
19973 +	printk("Lockdown initial state:\n");
19974 +	print_lockdown_registers();
19975 +	printk("---\n");
19976 +
19977 +	for (i = 0; i < nr_lockregs; i++) {
19978 +		writel_relaxed(1, ld_d_reg(i));
19979 +		writel_relaxed(2, ld_i_reg(i));
19980 +	}
19981 +	printk("Lockdown all data=1 instr=2:\n");
19982 +	print_lockdown_registers();
19983 +	printk("---\n");
19984 +
19985 +	for (i = 0; i < nr_lockregs; i++) {
19986 +		writel_relaxed((1 << i), ld_d_reg(i));
19987 +		writel_relaxed(((1 << 8) >> i), ld_i_reg(i));
19988 +	}
19989 +	printk("Lockdown varies:\n");
19990 +	print_lockdown_registers();
19991 +	printk("---\n");
19992 +
19993 +	for (i = 0; i < nr_lockregs; i++) {
19994 +		writel_relaxed(UNLOCK_ALL, ld_d_reg(i));
19995 +		writel_relaxed(UNLOCK_ALL, ld_i_reg(i));
19996 +	}
19997 +	printk("Lockdown all zero:\n");
19998 +	print_lockdown_registers();
19999 +
20000 +	printk("End lockdown test.\n");
20001 +}
20002 +
20003 +void litmus_setup_lockdown(void __iomem *base, u32 id)
20004 +{
20005 +	cache_base = base;
20006 +	cache_id = id;
20007 +	lockreg_d = cache_base + L2X0_LOCKDOWN_WAY_D_BASE;
20008 +	lockreg_i = cache_base + L2X0_LOCKDOWN_WAY_I_BASE;
20009 +
20010 +	if (L2X0_CACHE_ID_PART_L310 == (cache_id & L2X0_CACHE_ID_PART_MASK)) {
20011 +		nr_lockregs = 8;
20012 +	} else {
20013 +		printk("Unknown cache ID!\n");
20014 +		nr_lockregs = 1;
20015 +	}
20016 +	
20017 +	mutex_init(&actlr_mutex);
20018 +	mutex_init(&l2x0_prefetch_mutex);
20019 +	mutex_init(&lockdown_proc);
20020 +	
20021 +	test_lockdown(NULL);
20022 +}
20023 +int lock_all_handler(struct ctl_table *table, int write, void __user *buffer,
20024 +		size_t *lenp, loff_t *ppos)
20025 +{
20026 +	int ret = 0, i;
20027 +	
20028 +	mutex_lock(&lockdown_proc);
20029 +	
20030 +	flush_cache_all();
20031 +	
20032 +	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
20033 +	if (ret)
20034 +		goto out;
20035 +	
20036 +	if (write && lock_all == 1) {
20037 +		for (i = 0; i < nr_lockregs;  i++) {
20038 +			writel_relaxed(nr_unlocked_way[0], ld_d_reg(i));
20039 +			writel_relaxed(nr_unlocked_way[0], ld_i_reg(i));
20040 +		}
20041 +		print_lockdown_registers();
20042 +	}
20043 +
20044 +out:
20045 +	mutex_unlock(&lockdown_proc);
20046 +	return ret;
20047 +}
20048 +
20049 +int l2_usable_ways_handler(struct ctl_table *table, int write, void __user *buffer,
20050 +		size_t *lenp, loff_t *ppos)
20051 +{
20052 +	int ret = 0, i = 0;
20053 +	
20054 +	mutex_lock(&lockdown_proc);
20055 +	
20056 +	flush_cache_all();
20057 +	
20058 +	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
20059 +	if (ret)
20060 +		goto out;
20061 +		
20062 +	TRACE_CUR("l2_usable_ways : %d\n", l2_usable_ways);
20063 +	
20064 +	if (write) {
20065 +		//for (i = 0; i < nr_lockregs;  i++) {
20066 +			writel_relaxed(nr_unlocked_way[l2_usable_ways], ld_d_reg(i));
20067 +			writel_relaxed(nr_unlocked_way[l2_usable_ways], ld_i_reg(i));
20068 +		//}
20069 +		print_lockdown_registers();
20070 +	}
20071 +
20072 +out:
20073 +	mutex_unlock(&lockdown_proc);
20074 +	return ret;
20075 +}
20076 +
20077 +static struct ctl_table cache_table[] =
20078 +{
20079 +	{
20080 +		.procname	= "l2_usable_ways",
20081 +		.mode		= 0666,
20082 +		.proc_handler	= l2_usable_ways_handler,
20083 +		.data		= &l2_usable_ways,
20084 +		.maxlen		= sizeof(l2_usable_ways),
20085 +		.extra1		= &min_usable_ways,
20086 +		.extra2		= &max_usable_ways,
20087 +	},
20088 +	{
20089 +		.procname	= "lock_all",
20090 +		.mode		= 0666,
20091 +		.proc_handler	= lock_all_handler,
20092 +		.data		= &lock_all,
20093 +		.maxlen		= sizeof(lock_all),
20094 +		.extra1		= &zero,
20095 +		.extra2		= &one,
20096 +	},
20097 +	{ }
20098 +};
20099 +
20100 +static struct ctl_table litmus_dir_table[] = {
20101 +	{
20102 +		.procname	= "litmus",
20103 +		.mode		= 0555,
20104 +		.child		= cache_table,
20105 +	},
20106 +	{ }
20107 +};
20108 +
20109 +static struct ctl_table_header *litmus_sysctls;
20110 +
20111 +static int __init litmus_sysctl_init(void)
20112 +{
20113 +	int ret = 0;
20114 +
20115 +	printk(KERN_INFO "Registering LITMUS^RT proc sysctl.\n");
20116 +	litmus_sysctls = register_sysctl_table(litmus_dir_table);
20117 +	if (!litmus_sysctls) {
20118 +		printk(KERN_WARNING "Could not register LITMUS^RT sysctl.\n");
20119 +		ret = -EFAULT;
20120 +		goto out;
20121 +	}
20122 +
20123 +	l2_usable_ways = 16;
20124 +
20125 +out:
20126 +	return ret;
20127 +}
20128 +
20129 +module_init(litmus_sysctl_init);
20130 \ No newline at end of file
20131 -- 
20132 1.8.1.2
20133 
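The nr_unlocked_way[] table in the patch above is simply the mask 0xFFFFFFFF << n for n usable (unlocked) ways, and since the table is registered under the "litmus" sysctl directory, the two knobs should appear as /proc/sys/litmus/l2_usable_ways and /proc/sys/litmus/lock_all. A minimal user-space sketch (not part of the patch series) that restricts L2 allocation to four ways:

/* Hypothetical usage sketch: write the number of usable (unlocked) L2 ways
 * through the sysctl file added by this patch. The path follows from the
 * "litmus" directory registered in litmus_sysctl_init(). */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/litmus/l2_usable_ways", "w");

	if (!f) {
		perror("l2_usable_ways");
		return 1;
	}
	/* 4 usable ways corresponds to lockdown mask 0xFFFFFFF0 (nr_unlocked_way[4]) */
	fprintf(f, "4\n");
	return fclose(f) ? 1 : 0;
}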
20134 
20135 From 709a4f8279a10ad85f5688808d11ffabff9ef25c Mon Sep 17 00:00:00 2001
20136 From: Namhoon Kim <namhoonk@cs.unc.edu>
20137 Date: Mon, 3 Nov 2014 21:54:53 -0500
20138 Subject: [PATCH 054/119] Add MC2 plugin
20139 
20140 ---
20141  include/litmus/mc2_common.h |  39 ++
20142  litmus/mc2_common.c         |  30 ++
20143  litmus/sched_mc2.c          | 842 ++++++++++++++++++++++++++++++++++++++++++++
20144  3 files changed, 911 insertions(+)
20145  create mode 100644 include/litmus/mc2_common.h
20146  create mode 100644 litmus/mc2_common.c
20147  create mode 100644 litmus/sched_mc2.c
20148 
20149 diff --git a/include/litmus/mc2_common.h b/include/litmus/mc2_common.h
20150 new file mode 100644
20151 index 0000000..a1d571f
20152 --- /dev/null
20153 +++ b/include/litmus/mc2_common.h
20154 @@ -0,0 +1,39 @@
20155 +/*
20156 + * MC^2 common data structures
20157 + */
20158 + 
20159 +#ifndef __UNC_MC2_COMMON_H__
20160 +#define __UNC_MC2_COMMON_H__
20161 +
20162 +enum crit_level {
20163 +	CRIT_LEVEL_A = 0,
20164 +	CRIT_LEVEL_B = 1,
20165 +	CRIT_LEVEL_C = 2,
20166 +	NUM_CRIT_LEVELS = 3,
20167 +};
20168 +
20169 +struct mc2_task {
20170 +	enum crit_level crit;
20171 +	pid_t pid;
20172 +	lt_t hyperperiod;
20173 +};
20174 +
20175 +#ifdef __KERNEL__
20176 +
20177 +#include <litmus/reservation.h>
20178 +
20179 +struct mc2_param{
20180 +	struct mc2_task mc2_task;
20181 +};
20182 +
20183 +struct mc2_task_client {
20184 +	struct task_client tc;
20185 +	struct mc2_param mc2;
20186 +};
20187 +
20188 +long mc2_task_client_init(struct mc2_task_client *mtc, struct task_struct *tsk,
20189 +							struct reservation *res);
20190 +	
20191 +#endif /* __KERNEL__ */
20192 +
20193 +#endif
20194 \ No newline at end of file
20195 diff --git a/litmus/mc2_common.c b/litmus/mc2_common.c
20196 new file mode 100644
20197 index 0000000..56ef6b5
20198 --- /dev/null
20199 +++ b/litmus/mc2_common.c
20200 @@ -0,0 +1,30 @@
20201 +/*
20202 + * litmus/mc2_common.c
20203 + *
20204 + * Common functions for MC2 plugin.
20205 + */
20206 +
20207 +#include <linux/percpu.h>
20208 +#include <linux/sched.h>
20209 +#include <linux/list.h>
20210 +
20211 +#include <litmus/litmus.h>
20212 +#include <litmus/sched_plugin.h>
20213 +#include <litmus/sched_trace.h>
20214 +
20215 +#include <litmus/mc2_common.h>
20216 +
20217 +long mc2_task_client_init(
20218 +	struct mc2_task_client *mtc,
20219 +	struct task_struct *tsk,
20220 +	struct reservation *res
20221 +)
20222 +{
20223 +	task_client_init(&mtc->tc, tsk, res);
20224 +	if ((mtc->mc2.mc2_task.crit < CRIT_LEVEL_A) ||
20225 +		(mtc->mc2.mc2_task.crit > CRIT_LEVEL_C))
20226 +		return -EINVAL;
20227 +	
20228 +	TRACE_TASK(tsk, "mc2_task_client_init: crit_level = %d\n", mtc->mc2.mc2_task.crit);
20229 +	return 0;
20230 +}
20231 \ No newline at end of file
20232 diff --git a/litmus/sched_mc2.c b/litmus/sched_mc2.c
20233 new file mode 100644
20234 index 0000000..ab35008
20235 --- /dev/null
20236 +++ b/litmus/sched_mc2.c
20237 @@ -0,0 +1,842 @@
20238 +#include <linux/percpu.h>
20239 +#include <linux/slab.h>
20240 +#include <asm/uaccess.h>
20241 +
20242 +#include <litmus/sched_plugin.h>
20243 +#include <litmus/preempt.h>
20244 +#include <litmus/debug_trace.h>
20245 +
20246 +#include <litmus/litmus.h>
20247 +#include <litmus/jobs.h>
20248 +#include <litmus/budget.h>
20249 +#include <litmus/litmus_proc.h>
20250 +
20251 +#include <litmus/mc2_common.h>
20252 +#include <litmus/reservation.h>
20253 +#include <litmus/polling_reservations.h>
20254 +
20255 +struct mc2_task_state {
20256 +	struct mc2_task_client res_info;
20257 +	int cpu;
20258 +	bool has_departed;
20259 +};
20260 +
20261 +struct mc2_cpu_state {
20262 +	raw_spinlock_t lock;
20263 +
20264 +	struct sup_reservation_environment sup_env;
20265 +	struct hrtimer timer;
20266 +
20267 +	int cpu;
20268 +	struct task_struct* scheduled;
20269 +
20270 +#ifdef CONFIG_RELEASE_MASTER
20271 +	int release_master;
20272 +	/* used to delegate releases */
20273 +	struct hrtimer_start_on_info info;
20274 +#endif
20275 +};
20276 +
20277 +static DEFINE_PER_CPU(struct mc2_cpu_state, mc2_cpu_state);
20278 +
20279 +#define cpu_state_for(cpu_id)	(&per_cpu(mc2_cpu_state, cpu_id))
20280 +#define local_cpu_state()	(&__get_cpu_var(mc2_cpu_state))
20281 +
20282 +static struct mc2_task_state* get_mc2_state(struct task_struct *tsk)
20283 +{
20284 +	return (struct mc2_task_state*) tsk_rt(tsk)->plugin_state;
20285 +}
20286 +
20287 +static void task_departs(struct task_struct *tsk, int job_complete)
20288 +{
20289 +	struct mc2_task_state* state = get_mc2_state(tsk);
20290 +	struct reservation* res;
20291 +	struct reservation_client *client;
20292 +
20293 +	res    = state->res_info.tc.client.reservation;
20294 +	client = &state->res_info.tc.client;
20295 +
20296 +	res->ops->client_departs(res, client, job_complete);
20297 +	state->has_departed = true;
20298 +}
20299 +
20300 +static void task_arrives(struct task_struct *tsk)
20301 +{
20302 +	struct mc2_task_state* state = get_mc2_state(tsk);
20303 +	struct reservation* res;
20304 +	struct reservation_client *client;
20305 +
20306 +	res    = state->res_info.tc.client.reservation;
20307 +	client = &state->res_info.tc.client;
20308 +
20309 +	state->has_departed = false;
20310 +	res->ops->client_arrives(res, client);
20311 +}
20312 +
20313 +/* NOTE: drops state->lock */
20314 +static void mc2_update_timer_and_unlock(struct mc2_cpu_state *state)
20315 +{
20316 +	int local;
20317 +	lt_t update, now;
20318 +
20319 +	update = state->sup_env.next_scheduler_update;
20320 +	now = state->sup_env.env.current_time;
20321 +	
20322 +	/* Be sure we're actually running on the right core,
20323 +	 * as mc2_update_timer() is also called from mc2_task_resume(),
20324 +	 * which might be called on any CPU when a thread resumes.
20325 +	 */
20326 +	local = local_cpu_state() == state;
20327 +	
20328 +	/* Must drop state lock before calling into hrtimer_start(), which
20329 +	 * may raise a softirq, which in turn may wake ksoftirqd. */
20330 +	raw_spin_unlock(&state->lock);
20331 +
20332 +	if (update <= now) {
20333 +		litmus_reschedule(state->cpu);
20334 +	} else if (likely(local && update != SUP_NO_SCHEDULER_UPDATE)) {
20335 +		/* Reprogram only if not already set correctly. */
20336 +		if (!hrtimer_active(&state->timer) ||
20337 +		    ktime_to_ns(hrtimer_get_expires(&state->timer)) != update) {
20338 +			TRACE("canceling timer...\n");
20339 +			hrtimer_cancel(&state->timer);
20340 +			TRACE("setting scheduler timer for %llu\n", update);
20341 +			/* We cannot use hrtimer_start() here because the
20342 +			 * wakeup flag must be set to zero. */
20343 +			__hrtimer_start_range_ns(&state->timer,
20344 +					ns_to_ktime(update),
20345 +					0 /* timer coalescing slack */,
20346 +					HRTIMER_MODE_ABS_PINNED,
20347 +					0 /* wakeup */);
20348 +		}
20349 +	} else if (unlikely(!local && update != SUP_NO_SCHEDULER_UPDATE)) {
20350 +		/* Poke remote core only if timer needs to be set earlier than
20351 +		 * it is currently set.
20352 +		 */
20353 +		TRACE("mc2_update_timer for remote CPU %d (update=%llu, "
20354 +		      "active:%d, set:%llu)\n",
20355 +			state->cpu,
20356 +			update,
20357 +			hrtimer_active(&state->timer),
20358 +			ktime_to_ns(hrtimer_get_expires(&state->timer)));
20359 +		if (!hrtimer_active(&state->timer) ||
20360 +		    ktime_to_ns(hrtimer_get_expires(&state->timer)) > update) {
20361 +			TRACE("poking CPU %d so that it can update its "
20362 +			       "scheduling timer (active:%d, set:%llu)\n",
20363 +			       state->cpu,
20364 +			       hrtimer_active(&state->timer),
20365 +			       ktime_to_ns(hrtimer_get_expires(&state->timer)));
20366 +			litmus_reschedule(state->cpu);
20367 +		}
20368 +	}
20369 +}
20370 +
20371 +static enum hrtimer_restart on_scheduling_timer(struct hrtimer *timer)
20372 +{
20373 +	unsigned long flags;
20374 +	enum hrtimer_restart restart = HRTIMER_NORESTART;
20375 +	struct mc2_cpu_state *state;
20376 +	lt_t update, now;
20377 +
20378 +	state = container_of(timer, struct mc2_cpu_state, timer);
20379 +
20380 +	/* The scheduling timer should only fire on the local CPU, because
20381 +	 * otherwise deadlocks via timer_cancel() are possible.
20382 +	 * Note: this does not interfere with dedicated interrupt handling, as
20383 +	 * even under dedicated interrupt handling scheduling timers for
20384 +	 * budget enforcement must occur locally on each CPU.
20385 +	 */
20386 +	BUG_ON(state->cpu != raw_smp_processor_id());
20387 +
20388 +	raw_spin_lock_irqsave(&state->lock, flags);
20389 +	sup_update_time(&state->sup_env, litmus_clock());
20390 +
20391 +	update = state->sup_env.next_scheduler_update;
20392 +	now = state->sup_env.env.current_time;
20393 +
20394 +	TRACE_CUR("on_scheduling_timer at %llu, upd:%llu (for cpu=%d)\n",
20395 +		now, update, state->cpu);
20396 +
20397 +	if (update <= now) {
20398 +		litmus_reschedule_local();
20399 +	} else if (update != SUP_NO_SCHEDULER_UPDATE) {
20400 +		hrtimer_set_expires(timer, ns_to_ktime(update));
20401 +		restart = HRTIMER_RESTART;
20402 +	}
20403 +
20404 +	raw_spin_unlock_irqrestore(&state->lock, flags);
20405 +
20406 +	return restart;
20407 +}
20408 +
20409 +static long mc2_complete_job(void)
20410 +{
20411 +	ktime_t next_release;
20412 +	long err;
20413 +
20414 +	TRACE_CUR("mc2_complete_job at %llu (deadline: %llu)\n", litmus_clock(),
20415 +					get_deadline(current));
20416 +
20417 +	tsk_rt(current)->completed = 1;
20418 +	prepare_for_next_period(current);
20419 +	next_release = ns_to_ktime(get_release(current));
20420 +	preempt_disable();
20421 +	TRACE_CUR("next_release=%llu\n", get_release(current));
20422 +	if (get_release(current) > litmus_clock()) {
20423 +		set_current_state(TASK_INTERRUPTIBLE);
20424 +		preempt_enable_no_resched();
20425 +		err = schedule_hrtimeout(&next_release, HRTIMER_MODE_ABS);
20426 +	} else {
20427 +		err = 0;
20428 +		TRACE_CUR("TARDY: release=%llu now=%llu\n", get_release(current), litmus_clock());
20429 +		preempt_enable();
20430 +	}
20431 +
20432 +	TRACE_CUR("mc2_complete_job returns [%d] at %llu\n", err, litmus_clock());
20433 +	return err;
20434 +}
20435 +
20436 +static struct task_struct* mc2_schedule(struct task_struct * prev)
20437 +{
20438 +	/* A NULL return value means "schedule background work". */
20439 +	struct mc2_cpu_state *state = local_cpu_state();
20440 +
20441 +	raw_spin_lock(&state->lock);
20442 +
20443 +	BUG_ON(state->scheduled && state->scheduled != prev);
20444 +	BUG_ON(state->scheduled && !is_realtime(prev));
20445 +
20446 +	/* update time */
20447 +	state->sup_env.will_schedule = true;
20448 +	sup_update_time(&state->sup_env, litmus_clock());
20449 +
20450 +	/* check if prev task complete */
20451 +	if (is_realtime(prev)) {
20452 +		TRACE_TASK(prev, "EXEC_TIME = %llu, EXEC_COST = %llu, REMAINING = %llu\n",
20453 +			get_exec_time(prev), get_exec_cost(prev), get_exec_cost(prev)-get_exec_time(prev)); 
20454 +	}
20455 +	if (is_realtime(prev) && (get_exec_time(prev) >= get_exec_cost(prev))) {
20456 +		TRACE_TASK(prev, "JOB COMPLETED! but is_completed = %d\n", is_completed(prev));
20457 +//		mc2_complete_job(prev);
20458 +	}
20459 +
20460 +	/* remove task from reservation if it blocks */
20461 +	if (is_realtime(prev) && !is_running(prev))
20462 +		task_departs(prev, is_completed(prev));
20463 +
20464 +	/* figure out what to schedule next */
20465 +	state->scheduled = sup_dispatch(&state->sup_env);
20466 +
20467 +	/* Notify LITMUS^RT core that we've arrived at a scheduling decision. */
20468 +	sched_state_task_picked();
20469 +
20470 +	/* program scheduler timer */
20471 +	state->sup_env.will_schedule = false;
20472 +	/* NOTE: drops state->lock */
20473 +	mc2_update_timer_and_unlock(state);
20474 +	
20475 +	if (prev != state->scheduled && is_realtime(prev))
20476 +		TRACE_TASK(prev, "descheduled.\n");
20477 +	if (state->scheduled)
20478 +		TRACE_TASK(state->scheduled, "scheduled.\n");
20479 +
20480 +	return state->scheduled;
20481 +}
20482 +
20483 +static void resume_legacy_task_model_updates(struct task_struct *tsk)
20484 +{
20485 +	lt_t now;
20486 +	if (is_sporadic(tsk)) {
20487 +		/* If this sporadic task was gone for a "long" time and woke up past
20488 +		 * its deadline, then give it a new budget by triggering a job
20489 +		 * release. This is purely cosmetic and has no effect on the
20490 +		 * reservation-based scheduling of MC2 itself. */
20491 +
20492 +		now = litmus_clock();
20493 +		if (is_tardy(tsk, now))
20494 +			release_at(tsk, now);
20495 +	}
20496 +}
20497 +
20498 +/* Called when the state of tsk changes back to TASK_RUNNING.
20499 + * We need to requeue the task.
20500 + */
20501 +static void mc2_task_resume(struct task_struct  *tsk)
20502 +{
20503 +	unsigned long flags;
20504 +	struct mc2_task_state* tinfo = get_mc2_state(tsk);
20505 +	struct mc2_cpu_state *state = cpu_state_for(tinfo->cpu);
20506 +
20507 +	TRACE_TASK(tsk, "thread wakes up at %llu\n", litmus_clock());
20508 +
20509 +	raw_spin_lock_irqsave(&state->lock, flags);
20510 +	/* Requeue only if self-suspension was already processed. */
20511 +	if (tinfo->has_departed)
20512 +	{
20513 +		/* Assumption: litmus_clock() is synchronized across cores,
20514 +		 * since we might not actually be executing on tinfo->cpu
20515 +		 * at the moment. */
20516 +		sup_update_time(&state->sup_env, litmus_clock());
20517 +		task_arrives(tsk);
20518 +		/* NOTE: drops state->lock */
20519 +		mc2_update_timer_and_unlock(state);
20520 +		local_irq_restore(flags);
20521 +	} else {
20522 +		TRACE_TASK(tsk, "resume event ignored, still scheduled\n");
20523 +		raw_spin_unlock_irqrestore(&state->lock, flags);
20524 +	}
20525 +
20526 +	resume_legacy_task_model_updates(tsk);
20527 +}
20528 +
20529 +static long mc2_admit_task(struct task_struct *tsk)
20530 +{
20531 +	long err = -ESRCH;
20532 +	unsigned long flags;
20533 +	struct reservation *res;
20534 +	struct mc2_cpu_state *state;
20535 +	struct mc2_task_state *tinfo = kzalloc(sizeof(*tinfo), GFP_ATOMIC);
20536 +	struct mc2_task *mp = tsk_rt(tsk)->plugin_state;
20537 +	
20538 +	if (!tinfo)
20539 +		return -ENOMEM;
20540 +	
20541 +	if (!mp) {
20542 +		printk(KERN_ERR "mc2_admit_task: criticality level has not been set\n");
20543 +		return err;
20544 +	}
20545 +	
20546 +	preempt_disable();
20547 +
20548 +	state = cpu_state_for(task_cpu(tsk));
20549 +	raw_spin_lock_irqsave(&state->lock, flags);
20550 +
20551 +	res = sup_find_by_id(&state->sup_env, mp->pid);
20552 +
20553 +	/* found the appropriate reservation (or vCPU) */
20554 +	if (res) {
20555 +		TRACE_TASK(tsk, "FOUND RES\n");
20556 +		tinfo->res_info.mc2.mc2_task.crit = mp->crit;
20557 +		
20558 +		kfree(tsk_rt(tsk)->plugin_state);
20559 +		tsk_rt(tsk)->plugin_state = NULL;
20560 +	
20561 +		err = mc2_task_client_init(&tinfo->res_info, tsk, res);
20562 +		tinfo->cpu = task_cpu(tsk);
20563 +		tinfo->has_departed = true;
20564 +		tsk_rt(tsk)->plugin_state = tinfo;
20565 +		
20566 +		/* disable LITMUS^RT's per-thread budget enforcement */
20567 +		tsk_rt(tsk)->task_params.budget_policy = NO_ENFORCEMENT;
20568 +	}
20569 +
20570 +	raw_spin_unlock_irqrestore(&state->lock, flags);
20571 +
20572 +	preempt_enable();
20573 +
20574 +	if (err)
20575 +		kfree(tinfo);
20576 +
20577 +	return err;
20578 +}
20579 +
20580 +static void task_new_legacy_task_model_updates(struct task_struct *tsk)
20581 +{
20582 +	lt_t now = litmus_clock();
20583 +
20584 +	/* the first job exists starting as of right now */
20585 +	release_at(tsk, now);
20586 +}
20587 +
20588 +static void mc2_task_new(struct task_struct *tsk, int on_runqueue,
20589 +			  int is_running)
20590 +{
20591 +	unsigned long flags;
20592 +	struct mc2_task_state* tinfo = get_mc2_state(tsk);
20593 +	struct mc2_cpu_state *state = cpu_state_for(tinfo->cpu);
20594 +
20595 +	TRACE_TASK(tsk, "new RT task %llu (on_rq:%d, running:%d)\n",
20596 +		   litmus_clock(), on_runqueue, is_running);
20597 +
20598 +	/* acquire the lock protecting the state and disable interrupts */
20599 +	raw_spin_lock_irqsave(&state->lock, flags);
20600 +
20601 +	if (is_running) {
20602 +		state->scheduled = tsk;
20603 +		/* make sure this task should actually be running */
20604 +		litmus_reschedule_local();
20605 +	}
20606 +
20607 +	if (on_runqueue || is_running) {
20608 +		/* Assumption: litmus_clock() is synchronized across cores
20609 +		 * [see comment in mc2_task_resume()] */
20610 +		sup_update_time(&state->sup_env, litmus_clock());
20611 +		task_arrives(tsk);
20612 +		/* NOTE: drops state->lock */
20613 +		mc2_update_timer_and_unlock(state);
20614 +		local_irq_restore(flags);
20615 +	} else
20616 +		raw_spin_unlock_irqrestore(&state->lock, flags);
20617 +
20618 +	task_new_legacy_task_model_updates(tsk);
20619 +}
20620 +
20621 +static void mc2_task_exit(struct task_struct *tsk)
20622 +{
20623 +	unsigned long flags;
20624 +	struct mc2_task_state* tinfo = get_mc2_state(tsk);
20625 +	struct mc2_cpu_state *state = cpu_state_for(tinfo->cpu);
20626 +
20627 +	raw_spin_lock_irqsave(&state->lock, flags);
20628 +
20629 +	if (state->scheduled == tsk)
20630 +		state->scheduled = NULL;
20631 +
20632 +	/* remove from queues */
20633 +	if (is_running(tsk)) {
20634 +		/* Assumption: litmus_clock() is synchronized across cores
20635 +		 * [see comment in mc2_task_resume()] */
20636 +		sup_update_time(&state->sup_env, litmus_clock());
20637 +		task_departs(tsk, 0);
20638 +		/* NOTE: drops state->lock */
20639 +		mc2_update_timer_and_unlock(state);
20640 +		local_irq_restore(flags);
20641 +	} else
20642 +		raw_spin_unlock_irqrestore(&state->lock, flags);
20643 +
20644 +	kfree(tsk_rt(tsk)->plugin_state);
20645 +	tsk_rt(tsk)->plugin_state = NULL;
20646 +}
20647 +
20648 +asmlinkage long sys_set_mc2_task_param(pid_t pid, struct mc2_task __user * param)
20649 +{
20650 +	struct task_struct *target;
20651 +	int retval = -EINVAL;
20652 +	struct mc2_task *mp = kzalloc(sizeof(*mp), GFP_KERNEL);
20653 +	
20654 +	if (!mp)
20655 +		return -ENOMEM;
20656 +
20657 +	printk("Setting up mc^2 task parameters for process %d.\n", pid);
20658 +
20659 +	if (pid < 0 || param == 0) {
20660 +		goto out;
20661 +	}
20662 +	if (copy_from_user(mp, param, sizeof(*mp))) {
20663 +		retval = -EFAULT;
20664 +		goto out;
20665 +	}
20666 +
20667 +	/* Task search and manipulation must be protected */
20668 +	read_lock_irq(&tasklist_lock);
20669 +	if (!(target = find_task_by_vpid(pid))) {
20670 +		retval = -ESRCH;
20671 +		goto out_unlock;
20672 +	}
20673 +
20674 +	if (is_realtime(target)) {
20675 +		/* The task is already a real-time task.
20676 +		 * We cannot allow parameter changes at this point.
20677 +		 */
20678 +		retval = -EBUSY;
20679 +		goto out_unlock;
20680 +	}
20681 +	if (mp->crit < CRIT_LEVEL_A || mp->crit >= NUM_CRIT_LEVELS) {
20682 +		printk(KERN_INFO "litmus: real-time task %d rejected "
20683 +			"because of invalid criticality level\n", pid);
20684 +		goto out_unlock;
20685 +	}
20686 +	
20687 +	target->rt_param.plugin_state = mp;
20688 +
20689 +	retval = 0;
20690 +      out_unlock:
20691 +	read_unlock_irq(&tasklist_lock);
20692 +      out:
20693 +	return retval;
20694 +}
20695 +
20696 +static long create_polling_reservation(
20697 +	int res_type,
20698 +	struct reservation_config *config)
20699 +{
20700 +	struct mc2_cpu_state *state;
20701 +	struct reservation* res;
20702 +	struct polling_reservation *pres;
20703 +	unsigned long flags;
20704 +	int use_edf  = config->priority == LITMUS_NO_PRIORITY;
20705 +	int periodic =  res_type == PERIODIC_POLLING;
20706 +	long err = -EINVAL;
20707 +
20708 +	if (config->polling_params.budget >
20709 +	    config->polling_params.period) {
20710 +		printk(KERN_ERR "invalid polling reservation (%u): "
20711 +		       "budget > period\n", config->id);
20712 +		return -EINVAL;
20713 +	}
20714 +	if (config->polling_params.budget >
20715 +	    config->polling_params.relative_deadline
20716 +	    && config->polling_params.relative_deadline) {
20717 +		printk(KERN_ERR "invalid polling reservation (%u): "
20718 +		       "budget > deadline\n", config->id);
20719 +		return -EINVAL;
20720 +	}
20721 +	if (config->polling_params.offset >
20722 +	    config->polling_params.period) {
20723 +		printk(KERN_ERR "invalid polling reservation (%u): "
20724 +		       "offset > period\n", config->id);
20725 +		return -EINVAL;
20726 +	}
20727 +
20728 +	/* Allocate before we grab a spin lock.
20729 +	 * Todo: would be nice to use a core-local allocation.
20730 +	 */
20731 +	pres = kzalloc(sizeof(*pres), GFP_KERNEL);
20732 +	if (!pres)
20733 +		return -ENOMEM;
20734 +
20735 +	state = cpu_state_for(config->cpu);
20736 +	raw_spin_lock_irqsave(&state->lock, flags);
20737 +
20738 +	res = sup_find_by_id(&state->sup_env, config->id);
20739 +	if (!res) {
20740 +		polling_reservation_init(pres, use_edf, periodic,
20741 +			config->polling_params.budget,
20742 +			config->polling_params.period,
20743 +			config->polling_params.relative_deadline,
20744 +			config->polling_params.offset);
20745 +		pres->res.id = config->id;
20746 +		if (!use_edf)
20747 +			pres->res.priority = config->priority;
20748 +		sup_add_new_reservation(&state->sup_env, &pres->res);
20749 +		err = config->id;
20750 +	} else {
20751 +		err = -EEXIST;
20752 +	}
20753 +
20754 +	raw_spin_unlock_irqrestore(&state->lock, flags);
20755 +
20756 +	if (err < 0)
20757 +		kfree(pres);
20758 +
20759 +	return err;
20760 +}
20761 +
20762 +#define MAX_INTERVALS 1024
20763 +
20764 +static long create_table_driven_reservation(
20765 +	struct reservation_config *config)
20766 +{
20767 +	struct mc2_cpu_state *state;
20768 +	struct reservation* res;
20769 +	struct table_driven_reservation *td_res = NULL;
20770 +	struct lt_interval *slots = NULL;
20771 +	size_t slots_size;
20772 +	unsigned int i, num_slots;
20773 +	unsigned long flags;
20774 +	long err = -EINVAL;
20775 +
20776 +
20777 +	if (!config->table_driven_params.num_intervals) {
20778 +		printk(KERN_ERR "invalid table-driven reservation (%u): "
20779 +		       "no intervals\n", config->id);
20780 +		return -EINVAL;
20781 +	}
20782 +
20783 +	if (config->table_driven_params.num_intervals > MAX_INTERVALS) {
20784 +		printk(KERN_ERR "invalid table-driven reservation (%u): "
20785 +		       "too many intervals (max: %d)\n", config->id, MAX_INTERVALS);
20786 +		return -EINVAL;
20787 +	}
20788 +
20789 +	num_slots = config->table_driven_params.num_intervals;
20790 +	slots_size = sizeof(slots[0]) * num_slots;
20791 +	slots = kzalloc(slots_size, GFP_KERNEL);
20792 +	if (!slots)
20793 +		return -ENOMEM;
20794 +
20795 +	td_res = kzalloc(sizeof(*td_res), GFP_KERNEL);
20796 +	if (!td_res)
20797 +		err = -ENOMEM;
20798 +	else
20799 +		err = copy_from_user(slots,
20800 +			config->table_driven_params.intervals, slots_size);
20801 +
20802 +	for (i=0; i<num_slots;i++) {
20803 +		TRACE("###### [%llu, %llu]\n", slots[i].start, slots[i].end);
20804 +	}
20805 +	
20806 +	if (!err) {
20807 +		/* sanity checks */
20808 +		for (i = 0; !err && i < num_slots; i++)
20809 +			if (slots[i].end <= slots[i].start) {
20810 +				printk(KERN_ERR
20811 +				       "invalid table-driven reservation (%u): "
20812 +				       "invalid interval %u => [%llu, %llu]\n",
20813 +				       config->id, i,
20814 +				       slots[i].start, slots[i].end);
20815 +				err = -EINVAL;
20816 +			}
20817 +
20818 +		for (i = 0; !err && i + 1 < num_slots; i++)
20819 +			if (slots[i + 1].start <= slots[i].end) {
20820 +				printk(KERN_ERR
20821 +				       "invalid table-driven reservation (%u): "
20822 +				       "overlapping intervals %u, %u\n",
20823 +				       config->id, i, i + 1);
20824 +				err = -EINVAL;
20825 +			}
20826 +
20827 +		if (slots[num_slots - 1].end >
20828 +			config->table_driven_params.major_cycle_length) {
20829 +			printk(KERN_ERR
20830 +				"invalid table-driven reservation (%u): last "
20831 +				"interval ends past major cycle %llu > %llu\n",
20832 +				config->id,
20833 +				slots[num_slots - 1].end,
20834 +				config->table_driven_params.major_cycle_length);
20835 +			err = -EINVAL;
20836 +		}
20837 +	}
20838 +
20839 +	if (!err) {
20840 +		state = cpu_state_for(config->cpu);
20841 +		raw_spin_lock_irqsave(&state->lock, flags);
20842 +
20843 +		res = sup_find_by_id(&state->sup_env, config->id);
20844 +		if (!res) {
20845 +			table_driven_reservation_init(td_res,
20846 +				config->table_driven_params.major_cycle_length,
20847 +				slots, num_slots);
20848 +			td_res->res.id = config->id;
20849 +			td_res->res.priority = config->priority;
20850 +			sup_add_new_reservation(&state->sup_env, &td_res->res);
20851 +			err = config->id;
20852 +		} else {
20853 +			err = -EEXIST;
20854 +		}
20855 +
20856 +		raw_spin_unlock_irqrestore(&state->lock, flags);
20857 +	}
20858 +
20859 +	if (err < 0) {
20860 +		kfree(slots);
20861 +		kfree(td_res);
20862 +	}
20863 +
20864 +	TRACE("CREATE_TABLE_DRIVEN_RES = %d\n", err);
20865 +	return err;
20866 +}
20867 +
20868 +static long mc2_reservation_create(int res_type, void* __user _config)
20869 +{
20870 +	long ret = -EINVAL;
20871 +	struct reservation_config config;
20872 +
20873 +	TRACE("Attempt to create reservation (%d)\n", res_type);
20874 +
20875 +	if (copy_from_user(&config, _config, sizeof(config)))
20876 +		return -EFAULT;
20877 +
20878 +	if (config.cpu < 0 || !cpu_online(config.cpu)) {
20879 +		printk(KERN_ERR "invalid reservation (%u): "
20880 +		       "CPU %d offline\n", config.id, config.cpu);
20881 +		return -EINVAL;
20882 +	}
20883 +
20884 +	switch (res_type) {
20885 +		case PERIODIC_POLLING:
20886 +		case SPORADIC_POLLING:
20887 +			ret = create_polling_reservation(res_type, &config);
20888 +			break;
20889 +
20890 +		case TABLE_DRIVEN:
20891 +			ret = create_table_driven_reservation(&config);
20892 +			break;
20893 +
20894 +		default:
20895 +			return -EINVAL;
20896 +	};
20897 +
20898 +	return ret;
20899 +}
20900 +
20901 +static long mc2_reservation_destroy(unsigned int reservation_id, int cpu)
20902 +{
20903 +	long ret = -EINVAL;
20904 +	struct mc2_cpu_state *state;
20905 +	struct reservation *res, *next;
20906 +	struct sup_reservation_environment *sup_env;
20907 +	unsigned long flags;
20908 +	int found = 0;
20909 +	
20910 +	state = cpu_state_for(cpu);
20911 +	raw_spin_lock_irqsave(&state->lock, flags);
20912 +	
20913 +	//res = sup_find_by_id(&state->sup_env, reservation_id);
20914 +	sup_env = &state->sup_env;
20915 +	//if (!res) {
20916 +	list_for_each_entry_safe(res, next, &sup_env->depleted_reservations, list) {
20917 +		if (res->id == reservation_id) {
20918 +			list_del(&res->list);
20919 +			found = 1;
20920 +			ret = 0;
20921 +		}
20922 +	}
20923 +	if (!found) {
20924 +		list_for_each_entry_safe(res, next, &sup_env->inactive_reservations, list) {
20925 +			if (res->id == reservation_id) {
20926 +				list_del(&res->list);
20927 +				found = 1;
20928 +				ret = 0;
20929 +			}
20930 +		}
20931 +	}
20932 +	if (!found) {
20933 +		list_for_each_entry_safe(res, next, &sup_env->active_reservations, list) {
20934 +			if (res->id == reservation_id) {
20935 +				list_del(&res->list);
20936 +				found = 1;
20937 +				ret = 0;
20938 +			}
20939 +		}
20940 +	}
20941 +	//}
20942 +
20943 +	raw_spin_unlock_irqrestore(&state->lock, flags);
20944 +	
20945 +	TRACE("RESERVATION_DESTROY ret = %d\n", ret);
20946 +	return ret;
20947 +}
20948 +
20949 +static struct domain_proc_info mc2_domain_proc_info;
20950 +
20951 +static long mc2_get_domain_proc_info(struct domain_proc_info **ret)
20952 +{
20953 +	*ret = &mc2_domain_proc_info;
20954 +	return 0;
20955 +}
20956 +
20957 +static void mc2_setup_domain_proc(void)
20958 +{
20959 +	int i, cpu;
20960 +	int num_rt_cpus = num_online_cpus();
20961 +
20962 +	struct cd_mapping *cpu_map, *domain_map;
20963 +
20964 +	memset(&mc2_domain_proc_info, 0, sizeof(mc2_domain_proc_info));
20965 +	init_domain_proc_info(&mc2_domain_proc_info, num_rt_cpus, num_rt_cpus);
20966 +	mc2_domain_proc_info.num_cpus = num_rt_cpus;
20967 +	mc2_domain_proc_info.num_domains = num_rt_cpus;
20968 +
20969 +	i = 0;
20970 +	for_each_online_cpu(cpu) {
20971 +		cpu_map = &mc2_domain_proc_info.cpu_to_domains[i];
20972 +		domain_map = &mc2_domain_proc_info.domain_to_cpus[i];
20973 +
20974 +		cpu_map->id = cpu;
20975 +		domain_map->id = i;
20976 +		cpumask_set_cpu(i, cpu_map->mask);
20977 +		cpumask_set_cpu(cpu, domain_map->mask);
20978 +		++i;
20979 +	}
20980 +}
20981 +
20982 +static long mc2_activate_plugin(void)
20983 +{
20984 +	int cpu;
20985 +	struct mc2_cpu_state *state;
20986 +
20987 +	for_each_online_cpu(cpu) {
20988 +		TRACE("Initializing CPU%d...\n", cpu);
20989 +
20990 +		state = cpu_state_for(cpu);
20991 +
20992 +#ifdef CONFIG_RELEASE_MASTER
20993 +		state->release_master = atomic_read(&release_master_cpu);
20994 +		hrtimer_start_on_info_init(&state->info);
20995 +#endif
20996 +		
20997 +		raw_spin_lock_init(&state->lock);
20998 +		state->cpu = cpu;
20999 +		state->scheduled = NULL;
21000 +
21001 +		sup_init(&state->sup_env);
21002 +
21003 +		hrtimer_init(&state->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
21004 +		state->timer.function = on_scheduling_timer;
21005 +	}
21006 +
21007 +	mc2_setup_domain_proc();
21008 +
21009 +	return 0;
21010 +}
21011 +
21012 +static long mc2_deactivate_plugin(void)
21013 +{
21014 +	int cpu;
21015 +	struct mc2_cpu_state *state;
21016 +	struct reservation *res;
21017 +
21018 +	for_each_online_cpu(cpu) {
21019 +		state = cpu_state_for(cpu);
21020 +		raw_spin_lock(&state->lock);
21021 +
21022 +		hrtimer_cancel(&state->timer);
21023 +
21024 +		/* Delete all reservations --- assumes struct reservation
21025 +		 * is prefix of containing struct. */
21026 +
21027 +		while (!list_empty(&state->sup_env.active_reservations)) {
21028 +			res = list_first_entry(
21029 +				&state->sup_env.active_reservations,
21030 +			        struct reservation, list);
21031 +			list_del(&res->list);
21032 +			kfree(res);
21033 +		}
21034 +
21035 +		while (!list_empty(&state->sup_env.inactive_reservations)) {
21036 +			res = list_first_entry(
21037 +				&state->sup_env.inactive_reservations,
21038 +			        struct reservation, list);
21039 +			list_del(&res->list);
21040 +			kfree(res);
21041 +		}
21042 +
21043 +		while (!list_empty(&state->sup_env.depleted_reservations)) {
21044 +			res = list_first_entry(
21045 +				&state->sup_env.depleted_reservations,
21046 +			        struct reservation, list);
21047 +			list_del(&res->list);
21048 +			kfree(res);
21049 +		}
21050 +
21051 +		raw_spin_unlock(&state->lock);
21052 +	}
21053 +
21054 +	destroy_domain_proc_info(&mc2_domain_proc_info);
21055 +	return 0;
21056 +}
21057 +
21058 +static struct sched_plugin mc2_plugin = {
21059 +	.plugin_name		= "MC2",
21060 +	.schedule		= mc2_schedule,
21061 +	.task_wake_up		= mc2_task_resume,
21062 +	.admit_task		= mc2_admit_task,
21063 +	.task_new		= mc2_task_new,
21064 +	.task_exit		= mc2_task_exit,
21065 +	.complete_job           = mc2_complete_job,
21066 +	.get_domain_proc_info   = mc2_get_domain_proc_info,
21067 +	.activate_plugin	= mc2_activate_plugin,
21068 +	.deactivate_plugin      = mc2_deactivate_plugin,
21069 +	.reservation_create     = mc2_reservation_create,
21070 +	.reservation_destroy	= mc2_reservation_destroy,
21071 +};
21072 +
21073 +static int __init init_mc2(void)
21074 +{
21075 +	return register_sched_plugin(&mc2_plugin);
21076 +}
21077 +
21078 +module_init(init_mc2);
21079 +
21080 -- 
21081 1.8.1.2
21082 
21083 
21084 From abd10c08d222f23322ba91cc493ef1095bdb5f86 Mon Sep 17 00:00:00 2001
21085 From: Bjoern Brandenburg <bbb@mpi-sws.org>
21086 Date: Fri, 20 Jun 2014 11:29:09 +0200
21087 Subject: [PATCH 055/119] default_wait_for_release_at() should invoke plugin
21088  callback
21089 
21090 Instead of calling complete_job() directly, the default implementation
21091 of wait_for_release_at() should invoke the plugin-provided
21092 complete_job() method to support plugins that happen to override
21093 complete_job(), but not wait_for_release_at().
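21094 
21094 For illustration only (not part of the change): a hypothetical plugin that
21094 supplies its own complete_job() but leaves wait_for_release_at() unset
21094 (register_sched_plugin() then installs default_wait_for_release_at()) now has
21094 its completion handler invoked on a synchronous release. Minimal sketch, with
21094 the other mandatory callbacks omitted for brevity:
21094 
21094 	static long my_complete_job(void)
21094 	{
21094 		/* plugin-specific job-completion handling */
21094 		return 0;
21094 	}
21094 
21094 	static struct sched_plugin my_plugin = {
21094 		.plugin_name  = "EXAMPLE",
21094 		.complete_job = my_complete_job,
21094 		/* .wait_for_release_at left NULL: the default implementation
21094 		 * is installed and now calls back into my_complete_job(). */
21094 	};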
21094 ---
21095  litmus/jobs.c | 3 ++-
21096  1 file changed, 2 insertions(+), 1 deletion(-)
21097 
21098 diff --git a/litmus/jobs.c b/litmus/jobs.c
21099 index 2d9f8aa..547222c 100644
21100 --- a/litmus/jobs.c
21101 +++ b/litmus/jobs.c
21102 @@ -4,6 +4,7 @@
21103  #include <linux/sched.h>
21104  
21105  #include <litmus/litmus.h>
21106 +#include <litmus/sched_plugin.h>
21107  #include <litmus/jobs.h>
21108  
21109  static inline void setup_release(struct task_struct *t, lt_t release)
21110 @@ -58,7 +59,7 @@ long default_wait_for_release_at(lt_t release_time)
21111  	tsk_rt(t)->sporadic_release = 1;
21112  	local_irq_restore(flags);
21113  
21114 -	return complete_job();
21115 +	return litmus->complete_job();
21116  }
21117  
21118  
21119 -- 
21120 1.8.1.2
21121 
21122 
21123 From 70f269792d87c26f03a93c6715ea351c2eee62a3 Mon Sep 17 00:00:00 2001
21124 From: Bjoern Brandenburg <bbb@mpi-sws.org>
21125 Date: Sat, 14 Jun 2014 17:15:00 +0200
21126 Subject: [PATCH 056/119] Add void* plugin_state pointer to task_struct
21127 
21128 ---
21129  include/litmus/rt_param.h | 5 ++++-
21130  1 file changed, 4 insertions(+), 1 deletion(-)
21131 
21132 diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
21133 index e26535b..060b5d7 100644
21134 --- a/include/litmus/rt_param.h
21135 +++ b/include/litmus/rt_param.h
21136 @@ -249,7 +249,10 @@ struct rt_param {
21137  	volatile int		linked_on;
21138  
21139  	/* PFAIR/PD^2 state. Allocated on demand. */
21140 -	struct pfair_param*	pfair;
21141 +	union {
21142 +		void *plugin_state;
21143 +		struct pfair_param *pfair;
21144 +	};
21145  
21146  	/* Fields saved before BE->RT transition.
21147  	 */
21148 -- 
21149 1.8.1.2
21150 
21151 
21152 From 277dbeea9af1ca31add69636aef4b18892e54646 Mon Sep 17 00:00:00 2001
21153 From: Bjoern Brandenburg <bbb@mpi-sws.org>
21154 Date: Wed, 16 Jul 2014 16:52:13 +0200
21155 Subject: [PATCH 057/119] Add reservation creation API to plugin interface &
21156  syscalls
21157 
21158 ---
21159  include/litmus/sched_plugin.h |  9 +++++++++
21160  litmus/litmus.c               | 10 ++++++++++
21161  litmus/sched_plugin.c         | 13 +++++++++++++
21162  3 files changed, 32 insertions(+)
21163 
21164 diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h
21165 index 0ccccd6..cb663b8 100644
21166 --- a/include/litmus/sched_plugin.h
21167 +++ b/include/litmus/sched_plugin.h
21168 @@ -77,6 +77,11 @@ typedef long (*wait_for_release_at_t)(lt_t release_time);
21169  /* Informs the plugin when a synchronous release takes place. */
21170  typedef void (*synchronous_release_at_t)(lt_t time_zero);
21171  
21172 +/* Reservation creation/removal backends. The meaning of reservation_type
21173 + * and reservation_id is entirely plugin-specific. */
21174 +typedef long (*reservation_create_t)(int reservation_type, void* __user config);
21175 +typedef long (*reservation_destroy_t)(unsigned int reservation_id, int cpu);
21176 +
21177  /************************ misc routines ***********************/
21178  
21179  
21180 @@ -109,6 +114,10 @@ struct sched_plugin {
21181  	task_exit_t 		task_exit;
21182  	task_cleanup_t		task_cleanup;
21183  
21184 +	/* Reservation support */
21185 +	reservation_create_t	reservation_create;
21186 +	reservation_destroy_t	reservation_destroy;
21187 +
21188  #ifdef CONFIG_LITMUS_LOCKING
21189  	/*	locking protocols	*/
21190  	allocate_lock_t		allocate_lock;
21191 diff --git a/litmus/litmus.c b/litmus/litmus.c
21192 index 14b1031..0b87e04 100644
21193 --- a/litmus/litmus.c
21194 +++ b/litmus/litmus.c
21195 @@ -315,6 +315,16 @@ asmlinkage long sys_null_call(cycles_t __user *ts)
21196  	return ret;
21197  }
21198  
21199 +asmlinkage long sys_reservation_create(int type, void __user *config)
21200 +{
21201 +	return litmus->reservation_create(type, config);
21202 +}
21203 +
21204 +asmlinkage long sys_reservation_destroy(unsigned int reservation_id, int cpu)
21205 +{
21206 +	return litmus->reservation_destroy(reservation_id, cpu);
21207 +}
21208 +
21209  /* p is a real-time task. Re-init its state as a best-effort task. */
21210  static void reinit_litmus_state(struct task_struct* p, int restore)
21211  {
21212 diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
21213 index edd91e9..b917793 100644
21214 --- a/litmus/sched_plugin.c
21215 +++ b/litmus/sched_plugin.c
21216 @@ -132,6 +132,17 @@ static long litmus_dummy_allocate_lock(struct litmus_lock **lock, int type,
21217  
21218  #endif
21219  
21220 +static long litmus_dummy_reservation_create(
21221 +	int reservation_type,
21222 +	void* __user config)
21223 +{
21224 +	return -EINVAL;
21225 +}
21226 +
21227 +static long litmus_dummy_reservation_destroy(unsigned int reservation_id, int cpu)
21228 +{
21229 +	return -EINVAL;
21230 +}
21231  
21232  /* The default scheduler plugin. It doesn't do anything and lets Linux do its
21233   * job.
21234 @@ -193,6 +204,8 @@ int register_sched_plugin(struct sched_plugin* plugin)
21235  #endif
21236  	CHECK(admit_task);
21237  	CHECK(synchronous_release_at);
21238 +	CHECK(reservation_destroy);
21239 +	CHECK(reservation_create);
21240  
21241  	if (!plugin->wait_for_release_at)
21242  		plugin->wait_for_release_at = default_wait_for_release_at;
21243 -- 
21244 1.8.1.2
21245 
21246 
21247 From cd6cb2ecd3238a0a1f05408e0b8148c1ecc80f59 Mon Sep 17 00:00:00 2001
21248 From: Bjoern Brandenburg <bbb@mpi-sws.org>
21249 Date: Wed, 16 Jul 2014 17:29:07 +0200
21250 Subject: [PATCH 058/119] Add reservation system calls to x86 syscall table
21251 
21252 ---
21253  arch/x86/syscalls/syscall_32.tbl | 2 ++
21254  arch/x86/syscalls/syscall_64.tbl | 3 +++
21255  2 files changed, 5 insertions(+)
21256 
21257 diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
21258 index ffe39dd..290c879 100644
21259 --- a/arch/x86/syscalls/syscall_32.tbl
21260 +++ b/arch/x86/syscalls/syscall_32.tbl
21261 @@ -370,3 +370,5 @@
21262  360	i386	wait_for_ts_release	sys_wait_for_ts_release
21263  361	i386	release_ts		sys_release_ts
21264  362	i386	null_call		sys_null_call
21265 +363	i386	reservation_create	sys_reservation_create
21266 +364	i386	reservation_destroy	sys_reservation_destroy
21267 diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
21268 index cde714e..d39de2a 100644
21269 --- a/arch/x86/syscalls/syscall_64.tbl
21270 +++ b/arch/x86/syscalls/syscall_64.tbl
21271 @@ -333,6 +333,9 @@
21272  360	common	wait_for_ts_release	sys_wait_for_ts_release
21273  361	common	release_ts		sys_release_ts
21274  362	common	null_call		sys_null_call
21275 +363	common	reservation_create	sys_reservation_create
21276 +364	common	reservation_destroy	sys_reservation_destroy
21277 +
21278  
21279  #
21280  # x32-specific system call numbers start at 512 to avoid cache impact
21281 -- 
21282 1.8.1.2
21283 
21284 
21285 From 4b9d58f6f3441c8bbf37dfc24ae9dee04f64c9cb Mon Sep 17 00:00:00 2001
21286 From: Bjoern Brandenburg <bbb@mpi-sws.org>
21287 Date: Wed, 16 Jul 2014 17:38:05 +0200
21288 Subject: [PATCH 059/119] Add generic reservation syscall table definitions
21289 
21290 ---
21291  include/litmus/unistd_32.h | 4 +++-
21292  include/litmus/unistd_64.h | 7 ++++++-
21293  2 files changed, 9 insertions(+), 2 deletions(-)
21294 
21295 diff --git a/include/litmus/unistd_32.h b/include/litmus/unistd_32.h
21296 index 94264c2..5f6a274 100644
21297 --- a/include/litmus/unistd_32.h
21298 +++ b/include/litmus/unistd_32.h
21299 @@ -17,5 +17,7 @@
21300  #define __NR_wait_for_ts_release __LSC(9)
21301  #define __NR_release_ts		__LSC(10)
21302  #define __NR_null_call		__LSC(11)
21303 +#define __NR_reservation_create	__LSC(12)
21304 +#define __NR_reservation_destroy __LSC(13)
21305  
21306 -#define NR_litmus_syscalls 12
21307 +#define NR_litmus_syscalls 14
21308 diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h
21309 index d5ced0d..3e6b1d3 100644
21310 --- a/include/litmus/unistd_64.h
21311 +++ b/include/litmus/unistd_64.h
21312 @@ -29,5 +29,10 @@ __SYSCALL(__NR_wait_for_ts_release, sys_wait_for_ts_release)
21313  __SYSCALL(__NR_release_ts, sys_release_ts)
21314  #define __NR_null_call				__LSC(11)
21315  __SYSCALL(__NR_null_call, sys_null_call)
21316 +#define __NR_reservation_create			__LSC(12)
21317 +__SYSCALL(__NR_reservation_create, sys_reservation_create)
21318 +#define __NR_reservation_destroy		__LSC(13)
21319 +__SYSCALL(__NR_reservation_destroy, sys_reservation_destroy)
21320  
21321 -#define NR_litmus_syscalls 12
21322 +
21323 +#define NR_litmus_syscalls 14
21324 -- 
21325 1.8.1.2
21326 
21327 
21328 From 1d65b6286a0f6c13495eefbb41bd1cac3d420cc3 Mon Sep 17 00:00:00 2001
21329 From: Bjoern Brandenburg <bbb@mpi-sws.org>
21330 Date: Wed, 16 Jul 2014 17:38:37 +0200
21331 Subject: [PATCH 060/119]  Add reservation system calls to ARM syscall table
21332 
21333 ---
21334  arch/arm/kernel/calls.S | 2 ++
21335  1 file changed, 2 insertions(+)
21336 
21337 diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
21338 index 2da776a..ad22fcc 100644
21339 --- a/arch/arm/kernel/calls.S
21340 +++ b/arch/arm/kernel/calls.S
21341 @@ -401,6 +401,8 @@
21342          	CALL(sys_wait_for_ts_release)
21343  /* 390 */	CALL(sys_release_ts)
21344  		CALL(sys_null_call)
21345 +		CALL(sys_reservation_create)
21346 +		CALL(sys_reservation_destroy)
21347  
21348  #ifndef syscalls_counted
21349  .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
21350 -- 
21351 1.8.1.2
21352 
21353 
21354 From a88daa29933e6c2b1b3b4d616450a35137e59723 Mon Sep 17 00:00:00 2001
21355 From: Bjoern Brandenburg <bbb@mpi-sws.org>
21356 Date: Sat, 14 Jun 2014 12:42:30 +0200
21357 Subject: [PATCH 061/119] Add basic generic reservation-based scheduling
21358  infrastructure
21359 
21360 ---
21361  include/litmus/polling_reservations.h |  37 +++
21362  include/litmus/reservation.h          | 189 +++++++++++++++
21363  litmus/Makefile                       |   2 +
21364  litmus/polling_reservations.c         | 436 ++++++++++++++++++++++++++++++++++
21365  litmus/reservation.c                  | 298 +++++++++++++++++++++++
21366  5 files changed, 962 insertions(+)
21367  create mode 100644 include/litmus/polling_reservations.h
21368  create mode 100644 include/litmus/reservation.h
21369  create mode 100644 litmus/polling_reservations.c
21370  create mode 100644 litmus/reservation.c
21371 
21372 diff --git a/include/litmus/polling_reservations.h b/include/litmus/polling_reservations.h
21373 new file mode 100644
21374 index 0000000..9958a92
21375 --- /dev/null
21376 +++ b/include/litmus/polling_reservations.h
21377 @@ -0,0 +1,37 @@
21378 +#ifndef LITMUS_POLLING_RESERVATIONS_H
21379 +#define LITMUS_POLLING_RESERVATIONS_H
21380 +
21381 +#include <litmus/reservation.h>
21382 +
21383 +struct polling_reservation {
21384 +	/* extend basic reservation */
21385 +	struct reservation res;
21386 +
21387 +	lt_t max_budget;
21388 +	lt_t period;
21389 +	lt_t deadline;
21390 +	lt_t offset;
21391 +};
21392 +
21393 +void polling_reservation_init(struct polling_reservation *pres, int use_edf_prio,
21394 +	int use_periodic_polling, lt_t budget, lt_t period, lt_t deadline, lt_t offset);
21395 +
21396 +struct lt_interval {
21397 +	lt_t start;
21398 +	lt_t end;
21399 +};
21400 +
21401 +struct table_driven_reservation {
21402 +	/* extend basic reservation */
21403 +	struct reservation res;
21404 +
21405 +	lt_t major_cycle;
21406 +	unsigned int next_interval;
21407 +	unsigned int num_intervals;
21408 +	struct lt_interval *intervals;
21409 +};
21410 +
21411 +void table_driven_reservation_init(struct table_driven_reservation *tdres,
21412 +	lt_t major_cycle, struct lt_interval *intervals, unsigned int num_intervals);
21413 +
21414 +#endif
21415 diff --git a/include/litmus/reservation.h b/include/litmus/reservation.h
21416 new file mode 100644
21417 index 0000000..d8d6ce3
21418 --- /dev/null
21419 +++ b/include/litmus/reservation.h
21420 @@ -0,0 +1,189 @@
21421 +#ifndef LITMUS_RESERVATION_H
21422 +#define LITMUS_RESERVATION_H
21423 +
21424 +#include <linux/list.h>
21425 +#include <linux/hrtimer.h>
21426 +
21427 +struct reservation_client;
21428 +struct reservation_environment;
21429 +struct reservation;
21430 +
21431 +typedef enum {
21432 +	/* reservation has no clients, is not consuming budget */
21433 +	RESERVATION_INACTIVE = 0,
21434 +
21435 +	/* reservation has clients, consumes budget when scheduled */
21436 +	RESERVATION_ACTIVE,
21437 +
21438 +	/* reservation has no clients, but may be consuming budget */
21439 +	RESERVATION_ACTIVE_IDLE,
21440 +
21441 +	/* Reservation has no budget and waits for
21442 +	 * replenishment. May or may not have clients. */
21443 +	RESERVATION_DEPLETED,
21444 +} reservation_state_t;
21445 +
21446 +
21447 +/* ************************************************************************** */
21448 +
21449 +/* Select which task to dispatch. If NULL is returned, it means there is nothing
21450 + * to schedule right now and background work can be scheduled. */
21451 +typedef struct task_struct * (*dispatch_t)  (
21452 +	struct reservation_client *client
21453 +);
21454 +
21455 +/* Something that can be managed in a reservation and that can yield
21456 + * a process for dispatching. */
21457 +struct reservation_client {
21458 +	struct list_head list;
21459 +	dispatch_t dispatch;
21460 +};
21461 +
21462 +
21463 +/* ************************************************************************** */
21464 +
21465 +/* Called by reservations to request state change. */
21466 +typedef void (*reservation_change_state_t)  (
21467 +	struct reservation_environment* env,
21468 +	struct reservation *res,
21469 +	reservation_state_t new_state
21470 +);
21471 +
21472 +/* The framework within which reservations operate. */
21473 +struct reservation_environment {
21474 +	lt_t time_zero;
21475 +	lt_t current_time;
21476 +
21477 +	/* services invoked by reservations */
21478 +	reservation_change_state_t change_state;
21479 +};
21480 +
21481 +
21482 +/* ************************************************************************** */
21483 +
21484 +/* A new client is added or an existing client resumes. */
21485 +typedef void (*client_arrives_t)  (
21486 +	struct reservation *reservation,
21487 +	struct reservation_client *client
21488 +);
21489 +
21490 +/* A client suspends or terminates. */
21491 +typedef void (*client_departs_t)  (
21492 +	struct reservation *reservation,
21493 +	struct reservation_client *client,
21494 +	int did_signal_job_completion
21495 +);
21496 +
21497 +/* A previously requested replenishment has occurred. */
21498 +typedef void (*on_replenishment_timer_t)  (
21499 +	struct reservation *reservation
21500 +);
21501 +
21502 +/* Update the reservation's budget to reflect execution or idling. */
21503 +typedef void (*drain_budget_t) (
21504 +	struct reservation *reservation,
21505 +	lt_t how_much
21506 +);
21507 +
21508 +/* Select a ready task from one of the clients for scheduling. */
21509 +typedef struct task_struct* (*dispatch_client_t)  (
21510 +	struct reservation *reservation,
21511 +	lt_t *time_slice /* May be used to force rescheduling after
21512 +	                    some amount of time. 0 => no limit */
21513 +);
21514 +
21515 +
21516 +struct reservation_ops {
21517 +	dispatch_client_t dispatch_client;
21518 +
21519 +	client_arrives_t client_arrives;
21520 +	client_departs_t client_departs;
21521 +
21522 +	on_replenishment_timer_t replenish;
21523 +	drain_budget_t drain_budget;
21524 +};
21525 +
21526 +struct reservation {
21527 +	/* used to queue in environment */
21528 +	struct list_head list;
21529 +
21530 +	reservation_state_t state;
21531 +	unsigned int id;
21532 +
21533 +	/* exact meaning defined by impl. */
21534 +	lt_t priority;
21535 +	lt_t cur_budget;
21536 +	lt_t next_replenishment;
21537 +
21538 +	/* interaction with framework */
21539 +	struct reservation_environment *env;
21540 +	struct reservation_ops *ops;
21541 +
21542 +	struct list_head clients;
21543 +};
21544 +
21545 +void reservation_init(struct reservation *res);
21546 +
21547 +/* Default implementations */
21548 +
21549 +/* simply select the first client in the list, set *for_at_most to zero */
21550 +struct task_struct* default_dispatch_client(
21551 +	struct reservation *res,
21552 +	lt_t *for_at_most
21553 +);
21554 +
21555 +/* "connector" reservation client to hook up tasks with reservations */
21556 +struct task_client {
21557 +	struct reservation_client client;
21558 +	struct reservation* reservation;
21559 +	struct task_struct *task;
21560 +};
21561 +
21562 +void task_client_init(struct task_client *tc, struct task_struct *task,
21563 +	struct reservation *reservation);
21564 +
21565 +#define SUP_RESCHEDULE_NOW (0)
21566 +#define SUP_NO_SCHEDULER_UPDATE (ULLONG_MAX)
21567 +
21568 +/* A simple uniprocessor (SUP) flat (i.e., non-hierarchical) reservation
21569 + * environment.
21570 + */
21571 +struct sup_reservation_environment {
21572 +	struct reservation_environment env;
21573 +
21574 +	/* ordered by priority */
21575 +	struct list_head active_reservations;
21576 +
21577 +	/* ordered by next_replenishment */
21578 +	struct list_head depleted_reservations;
21579 +
21580 +	/* unordered */
21581 +	struct list_head inactive_reservations;
21582 +
21583 +	/* - SUP_RESCHEDULE_NOW means call sup_dispatch() now
21584 +	 * - SUP_NO_SCHEDULER_UPDATE means nothing to do
21585 +	 * any other value means program a timer for the given time
21586 +	 */
21587 +	lt_t next_scheduler_update;
21588 +	/* set to true if a call to sup_dispatch() is imminent */
21589 +	bool will_schedule;
21590 +};
21591 +
21592 +/* Contract:
21593 + *  - before calling into sup_ code, or any reservation methods,
21594 + *    update the time with sup_update_time(); and
21595 + *  - after calling into sup_ code, or any reservation methods,
21596 + *    check next_scheduler_update and program timer or trigger
21597 + *    scheduler invocation accordingly.
21598 + */
21599 +
21600 +void sup_init(struct sup_reservation_environment* sup_env);
21601 +void sup_add_new_reservation(struct sup_reservation_environment* sup_env,
21602 +	struct reservation* new_res);
21603 +void sup_update_time(struct sup_reservation_environment* sup_env, lt_t now);
21604 +struct task_struct* sup_dispatch(struct sup_reservation_environment* sup_env);
21605 +
21606 +struct reservation* sup_find_by_id(struct sup_reservation_environment* sup_env,
21607 +	unsigned int id);
21608 +
21609 +#endif
21610 diff --git a/litmus/Makefile b/litmus/Makefile
21611 index 84b173a..e3439c8 100644
21612 --- a/litmus/Makefile
21613 +++ b/litmus/Makefile
21614 @@ -32,3 +32,5 @@ obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
21615  obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o
21616  obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o
21617  obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o
21618 +
21619 +obj-y += reservation.o polling_reservations.o
21620 diff --git a/litmus/polling_reservations.c b/litmus/polling_reservations.c
21621 new file mode 100644
21622 index 0000000..08034c3
21623 --- /dev/null
21624 +++ b/litmus/polling_reservations.c
21625 @@ -0,0 +1,436 @@
21626 +#include <linux/sched.h>
21627 +
21628 +#include <litmus/litmus.h>
21629 +#include <litmus/reservation.h>
21630 +#include <litmus/polling_reservations.h>
21631 +
21632 +
21633 +static void periodic_polling_client_arrives(
21634 +	struct reservation* res,
21635 +	struct reservation_client *client
21636 +)
21637 +{
21638 +	struct polling_reservation *pres =
21639 +		container_of(res, struct polling_reservation, res);
21640 +	lt_t instances, tmp;
21641 +
21642 +	list_add_tail(&client->list, &res->clients);
21643 +
21644 +	switch (res->state) {
21645 +		case RESERVATION_INACTIVE:
21646 +			/* Figure out next replenishment time. */
21647 +			tmp = res->env->current_time - res->env->time_zero;
21648 +			instances =  div64_u64(tmp, pres->period);
21649 +			res->next_replenishment =
21650 +				(instances + 1) * pres->period + pres->offset;
21651 +
21652 +			TRACE("pol-res: activate tmp=%llu instances=%llu period=%llu nextrp=%llu cur=%llu\n",
21653 +				tmp, instances, pres->period, res->next_replenishment,
21654 +				res->env->current_time);
21655 +
21656 +			res->env->change_state(res->env, res,
21657 +				RESERVATION_DEPLETED);
21658 +			break;
21659 +
21660 +		case RESERVATION_ACTIVE:
21661 +		case RESERVATION_DEPLETED:
21662 +			/* do nothing */
21663 +			break;
21664 +
21665 +		case RESERVATION_ACTIVE_IDLE:
21666 +			res->env->change_state(res->env, res,
21667 +				RESERVATION_ACTIVE);
21668 +			break;
21669 +	}
21670 +}
21671 +
21672 +
21673 +static void periodic_polling_client_departs(
21674 +	struct reservation *res,
21675 +	struct reservation_client *client,
21676 +	int did_signal_job_completion
21677 +)
21678 +{
21679 +	list_del(&client->list);
21680 +
21681 +	switch (res->state) {
21682 +		case RESERVATION_INACTIVE:
21683 +		case RESERVATION_ACTIVE_IDLE:
21684 +			BUG(); /* INACTIVE or IDLE <=> no client */
21685 +			break;
21686 +
21687 +		case RESERVATION_ACTIVE:
21688 +			if (list_empty(&res->clients)) {
21689 +				res->env->change_state(res->env, res,
21690 +					did_signal_job_completion ?
21691 +						RESERVATION_DEPLETED :
21692 +						RESERVATION_ACTIVE_IDLE);
21693 +			} /* else: nothing to do, more clients ready */
21694 +			break;
21695 +
21696 +		case RESERVATION_DEPLETED:
21697 +			/* do nothing */
21698 +			break;
21699 +	}
21700 +}
21701 +
21702 +static void periodic_polling_on_replenishment(
21703 +	struct reservation *res
21704 +)
21705 +{
21706 +	struct polling_reservation *pres =
21707 +		container_of(res, struct polling_reservation, res);
21708 +
21709 +	/* replenish budget */
21710 +	res->cur_budget = pres->max_budget;
21711 +	res->next_replenishment += pres->period;
21712 +
21713 +	switch (res->state) {
21714 +		case RESERVATION_DEPLETED:
21715 +		case RESERVATION_INACTIVE:
21716 +		case RESERVATION_ACTIVE_IDLE:
21717 +			if (list_empty(&res->clients))
21718 +				/* no clients => poll again later */
21719 +				res->env->change_state(res->env, res,
21720 +					RESERVATION_INACTIVE);
21721 +			else
21722 +				/* we have clients & budget => ACTIVE */
21723 +				res->env->change_state(res->env, res,
21724 +					RESERVATION_ACTIVE);
21725 +			break;
21726 +
21727 +		case RESERVATION_ACTIVE:
21728 +			/* Replenished while active => tardy? In any case,
21729 +			 * go ahead and stay active. */
21730 +			break;
21731 +	}
21732 +}
21733 +
21734 +static void periodic_polling_on_replenishment_edf(
21735 +	struct reservation *res
21736 +)
21737 +{
21738 +	struct polling_reservation *pres =
21739 +		container_of(res, struct polling_reservation, res);
21740 +
21741 +	/* update current priority */
21742 +	res->priority = res->next_replenishment + pres->deadline;
21743 +
21744 +	/* do common updates */
21745 +	periodic_polling_on_replenishment(res);
21746 +}
21747 +
21748 +static void common_drain_budget(
21749 +		struct reservation *res,
21750 +		lt_t how_much)
21751 +{
21752 +	if (how_much >= res->cur_budget)
21753 +		res->cur_budget = 0;
21754 +	else
21755 +		res->cur_budget -= how_much;
21756 +
21757 +	switch (res->state) {
21758 +		case RESERVATION_DEPLETED:
21759 +		case RESERVATION_INACTIVE:
21760 +			BUG();
21761 +			break;
21762 +
21763 +		case RESERVATION_ACTIVE_IDLE:
21764 +		case RESERVATION_ACTIVE:
21765 +			if (!res->cur_budget) {
21766 +				res->env->change_state(res->env, res,
21767 +					RESERVATION_DEPLETED);
21768 +			} /* else: stay in current state */
21769 +			break;
21770 +	}
21771 +}
21772 +
21773 +static struct reservation_ops periodic_polling_ops_fp = {
21774 +	.dispatch_client = default_dispatch_client,
21775 +	.client_arrives = periodic_polling_client_arrives,
21776 +	.client_departs = periodic_polling_client_departs,
21777 +	.replenish = periodic_polling_on_replenishment,
21778 +	.drain_budget = common_drain_budget,
21779 +};
21780 +
21781 +static struct reservation_ops periodic_polling_ops_edf = {
21782 +	.dispatch_client = default_dispatch_client,
21783 +	.client_arrives = periodic_polling_client_arrives,
21784 +	.client_departs = periodic_polling_client_departs,
21785 +	.replenish = periodic_polling_on_replenishment_edf,
21786 +	.drain_budget = common_drain_budget,
21787 +};
21788 +
21789 +
21790 +
21791 +
21792 +static void sporadic_polling_client_arrives_fp(
21793 +	struct reservation* res,
21794 +	struct reservation_client *client
21795 +)
21796 +{
21797 +	struct polling_reservation *pres =
21798 +		container_of(res, struct polling_reservation, res);
21799 +
21800 +	list_add_tail(&client->list, &res->clients);
21801 +
21802 +	switch (res->state) {
21803 +		case RESERVATION_INACTIVE:
21804 +			/* Replenish now. */
21805 +			res->cur_budget = pres->max_budget;
21806 +			res->next_replenishment =
21807 +				res->env->current_time + pres->period;
21808 +
21809 +			res->env->change_state(res->env, res,
21810 +				RESERVATION_ACTIVE);
21811 +			break;
21812 +
21813 +		case RESERVATION_ACTIVE:
21814 +		case RESERVATION_DEPLETED:
21815 +			/* do nothing */
21816 +			break;
21817 +
21818 +		case RESERVATION_ACTIVE_IDLE:
21819 +			res->env->change_state(res->env, res,
21820 +				RESERVATION_ACTIVE);
21821 +			break;
21822 +	}
21823 +}
21824 +
21825 +static void sporadic_polling_client_arrives_edf(
21826 +	struct reservation* res,
21827 +	struct reservation_client *client
21828 +)
21829 +{
21830 +	struct polling_reservation *pres =
21831 +		container_of(res, struct polling_reservation, res);
21832 +
21833 +	list_add_tail(&client->list, &res->clients);
21834 +
21835 +	switch (res->state) {
21836 +		case RESERVATION_INACTIVE:
21837 +			/* Replenish now. */
21838 +			res->cur_budget = pres->max_budget;
21839 +			res->next_replenishment =
21840 +				res->env->current_time + pres->period;
21841 +			res->priority =
21842 +				res->env->current_time + pres->deadline;
21843 +
21844 +			res->env->change_state(res->env, res,
21845 +				RESERVATION_ACTIVE);
21846 +			break;
21847 +
21848 +		case RESERVATION_ACTIVE:
21849 +		case RESERVATION_DEPLETED:
21850 +			/* do nothing */
21851 +			break;
21852 +
21853 +		case RESERVATION_ACTIVE_IDLE:
21854 +			res->env->change_state(res->env, res,
21855 +				RESERVATION_ACTIVE);
21856 +			break;
21857 +	}
21858 +}
21859 +
21860 +static struct reservation_ops sporadic_polling_ops_fp = {
21861 +	.dispatch_client = default_dispatch_client,
21862 +	.client_arrives = sporadic_polling_client_arrives_fp,
21863 +	.client_departs = periodic_polling_client_departs,
21864 +	.replenish = periodic_polling_on_replenishment,
21865 +	.drain_budget = common_drain_budget,
21866 +};
21867 +
21868 +static struct reservation_ops sporadic_polling_ops_edf = {
21869 +	.dispatch_client = default_dispatch_client,
21870 +	.client_arrives = sporadic_polling_client_arrives_edf,
21871 +	.client_departs = periodic_polling_client_departs,
21872 +	.replenish = periodic_polling_on_replenishment_edf,
21873 +	.drain_budget = common_drain_budget,
21874 +};
21875 +
21876 +void polling_reservation_init(
21877 +	struct polling_reservation *pres,
21878 +	int use_edf_prio,
21879 +	int use_periodic_polling,
21880 +	lt_t budget, lt_t period, lt_t deadline, lt_t offset
21881 +)
21882 +{
21883 +	if (!deadline)
21884 +		deadline = period;
21885 +	BUG_ON(budget > period);
21886 +	BUG_ON(budget > deadline);
21887 +	BUG_ON(offset >= period);
21888 +
21889 +	reservation_init(&pres->res);
21890 +	pres->max_budget = budget;
21891 +	pres->period = period;
21892 +	pres->deadline = deadline;
21893 +	pres->offset = offset;
21894 +	if (use_periodic_polling) {
21895 +		if (use_edf_prio)
21896 +			pres->res.ops = &periodic_polling_ops_edf;
21897 +		else
21898 +			pres->res.ops = &periodic_polling_ops_fp;
21899 +	} else {
21900 +		if (use_edf_prio)
21901 +			pres->res.ops = &sporadic_polling_ops_edf;
21902 +		else
21903 +			pres->res.ops = &sporadic_polling_ops_fp;
21904 +	}
21905 +}
21906 +
21907 +
21908 +static lt_t td_cur_major_cycle_start(struct table_driven_reservation *tdres)
21909 +{
21910 +	lt_t x, tmp;
21911 +
21912 +	tmp = tdres->res.env->current_time - tdres->res.env->time_zero;
21913 +	x = div64_u64(tmp, tdres->major_cycle);
21914 +	x *= tdres->major_cycle;
21915 +	return x;
21916 +}
21917 +
21918 +
21919 +static lt_t td_next_major_cycle_start(struct table_driven_reservation *tdres)
21920 +{
21921 +	lt_t x, tmp;
21922 +
21923 +	tmp = tdres->res.env->current_time - tdres->res.env->time_zero;
21924 +	x = div64_u64(tmp, tdres->major_cycle) + 1;
21925 +	x *= tdres->major_cycle;
21926 +	return x;
21927 +}
21928 +
21929 +static void td_client_arrives(
21930 +	struct reservation* res,
21931 +	struct reservation_client *client
21932 +)
21933 +{
21934 +	struct table_driven_reservation *tdres =
21935 +		container_of(res, struct table_driven_reservation, res);
21936 +
21937 +	list_add_tail(&client->list, &res->clients);
21938 +
21939 +	switch (res->state) {
21940 +		case RESERVATION_INACTIVE:
21941 +			/* Figure out first replenishment time. */
21942 +			res->next_replenishment = td_next_major_cycle_start(tdres);
21943 +			res->next_replenishment += tdres->intervals[0].start;
21944 +			tdres->next_interval = 0;
21945 +
21946 +			res->env->change_state(res->env, res,
21947 +				RESERVATION_DEPLETED);
21948 +			break;
21949 +
21950 +		case RESERVATION_ACTIVE:
21951 +		case RESERVATION_DEPLETED:
21952 +			/* do nothing */
21953 +			break;
21954 +
21955 +		case RESERVATION_ACTIVE_IDLE:
21956 +			res->env->change_state(res->env, res,
21957 +				RESERVATION_ACTIVE);
21958 +			break;
21959 +	}
21960 +}
21961 +
21962 +static void td_client_departs(
21963 +	struct reservation *res,
21964 +	struct reservation_client *client,
21965 +	int did_signal_job_completion
21966 +)
21967 +{
21968 +	list_del(&client->list);
21969 +
21970 +	switch (res->state) {
21971 +		case RESERVATION_INACTIVE:
21972 +		case RESERVATION_ACTIVE_IDLE:
21973 +			BUG(); /* INACTIVE or IDLE <=> no client */
21974 +			break;
21975 +
21976 +		case RESERVATION_ACTIVE:
21977 +			if (list_empty(&res->clients)) {
21978 +				res->env->change_state(res->env, res,
21979 +						RESERVATION_ACTIVE_IDLE);
21980 +			} /* else: nothing to do, more clients ready */
21981 +			break;
21982 +
21983 +		case RESERVATION_DEPLETED:
21984 +			/* do nothing */
21985 +			break;
21986 +	}
21987 +}
21988 +
21989 +static lt_t td_interval_length(struct lt_interval *ival)
21990 +{
21991 +	return ival->end - ival->start;
21992 +}
21993 +
21994 +static void td_replenish(
21995 +	struct reservation *res
21996 +)
21997 +{
21998 +	struct table_driven_reservation *tdres =
21999 +		container_of(res, struct table_driven_reservation, res);
22000 +
22001 +	/* replenish budget */
22002 +	res->cur_budget = td_interval_length(tdres->intervals + tdres->next_interval);
22003 +
22004 +	tdres->next_interval = (tdres->next_interval + 1) % tdres->num_intervals;
22005 +	if (tdres->next_interval)
22006 +		res->next_replenishment = td_cur_major_cycle_start(tdres);
22007 +	else
22008 +		/* wrap to next major cycle */
22009 +		res->next_replenishment = td_next_major_cycle_start(tdres);
22010 +	res->next_replenishment += tdres->intervals[tdres->next_interval].start;
22011 +
22012 +
22013 +	switch (res->state) {
22014 +		case RESERVATION_DEPLETED:
22015 +		case RESERVATION_ACTIVE:
22016 +		case RESERVATION_ACTIVE_IDLE:
22017 +			if (list_empty(&res->clients))
22018 +				res->env->change_state(res->env, res,
22019 +					RESERVATION_ACTIVE_IDLE);
22020 +			else
22021 +				/* we have clients & budget => ACTIVE */
22022 +				res->env->change_state(res->env, res,
22023 +					RESERVATION_ACTIVE);
22024 +			break;
22025 +
22026 +		case RESERVATION_INACTIVE:
22027 +			BUG();
22028 +			break;
22029 +	}
22030 +}
22031 +
22032 +static struct reservation_ops td_ops = {
22033 +	.dispatch_client = default_dispatch_client,
22034 +	.client_arrives = td_client_arrives,
22035 +	.client_departs = td_client_departs,
22036 +	.replenish = td_replenish,
22037 +	.drain_budget = common_drain_budget,
22038 +};
22039 +
22040 +void table_driven_reservation_init(
22041 +	struct table_driven_reservation *tdres,
22042 +	lt_t major_cycle,
22043 +	struct lt_interval *intervals,
22044 +	unsigned int num_intervals)
22045 +{
22046 +	unsigned int i;
22047 +
22048 +	/* sanity checking */
22049 +	BUG_ON(!num_intervals);
22050 +	for (i = 0; i < num_intervals; i++)
22051 +		BUG_ON(intervals[i].end <= intervals[i].start);
22052 +	for (i = 0; i + 1 < num_intervals; i++)
22053 +		BUG_ON(intervals[i + 1].start <= intervals[i].end);
22054 +	BUG_ON(intervals[num_intervals - 1].end > major_cycle);
22055 +
22056 +	reservation_init(&tdres->res);
22057 +	tdres->major_cycle = major_cycle;
22058 +	tdres->intervals = intervals;
22059 +	tdres->num_intervals = num_intervals;
22060 +	tdres->res.ops = &td_ops;
22061 +}
22062 diff --git a/litmus/reservation.c b/litmus/reservation.c
22063 new file mode 100644
22064 index 0000000..bc32b2e
22065 --- /dev/null
22066 +++ b/litmus/reservation.c
22067 @@ -0,0 +1,298 @@
22068 +#include <linux/sched.h>
22069 +
22070 +#include <litmus/litmus.h>
22071 +#include <litmus/reservation.h>
22072 +
22073 +void reservation_init(struct reservation *res)
22074 +{
22075 +	memset(res, 0, sizeof(*res));
22076 +	res->state = RESERVATION_INACTIVE;
22077 +	INIT_LIST_HEAD(&res->clients);
22078 +}
22079 +
22080 +struct task_struct* default_dispatch_client(
22081 +	struct reservation *res,
22082 +	lt_t *for_at_most)
22083 +{
22084 +	struct reservation_client *client, *next;
22085 +	struct task_struct* tsk;
22086 +
22087 +	BUG_ON(res->state != RESERVATION_ACTIVE);
22088 +	*for_at_most = 0;
22089 +
22090 +	list_for_each_entry_safe(client, next, &res->clients, list) {
22091 +		tsk = client->dispatch(client);
22092 +		if (likely(tsk)) {
22093 +			return tsk;
22094 +		}
22095 +	}
22096 +	return NULL;
22097 +}
22098 +
22099 +static struct task_struct * task_client_dispatch(struct reservation_client *client)
22100 +{
22101 +	struct task_client *tc = container_of(client, struct task_client, client);
22102 +	return tc->task;
22103 +}
22104 +
22105 +void task_client_init(struct task_client *tc, struct task_struct *tsk,
22106 +	struct reservation *res)
22107 +{
22108 +	memset(&tc->client, 0, sizeof(tc->client));
22109 +	tc->client.dispatch = task_client_dispatch;
22110 +	tc->task = tsk;
22111 +	tc->reservation = res;
22112 +}
22113 +
22114 +static void sup_scheduler_update_at(
22115 +	struct sup_reservation_environment* sup_env,
22116 +	lt_t when)
22117 +{
22118 +	if (sup_env->next_scheduler_update > when)
22119 +		sup_env->next_scheduler_update = when;
22120 +}
22121 +
22122 +static void sup_scheduler_update_after(
22123 +	struct sup_reservation_environment* sup_env,
22124 +	lt_t timeout)
22125 +{
22126 +	sup_scheduler_update_at(sup_env, sup_env->env.current_time + timeout);
22127 +}
22128 +
22129 +static int _sup_queue_depleted(
22130 +	struct sup_reservation_environment* sup_env,
22131 +	struct reservation *res)
22132 +{
22133 +	struct list_head *pos;
22134 +	struct reservation *queued;
22135 +	int passed_earlier = 0;
22136 +
22137 +	list_for_each(pos, &sup_env->depleted_reservations) {
22138 +		queued = list_entry(pos, struct reservation, list);
22139 +		if (queued->next_replenishment > res->next_replenishment) {
22140 +			list_add(&res->list, pos->prev);
22141 +			return passed_earlier;
22142 +		} else
22143 +			passed_earlier = 1;
22144 +	}
22145 +
22146 +	list_add_tail(&res->list, &sup_env->depleted_reservations);
22147 +
22148 +	return passed_earlier;
22149 +}
22150 +
22151 +static void sup_queue_depleted(
22152 +	struct sup_reservation_environment* sup_env,
22153 +	struct reservation *res)
22154 +{
22155 +	int passed_earlier = _sup_queue_depleted(sup_env, res);
22156 +
22157 +	/* check for updated replenishment time */
22158 +	if (!passed_earlier)
22159 +		sup_scheduler_update_at(sup_env, res->next_replenishment);
22160 +}
22161 +
22162 +static int _sup_queue_active(
22163 +	struct sup_reservation_environment* sup_env,
22164 +	struct reservation *res)
22165 +{
22166 +	struct list_head *pos;
22167 +	struct reservation *queued;
22168 +	int passed_active = 0;
22169 +
22170 +	list_for_each(pos, &sup_env->active_reservations) {
22171 +		queued = list_entry(pos, struct reservation, list);
22172 +		if (queued->priority > res->priority) {
22173 +			list_add(&res->list, pos->prev);
22174 +			return passed_active;
22175 +		} else if (queued->state == RESERVATION_ACTIVE)
22176 +			passed_active = 1;
22177 +	}
22178 +
22179 +	list_add_tail(&res->list, &sup_env->active_reservations);
22180 +	return passed_active;
22181 +}
22182 +
22183 +static void sup_queue_active(
22184 +	struct sup_reservation_environment* sup_env,
22185 +	struct reservation *res)
22186 +{
22187 +	int passed_active = _sup_queue_active(sup_env, res);
22188 +
22189 +	/* check for possible preemption */
22190 +	if (res->state == RESERVATION_ACTIVE && !passed_active)
22191 +		sup_env->next_scheduler_update = SUP_RESCHEDULE_NOW;
22192 +}
22193 +
22194 +
22195 +static void sup_queue_reservation(
22196 +	struct sup_reservation_environment* sup_env,
22197 +	struct reservation *res)
22198 +{
22199 +	switch (res->state) {
22200 +		case RESERVATION_INACTIVE:
22201 +			list_add(&res->list, &sup_env->inactive_reservations);
22202 +			break;
22203 +
22204 +		case RESERVATION_DEPLETED:
22205 +			sup_queue_depleted(sup_env, res);
22206 +			break;
22207 +
22208 +		case RESERVATION_ACTIVE_IDLE:
22209 +		case RESERVATION_ACTIVE:
22210 +			sup_queue_active(sup_env, res);
22211 +			break;
22212 +	}
22213 +}
22214 +
22215 +void sup_add_new_reservation(
22216 +	struct sup_reservation_environment* sup_env,
22217 +	struct reservation* new_res)
22218 +{
22219 +	new_res->env = &sup_env->env;
22220 +	sup_queue_reservation(sup_env, new_res);
22221 +}
22222 +
22223 +struct reservation* sup_find_by_id(struct sup_reservation_environment* sup_env,
22224 +	unsigned int id)
22225 +{
22226 +	struct reservation *res;
22227 +
22228 +	list_for_each_entry(res, &sup_env->active_reservations, list) {
22229 +		if (res->id == id)
22230 +			return res;
22231 +	}
22232 +	list_for_each_entry(res, &sup_env->inactive_reservations, list) {
22233 +		if (res->id == id)
22234 +			return res;
22235 +	}
22236 +	list_for_each_entry(res, &sup_env->depleted_reservations, list) {
22237 +		if (res->id == id)
22238 +			return res;
22239 +	}
22240 +
22241 +	return NULL;
22242 +}
22243 +
22244 +static void sup_charge_budget(
22245 +	struct sup_reservation_environment* sup_env,
22246 +	lt_t delta)
22247 +{
22248 +	struct list_head *pos, *next;
22249 +	struct reservation *res;
22250 +
22251 +	list_for_each_safe(pos, next, &sup_env->active_reservations) {
22252 +		/* charge all ACTIVE_IDLE up to the first ACTIVE reservation */
22253 +		res = list_entry(pos, struct reservation, list);
22254 +		if (res->state == RESERVATION_ACTIVE) {
22255 +			res->ops->drain_budget(res, delta);
22256 +			/* stop at the first ACTIVE reservation */
22257 +			break;
22258 +		} else {
22259 +			BUG_ON(res->state != RESERVATION_ACTIVE_IDLE);
22260 +			res->ops->drain_budget(res, delta);
22261 +		}
22262 +	}
22263 +}
22264 +
22265 +static void sup_replenish_budgets(struct sup_reservation_environment* sup_env)
22266 +{
22267 +	struct list_head *pos, *next;
22268 +	struct reservation *res;
22269 +
22270 +	list_for_each_safe(pos, next, &sup_env->depleted_reservations) {
22271 +		res = list_entry(pos, struct reservation, list);
22272 +		if (res->next_replenishment <= sup_env->env.current_time) {
22273 +			res->ops->replenish(res);
22274 +		} else {
22275 +			/* list is ordered by increasing replenishment times */
22276 +			break;
22277 +		}
22278 +	}
22279 +
22280 +	/* request a scheduler update at the next replenishment instant */
22281 +	res = list_first_entry_or_null(&sup_env->depleted_reservations,
22282 +		struct reservation, list);
22283 +	if (res)
22284 +		sup_scheduler_update_at(sup_env, res->next_replenishment);
22285 +}
22286 +
22287 +void sup_update_time(
22288 +	struct sup_reservation_environment* sup_env,
22289 +	lt_t now)
22290 +{
22291 +	lt_t delta;
22292 +
22293 +	/* If the time didn't advance, there is nothing to do.
22294 +	 * This check makes it safe to call sup_update_time() potentially
22295 +	 * multiple times (e.g., via different code paths). */
22296 +	if (unlikely(now <= sup_env->env.current_time))
22297 +		return;
22298 +
22299 +	delta = now - sup_env->env.current_time;
22300 +	sup_env->env.current_time = now;
22301 +
22302 +	/* check if future updates are required */
22303 +	if (sup_env->next_scheduler_update <= sup_env->env.current_time)
22304 +		sup_env->next_scheduler_update = SUP_NO_SCHEDULER_UPDATE;
22305 +
22306 +	/* deplete budgets by passage of time */
22307 +	sup_charge_budget(sup_env, delta);
22308 +
22309 +	/* replenish any budgets that are due */
22310 +	sup_replenish_budgets(sup_env);
22311 +}
22312 +
22313 +struct task_struct* sup_dispatch(struct sup_reservation_environment* sup_env)
22314 +{
22315 +	struct reservation *res, *next;
22316 +	struct task_struct *tsk = NULL;
22317 +	lt_t time_slice;
22318 +
22319 +	list_for_each_entry_safe(res, next, &sup_env->active_reservations, list) {
22320 +		if (res->state == RESERVATION_ACTIVE) {
22321 +			tsk = res->ops->dispatch_client(res, &time_slice);
22322 +			if (likely(tsk)) {
22323 +				if (time_slice)
22324 +				    sup_scheduler_update_after(sup_env, time_slice);
22325 +				sup_scheduler_update_after(sup_env, res->cur_budget);
22326 +				return tsk;
22327 +			}
22328 +		}
22329 +	}
22330 +
22331 +	return NULL;
22332 +}
22333 +
22334 +static void sup_res_change_state(
22335 +	struct reservation_environment* env,
22336 +	struct reservation *res,
22337 +	reservation_state_t new_state)
22338 +{
22339 +	struct sup_reservation_environment* sup_env;
22340 +
22341 +	sup_env = container_of(env, struct sup_reservation_environment, env);
22342 +
22343 +	TRACE("reservation R%d state %d->%d at %llu\n",
22344 +		res->id, res->state, new_state, env->current_time);
22345 +
22346 +	list_del(&res->list);
22347 +	/* check if we need to reschedule because we lost an active reservation */
22348 +	if (res->state == RESERVATION_ACTIVE && !sup_env->will_schedule)
22349 +		sup_env->next_scheduler_update = SUP_RESCHEDULE_NOW;
22350 +	res->state = new_state;
22351 +	sup_queue_reservation(sup_env, res);
22352 +}
22353 +
22354 +void sup_init(struct sup_reservation_environment* sup_env)
22355 +{
22356 +	memset(sup_env, 0, sizeof(*sup_env));
22357 +
22358 +	INIT_LIST_HEAD(&sup_env->active_reservations);
22359 +	INIT_LIST_HEAD(&sup_env->depleted_reservations);
22360 +	INIT_LIST_HEAD(&sup_env->inactive_reservations);
22361 +
22362 +	sup_env->env.change_state = sup_res_change_state;
22363 +
22364 +	sup_env->next_scheduler_update = SUP_NO_SCHEDULER_UPDATE;
22365 +}
22366 -- 
22367 1.8.1.2
22368 
22369 
22370 From 62c4870dbe84cafc37ff9e3b867352ab2a02703f Mon Sep 17 00:00:00 2001
22371 From: Bjoern Brandenburg <bbb@mpi-sws.org>
22372 Date: Thu, 17 Jul 2014 13:54:11 +0200
22373 Subject: [PATCH 062/119] Add reservation configuration types to rt_param.h
22374 
22375 ---
22376  include/litmus/polling_reservations.h |  5 -----
22377  include/litmus/rt_param.h             | 41 +++++++++++++++++++++++++++++++++++
22378  2 files changed, 41 insertions(+), 5 deletions(-)
22379 
22380 diff --git a/include/litmus/polling_reservations.h b/include/litmus/polling_reservations.h
22381 index 9958a92..15910ed 100644
22382 --- a/include/litmus/polling_reservations.h
22383 +++ b/include/litmus/polling_reservations.h
22384 @@ -16,11 +16,6 @@ struct polling_reservation {
22385  void polling_reservation_init(struct polling_reservation *pres, int use_edf_prio,
22386  	int use_periodic_polling, lt_t budget, lt_t period, lt_t deadline, lt_t offset);
22387  
22388 -struct lt_interval {
22389 -	lt_t start;
22390 -	lt_t end;
22391 -};
22392 -
22393  struct table_driven_reservation {
22394  	/* extend basic reservation */
22395  	struct reservation res;
22396 diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
22397 index 060b5d7..b252cc1 100644
22398 --- a/include/litmus/rt_param.h
22399 +++ b/include/litmus/rt_param.h
22400 @@ -62,6 +62,7 @@ typedef enum {
22401  #define LITMUS_MAX_PRIORITY     512
22402  #define LITMUS_HIGHEST_PRIORITY   1
22403  #define LITMUS_LOWEST_PRIORITY    (LITMUS_MAX_PRIORITY - 1)
22404 +#define LITMUS_NO_PRIORITY	UINT_MAX
22405  
22406  /* Provide generic comparison macros for userspace,
22407   * in case that we change this later. */
22408 @@ -71,6 +72,46 @@ typedef enum {
22409  	((p) >= LITMUS_HIGHEST_PRIORITY &&	\
22410  	 (p) <= LITMUS_LOWEST_PRIORITY)
22411  
22412 +/* reservation support */
22413 +
22414 +typedef enum {
22415 +	PERIODIC_POLLING,
22416 +	SPORADIC_POLLING,
22417 +	TABLE_DRIVEN,
22418 +} reservation_type_t;
22419 +
22420 +struct lt_interval {
22421 +	lt_t start;
22422 +	lt_t end;
22423 +};
22424 +
22425 +#ifndef __KERNEL__
22426 +#define __user
22427 +#endif
22428 +
22429 +struct reservation_config {
22430 +	unsigned int id;
22431 +	unsigned int priority;
22432 +	int  cpu;
22433 +
22434 +	union {
22435 +		struct {
22436 +			lt_t period;
22437 +			lt_t budget;
22438 +			lt_t relative_deadline;
22439 +			lt_t offset;
22440 +		} polling_params;
22441 +
22442 +		struct {
22443 +			lt_t major_cycle_length;
22444 +			unsigned int num_intervals;
22445 +			struct lt_interval __user *intervals;
22446 +		} table_driven_params;
22447 +	};
22448 +};
22449 +
22450 +/* regular sporadic task support */
22451 +
22452  struct rt_task {
22453  	lt_t 		exec_cost;
22454  	lt_t 		period;
22455 -- 
22456 1.8.1.2
22457 
22458 
22459 From bb1ee06d3b70f0d546cbf829a9ffe3ff7e800e8a Mon Sep 17 00:00:00 2001
22460 From: Bjoern Brandenburg <bbb@mpi-sws.org>
22461 Date: Sat, 14 Jun 2014 17:16:06 +0200
22462 Subject: [PATCH 063/119] Add partitioned reservation-based scheduler plugin
22463  (P-RES)
22464 
22465 A simple partitioned scheduler that provides a reservation environment
22466 on each core, based on the generic reservations code.  Hierarchical
22467 scheduling is not supported in this version.
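22468 
22468 For illustration, a rough userspace sketch of how a task ends up inside a
22468 P-RES reservation: create a polling reservation on the target CPU via the new
22468 reservation_create syscall, then have the task set task_params.cpu to the
22468 reservation ID before becoming real-time, since that field is the key
22468 pres_admit_task() passes to sup_find_by_id(). The snippet assumes the
22468 LITMUS^RT user-space headers are on the include path (as liblitmus arranges);
22468 all time values are in nanoseconds.
22468 
22468 	#include <unistd.h>
22468 	#include <sys/syscall.h>
22468 	#include <litmus/rt_param.h>	/* reservation_config, PERIODIC_POLLING */
22468 
22468 	static int create_res_on_cpu0(void)
22468 	{
22468 		struct reservation_config cfg = {
22468 			.id       = 123,                /* arbitrary reservation ID */
22468 			.priority = LITMUS_NO_PRIORITY, /* EDF within the per-CPU env */
22468 			.cpu      = 0,
22468 			.polling_params = {
22468 				.budget            =  10000000ULL, /*  10 ms */
22468 				.period            = 100000000ULL, /* 100 ms */
22468 				.relative_deadline = 0, /* 0 => deadline = period */
22468 				.offset            = 0,
22468 			},
22468 		};
22468 
22468 		/* __NR_reservation_create as wired up by the preceding
22468 		 * syscall-table patches */
22468 		return syscall(__NR_reservation_create, PERIODIC_POLLING, &cfg);
22468 	}
22468 
22468 A task subsequently joins by setting its task_params.cpu to 123 (the
22468 reservation ID) before transitioning to real-time mode.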
22468 ---
22469  litmus/Makefile     |   2 +
22470  litmus/sched_pres.c | 631 ++++++++++++++++++++++++++++++++++++++++++++++++++++
22471  2 files changed, 633 insertions(+)
22472  create mode 100644 litmus/sched_pres.c
22473 
22474 diff --git a/litmus/Makefile b/litmus/Makefile
22475 index e3439c8..05021f5 100644
22476 --- a/litmus/Makefile
22477 +++ b/litmus/Makefile
22478 @@ -34,3 +34,5 @@ obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o
22479  obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o
22480  
22481  obj-y += reservation.o polling_reservations.o
22482 +
22483 +obj-y += sched_pres.o
22484 \ No newline at end of file
22485 diff --git a/litmus/sched_pres.c b/litmus/sched_pres.c
22486 new file mode 100644
22487 index 0000000..6779ffd
22488 --- /dev/null
22489 +++ b/litmus/sched_pres.c
22490 @@ -0,0 +1,631 @@
22491 +#include <linux/percpu.h>
22492 +#include <linux/slab.h>
22493 +#include <asm/uaccess.h>
22494 +
22495 +#include <litmus/sched_plugin.h>
22496 +#include <litmus/preempt.h>
22497 +#include <litmus/debug_trace.h>
22498 +
22499 +#include <litmus/litmus.h>
22500 +#include <litmus/jobs.h>
22501 +#include <litmus/budget.h>
22502 +#include <litmus/litmus_proc.h>
22503 +
22504 +#include <litmus/reservation.h>
22505 +#include <litmus/polling_reservations.h>
22506 +
22507 +struct pres_task_state {
22508 +	struct task_client res_info;
22509 +	int cpu;
22510 +};
22511 +
22512 +struct pres_cpu_state {
22513 +	raw_spinlock_t lock;
22514 +
22515 +	struct sup_reservation_environment sup_env;
22516 +	struct hrtimer timer;
22517 +
22518 +	int cpu;
22519 +	struct task_struct* scheduled;
22520 +};
22521 +
22522 +static DEFINE_PER_CPU(struct pres_cpu_state, pres_cpu_state);
22523 +
22524 +#define cpu_state_for(cpu_id)	(&per_cpu(pres_cpu_state, cpu_id))
22525 +#define local_cpu_state()	(&__get_cpu_var(pres_cpu_state))
22526 +
22527 +static struct pres_task_state* get_pres_state(struct task_struct *tsk)
22528 +{
22529 +	return (struct pres_task_state*) tsk_rt(tsk)->plugin_state;
22530 +}
22531 +
22532 +static void task_departs(struct task_struct *tsk, int job_complete)
22533 +{
22534 +	struct pres_task_state* state = get_pres_state(tsk);
22535 +	struct reservation* res;
22536 +	struct reservation_client *client;
22537 +
22538 +	res    = state->res_info.reservation;
22539 +	client = &state->res_info.client;
22540 +
22541 +	res->ops->client_departs(res, client, job_complete);
22542 +}
22543 +
22544 +static void task_arrives(struct task_struct *tsk)
22545 +{
22546 +	struct pres_task_state* state = get_pres_state(tsk);
22547 +	struct reservation* res;
22548 +	struct reservation_client *client;
22549 +
22550 +	res    = state->res_info.reservation;
22551 +	client = &state->res_info.client;
22552 +
22553 +	res->ops->client_arrives(res, client);
22554 +}
22555 +
22556 +static void pres_update_timer(struct pres_cpu_state *state)
22557 +{
22558 +	lt_t update, now;
22559 +
22560 +	update = state->sup_env.next_scheduler_update;
22561 +	now = state->sup_env.env.current_time;
22562 +	if (update <= now) {
22563 +		litmus_reschedule(state->cpu);
22564 +	} else if (update != SUP_NO_SCHEDULER_UPDATE) {
22565 +		/* reprogram only if not already set correctly */
22566 +		if (!hrtimer_active(&state->timer) ||
22567 +		    ktime_to_ns(hrtimer_get_expires(&state->timer)) != update) {
22568 +			TRACE("canceling timer...\n");
22569 +			hrtimer_cancel(&state->timer);
22570 +			TRACE("setting scheduler timer for %llu\n", update);
22571 +			hrtimer_start(&state->timer, ns_to_ktime(update),
22572 +				HRTIMER_MODE_ABS_PINNED);
22573 +		}
22574 +	}
22575 +}
22576 +
22577 +static enum hrtimer_restart on_scheduling_timer(struct hrtimer *timer)
22578 +{
22579 +	unsigned long flags;
22580 +	enum hrtimer_restart restart = HRTIMER_NORESTART;
22581 +	struct pres_cpu_state *state = local_cpu_state();
22582 +	lt_t update, now;
22583 +
22584 +	raw_spin_lock_irqsave(&state->lock, flags);
22585 +	sup_update_time(&state->sup_env, litmus_clock());
22586 +
22587 +	update = state->sup_env.next_scheduler_update;
22588 +	now = state->sup_env.env.current_time;
22589 +
22590 +	TRACE_CUR("on_scheduling_timer at %llu, upd:%llu\n", now, update);
22591 +
22592 +	if (update <= now) {
22593 +		litmus_reschedule_local();
22594 +	} else if (update != SUP_NO_SCHEDULER_UPDATE) {
22595 +		hrtimer_set_expires(timer, ns_to_ktime(update));
22596 +		restart = HRTIMER_RESTART;
22597 +	}
22598 +
22599 +	raw_spin_unlock_irqrestore(&state->lock, flags);
22600 +
22601 +	return restart;
22602 +}
22603 +
22604 +static struct task_struct* pres_schedule(struct task_struct * prev)
22605 +{
22606 +	/* next == NULL means "schedule background work". */
22607 +	struct pres_cpu_state *state = local_cpu_state();
22608 +
22609 +	raw_spin_lock(&state->lock);
22610 +
22611 +	BUG_ON(state->scheduled && state->scheduled != prev);
22612 +	BUG_ON(state->scheduled && !is_realtime(prev));
22613 +
22614 +	/* update time */
22615 +	state->sup_env.will_schedule = true;
22616 +	sup_update_time(&state->sup_env, litmus_clock());
22617 +
22618 +	/* remove task from reservation if it blocks */
22619 +	if (is_realtime(prev) && !is_running(prev))
22620 +		task_departs(prev, is_completed(prev));
22621 +
22622 +	/* figure out what to schedule next */
22623 +	state->scheduled = sup_dispatch(&state->sup_env);
22624 +
22625 +	/* program scheduler timer */
22626 +	state->sup_env.will_schedule = false;
22627 +	pres_update_timer(state);
22628 +
22629 +	/* Notify LITMUS^RT core that we've arrived at a scheduling decision. */
22630 +	sched_state_task_picked();
22631 +
22632 +	raw_spin_unlock(&state->lock);
22633 +
22634 +	if (prev != state->scheduled && is_realtime(prev))
22635 +		TRACE_TASK(prev, "descheduled.\n");
22636 +	if (state->scheduled)
22637 +		TRACE_TASK(state->scheduled, "scheduled.\n");
22638 +
22639 +	return state->scheduled;
22640 +}
22641 +
22642 +static void resume_legacy_task_model_updates(struct task_struct *tsk)
22643 +{
22644 +	lt_t now;
22645 +	if (is_sporadic(tsk)) {
22646 +		/* If this sporadic task was gone for a "long" time and woke up past
22647 +		 * its deadline, then give it a new budget by triggering a job
22648 +		 * release. This is purely cosmetic and has no effect on the
22649 +		 * P-RES scheduler. */
22650 +
22651 +		now = litmus_clock();
22652 +		if (is_tardy(tsk, now))
22653 +			release_at(tsk, now);
22654 +	}
22655 +}
22656 +
22657 +/* Called when the state of tsk changes back to TASK_RUNNING.
22658 + * We need to requeue the task.
22659 + */
22660 +static void pres_task_resume(struct task_struct  *tsk)
22661 +{
22662 +	unsigned long flags;
22663 +	struct pres_task_state* tinfo = get_pres_state(tsk);
22664 +	struct pres_cpu_state *state = cpu_state_for(tinfo->cpu);
22665 +
22666 +	TRACE_TASK(tsk, "wake_up at %llu\n", litmus_clock());
22667 +
22668 +	raw_spin_lock_irqsave(&state->lock, flags);
22669 +	/* Requeue if self-suspension was already processed. */
22670 +	if (state->scheduled != tsk)
22671 +	{
22672 +		sup_update_time(&state->sup_env, litmus_clock());
22673 +		task_arrives(tsk);
22674 +		pres_update_timer(state);
22675 +	}
22676 +	raw_spin_unlock_irqrestore(&state->lock, flags);
22677 +
22678 +	resume_legacy_task_model_updates(tsk);
22679 +}
22680 +
22681 +/* syscall backend for job completions */
22682 +static long pres_complete_job(void)
22683 +{
22684 +	ktime_t next_release;
22685 +	long err;
22686 +
22687 +	TRACE_CUR("pres_complete_job at %llu\n", litmus_clock());
22688 +
22689 +	tsk_rt(current)->completed = 1;
22690 +	prepare_for_next_period(current);
22691 +	next_release = ns_to_ktime(get_release(current));
22692 +	set_current_state(TASK_INTERRUPTIBLE);
22693 +	err = schedule_hrtimeout(&next_release, HRTIMER_MODE_ABS);
22694 +
22695 +	TRACE_CUR("pres_complete_job returns at %llu\n", litmus_clock());
22696 +	return err;
22697 +}
22698 +
22699 +static long pres_admit_task(struct task_struct *tsk)
22700 +{
22701 +	long err = -ESRCH;
22702 +	unsigned long flags;
22703 +	struct reservation *res;
22704 +	struct pres_cpu_state *state;
22705 +	struct pres_task_state *tinfo = kzalloc(sizeof(*tinfo), GFP_KERNEL);
22706 +
22707 +	if (!tinfo)
22708 +		return -ENOMEM;
22709 +
22710 +	preempt_disable();
22711 +
22712 +	state = cpu_state_for(task_cpu(tsk));
22713 +	raw_spin_lock_irqsave(&state->lock, flags);
22714 +
22715 +	res = sup_find_by_id(&state->sup_env, tsk_rt(tsk)->task_params.cpu);
22716 +
22717 +	/* found the appropriate reservation (or vCPU) */
22718 +	if (res) {
22719 +		task_client_init(&tinfo->res_info, tsk, res);
22720 +		tinfo->cpu = task_cpu(tsk);
22721 +		tsk_rt(tsk)->plugin_state = tinfo;
22722 +		err = 0;
22723 +	}
22724 +
22725 +	raw_spin_unlock_irqrestore(&state->lock, flags);
22726 +
22727 +	preempt_enable();
22728 +
22729 +	if (err)
22730 +		kfree(tinfo);
22731 +
22732 +	return err;
22733 +}
22734 +
22735 +static void task_new_legacy_task_model_updates(struct task_struct *tsk)
22736 +{
22737 +	lt_t now = litmus_clock();
22738 +
22739 +	/* the first job exists starting as of right now */
22740 +	release_at(tsk, now);
22741 +}
22742 +
22743 +static void pres_task_new(struct task_struct *tsk, int on_runqueue,
22744 +			  int is_running)
22745 +{
22746 +	unsigned long flags;
22747 +	struct pres_task_state* tinfo = get_pres_state(tsk);
22748 +	struct pres_cpu_state *state = cpu_state_for(tinfo->cpu);
22749 +
22750 +	TRACE_TASK(tsk, "new RT task %llu (on_rq:%d, running:%d)\n",
22751 +		   litmus_clock(), on_runqueue, is_running);
22752 +
22753 +	/* acquire the lock protecting the state and disable interrupts */
22754 +	raw_spin_lock_irqsave(&state->lock, flags);
22755 +
22756 +	if (is_running) {
22757 +		state->scheduled = tsk;
22758 +		/* make sure this task should actually be running */
22759 +		litmus_reschedule_local();
22760 +	}
22761 +
22762 +	if (on_runqueue || is_running) {
22763 +		sup_update_time(&state->sup_env, litmus_clock());
22764 +		task_arrives(tsk);
22765 +		pres_update_timer(state);
22766 +	}
22767 +
22768 +	raw_spin_unlock_irqrestore(&state->lock, flags);
22769 +
22770 +	task_new_legacy_task_model_updates(tsk);
22771 +}
22772 +
22773 +static void pres_task_exit(struct task_struct *tsk)
22774 +{
22775 +	unsigned long flags;
22776 +	struct pres_task_state* tinfo = get_pres_state(tsk);
22777 +	struct pres_cpu_state *state = cpu_state_for(tinfo->cpu);
22778 +
22779 +	raw_spin_lock_irqsave(&state->lock, flags);
22780 +
22781 +	if (state->scheduled == tsk)
22782 +		state->scheduled = NULL;
22783 +
22784 +	/* remove from queues */
22785 +	if (is_running(tsk)) {
22786 +		sup_update_time(&state->sup_env, litmus_clock());
22787 +		task_departs(tsk, 0);
22788 +		pres_update_timer(state);
22789 +	}
22790 +
22791 +	raw_spin_unlock_irqrestore(&state->lock, flags);
22792 +
22793 +	kfree(tsk_rt(tsk)->plugin_state);
22794 +	tsk_rt(tsk)->plugin_state = NULL;
22795 +}
22796 +
22797 +static long create_polling_reservation(
22798 +	int res_type,
22799 +	struct reservation_config *config)
22800 +{
22801 +	struct pres_cpu_state *state;
22802 +	struct reservation* res;
22803 +	struct polling_reservation *pres;
22804 +	unsigned long flags;
22805 +	int use_edf  = config->priority == LITMUS_NO_PRIORITY;
22806 +	int periodic =  res_type == PERIODIC_POLLING;
22807 +	long err = -EINVAL;
22808 +
22809 +	if (config->polling_params.budget >
22810 +	    config->polling_params.period) {
22811 +		printk(KERN_ERR "invalid polling reservation (%u): "
22812 +		       "budget > period\n", config->id);
22813 +		return -EINVAL;
22814 +	}
22815 +	if (config->polling_params.budget >
22816 +	    config->polling_params.relative_deadline
22817 +	    && config->polling_params.relative_deadline) {
22818 +		printk(KERN_ERR "invalid polling reservation (%u): "
22819 +		       "budget > deadline\n", config->id);
22820 +		return -EINVAL;
22821 +	}
22822 +	if (config->polling_params.offset >
22823 +	    config->polling_params.period) {
22824 +		printk(KERN_ERR "invalid polling reservation (%u): "
22825 +		       "offset > period\n", config->id);
22826 +		return -EINVAL;
22827 +	}
22828 +
22829 +	/* Allocate before we grab a spin lock.
22830 +	 * Todo: would be nice to use a core-local allocation.
22831 +	 */
22832 +	pres = kzalloc(sizeof(*pres), GFP_KERNEL);
22833 +	if (!pres)
22834 +		return -ENOMEM;
22835 +
22836 +	state = cpu_state_for(config->cpu);
22837 +	raw_spin_lock_irqsave(&state->lock, flags);
22838 +
22839 +	res = sup_find_by_id(&state->sup_env, config->id);
22840 +	if (!res) {
22841 +		polling_reservation_init(pres, use_edf, periodic,
22842 +			config->polling_params.budget,
22843 +			config->polling_params.period,
22844 +			config->polling_params.relative_deadline,
22845 +			config->polling_params.offset);
22846 +		pres->res.id = config->id;
22847 +		if (!use_edf)
22848 +			pres->res.priority = config->priority;
22849 +		sup_add_new_reservation(&state->sup_env, &pres->res);
22850 +		err = config->id;
22851 +	} else {
22852 +		err = -EEXIST;
22853 +	}
22854 +
22855 +	raw_spin_unlock_irqrestore(&state->lock, flags);
22856 +
22857 +	if (err < 0)
22858 +		kfree(pres);
22859 +
22860 +	return err;
22861 +}
22862 +
22863 +#define MAX_INTERVALS 1024
22864 +
22865 +static long create_table_driven_reservation(
22866 +	struct reservation_config *config)
22867 +{
22868 +	struct pres_cpu_state *state;
22869 +	struct reservation* res;
22870 +	struct table_driven_reservation *td_res = NULL;
22871 +	struct lt_interval *slots = NULL;
22872 +	size_t slots_size;
22873 +	unsigned int i, num_slots;
22874 +	unsigned long flags;
22875 +	long err = -EINVAL;
22876 +
22877 +
22878 +	if (!config->table_driven_params.num_intervals) {
22879 +		printk(KERN_ERR "invalid table-driven reservation (%u): "
22880 +		       "no intervals\n", config->id);
22881 +		return -EINVAL;
22882 +	}
22883 +
22884 +	if (config->table_driven_params.num_intervals > MAX_INTERVALS) {
22885 +		printk(KERN_ERR "invalid table-driven reservation (%u): "
22886 +		       "too many intervals (max: %d)\n", config->id, MAX_INTERVALS);
22887 +		return -EINVAL;
22888 +	}
22889 +
22890 +	num_slots = config->table_driven_params.num_intervals;
22891 +	slots_size = sizeof(slots[0]) * num_slots;
22892 +	slots = kzalloc(slots_size, GFP_KERNEL);
22893 +	if (!slots)
22894 +		return -ENOMEM;
22895 +
22896 +	td_res = kzalloc(sizeof(*td_res), GFP_KERNEL);
22897 +	if (!td_res)
22898 +		err = -ENOMEM;
22899 +	else
22900 +		err = copy_from_user(slots,
22901 +			config->table_driven_params.intervals, slots_size);
22902 +
22903 +	if (!err) {
22904 +		/* sanity checks */
22905 +		for (i = 0; !err && i < num_slots; i++)
22906 +			if (slots[i].end <= slots[i].start) {
22907 +				printk(KERN_ERR
22908 +				       "invalid table-driven reservation (%u): "
22909 +				       "invalid interval %u => [%llu, %llu]\n",
22910 +				       config->id, i,
22911 +				       slots[i].start, slots[i].end);
22912 +				err = -EINVAL;
22913 +			}
22914 +
22915 +		for (i = 0; !err && i + 1 < num_slots; i++)
22916 +			if (slots[i + 1].start <= slots[i].end) {
22917 +				printk(KERN_ERR
22918 +				       "invalid table-driven reservation (%u): "
22919 +				       "overlapping intervals %u, %u\n",
22920 +				       config->id, i, i + 1);
22921 +				err = -EINVAL;
22922 +			}
22923 +
22924 +		if (slots[num_slots - 1].end >
22925 +			config->table_driven_params.major_cycle_length) {
22926 +			printk(KERN_ERR
22927 +				"invalid table-driven reservation (%u): last "
22928 +				"interval ends past major cycle %llu > %llu\n",
22929 +				config->id,
22930 +				slots[num_slots - 1].end,
22931 +				config->table_driven_params.major_cycle_length);
22932 +			err = -EINVAL;
22933 +		}
22934 +	}
22935 +
22936 +	if (!err) {
22937 +		state = cpu_state_for(config->cpu);
22938 +		raw_spin_lock_irqsave(&state->lock, flags);
22939 +
22940 +		res = sup_find_by_id(&state->sup_env, config->id);
22941 +		if (!res) {
22942 +			table_driven_reservation_init(td_res,
22943 +				config->table_driven_params.major_cycle_length,
22944 +				slots, num_slots);
22945 +			td_res->res.id = config->id;
22946 +			td_res->res.priority = config->priority;
22947 +			sup_add_new_reservation(&state->sup_env, &td_res->res);
22948 +			err = config->id;
22949 +		} else {
22950 +			err = -EEXIST;
22951 +		}
22952 +
22953 +		raw_spin_unlock_irqrestore(&state->lock, flags);
22954 +	}
22955 +
22956 +	if (err < 0) {
22957 +		kfree(slots);
22958 +		kfree(td_res);
22959 +	}
22960 +
22961 +	return err;
22962 +}
22963 +
22964 +static long pres_reservation_create(int res_type, void* __user _config)
22965 +{
22966 +	long ret = -EINVAL;
22967 +	struct reservation_config config;
22968 +
22969 +	TRACE("Attempt to create reservation (%d)\n", res_type);
22970 +
22971 +	if (copy_from_user(&config, _config, sizeof(config)))
22972 +		return -EFAULT;
22973 +
22974 +	if (config.cpu < 0 || !cpu_online(config.cpu)) {
22975 +		printk(KERN_ERR "invalid reservation (%u): "
22976 +		       "CPU %d offline\n", config.id, config.cpu);
22977 +		return -EINVAL;
22978 +	}
22979 +
22980 +	switch (res_type) {
22981 +		case PERIODIC_POLLING:
22982 +		case SPORADIC_POLLING:
22983 +			ret = create_polling_reservation(res_type, &config);
22984 +			break;
22985 +
22986 +		case TABLE_DRIVEN:
22987 +			ret = create_table_driven_reservation(&config);
22988 +			break;
22989 +
22990 +		default:
22991 +			return -EINVAL;
22992 +	};
22993 +
22994 +	return ret;
22995 +}
22996 +
22997 +static struct domain_proc_info pres_domain_proc_info;
22998 +
22999 +static long pres_get_domain_proc_info(struct domain_proc_info **ret)
23000 +{
23001 +	*ret = &pres_domain_proc_info;
23002 +	return 0;
23003 +}
23004 +
23005 +static void pres_setup_domain_proc(void)
23006 +{
23007 +	int i, cpu;
23008 +	int num_rt_cpus = num_online_cpus();
23009 +
23010 +	struct cd_mapping *cpu_map, *domain_map;
23011 +
23012 +	memset(&pres_domain_proc_info, 0, sizeof(pres_domain_proc_info));
23013 +	init_domain_proc_info(&pres_domain_proc_info, num_rt_cpus, num_rt_cpus);
23014 +	pres_domain_proc_info.num_cpus = num_rt_cpus;
23015 +	pres_domain_proc_info.num_domains = num_rt_cpus;
23016 +
23017 +	i = 0;
23018 +	for_each_online_cpu(cpu) {
23019 +		cpu_map = &pres_domain_proc_info.cpu_to_domains[i];
23020 +		domain_map = &pres_domain_proc_info.domain_to_cpus[i];
23021 +
23022 +		cpu_map->id = cpu;
23023 +		domain_map->id = i;
23024 +		cpumask_set_cpu(i, cpu_map->mask);
23025 +		cpumask_set_cpu(cpu, domain_map->mask);
23026 +		++i;
23027 +	}
23028 +}
23029 +
23030 +static long pres_activate_plugin(void)
23031 +{
23032 +	int cpu;
23033 +	struct pres_cpu_state *state;
23034 +
23035 +	for_each_online_cpu(cpu) {
23036 +		TRACE("Initializing CPU%d...\n", cpu);
23037 +
23038 +		state = cpu_state_for(cpu);
23039 +
23040 +		raw_spin_lock_init(&state->lock);
23041 +		state->cpu = cpu;
23042 +		state->scheduled = NULL;
23043 +
23044 +		sup_init(&state->sup_env);
23045 +
23046 +		hrtimer_init(&state->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
23047 +		state->timer.function = on_scheduling_timer;
23048 +	}
23049 +
23050 +	pres_setup_domain_proc();
23051 +
23052 +	return 0;
23053 +}
23054 +
23055 +static long pres_deactivate_plugin(void)
23056 +{
23057 +	int cpu;
23058 +	struct pres_cpu_state *state;
23059 +	struct reservation *res;
23060 +
23061 +	for_each_online_cpu(cpu) {
23062 +		state = cpu_state_for(cpu);
23063 +		raw_spin_lock(&state->lock);
23064 +
23065 +		hrtimer_cancel(&state->timer);
23066 +
23067 +		/* Delete all reservations --- assumes struct reservation
23068 +		 * is prefix of containing struct. */
23069 +
23070 +		while (!list_empty(&state->sup_env.active_reservations)) {
23071 +			res = list_first_entry(
23072 +				&state->sup_env.active_reservations,
23073 +			        struct reservation, list);
23074 +			list_del(&res->list);
23075 +			kfree(res);
23076 +		}
23077 +
23078 +		while (!list_empty(&state->sup_env.inactive_reservations)) {
23079 +			res = list_first_entry(
23080 +				&state->sup_env.inactive_reservations,
23081 +			        struct reservation, list);
23082 +			list_del(&res->list);
23083 +			kfree(res);
23084 +		}
23085 +
23086 +		while (!list_empty(&state->sup_env.depleted_reservations)) {
23087 +			res = list_first_entry(
23088 +				&state->sup_env.depleted_reservations,
23089 +			        struct reservation, list);
23090 +			list_del(&res->list);
23091 +			kfree(res);
23092 +		}
23093 +
23094 +		raw_spin_unlock(&state->lock);
23095 +	}
23096 +
23097 +	destroy_domain_proc_info(&pres_domain_proc_info);
23098 +	return 0;
23099 +}
23100 +
23101 +static struct sched_plugin pres_plugin = {
23102 +	.plugin_name		= "P-RES",
23103 +	.schedule		= pres_schedule,
23104 +	.task_wake_up		= pres_task_resume,
23105 +	.admit_task		= pres_admit_task,
23106 +	.task_new		= pres_task_new,
23107 +	.task_exit		= pres_task_exit,
23108 +	.complete_job           = pres_complete_job,
23109 +	.get_domain_proc_info   = pres_get_domain_proc_info,
23110 +	.activate_plugin	= pres_activate_plugin,
23111 +	.deactivate_plugin      = pres_deactivate_plugin,
23112 +	.reservation_create     = pres_reservation_create,
23113 +};
23114 +
23115 +static int __init init_pres(void)
23116 +{
23117 +	return register_sched_plugin(&pres_plugin);
23118 +}
23119 +
23120 +module_init(init_pres);
23121 +
23122 -- 
23123 1.8.1.2
23124 
23125 
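[Illustration, not part of the patch series] create_polling_reservation() above rejects ill-formed parameters (budget > period, budget > deadline when a constrained deadline is given, offset > period) before touching any scheduler state. The stand-alone sketch below mirrors just those admission checks in user space; the struct and function names are invented for illustration.

#include <stdio.h>
#include <stdint.h>

typedef uint64_t lt_t;

/* Illustrative mirror of the polling parameters checked above. */
struct polling_cfg {
	lt_t budget, period, relative_deadline, offset;
};

/* Returns 0 if the configuration satisfies the same constraints that
 * create_polling_reservation() enforces, -1 otherwise. */
static int polling_cfg_ok(const struct polling_cfg *c)
{
	if (c->budget > c->period)
		return -1;		/* budget > period   */
	if (c->relative_deadline && c->budget > c->relative_deadline)
		return -1;		/* budget > deadline */
	if (c->offset > c->period)
		return -1;		/* offset > period   */
	return 0;
}

int main(void)
{
	struct polling_cfg ok  = { 10, 100, 0, 0 };	/* implicit deadline */
	struct polling_cfg bad = { 50,  40, 0, 0 };	/* budget > period   */
	printf("ok=%d bad=%d\n", polling_cfg_ok(&ok), polling_cfg_ok(&bad));
	return 0;
}
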
23126 From 90add7e63ec95fcf81e311ffc1c036382ac28347 Mon Sep 17 00:00:00 2001
23127 From: Namhoon Kim <namhoonk@cs.unc.edu>
23128 Date: Thu, 4 Sep 2014 15:30:12 -0400
23129 Subject: [PATCH 064/119] Fix scheduler invocation after draining budget
23130 
23131 ---
23132  litmus/reservation.c | 18 ++++++++++++++++--
23133  1 file changed, 16 insertions(+), 2 deletions(-)
23134 
23135 diff --git a/litmus/reservation.c b/litmus/reservation.c
23136 index bc32b2e..cd51b90 100644
23137 --- a/litmus/reservation.c
23138 +++ b/litmus/reservation.c
23139 @@ -180,18 +180,31 @@ static void sup_charge_budget(
23140  {
23141  	struct list_head *pos, *next;
23142  	struct reservation *res;
23143 +	
23144 +	int encountered_active = 0;
23145  
23146  	list_for_each_safe(pos, next, &sup_env->active_reservations) {
23147  		/* charge all ACTIVE_IDLE up to the first ACTIVE reservation */
23148  		res = list_entry(pos, struct reservation, list);
23149  		if (res->state == RESERVATION_ACTIVE) {
23150  			res->ops->drain_budget(res, delta);
23151 -			/* stop at the first ACTIVE reservation */
23152 -			break;
23153 +			encountered_active = 1;
23154  		} else {
23155  			BUG_ON(res->state != RESERVATION_ACTIVE_IDLE);
23156  			res->ops->drain_budget(res, delta);
23157  		}
23158 +		if (res->state == RESERVATION_ACTIVE ||
23159 +			res->state == RESERVATION_ACTIVE_IDLE)
23160 +		{
23161 +			/* make sure scheduler is invoked when this reservation exhausts
23162 +			 * its remaining budget */
23163 +			 TRACE("requesting scheduler update for reservation %u in %llu nanoseconds\n",
23164 +				res->id, res->cur_budget);
23165 +			 sup_scheduler_update_after(sup_env, res->cur_budget);
23166 +		}
23167 +		if (encountered_active)
23168 +			/* stop at the first ACTIVE reservation */
23169 +			break;
23170  	}
23171  }
23172  
23173 @@ -226,6 +239,7 @@ void sup_update_time(
23174  	/* If the time didn't advance, there is nothing to do.
23175  	 * This check makes it safe to call sup_advance_time() potentially
23176  	 * multiple times (e.g., via different code paths. */
23177 +	TRACE("(sup_update_time) now: %llu, current_time: %llu\n", now, sup_env->env.current_time);
23178  	if (unlikely(now <= sup_env->env.current_time))
23179  		return;
23180  
23181 -- 
23182 1.8.1.2
23183 
23184 
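[Illustration, not part of the patch series] The hunk above ensures that, after budgets are charged, a scheduler update is requested no later than the point at which each still-active reservation exhausts its remaining budget. A minimal user-space sketch of that rule; all names here are invented for illustration.

#include <stdio.h>
#include <stdint.h>

typedef uint64_t lt_t;
#define NO_UPDATE UINT64_MAX

struct fake_res { lt_t cur_budget; int still_active; };

/* Earliest time at which some active reservation runs out of budget;
 * the scheduler timer must fire no later than this. */
static lt_t next_scheduler_update(lt_t now, const struct fake_res *r, int n)
{
	lt_t earliest = NO_UPDATE;
	for (int i = 0; i < n; i++)
		if (r[i].still_active && now + r[i].cur_budget < earliest)
			earliest = now + r[i].cur_budget;
	return earliest;
}

int main(void)
{
	struct fake_res rs[] = { {5000, 1}, {2000, 1}, {9000, 0} };
	printf("update at %llu\n",
	       (unsigned long long) next_scheduler_update(100, rs, 3));
	return 0;
}
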
23185 From e55ab67a060ded6f2c47e5ede00a39176bacbab3 Mon Sep 17 00:00:00 2001
23186 From: Bjoern Brandenburg <bbb@mpi-sws.org>
23187 Date: Fri, 5 Sep 2014 01:39:41 +0200
23188 Subject: [PATCH 065/119] Switch table-driven reservations to use table-driven
23189  budget
23190 
23191 Instead of counting how much budget has been consumed, determine
23192 budget based on actual time slots.
23193 ---
23194  include/litmus/polling_reservations.h |  1 +
23195  litmus/polling_reservations.c         | 54 ++++++++++++++++++++++++++++++++---
23196  2 files changed, 51 insertions(+), 4 deletions(-)
23197 
23198 diff --git a/include/litmus/polling_reservations.h b/include/litmus/polling_reservations.h
23199 index 15910ed..fa22181 100644
23200 --- a/include/litmus/polling_reservations.h
23201 +++ b/include/litmus/polling_reservations.h
23202 @@ -24,6 +24,7 @@ struct table_driven_reservation {
23203  	unsigned int next_interval;
23204  	unsigned int num_intervals;
23205  	struct lt_interval *intervals;
23206 +	struct lt_interval *cur_interval;
23207  };
23208  
23209  void table_driven_reservation_init(struct table_driven_reservation *tdres,
23210 diff --git a/litmus/polling_reservations.c b/litmus/polling_reservations.c
23211 index 08034c3..e6c57f5 100644
23212 --- a/litmus/polling_reservations.c
23213 +++ b/litmus/polling_reservations.c
23214 @@ -366,15 +366,29 @@ static lt_t td_interval_length(struct lt_interval *ival)
23215  	return ival->end - ival->start;
23216  }
23217  
23218 +static lt_t td_time_remaining_until_end(struct table_driven_reservation *tdres)
23219 +{
23220 +	lt_t now = tdres->res.env->current_time;
23221 +	lt_t end = td_cur_major_cycle_start(tdres) + tdres->cur_interval->end;
23222 +	TRACE("td_remaining(%u): start=%llu now=%llu end=%llu\n",
23223 +		tdres->res.id,
23224 +		td_cur_major_cycle_start(tdres) + tdres->cur_interval->start,
23225 +		now, end);
23226 +	if (now >=  end)
23227 +		return 0;
23228 +	else
23229 +		return end - now;
23230 +}
23231 +
23232  static void td_replenish(
23233 -	struct reservation *res
23234 -)
23235 +	struct reservation *res)
23236  {
23237  	struct table_driven_reservation *tdres =
23238  		container_of(res, struct table_driven_reservation, res);
23239  
23240  	/* replenish budget */
23241 -	res->cur_budget = td_interval_length(tdres->intervals + tdres->next_interval);
23242 +	tdres->cur_interval = tdres->intervals + tdres->next_interval;
23243 +	res->cur_budget = td_interval_length(tdres->cur_interval);
23244  
23245  	tdres->next_interval = (tdres->next_interval + 1) % tdres->num_intervals;
23246  	if (tdres->next_interval)
23247 @@ -404,12 +418,43 @@ static void td_replenish(
23248  	}
23249  }
23250  
23251 +static void td_drain_budget(
23252 +		struct reservation *res,
23253 +		lt_t how_much)
23254 +{
23255 +	struct table_driven_reservation *tdres =
23256 +		container_of(res, struct table_driven_reservation, res);
23257 +
23258 +	/* Table-driven scheduling: instead of tracking the budget, we compute
23259 +	 * how much time is left in this allocation interval. */
23260 +
23261 +	switch (res->state) {
23262 +		case RESERVATION_DEPLETED:
23263 +		case RESERVATION_INACTIVE:
23264 +			BUG();
23265 +			break;
23266 +
23267 +		case RESERVATION_ACTIVE_IDLE:
23268 +		case RESERVATION_ACTIVE:
23269 +			res->cur_budget = td_time_remaining_until_end(tdres);
23270 +			TRACE("td_drain_budget(%u): drained to budget=%llu\n",
23271 +				res->id, res->cur_budget);
23272 +			if (!res->cur_budget) {
23273 +				res->env->change_state(res->env, res,
23274 +					RESERVATION_DEPLETED);
23275 +			} /* else: stay in current state */
23276 +			break;
23277 +	}
23278 +}
23279 +
23280 +
23281 +
23282  static struct reservation_ops td_ops = {
23283  	.dispatch_client = default_dispatch_client,
23284  	.client_arrives = td_client_arrives,
23285  	.client_departs = td_client_departs,
23286  	.replenish = td_replenish,
23287 -	.drain_budget = common_drain_budget,
23288 +	.drain_budget = td_drain_budget,
23289  };
23290  
23291  void table_driven_reservation_init(
23292 @@ -431,6 +476,7 @@ void table_driven_reservation_init(
23293  	reservation_init(&tdres->res);
23294  	tdres->major_cycle = major_cycle;
23295  	tdres->intervals = intervals;
23296 +	tdres->cur_interval = intervals;
23297  	tdres->num_intervals = num_intervals;
23298  	tdres->res.ops = &td_ops;
23299  }
23300 -- 
23301 1.8.1.2
23302 
23303 
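[Illustration, not part of the patch series] td_time_remaining_until_end() above derives the budget from wall-clock time rather than from an accumulated consumption counter: whatever is left of the current table slot is the budget. A stand-alone sketch of that computation, assuming the slot end is given as an absolute time; names are illustrative.

#include <stdio.h>
#include <stdint.h>

typedef uint64_t lt_t;

/* Remaining budget = time left until the end of the current table slot,
 * or zero if the slot has already ended. */
static lt_t remaining_until_end(lt_t now, lt_t slot_end)
{
	return now >= slot_end ? 0 : slot_end - now;
}

int main(void)
{
	printf("%llu\n", (unsigned long long) remaining_until_end(150, 200)); /* 50 */
	printf("%llu\n", (unsigned long long) remaining_until_end(250, 200)); /*  0 */
	return 0;
}
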
23304 From fcd5e594a26012c7457197fd111e58820bc34741 Mon Sep 17 00:00:00 2001
23305 From: Bjoern Brandenburg <bbb@mpi-sws.org>
23306 Date: Mon, 8 Sep 2014 18:19:43 +0200
23307 Subject: [PATCH 066/119] P-RES: ensure scheduler timer fires on _local_ CPU
23308  only
23309 
23310 Accidentally setting up the timer on the wrong CPU when a thread
23311 resumes is problematic: it can (potentially) lead to deadlock and to
23312 missed scheduling events.
23313 ---
23314  litmus/sched_pres.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++-----
23315  1 file changed, 50 insertions(+), 5 deletions(-)
23316 
23317 diff --git a/litmus/sched_pres.c b/litmus/sched_pres.c
23318 index 6779ffd..60afde6 100644
23319 --- a/litmus/sched_pres.c
23320 +++ b/litmus/sched_pres.c
23321 @@ -65,14 +65,22 @@ static void task_arrives(struct task_struct *tsk)
23322  
23323  static void pres_update_timer(struct pres_cpu_state *state)
23324  {
23325 +	int local;
23326  	lt_t update, now;
23327  
23328  	update = state->sup_env.next_scheduler_update;
23329  	now = state->sup_env.env.current_time;
23330 +
23331 +	/* Be sure we're actually running on the right core,
23332 +	 * as pres_update_timer() is also called from pres_task_resume(),
23333 +	 * which might be called on any CPU when a thread resumes.
23334 +	 */
23335 +	local = local_cpu_state() == state;
23336 +
23337  	if (update <= now) {
23338  		litmus_reschedule(state->cpu);
23339 -	} else if (update != SUP_NO_SCHEDULER_UPDATE) {
23340 -		/* reprogram only if not already set correctly */
23341 +	} else if (likely(local && update != SUP_NO_SCHEDULER_UPDATE)) {
23342 +		/* Reprogram only if not already set correctly. */
23343  		if (!hrtimer_active(&state->timer) ||
23344  		    ktime_to_ns(hrtimer_get_expires(&state->timer)) != update) {
23345  			TRACE("canceling timer...\n");
23346 @@ -81,6 +89,25 @@ static void pres_update_timer(struct pres_cpu_state *state)
23347  			hrtimer_start(&state->timer, ns_to_ktime(update),
23348  				HRTIMER_MODE_ABS_PINNED);
23349  		}
23350 +	} else if (unlikely(!local && update != SUP_NO_SCHEDULER_UPDATE)) {
23351 +		/* Poke remote core only if timer needs to be set earlier than
23352 +		 * it is currently set.
23353 +		 */
23354 +		TRACE("pres_update_timer for remote CPU %d (update=%llu, "
23355 +		      "active:%d, set:%llu)\n",
23356 +			state->cpu,
23357 +			update,
23358 +			hrtimer_active(&state->timer),
23359 +			ktime_to_ns(hrtimer_get_expires(&state->timer)));
23360 +		if (!hrtimer_active(&state->timer) ||
23361 +		    ktime_to_ns(hrtimer_get_expires(&state->timer)) > update) {
23362 +			TRACE("poking CPU %d so that it can update its "
23363 +			       "scheduling timer (active:%d, set:%llu)\n",
23364 +			       state->cpu,
23365 +			       hrtimer_active(&state->timer),
23366 +			       ktime_to_ns(hrtimer_get_expires(&state->timer)));
23367 +			litmus_reschedule(state->cpu);
23368 +		}
23369  	}
23370  }
23371  
23372 @@ -88,16 +115,27 @@ static enum hrtimer_restart on_scheduling_timer(struct hrtimer *timer)
23373  {
23374  	unsigned long flags;
23375  	enum hrtimer_restart restart = HRTIMER_NORESTART;
23376 -	struct pres_cpu_state *state = local_cpu_state();
23377 +	struct pres_cpu_state *state;
23378  	lt_t update, now;
23379  
23380 +	state = container_of(timer, struct pres_cpu_state, timer);
23381 +
23382 +	/* The scheduling timer should only fire on the local CPU, because
23383 +	 * otherwise deadlocks via timer_cancel() are possible.
23384 +	 * Note: this does not interfere with dedicated interrupt handling, as
23385 +	 * even under dedicated interrupt handling scheduling timers for
23386 +	 * budget enforcement must occur locally on each CPU.
23387 +	 */
23388 +	BUG_ON(state->cpu != raw_smp_processor_id());
23389 +
23390  	raw_spin_lock_irqsave(&state->lock, flags);
23391  	sup_update_time(&state->sup_env, litmus_clock());
23392  
23393  	update = state->sup_env.next_scheduler_update;
23394  	now = state->sup_env.env.current_time;
23395  
23396 -	TRACE_CUR("on_scheduling_timer at %llu, upd:%llu\n", now, update);
23397 +	TRACE_CUR("on_scheduling_timer at %llu, upd:%llu (for cpu=%d)\n",
23398 +		now, update, state->cpu);
23399  
23400  	if (update <= now) {
23401  		litmus_reschedule_local();
23402 @@ -173,12 +211,15 @@ static void pres_task_resume(struct task_struct  *tsk)
23403  	struct pres_task_state* tinfo = get_pres_state(tsk);
23404  	struct pres_cpu_state *state = cpu_state_for(tinfo->cpu);
23405  
23406 -	TRACE_TASK(tsk, "wake_up at %llu\n", litmus_clock());
23407 +	TRACE_TASK(tsk, "thread wakes up at %llu\n", litmus_clock());
23408  
23409  	raw_spin_lock_irqsave(&state->lock, flags);
23410  	/* Requeue if self-suspension was already processed. */
23411  	if (state->scheduled != tsk)
23412  	{
23413 +		/* Assumption: litmus_clock() is synchronized across cores,
23414 +		 * since we might not actually be executing on tinfo->cpu
23415 +		 * at the moment. */
23416  		sup_update_time(&state->sup_env, litmus_clock());
23417  		task_arrives(tsk);
23418  		pres_update_timer(state);
23419 @@ -270,6 +311,8 @@ static void pres_task_new(struct task_struct *tsk, int on_runqueue,
23420  	}
23421  
23422  	if (on_runqueue || is_running) {
23423 +		/* Assumption: litmus_clock() is synchronized across cores
23424 +		 * [see comment in pres_task_resume()] */
23425  		sup_update_time(&state->sup_env, litmus_clock());
23426  		task_arrives(tsk);
23427  		pres_update_timer(state);
23428 @@ -293,6 +336,8 @@ static void pres_task_exit(struct task_struct *tsk)
23429  
23430  	/* remove from queues */
23431  	if (is_running(tsk)) {
23432 +		/* Assumption: litmus_clock() is synchronized across cores
23433 +		 * [see comment in pres_task_resume()] */
23434  		sup_update_time(&state->sup_env, litmus_clock());
23435  		task_departs(tsk, 0);
23436  		pres_update_timer(state);
23437 -- 
23438 1.8.1.2
23439 
23440 
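[Illustration, not part of the patch series] The logic added above distinguishes three cases: reschedule immediately if the update time has already passed, program the pinned hrtimer if we are running on the owning CPU, and otherwise only poke the remote CPU when the requested expiry is earlier than what is already armed. A compact user-space sketch of that decision; the enum and helper are invented for illustration and the SUP_NO_SCHEDULER_UPDATE case is omitted.

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

typedef uint64_t lt_t;

enum timer_action { RESCHEDULE_NOW, PROGRAM_LOCAL_TIMER, POKE_REMOTE_CPU, DO_NOTHING };

static enum timer_action decide(lt_t now, lt_t update, bool local,
				bool timer_armed, lt_t armed_for)
{
	if (update <= now)
		return RESCHEDULE_NOW;
	if (local)		/* reprogram only if not already set correctly */
		return (!timer_armed || armed_for != update)
			? PROGRAM_LOCAL_TIMER : DO_NOTHING;
	/* remote: interfere only if the timer must fire earlier than armed */
	return (!timer_armed || armed_for > update)
		? POKE_REMOTE_CPU : DO_NOTHING;
}

int main(void)
{
	printf("%d\n", decide(100,  50, true,  false,   0)); /* RESCHEDULE_NOW      */
	printf("%d\n", decide(100, 200, true,  false,   0)); /* PROGRAM_LOCAL_TIMER */
	printf("%d\n", decide(100, 200, false, true,  300)); /* POKE_REMOTE_CPU     */
	return 0;
}
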
23441 From aca9db004d887d28621a0fc30818d7ebf77fc9bf Mon Sep 17 00:00:00 2001
23442 From: Bjoern Brandenburg <bbb@mpi-sws.org>
23443 Date: Mon, 8 Sep 2014 18:24:18 +0200
23444 Subject: [PATCH 067/119] Table-driven replenishments should depend on the
23445  current time
23446 
23447 Make sure we don't accidentally bleed past the current reservation
23448 scheduling slot (due to jitter) by determining the remaining budget
23449 precisely when replenishing the reservation budget.
23450 ---
23451  litmus/polling_reservations.c | 45 ++++++++++++++++++++++++++++++++++++-------
23452  1 file changed, 38 insertions(+), 7 deletions(-)
23453 
23454 diff --git a/litmus/polling_reservations.c b/litmus/polling_reservations.c
23455 index e6c57f5..5c9b183 100644
23456 --- a/litmus/polling_reservations.c
23457 +++ b/litmus/polling_reservations.c
23458 @@ -361,11 +361,6 @@ static void td_client_departs(
23459  	}
23460  }
23461  
23462 -static lt_t td_interval_length(struct lt_interval *ival)
23463 -{
23464 -	return ival->end - ival->start;
23465 -}
23466 -
23467  static lt_t td_time_remaining_until_end(struct table_driven_reservation *tdres)
23468  {
23469  	lt_t now = tdres->res.env->current_time;
23470 @@ -388,7 +383,9 @@ static void td_replenish(
23471  
23472  	/* replenish budget */
23473  	tdres->cur_interval = tdres->intervals + tdres->next_interval;
23474 -	res->cur_budget = td_interval_length(tdres->cur_interval);
23475 +	res->cur_budget = td_time_remaining_until_end(tdres);
23476 +	TRACE("td_replenish(%u): %s budget=%llu\n", res->id,
23477 +		res->cur_budget ? "" : "WARNING", res->cur_budget);
23478  
23479  	tdres->next_interval = (tdres->next_interval + 1) % tdres->num_intervals;
23480  	if (tdres->next_interval)
23481 @@ -447,10 +444,44 @@ static void td_drain_budget(
23482  	}
23483  }
23484  
23485 +static struct task_struct* td_dispatch_client(
23486 +	struct reservation *res,
23487 +	lt_t *for_at_most)
23488 +{
23489 +	struct task_struct *t;
23490 +	struct table_driven_reservation *tdres =
23491 +		container_of(res, struct table_driven_reservation, res);
23492  
23493 +	/* usual logic for selecting a client */
23494 +	t = default_dispatch_client(res, for_at_most);
23495 +
23496 +	TRACE_TASK(t, "td_dispatch_client(%u): selected, budget=%llu\n",
23497 +		res->id, res->cur_budget);
23498 +
23499 +	/* check how much budget we have left in this time slot */
23500 +	res->cur_budget = td_time_remaining_until_end(tdres);
23501 +
23502 +	TRACE_TASK(t, "td_dispatch_client(%u): updated to budget=%llu next=%d\n",
23503 +		res->id, res->cur_budget, tdres->next_interval);
23504 +
23505 +	if (unlikely(!res->cur_budget)) {
23506 +		/* Unlikely case: if we ran out of budget, the user configured
23507 +		 * a broken scheduling table (overlapping table slots).
23508 +		 * Not much we can do about this, but we can't dispatch a job
23509 +		 * now without causing overload. So let's register this reservation
23510 +		 * as depleted and wait for the next allocation. */
23511 +		TRACE("td_dispatch_client(%u): budget unexpectedly depleted "
23512 +			"(check scheduling table for unintended overlap)\n",
23513 +			res->id);
23514 +		res->env->change_state(res->env, res,
23515 +			RESERVATION_DEPLETED);
23516 +		return NULL;
23517 +	} else
23518 +		return t;
23519 +}
23520  
23521  static struct reservation_ops td_ops = {
23522 -	.dispatch_client = default_dispatch_client,
23523 +	.dispatch_client = td_dispatch_client,
23524  	.client_arrives = td_client_arrives,
23525  	.client_departs = td_client_departs,
23526  	.replenish = td_replenish,
23527 -- 
23528 1.8.1.2
23529 
23530 
23531 From 563999251e34d52bfbc47889cabd763714d020e1 Mon Sep 17 00:00:00 2001
23532 From: Bjoern Brandenburg <bbb@mpi-sws.org>
23533 Date: Thu, 11 Sep 2014 12:53:42 +0200
23534 Subject: [PATCH 068/119] Move 'reservation' field from task_client to generic
23535  reservation_client
23536 
23537 This makes it a lot easier to write generic code for thread arrival /
23538 thread departure in plugins with multiple types of reservation
23539 clients.
23540 ---
23541  include/litmus/reservation.h | 5 +++--
23542  litmus/reservation.c         | 4 ++--
23543  litmus/sched_pres.c          | 4 ++--
23544  3 files changed, 7 insertions(+), 6 deletions(-)
23545 
23546 diff --git a/include/litmus/reservation.h b/include/litmus/reservation.h
23547 index d8d6ce3..9c23e27 100644
23548 --- a/include/litmus/reservation.h
23549 +++ b/include/litmus/reservation.h
23550 @@ -33,9 +33,11 @@ typedef struct task_struct * (*dispatch_t)  (
23551  );
23552  
23553  /* Something that can be managed in a reservation and that can yield
23554 - * a process for dispatching. */
23555 + * a process for dispatching. Contains a pointer to the reservation
23556 + * to which it "belongs". */
23557  struct reservation_client {
23558  	struct list_head list;
23559 +	struct reservation* reservation;
23560  	dispatch_t dispatch;
23561  };
23562  
23563 @@ -135,7 +137,6 @@ struct task_struct* default_dispatch_client(
23564  /* "connector" reservation client to hook up tasks with reservations */
23565  struct task_client {
23566  	struct reservation_client client;
23567 -	struct reservation* reservation;
23568  	struct task_struct *task;
23569  };
23570  
23571 diff --git a/litmus/reservation.c b/litmus/reservation.c
23572 index cd51b90..447fc5b 100644
23573 --- a/litmus/reservation.c
23574 +++ b/litmus/reservation.c
23575 @@ -40,8 +40,8 @@ void task_client_init(struct task_client *tc, struct task_struct *tsk,
23576  {
23577  	memset(&tc->client, sizeof(tc->client), 0);
23578  	tc->client.dispatch = task_client_dispatch;
23579 +	tc->client.reservation = res;
23580  	tc->task = tsk;
23581 -	tc->reservation = res;
23582  }
23583  
23584  static void sup_scheduler_update_at(
23585 @@ -180,7 +180,7 @@ static void sup_charge_budget(
23586  {
23587  	struct list_head *pos, *next;
23588  	struct reservation *res;
23589 -	
23590 +
23591  	int encountered_active = 0;
23592  
23593  	list_for_each_safe(pos, next, &sup_env->active_reservations) {
23594 diff --git a/litmus/sched_pres.c b/litmus/sched_pres.c
23595 index 60afde6..2c777ec 100644
23596 --- a/litmus/sched_pres.c
23597 +++ b/litmus/sched_pres.c
23598 @@ -45,7 +45,7 @@ static void task_departs(struct task_struct *tsk, int job_complete)
23599  	struct reservation* res;
23600  	struct reservation_client *client;
23601  
23602 -	res    = state->res_info.reservation;
23603 +	res    = state->res_info.client.reservation;
23604  	client = &state->res_info.client;
23605  
23606  	res->ops->client_departs(res, client, job_complete);
23607 @@ -57,7 +57,7 @@ static void task_arrives(struct task_struct *tsk)
23608  	struct reservation* res;
23609  	struct reservation_client *client;
23610  
23611 -	res    = state->res_info.reservation;
23612 +	res    = state->res_info.client.reservation;
23613  	client = &state->res_info.client;
23614  
23615  	res->ops->client_arrives(res, client);
23616 -- 
23617 1.8.1.2
23618 
23619 
23620 From 4841253863ef57e0b91d169b0080ce079d54fe6f Mon Sep 17 00:00:00 2001
23621 From: Bjoern Brandenburg <bbb@mpi-sws.org>
23622 Date: Fri, 12 Sep 2014 13:31:08 +0200
23623 Subject: [PATCH 069/119] P-RES: fix rare deadlock via hrtimer_start()
23624 
23625 There's a rare condition under which the current call to hrtimer_start()
23626 in pres_update_timer() may result in deadlock.
23627 
23628 pres_update_timer() // holds runqueue lock and state->lock
23629 -> hrtimer_start()
23630   -> raise_softirq_irqoff()
23631     -> wakeup_softirqd()
23632       ->  wake_up_process()
23633         -> acquires runqueue lock()
23634 
23635 To avoid this, we need to call __hrtimer_start_range_ns() with the
23636 'wakeup' flag set to zero.
23637 
23638 While at it, also drop the state->lock before calling into hrtimer(),
23639 to avoid making the scheduler critical section longer than necessary.
23640 ---
23641  litmus/sched_pres.c | 51 ++++++++++++++++++++++++++++++++-------------------
23642  1 file changed, 32 insertions(+), 19 deletions(-)
23643 
23644 diff --git a/litmus/sched_pres.c b/litmus/sched_pres.c
23645 index 2c777ec..13a47a8 100644
23646 --- a/litmus/sched_pres.c
23647 +++ b/litmus/sched_pres.c
23648 @@ -63,7 +63,8 @@ static void task_arrives(struct task_struct *tsk)
23649  	res->ops->client_arrives(res, client);
23650  }
23651  
23652 -static void pres_update_timer(struct pres_cpu_state *state)
23653 +/* NOTE: drops state->lock */
23654 +static void pres_update_timer_and_unlock(struct pres_cpu_state *state)
23655  {
23656  	int local;
23657  	lt_t update, now;
23658 @@ -77,6 +78,10 @@ static void pres_update_timer(struct pres_cpu_state *state)
23659  	 */
23660  	local = local_cpu_state() == state;
23661  
23662 +	/* Must drop state lock before calling into hrtimer_start(), which
23663 +	 * may raise a softirq, which in turn may wake ksoftirqd. */
23664 +	raw_spin_unlock(&state->lock);
23665 +
23666  	if (update <= now) {
23667  		litmus_reschedule(state->cpu);
23668  	} else if (likely(local && update != SUP_NO_SCHEDULER_UPDATE)) {
23669 @@ -86,8 +91,13 @@ static void pres_update_timer(struct pres_cpu_state *state)
23670  			TRACE("canceling timer...\n");
23671  			hrtimer_cancel(&state->timer);
23672  			TRACE("setting scheduler timer for %llu\n", update);
23673 -			hrtimer_start(&state->timer, ns_to_ktime(update),
23674 -				HRTIMER_MODE_ABS_PINNED);
23675 +			/* We cannot use hrtimer_start() here because the
23676 +			 * wakeup flag must be set to zero. */
23677 +			__hrtimer_start_range_ns(&state->timer,
23678 +					ns_to_ktime(update),
23679 +					0 /* timer coalescing slack */,
23680 +					HRTIMER_MODE_ABS_PINNED,
23681 +					0 /* wakeup */);
23682  		}
23683  	} else if (unlikely(!local && update != SUP_NO_SCHEDULER_UPDATE)) {
23684  		/* Poke remote core only if timer needs to be set earlier than
23685 @@ -170,14 +180,13 @@ static struct task_struct* pres_schedule(struct task_struct * prev)
23686  	/* figure out what to schedule next */
23687  	state->scheduled = sup_dispatch(&state->sup_env);
23688  
23689 -	/* program scheduler timer */
23690 -	state->sup_env.will_schedule = false;
23691 -	pres_update_timer(state);
23692 -
23693  	/* Notify LITMUS^RT core that we've arrived at a scheduling decision. */
23694  	sched_state_task_picked();
23695  
23696 -	raw_spin_unlock(&state->lock);
23697 +	/* program scheduler timer */
23698 +	state->sup_env.will_schedule = false;
23699 +	/* NOTE: drops state->lock */
23700 +	pres_update_timer_and_unlock(state);
23701  
23702  	if (prev != state->scheduled && is_realtime(prev))
23703  		TRACE_TASK(prev, "descheduled.\n");
23704 @@ -222,9 +231,11 @@ static void pres_task_resume(struct task_struct  *tsk)
23705  		 * at the moment. */
23706  		sup_update_time(&state->sup_env, litmus_clock());
23707  		task_arrives(tsk);
23708 -		pres_update_timer(state);
23709 -	}
23710 -	raw_spin_unlock_irqrestore(&state->lock, flags);
23711 +		/* NOTE: drops state->lock */
23712 +		pres_update_timer_and_unlock(state);
23713 +		local_irq_restore(flags);
23714 +	} else
23715 +		raw_spin_unlock_irqrestore(&state->lock, flags);
23716  
23717  	resume_legacy_task_model_updates(tsk);
23718  }
23719 @@ -315,10 +326,11 @@ static void pres_task_new(struct task_struct *tsk, int on_runqueue,
23720  		 * [see comment in pres_task_resume()] */
23721  		sup_update_time(&state->sup_env, litmus_clock());
23722  		task_arrives(tsk);
23723 -		pres_update_timer(state);
23724 -	}
23725 -
23726 -	raw_spin_unlock_irqrestore(&state->lock, flags);
23727 +		/* NOTE: drops state->lock */
23728 +		pres_update_timer_and_unlock(state);
23729 +		local_irq_restore(flags);
23730 +	} else
23731 +		raw_spin_unlock_irqrestore(&state->lock, flags);
23732  
23733  	task_new_legacy_task_model_updates(tsk);
23734  }
23735 @@ -340,10 +352,11 @@ static void pres_task_exit(struct task_struct *tsk)
23736  		 * [see comment in pres_task_resume()] */
23737  		sup_update_time(&state->sup_env, litmus_clock());
23738  		task_departs(tsk, 0);
23739 -		pres_update_timer(state);
23740 -	}
23741 -
23742 -	raw_spin_unlock_irqrestore(&state->lock, flags);
23743 +		/* NOTE: drops state->lock */
23744 +		pres_update_timer_and_unlock(state);
23745 +		local_irq_restore(flags);
23746 +	} else
23747 +		raw_spin_unlock_irqrestore(&state->lock, flags);
23748  
23749  	kfree(tsk_rt(tsk)->plugin_state);
23750  	tsk_rt(tsk)->plugin_state = NULL;
23751 -- 
23752 1.8.1.2
23753 
23754 
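[Illustration, not part of the patch series] The deadlock described above is the classic pattern of calling, while holding lock A, into a function that may take a lock B that is also acquired on a path that wants A. The fix is to make the decision under the lock, drop it, and only then call into the timer code. A minimal pthread sketch of that ordering; all names are invented, and arm_timer() merely stands in for hrtimer_start().

#include <stdio.h>
#include <stdint.h>
#include <pthread.h>

typedef uint64_t lt_t;

static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER;
static lt_t next_update;

/* Stand-in for hrtimer_start(): may internally take other locks (e.g.,
 * to wake a helper thread), so it must not run under state_lock. */
static void arm_timer(lt_t expiry)
{
	printf("timer armed for %llu\n", (unsigned long long) expiry);
}

static void update_timer_and_unlock(void)
{
	lt_t update;

	/* caller holds state_lock */
	update = next_update;
	pthread_mutex_unlock(&state_lock);	/* drop before the timer call */
	arm_timer(update);
}

int main(void)
{
	pthread_mutex_lock(&state_lock);
	next_update = 12345;
	update_timer_and_unlock();
	return 0;
}
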
23755 From a56ffe502e0f4edc7be9b59533455fdc3c9f86d3 Mon Sep 17 00:00:00 2001
23756 From: Bjoern Brandenburg <bbb@mpi-sws.org>
23757 Date: Mon, 15 Sep 2014 08:13:35 +0200
23758 Subject: [PATCH 070/119] Reservations: keep track of consumed budget
23759 
23760 This can be a useful stat for userspace.
23761 ---
23762  include/litmus/reservation.h  | 4 ++++
23763  litmus/polling_reservations.c | 8 ++++++++
23764  2 files changed, 12 insertions(+)
23765 
23766 diff --git a/include/litmus/reservation.h b/include/litmus/reservation.h
23767 index 9c23e27..4eecd3f 100644
23768 --- a/include/litmus/reservation.h
23769 +++ b/include/litmus/reservation.h
23770 @@ -117,6 +117,10 @@ struct reservation {
23771  	lt_t cur_budget;
23772  	lt_t next_replenishment;
23773  
23774 +	/* budget stats */
23775 +	lt_t budget_consumed; /* how much budget consumed in this allocation cycle? */
23776 +	lt_t budget_consumed_total;
23777 +
23778  	/* interaction with framework */
23779  	struct reservation_environment *env;
23780  	struct reservation_ops *ops;
23781 diff --git a/litmus/polling_reservations.c b/litmus/polling_reservations.c
23782 index 5c9b183..2c481b4 100644
23783 --- a/litmus/polling_reservations.c
23784 +++ b/litmus/polling_reservations.c
23785 @@ -84,6 +84,7 @@ static void periodic_polling_on_replenishment(
23786  	/* replenish budget */
23787  	res->cur_budget = pres->max_budget;
23788  	res->next_replenishment += pres->period;
23789 +	res->budget_consumed = 0;
23790  
23791  	switch (res->state) {
23792  		case RESERVATION_DEPLETED:
23793 @@ -129,6 +130,9 @@ static void common_drain_budget(
23794  	else
23795  		res->cur_budget -= how_much;
23796  
23797 +	res->budget_consumed += how_much;
23798 +	res->budget_consumed_total += how_much;
23799 +
23800  	switch (res->state) {
23801  		case RESERVATION_DEPLETED:
23802  		case RESERVATION_INACTIVE:
23803 @@ -384,6 +388,7 @@ static void td_replenish(
23804  	/* replenish budget */
23805  	tdres->cur_interval = tdres->intervals + tdres->next_interval;
23806  	res->cur_budget = td_time_remaining_until_end(tdres);
23807 +	res->budget_consumed = 0;
23808  	TRACE("td_replenish(%u): %s budget=%llu\n", res->id,
23809  		res->cur_budget ? "" : "WARNING", res->cur_budget);
23810  
23811 @@ -422,6 +427,9 @@ static void td_drain_budget(
23812  	struct table_driven_reservation *tdres =
23813  		container_of(res, struct table_driven_reservation, res);
23814  
23815 +	res->budget_consumed += how_much;
23816 +	res->budget_consumed_total += how_much;
23817 +
23818  	/* Table-driven scheduling: instead of tracking the budget, we compute
23819  	 * how much time is left in this allocation interval. */
23820  
23821 -- 
23822 1.8.1.2
23823 
23824 
23825 From 0dc72270017d1362bbb4eb05aa07c1967cc9c30c Mon Sep 17 00:00:00 2001
23826 From: Bjoern Brandenburg <bbb@mpi-sws.org>
23827 Date: Tue, 16 Sep 2014 12:03:10 +0200
23828 Subject: [PATCH 071/119] Reservations: priority should be a lt_t
23829 
23830 Rationale: the internal priority point representation is of type lt_t
23831 (64 bits), so to enable userspace to specify priorities below (=after)
23832 EDF priority points, we need to allow userspace to specify values
23833 larger than 2^32.
23834 ---
23835  include/litmus/rt_param.h | 2 +-
23836  1 file changed, 1 insertion(+), 1 deletion(-)
23837 
23838 diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
23839 index b252cc1..e626bbb 100644
23840 --- a/include/litmus/rt_param.h
23841 +++ b/include/litmus/rt_param.h
23842 @@ -91,7 +91,7 @@ struct lt_interval {
23843  
23844  struct reservation_config {
23845  	unsigned int id;
23846 -	unsigned int priority;
23847 +	lt_t priority;
23848  	int  cpu;
23849  
23850  	union {
23851 -- 
23852 1.8.1.2
23853 
23854 
23855 From f552154ad716c7601d07e4e90c8491766ef74fa7 Mon Sep 17 00:00:00 2001
23856 From: Bjoern Brandenburg <bbb@mpi-sws.org>
23857 Date: Wed, 17 Sep 2014 09:44:03 +0200
23858 Subject: [PATCH 072/119] P-RES: allocation in pres_admit_task() must be atomic
23859 
23860 The kernel codepath calling into pres_admit_task() is holding some
23861 lock unrelated to LITMUS^RT. As a result, we need to pass GFP_ATOMIC,
23862 not just GFP_KERNEL, to kzalloc().
23863 ---
23864  litmus/sched_pres.c | 2 +-
23865  1 file changed, 1 insertion(+), 1 deletion(-)
23866 
23867 diff --git a/litmus/sched_pres.c b/litmus/sched_pres.c
23868 index 13a47a8..49648ee 100644
23869 --- a/litmus/sched_pres.c
23870 +++ b/litmus/sched_pres.c
23871 @@ -264,7 +264,7 @@ static long pres_admit_task(struct task_struct *tsk)
23872  	unsigned long flags;
23873  	struct reservation *res;
23874  	struct pres_cpu_state *state;
23875 -	struct pres_task_state *tinfo = kzalloc(sizeof(*tinfo), GFP_KERNEL);
23876 +	struct pres_task_state *tinfo = kzalloc(sizeof(*tinfo), GFP_ATOMIC);
23877  
23878  	if (!tinfo)
23879  		return -ENOMEM;
23880 -- 
23881 1.8.1.2
23882 
23883 
23884 From b1c6f8b1f57417ea05d83261e8a20623ca11b6d5 Mon Sep 17 00:00:00 2001
23885 From: Bjoern Brandenburg <bbb@mpi-sws.org>
23886 Date: Wed, 17 Sep 2014 09:34:49 +0200
23887 Subject: [PATCH 073/119] P-RES: disable LITMUS^RT's standard budget
23888  enforcement
23889 
23890 The P-RES plugin is currently not compatible with the per-thread
23891 budget enforcement logic, which can trigger assertion failures. For
23892 now, let's simply disable per-thread timeslice enforcement. (P-RES's
23893 reservations are a much better mechanism anyway.)
23894 ---
23895  litmus/sched_pres.c | 3 +++
23896  1 file changed, 3 insertions(+)
23897 
23898 diff --git a/litmus/sched_pres.c b/litmus/sched_pres.c
23899 index 49648ee..6126852 100644
23900 --- a/litmus/sched_pres.c
23901 +++ b/litmus/sched_pres.c
23902 @@ -282,6 +282,9 @@ static long pres_admit_task(struct task_struct *tsk)
23903  		tinfo->cpu = task_cpu(tsk);
23904  		tsk_rt(tsk)->plugin_state = tinfo;
23905  		err = 0;
23906 +
23907 +		/* disable LITMUS^RT's per-thread budget enforcement */
23908 +		tsk_rt(tsk)->task_params.budget_policy = NO_ENFORCEMENT;
23909  	}
23910  
23911  	raw_spin_unlock_irqrestore(&state->lock, flags);
23912 -- 
23913 1.8.1.2
23914 
23915 
23916 From 33ad22dfbddcff613fd530f3721cd3e941f4614c Mon Sep 17 00:00:00 2001
23917 From: Bjoern Brandenburg <bbb@mpi-sws.org>
23918 Date: Wed, 17 Sep 2014 09:33:32 +0200
23919 Subject: [PATCH 074/119] P-RES: keep track in per-task state of whether it
23920  suspended
23921 
23922 Checking state->scheduled is not accurate when bandwidth inheritance
23923 is applied.
23924 ---
23925  litmus/sched_pres.c | 12 +++++++++---
23926  1 file changed, 9 insertions(+), 3 deletions(-)
23927 
23928 diff --git a/litmus/sched_pres.c b/litmus/sched_pres.c
23929 index 6126852..34b096d 100644
23930 --- a/litmus/sched_pres.c
23931 +++ b/litmus/sched_pres.c
23932 @@ -17,6 +17,7 @@
23933  struct pres_task_state {
23934  	struct task_client res_info;
23935  	int cpu;
23936 +	bool has_departed;
23937  };
23938  
23939  struct pres_cpu_state {
23940 @@ -49,6 +50,7 @@ static void task_departs(struct task_struct *tsk, int job_complete)
23941  	client = &state->res_info.client;
23942  
23943  	res->ops->client_departs(res, client, job_complete);
23944 +	state->has_departed = true;
23945  }
23946  
23947  static void task_arrives(struct task_struct *tsk)
23948 @@ -60,6 +62,7 @@ static void task_arrives(struct task_struct *tsk)
23949  	res    = state->res_info.client.reservation;
23950  	client = &state->res_info.client;
23951  
23952 +	state->has_departed = false;
23953  	res->ops->client_arrives(res, client);
23954  }
23955  
23956 @@ -223,8 +226,8 @@ static void pres_task_resume(struct task_struct  *tsk)
23957  	TRACE_TASK(tsk, "thread wakes up at %llu\n", litmus_clock());
23958  
23959  	raw_spin_lock_irqsave(&state->lock, flags);
23960 -	/* Requeue if self-suspension was already processed. */
23961 -	if (state->scheduled != tsk)
23962 +	/* Requeue only if self-suspension was already processed. */
23963 +	if (tinfo->has_departed)
23964  	{
23965  		/* Assumption: litmus_clock() is synchronized across cores,
23966  		 * since we might not actually be executing on tinfo->cpu
23967 @@ -234,8 +237,10 @@ static void pres_task_resume(struct task_struct  *tsk)
23968  		/* NOTE: drops state->lock */
23969  		pres_update_timer_and_unlock(state);
23970  		local_irq_restore(flags);
23971 -	} else
23972 +	} else {
23973 +		TRACE_TASK(tsk, "resume event ignored, still scheduled\n");
23974  		raw_spin_unlock_irqrestore(&state->lock, flags);
23975 +	}
23976  
23977  	resume_legacy_task_model_updates(tsk);
23978  }
23979 @@ -280,6 +285,7 @@ static long pres_admit_task(struct task_struct *tsk)
23980  	if (res) {
23981  		task_client_init(&tinfo->res_info, tsk, res);
23982  		tinfo->cpu = task_cpu(tsk);
23983 +		tinfo->has_departed = true;
23984  		tsk_rt(tsk)->plugin_state = tinfo;
23985  		err = 0;
23986  
23987 -- 
23988 1.8.1.2
23989 
23990 
23991 From f460d28c594341d8e8f78cfe92e6e0d42b2f5616 Mon Sep 17 00:00:00 2001
23992 From: Bjoern Brandenburg <bbb@mpi-sws.org>
23993 Date: Wed, 17 Sep 2014 17:25:50 +0200
23994 Subject: [PATCH 075/119] Reservations: fix time-tracking of table-driven
23995  reservations
23996 
23997 Keep track of the current slot and major cycle explicitly to avoid
23998 ambiguity when the budget charging is delayed into the next major
23999 cycle due to a late interrupt or other sources of delay.
24000 ---
24001  include/litmus/polling_reservations.h |  5 +++-
24002  litmus/polling_reservations.c         | 53 ++++++++++++++++++++++++++---------
24003  litmus/reservation.c                  |  2 ++
24004  3 files changed, 46 insertions(+), 14 deletions(-)
24005 
24006 diff --git a/include/litmus/polling_reservations.h b/include/litmus/polling_reservations.h
24007 index fa22181..66c9b1e 100644
24008 --- a/include/litmus/polling_reservations.h
24009 +++ b/include/litmus/polling_reservations.h
24010 @@ -24,7 +24,10 @@ struct table_driven_reservation {
24011  	unsigned int next_interval;
24012  	unsigned int num_intervals;
24013  	struct lt_interval *intervals;
24014 -	struct lt_interval *cur_interval;
24015 +
24016 +	/* info about current scheduling slot */
24017 +	struct lt_interval cur_interval;
24018 +	lt_t major_cycle_start;
24019  };
24020  
24021  void table_driven_reservation_init(struct table_driven_reservation *tdres,
24022 diff --git a/litmus/polling_reservations.c b/litmus/polling_reservations.c
24023 index 2c481b4..86a3206 100644
24024 --- a/litmus/polling_reservations.c
24025 +++ b/litmus/polling_reservations.c
24026 @@ -318,7 +318,8 @@ static void td_client_arrives(
24027  	switch (res->state) {
24028  		case RESERVATION_INACTIVE:
24029  			/* Figure out first replenishment time. */
24030 -			res->next_replenishment = td_next_major_cycle_start(tdres);
24031 +			tdres->major_cycle_start = td_next_major_cycle_start(tdres);
24032 +			res->next_replenishment  = tdres->major_cycle_start;
24033  			res->next_replenishment += tdres->intervals[0].start;
24034  			tdres->next_interval = 0;
24035  
24036 @@ -368,11 +369,12 @@ static void td_client_departs(
24037  static lt_t td_time_remaining_until_end(struct table_driven_reservation *tdres)
24038  {
24039  	lt_t now = tdres->res.env->current_time;
24040 -	lt_t end = td_cur_major_cycle_start(tdres) + tdres->cur_interval->end;
24041 -	TRACE("td_remaining(%u): start=%llu now=%llu end=%llu\n",
24042 +	lt_t end = tdres->cur_interval.end;
24043 +	TRACE("td_remaining(%u): start=%llu now=%llu end=%llu state=%d\n",
24044  		tdres->res.id,
24045 -		td_cur_major_cycle_start(tdres) + tdres->cur_interval->start,
24046 -		now, end);
24047 +		tdres->cur_interval.start,
24048 +		now, end,
24049 +		tdres->res.state);
24050  	if (now >=  end)
24051  		return 0;
24052  	else
24053 @@ -385,20 +387,36 @@ static void td_replenish(
24054  	struct table_driven_reservation *tdres =
24055  		container_of(res, struct table_driven_reservation, res);
24056  
24057 -	/* replenish budget */
24058 -	tdres->cur_interval = tdres->intervals + tdres->next_interval;
24059 +	TRACE("td_replenish(%u): expected_replenishment=%llu\n", res->id,
24060 +		res->next_replenishment);
24061 +
24062 +	/* figure out current interval */
24063 +	tdres->cur_interval.start = tdres->major_cycle_start +
24064 +		tdres->intervals[tdres->next_interval].start;
24065 +	tdres->cur_interval.end =  tdres->major_cycle_start +
24066 +		tdres->intervals[tdres->next_interval].end;
24067 +	TRACE("major_cycle_start=%llu => [%llu, %llu]\n",
24068 +		tdres->major_cycle_start,
24069 +		tdres->cur_interval.start,
24070 +		tdres->cur_interval.end);
24071 +
24072 +	/* reset budget */
24073  	res->cur_budget = td_time_remaining_until_end(tdres);
24074  	res->budget_consumed = 0;
24075  	TRACE("td_replenish(%u): %s budget=%llu\n", res->id,
24076  		res->cur_budget ? "" : "WARNING", res->cur_budget);
24077  
24078 +	/* prepare next slot */
24079  	tdres->next_interval = (tdres->next_interval + 1) % tdres->num_intervals;
24080 -	if (tdres->next_interval)
24081 -		res->next_replenishment = td_cur_major_cycle_start(tdres);
24082 -	else
24083 +	if (!tdres->next_interval)
24084  		/* wrap to next major cycle */
24085 -		res->next_replenishment = td_next_major_cycle_start(tdres);
24086 +		tdres->major_cycle_start += tdres->major_cycle;
24087 +
24088 +	/* determine next time this reservation becomes eligible to execute */
24089 +	res->next_replenishment  = tdres->major_cycle_start;
24090  	res->next_replenishment += tdres->intervals[tdres->next_interval].start;
24091 +	TRACE("td_replenish(%u): next_replenishment=%llu\n", res->id,
24092 +		res->next_replenishment);
24093  
24094  
24095  	switch (res->state) {
24096 @@ -433,6 +451,9 @@ static void td_drain_budget(
24097  	/* Table-driven scheduling: instead of tracking the budget, we compute
24098  	 * how much time is left in this allocation interval. */
24099  
24100 +	/* sanity check: we should never try to drain from future slots */
24101 +	BUG_ON(tdres->cur_interval.start > res->env->current_time);
24102 +
24103  	switch (res->state) {
24104  		case RESERVATION_DEPLETED:
24105  		case RESERVATION_INACTIVE:
24106 @@ -447,7 +468,12 @@ static void td_drain_budget(
24107  			if (!res->cur_budget) {
24108  				res->env->change_state(res->env, res,
24109  					RESERVATION_DEPLETED);
24110 -			} /* else: stay in current state */
24111 +			} else {
24112 +				/* sanity check budget calculation */
24113 +				BUG_ON(res->env->current_time >= tdres->cur_interval.end);
24114 +				BUG_ON(res->env->current_time < tdres->cur_interval.start);
24115 +			}
24116 +
24117  			break;
24118  	}
24119  }
24120 @@ -515,7 +541,8 @@ void table_driven_reservation_init(
24121  	reservation_init(&tdres->res);
24122  	tdres->major_cycle = major_cycle;
24123  	tdres->intervals = intervals;
24124 -	tdres->cur_interval = intervals;
24125 +	tdres->cur_interval.start = 0;
24126 +	tdres->cur_interval.end   = 0;
24127  	tdres->num_intervals = num_intervals;
24128  	tdres->res.ops = &td_ops;
24129  }
24130 diff --git a/litmus/reservation.c b/litmus/reservation.c
24131 index 447fc5b..f796898 100644
24132 --- a/litmus/reservation.c
24133 +++ b/litmus/reservation.c
24134 @@ -206,6 +206,7 @@ static void sup_charge_budget(
24135  			/* stop at the first ACTIVE reservation */
24136  			break;
24137  	}
24138 +	TRACE("finished charging budgets\n");
24139  }
24140  
24141  static void sup_replenish_budgets(struct sup_reservation_environment* sup_env)
24142 @@ -222,6 +223,7 @@ static void sup_replenish_budgets(struct sup_reservation_environment* sup_env)
24143  			break;
24144  		}
24145  	}
24146 +	TRACE("finished replenishing budgets\n");
24147  
24148  	/* request a scheduler update at the next replenishment instant */
24149  	res = list_first_entry_or_null(&sup_env->depleted_reservations,
24150 -- 
24151 1.8.1.2
24152 
24153 
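[Illustration, not part of the patch series] With the change above, the reservation tracks the absolute bounds of its current slot: they are computed as major_cycle_start plus the relative interval from the table, and major_cycle_start advances by one major cycle whenever the interval index wraps. A small sketch of that bookkeeping; the structure and names are illustrative only.

#include <stdio.h>
#include <stdint.h>

typedef uint64_t lt_t;

struct interval { lt_t start, end; };	/* relative to major cycle start */

struct td_state {
	lt_t major_cycle, major_cycle_start;
	unsigned int next, num;
	const struct interval *table;
	struct interval cur;		/* absolute bounds of current slot */
};

static void replenish(struct td_state *s)
{
	s->cur.start = s->major_cycle_start + s->table[s->next].start;
	s->cur.end   = s->major_cycle_start + s->table[s->next].end;
	s->next = (s->next + 1) % s->num;
	if (!s->next)			/* wrapped => next major cycle */
		s->major_cycle_start += s->major_cycle;
}

int main(void)
{
	const struct interval tbl[] = { {0, 10}, {50, 60} };
	struct td_state s = { 100, 0, 0, 2, tbl, {0, 0} };
	for (int i = 0; i < 3; i++) {
		replenish(&s);
		printf("slot [%llu, %llu]\n",
		       (unsigned long long) s.cur.start,
		       (unsigned long long) s.cur.end);
	}
	return 0;
}
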
24154 From 301fe33935a9ce53d6f2fc15084c7bcbae163d7a Mon Sep 17 00:00:00 2001
24155 From: Bjoern Brandenburg <bbb@mpi-sws.org>
24156 Date: Wed, 17 Sep 2014 17:27:20 +0200
24157 Subject: [PATCH 076/119] Reservations: request scheduler update for new active
24158  reservations
24159 
24160 Don't forget to ask for a scheduler update when a reservation is
24161 replenished but enters state ACTIVE_IDLE and there's nothing else
24162 going on that triggers the scheduler by chance.
24163 ---
24164  litmus/reservation.c | 7 ++++++-
24165  1 file changed, 6 insertions(+), 1 deletion(-)
24166 
24167 diff --git a/litmus/reservation.c b/litmus/reservation.c
24168 index f796898..0bc551e 100644
24169 --- a/litmus/reservation.c
24170 +++ b/litmus/reservation.c
24171 @@ -122,9 +122,14 @@ static void sup_queue_active(
24172  	/* check for possible preemption */
24173  	if (res->state == RESERVATION_ACTIVE && !passed_active)
24174  		sup_env->next_scheduler_update = SUP_RESCHEDULE_NOW;
24175 +	else {
24176 +		/* Active means this reservation is draining budget => make sure
24177 +		 * the scheduler is called to notice when the reservation budget has been
24178 +		 * drained completely. */
24179 +		sup_scheduler_update_after(sup_env, res->cur_budget);
24180 +	}
24181  }
24182  
24183 -
24184  static void sup_queue_reservation(
24185  	struct sup_reservation_environment* sup_env,
24186  	struct reservation *res)
24187 -- 
24188 1.8.1.2
24189 
24190 
24191 From 6d16993db5e56e01d1b19f149ef805ab7aff8e12 Mon Sep 17 00:00:00 2001
24192 From: Bjoern Brandenburg <bbb@mpi-sws.org>
24193 Date: Wed, 17 Sep 2014 13:16:08 +0200
24194 Subject: [PATCH 077/119] P-RES: improved handling of tardy jobs
24195 
24196 Don't set a release timer for jobs that are tardy and already
24197 released.
24198 ---
24199  litmus/sched_pres.c | 16 +++++++++++++---
24200  1 file changed, 13 insertions(+), 3 deletions(-)
24201 
24202 diff --git a/litmus/sched_pres.c b/litmus/sched_pres.c
24203 index 34b096d..e2d4810 100644
24204 --- a/litmus/sched_pres.c
24205 +++ b/litmus/sched_pres.c
24206 @@ -251,13 +251,23 @@ static long pres_complete_job(void)
24207  	ktime_t next_release;
24208  	long err;
24209  
24210 -	TRACE_CUR("pres_complete_job at %llu\n", litmus_clock());
24211 +	TRACE_CUR("pres_complete_job at %llu (deadline: %llu)\n", litmus_clock(),
24212 +		get_deadline(current));
24213  
24214  	tsk_rt(current)->completed = 1;
24215  	prepare_for_next_period(current);
24216  	next_release = ns_to_ktime(get_release(current));
24217 -	set_current_state(TASK_INTERRUPTIBLE);
24218 -	err = schedule_hrtimeout(&next_release, HRTIMER_MODE_ABS);
24219 +	preempt_disable();
24220 +	TRACE_CUR("next_release=%llu\n", get_release(current));
24221 +	if (get_release(current) > litmus_clock()) {
24222 +		set_current_state(TASK_INTERRUPTIBLE);
24223 +		preempt_enable_no_resched();
24224 +		err = schedule_hrtimeout(&next_release, HRTIMER_MODE_ABS);
24225 +	} else {
24226 +		err = 0;
24227 +		TRACE_CUR("TARDY: release=%llu now=%llu\n", get_release(current), litmus_clock());
24228 +		preempt_enable();
24229 +	}
24230  
24231  	TRACE_CUR("pres_complete_job returns at %llu\n", litmus_clock());
24232  	return err;
24233 -- 
24234 1.8.1.2
24235 
24236 
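The pattern introduced above (arm an absolute hrtimer only when the next
release still lies in the future; return immediately for a tardy job) can be
read in isolation. A sketch under the assumption that get_release() and
litmus_clock() return nanosecond timestamps, as elsewhere in LITMUS^RT:

    #include <linux/hrtimer.h>
    #include <linux/sched.h>
    #include <litmus/litmus.h>

    /* Sketch: sleep until the current task's next release, unless that release
     * has already passed (tardy job), in which case no timer is armed. */
    static long sleep_until_next_release(void)
    {
            ktime_t next_release = ns_to_ktime(get_release(current));
            long err = 0;

            preempt_disable();
            if (get_release(current) > litmus_clock()) {
                    /* future release: block until the absolute timeout */
                    set_current_state(TASK_INTERRUPTIBLE);
                    preempt_enable_no_resched();
                    err = schedule_hrtimeout(&next_release, HRTIMER_MODE_ABS);
            } else {
                    /* tardy: the job is already released, return right away */
                    preempt_enable();
            }
            return err;
    }

The preempt_disable()/preempt_enable_no_resched() pairing mirrors the patch and
avoids a preemption between the clock comparison and the call into
schedule_hrtimeout().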
24237 From 3805cbc73a63f74166c65446395785390d7ad44b Mon Sep 17 00:00:00 2001
24238 From: Namhoon Kim <namhoonk@cs.unc.edu>
24239 Date: Tue, 4 Nov 2014 05:01:02 -0500
24240 Subject: [PATCH 078/119] Add set_mc2_task_param system call
24241 
24242 ---
24243  arch/arm/include/asm/unistd.h    | 4 ++--
24244  arch/arm/kernel/calls.S          | 5 +++--
24245  arch/x86/syscalls/syscall_32.tbl | 5 +++--
24246  arch/x86/syscalls/syscall_64.tbl | 6 +++---
24247  include/litmus/unistd_32.h       | 3 ++-
24248  include/litmus/unistd_64.h       | 5 +++--
24249  6 files changed, 16 insertions(+), 12 deletions(-)
24250 
24251 diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
24252 index 8b26b32..0db825d 100644
24253 --- a/arch/arm/include/asm/unistd.h
24254 +++ b/arch/arm/include/asm/unistd.h
24255 @@ -14,8 +14,8 @@
24256  #define __ASM_ARM_UNISTD_H
24257  
24258  #include <uapi/asm/unistd.h>
24259 -
24260 -#define __NR_syscalls  (380 + NR_litmus_syscalls)
24261 +/** __NR_syscalls padding */
24262 +#define __NR_syscalls  (380 + NR_litmus_syscalls + 1)
24263  
24264  #define __ARM_NR_cmpxchg		(__ARM_NR_BASE+0x00fff0)
24265  
24266 diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
24267 index ad22fcc..34c5ee7 100644
24268 --- a/arch/arm/kernel/calls.S
24269 +++ b/arch/arm/kernel/calls.S
24270 @@ -401,8 +401,9 @@
24271          	CALL(sys_wait_for_ts_release)
24272  /* 390 */	CALL(sys_release_ts)
24273  		CALL(sys_null_call)
24274 -	    CALL(sys_reservation_create)
24275 -	    CALL(sys_reservation_destroy)
24276 +		CALL(sys_reservation_create)
24277 +		CALL(sys_reservation_destroy)
24278 +		CALL(sys_set_mc2_task_param)
24279  
24280  #ifndef syscalls_counted
24281  .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
24282 diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
24283 index 290c879..20f6cdc 100644
24284 --- a/arch/x86/syscalls/syscall_32.tbl
24285 +++ b/arch/x86/syscalls/syscall_32.tbl
24286 @@ -370,5 +370,6 @@
24287  360	i386	wait_for_ts_release	sys_wait_for_ts_release
24288  361	i386	release_ts		sys_release_ts
24289  362	i386	null_call		sys_null_call
24290 -363 i386    reservation_create sys_reservation_create
24291 -364 i386    reservation_destroy sys_reservation_destroy
24292 +363	i386	reservation_create	sys_reservation_create
24293 +364	i386	reservation_destroy	sys_reservation_destroy
24294 +365	i386	set_mc2_task_param	sys_set_mc2_task_param
24295 diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
24296 index d39de2a..f3d142c 100644
24297 --- a/arch/x86/syscalls/syscall_64.tbl
24298 +++ b/arch/x86/syscalls/syscall_64.tbl
24299 @@ -333,9 +333,9 @@
24300  360	common	wait_for_ts_release	sys_wait_for_ts_release
24301  361	common	release_ts		sys_release_ts
24302  362	common	null_call		sys_null_call
24303 -363 common  reservation_create  sys_reservation_create
24304 -364 common  reservation_destroy sys_reservation_destroy
24305 -
24306 +363	common	reservation_create	sys_reservation_create
24307 +364	common	reservation_destroy	sys_reservation_destroy
24308 +365	common	set_mc2_task_param	sys_set_mc2_task_param
24309  
24310  #
24311  # x32-specific system call numbers start at 512 to avoid cache impact
24312 diff --git a/include/litmus/unistd_32.h b/include/litmus/unistd_32.h
24313 index 5f6a274..202f439 100644
24314 --- a/include/litmus/unistd_32.h
24315 +++ b/include/litmus/unistd_32.h
24316 @@ -19,5 +19,6 @@
24317  #define __NR_null_call		__LSC(11)
24318  #define __NR_reservation_create	__LSC(12)
24319  #define __NR_reservation_destroy __LSC(13)
24320 +#define __NR_set_mc2_task_param	__LSC(14)
24321  
24322 -#define NR_litmus_syscalls 14
24323 +#define NR_litmus_syscalls 15
24324 diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h
24325 index 3e6b1d3..ba2c91c 100644
24326 --- a/include/litmus/unistd_64.h
24327 +++ b/include/litmus/unistd_64.h
24328 @@ -33,6 +33,7 @@ __SYSCALL(__NR_null_call, sys_null_call)
24329  __SYSCALL(__NR_reservation_create, sys_reservation_create)
24330  #define __NR_reservation_destroy		__LSC(13)
24331  __SYSCALL(__NR_reservation_destroy, sys_reservation_destroy)
24332 +#define __NR_set_mc2_task_param			__LSC(14)
24333 +__SYSCALL(__NR_set_mc2_task_param, sys_set_mc2_task_param)
24334  
24335 -
24336 -#define NR_litmus_syscalls 14
24337 +#define NR_litmus_syscalls 15
24338 -- 
24339 1.8.1.2
24340 
24341 
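On the user-space side, the new table entries can be exercised with a raw
syscall(2) until a liblitmus wrapper is available. A hypothetical sketch: the
number 365 is the x86 entry added above, and the struct mc2_task layout shown
here is the one this series settles on later (criticality level plus
reservation id); both are assumptions for illustration, and the in-kernel
handler itself lives in the MC2 code elsewhere in the series.

    /* Hypothetical user-space sketch; not part of the patch itself. */
    #include <stdio.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    /* Assumed to match include/litmus/mc2_common.h as of the later patches
     * in this series; CRIT_LEVEL_A is assumed to be 0. */
    enum crit_level { CRIT_LEVEL_A = 0, CRIT_LEVEL_B, CRIT_LEVEL_C, NUM_CRIT_LEVELS };

    struct mc2_task {
            enum crit_level crit;
            unsigned int res_id;
    };

    #define __NR_set_mc2_task_param 365     /* x86 table entry added above */

    int main(void)
    {
            struct mc2_task mp = { .crit = CRIT_LEVEL_B, .res_id = 1 };

            if (syscall(__NR_set_mc2_task_param, (long)getpid(), &mp) != 0)
                    perror("set_mc2_task_param");
            return 0;
    }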
24342 From cef9a08794f4ecdd0d1ea80ef4035f2bc9e234ce Mon Sep 17 00:00:00 2001
24343 From: Namhoon Kim <namhoonk@cs.unc.edu>
24344 Date: Tue, 4 Nov 2014 05:01:31 -0500
24345 Subject: [PATCH 079/119] Add MC2 plugin
24346 
24347 ---
24348  litmus/Makefile      | 4 +++-
24349  litmus/litmus.c      | 2 +-
24350  litmus/reservation.c | 6 +++---
24351  litmus/sched_mc2.c   | 4 ++++
24352  4 files changed, 11 insertions(+), 5 deletions(-)
24353 
24354 diff --git a/litmus/Makefile b/litmus/Makefile
24355 index 05021f5..997524f 100644
24356 --- a/litmus/Makefile
24357 +++ b/litmus/Makefile
24358 @@ -19,6 +19,7 @@ obj-y     = sched_plugin.o litmus.o \
24359  	    binheap.o \
24360  	    ctrldev.o \
24361  	    uncachedev.o \
24362 +	    cache_proc.o \
24363  	    sched_gsn_edf.o \
24364  	    sched_psn_edf.o \
24365  	    sched_pfp.o
24366 @@ -35,4 +36,5 @@ obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o
24367  
24368  obj-y += reservation.o polling_reservations.o
24369  
24370 -obj-y += sched_pres.o
24371 \ No newline at end of file
24372 +obj-y += sched_pres.o
24373 +obj-y += mc2_common.o sched_mc2.o
24374 diff --git a/litmus/litmus.c b/litmus/litmus.c
24375 index 0b87e04..8a2446f 100644
24376 --- a/litmus/litmus.c
24377 +++ b/litmus/litmus.c
24378 @@ -646,7 +646,7 @@ static void __init litmus_enable_perfcounters_v7(void *_ignore)
24379  	asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(enable_val));
24380  
24381  	/* enables counters (cycle counter and event 1) */
24382 -        asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r"(0x80000001));
24383 +    asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r"(0x80000001));
24384  }
24385  
24386  static void __init litmus_enable_perfcounters(void)
24387 diff --git a/litmus/reservation.c b/litmus/reservation.c
24388 index 0bc551e..0e43479 100644
24389 --- a/litmus/reservation.c
24390 +++ b/litmus/reservation.c
24391 @@ -211,7 +211,7 @@ static void sup_charge_budget(
24392  			/* stop at the first ACTIVE reservation */
24393  			break;
24394  	}
24395 -	TRACE("finished charging budgets\n");
24396 +	//TRACE("finished charging budgets\n");
24397  }
24398  
24399  static void sup_replenish_budgets(struct sup_reservation_environment* sup_env)
24400 @@ -228,7 +228,7 @@ static void sup_replenish_budgets(struct sup_reservation_environment* sup_env)
24401  			break;
24402  		}
24403  	}
24404 -	TRACE("finished replenishing budgets\n");
24405 +	//TRACE("finished replenishing budgets\n");
24406  
24407  	/* request a scheduler update at the next replenishment instant */
24408  	res = list_first_entry_or_null(&sup_env->depleted_reservations,
24409 @@ -246,7 +246,7 @@ void sup_update_time(
24410  	/* If the time didn't advance, there is nothing to do.
24411  	 * This check makes it safe to call sup_advance_time() potentially
24412  	 * multiple times (e.g., via different code paths. */
24413 -	TRACE("(sup_update_time) now: %llu, current_time: %llu\n", now, sup_env->env.current_time);
24414 +	//TRACE("(sup_update_time) now: %llu, current_time: %llu\n", now, sup_env->env.current_time);
24415  	if (unlikely(now <= sup_env->env.current_time))
24416  		return;
24417  
24418 diff --git a/litmus/sched_mc2.c b/litmus/sched_mc2.c
24419 index ab35008..1e5b28b 100644
24420 --- a/litmus/sched_mc2.c
24421 +++ b/litmus/sched_mc2.c
24422 @@ -381,6 +381,8 @@ static void mc2_task_new(struct task_struct *tsk, int on_runqueue,
24423  	task_new_legacy_task_model_updates(tsk);
24424  }
24425  
24426 +static long mc2_reservation_destroy(unsigned int reservation_id, int cpu);
24427 +
24428  static void mc2_task_exit(struct task_struct *tsk)
24429  {
24430  	unsigned long flags;
24431 @@ -404,6 +406,8 @@ static void mc2_task_exit(struct task_struct *tsk)
24432  	} else
24433  		raw_spin_unlock_irqrestore(&state->lock, flags);
24434  
24435 +	mc2_reservation_destroy(tsk->pid, tinfo->cpu);
24436 +	
24437  	kfree(tsk_rt(tsk)->plugin_state);
24438  	tsk_rt(tsk)->plugin_state = NULL;
24439  }
24440 -- 
24441 1.8.1.2
24442 
24443 
24444 From d77654f3287edf9fa6aeda97825e9a972bdc8821 Mon Sep 17 00:00:00 2001
24445 From: Namhoon Kim <namhoonk@cs.unc.edu>
24446 Date: Wed, 19 Nov 2014 16:01:27 -0500
24447 Subject: [PATCH 080/119] Add set_page_color system call
24448 
24449 ---
24450  arch/arm/include/asm/unistd.h    |   2 +-
24451  arch/arm/kernel/calls.S          |   3 +-
24452  arch/arm/mm/cache-l2x0.c         |   4 ++
24453  arch/x86/syscalls/syscall_32.tbl |   1 +
24454  arch/x86/syscalls/syscall_64.tbl |   1 +
24455  include/litmus/unistd_32.h       |   3 +-
24456  include/litmus/unistd_64.h       |   4 +-
24457  litmus/litmus.c                  | 110 +++++++++++++++++++++++++++++++++++++++
24458  8 files changed, 124 insertions(+), 4 deletions(-)
24459 
24460 diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
24461 index 0db825d..f31a912 100644
24462 --- a/arch/arm/include/asm/unistd.h
24463 +++ b/arch/arm/include/asm/unistd.h
24464 @@ -15,7 +15,7 @@
24465  
24466  #include <uapi/asm/unistd.h>
24467  /** __NR_syscalls padding */
24468 -#define __NR_syscalls  (380 + NR_litmus_syscalls + 1)
24469 +#define __NR_syscalls  (380 + NR_litmus_syscalls)
24470  
24471  #define __ARM_NR_cmpxchg		(__ARM_NR_BASE+0x00fff0)
24472  
24473 diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
24474 index 34c5ee7..2fbce68 100644
24475 --- a/arch/arm/kernel/calls.S
24476 +++ b/arch/arm/kernel/calls.S
24477 @@ -404,7 +404,8 @@
24478  		CALL(sys_reservation_create)
24479  		CALL(sys_reservation_destroy)
24480  		CALL(sys_set_mc2_task_param)
24481 -
24482 +/* 395 */	CALL(sys_set_page_color)
24483 +		
24484  #ifndef syscalls_counted
24485  .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
24486  #define syscalls_counted
24487 diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
24488 index 5efe6b6e..cff808e 100644
24489 --- a/arch/arm/mm/cache-l2x0.c
24490 +++ b/arch/arm/mm/cache-l2x0.c
24491 @@ -326,6 +326,7 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
24492  {
24493  	u32 aux;
24494  	u32 cache_id;
24495 +	u32 cache_type;
24496  	u32 way_size = 0;
24497  	int ways;
24498  	int way_size_shift = L2X0_WAY_SIZE_SHIFT;
24499 @@ -337,6 +338,8 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
24500  	else
24501  		cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID);
24502  	aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
24503 +	
24504 +	cache_type = readl_relaxed(l2x0_base + L2X0_CACHE_TYPE);
24505  
24506  	aux &= aux_mask;
24507  	aux |= aux_val;
24508 @@ -424,6 +427,7 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
24509  	printk(KERN_INFO "%s cache controller enabled\n", type);
24510  	printk(KERN_INFO "l2x0: %d ways, CACHE_ID 0x%08x, AUX_CTRL 0x%08x, Cache size: %d B\n",
24511  			ways, cache_id, aux, l2x0_size);
24512 +	printk(KERN_INFO "l2x0: CACHE_TYPE 0x%08x\n", cache_type);
24513  
24514  	litmus_setup_lockdown(l2x0_base, cache_id);
24515  }
24516 diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
24517 index 20f6cdc..401f313 100644
24518 --- a/arch/x86/syscalls/syscall_32.tbl
24519 +++ b/arch/x86/syscalls/syscall_32.tbl
24520 @@ -373,3 +373,4 @@
24521  363	i386	reservation_create	sys_reservation_create
24522  364	i386	reservation_destroy	sys_reservation_destroy
24523  365	i386	set_mc2_task_param	sys_set_mc2_task_param
24524 +366	i386	set_page_color		sys_set_page_color
24525 diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
24526 index f3d142c..77710a9 100644
24527 --- a/arch/x86/syscalls/syscall_64.tbl
24528 +++ b/arch/x86/syscalls/syscall_64.tbl
24529 @@ -336,6 +336,7 @@
24530  363	common	reservation_create	sys_reservation_create
24531  364	common	reservation_destroy	sys_reservation_destroy
24532  365	common	set_mc2_task_param	sys_set_mc2_task_param
24533 +366	common	set_page_color		sys_set_page_color
24534  
24535  #
24536  # x32-specific system call numbers start at 512 to avoid cache impact
24537 diff --git a/include/litmus/unistd_32.h b/include/litmus/unistd_32.h
24538 index 202f439..cfffbdd 100644
24539 --- a/include/litmus/unistd_32.h
24540 +++ b/include/litmus/unistd_32.h
24541 @@ -20,5 +20,6 @@
24542  #define __NR_reservation_create	__LSC(12)
24543  #define __NR_reservation_destroy __LSC(13)
24544  #define __NR_set_mc2_task_param	__LSC(14)
24545 +#define __NR_set_page_color		__LSC(15)
24546  
24547 -#define NR_litmus_syscalls 15
24548 +#define NR_litmus_syscalls 16
24549 diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h
24550 index ba2c91c..2fb49dc 100644
24551 --- a/include/litmus/unistd_64.h
24552 +++ b/include/litmus/unistd_64.h
24553 @@ -35,5 +35,7 @@ __SYSCALL(__NR_reservation_create, sys_reservation_create)
24554  __SYSCALL(__NR_reservation_destroy, sys_reservation_destroy)
24555  #define __NR_set_mc2_task_param			__LSC(14)
24556  __SYSCALL(__NR_set_mc2_task_param, sys_set_mc2_task_param)
24557 +#define __NR_set_page_color				__LSC(15)
24558 +__SYSCALL(__NR_set_page_color, sys_set_page_color)
24559  
24560 -#define NR_litmus_syscalls 15
24561 +#define NR_litmus_syscalls 16
24562 diff --git a/litmus/litmus.c b/litmus/litmus.c
24563 index 8a2446f..cdffbc6 100644
24564 --- a/litmus/litmus.c
24565 +++ b/litmus/litmus.c
24566 @@ -13,6 +13,7 @@
24567  #include <linux/stop_machine.h>
24568  #include <linux/sched/rt.h>
24569  #include <linux/rwsem.h>
24570 +#include <linux/list.h>
24571  
24572  #include <litmus/litmus.h>
24573  #include <litmus/bheap.h>
24574 @@ -325,6 +326,112 @@ asmlinkage long sys_reservation_destroy(unsigned int reservation_id, int cpu)
24575  	return litmus->reservation_destroy(reservation_id, cpu);
24576  }
24577  
24578 +struct task_page {
24579 +	unsigned long vm_start;
24580 +	unsigned long vm_end;
24581 +	struct page* page;
24582 +	struct list_head list;
24583 +};
24584 +
24585 +LIST_HEAD(task_page_list);
24586 +
24587 +static unsigned long color_mask;
24588 +
24589 +static inline unsigned long page_color(struct page *page)
24590 +{
24591 +	return ((page_to_phys(page) & color_mask) >> PAGE_SHIFT);
24592 +}
24593 +
24594 +static struct page *walk_page_table(unsigned long addr)
24595 +{
24596 +    pgd_t *pgd;
24597 +    pte_t *ptep, pte;
24598 +    pud_t *pud;
24599 +    pmd_t *pmd;
24600 +
24601 +    struct page *page = NULL;
24602 +    struct mm_struct *mm = current->mm;
24603 +
24604 +    pgd = pgd_offset(mm, addr);
24605 +    if (pgd_none(*pgd) || pgd_bad(*pgd))
24606 +        goto out;
24607 +    
24608 +    pud = pud_offset(pgd, addr);
24609 +    if (pud_none(*pud) || pud_bad(*pud))
24610 +        goto out;
24611 +    
24612 +    pmd = pmd_offset(pud, addr);
24613 +    if (pmd_none(*pmd) || pmd_bad(*pmd))
24614 +        goto out;
24615 +    
24616 +    ptep = pte_offset_map(pmd, addr);
24617 +    if (!ptep)
24618 +        goto out;
24619 +    pte = *ptep;
24620 +
24621 +    page = pte_page(pte);
24622 +    if (page)
24623 +        printk(KERN_INFO "page frame struct is @ %p\n", page);
24624 +	
24625 +	pte_unmap(ptep);
24626 +
24627 + out:
24628 +    return page;
24629 +}
24630 +
24631 +asmlinkage long sys_set_page_color(int cpu)
24632 +{
24633 +	long ret = 0;
24634 +	struct task_page *task_page_itr = NULL;
24635 +	struct task_page *task_page_itr_next = NULL;
24636 +	struct vm_area_struct *vma_itr = NULL;
24637 +	struct task_page *entry = NULL;
24638 +	
24639 +	down_read(&current->mm->mmap_sem);
24640 +	printk(KERN_INFO "SYSCALL set_page_color\n");
24641 +	vma_itr = current->mm->mmap;
24642 +	while (vma_itr != NULL) {
24643 +		unsigned int num_pages = 0, i;
24644 +		struct page *new_page = NULL;
24645 +		entry = kmalloc(sizeof(struct task_page), GFP_ATOMIC);
24646 +		if (entry == NULL) {
24647 +			return -ENOSPC;
24648 +		}
24649 +		entry->vm_start = vma_itr->vm_start;
24650 +		entry->vm_end = vma_itr->vm_end;
24651 +		num_pages = (entry->vm_end - entry->vm_start) / PAGE_SIZE;
24652 +		// print vma flags
24653 +		printk(KERN_INFO "flags: 0x%lx\n", vma_itr->vm_flags);
24654 +		printk(KERN_INFO "start - end: 0x%lx - 0x%lx (%lu)\n", vma_itr->vm_start, vma_itr->vm_end, (vma_itr->vm_end - vma_itr->vm_start)/PAGE_SIZE);
24655 +		
24656 +		for (i = 0; i < num_pages; i++) {
24657 +alloc:
24658 +			new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma_itr, vma_itr->vm_start);
24659 +			if (!new_page)
24660 +				return -ENOSPC;
24661 +			if ( (page_color(new_page)>>2) != cpu) {
24662 +				
24663 +				
24664 +		}
24665 +		//entry->page = walk_page_table(entry->vm_start);
24666 +		
24667 +		INIT_LIST_HEAD(&entry->list);
24668 +		list_add(&entry->list, &task_page_list);
24669 +		
24670 +		vma_itr = vma_itr->vm_next;
24671 +	}
24672 +	
24673 +	up_read(&current->mm->mmap_sem);
24674 +	
24675 +	list_for_each_entry_safe(task_page_itr, task_page_itr_next, &task_page_list, list) {
24676 +		//printk(KERN_INFO "start - end: 0x%lx - 0x%lx (%lu)\n", task_page_itr->vm_start, task_page_itr->vm_end, (task_page_itr->vm_end - task_page_itr->vm_start)/PAGE_SIZE);
24677 +		list_del(&task_page_itr->list);
24678 +		kfree(task_page_itr);		
24679 +	}
24680 +	
24681 +	return ret;
24682 +}
24683 +
24684  /* p is a real-time task. Re-init its state as a best-effort task. */
24685  static void reinit_litmus_state(struct task_struct* p, int restore)
24686  {
24687 @@ -662,6 +769,8 @@ static int __init _init_litmus(void)
24688  	 *      mode change lock is used to enforce single mode change
24689  	 *      operation.
24690  	 */
24691 +	unsigned int line_size_log = 5; // 2^5 = 32 byte
24692 +	unsigned int cache_info_sets = 2048; // 64KB (way_size) / 32B (line_size) = 2048
24693  	printk("Starting LITMUS^RT kernel\n");
24694  
24695  	register_sched_plugin(&linux_sched_plugin);
24696 @@ -689,6 +798,7 @@ static int __init _init_litmus(void)
24697  	litmus_enable_perfcounters();
24698  #endif
24699  	
24700 +	color_mask = ((cache_info_sets << line_size_log) - 1) ^ (PAGE_SIZE - 1);
24701  	return 0;
24702  }
24703  
24704 -- 
24705 1.8.1.2
24706 
24707 
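The color arithmetic set up in _init_litmus() above can be checked by hand:
with 2048 sets of 32-byte lines (one 64 KiB L2 way) and 4 KiB pages, color_mask
works out to 0xF000, so physical-address bits 12-15 select one of 16 page
colors, and the (page_color(new_page) >> 2) test in sys_set_page_color groups
four consecutive colors per index, presumably one group per core of the
quad-core i.MX6 this series targets. A small user-space sketch of the same
computation; the example physical address is arbitrary:

    /* Sketch of the color_mask / page_color() arithmetic above. Constants
     * mirror the hard-coded values in _init_litmus(). */
    #include <stdio.h>

    int main(void)
    {
            unsigned long line_size_log = 5;     /* 2^5 = 32 B cache lines */
            unsigned long sets = 2048;           /* 64 KiB way / 32 B      */
            unsigned long page_size = 4096, page_shift = 12;
            unsigned long color_mask, phys, color;

            color_mask = ((sets << line_size_log) - 1) ^ (page_size - 1);
            printf("color_mask = 0x%lx\n", color_mask);        /* 0xf000 */

            phys  = 0x1080a000UL;                              /* example frame */
            color = (phys & color_mask) >> page_shift;         /* bits 12-15    */
            printf("color = %lu, group = %lu\n", color, color >> 2);  /* 10, 2 */
            return 0;
    }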
24708 From 07d5680c4c476a4b68bd3cff134d99ca996b2481 Mon Sep 17 00:00:00 2001
24709 From: Namhoon Kim <namhoonk@cs.unc.edu>
24710 Date: Fri, 21 Nov 2014 15:37:46 -0500
24711 Subject: [PATCH 081/119] Page coloring via page migration (static linking)
24712 
24713 ---
24714  litmus/litmus.c | 94 ++++++++++++++++++++++++++++++++++++++++++++-------------
24715  1 file changed, 73 insertions(+), 21 deletions(-)
24716 
24717 diff --git a/litmus/litmus.c b/litmus/litmus.c
24718 index cdffbc6..88cc3e0 100644
24719 --- a/litmus/litmus.c
24720 +++ b/litmus/litmus.c
24721 @@ -14,13 +14,16 @@
24722  #include <linux/sched/rt.h>
24723  #include <linux/rwsem.h>
24724  #include <linux/list.h>
24725 +#include <linux/migrate.h>
24726 +#include <linux/mm.h>
24727 +#include <linux/memcontrol.h>
24728  
24729  #include <litmus/litmus.h>
24730  #include <litmus/bheap.h>
24731  #include <litmus/trace.h>
24732  #include <litmus/rt_domain.h>
24733 -#include <litmus/litmus_proc.h>
24734  #include <litmus/sched_trace.h>
24735 +#include <litmus/litmus_proc.h>
24736  #include <litmus/clock.h>
24737  
24738  #include <asm/cacheflush.h>
24739 @@ -342,6 +345,7 @@ static inline unsigned long page_color(struct page *page)
24740  	return ((page_to_phys(page) & color_mask) >> PAGE_SHIFT);
24741  }
24742  
24743 +/*
24744  static struct page *walk_page_table(unsigned long addr)
24745  {
24746      pgd_t *pgd;
24747 @@ -353,15 +357,18 @@ static struct page *walk_page_table(unsigned long addr)
24748      struct mm_struct *mm = current->mm;
24749  
24750      pgd = pgd_offset(mm, addr);
24751 -    if (pgd_none(*pgd) || pgd_bad(*pgd))
24752 +    //if (pgd_none(*pgd) || pgd_bad(*pgd))
24753 +	if (pgd_none_or_clear_bad(pgd))
24754          goto out;
24755      
24756      pud = pud_offset(pgd, addr);
24757 -    if (pud_none(*pud) || pud_bad(*pud))
24758 +    //if (pud_none(*pud) || pud_bad(*pud))
24759 +	if (pud_none_or_clear_bad(pud))
24760          goto out;
24761      
24762      pmd = pmd_offset(pud, addr);
24763 -    if (pmd_none(*pmd) || pmd_bad(*pmd))
24764 +    //if (pmd_none(*pmd) || pmd_bad(*pmd))
24765 +	if (pmd_none_or_clear_bad(pmd))
24766          goto out;
24767      
24768      ptep = pte_offset_map(pmd, addr);
24769 @@ -370,65 +377,110 @@ static struct page *walk_page_table(unsigned long addr)
24770      pte = *ptep;
24771  
24772      page = pte_page(pte);
24773 -    if (page)
24774 -        printk(KERN_INFO "page frame struct is @ %p\n", page);
24775 +    if (pfn_valid(__page_to_pfn(page))) {
24776 +        ;//printk(KERN_INFO "page frame struct is @ %p\n", page);
24777 +		//printk(KERN_INFO "pfn is %lu\n", __page_to_pfn(page));
24778 +	}
24779  	
24780  	pte_unmap(ptep);
24781  
24782   out:
24783      return page;
24784  }
24785 +*/
24786 +
24787 +extern int isolate_lru_page(struct page *page);
24788 +extern void putback_lru_page(struct page *page);
24789 +
24790 +static struct page *new_alloc_page(struct page *page, unsigned long node, int **x)
24791 +{
24792 +	return alloc_pages_exact_node(node, GFP_HIGHUSER_MOVABLE, 0);
24793 +}
24794  
24795  asmlinkage long sys_set_page_color(int cpu)
24796  {
24797  	long ret = 0;
24798 -	struct task_page *task_page_itr = NULL;
24799 -	struct task_page *task_page_itr_next = NULL;
24800 +	//struct task_page *task_page_itr = NULL;
24801 +	//struct task_page *task_page_itr_next = NULL;
24802  	struct vm_area_struct *vma_itr = NULL;
24803 -	struct task_page *entry = NULL;
24804 +	//struct task_page *entry = NULL;
24805 +	int nr_pages = 0;
24806 +	LIST_HEAD(pagelist);
24807  	
24808  	down_read(&current->mm->mmap_sem);
24809  	printk(KERN_INFO "SYSCALL set_page_color\n");
24810  	vma_itr = current->mm->mmap;
24811  	while (vma_itr != NULL) {
24812  		unsigned int num_pages = 0, i;
24813 -		struct page *new_page = NULL;
24814 +		struct page *new_page = NULL, *old_page = NULL;
24815 +		/*
24816  		entry = kmalloc(sizeof(struct task_page), GFP_ATOMIC);
24817  		if (entry == NULL) {
24818  			return -ENOSPC;
24819  		}
24820  		entry->vm_start = vma_itr->vm_start;
24821  		entry->vm_end = vma_itr->vm_end;
24822 -		num_pages = (entry->vm_end - entry->vm_start) / PAGE_SIZE;
24823 +		*/
24824 +		num_pages = (vma_itr->vm_end - vma_itr->vm_start) / PAGE_SIZE;
24825  		// print vma flags
24826 -		printk(KERN_INFO "flags: 0x%lx\n", vma_itr->vm_flags);
24827 -		printk(KERN_INFO "start - end: 0x%lx - 0x%lx (%lu)\n", vma_itr->vm_start, vma_itr->vm_end, (vma_itr->vm_end - vma_itr->vm_start)/PAGE_SIZE);
24828 +		//printk(KERN_INFO "flags: 0x%lx\n", vma_itr->vm_flags);
24829 +		//printk(KERN_INFO "start - end: 0x%lx - 0x%lx (%lu)\n", vma_itr->vm_start, vma_itr->vm_end, (vma_itr->vm_end - vma_itr->vm_start)/PAGE_SIZE);
24830  		
24831  		for (i = 0; i < num_pages; i++) {
24832 -alloc:
24833 +/*
24834  			new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma_itr, vma_itr->vm_start);
24835  			if (!new_page)
24836  				return -ENOSPC;
24837 -			if ( (page_color(new_page)>>2) != cpu) {
24838 -				
24839 +			printk(KERN_INFO "PAGE_COLOR: %lu\n", page_color(new_page));
24840 +*/
24841 +			//old_page = walk_page_table(vma_itr->vm_start + PAGE_SIZE*i);
24842 +			old_page = follow_page(vma_itr, vma_itr->vm_start + PAGE_SIZE*i, FOLL_GET|FOLL_SPLIT);
24843 +			
24844 +			if (IS_ERR(old_page))
24845 +				continue;
24846 +			if (!old_page)
24847 +				continue;
24848 +			if (PageReserved(old_page))
24849 +				goto put_and_next;
24850  				
24851 +			ret = isolate_lru_page(old_page);
24852 +			//if (pfn_valid(__page_to_pfn(old_page)))
24853 +			if (!ret) {
24854 +				//printk(KERN_INFO "page_mapcount = %d\n", page_mapcount(old_page));
24855 +				printk(KERN_INFO "addr: %lu, pfn: %lu mapcount: %d\n", vma_itr->vm_start + PAGE_SIZE*i, __page_to_pfn(old_page), page_mapcount(old_page));
24856 +				list_add_tail(&old_page->lru, &pagelist);
24857 +				inc_zone_page_state(old_page, NR_ISOLATED_ANON + !PageSwapBacked(old_page));
24858 +				nr_pages++;
24859 +			}
24860 +put_and_next:				
24861 +				put_page(old_page);
24862  		}
24863 -		//entry->page = walk_page_table(entry->vm_start);
24864  		
24865 -		INIT_LIST_HEAD(&entry->list);
24866 -		list_add(&entry->list, &task_page_list);
24867 +		//INIT_LIST_HEAD(&entry->list);
24868 +		//list_add(&entry->list, &task_page_list);
24869  		
24870  		vma_itr = vma_itr->vm_next;
24871  	}
24872 +
24873 +	ret = 0;
24874 +	if (!list_empty(&pagelist)) {
24875 +		ret = migrate_pages(&pagelist, new_alloc_page, 0, MIGRATE_ASYNC, MR_SYSCALL);
24876 +		if (ret) {
24877 +			printk(KERN_INFO "%ld pages not migrated.\n", ret);
24878 +			putback_lru_pages(&pagelist);
24879 +		}
24880 +	}
24881  	
24882  	up_read(&current->mm->mmap_sem);
24883 -	
24884 +
24885 +/*	
24886  	list_for_each_entry_safe(task_page_itr, task_page_itr_next, &task_page_list, list) {
24887  		//printk(KERN_INFO "start - end: 0x%lx - 0x%lx (%lu)\n", task_page_itr->vm_start, task_page_itr->vm_end, (task_page_itr->vm_end - task_page_itr->vm_start)/PAGE_SIZE);
24888  		list_del(&task_page_itr->list);
24889  		kfree(task_page_itr);		
24890  	}
24891 -	
24892 +*/	
24893 +	printk(KERN_INFO "nr_pages = %d\n", nr_pages);
24894  	return ret;
24895  }
24896  
24897 -- 
24898 1.8.1.2
24899 
24900 
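The syscall now follows the usual isolate-then-migrate pattern: take a
reference with follow_page(FOLL_GET), pull the page off the LRU, collect it on
a private list, and hand the list to migrate_pages() with an allocation
callback. A condensed kernel-side sketch of that loop for a single VMA, using
only helpers the patch itself uses and leaving out the printk diagnostics:

    #include <linux/migrate.h>
    #include <linux/mm.h>

    /* Not exported through a header in this kernel; the patch declares it the
     * same way. */
    extern int isolate_lru_page(struct page *page);

    /* Sketch: isolate the resident pages of one VMA onto `pagelist` so the
     * caller can pass the list to migrate_pages(). Returns the number of
     * pages isolated. */
    static int isolate_vma_pages(struct vm_area_struct *vma,
            struct list_head *pagelist)
    {
            unsigned long addr;
            int nr = 0;

            for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
                    struct page *page =
                            follow_page(vma, addr, FOLL_GET | FOLL_SPLIT);

                    if (IS_ERR(page) || !page)
                            continue;
                    if (!PageReserved(page) && !isolate_lru_page(page)) {
                            list_add_tail(&page->lru, pagelist);
                            inc_zone_page_state(page,
                                    NR_ISOLATED_ANON + !PageSwapBacked(page));
                            nr++;
                    }
                    put_page(page);  /* drop the FOLL_GET reference */
            }
            return nr;
    }

The caller then invokes migrate_pages(&pagelist, new_alloc_page, 0,
MIGRATE_ASYNC, MR_SYSCALL) once per system call, as the patch does, and puts
back whatever could not be migrated.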
24901 From 6583dcfbda43e420921e3adf7f2e46dc719e8d26 Mon Sep 17 00:00:00 2001
24902 From: Namhoon Kim <namhoonk@cs.unc.edu>
24903 Date: Wed, 14 Jan 2015 02:31:12 -0500
24904 Subject: [PATCH 082/119] MC2: support criticality levels A and B
24905 
24906 ---
24907  include/litmus/mc2_common.h   |  14 +-
24908  litmus/litmus.c               |  63 ++++++--
24909  litmus/mc2_common.c           |  65 ++++++--
24910  litmus/polling_reservations.c |   9 +-
24911  litmus/sched_mc2.c            | 357 ++++++++++++++++++++----------------------
24912  litmus/sched_pres.c           |  10 ++
24913  6 files changed, 288 insertions(+), 230 deletions(-)
24914 
24915 diff --git a/include/litmus/mc2_common.h b/include/litmus/mc2_common.h
24916 index a1d571f..bdc3a6d 100644
24917 --- a/include/litmus/mc2_common.h
24918 +++ b/include/litmus/mc2_common.h
24919 @@ -14,24 +14,14 @@ enum crit_level {
24920  
24921  struct mc2_task {
24922  	enum crit_level crit;
24923 -	pid_t pid;
24924 -	lt_t hyperperiod;
24925 +	unsigned int res_id;
24926  };
24927  
24928  #ifdef __KERNEL__
24929  
24930  #include <litmus/reservation.h>
24931  
24932 -struct mc2_param{
24933 -	struct mc2_task mc2_task;
24934 -};
24935 -
24936 -struct mc2_task_client {
24937 -	struct task_client tc;
24938 -	struct mc2_param mc2;
24939 -};
24940 -
24941 -long mc2_task_client_init(struct mc2_task_client *mtc, struct task_struct *tsk,
24942 +long mc2_task_client_init(struct task_client *tc, struct mc2_task *mc2_param, struct task_struct *tsk,
24943  							struct reservation *res);
24944  	
24945  #endif /* __KERNEL__ */
24946 diff --git a/litmus/litmus.c b/litmus/litmus.c
24947 index 88cc3e0..6034ff8 100644
24948 --- a/litmus/litmus.c
24949 +++ b/litmus/litmus.c
24950 @@ -402,10 +402,12 @@ asmlinkage long sys_set_page_color(int cpu)
24951  	long ret = 0;
24952  	//struct task_page *task_page_itr = NULL;
24953  	//struct task_page *task_page_itr_next = NULL;
24954 +	struct page *page_itr = NULL;
24955  	struct vm_area_struct *vma_itr = NULL;
24956  	//struct task_page *entry = NULL;
24957 -	int nr_pages = 0;
24958 +	int nr_pages = 0, nr_shared_pages = 0;
24959  	LIST_HEAD(pagelist);
24960 +	LIST_HEAD(shared_pagelist);
24961  	
24962  	down_read(&current->mm->mmap_sem);
24963  	printk(KERN_INFO "SYSCALL set_page_color\n");
24964 @@ -423,8 +425,8 @@ asmlinkage long sys_set_page_color(int cpu)
24965  		*/
24966  		num_pages = (vma_itr->vm_end - vma_itr->vm_start) / PAGE_SIZE;
24967  		// print vma flags
24968 -		//printk(KERN_INFO "flags: 0x%lx\n", vma_itr->vm_flags);
24969 -		//printk(KERN_INFO "start - end: 0x%lx - 0x%lx (%lu)\n", vma_itr->vm_start, vma_itr->vm_end, (vma_itr->vm_end - vma_itr->vm_start)/PAGE_SIZE);
24970 +		printk(KERN_INFO "flags: 0x%lx\n", vma_itr->vm_flags);
24971 +		printk(KERN_INFO "start - end: 0x%lx - 0x%lx (%lu)\n", vma_itr->vm_start, vma_itr->vm_end, (vma_itr->vm_end - vma_itr->vm_start)/PAGE_SIZE);
24972  		
24973  		for (i = 0; i < num_pages; i++) {
24974  /*
24975 @@ -440,20 +442,32 @@ asmlinkage long sys_set_page_color(int cpu)
24976  				continue;
24977  			if (!old_page)
24978  				continue;
24979 -			if (PageReserved(old_page))
24980 -				goto put_and_next;
24981 -				
24982 -			ret = isolate_lru_page(old_page);
24983 -			//if (pfn_valid(__page_to_pfn(old_page)))
24984 -			if (!ret) {
24985 -				//printk(KERN_INFO "page_mapcount = %d\n", page_mapcount(old_page));
24986 -				printk(KERN_INFO "addr: %lu, pfn: %lu mapcount: %d\n", vma_itr->vm_start + PAGE_SIZE*i, __page_to_pfn(old_page), page_mapcount(old_page));
24987 -				list_add_tail(&old_page->lru, &pagelist);
24988 -				inc_zone_page_state(old_page, NR_ISOLATED_ANON + !PageSwapBacked(old_page));
24989 -				nr_pages++;
24990 +			if (PageReserved(old_page)) {
24991 +				put_page(old_page);
24992 +				continue;
24993 +			}
24994 +			
24995 +			printk(KERN_INFO "addr: %lu, pfn: %lu, _mapcount: %d, _count: %d\n", vma_itr->vm_start + PAGE_SIZE*i, __page_to_pfn(old_page), page_mapcount(old_page), page_count(old_page));
24996 +			
24997 +			if (page_mapcount(old_page) == 1) {
24998 +				ret = isolate_lru_page(old_page);
24999 +				//if (pfn_valid(__page_to_pfn(old_page)))
25000 +				if (!ret) {
25001 +					list_add_tail(&old_page->lru, &pagelist);
25002 +					inc_zone_page_state(old_page, NR_ISOLATED_ANON + !PageSwapBacked(old_page));
25003 +					nr_pages++;
25004 +				}
25005 +				put_page(old_page);
25006  			}
25007 -put_and_next:				
25008 +			else {
25009 +				ret = isolate_lru_page(old_page);
25010 +				if (!ret) {
25011 +					list_add_tail(&old_page->lru, &shared_pagelist);
25012 +					inc_zone_page_state(old_page, NR_ISOLATED_ANON + !PageSwapBacked(old_page));
25013 +					nr_shared_pages++;
25014 +				}					
25015  				put_page(old_page);
25016 +			}
25017  		}
25018  		
25019  		//INIT_LIST_HEAD(&entry->list);
25020 @@ -462,6 +476,10 @@ put_and_next:
25021  		vma_itr = vma_itr->vm_next;
25022  	}
25023  
25024 +	//list_for_each_entry(page_itr, &pagelist, lru) {
25025 +//		printk(KERN_INFO "B _mapcount = %d, _count = %d\n", page_mapcount(page_itr), page_count(page_itr));
25026 +//	}
25027 +	
25028  	ret = 0;
25029  	if (!list_empty(&pagelist)) {
25030  		ret = migrate_pages(&pagelist, new_alloc_page, 0, MIGRATE_ASYNC, MR_SYSCALL);
25031 @@ -471,8 +489,23 @@ put_and_next:
25032  		}
25033  	}
25034  	
25035 +	/* copy shared pages HERE */
25036 +/*	
25037 +	ret = 0;
25038 +	if (!list_empty(&shared_pagelist)) {
25039 +		ret = migrate_shared_pages(&shared_pagelist, new_alloc_page, 0, MIGRATE_ASYNC, MR_SYSCALL);
25040 +		if (ret) {
25041 +			printk(KERN_INFO "%ld shared pages not migrated.\n", ret);
25042 +			putback_lru_pages(&shared_pagelist);
25043 +		}
25044 +	}
25045 +*/
25046  	up_read(&current->mm->mmap_sem);
25047  
25048 +	list_for_each_entry(page_itr, &shared_pagelist, lru) {
25049 +		printk(KERN_INFO "S Anon=%d, pfn = %lu, _mapcount = %d, _count = %d\n", PageAnon(page_itr), __page_to_pfn(page_itr), page_mapcount(page_itr), page_count(page_itr));
25050 +	}
25051 +	
25052  /*	
25053  	list_for_each_entry_safe(task_page_itr, task_page_itr_next, &task_page_list, list) {
25054  		//printk(KERN_INFO "start - end: 0x%lx - 0x%lx (%lu)\n", task_page_itr->vm_start, task_page_itr->vm_end, (task_page_itr->vm_end - task_page_itr->vm_start)/PAGE_SIZE);
25055 diff --git a/litmus/mc2_common.c b/litmus/mc2_common.c
25056 index 56ef6b5..d0a42c6 100644
25057 --- a/litmus/mc2_common.c
25058 +++ b/litmus/mc2_common.c
25059 @@ -7,6 +7,8 @@
25060  #include <linux/percpu.h>
25061  #include <linux/sched.h>
25062  #include <linux/list.h>
25063 +#include <linux/slab.h>
25064 +#include <asm/uaccess.h>
25065  
25066  #include <litmus/litmus.h>
25067  #include <litmus/sched_plugin.h>
25068 @@ -14,17 +16,62 @@
25069  
25070  #include <litmus/mc2_common.h>
25071  
25072 -long mc2_task_client_init(
25073 -	struct mc2_task_client *mtc,
25074 -	struct task_struct *tsk,
25075 -	struct reservation *res
25076 -)
25077 +long mc2_task_client_init(struct task_client *tc, struct mc2_task *mc2_param, struct task_struct *tsk, struct reservation *res)
25078  {
25079 -	task_client_init(&mtc->tc, tsk, res);
25080 -	if ((mtc->mc2.mc2_task.crit < CRIT_LEVEL_A) ||
25081 -		(mtc->mc2.mc2_task.crit > CRIT_LEVEL_C))
25082 +	task_client_init(tc, tsk, res);
25083 +	if ((mc2_param->crit < CRIT_LEVEL_A) ||
25084 +		(mc2_param->crit > CRIT_LEVEL_C))
25085  		return -EINVAL;
25086  	
25087 -	TRACE_TASK(tsk, "mc2_task_client_init: crit_level = %d\n", mtc->mc2.mc2_task.crit);
25088 +	TRACE_TASK(tsk, "mc2_task_client_init: crit_level = %d\n", mc2_param->crit);
25089 +	
25090  	return 0;
25091 +}
25092 +
25093 +asmlinkage long sys_set_mc2_task_param(pid_t pid, struct mc2_task __user * param)
25094 +{
25095 +	struct task_struct *target;
25096 +	int retval = -EINVAL;
25097 +	struct mc2_task *mp = kzalloc(sizeof(*mp), GFP_KERNEL);
25098 +	
25099 +	if (!mp)
25100 +		return -ENOMEM;
25101 +
25102 +	printk("Setting up mc^2 task parameters for process %d.\n", pid);
25103 +
25104 +	if (pid < 0 || param == 0) {
25105 +		goto out;
25106 +	}
25107 +	if (copy_from_user(mp, param, sizeof(*mp))) {
25108 +		retval = -EFAULT;
25109 +		goto out;
25110 +	}
25111 +
25112 +	/* Task search and manipulation must be protected */
25113 +	read_lock_irq(&tasklist_lock);
25114 +	if (!(target = find_task_by_vpid(pid))) {
25115 +		retval = -ESRCH;
25116 +		goto out_unlock;
25117 +	}
25118 +
25119 +	if (is_realtime(target)) {
25120 +		/* The task is already a real-time task.
25121 +		 * We cannot not allow parameter changes at this point.
25122 +		 */
25123 +		retval = -EBUSY;
25124 +		goto out_unlock;
25125 +	}
25126 +	if (mp->crit < CRIT_LEVEL_A || mp->crit >= NUM_CRIT_LEVELS) {
25127 +		printk(KERN_INFO "litmus: real-time task %d rejected "
25128 +			"because of invalid criticality level\n", pid);
25129 +		goto out_unlock;
25130 +	}
25131 +	
25132 +	target->rt_param.plugin_state = mp;
25133 +
25134 +	retval = 0;
25135 +out_unlock:
25136 +	read_unlock_irq(&tasklist_lock);
25137 +out:
25138 +	return retval;
25139  }
25140 \ No newline at end of file
25141 diff --git a/litmus/polling_reservations.c b/litmus/polling_reservations.c
25142 index 86a3206..4c07ee7 100644
25143 --- a/litmus/polling_reservations.c
25144 +++ b/litmus/polling_reservations.c
25145 @@ -452,7 +452,10 @@ static void td_drain_budget(
25146  	 * how much time is left in this allocation interval. */
25147  
25148  	/* sanity check: we should never try to drain from future slots */
25149 -	BUG_ON(tdres->cur_interval.start > res->env->current_time);
25150 +	TRACE("TD_DRAIN STATE(%d) [%llu,%llu]  %llu ?\n", res->state, tdres->cur_interval.start, tdres->cur_interval.end, res->env->current_time);
25151 +	//BUG_ON(tdres->cur_interval.start > res->env->current_time);
25152 +	if (tdres->cur_interval.start > res->env->current_time)
25153 +		TRACE("TD_DRAIN BUG!!!!!!!!!!\n");
25154  
25155  	switch (res->state) {
25156  		case RESERVATION_DEPLETED:
25157 @@ -470,8 +473,8 @@ static void td_drain_budget(
25158  					RESERVATION_DEPLETED);
25159  			} else {
25160  				/* sanity check budget calculation */
25161 -				BUG_ON(res->env->current_time >= tdres->cur_interval.end);
25162 -				BUG_ON(res->env->current_time < tdres->cur_interval.start);
25163 +				//BUG_ON(res->env->current_time >= tdres->cur_interval.end);
25164 +				//BUG_ON(res->env->current_time < tdres->cur_interval.start);
25165  			}
25166  
25167  			break;
25168 diff --git a/litmus/sched_mc2.c b/litmus/sched_mc2.c
25169 index 1e5b28b..b9f0523 100644
25170 --- a/litmus/sched_mc2.c
25171 +++ b/litmus/sched_mc2.c
25172 @@ -16,9 +16,10 @@
25173  #include <litmus/polling_reservations.h>
25174  
25175  struct mc2_task_state {
25176 -	struct mc2_task_client res_info;
25177 +	struct task_client res_info;
25178  	int cpu;
25179  	bool has_departed;
25180 +	struct mc2_task mc2_param;
25181  };
25182  
25183  struct mc2_cpu_state {
25184 @@ -29,12 +30,7 @@ struct mc2_cpu_state {
25185  
25186  	int cpu;
25187  	struct task_struct* scheduled;
25188 -
25189 -#ifdef CONFIG_RELEASE_MASTER
25190 -	int release_master;
25191 -	/* used to delegate releases */
25192 -	struct hrtimer_start_on_info info;
25193 -#endif
25194 +	enum crit_level run_level;
25195  };
25196  
25197  static DEFINE_PER_CPU(struct mc2_cpu_state, mc2_cpu_state);
25198 @@ -53,8 +49,8 @@ static void task_departs(struct task_struct *tsk, int job_complete)
25199  	struct reservation* res;
25200  	struct reservation_client *client;
25201  
25202 -	res    = state->res_info.tc.client.reservation;
25203 -	client = &state->res_info.tc.client;
25204 +	res    = state->res_info.client.reservation;
25205 +	client = &state->res_info.client;
25206  
25207  	res->ops->client_departs(res, client, job_complete);
25208  	state->has_departed = true;
25209 @@ -66,8 +62,8 @@ static void task_arrives(struct task_struct *tsk)
25210  	struct reservation* res;
25211  	struct reservation_client *client;
25212  
25213 -	res    = state->res_info.tc.client.reservation;
25214 -	client = &state->res_info.tc.client;
25215 +	res    = state->res_info.client.reservation;
25216 +	client = &state->res_info.client;
25217  
25218  	state->has_departed = false;
25219  	res->ops->client_arrives(res, client);
25220 @@ -81,13 +77,13 @@ static void mc2_update_timer_and_unlock(struct mc2_cpu_state *state)
25221  
25222  	update = state->sup_env.next_scheduler_update;
25223  	now = state->sup_env.env.current_time;
25224 -	
25225 +
25226  	/* Be sure we're actually running on the right core,
25227 -	 * as mc2_update_timer() is also called from mc2_task_resume(),
25228 +	 * as pres_update_timer() is also called from pres_task_resume(),
25229  	 * which might be called on any CPU when a thread resumes.
25230  	 */
25231  	local = local_cpu_state() == state;
25232 -	
25233 +
25234  	/* Must drop state lock before calling into hrtimer_start(), which
25235  	 * may raise a softirq, which in turn may wake ksoftirqd. */
25236  	raw_spin_unlock(&state->lock);
25237 @@ -169,36 +165,10 @@ static enum hrtimer_restart on_scheduling_timer(struct hrtimer *timer)
25238  	return restart;
25239  }
25240  
25241 -static long mc2_complete_job(void)
25242 -{
25243 -	ktime_t next_release;
25244 -	long err;
25245 -
25246 -	TRACE_CUR("mc2_complete_job at %llu (deadline: %llu)\n", litmus_clock(),
25247 -					get_deadline(current));
25248 -
25249 -	tsk_rt(current)->completed = 1;
25250 -	prepare_for_next_period(current);
25251 -	next_release = ns_to_ktime(get_release(current));
25252 -	preempt_disable();
25253 -	TRACE_CUR("next_release=%llu\n", get_release(current));
25254 -	if (get_release(current) > litmus_clock()) {
25255 -		set_current_state(TASK_INTERRUPTIBLE);
25256 -		preempt_enable_no_resched();
25257 -		err = schedule_hrtimeout(&next_release, HRTIMER_MODE_ABS);
25258 -	} else {
25259 -		err = 0;
25260 -		TRACE_CUR("TARDY: release=%llu now=%llu\n", get_release(current), litmus_clock());
25261 -		preempt_enable();
25262 -	}
25263 -
25264 -	TRACE_CUR("mc2_complete_job returns [%d] at %llu\n", err, litmus_clock());
25265 -	return err;
25266 -}
25267 -
25268  static struct task_struct* mc2_schedule(struct task_struct * prev)
25269  {
25270  	/* next == NULL means "schedule background work". */
25271 +	struct mc2_task_state *tinfo;
25272  	struct mc2_cpu_state *state = local_cpu_state();
25273  
25274  	raw_spin_lock(&state->lock);
25275 @@ -210,16 +180,6 @@ static struct task_struct* mc2_schedule(struct task_struct * prev)
25276  	state->sup_env.will_schedule = true;
25277  	sup_update_time(&state->sup_env, litmus_clock());
25278  
25279 -	/* check if prev task complete */
25280 -	if (is_realtime(prev)) {
25281 -		TRACE_TASK(prev, "EXEC_TIME = %llu, EXEC_COST = %llu, REMAINED = %llu\n",
25282 -			get_exec_time(prev), get_exec_cost(prev), get_exec_cost(prev)-get_exec_time(prev)); 
25283 -	}
25284 -	if (is_realtime(prev) && (get_exec_time(prev) >= get_exec_cost(prev))) {
25285 -		TRACE_TASK(prev, "JOB COMPLETED! but is_completed = %d\n", is_completed(prev));
25286 -//		mc2_complete_job(prev);
25287 -	}
25288 -
25289  	/* remove task from reservation if it blocks */
25290  	if (is_realtime(prev) && !is_running(prev))
25291  		task_departs(prev, is_completed(prev));
25292 @@ -234,12 +194,17 @@ static struct task_struct* mc2_schedule(struct task_struct * prev)
25293  	state->sup_env.will_schedule = false;
25294  	/* NOTE: drops state->lock */
25295  	mc2_update_timer_and_unlock(state);
25296 -	
25297 -	if (prev != state->scheduled && is_realtime(prev))
25298 +
25299 +	if (prev != state->scheduled && is_realtime(prev)) {
25300  		TRACE_TASK(prev, "descheduled.\n");
25301 -	if (state->scheduled)
25302 +		state->run_level = NUM_CRIT_LEVELS;
25303 +	}
25304 +	if (state->scheduled) {
25305  		TRACE_TASK(state->scheduled, "scheduled.\n");
25306 -
25307 +		//tinfo = get_mc2_state(state->scheduled);
25308 +		//state->run_level = tinfo->mc2_param.crit;
25309 +	}
25310 +	
25311  	return state->scheduled;
25312  }
25313  
25314 @@ -279,6 +244,7 @@ static void mc2_task_resume(struct task_struct  *tsk)
25315  		sup_update_time(&state->sup_env, litmus_clock());
25316  		task_arrives(tsk);
25317  		/* NOTE: drops state->lock */
25318 +		TRACE("mc2_resume()\n");
25319  		mc2_update_timer_and_unlock(state);
25320  		local_irq_restore(flags);
25321  	} else {
25322 @@ -289,6 +255,60 @@ static void mc2_task_resume(struct task_struct  *tsk)
25323  	resume_legacy_task_model_updates(tsk);
25324  }
25325  
25326 +/* syscall backend for job completions */
25327 +static long mc2_complete_job(void)
25328 +{
25329 +	ktime_t next_release;
25330 +	long err;
25331 +	struct mc2_cpu_state *state = local_cpu_state();
25332 +	struct reservation_environment *env = &(state->sup_env.env);
25333 +	struct mc2_task_state *tinfo = get_mc2_state(current);
25334 +	
25335 +	
25336 +	TRACE_CUR("mc2_complete_job at %llu (deadline: %llu)\n", litmus_clock(),
25337 +		get_deadline(current));
25338 +
25339 +	tsk_rt(current)->completed = 1;
25340 +	
25341 +	if (tsk_rt(current)->sporadic_release) {
25342 +		env->time_zero = tsk_rt(current)->sporadic_release_time;
25343 +	
25344 +		if (tinfo->mc2_param.crit == CRIT_LEVEL_A) {
25345 +			struct reservation *res;
25346 +			struct table_driven_reservation *tdres;
25347 +			
25348 +			sup_update_time(&state->sup_env, litmus_clock());
25349 +			res = sup_find_by_id(&state->sup_env, tinfo->mc2_param.res_id);
25350 +			tdres = container_of(res, struct table_driven_reservation, res);
25351 +			tdres->next_interval = 0;
25352 +			tdres->major_cycle_start = tsk_rt(current)->sporadic_release_time;
25353 +			res->next_replenishment = tsk_rt(current)->sporadic_release_time;
25354 +			res->next_replenishment += tdres->intervals[0].start;
25355 +			res->env->change_state(res->env, res, RESERVATION_DEPLETED);
25356 +						
25357 +			TRACE_CUR("CHANGE NEXT_REP = %llu\n NEXT_UPDATE = %llu\n", res->next_replenishment, state->sup_env.next_scheduler_update);
25358 +		}
25359 +		
25360 +	}
25361 +		
25362 +	prepare_for_next_period(current);
25363 +	next_release = ns_to_ktime(get_release(current));
25364 +	preempt_disable();
25365 +	TRACE_CUR("next_release=%llu\n", get_release(current));
25366 +	if (get_release(current) > litmus_clock()) {
25367 +		set_current_state(TASK_INTERRUPTIBLE);
25368 +		preempt_enable_no_resched();
25369 +		err = schedule_hrtimeout(&next_release, HRTIMER_MODE_ABS);
25370 +	} else {
25371 +		err = 0;
25372 +		TRACE_CUR("TARDY: release=%llu now=%llu\n", get_release(current), litmus_clock());
25373 +		preempt_enable();
25374 +	}
25375 +
25376 +	TRACE_CUR("mc2_complete_job returns at %llu\n", litmus_clock());
25377 +	return err;
25378 +}
25379 +
25380  static long mc2_admit_task(struct task_struct *tsk)
25381  {
25382  	long err = -ESRCH;
25383 @@ -297,10 +317,10 @@ static long mc2_admit_task(struct task_struct *tsk)
25384  	struct mc2_cpu_state *state;
25385  	struct mc2_task_state *tinfo = kzalloc(sizeof(*tinfo), GFP_ATOMIC);
25386  	struct mc2_task *mp = tsk_rt(tsk)->plugin_state;
25387 -	
25388 +
25389  	if (!tinfo)
25390  		return -ENOMEM;
25391 -	
25392 +
25393  	if (!mp) {
25394  		printk(KERN_ERR "mc2_admit_task: criticality level has not been set\n");
25395  		return err;
25396 @@ -311,21 +331,22 @@ static long mc2_admit_task(struct task_struct *tsk)
25397  	state = cpu_state_for(task_cpu(tsk));
25398  	raw_spin_lock_irqsave(&state->lock, flags);
25399  
25400 -	res = sup_find_by_id(&state->sup_env, mp->pid);
25401 +	res = sup_find_by_id(&state->sup_env, mp->res_id);
25402  
25403  	/* found the appropriate reservation (or vCPU) */
25404  	if (res) {
25405 -		TRACE_TASK(tsk, "FOUND RES\n");
25406 -		tinfo->res_info.mc2.mc2_task.crit = mp->crit;
25407 +		TRACE_TASK(tsk, "FOUND RES ID\n");
25408 +		tinfo->mc2_param.crit = mp->crit;
25409 +		tinfo->mc2_param.res_id = mp->res_id;
25410  		
25411  		kfree(tsk_rt(tsk)->plugin_state);
25412  		tsk_rt(tsk)->plugin_state = NULL;
25413 -	
25414 -		err = mc2_task_client_init(&tinfo->res_info, tsk, res);
25415 +		
25416 +		err = mc2_task_client_init(&tinfo->res_info, &tinfo->mc2_param, tsk, res);
25417  		tinfo->cpu = task_cpu(tsk);
25418  		tinfo->has_departed = true;
25419  		tsk_rt(tsk)->plugin_state = tinfo;
25420 -		
25421 +
25422  		/* disable LITMUS^RT's per-thread budget enforcement */
25423  		tsk_rt(tsk)->task_params.budget_policy = NO_ENFORCEMENT;
25424  	}
25425 @@ -340,21 +361,14 @@ static long mc2_admit_task(struct task_struct *tsk)
25426  	return err;
25427  }
25428  
25429 -static void task_new_legacy_task_model_updates(struct task_struct *tsk)
25430 -{
25431 -	lt_t now = litmus_clock();
25432 -
25433 -	/* the first job exists starting as of right now */
25434 -	release_at(tsk, now);
25435 -}
25436 -
25437  static void mc2_task_new(struct task_struct *tsk, int on_runqueue,
25438  			  int is_running)
25439  {
25440  	unsigned long flags;
25441  	struct mc2_task_state* tinfo = get_mc2_state(tsk);
25442  	struct mc2_cpu_state *state = cpu_state_for(tinfo->cpu);
25443 -
25444 +	struct reservation *res;
25445 +	
25446  	TRACE_TASK(tsk, "new RT task %llu (on_rq:%d, running:%d)\n",
25447  		   litmus_clock(), on_runqueue, is_running);
25448  
25449 @@ -373,15 +387,69 @@ static void mc2_task_new(struct task_struct *tsk, int on_runqueue,
25450  		sup_update_time(&state->sup_env, litmus_clock());
25451  		task_arrives(tsk);
25452  		/* NOTE: drops state->lock */
25453 +		TRACE("mc2_new()\n");
25454  		mc2_update_timer_and_unlock(state);
25455  		local_irq_restore(flags);
25456  	} else
25457  		raw_spin_unlock_irqrestore(&state->lock, flags);
25458  
25459 -	task_new_legacy_task_model_updates(tsk);
25460 +	res = sup_find_by_id(&state->sup_env, tinfo->mc2_param.res_id);
25461 +	release_at(tsk, res->next_replenishment);
25462 +	if (res)
25463 +		TRACE_TASK(tsk, "next_replenishment = %llu\n", res->next_replenishment);
25464 +	else
25465 +		TRACE_TASK(tsk, "next_replenishment = NULL\n");
25466  }
25467  
25468 -static long mc2_reservation_destroy(unsigned int reservation_id, int cpu);
25469 +static long mc2_reservation_destroy(unsigned int reservation_id, int cpu)
25470 +{
25471 +	long ret = -EINVAL;
25472 +	struct mc2_cpu_state *state;
25473 +	struct reservation *res = NULL, *next;
25474 +	struct sup_reservation_environment *sup_env;
25475 +	int found = 0;
25476 +		
25477 +	state = cpu_state_for(cpu);
25478 +	raw_spin_lock(&state->lock);
25479 +	
25480 +//	res = sup_find_by_id(&state->sup_env, reservation_id);
25481 +	sup_env = &state->sup_env;
25482 +	//if (!res) {
25483 +	list_for_each_entry_safe(res, next, &sup_env->depleted_reservations, list) {
25484 +		if (res->id == reservation_id) {
25485 +			list_del(&res->list);
25486 +			//kfree(res);
25487 +			found = 1;
25488 +			ret = 0;
25489 +		}
25490 +	}
25491 +	if (!found) {
25492 +		list_for_each_entry_safe(res, next, &sup_env->inactive_reservations, list) {
25493 +			if (res->id == reservation_id) {
25494 +				list_del(&res->list);
25495 +				//kfree(res);
25496 +				found = 1;
25497 +				ret = 0;
25498 +			}
25499 +		}
25500 +	}
25501 +	if (!found) {
25502 +		list_for_each_entry_safe(res, next, &sup_env->active_reservations, list) {
25503 +			if (res->id == reservation_id) {
25504 +				list_del(&res->list);
25505 +				//kfree(res);
25506 +				found = 1;
25507 +				ret = 0;
25508 +			}
25509 +		}
25510 +	}
25511 +	//}
25512 +
25513 +	raw_spin_unlock(&state->lock);
25514 +	
25515 +	TRACE("RESERVATION_DESTROY ret = %d\n", ret);
25516 +	return ret;
25517 +}
25518  
25519  static void mc2_task_exit(struct task_struct *tsk)
25520  {
25521 @@ -401,65 +469,31 @@ static void mc2_task_exit(struct task_struct *tsk)
25522  		sup_update_time(&state->sup_env, litmus_clock());
25523  		task_departs(tsk, 0);
25524  		/* NOTE: drops state->lock */
25525 +		TRACE("mc2_exit()\n");
25526  		mc2_update_timer_and_unlock(state);
25527  		local_irq_restore(flags);
25528  	} else
25529  		raw_spin_unlock_irqrestore(&state->lock, flags);
25530 -
25531 -	mc2_reservation_destroy(tsk->pid, tinfo->cpu);
25532 -	
25533 +/*
25534 +	if (tinfo->mc2_param.crit == CRIT_LEVEL_A) {
25535 +		struct table_driven_reservation *td_res;
25536 +		struct reservation *res;
25537 +		res = sup_find_by_id(&state->sup_env, tinfo->mc2_param.res_id);
25538 +		td_res = container_of(res, struct table_driven_reservation, res);
25539 +		kfree(td_res->intervals);
25540 +		//kfree(td_res);
25541 +	} else if (tinfo->mc2_param.crit == CRIT_LEVEL_B) {
25542 +		struct polling_reservation *pres;
25543 +		struct reservation *res;
25544 +		res = sup_find_by_id(&state->sup_env, tinfo->mc2_param.res_id);
25545 +		pres = container_of(res, struct polling_reservation, res);
25546 +		kfree(pres);
25547 +	}
25548 +*/
25549  	kfree(tsk_rt(tsk)->plugin_state);
25550  	tsk_rt(tsk)->plugin_state = NULL;
25551  }
25552  
25553 -asmlinkage long sys_set_mc2_task_param(pid_t pid, struct mc2_task __user * param)
25554 -{
25555 -	struct task_struct *target;
25556 -	int retval = -EINVAL;
25557 -	struct mc2_task *mp = kzalloc(sizeof(*mp), GFP_KERNEL);
25558 -	
25559 -	if (!mp)
25560 -		return -ENOMEM;
25561 -
25562 -	printk("Setting up mc^2 task parameters for process %d.\n", pid);
25563 -
25564 -	if (pid < 0 || param == 0) {
25565 -		goto out;
25566 -	}
25567 -	if (copy_from_user(mp, param, sizeof(*mp))) {
25568 -		retval = -EFAULT;
25569 -		goto out;
25570 -	}
25571 -
25572 -	/* Task search and manipulation must be protected */
25573 -	read_lock_irq(&tasklist_lock);
25574 -	if (!(target = find_task_by_vpid(pid))) {
25575 -		retval = -ESRCH;
25576 -		goto out_unlock;
25577 -	}
25578 -
25579 -	if (is_realtime(target)) {
25580 -		/* The task is already a real-time task.
25581 -		 * We cannot not allow parameter changes at this point.
25582 -		 */
25583 -		retval = -EBUSY;
25584 -		goto out_unlock;
25585 -	}
25586 -	if (mp->crit < CRIT_LEVEL_A || mp->crit >= NUM_CRIT_LEVELS) {
25587 -		printk(KERN_INFO "litmus: real-time task %d rejected "
25588 -			"because of invalid criticality level\n", pid);
25589 -		goto out_unlock;
25590 -	}
25591 -	
25592 -	target->rt_param.plugin_state = mp;
25593 -
25594 -	retval = 0;
25595 -      out_unlock:
25596 -	read_unlock_irq(&tasklist_lock);
25597 -      out:
25598 -	return retval;
25599 -}
25600 -
25601  static long create_polling_reservation(
25602  	int res_type,
25603  	struct reservation_config *config)
25604 @@ -566,10 +600,6 @@ static long create_table_driven_reservation(
25605  		err = copy_from_user(slots,
25606  			config->table_driven_params.intervals, slots_size);
25607  
25608 -	for (i=0; i<num_slots;i++) {
25609 -		TRACE("###### [%llu, %llu]\n", slots[i].start, slots[i].end);
25610 -	}
25611 -	
25612  	if (!err) {
25613  		/* sanity checks */
25614  		for (i = 0; !err && i < num_slots; i++)
25615 @@ -628,7 +658,6 @@ static long create_table_driven_reservation(
25616  		kfree(td_res);
25617  	}
25618  
25619 -	TRACE("CREATE_TABLE_DRIVEN_RES = %d\n", err);
25620  	return err;
25621  }
25622  
25623 @@ -665,54 +694,6 @@ static long mc2_reservation_create(int res_type, void* __user _config)
25624  	return ret;
25625  }
25626  
25627 -static long mc2_reservation_destroy(unsigned int reservation_id, int cpu)
25628 -{
25629 -	long ret = -EINVAL;
25630 -	struct mc2_cpu_state *state;
25631 -	struct reservation *res, *next;
25632 -	struct sup_reservation_environment *sup_env;
25633 -	unsigned long flags;
25634 -	int found = 0;
25635 -	
25636 -	state = cpu_state_for(cpu);
25637 -	raw_spin_lock_irqsave(&state->lock, flags);
25638 -	
25639 -	//res = sup_find_by_id(&state->sup_env, reservation_id);
25640 -	sup_env = &state->sup_env;
25641 -	//if (!res) {
25642 -	list_for_each_entry_safe(res, next, &sup_env->depleted_reservations, list) {
25643 -		if (res->id == reservation_id) {
25644 -			list_del(&res->list);
25645 -			found = 1;
25646 -			ret = 0;
25647 -		}
25648 -	}
25649 -	if (!found) {
25650 -		list_for_each_entry_safe(res, next, &sup_env->inactive_reservations, list) {
25651 -			if (res->id == reservation_id) {
25652 -				list_del(&res->list);
25653 -				found = 1;
25654 -				ret = 0;
25655 -			}
25656 -		}
25657 -	}
25658 -	if (!found) {
25659 -		list_for_each_entry_safe(res, next, &sup_env->active_reservations, list) {
25660 -			if (res->id == reservation_id) {
25661 -				list_del(&res->list);
25662 -				found = 1;
25663 -				ret = 0;
25664 -			}
25665 -		}
25666 -	}
25667 -	//}
25668 -
25669 -	raw_spin_unlock_irqrestore(&state->lock, flags);
25670 -	
25671 -	TRACE("RESERVATION_DESTROY ret = %d\n", ret);
25672 -	return ret;
25673 -}
25674 -
25675  static struct domain_proc_info mc2_domain_proc_info;
25676  
25677  static long mc2_get_domain_proc_info(struct domain_proc_info **ret)
25678 @@ -756,11 +737,6 @@ static long mc2_activate_plugin(void)
25679  
25680  		state = cpu_state_for(cpu);
25681  
25682 -#ifdef CONFIG_RELEASE_MASTER
25683 -		state->release_master = atomic_read(&release_master_cpu);
25684 -		hrtimer_start_on_info_init(&state->info);
25685 -#endif
25686 -		
25687  		raw_spin_lock_init(&state->lock);
25688  		state->cpu = cpu;
25689  		state->scheduled = NULL;
25690 @@ -823,15 +799,15 @@ static long mc2_deactivate_plugin(void)
25691  }
25692  
25693  static struct sched_plugin mc2_plugin = {
25694 -	.plugin_name		= "MC2",
25695 -	.schedule		= mc2_schedule,
25696 -	.task_wake_up		= mc2_task_resume,
25697 -	.admit_task		= mc2_admit_task,
25698 -	.task_new		= mc2_task_new,
25699 -	.task_exit		= mc2_task_exit,
25700 +	.plugin_name			= "MC2",
25701 +	.schedule				= mc2_schedule,
25702 +	.task_wake_up			= mc2_task_resume,
25703 +	.admit_task				= mc2_admit_task,
25704 +	.task_new				= mc2_task_new,
25705 +	.task_exit				= mc2_task_exit,
25706  	.complete_job           = mc2_complete_job,
25707  	.get_domain_proc_info   = mc2_get_domain_proc_info,
25708 -	.activate_plugin	= mc2_activate_plugin,
25709 +	.activate_plugin		= mc2_activate_plugin,
25710  	.deactivate_plugin      = mc2_deactivate_plugin,
25711  	.reservation_create     = mc2_reservation_create,
25712  	.reservation_destroy	= mc2_reservation_destroy,
25713 @@ -843,4 +819,3 @@ static int __init init_mc2(void)
25714  }
25715  
25716  module_init(init_mc2);
25717 -
25718 diff --git a/litmus/sched_pres.c b/litmus/sched_pres.c
25719 index e2d4810..6c636cc 100644
25720 --- a/litmus/sched_pres.c
25721 +++ b/litmus/sched_pres.c
25722 @@ -14,6 +14,8 @@
25723  #include <litmus/reservation.h>
25724  #include <litmus/polling_reservations.h>
25725  
25726 +//static int testval = 0;
25727 +
25728  struct pres_task_state {
25729  	struct task_client res_info;
25730  	int cpu;
25731 @@ -172,6 +174,11 @@ static struct task_struct* pres_schedule(struct task_struct * prev)
25732  	BUG_ON(state->scheduled && state->scheduled != prev);
25733  	BUG_ON(state->scheduled && !is_realtime(prev));
25734  
25735 +//if (testval == 1) {
25736 +//	testval = 0;
25737 +//	printk(KERN_INFO "TESTVAL = 1 at %llu\n", litmus_clock());
25738 +//}
25739 +	
25740  	/* update time */
25741  	state->sup_env.will_schedule = true;
25742  	sup_update_time(&state->sup_env, litmus_clock());
25743 @@ -250,6 +257,8 @@ static long pres_complete_job(void)
25744  {
25745  	ktime_t next_release;
25746  	long err;
25747 +//testval = 1;
25748 +//printk(KERN_INFO "pres_complete_job at %llu (deadline: %llu)\n", litmus_clock(), get_deadline(current));
25749  
25750  	TRACE_CUR("pres_complete_job at %llu (deadline: %llu)\n", litmus_clock(),
25751  		get_deadline(current));
25752 @@ -270,6 +279,7 @@ static long pres_complete_job(void)
25753  	}
25754  
25755  	TRACE_CUR("pres_complete_job returns at %llu\n", litmus_clock());
25756 +//printk(KERN_INFO "pres_complete_job returns at %llu\n", litmus_clock());	
25757  	return err;
25758  }
25759  
25760 -- 
25761 1.8.1.2
25762 
25763 
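Editorial gloss on the hunk above: it only re-aligns the designated initializers of mc2_plugin, but the structure itself is the callback table through which the LITMUS^RT core drives the plugin (schedule, task_new, reservation_create, and so on). The userspace sketch below shows the same register-a-table-of-function-pointers idiom with made-up types and function names; it illustrates the pattern only and is not the actual sched_plugin API.

    #include <stdio.h>

    /* Simplified stand-in for struct sched_plugin: a table of callbacks
     * that the core invokes instead of calling plugin code directly. */
    struct plugin {
    	const char *name;
    	long (*activate)(void);
    	long (*complete_job)(void);
    };

    static long demo_activate(void)
    {
    	puts("plugin activated");
    	return 0;
    }

    static long demo_complete_job(void)
    {
    	puts("job completed");
    	return 0;
    }

    /* Designated initializers, mirroring the style of the mc2_plugin definition. */
    static struct plugin demo_plugin = {
    	.name         = "MC2-demo",
    	.activate     = demo_activate,
    	.complete_job = demo_complete_job,
    };

    int main(void)
    {
    	struct plugin *p = &demo_plugin; /* the core stores this pointer once */

    	printf("registering %s\n", p->name);
    	p->activate();        /* core -> plugin calls go through the table */
    	p->complete_job();
    	return 0;
    }
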
25764 From 5be3aecdd4b8b0beb981cc0f7fc84b0d0ded2c47 Mon Sep 17 00:00:00 2001
25765 From: Namhoon Kim <namhoonk@cs.unc.edu>
25766 Date: Sat, 24 Jan 2015 04:55:04 -0500
25767 Subject: [PATCH 083/119] Add slack stealing via ghost jobs
25768 
25769 ---
25770  include/litmus/mc2_common.h   |   2 +
25771  include/litmus/reservation.h  |  44 ++++++++++
25772  include/litmus/rt_param.h     |   4 +
25773  litmus/jobs.c                 |   1 +
25774  litmus/polling_reservations.c |  28 +++++--
25775  litmus/reservation.c          |  16 ++--
25776  litmus/sched_mc2.c            | 185 +++++++++++++++++++++++++++++++++++-------
25777  7 files changed, 237 insertions(+), 43 deletions(-)
25778 
25779 diff --git a/include/litmus/mc2_common.h b/include/litmus/mc2_common.h
25780 index bdc3a6d..e3c0af2 100644
25781 --- a/include/litmus/mc2_common.h
25782 +++ b/include/litmus/mc2_common.h
25783 @@ -21,6 +21,8 @@ struct mc2_task {
25784  
25785  #include <litmus/reservation.h>
25786  
25787 +#define tsk_mc2_data(t)		(tsk_rt(t)->mc2_data)
25788 +
25789  long mc2_task_client_init(struct task_client *tc, struct mc2_task *mc2_param, struct task_struct *tsk,
25790  							struct reservation *res);
25791  	
25792 diff --git a/include/litmus/reservation.h b/include/litmus/reservation.h
25793 index 4eecd3f..5ccb200 100644
25794 --- a/include/litmus/reservation.h
25795 +++ b/include/litmus/reservation.h
25796 @@ -126,6 +126,9 @@ struct reservation {
25797  	struct reservation_ops *ops;
25798  
25799  	struct list_head clients;
25800 +	
25801 +	/* for global env. */
25802 +	int scheduled_on;
25803  };
25804  
25805  void reservation_init(struct reservation *res);
25806 @@ -185,10 +188,51 @@ struct sup_reservation_environment {
25807  void sup_init(struct sup_reservation_environment* sup_env);
25808  void sup_add_new_reservation(struct sup_reservation_environment* sup_env,
25809  	struct reservation* new_res);
25810 +void sup_scheduler_update_after(struct sup_reservation_environment* sup_env,
25811 +	lt_t timeout);
25812  void sup_update_time(struct sup_reservation_environment* sup_env, lt_t now);
25813  struct task_struct* sup_dispatch(struct sup_reservation_environment* sup_env);
25814  
25815  struct reservation* sup_find_by_id(struct sup_reservation_environment* sup_env,
25816  	unsigned int id);
25817 +	
25818 +/* A global multiprocessor reservation environment. */
25819  
25820 +struct next_timer_event {
25821 +	lt_t next_update;
25822 +	int timer_armed_on;
25823 +	unsigned int id;
25824 +	struct list_head list;
25825 +};
25826 +
25827 +struct gmp_reservation_environment {
25828 +	raw_spinlock_t lock;
25829 +	struct reservation_environment env;
25830 +
25831 +	/* ordered by priority */
25832 +	struct list_head active_reservations;
25833 +
25834 +	/* ordered by next_replenishment */
25835 +	struct list_head depleted_reservations;
25836 +
25837 +	/* unordered */
25838 +	struct list_head inactive_reservations;
25839 +
25840 +	/* timer event ordered by next_update */
25841 +	struct list_head next_events;
25842 +	/* (schedule_now == true) means call gmp_dispatch() now */
25843 +	bool schedule_now;
25844 +	/* set to true if a call to gmp_dispatch() is imminent */
25845 +	bool will_schedule;
25846 +};
25847 +/*
25848 +void gmp_init(struct gmp_reservation_environment* gmp_env);
25849 +void gmp_add_new_reservation(struct gmp_reservation_environment* gmp_env,
25850 +	struct reservation* new_res);
25851 +void gmp_update_time(struct gmp_reservation_environment* gmp_env, lt_t now);
25852 +struct task_struct* gmp_dispatch(struct gmp_reservation_environment* gmp_env);
25853 +
25854 +struct reservation* gmp_find_by_id(struct gmp_reservation_environment* gmp_env,
25855 +	unsigned int id);
25856 +*/
25857  #endif
25858 diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
25859 index e626bbb..284b89e 100644
25860 --- a/include/litmus/rt_param.h
25861 +++ b/include/litmus/rt_param.h
25862 @@ -206,6 +206,7 @@ struct rt_job {
25863  };
25864  
25865  struct pfair_param;
25866 +struct mc2_task;
25867  
25868  /*	RT task parameters for scheduling extensions
25869   *	These parameters are inherited during clone and therefore must
25870 @@ -322,6 +323,9 @@ struct rt_param {
25871  
25872  	/* Pointer to the page shared between userspace and kernel. */
25873  	struct control_page * ctrl_page;
25874 +	
25875 +	/* Mixed-criticality specific data */
25876 +	struct mc2_task* mc2_data;
25877  };
25878  
25879  #endif
25880 diff --git a/litmus/jobs.c b/litmus/jobs.c
25881 index 547222c..e523e29 100644
25882 --- a/litmus/jobs.c
25883 +++ b/litmus/jobs.c
25884 @@ -45,6 +45,7 @@ void release_at(struct task_struct *t, lt_t start)
25885  {
25886  	BUG_ON(!t);
25887  	setup_release(t, start);
25888 +	TRACE("RELEASE!!\n");
25889  	tsk_rt(t)->completed = 0;
25890  }
25891  
25892 diff --git a/litmus/polling_reservations.c b/litmus/polling_reservations.c
25893 index 4c07ee7..941a371 100644
25894 --- a/litmus/polling_reservations.c
25895 +++ b/litmus/polling_reservations.c
25896 @@ -19,11 +19,19 @@ static void periodic_polling_client_arrives(
25897  	switch (res->state) {
25898  		case RESERVATION_INACTIVE:
25899  			/* Figure out next replenishment time. */
25900 -			tmp = res->env->current_time - res->env->time_zero;
25901 -			instances =  div64_u64(tmp, pres->period);
25902 -			res->next_replenishment =
25903 -				(instances + 1) * pres->period + pres->offset;
25904 -
25905 +			if (res->env->time_zero == 0) {
25906 +				tmp = res->env->current_time - res->env->time_zero;
25907 +				instances =  div64_u64(tmp, pres->period);
25908 +				res->next_replenishment =
25909 +					(instances + 1) * pres->period + pres->offset;
25910 +			}
25911 +			else {
25912 +				tmp = res->env->current_time - res->env->time_zero;
25913 +				instances =  div64_u64(tmp, pres->period);
25914 +				res->next_replenishment = res->env->time_zero + instances * pres->period;
25915 +			}
25916 +				
25917 +			TRACE("ENV_TIME_ZERO %llu\n", res->env->time_zero);
25918  			TRACE("pol-res: activate tmp=%llu instances=%llu period=%llu nextrp=%llu cur=%llu\n",
25919  				tmp, instances, pres->period, res->next_replenishment,
25920  				res->env->current_time);
25921 @@ -62,9 +70,10 @@ static void periodic_polling_client_departs(
25922  		case RESERVATION_ACTIVE:
25923  			if (list_empty(&res->clients)) {
25924  				res->env->change_state(res->env, res,
25925 -					did_signal_job_completion ?
25926 -						RESERVATION_DEPLETED :
25927  						RESERVATION_ACTIVE_IDLE);
25928 +//					did_signal_job_completion ?
25929 +//						RESERVATION_DEPLETED :
25930 +//						RESERVATION_ACTIVE_IDLE);
25931  			} /* else: nothing to do, more clients ready */
25932  			break;
25933  
25934 @@ -86,6 +95,7 @@ static void periodic_polling_on_replenishment(
25935  	res->next_replenishment += pres->period;
25936  	res->budget_consumed = 0;
25937  
25938 +	TRACE("polling_replenish(%u): next_replenishment=%llu\n", res->id, res->next_replenishment);
25939  	switch (res->state) {
25940  		case RESERVATION_DEPLETED:
25941  		case RESERVATION_INACTIVE:
25942 @@ -270,6 +280,7 @@ void polling_reservation_init(
25943  	pres->period = period;
25944  	pres->deadline = deadline;
25945  	pres->offset = offset;
25946 +	TRACE_TASK(current, "polling_reservation_init: periodic %d, use_edf %d\n", use_periodic_polling, use_edf_prio);
25947  	if (use_periodic_polling) {
25948  		if (use_edf_prio)
25949  			pres->res.ops = &periodic_polling_ops_edf;
25950 @@ -460,7 +471,8 @@ static void td_drain_budget(
25951  	switch (res->state) {
25952  		case RESERVATION_DEPLETED:
25953  		case RESERVATION_INACTIVE:
25954 -			BUG();
25955 +			//BUG();
25956 +			TRACE("TD_DRAIN!!!!!!!!! RES_STATE = %d\n", res->state);
25957  			break;
25958  
25959  		case RESERVATION_ACTIVE_IDLE:
25960 diff --git a/litmus/reservation.c b/litmus/reservation.c
25961 index 0e43479..2dc3dc2 100644
25962 --- a/litmus/reservation.c
25963 +++ b/litmus/reservation.c
25964 @@ -48,11 +48,12 @@ static void sup_scheduler_update_at(
25965  	struct sup_reservation_environment* sup_env,
25966  	lt_t when)
25967  {
25968 +	TRACE("SCHEDULER_UPDATE_AT update: %llu > when %llu\n", sup_env->next_scheduler_update, when);
25969  	if (sup_env->next_scheduler_update > when)
25970  		sup_env->next_scheduler_update = when;
25971  }
25972  
25973 -static void sup_scheduler_update_after(
25974 +void sup_scheduler_update_after(
25975  	struct sup_reservation_environment* sup_env,
25976  	lt_t timeout)
25977  {
25978 @@ -192,10 +193,13 @@ static void sup_charge_budget(
25979  		/* charge all ACTIVE_IDLE up to the first ACTIVE reservation */
25980  		res = list_entry(pos, struct reservation, list);
25981  		if (res->state == RESERVATION_ACTIVE) {
25982 -			res->ops->drain_budget(res, delta);
25983 +			TRACE("sup_charge_budget ACTIVE R%u drain %llu\n", res->id, delta);
25984 +			if (encountered_active == 0)
25985 +				res->ops->drain_budget(res, delta);
25986  			encountered_active = 1;
25987  		} else {
25988  			BUG_ON(res->state != RESERVATION_ACTIVE_IDLE);
25989 +			TRACE("sup_charge_budget INACTIVE R%u drain %llu\n", res->id, delta);
25990  			res->ops->drain_budget(res, delta);
25991  		}
25992  		if (res->state == RESERVATION_ACTIVE ||
25993 @@ -207,9 +211,9 @@ static void sup_charge_budget(
25994  				res->id, res->cur_budget);
25995  			 sup_scheduler_update_after(sup_env, res->cur_budget);
25996  		}
25997 -		if (encountered_active)
25998 +		//if (encountered_active == 2)
25999  			/* stop at the first ACTIVE reservation */
26000 -			break;
26001 +		//	break;
26002  	}
26003  	//TRACE("finished charging budgets\n");
26004  }
26005 @@ -246,7 +250,7 @@ void sup_update_time(
26006  	/* If the time didn't advance, there is nothing to do.
26007  	 * This check makes it safe to call sup_advance_time() potentially
26008  	 * multiple times (e.g., via different code paths. */
26009 -	//TRACE("(sup_update_time) now: %llu, current_time: %llu\n", now, sup_env->env.current_time);
26010 +	TRACE("(sup_update_time) now: %llu, current_time: %llu\n", now, sup_env->env.current_time);
26011  	if (unlikely(now <= sup_env->env.current_time))
26012  		return;
26013  
26014 @@ -258,9 +262,11 @@ void sup_update_time(
26015  		sup_env->next_scheduler_update = SUP_NO_SCHEDULER_UPDATE;
26016  
26017  	/* deplete budgets by passage of time */
26018 +	TRACE("CHARGE###\n");
26019  	sup_charge_budget(sup_env, delta);
26020  
26021  	/* check if any budgets where replenished */
26022 +	TRACE("REPLENISH###\n");
26023  	sup_replenish_budgets(sup_env);
26024  }
26025  
26026 diff --git a/litmus/sched_mc2.c b/litmus/sched_mc2.c
26027 index b9f0523..6b29d52 100644
26028 --- a/litmus/sched_mc2.c
26029 +++ b/litmus/sched_mc2.c
26030 @@ -22,15 +22,22 @@ struct mc2_task_state {
26031  	struct mc2_task mc2_param;
26032  };
26033  
26034 +struct crit_entry {
26035 +	enum crit_level level;
26036 +	struct task_struct *running;
26037 +	struct hrtimer ghost_timer;
26038 +};
26039 +
26040  struct mc2_cpu_state {
26041  	raw_spinlock_t lock;
26042  
26043  	struct sup_reservation_environment sup_env;
26044  	struct hrtimer timer;
26045 +	struct hrtimer g_timer;
26046  
26047  	int cpu;
26048  	struct task_struct* scheduled;
26049 -	enum crit_level run_level;
26050 +	struct crit_entry crit_entries[NUM_CRIT_LEVELS];
26051  };
26052  
26053  static DEFINE_PER_CPU(struct mc2_cpu_state, mc2_cpu_state);
26054 @@ -42,30 +49,53 @@ static struct mc2_task_state* get_mc2_state(struct task_struct *tsk)
26055  {
26056  	return (struct mc2_task_state*) tsk_rt(tsk)->plugin_state;
26057  }
26058 +static enum crit_level get_task_crit_level(struct task_struct *tsk)
26059 +{
26060 +	struct mc2_task_state *tinfo = get_mc2_state(tsk);
26061 +	if (!tinfo)
26062 +		return NUM_CRIT_LEVELS;
26063 +	else
26064 +		return tinfo->mc2_param.crit;
26065 +}
26066  
26067  static void task_departs(struct task_struct *tsk, int job_complete)
26068  {
26069 -	struct mc2_task_state* state = get_mc2_state(tsk);
26070 +	struct mc2_task_state* tinfo = get_mc2_state(tsk);
26071 +	struct mc2_cpu_state* state = local_cpu_state();
26072  	struct reservation* res;
26073  	struct reservation_client *client;
26074  
26075 -	res    = state->res_info.client.reservation;
26076 -	client = &state->res_info.client;
26077 +	res    = tinfo->res_info.client.reservation;
26078 +	client = &tinfo->res_info.client;
26079  
26080  	res->ops->client_departs(res, client, job_complete);
26081 -	state->has_departed = true;
26082 +	tinfo->has_departed = true;
26083 +	TRACE_TASK(tsk, "CLIENT DEPART with budget %llu\n", res->cur_budget);
26084 +	if (job_complete && res->cur_budget) {
26085 +		struct crit_entry* ce;
26086 +		enum crit_level lv = tinfo->mc2_param.crit;
26087 +		//lt_t now = litmus_clock();
26088 +		
26089 +		ce = &state->crit_entries[lv];
26090 +		ce->running = tsk;
26091 +		TRACE_TASK(tsk, "BECOME GHOST at %llu\n", litmus_clock());
26092 +		
26093 +		BUG_ON(hrtimer_active(&ce->ghost_timer));
26094 +		//TRACE("setting GHOST timer %llu\n", ns_to_ktime(now + res->cur_budget));
26095 +		//__hrtimer_start_range_ns(&ce->ghost_timer, ns_to_ktime(now + res->cur_budget), 0, HRTIMER_MODE_ABS_PINNED, 0);
26096 +	}		
26097  }
26098  
26099  static void task_arrives(struct task_struct *tsk)
26100  {
26101 -	struct mc2_task_state* state = get_mc2_state(tsk);
26102 +	struct mc2_task_state* tinfo = get_mc2_state(tsk);
26103  	struct reservation* res;
26104  	struct reservation_client *client;
26105  
26106 -	res    = state->res_info.client.reservation;
26107 -	client = &state->res_info.client;
26108 +	res    = tinfo->res_info.client.reservation;
26109 +	client = &tinfo->res_info.client;
26110  
26111 -	state->has_departed = false;
26112 +	tinfo->has_departed = false;
26113  	res->ops->client_arrives(res, client);
26114  }
26115  
26116 @@ -94,7 +124,7 @@ static void mc2_update_timer_and_unlock(struct mc2_cpu_state *state)
26117  		/* Reprogram only if not already set correctly. */
26118  		if (!hrtimer_active(&state->timer) ||
26119  		    ktime_to_ns(hrtimer_get_expires(&state->timer)) != update) {
26120 -			TRACE("canceling timer...\n");
26121 +			TRACE("canceling timer...at %llu\n", ktime_to_ns(hrtimer_get_expires(&state->timer)));
26122  			hrtimer_cancel(&state->timer);
26123  			TRACE("setting scheduler timer for %llu\n", update);
26124  			/* We cannot use hrtimer_start() here because the
26125 @@ -127,6 +157,49 @@ static void mc2_update_timer_and_unlock(struct mc2_cpu_state *state)
26126  	}
26127  }
26128  
26129 +static void mc2_update_ghost_state(struct mc2_cpu_state *state)
26130 +{
26131 +	int lv = 0;
26132 +	struct crit_entry* ce;
26133 +	struct reservation *res;
26134 +	struct mc2_task_state *tinfo;
26135 +	
26136 +	for (lv = 0; lv < NUM_CRIT_LEVELS; lv++) {
26137 +		ce = &state->crit_entries[lv];
26138 +		if (ce->running != NULL) {
26139 +			tinfo = get_mc2_state(ce->running);
26140 +			if (lv != CRIT_LEVEL_C)
26141 +				res = sup_find_by_id(&state->sup_env, tinfo->mc2_param.res_id);
26142 +			else
26143 +				continue;
26144 +			TRACE("LV %d running id %d budget %llu\n", lv, tinfo->mc2_param.res_id, res->cur_budget);
26145 +			if (!res->cur_budget) {
26146 +				TRACE("GHOST FINISH id %d at %llu\n", tinfo->mc2_param.res_id, litmus_clock());
26147 +				ce->running = NULL;
26148 +			}
26149 +		}
26150 +	}
26151 +}			
26152 +
26153 +static enum hrtimer_restart on_ghost_timer(struct hrtimer *timer)
26154 +{
26155 +	struct crit_entry *ce;
26156 +	struct mc2_cpu_state *state;
26157 +	
26158 +	ce = container_of(timer, struct crit_entry, ghost_timer);
26159 +	state = container_of(ce, struct mc2_cpu_state, crit_entries[ce->level]);
26160 +	
26161 +	TRACE("GHOST_TIMER FIRED at %llu\n", litmus_clock());
26162 +	
26163 +	raw_spin_lock(&state->lock);
26164 +	sup_update_time(&state->sup_env, litmus_clock());
26165 +	mc2_update_ghost_state(state);
26166 +	
26167 +	raw_spin_unlock(&state->lock);
26168 +	
26169 +	return HRTIMER_NORESTART;
26170 +}
26171 +	
26172  static enum hrtimer_restart on_scheduling_timer(struct hrtimer *timer)
26173  {
26174  	unsigned long flags;
26175 @@ -144,9 +217,11 @@ static enum hrtimer_restart on_scheduling_timer(struct hrtimer *timer)
26176  	 */
26177  	BUG_ON(state->cpu != raw_smp_processor_id());
26178  
26179 +	TRACE("TIMER FIRED at %llu\n", litmus_clock());
26180  	raw_spin_lock_irqsave(&state->lock, flags);
26181  	sup_update_time(&state->sup_env, litmus_clock());
26182 -
26183 +	mc2_update_ghost_state(state);
26184 +	
26185  	update = state->sup_env.next_scheduler_update;
26186  	now = state->sup_env.env.current_time;
26187  
26188 @@ -165,6 +240,36 @@ static enum hrtimer_restart on_scheduling_timer(struct hrtimer *timer)
26189  	return restart;
26190  }
26191  
26192 +struct task_struct* mc2_dispatch(struct sup_reservation_environment* sup_env, struct mc2_cpu_state* state)
26193 +{
26194 +	struct reservation *res, *next;
26195 +	struct task_struct *tsk = NULL;
26196 +	struct crit_entry *ce;
26197 +	enum crit_level lv;
26198 +	lt_t time_slice;
26199 +
26200 +	list_for_each_entry_safe(res, next, &sup_env->active_reservations, list) {
26201 +		if (res->state == RESERVATION_ACTIVE) {
26202 +			tsk = res->ops->dispatch_client(res, &time_slice);
26203 +			if (likely(tsk)) {
26204 +				lv = get_task_crit_level(tsk);
26205 +				if (lv == NUM_CRIT_LEVELS) {
26206 +					sup_scheduler_update_after(sup_env, res->cur_budget);
26207 +					return tsk;
26208 +				} else {
26209 +					ce = &state->crit_entries[lv];
26210 +					if (likely(!ce->running)) {
26211 +						sup_scheduler_update_after(sup_env, res->cur_budget);
26212 +						return tsk;
26213 +					}
26214 +				}
26215 +			}
26216 +		}
26217 +	}
26218 +
26219 +	return NULL;
26220 +}
26221 +
26222  static struct task_struct* mc2_schedule(struct task_struct * prev)
26223  {
26224  	/* next == NULL means "schedule background work". */
26225 @@ -178,14 +283,17 @@ static struct task_struct* mc2_schedule(struct task_struct * prev)
26226  
26227  	/* update time */
26228  	state->sup_env.will_schedule = true;
26229 +	TRACE_TASK(prev, "MC2_SCHEDULE sup_update_time ####\n");
26230  	sup_update_time(&state->sup_env, litmus_clock());
26231 -
26232 +	TRACE_TASK(prev, "MC2_SCHEDULE sup_update_time !!!!\n");
26233 +	mc2_update_ghost_state(state);
26234 +	
26235  	/* remove task from reservation if it blocks */
26236  	if (is_realtime(prev) && !is_running(prev))
26237  		task_departs(prev, is_completed(prev));
26238  
26239  	/* figure out what to schedule next */
26240 -	state->scheduled = sup_dispatch(&state->sup_env);
26241 +	state->scheduled = mc2_dispatch(&state->sup_env, state);
26242  
26243  	/* Notify LITMUS^RT core that we've arrived at a scheduling decision. */
26244  	sched_state_task_picked();
26245 @@ -197,7 +305,6 @@ static struct task_struct* mc2_schedule(struct task_struct * prev)
26246  
26247  	if (prev != state->scheduled && is_realtime(prev)) {
26248  		TRACE_TASK(prev, "descheduled.\n");
26249 -		state->run_level = NUM_CRIT_LEVELS;
26250  	}
26251  	if (state->scheduled) {
26252  		TRACE_TASK(state->scheduled, "scheduled.\n");
26253 @@ -242,9 +349,10 @@ static void mc2_task_resume(struct task_struct  *tsk)
26254  		 * since we might not actually be executing on tinfo->cpu
26255  		 * at the moment. */
26256  		sup_update_time(&state->sup_env, litmus_clock());
26257 +		mc2_update_ghost_state(state);
26258  		task_arrives(tsk);
26259  		/* NOTE: drops state->lock */
26260 -		TRACE("mc2_resume()\n");
26261 +		TRACE_TASK(tsk, "mc2_resume()\n");
26262  		mc2_update_timer_and_unlock(state);
26263  		local_irq_restore(flags);
26264  	} else {
26265 @@ -263,34 +371,36 @@ static long mc2_complete_job(void)
26266  	struct mc2_cpu_state *state = local_cpu_state();
26267  	struct reservation_environment *env = &(state->sup_env.env);
26268  	struct mc2_task_state *tinfo = get_mc2_state(current);
26269 +	struct reservation *res;
26270  	
26271 +	res = sup_find_by_id(&state->sup_env, tinfo->mc2_param.res_id);
26272 +	if (!res)
26273 +		; // find in global env
26274  	
26275 -	TRACE_CUR("mc2_complete_job at %llu (deadline: %llu)\n", litmus_clock(),
26276 -		get_deadline(current));
26277 +	TRACE_CUR("mc2_complete_job at %llu (deadline: %llu) (cur->budget: %llu)\n", litmus_clock(),
26278 +		get_deadline(current), res->cur_budget);
26279  
26280  	tsk_rt(current)->completed = 1;
26281  	
26282  	if (tsk_rt(current)->sporadic_release) {
26283  		env->time_zero = tsk_rt(current)->sporadic_release_time;
26284 -	
26285 +		res->next_replenishment = tsk_rt(current)->sporadic_release_time;
26286 +		res->cur_budget = 0;
26287 +		res->env->change_state(res->env, res, RESERVATION_DEPLETED);
26288 +		
26289  		if (tinfo->mc2_param.crit == CRIT_LEVEL_A) {
26290 -			struct reservation *res;
26291  			struct table_driven_reservation *tdres;
26292  			
26293 -			sup_update_time(&state->sup_env, litmus_clock());
26294 -			res = sup_find_by_id(&state->sup_env, tinfo->mc2_param.res_id);
26295 +			//sup_update_time(&state->sup_env, litmus_clock());
26296 +			//res = sup_find_by_id(&state->sup_env, tinfo->mc2_param.res_id);
26297  			tdres = container_of(res, struct table_driven_reservation, res);
26298  			tdres->next_interval = 0;
26299  			tdres->major_cycle_start = tsk_rt(current)->sporadic_release_time;
26300 -			res->next_replenishment = tsk_rt(current)->sporadic_release_time;
26301 -			res->next_replenishment += tdres->intervals[0].start;
26302 -			res->env->change_state(res->env, res, RESERVATION_DEPLETED);
26303 -						
26304 -			TRACE_CUR("CHANGE NEXT_REP = %llu\n NEXT_UPDATE = %llu\n", res->next_replenishment, state->sup_env.next_scheduler_update);
26305 +			res->next_replenishment += tdres->intervals[0].start;			
26306  		}
26307 -		
26308 +		TRACE_CUR("CHANGE NEXT_REP = %llu\n NEXT_UPDATE = %llu\n", res->next_replenishment, state->sup_env.next_scheduler_update);
26309  	}
26310 -		
26311 +	
26312  	prepare_for_next_period(current);
26313  	next_release = ns_to_ktime(get_release(current));
26314  	preempt_disable();
26315 @@ -385,6 +495,7 @@ static void mc2_task_new(struct task_struct *tsk, int on_runqueue,
26316  		/* Assumption: litmus_clock() is synchronized across cores
26317  		 * [see comment in pres_task_resume()] */
26318  		sup_update_time(&state->sup_env, litmus_clock());
26319 +		mc2_update_ghost_state(state);
26320  		task_arrives(tsk);
26321  		/* NOTE: drops state->lock */
26322  		TRACE("mc2_new()\n");
26323 @@ -456,18 +567,26 @@ static void mc2_task_exit(struct task_struct *tsk)
26324  	unsigned long flags;
26325  	struct mc2_task_state* tinfo = get_mc2_state(tsk);
26326  	struct mc2_cpu_state *state = cpu_state_for(tinfo->cpu);
26327 +	enum crit_level lv = tinfo->mc2_param.crit;
26328 +	struct crit_entry* ce;	
26329  
26330  	raw_spin_lock_irqsave(&state->lock, flags);
26331  
26332  	if (state->scheduled == tsk)
26333  		state->scheduled = NULL;
26334  
26335 +	ce = &state->crit_entries[lv];
26336 +	if (ce->running == tsk)
26337 +		ce->running = NULL;
26338 +	
26339  	/* remove from queues */
26340  	if (is_running(tsk)) {
26341  		/* Assumption: litmus_clock() is synchronized across cores
26342  		 * [see comment in pres_task_resume()] */
26343  		sup_update_time(&state->sup_env, litmus_clock());
26344 +		mc2_update_ghost_state(state);
26345  		task_departs(tsk, 0);
26346 +		
26347  		/* NOTE: drops state->lock */
26348  		TRACE("mc2_exit()\n");
26349  		mc2_update_timer_and_unlock(state);
26350 @@ -729,7 +848,7 @@ static void mc2_setup_domain_proc(void)
26351  
26352  static long mc2_activate_plugin(void)
26353  {
26354 -	int cpu;
26355 +	int cpu, lv;
26356  	struct mc2_cpu_state *state;
26357  
26358  	for_each_online_cpu(cpu) {
26359 @@ -740,7 +859,13 @@ static long mc2_activate_plugin(void)
26360  		raw_spin_lock_init(&state->lock);
26361  		state->cpu = cpu;
26362  		state->scheduled = NULL;
26363 -
26364 +		for (lv = 0; lv < NUM_CRIT_LEVELS; lv++) {
26365 +			struct crit_entry *ce = &state->crit_entries[lv];
26366 +			ce->level = lv;
26367 +			ce->running = NULL;
26368 +			hrtimer_init(&ce->ghost_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
26369 +			ce->ghost_timer.function = on_ghost_timer;
26370 +		}
26371  		sup_init(&state->sup_env);
26372  
26373  		hrtimer_init(&state->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
26374 -- 
26375 1.8.1.2
26376 
26377 
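Editorial gloss on patch 083: it introduces the slack-stealing machinery. When a job completes with budget left over, its reservation keeps running as a "ghost" (task_departs() records it in the per-level crit_entry), and mc2_dispatch() refuses to hand the CPU to another task of the same criticality level while that ghost is still draining. A compact userspace sketch of the selection rule follows; the array-based reservation table and the names dispatch()/ghost_running are illustrative simplifications, not the kernel code.

    #include <stdio.h>
    #include <stddef.h>

    enum crit_level { CRIT_A, CRIT_B, CRIT_C, NUM_CRIT_LEVELS };

    struct reservation {
    	unsigned int id;
    	int active;                 /* 1 if RESERVATION_ACTIVE */
    	enum crit_level crit;
    	const char *task;           /* task that would be dispatched */
    };

    /* One slot per criticality level; non-NULL means a ghost job is running. */
    static const char *ghost_running[NUM_CRIT_LEVELS];

    /* Pick the first active reservation whose level is not blocked by a ghost. */
    static const char *dispatch(struct reservation *res, size_t n)
    {
    	for (size_t i = 0; i < n; i++) {
    		if (!res[i].active || !res[i].task)
    			continue;
    		if (!ghost_running[res[i].crit])
    			return res[i].task;  /* schedule this task */
    		/* else: blocked by a ghost at the same level, keep looking */
    	}
    	return NULL;                         /* background work */
    }

    int main(void)
    {
    	struct reservation rs[] = {
    		{ 1, 1, CRIT_A, "taskA" },
    		{ 2, 1, CRIT_B, "taskB" },
    	};

    	ghost_running[CRIT_A] = "ghost-of-A";        /* level A still has slack */
    	printf("dispatch -> %s\n", dispatch(rs, 2)); /* prints taskB */
    	return 0;
    }

A later patch in this series (085) additionally marks the skipped reservation blocked_by_ghost so that its budget is not charged while it waits.
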
26378 From 34fe51ed2dc210e87bfa5d85ab98c5125495f002 Mon Sep 17 00:00:00 2001
26379 From: Namhoon Kim <namhoonk@cs.unc.edu>
26380 Date: Sat, 24 Jan 2015 05:22:15 -0500
26381 Subject: [PATCH 084/119] Remove ghost_timer
26382 
26383 ---
26384  litmus/sched_mc2.c | 12 +++++++-----
26385  1 file changed, 7 insertions(+), 5 deletions(-)
26386 
26387 diff --git a/litmus/sched_mc2.c b/litmus/sched_mc2.c
26388 index 6b29d52..499f770 100644
26389 --- a/litmus/sched_mc2.c
26390 +++ b/litmus/sched_mc2.c
26391 @@ -25,7 +25,7 @@ struct mc2_task_state {
26392  struct crit_entry {
26393  	enum crit_level level;
26394  	struct task_struct *running;
26395 -	struct hrtimer ghost_timer;
26396 +	//struct hrtimer ghost_timer;
26397  };
26398  
26399  struct mc2_cpu_state {
26400 @@ -80,7 +80,7 @@ static void task_departs(struct task_struct *tsk, int job_complete)
26401  		ce->running = tsk;
26402  		TRACE_TASK(tsk, "BECOME GHOST at %llu\n", litmus_clock());
26403  		
26404 -		BUG_ON(hrtimer_active(&ce->ghost_timer));
26405 +		//BUG_ON(hrtimer_active(&ce->ghost_timer));
26406  		//TRACE("setting GHOST timer %llu\n", ns_to_ktime(now + res->cur_budget));
26407  		//__hrtimer_start_range_ns(&ce->ghost_timer, ns_to_ktime(now + res->cur_budget), 0, HRTIMER_MODE_ABS_PINNED, 0);
26408  	}		
26409 @@ -181,6 +181,7 @@ static void mc2_update_ghost_state(struct mc2_cpu_state *state)
26410  	}
26411  }			
26412  
26413 +/*
26414  static enum hrtimer_restart on_ghost_timer(struct hrtimer *timer)
26415  {
26416  	struct crit_entry *ce;
26417 @@ -199,7 +200,8 @@ static enum hrtimer_restart on_ghost_timer(struct hrtimer *timer)
26418  	
26419  	return HRTIMER_NORESTART;
26420  }
26421 -	
26422 +*/
26423 +
26424  static enum hrtimer_restart on_scheduling_timer(struct hrtimer *timer)
26425  {
26426  	unsigned long flags;
26427 @@ -863,8 +865,8 @@ static long mc2_activate_plugin(void)
26428  			struct crit_entry *ce = &state->crit_entries[lv];
26429  			ce->level = lv;
26430  			ce->running = NULL;
26431 -			hrtimer_init(&ce->ghost_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
26432 -			ce->ghost_timer.function = on_ghost_timer;
26433 +			//hrtimer_init(&ce->ghost_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
26434 +			//ce->ghost_timer.function = on_ghost_timer;
26435  		}
26436  		sup_init(&state->sup_env);
26437  
26438 -- 
26439 1.8.1.2
26440 
26441 
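Editorial gloss on patch 084: with the per-level ghost hrtimer commented out, ghost budgets are drained by the regular accounting performed from the scheduling timer and the schedule path, so mc2_update_ghost_state() only has to notice when a ghost's budget reaches zero and clear the per-level slot. A minimal sketch of that check is shown below; the types are simplified assumptions, and the stored budget value stands in for the sup_find_by_id() lookup done in the kernel.

    #include <stdio.h>

    enum crit_level { CRIT_A, CRIT_B, CRIT_C, NUM_CRIT_LEVELS };

    struct crit_entry {
    	const char *running;        /* ghost task at this level, if any */
    	unsigned long long budget;  /* stand-in for res->cur_budget */
    };

    /* Clear any ghost whose remaining budget has been fully consumed. */
    static void update_ghost_state(struct crit_entry ce[NUM_CRIT_LEVELS])
    {
    	for (int lv = 0; lv < NUM_CRIT_LEVELS; lv++) {
    		if (ce[lv].running && ce[lv].budget == 0) {
    			printf("ghost %s at level %d finished\n",
    			       ce[lv].running, lv);
    			ce[lv].running = NULL;  /* level no longer blocked */
    		}
    	}
    }

    int main(void)
    {
    	struct crit_entry ce[NUM_CRIT_LEVELS] = {
    		[CRIT_A] = { "ghost-A", 0 },       /* budget exhausted */
    		[CRIT_B] = { "ghost-B", 500000 },  /* still has slack  */
    	};

    	update_ghost_state(ce);
    	return 0;
    }
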
26442 From ca538aafd7cebfd09a47af0a628647620a6bba35 Mon Sep 17 00:00:00 2001
26443 From: Namhoon Kim <namhoonk@cs.unc.edu>
26444 Date: Sat, 24 Jan 2015 13:24:12 -0500
26445 Subject: [PATCH 085/119] Reservation destroy
26446 
26447 ---
26448  include/litmus/reservation.h  |  8 +++++---
26449  litmus/polling_reservations.c |  1 +
26450  litmus/reservation.c          |  6 ++++--
26451  litmus/sched_mc2.c            | 47 ++++++++++++++++++++++++++++++++++++-------
26452  4 files changed, 50 insertions(+), 12 deletions(-)
26453 
26454 diff --git a/include/litmus/reservation.h b/include/litmus/reservation.h
26455 index 5ccb200..0e656ad 100644
26456 --- a/include/litmus/reservation.h
26457 +++ b/include/litmus/reservation.h
26458 @@ -129,6 +129,8 @@ struct reservation {
26459  	
26460  	/* for global env. */
26461  	int scheduled_on;
26462 +	/* for blocked by ghost */
26463 +	int blocked_by_ghost;
26464  };
26465  
26466  void reservation_init(struct reservation *res);
26467 @@ -225,14 +227,14 @@ struct gmp_reservation_environment {
26468  	/* set to true if a call to gmp_dispatch() is imminent */
26469  	bool will_schedule;
26470  };
26471 -/*
26472 +
26473  void gmp_init(struct gmp_reservation_environment* gmp_env);
26474  void gmp_add_new_reservation(struct gmp_reservation_environment* gmp_env,
26475  	struct reservation* new_res);
26476  void gmp_update_time(struct gmp_reservation_environment* gmp_env, lt_t now);
26477  struct task_struct* gmp_dispatch(struct gmp_reservation_environment* gmp_env);
26478 -
26479 +struct next_timer_event* gmp_find_event_by_id(struct gmp_reservation_environment* gmp_env, unsigned int id);
26480  struct reservation* gmp_find_by_id(struct gmp_reservation_environment* gmp_env,
26481  	unsigned int id);
26482 -*/
26483 +
26484  #endif
26485 diff --git a/litmus/polling_reservations.c b/litmus/polling_reservations.c
26486 index 941a371..ec5cadd 100644
26487 --- a/litmus/polling_reservations.c
26488 +++ b/litmus/polling_reservations.c
26489 @@ -46,6 +46,7 @@ static void periodic_polling_client_arrives(
26490  			break;
26491  
26492  		case RESERVATION_ACTIVE_IDLE:
26493 +			res->blocked_by_ghost = 0;
26494  			res->env->change_state(res->env, res,
26495  				RESERVATION_ACTIVE);
26496  			break;
26497 diff --git a/litmus/reservation.c b/litmus/reservation.c
26498 index 2dc3dc2..16b3a48 100644
26499 --- a/litmus/reservation.c
26500 +++ b/litmus/reservation.c
26501 @@ -194,9 +194,11 @@ static void sup_charge_budget(
26502  		res = list_entry(pos, struct reservation, list);
26503  		if (res->state == RESERVATION_ACTIVE) {
26504  			TRACE("sup_charge_budget ACTIVE R%u drain %llu\n", res->id, delta);
26505 -			if (encountered_active == 0)
26506 +			if (encountered_active == 0 && res->blocked_by_ghost == 0) {
26507 +				TRACE("DRAIN !!\n");
26508  				res->ops->drain_budget(res, delta);
26509 -			encountered_active = 1;
26510 +				encountered_active = 1;
26511 +			}			
26512  		} else {
26513  			BUG_ON(res->state != RESERVATION_ACTIVE_IDLE);
26514  			TRACE("sup_charge_budget INACTIVE R%u drain %llu\n", res->id, delta);
26515 diff --git a/litmus/sched_mc2.c b/litmus/sched_mc2.c
26516 index 499f770..0c26019 100644
26517 --- a/litmus/sched_mc2.c
26518 +++ b/litmus/sched_mc2.c
26519 @@ -86,17 +86,25 @@ static void task_departs(struct task_struct *tsk, int job_complete)
26520  	}		
26521  }
26522  
26523 -static void task_arrives(struct task_struct *tsk)
26524 +static void task_arrives(struct mc2_cpu_state *state, struct task_struct *tsk)
26525  {
26526  	struct mc2_task_state* tinfo = get_mc2_state(tsk);
26527  	struct reservation* res;
26528  	struct reservation_client *client;
26529 +	enum crit_level lv = get_task_crit_level(tsk);
26530  
26531  	res    = tinfo->res_info.client.reservation;
26532  	client = &tinfo->res_info.client;
26533  
26534  	tinfo->has_departed = false;
26535  	res->ops->client_arrives(res, client);
26536 +	
26537 +	if (lv != NUM_CRIT_LEVELS) {
26538 +		struct crit_entry *ce;
26539 +		ce = &state->crit_entries[lv];
26540 +		if (ce->running == tsk)
26541 +			ce->running = NULL;
26542 +	}
26543  }
26544  
26545  /* NOTE: drops state->lock */
26546 @@ -174,8 +182,13 @@ static void mc2_update_ghost_state(struct mc2_cpu_state *state)
26547  				continue;
26548  			TRACE("LV %d running id %d budget %llu\n", lv, tinfo->mc2_param.res_id, res->cur_budget);
26549  			if (!res->cur_budget) {
26550 +				struct sup_reservation_environment* sup_env = &state->sup_env;
26551 +				
26552  				TRACE("GHOST FINISH id %d at %llu\n", tinfo->mc2_param.res_id, litmus_clock());
26553  				ce->running = NULL;
26554 +				res = list_first_entry_or_null(&sup_env->active_reservations, struct reservation, list);
26555 +				if (res)
26556 +					litmus_reschedule_local();
26557  			}
26558  		}
26559  	}
26560 @@ -262,7 +275,10 @@ struct task_struct* mc2_dispatch(struct sup_reservation_environment* sup_env, st
26561  					ce = &state->crit_entries[lv];
26562  					if (likely(!ce->running)) {
26563  						sup_scheduler_update_after(sup_env, res->cur_budget);
26564 +						res->blocked_by_ghost = 0;
26565  						return tsk;
26566 +					} else {
26567 +						res->blocked_by_ghost = 1;
26568  					}
26569  				}
26570  			}
26571 @@ -275,7 +291,6 @@ struct task_struct* mc2_dispatch(struct sup_reservation_environment* sup_env, st
26572  static struct task_struct* mc2_schedule(struct task_struct * prev)
26573  {
26574  	/* next == NULL means "schedule background work". */
26575 -	struct mc2_task_state *tinfo;
26576  	struct mc2_cpu_state *state = local_cpu_state();
26577  
26578  	raw_spin_lock(&state->lock);
26579 @@ -352,7 +367,7 @@ static void mc2_task_resume(struct task_struct  *tsk)
26580  		 * at the moment. */
26581  		sup_update_time(&state->sup_env, litmus_clock());
26582  		mc2_update_ghost_state(state);
26583 -		task_arrives(tsk);
26584 +		task_arrives(state, tsk);
26585  		/* NOTE: drops state->lock */
26586  		TRACE_TASK(tsk, "mc2_resume()\n");
26587  		mc2_update_timer_and_unlock(state);
26588 @@ -498,7 +513,7 @@ static void mc2_task_new(struct task_struct *tsk, int on_runqueue,
26589  		 * [see comment in pres_task_resume()] */
26590  		sup_update_time(&state->sup_env, litmus_clock());
26591  		mc2_update_ghost_state(state);
26592 -		task_arrives(tsk);
26593 +		task_arrives(state, tsk);
26594  		/* NOTE: drops state->lock */
26595  		TRACE("mc2_new()\n");
26596  		mc2_update_timer_and_unlock(state);
26597 @@ -521,6 +536,7 @@ static long mc2_reservation_destroy(unsigned int reservation_id, int cpu)
26598  	struct reservation *res = NULL, *next;
26599  	struct sup_reservation_environment *sup_env;
26600  	int found = 0;
26601 +	enum crit_level lv = get_task_crit_level(current);
26602  		
26603  	state = cpu_state_for(cpu);
26604  	raw_spin_lock(&state->lock);
26605 @@ -530,8 +546,13 @@ static long mc2_reservation_destroy(unsigned int reservation_id, int cpu)
26606  	//if (!res) {
26607  	list_for_each_entry_safe(res, next, &sup_env->depleted_reservations, list) {
26608  		if (res->id == reservation_id) {
26609 +			if (lv == CRIT_LEVEL_A) {
26610 +				struct table_driven_reservation *tdres;
26611 +				tdres = container_of(res, struct table_driven_reservation, res);
26612 +				kfree(tdres->intervals);
26613 +			}
26614  			list_del(&res->list);
26615 -			//kfree(res);
26616 +			kfree(res);
26617  			found = 1;
26618  			ret = 0;
26619  		}
26620 @@ -539,8 +560,13 @@ static long mc2_reservation_destroy(unsigned int reservation_id, int cpu)
26621  	if (!found) {
26622  		list_for_each_entry_safe(res, next, &sup_env->inactive_reservations, list) {
26623  			if (res->id == reservation_id) {
26624 +				if (lv == CRIT_LEVEL_A) {
26625 +					struct table_driven_reservation *tdres;
26626 +					tdres = container_of(res, struct table_driven_reservation, res);
26627 +					kfree(tdres->intervals);
26628 +				}
26629  				list_del(&res->list);
26630 -				//kfree(res);
26631 +				kfree(res);
26632  				found = 1;
26633  				ret = 0;
26634  			}
26635 @@ -549,8 +575,13 @@ static long mc2_reservation_destroy(unsigned int reservation_id, int cpu)
26636  	if (!found) {
26637  		list_for_each_entry_safe(res, next, &sup_env->active_reservations, list) {
26638  			if (res->id == reservation_id) {
26639 +				if (lv == CRIT_LEVEL_A) {
26640 +					struct table_driven_reservation *tdres;
26641 +					tdres = container_of(res, struct table_driven_reservation, res);
26642 +					kfree(tdres->intervals);
26643 +				}
26644  				list_del(&res->list);
26645 -				//kfree(res);
26646 +				kfree(res);
26647  				found = 1;
26648  				ret = 0;
26649  			}
26650 @@ -665,6 +696,7 @@ static long create_polling_reservation(
26651  			config->polling_params.relative_deadline,
26652  			config->polling_params.offset);
26653  		pres->res.id = config->id;
26654 +		pres->res.blocked_by_ghost = 0;
26655  		if (!use_edf)
26656  			pres->res.priority = config->priority;
26657  		sup_add_new_reservation(&state->sup_env, &pres->res);
26658 @@ -765,6 +797,7 @@ static long create_table_driven_reservation(
26659  				slots, num_slots);
26660  			td_res->res.id = config->id;
26661  			td_res->res.priority = config->priority;
26662 +			td_res->res.blocked_by_ghost = 0;
26663  			sup_add_new_reservation(&state->sup_env, &td_res->res);
26664  			err = config->id;
26665  		} else {
26666 -- 
26667 1.8.1.2
26668 
26669 
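Editorial gloss on patch 085: mc2_reservation_destroy() now actually releases memory. The reservation id is searched in the depleted, inactive, and active queues in turn, Level-A table-driven reservations additionally free their interval table, and the reservation itself is kfree()d instead of merely unlinked. The sketch below reproduces the search-then-free walk over three queues in plain C, with a toy singly linked list standing in for the kernel's list_head; remove_from() and destroy() are illustrative names, not kernel functions, and error handling is omitted.

    #include <stdio.h>
    #include <stdlib.h>

    struct res {
    	unsigned int id;
    	struct res *next;
    };

    /* Remove the reservation with the given id from one list; return 1 if found. */
    static int remove_from(struct res **head, unsigned int id)
    {
    	for (struct res **pp = head; *pp; pp = &(*pp)->next) {
    		if ((*pp)->id == id) {
    			struct res *victim = *pp;
    			*pp = victim->next;   /* unlink (list_del equivalent) */
    			free(victim);         /* kfree equivalent */
    			return 1;
    		}
    	}
    	return 0;
    }

    /* Try the three queues in the same order as the patch. */
    static int destroy(struct res **depleted, struct res **inactive,
    		   struct res **active, unsigned int id)
    {
    	return remove_from(depleted, id) ||
    	       remove_from(inactive, id) ||
    	       remove_from(active, id);
    }

    static struct res *push(struct res *head, unsigned int id)
    {
    	struct res *r = malloc(sizeof(*r));  /* no error handling in this sketch */
    	r->id = id;
    	r->next = head;
    	return r;
    }

    int main(void)
    {
    	struct res *depleted = NULL, *inactive = push(NULL, 7), *active = NULL;

    	printf("destroy(7) -> %s\n",
    	       destroy(&depleted, &inactive, &active, 7) ? "found" : "missing");
    	return 0;
    }

The three-queue walk itself already existed; the change in this patch is that the kfree() calls are no longer commented out and the Level-A interval table is freed as well.
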
26670 From 5ba38eb6290a0c1767932c03b15edb0627ffd6b2 Mon Sep 17 00:00:00 2001
26671 From: Namhoon Kim <namhoonk@cs.unc.edu>
26672 Date: Wed, 28 Jan 2015 09:26:59 -0500
26673 Subject: [PATCH 086/119] Add Level-C global reservation environment (gmp)
26674 
26675 ---
26676  include/litmus/reservation.h  |  22 +-
26677  litmus/mc2_common.c           |   3 +-
26678  litmus/polling_reservations.c |   7 +-
26679  litmus/reservation.c          | 356 ++++++++++++++++++++-
26680  litmus/sched_mc2.c            | 729 +++++++++++++++++++++++++++++++++++-------
26681  5 files changed, 985 insertions(+), 132 deletions(-)
26682 
26683 diff --git a/include/litmus/reservation.h b/include/litmus/reservation.h
26684 index 0e656ad..fc7e319 100644
26685 --- a/include/litmus/reservation.h
26686 +++ b/include/litmus/reservation.h
26687 @@ -129,8 +129,10 @@ struct reservation {
26688  	
26689  	/* for global env. */
26690  	int scheduled_on;
26691 -	/* for blocked by ghost */
26692 +	/* for blocked by ghost. Do not charge budget when ACTIVE */
26693  	int blocked_by_ghost;
26694 +	/* ghost_job. If it is clear, do not charge budget when ACTIVE_IDLE */
26695 +	int is_ghost;
26696  };
26697  
26698  void reservation_init(struct reservation *res);
26699 @@ -199,11 +201,19 @@ struct reservation* sup_find_by_id(struct sup_reservation_environment* sup_env,
26700  	unsigned int id);
26701  	
26702  /* A global multiprocessor reservation environment. */
26703 +/*
26704 +typedef enum {
26705 +	EVENT_REPLENISH = 0,
26706 +	EVENT_DRAIN,
26707 +	EVENT_OTHERS,
26708 +} event_type_t;
26709 +*/
26710  
26711  struct next_timer_event {
26712  	lt_t next_update;
26713  	int timer_armed_on;
26714 -	unsigned int id;
26715 +	//unsigned int id;
26716 +	//event_type_t type;
26717  	struct list_head list;
26718  };
26719  
26720 @@ -222,6 +232,7 @@ struct gmp_reservation_environment {
26721  
26722  	/* timer event ordered by next_update */
26723  	struct list_head next_events;
26724 +	
26725  	/* (schedule_now == true) means call gmp_dispatch() now */
26726  	bool schedule_now;
26727  	/* set to true if a call to gmp_dispatch() is imminent */
26728 @@ -231,9 +242,12 @@ struct gmp_reservation_environment {
26729  void gmp_init(struct gmp_reservation_environment* gmp_env);
26730  void gmp_add_new_reservation(struct gmp_reservation_environment* gmp_env,
26731  	struct reservation* new_res);
26732 -void gmp_update_time(struct gmp_reservation_environment* gmp_env, lt_t now);
26733 +void gmp_scheduler_update_after(struct gmp_reservation_environment* gmp_env,
26734 +	lt_t timeout);
26735 +bool gmp_update_time(struct gmp_reservation_environment* gmp_env, lt_t now);
26736  struct task_struct* gmp_dispatch(struct gmp_reservation_environment* gmp_env);
26737 -struct next_timer_event* gmp_find_event_by_id(struct gmp_reservation_environment* gmp_env, unsigned int id);
26738 +//struct next_timer_event* gmp_find_event_by_id(struct gmp_reservation_environment* gmp_env, unsigned int id);
26739 +struct next_timer_event* gmp_find_event_by_time(struct gmp_reservation_environment* gmp_env, lt_t when);
26740  struct reservation* gmp_find_by_id(struct gmp_reservation_environment* gmp_env,
26741  	unsigned int id);
26742  
26743 diff --git a/litmus/mc2_common.c b/litmus/mc2_common.c
26744 index d0a42c6..a8ea5d9 100644
26745 --- a/litmus/mc2_common.c
26746 +++ b/litmus/mc2_common.c
26747 @@ -67,7 +67,8 @@ asmlinkage long sys_set_mc2_task_param(pid_t pid, struct mc2_task __user * param
26748  		goto out_unlock;
26749  	}
26750  	
26751 -	target->rt_param.plugin_state = mp;
26752 +	//target->rt_param.plugin_state = mp;
26753 +	target->rt_param.mc2_data = mp;
26754  
26755  	retval = 0;
26756  out_unlock:
26757 diff --git a/litmus/polling_reservations.c b/litmus/polling_reservations.c
26758 index ec5cadd..d2c54c4 100644
26759 --- a/litmus/polling_reservations.c
26760 +++ b/litmus/polling_reservations.c
26761 @@ -32,8 +32,8 @@ static void periodic_polling_client_arrives(
26762  			}
26763  				
26764  			TRACE("ENV_TIME_ZERO %llu\n", res->env->time_zero);
26765 -			TRACE("pol-res: activate tmp=%llu instances=%llu period=%llu nextrp=%llu cur=%llu\n",
26766 -				tmp, instances, pres->period, res->next_replenishment,
26767 +			TRACE("pol-res: R%d activate tmp=%llu instances=%llu period=%llu nextrp=%llu cur=%llu\n",
26768 +				res->id, tmp, instances, pres->period, res->next_replenishment,
26769  				res->env->current_time);
26770  
26771  			res->env->change_state(res->env, res,
26772 @@ -147,7 +147,8 @@ static void common_drain_budget(
26773  	switch (res->state) {
26774  		case RESERVATION_DEPLETED:
26775  		case RESERVATION_INACTIVE:
26776 -			BUG();
26777 +			//BUG();
26778 +			TRACE("!!!!!!!!!!!!!!!STATE ERROR R%d STATE(%d)\n", res->id, res->state);
26779  			break;
26780  
26781  		case RESERVATION_ACTIVE_IDLE:
26782 diff --git a/litmus/reservation.c b/litmus/reservation.c
26783 index 16b3a48..e30892c 100644
26784 --- a/litmus/reservation.c
26785 +++ b/litmus/reservation.c
26786 @@ -1,4 +1,5 @@
26787  #include <linux/sched.h>
26788 +#include <linux/slab.h>
26789  
26790  #include <litmus/litmus.h>
26791  #include <litmus/reservation.h>
26792 @@ -48,7 +49,7 @@ static void sup_scheduler_update_at(
26793  	struct sup_reservation_environment* sup_env,
26794  	lt_t when)
26795  {
26796 -	TRACE("SCHEDULER_UPDATE_AT update: %llu > when %llu\n", sup_env->next_scheduler_update, when);
26797 +	//TRACE("SCHEDULER_UPDATE_AT update: %llu > when %llu\n", sup_env->next_scheduler_update, when);
26798  	if (sup_env->next_scheduler_update > when)
26799  		sup_env->next_scheduler_update = when;
26800  }
26801 @@ -252,7 +253,7 @@ void sup_update_time(
26802  	/* If the time didn't advance, there is nothing to do.
26803  	 * This check makes it safe to call sup_advance_time() potentially
26804  	 * multiple times (e.g., via different code paths. */
26805 -	TRACE("(sup_update_time) now: %llu, current_time: %llu\n", now, sup_env->env.current_time);
26806 +	//TRACE("(sup_update_time) now: %llu, current_time: %llu\n", now, sup_env->env.current_time);
26807  	if (unlikely(now <= sup_env->env.current_time))
26808  		return;
26809  
26810 @@ -264,11 +265,11 @@ void sup_update_time(
26811  		sup_env->next_scheduler_update = SUP_NO_SCHEDULER_UPDATE;
26812  
26813  	/* deplete budgets by passage of time */
26814 -	TRACE("CHARGE###\n");
26815 +	//TRACE("CHARGE###\n");
26816  	sup_charge_budget(sup_env, delta);
26817  
26818  	/* check if any budgets where replenished */
26819 -	TRACE("REPLENISH###\n");
26820 +	//TRACE("REPLENISH###\n");
26821  	sup_replenish_budgets(sup_env);
26822  }
26823  
26824 @@ -325,3 +326,350 @@ void sup_init(struct sup_reservation_environment* sup_env)
26825  
26826  	sup_env->next_scheduler_update = SUP_NO_SCHEDULER_UPDATE;
26827  }
26828 +
26829 +struct reservation* gmp_find_by_id(struct gmp_reservation_environment* gmp_env,
26830 +	unsigned int id)
26831 +{
26832 +	struct reservation *res;
26833 +
26834 +	list_for_each_entry(res, &gmp_env->active_reservations, list) {
26835 +		if (res->id == id)
26836 +			return res;
26837 +	}
26838 +	list_for_each_entry(res, &gmp_env->inactive_reservations, list) {
26839 +		if (res->id == id)
26840 +			return res;
26841 +	}
26842 +	list_for_each_entry(res, &gmp_env->depleted_reservations, list) {
26843 +		if (res->id == id)
26844 +			return res;
26845 +	}
26846 +
26847 +	return NULL;
26848 +}
26849 +
26850 +/*
26851 +struct next_timer_event* gmp_find_event_by_id(struct gmp_reservation_environment* gmp_env,
26852 +	unsigned int id)
26853 +{
26854 +	struct next_timer_event *event;
26855 +
26856 +	list_for_each_entry(event, &gmp_env->next_events, list) {
26857 +		if (event->id == id)
26858 +			return event;
26859 +	}
26860 +
26861 +	return NULL;
26862 +}
26863 +*/
26864 +
26865 +struct next_timer_event* gmp_find_event_by_time(struct gmp_reservation_environment* gmp_env,
26866 +	lt_t when)
26867 +{
26868 +	struct next_timer_event *event;
26869 +
26870 +	list_for_each_entry(event, &gmp_env->next_events, list) {
26871 +		if (event->next_update == when)
26872 +			return event;
26873 +	}
26874 +
26875 +	return NULL;
26876 +}
26877 +
26878 +/*
26879 +static void gmp_scheduler_update_at(
26880 +	struct gmp_reservation_environment* gmp_env, unsigned int id,
26881 +	event_type_t type, lt_t when)
26882 +{
26883 +	struct next_timer_event *nevent, *queued;
26884 +	struct list_head *pos;
26885 +	int found = 0;
26886 +	
26887 +	nevent = gmp_find_event_by_id(gmp_env, id);
26888 +	
26889 +	if (!nevent) {
26890 +		nevent = kzalloc(sizeof(*nevent), GFP_KERNEL);
26891 +		nevent->next_update = when;
26892 +		nevent->id = id;
26893 +		nevent->timer_armed_on = NO_CPU;
26894 +		nevent->type = type;
26895 +		
26896 +		list_for_each(pos, &gmp_env->next_events) {
26897 +			queued = list_entry(pos, struct next_timer_event, list);
26898 +			if (queued->next_update > nevent->next_update) {
26899 +				list_add(&nevent->list, pos->prev);
26900 +				found = 1;
26901 +				TRACE("NEXT_EVENT ADDED after %llu\n", queued->next_update);
26902 +				break;
26903 +			}
26904 +		}
26905 +		
26906 +		if (!found) {
26907 +			list_add_tail(&nevent->list, &gmp_env->next_events);
26908 +			TRACE("NEXT_EVENT ADDED at [0]\n");
26909 +		}
26910 +	} else {
26911 +		TRACE("EVENT FOUND at %llu T(%d), NEW EVENT %llu T(%d)\n", nevent->next_update, nevent->type, when, type);
26912 +	}
26913 +}
26914 +*/
26915 +#define TIMER_RESOLUTION 100000L
26916 +
26917 +static void gmp_scheduler_update_at(
26918 +	struct gmp_reservation_environment* gmp_env,
26919 +	lt_t when)
26920 +{
26921 +	struct next_timer_event *nevent, *queued;
26922 +	struct list_head *pos;
26923 +	int found = 0;
26924 +
26925 +	//when = div64_u64(when, TIMER_RESOLUTION);
26926 +	//when *= TIMER_RESOLUTION;
26927 +	
26928 +	nevent = gmp_find_event_by_time(gmp_env, when);
26929 +	
26930 +	if (!nevent) {
26931 +		nevent = kzalloc(sizeof(*nevent), GFP_KERNEL);
26932 +		nevent->next_update = when;
26933 +		nevent->timer_armed_on = NO_CPU;
26934 +		
26935 +		list_for_each(pos, &gmp_env->next_events) {
26936 +			queued = list_entry(pos, struct next_timer_event, list);
26937 +			if (queued->next_update > nevent->next_update) {
26938 +				list_add(&nevent->list, pos->prev);
26939 +				found = 1;
26940 +				TRACE("NEXT_EVENT at %llu ADDED before %llu\n", nevent->next_update, queued->next_update);
26941 +				break;
26942 +			}
26943 +		}
26944 +		
26945 +		if (!found) {
26946 +			list_add_tail(&nevent->list, &gmp_env->next_events);
26947 +			TRACE("NEXT_EVENT ADDED at %llu ADDED at HEAD\n", nevent->next_update);
26948 +		}
26949 +	} else {
26950 +		; //TRACE("EVENT FOUND at %llu, NEW EVENT %llu\n", nevent->next_update, when);
26951 +	}
26952 +}
26953 +
26954 +void gmp_scheduler_update_after(
26955 +	struct gmp_reservation_environment* gmp_env, lt_t timeout)
26956 +{
26957 +	gmp_scheduler_update_at(gmp_env, gmp_env->env.current_time + timeout);
26958 +}
26959 +
26960 +static void gmp_queue_depleted(
26961 +	struct gmp_reservation_environment* gmp_env,
26962 +	struct reservation *res)
26963 +{
26964 +	struct list_head *pos;
26965 +	struct reservation *queued;
26966 +	int found = 0;
26967 +
26968 +	list_for_each(pos, &gmp_env->depleted_reservations) {
26969 +		queued = list_entry(pos, struct reservation, list);
26970 +		if (queued->next_replenishment > res->next_replenishment) {
26971 +			list_add(&res->list, pos->prev);
26972 +			found = 1;
26973 +		}
26974 +	}
26975 +
26976 +	if (!found)
26977 +		list_add_tail(&res->list, &gmp_env->depleted_reservations);
26978 +
26979 +	gmp_scheduler_update_at(gmp_env, res->next_replenishment);
26980 +}
26981 +
26982 +static void gmp_queue_active(
26983 +	struct gmp_reservation_environment* gmp_env,
26984 +	struct reservation *res)
26985 +{
26986 +	struct list_head *pos;
26987 +	struct reservation *queued;
26988 +	int check_preempt = 1, found = 0;
26989 +
26990 +	list_for_each(pos, &gmp_env->active_reservations) {
26991 +		queued = list_entry(pos, struct reservation, list);
26992 +		if (queued->priority > res->priority) {
26993 +			list_add(&res->list, pos->prev);
26994 +			found = 1;
26995 +			break;
26996 +		} else if (queued->scheduled_on == NO_CPU)
26997 +			check_preempt = 0;
26998 +	}
26999 +
27000 +	if (!found)
27001 +		list_add_tail(&res->list, &gmp_env->active_reservations);
27002 +
27003 +	/* check for possible preemption */
27004 +	if (res->state == RESERVATION_ACTIVE && !check_preempt)
27005 +		gmp_env->schedule_now = true;
27006 +	
27007 +	gmp_scheduler_update_after(gmp_env, res->cur_budget);
27008 +}
27009 +
27010 +static void gmp_queue_reservation(
27011 +	struct gmp_reservation_environment* gmp_env,
27012 +	struct reservation *res)
27013 +{
27014 +	switch (res->state) {
27015 +		case RESERVATION_INACTIVE:
27016 +			list_add(&res->list, &gmp_env->inactive_reservations);
27017 +			break;
27018 +
27019 +		case RESERVATION_DEPLETED:
27020 +			gmp_queue_depleted(gmp_env, res);
27021 +			break;
27022 +
27023 +		case RESERVATION_ACTIVE_IDLE:
27024 +		case RESERVATION_ACTIVE:
27025 +			gmp_queue_active(gmp_env, res);
27026 +			break;
27027 +	}
27028 +}
27029 +
27030 +void gmp_add_new_reservation(
27031 +	struct gmp_reservation_environment* gmp_env,
27032 +	struct reservation* new_res)
27033 +{
27034 +	new_res->env = &gmp_env->env;
27035 +	gmp_queue_reservation(gmp_env, new_res);
27036 +}
27037 +
27038 +static void gmp_charge_budget(
27039 +	struct gmp_reservation_environment* gmp_env,
27040 +	lt_t delta)
27041 +{
27042 +	struct list_head *pos, *next;
27043 +	struct reservation *res;
27044 +
27045 +	list_for_each_safe(pos, next, &gmp_env->active_reservations) {
27046 +		int drained = 0;
27047 +		/* charge all ACTIVE_IDLE up to the first ACTIVE reservation */
27048 +		res = list_entry(pos, struct reservation, list);
27049 +		if (res->state == RESERVATION_ACTIVE) {
27050 +			TRACE("gmp_charge_budget ACTIVE R%u drain %llu\n", res->id, delta);
27051 +			if (res->scheduled_on != NO_CPU && res->blocked_by_ghost == 0) {
27052 +				TRACE("DRAIN !!\n");
27053 +				drained = 1;
27054 +				res->ops->drain_budget(res, delta);
27055 +			}			
27056 +		} else {
27057 +			//BUG_ON(res->state != RESERVATION_ACTIVE_IDLE);
27058 +			if (res->state != RESERVATION_ACTIVE_IDLE)
27059 +				TRACE("BUG!!!!!!!!!!!! gmp_charge_budget()\n");
27060 +			TRACE("gmp_charge_budget INACTIVE R%u drain %llu\n", res->id, delta);
27061 +			//if (res->is_ghost == 1) {
27062 +				TRACE("DRAIN !!\n");
27063 +				drained = 1;
27064 +				res->ops->drain_budget(res, delta);
27065 +			//}
27066 +		}
27067 +		if ((res->state == RESERVATION_ACTIVE ||
27068 +			res->state == RESERVATION_ACTIVE_IDLE) && (drained == 1))
27069 +		{
27070 +			/* make sure scheduler is invoked when this reservation expires
27071 +			 * its remaining budget */
27072 +			 TRACE("requesting gmp_scheduler update for reservation %u in %llu nanoseconds\n",
27073 +				res->id, res->cur_budget);
27074 +			 gmp_scheduler_update_after(gmp_env, res->cur_budget);
27075 +		}
27076 +		//if (encountered_active == 2)
27077 +			/* stop at the first ACTIVE reservation */
27078 +		//	break;
27079 +	}
27080 +	//TRACE("finished charging budgets\n");
27081 +}
27082 +
27083 +static void gmp_replenish_budgets(struct gmp_reservation_environment* gmp_env)
27084 +{
27085 +	struct list_head *pos, *next;
27086 +	struct reservation *res;
27087 +
27088 +	list_for_each_safe(pos, next, &gmp_env->depleted_reservations) {
27089 +		res = list_entry(pos, struct reservation, list);
27090 +		if (res->next_replenishment <= gmp_env->env.current_time) {
27091 +			res->ops->replenish(res);
27092 +		} else {
27093 +			/* list is ordered by increasing depletion times */
27094 +			break;
27095 +		}
27096 +	}
27097 +	//TRACE("finished replenishing budgets\n");
27098 +
27099 +	/* request a scheduler update at the next replenishment instant */
27100 +	res = list_first_entry_or_null(&gmp_env->depleted_reservations,
27101 +		struct reservation, list);
27102 +	if (res)
27103 +		gmp_scheduler_update_at(gmp_env, res->next_replenishment);
27104 +}
27105 +
27106 +/* return schedule_now */
27107 +bool gmp_update_time(
27108 +	struct gmp_reservation_environment* gmp_env,
27109 +	lt_t now)
27110 +{
27111 +	lt_t delta;
27112 +
27113 +	if (!gmp_env) {
27114 +		TRACE("BUG****************************************\n");
27115 +		return false;
27116 +	}
27117 +	/* If the time didn't advance, there is nothing to do.
27118 +	 * This check makes it safe to call sup_advance_time() potentially
27119 +	 * multiple times (e.g., via different code paths). */
27120 +	//TRACE("(sup_update_time) now: %llu, current_time: %llu\n", now, sup_env->env.current_time);
27121 +	if (unlikely(now <= gmp_env->env.current_time))
27122 +		return gmp_env->schedule_now;
27123 +
27124 +	delta = now - gmp_env->env.current_time;
27125 +	gmp_env->env.current_time = now;
27126 +
27127 +
27128 +	/* deplete budgets by passage of time */
27129 +	//TRACE("CHARGE###\n");
27130 +	gmp_charge_budget(gmp_env, delta);
27131 +
27132 +	/* check if any budgets were replenished */
27133 +	//TRACE("REPLENISH###\n");
27134 +	gmp_replenish_budgets(gmp_env);
27135 +	
27136 +	return gmp_env->schedule_now;
27137 +}
27138 +
27139 +static void gmp_res_change_state(
27140 +	struct reservation_environment* env,
27141 +	struct reservation *res,
27142 +	reservation_state_t new_state)
27143 +{
27144 +	struct gmp_reservation_environment* gmp_env;
27145 +
27146 +	gmp_env = container_of(env, struct gmp_reservation_environment, env);
27147 +
27148 +	TRACE("GMP reservation R%d state %d->%d at %llu\n",
27149 +		res->id, res->state, new_state, env->current_time);
27150 +
27151 +	list_del(&res->list);
27152 +	/* check if we need to reschedule because we lost an active reservation */
27153 +	if (res->state == RESERVATION_ACTIVE && !gmp_env->will_schedule)
27154 +		gmp_env->schedule_now = true;
27155 +	res->state = new_state;
27156 +	gmp_queue_reservation(gmp_env, res);
27157 +}
27158 +
27159 +void gmp_init(struct gmp_reservation_environment* gmp_env)
27160 +{
27161 +	memset(gmp_env, 0, sizeof(*gmp_env));
27162 +
27163 +	INIT_LIST_HEAD(&gmp_env->active_reservations);
27164 +	INIT_LIST_HEAD(&gmp_env->depleted_reservations);
27165 +	INIT_LIST_HEAD(&gmp_env->inactive_reservations);
27166 +	INIT_LIST_HEAD(&gmp_env->next_events);
27167 +
27168 +	gmp_env->env.change_state = gmp_res_change_state;
27169 +
27170 +	gmp_env->schedule_now = false;
27171 +	gmp_env->will_schedule = false;
27172 +	
27173 +	raw_spin_lock_init(&gmp_env->lock);
27174 +}
27175 \ No newline at end of file
27176 diff --git a/litmus/sched_mc2.c b/litmus/sched_mc2.c
27177 index 0c26019..6dee1ec 100644
27178 --- a/litmus/sched_mc2.c
27179 +++ b/litmus/sched_mc2.c
27180 @@ -15,6 +15,23 @@
27181  #include <litmus/reservation.h>
27182  #include <litmus/polling_reservations.h>
27183  
27184 +struct gmp_reservation_environment _global_env;
27185 +
27186 +struct cpu_entry {
27187 +	struct task_struct *scheduled;
27188 +	lt_t deadline;
27189 +	int cpu;
27190 +	enum crit_level lv;
27191 +	bool will_schedule;
27192 +};
27193 +
27194 +struct cpu_priority {
27195 +	raw_spinlock_t lock;
27196 +	struct cpu_entry cpu_entries[NR_CPUS];
27197 +};
27198 +
27199 +struct cpu_priority _lowest_prio_cpu;
27200 +	
27201  struct mc2_task_state {
27202  	struct task_client res_info;
27203  	int cpu;
27204 @@ -51,11 +68,39 @@ static struct mc2_task_state* get_mc2_state(struct task_struct *tsk)
27205  }
27206  static enum crit_level get_task_crit_level(struct task_struct *tsk)
27207  {
27208 -	struct mc2_task_state *tinfo = get_mc2_state(tsk);
27209 -	if (!tinfo)
27210 +	//struct mc2_task_state *tinfo = get_mc2_state(tsk);
27211 +	struct mc2_task *mp;
27212 +	
27213 +	if (!tsk || !is_realtime(tsk))
27214 +		return NUM_CRIT_LEVELS;
27215 +	
27216 +	mp = tsk_rt(tsk)->mc2_data;
27217 +	
27218 +	if (!mp)
27219  		return NUM_CRIT_LEVELS;
27220  	else
27221 -		return tinfo->mc2_param.crit;
27222 +		return mp->crit;
27223 +}
27224 +
27225 +static struct reservation* res_find_by_id(struct mc2_cpu_state *state, unsigned int id)
27226 +{
27227 +	struct reservation *res;
27228 +
27229 +	res = sup_find_by_id(&state->sup_env, id);
27230 +	if (!res)
27231 +		res = gmp_find_by_id(&_global_env, id);
27232 +	
27233 +	return res;
27234 +}
27235 +
27236 +static void mc2_update_time(enum crit_level lv, struct mc2_cpu_state *state, lt_t time)
27237 +{
27238 +	if (lv < CRIT_LEVEL_C)
27239 +		sup_update_time(&state->sup_env, time);
27240 +	else if (lv == CRIT_LEVEL_C)
27241 +		gmp_update_time(&_global_env, time);
27242 +	else
27243 +		TRACE("update_time(): Criticality level error!!!!\n");
27244  }
27245  
27246  static void task_departs(struct task_struct *tsk, int job_complete)
27247 @@ -78,6 +123,7 @@ static void task_departs(struct task_struct *tsk, int job_complete)
27248  		
27249  		ce = &state->crit_entries[lv];
27250  		ce->running = tsk;
27251 +		res->is_ghost = 1;
27252  		TRACE_TASK(tsk, "BECOME GHOST at %llu\n", litmus_clock());
27253  		
27254  		//BUG_ON(hrtimer_active(&ce->ghost_timer));
27255 @@ -107,11 +153,44 @@ static void task_arrives(struct mc2_cpu_state *state, struct task_struct *tsk)
27256  	}
27257  }
27258  
27259 +/* return: NO_CPU - all CPUs are running tasks with higher priority than Level C */
27260 +static int get_lowest_prio_cpu(void)
27261 +{
27262 +	struct cpu_entry *ce;
27263 +	int cpu, ret = NO_CPU;
27264 +	lt_t latest_deadline = 0;
27265 +	
27266 +	raw_spin_lock(&_lowest_prio_cpu.lock);
27267 +	for_each_online_cpu(cpu) {
27268 +		ce = &_lowest_prio_cpu.cpu_entries[cpu];
27269 +		if (!ce->will_schedule) {
27270 +			if (!ce->scheduled) {
27271 +				raw_spin_unlock(&_lowest_prio_cpu.lock);
27272 +				return ce->cpu;
27273 +			} else if (ce->lv == CRIT_LEVEL_C && ce->deadline > latest_deadline) {
27274 +				latest_deadline = ce->deadline;
27275 +				ret = ce->cpu;
27276 +			}
27277 +		}
27278 +	}		
27279 +	
27280 +	raw_spin_unlock(&_lowest_prio_cpu.lock);
27281 +	
27282 +	return ret;
27283 +}
27284 +
27285  /* NOTE: drops state->lock */
27286  static void mc2_update_timer_and_unlock(struct mc2_cpu_state *state)
27287  {
27288  	int local;
27289  	lt_t update, now;
27290 +	enum crit_level lv = get_task_crit_level(state->scheduled);
27291 +	struct next_timer_event *event, *next;
27292 +	int found_event = 0;
27293 +	
27294 +	//TRACE_TASK(state->scheduled, "update_timer!\n");
27295 +	if (lv != NUM_CRIT_LEVELS)
27296 +		TRACE_TASK(state->scheduled, "UPDATE_TIMER LV = %d\n", lv);
27297  
27298  	update = state->sup_env.next_scheduler_update;
27299  	now = state->sup_env.env.current_time;
27300 @@ -163,6 +242,37 @@ static void mc2_update_timer_and_unlock(struct mc2_cpu_state *state)
27301  			litmus_reschedule(state->cpu);
27302  		}
27303  	}
27304 +	
27305 +	raw_spin_lock(&_global_env.lock);
27306 +	list_for_each_entry_safe(event, next, &_global_env.next_events, list) {
27307 +		if (event->timer_armed_on == NO_CPU) {
27308 +			found_event = 1;
27309 +			if (event->next_update < litmus_clock()) {
27310 +				int cpu = get_lowest_prio_cpu();
27311 +				TRACE("GLOBAL EVENT PASSED!! poking CPU %d to reschedule\n", cpu);
27312 +				list_del(&event->list);
27313 +				kfree(event);
27314 +				if (cpu != NO_CPU) {
27315 +					raw_spin_lock(&_lowest_prio_cpu.lock);
27316 +					_lowest_prio_cpu.cpu_entries[cpu].will_schedule = true;
27317 +					raw_spin_unlock(&_lowest_prio_cpu.lock);
27318 +					litmus_reschedule(cpu);
27319 +				}
27320 +			} else if (!hrtimer_active(&state->g_timer)) {
27321 +				int ret;
27322 +				TRACE("setting global scheduler timer for %llu\n", event->next_update);
27323 +				ret = __hrtimer_start_range_ns(&state->g_timer,
27324 +						ns_to_ktime(event->next_update),
27325 +						0 /* timer coalescing slack */,
27326 +						HRTIMER_MODE_ABS_PINNED,
27327 +						0 /* wakeup */);
27328 +				if (!ret) {
27329 +					event->timer_armed_on = state->cpu;
27330 +				}
27331 +			}				
27332 +		}
27333 +	}	
27334 +	raw_spin_unlock(&_global_env.lock);
27335  }
27336  
27337  static void mc2_update_ghost_state(struct mc2_cpu_state *state)
27338 @@ -176,16 +286,20 @@ static void mc2_update_ghost_state(struct mc2_cpu_state *state)
27339  		ce = &state->crit_entries[lv];
27340  		if (ce->running != NULL) {
27341  			tinfo = get_mc2_state(ce->running);
27342 +			/*
27343  			if (lv != CRIT_LEVEL_C)
27344  				res = sup_find_by_id(&state->sup_env, tinfo->mc2_param.res_id);
27345  			else
27346  				continue;
27347 +			*/
27348 +			res = res_find_by_id(state, tinfo->mc2_param.res_id);
27349  			TRACE("LV %d running id %d budget %llu\n", lv, tinfo->mc2_param.res_id, res->cur_budget);
27350  			if (!res->cur_budget) {
27351  				struct sup_reservation_environment* sup_env = &state->sup_env;
27352  				
27353  				TRACE("GHOST FINISH id %d at %llu\n", tinfo->mc2_param.res_id, litmus_clock());
27354  				ce->running = NULL;
27355 +				res->is_ghost = 0;
27356  				res = list_first_entry_or_null(&sup_env->active_reservations, struct reservation, list);
27357  				if (res)
27358  					litmus_reschedule_local();
27359 @@ -215,6 +329,95 @@ static enum hrtimer_restart on_ghost_timer(struct hrtimer *timer)
27360  }
27361  */
27362  
27363 +static void update_cpu_prio(struct mc2_cpu_state *state)
27364 +{
27365 +	struct cpu_entry *ce = &_lowest_prio_cpu.cpu_entries[state->cpu];
27366 +	enum crit_level lv = get_task_crit_level(state->scheduled);
27367 +	
27368 +	if (!state->scheduled) {
27369 +		// cpu is idle.
27370 +		ce->scheduled = NULL;
27371 +		ce->deadline = ULLONG_MAX;
27372 +		ce->lv = NUM_CRIT_LEVELS;
27373 +	} else if (lv == CRIT_LEVEL_C) {
27374 +		ce->scheduled = state->scheduled;
27375 +		ce->deadline = get_deadline(state->scheduled);
27376 +		ce->lv = lv;
27377 +	} else if (lv < CRIT_LEVEL_C) {
27378 +		ce->scheduled = state->scheduled;
27379 +		ce->deadline = 0;
27380 +		ce->lv = lv;
27381 +	}
27382 +};
27383 +
27384 +static enum hrtimer_restart on_global_scheduling_timer(struct hrtimer *timer)
27385 +{
27386 +	unsigned long flags;
27387 +	enum hrtimer_restart restart = HRTIMER_NORESTART;
27388 +	struct mc2_cpu_state *state;
27389 +	struct next_timer_event *event, *next;
27390 +	bool schedule_now;
27391 +	lt_t update, now;
27392 +	int found_event = 0;
27393 +
27394 +	state = container_of(timer, struct mc2_cpu_state, g_timer);
27395 +
27396 +	/* The scheduling timer should only fire on the local CPU, because
27397 +	 * otherwise deadlocks via timer_cancel() are possible.
27398 +	 * Note: this does not interfere with dedicated interrupt handling, as
27399 +	 * even under dedicated interrupt handling scheduling timers for
27400 +	 * budget enforcement must occur locally on each CPU.
27401 +	 */
27402 +	//BUG_ON(state->cpu != raw_smp_processor_id());
27403 +	if (state->cpu != raw_smp_processor_id())
27404 +		TRACE("BUG!!!!!!!!!!!!! TIMER FIRED ON THE OTHER CPU\n");
27405 +
27406 +	raw_spin_lock_irqsave(&_global_env.lock, flags);
27407 +	
27408 +	update = litmus_clock();
27409 +	TRACE("GLOBAL TIMER FIRED at %llu\n", update);
27410 +	
27411 +	list_for_each_entry_safe(event, next, &_global_env.next_events, list) {
27412 +		if (event->next_update < update) {
27413 +			found_event = 1;
27414 +			list_del(&event->list);
27415 +			TRACE("EVENT at %llu IS DELETED\n", event->next_update);
27416 +			kfree(event);
27417 +		}
27418 +	}			
27419 +	
27420 +	if (!found_event) {
27421 +		raw_spin_unlock_irqrestore(&_global_env.lock, flags);
27422 +		return restart;
27423 +	}
27424 +	
27425 +	schedule_now = gmp_update_time(&_global_env, update);
27426 +	
27427 +	raw_spin_lock(&state->lock);
27428 +	mc2_update_ghost_state(state);
27429 +	raw_spin_unlock(&state->lock);
27430 +	
27431 +	now = _global_env.env.current_time;
27432 +	
27433 +	TRACE_CUR("on_global_scheduling_timer at %llu, upd:%llu (for cpu=%d) SCHEDULE_NOW = %d\n",
27434 +		now, update, state->cpu, schedule_now);
27435 +
27436 +	if (schedule_now) {
27437 +		int cpu = get_lowest_prio_cpu();
27438 +		if (cpu != NO_CPU) {
27439 +			raw_spin_lock(&_lowest_prio_cpu.lock);
27440 +			_lowest_prio_cpu.cpu_entries[cpu].will_schedule = true;
27441 +			raw_spin_unlock(&_lowest_prio_cpu.lock);
27442 +			TRACE("LOWEST CPU = P%d\n", cpu);
27443 +			litmus_reschedule(cpu);
27444 +		}
27445 +	} 
27446 +
27447 +	raw_spin_unlock_irqrestore(&_global_env.lock, flags);
27448 +
27449 +	return restart;
27450 +}
27451 +
27452  static enum hrtimer_restart on_scheduling_timer(struct hrtimer *timer)
27453  {
27454  	unsigned long flags;
27455 @@ -276,6 +479,7 @@ struct task_struct* mc2_dispatch(struct sup_reservation_environment* sup_env, st
27456  					if (likely(!ce->running)) {
27457  						sup_scheduler_update_after(sup_env, res->cur_budget);
27458  						res->blocked_by_ghost = 0;
27459 +						res->is_ghost = 0;
27460  						return tsk;
27461  					} else {
27462  						res->blocked_by_ghost = 1;
27463 @@ -284,7 +488,34 @@ struct task_struct* mc2_dispatch(struct sup_reservation_environment* sup_env, st
27464  			}
27465  		}
27466  	}
27467 -
27468 +	// no level A or B tasks
27469 +	
27470 +	list_for_each_entry_safe(res, next, &_global_env.active_reservations, list) {
27471 +		if (res->state == RESERVATION_ACTIVE && res->scheduled_on == NO_CPU) {
27472 +			tsk = res->ops->dispatch_client(res, &time_slice);
27473 +			if (likely(tsk)) {
27474 +				lv = get_task_crit_level(tsk);
27475 +				if (lv == NUM_CRIT_LEVELS) {
27476 +					gmp_scheduler_update_after(&_global_env, res->cur_budget);
27477 +					//raw_spin_unlock(&_global_env.lock);
27478 +					return tsk;
27479 +				} else {
27480 +					ce = &state->crit_entries[lv];
27481 +					if (likely(!ce->running)) {
27482 +						gmp_scheduler_update_after(&_global_env, res->cur_budget);
27483 +						res->blocked_by_ghost = 0;
27484 +						res->is_ghost = 0;
27485 +						res->scheduled_on = state->cpu;
27486 +						//raw_spin_unlock(&_global_env.lock);
27487 +						return tsk;
27488 +					} else {
27489 +						res->blocked_by_ghost = 1;
27490 +					}
27491 +				}
27492 +			}
27493 +		}
27494 +	}
27495 +	
27496  	return NULL;
27497  }
27498  
27499 @@ -292,17 +523,30 @@ static struct task_struct* mc2_schedule(struct task_struct * prev)
27500  {
27501  	/* next == NULL means "schedule background work". */
27502  	struct mc2_cpu_state *state = local_cpu_state();
27503 -
27504 +	
27505 +	raw_spin_lock(&_lowest_prio_cpu.lock);
27506 +	if (_lowest_prio_cpu.cpu_entries[state->cpu].will_schedule == true)
27507 +		_lowest_prio_cpu.cpu_entries[state->cpu].will_schedule = false;
27508 +	raw_spin_unlock(&_lowest_prio_cpu.lock);
27509 +	
27510  	raw_spin_lock(&state->lock);
27511  
27512 -	BUG_ON(state->scheduled && state->scheduled != prev);
27513 -	BUG_ON(state->scheduled && !is_realtime(prev));
27514 +	//BUG_ON(state->scheduled && state->scheduled != prev);
27515 +	//BUG_ON(state->scheduled && !is_realtime(prev));
27516 +	if (state->scheduled && state->scheduled != prev)
27517 +		TRACE("BUG1!!!!!!!!\n");
27518 +	if (state->scheduled && !is_realtime(prev))
27519 +		TRACE("BUG2!!!!!!!!\n");
27520  
27521  	/* update time */
27522  	state->sup_env.will_schedule = true;
27523 -	TRACE_TASK(prev, "MC2_SCHEDULE sup_update_time ####\n");
27524 +	//TRACE_TASK(prev, "MC2_SCHEDULE sup_update_time ####\n");
27525  	sup_update_time(&state->sup_env, litmus_clock());
27526 -	TRACE_TASK(prev, "MC2_SCHEDULE sup_update_time !!!!\n");
27527 +	
27528 +	raw_spin_lock(&_global_env.lock);
27529 +	gmp_update_time(&_global_env, litmus_clock());
27530 +	
27531 +	//TRACE_TASK(prev, "MC2_SCHEDULE sup_update_time !!!!\n");
27532  	mc2_update_ghost_state(state);
27533  	
27534  	/* remove task from reservation if it blocks */
27535 @@ -311,16 +555,29 @@ static struct task_struct* mc2_schedule(struct task_struct * prev)
27536  
27537  	/* figure out what to schedule next */
27538  	state->scheduled = mc2_dispatch(&state->sup_env, state);
27539 -
27540 +	if (state->scheduled && is_realtime(state->scheduled))
27541 +		TRACE_TASK(state->scheduled, "mc2_dispatch picked me!\n");
27542 +	
27543 +	raw_spin_lock(&_lowest_prio_cpu.lock);
27544 +	update_cpu_prio(state);
27545 +	raw_spin_unlock(&_lowest_prio_cpu.lock);
27546 +	
27547  	/* Notify LITMUS^RT core that we've arrived at a scheduling decision. */
27548  	sched_state_task_picked();
27549  
27550  	/* program scheduler timer */
27551  	state->sup_env.will_schedule = false;
27552 +	
27553 +	raw_spin_unlock(&_global_env.lock);
27554 +	
27555  	/* NOTE: drops state->lock */
27556  	mc2_update_timer_and_unlock(state);
27557  
27558  	if (prev != state->scheduled && is_realtime(prev)) {
27559 +		struct mc2_task_state* tinfo = get_mc2_state(prev);
27560 +		struct reservation* res = tinfo->res_info.client.reservation;
27561 +		TRACE_TASK(prev, "PREV JOB scheduled_on = P%d\n", res->scheduled_on);
27562 +		res->scheduled_on = NO_CPU;
27563  		TRACE_TASK(prev, "descheduled.\n");
27564  	}
27565  	if (state->scheduled) {
27566 @@ -354,10 +611,15 @@ static void mc2_task_resume(struct task_struct  *tsk)
27567  {
27568  	unsigned long flags;
27569  	struct mc2_task_state* tinfo = get_mc2_state(tsk);
27570 -	struct mc2_cpu_state *state = cpu_state_for(tinfo->cpu);
27571 +	struct mc2_cpu_state *state;
27572  
27573  	TRACE_TASK(tsk, "thread wakes up at %llu\n", litmus_clock());
27574  
27575 +	if (tinfo->cpu != -1)
27576 +		state = cpu_state_for(tinfo->cpu);
27577 +	else
27578 +		state = local_cpu_state();
27579 +
27580  	raw_spin_lock_irqsave(&state->lock, flags);
27581  	/* Requeue only if self-suspension was already processed. */
27582  	if (tinfo->has_departed)
27583 @@ -365,7 +627,16 @@ static void mc2_task_resume(struct task_struct  *tsk)
27584  		/* Assumption: litmus_clock() is synchronized across cores,
27585  		 * since we might not actually be executing on tinfo->cpu
27586  		 * at the moment. */
27587 -		sup_update_time(&state->sup_env, litmus_clock());
27588 +		if (tinfo->cpu != -1) {
27589 +			sup_update_time(&state->sup_env, litmus_clock());
27590 +		} else {
27591 +			raw_spin_lock(&_global_env.lock);
27592 +			TRACE("RESUME UPDATE ####\n");
27593 +			gmp_update_time(&_global_env, litmus_clock());
27594 +			TRACE("RESUME UPDATE $$$$\n");
27595 +			raw_spin_unlock(&_global_env.lock);
27596 +		}
27597 +			
27598  		mc2_update_ghost_state(state);
27599  		task_arrives(state, tsk);
27600  		/* NOTE: drops state->lock */
27601 @@ -385,37 +656,55 @@ static long mc2_complete_job(void)
27602  {
27603  	ktime_t next_release;
27604  	long err;
27605 -	struct mc2_cpu_state *state = local_cpu_state();
27606 -	struct reservation_environment *env = &(state->sup_env.env);
27607 -	struct mc2_task_state *tinfo = get_mc2_state(current);
27608 -	struct reservation *res;
27609 -	
27610 -	res = sup_find_by_id(&state->sup_env, tinfo->mc2_param.res_id);
27611 -	if (!res)
27612 -		; // find in global env
27613 -	
27614 -	TRACE_CUR("mc2_complete_job at %llu (deadline: %llu) (cur->budget: %llu)\n", litmus_clock(),
27615 -		get_deadline(current), res->cur_budget);
27616 +
27617 +	TRACE_CUR("mc2_complete_job at %llu (deadline: %llu)\n", litmus_clock(),
27618 +		get_deadline(current));
27619  
27620  	tsk_rt(current)->completed = 1;
27621  	
27622  	if (tsk_rt(current)->sporadic_release) {
27623 -		env->time_zero = tsk_rt(current)->sporadic_release_time;
27624 +		struct mc2_cpu_state *state;
27625 +		struct reservation_environment *env;
27626 +		struct mc2_task_state *tinfo;
27627 +		struct reservation *res;
27628 +		unsigned long flags;
27629 +
27630 +		local_irq_save(flags);
27631 +	
27632 +		state = local_cpu_state();
27633 +		env = &(state->sup_env.env);
27634 +		tinfo = get_mc2_state(current);
27635 +		
27636 +		res = res_find_by_id(state, tsk_rt(current)->mc2_data->res_id);
27637 +		
27638 +		if (get_task_crit_level(current) < CRIT_LEVEL_C) {
27639 +			raw_spin_lock(&state->lock);
27640 +			env->time_zero = tsk_rt(current)->sporadic_release_time;
27641 +		} else {
27642 +			raw_spin_lock(&_global_env.lock);
27643 +			_global_env.env.time_zero = tsk_rt(current)->sporadic_release_time;
27644 +		}
27645 +		
27646  		res->next_replenishment = tsk_rt(current)->sporadic_release_time;
27647 -		res->cur_budget = 0;
27648 -		res->env->change_state(res->env, res, RESERVATION_DEPLETED);
27649  		
27650 -		if (tinfo->mc2_param.crit == CRIT_LEVEL_A) {
27651 +		if (get_task_crit_level(current) == CRIT_LEVEL_A) {
27652  			struct table_driven_reservation *tdres;
27653 -			
27654 -			//sup_update_time(&state->sup_env, litmus_clock());
27655 -			//res = sup_find_by_id(&state->sup_env, tinfo->mc2_param.res_id);
27656  			tdres = container_of(res, struct table_driven_reservation, res);
27657  			tdres->next_interval = 0;
27658  			tdres->major_cycle_start = tsk_rt(current)->sporadic_release_time;
27659  			res->next_replenishment += tdres->intervals[0].start;			
27660  		}
27661 -		TRACE_CUR("CHANGE NEXT_REP = %llu\n NEXT_UPDATE = %llu\n", res->next_replenishment, state->sup_env.next_scheduler_update);
27662 +		res->cur_budget = 0;
27663 +		res->env->change_state(res->env, res, RESERVATION_DEPLETED);
27664 +		
27665 +		//TRACE_CUR("CHANGE NEXT_REP = %llu\n NEXT_UPDATE = %llu\n", res->next_replenishment, state->sup_env.next_scheduler_update);
27666 +		if (get_task_crit_level(current) < CRIT_LEVEL_C) {
27667 +			raw_spin_unlock(&state->lock);
27668 +		} else {
27669 +			raw_spin_unlock(&_global_env.lock);
27670 +		}
27671 +		
27672 +		local_irq_restore(flags);
27673  	}
27674  	
27675  	prepare_for_next_period(current);
27676 @@ -443,8 +732,9 @@ static long mc2_admit_task(struct task_struct *tsk)
27677  	struct reservation *res;
27678  	struct mc2_cpu_state *state;
27679  	struct mc2_task_state *tinfo = kzalloc(sizeof(*tinfo), GFP_ATOMIC);
27680 -	struct mc2_task *mp = tsk_rt(tsk)->plugin_state;
27681 -
27682 +	struct mc2_task *mp = tsk_rt(tsk)->mc2_data;
27683 +	enum crit_level lv;
27684 +	
27685  	if (!tinfo)
27686  		return -ENOMEM;
27687  
27688 @@ -453,33 +743,61 @@ static long mc2_admit_task(struct task_struct *tsk)
27689  		return err;
27690  	}
27691  	
27692 +	lv = mp->crit;
27693  	preempt_disable();
27694  
27695 -	state = cpu_state_for(task_cpu(tsk));
27696 -	raw_spin_lock_irqsave(&state->lock, flags);
27697 +	if (lv < CRIT_LEVEL_C) {
27698 +		state = cpu_state_for(task_cpu(tsk));
27699 +		raw_spin_lock_irqsave(&state->lock, flags);
27700  
27701 -	res = sup_find_by_id(&state->sup_env, mp->res_id);
27702 +		res = sup_find_by_id(&state->sup_env, mp->res_id);
27703  
27704 -	/* found the appropriate reservation (or vCPU) */
27705 -	if (res) {
27706 -		TRACE_TASK(tsk, "FOUND RES ID\n");
27707 -		tinfo->mc2_param.crit = mp->crit;
27708 -		tinfo->mc2_param.res_id = mp->res_id;
27709 -		
27710 -		kfree(tsk_rt(tsk)->plugin_state);
27711 -		tsk_rt(tsk)->plugin_state = NULL;
27712 +		/* found the appropriate reservation (or vCPU) */
27713 +		if (res) {
27714 +			TRACE_TASK(tsk, "SUP FOUND RES ID\n");
27715 +			tinfo->mc2_param.crit = mp->crit;
27716 +			tinfo->mc2_param.res_id = mp->res_id;
27717 +			
27718 +			//kfree(tsk_rt(tsk)->plugin_state);
27719 +			//tsk_rt(tsk)->plugin_state = NULL;
27720 +			
27721 +			err = mc2_task_client_init(&tinfo->res_info, &tinfo->mc2_param, tsk, res);
27722 +			tinfo->cpu = task_cpu(tsk);
27723 +			tinfo->has_departed = true;
27724 +			tsk_rt(tsk)->plugin_state = tinfo;
27725 +
27726 +			/* disable LITMUS^RT's per-thread budget enforcement */
27727 +			tsk_rt(tsk)->task_params.budget_policy = NO_ENFORCEMENT;
27728 +		}
27729 +
27730 +		raw_spin_unlock_irqrestore(&state->lock, flags);
27731 +	} else if (lv == CRIT_LEVEL_C) {
27732 +		raw_spin_lock_irqsave(&_global_env.lock, flags);
27733  		
27734 -		err = mc2_task_client_init(&tinfo->res_info, &tinfo->mc2_param, tsk, res);
27735 -		tinfo->cpu = task_cpu(tsk);
27736 -		tinfo->has_departed = true;
27737 -		tsk_rt(tsk)->plugin_state = tinfo;
27738 +		res = gmp_find_by_id(&_global_env, mp->res_id);
27739  
27740 -		/* disable LITMUS^RT's per-thread budget enforcement */
27741 -		tsk_rt(tsk)->task_params.budget_policy = NO_ENFORCEMENT;
27742 -	}
27743 +		/* found the appropriate reservation (or vCPU) */
27744 +		if (res) {
27745 +			TRACE_TASK(tsk, "GMP FOUND RES ID\n");
27746 +			tinfo->mc2_param.crit = mp->crit;
27747 +			tinfo->mc2_param.res_id = mp->res_id;
27748 +			
27749 +			//kfree(tsk_rt(tsk)->plugin_state);
27750 +			//tsk_rt(tsk)->plugin_state = NULL;
27751 +			
27752 +			err = mc2_task_client_init(&tinfo->res_info, &tinfo->mc2_param, tsk, res);
27753 +			tinfo->cpu = -1;
27754 +			tinfo->has_departed = true;
27755 +			tsk_rt(tsk)->plugin_state = tinfo;
27756  
27757 -	raw_spin_unlock_irqrestore(&state->lock, flags);
27758 +			/* disable LITMUS^RT's per-thread budget enforcement */
27759 +			tsk_rt(tsk)->task_params.budget_policy = NO_ENFORCEMENT;
27760 +		}
27761  
27762 +		raw_spin_unlock_irqrestore(&_global_env.lock, flags);
27763 +		
27764 +	}
27765 +	
27766  	preempt_enable();
27767  
27768  	if (err)
27769 @@ -493,12 +811,18 @@ static void mc2_task_new(struct task_struct *tsk, int on_runqueue,
27770  {
27771  	unsigned long flags;
27772  	struct mc2_task_state* tinfo = get_mc2_state(tsk);
27773 -	struct mc2_cpu_state *state = cpu_state_for(tinfo->cpu);
27774 +	struct mc2_cpu_state *state; // = cpu_state_for(tinfo->cpu);
27775  	struct reservation *res;
27776 -	
27777 +	enum crit_level lv = get_task_crit_level(tsk);
27778 +
27779  	TRACE_TASK(tsk, "new RT task %llu (on_rq:%d, running:%d)\n",
27780  		   litmus_clock(), on_runqueue, is_running);
27781  
27782 +	if (tinfo->cpu == -1)
27783 +		state = local_cpu_state();
27784 +	else 
27785 +		state = cpu_state_for(tinfo->cpu);
27786 +	
27787  	/* acquire the lock protecting the state and disable interrupts */
27788  	raw_spin_lock_irqsave(&state->lock, flags);
27789  
27790 @@ -511,7 +835,9 @@ static void mc2_task_new(struct task_struct *tsk, int on_runqueue,
27791  	if (on_runqueue || is_running) {
27792  		/* Assumption: litmus_clock() is synchronized across cores
27793  		 * [see comment in pres_task_resume()] */
27794 -		sup_update_time(&state->sup_env, litmus_clock());
27795 +		raw_spin_lock(&_global_env.lock);
27796 +		mc2_update_time(lv, state, litmus_clock());
27797 +		raw_spin_unlock(&_global_env.lock);
27798  		mc2_update_ghost_state(state);
27799  		task_arrives(state, tsk);
27800  		/* NOTE: drops state->lock */
27801 @@ -521,12 +847,14 @@ static void mc2_task_new(struct task_struct *tsk, int on_runqueue,
27802  	} else
27803  		raw_spin_unlock_irqrestore(&state->lock, flags);
27804  
27805 -	res = sup_find_by_id(&state->sup_env, tinfo->mc2_param.res_id);
27806 -	release_at(tsk, res->next_replenishment);
27807 -	if (res)
27808 -		TRACE_TASK(tsk, "next_replenishment = %llu\n", res->next_replenishment);
27809 +	res = res_find_by_id(state, tinfo->mc2_param.res_id);
27810 +	
27811 +	if (res) {
27812 +		TRACE_TASK(tsk, "mc2_task_new() next_replenishment = %llu\n", res->next_replenishment);
27813 +		release_at(tsk, res->next_replenishment);
27814 +	}
27815  	else
27816 -		TRACE_TASK(tsk, "next_replenishment = NULL\n");
27817 +		TRACE_TASK(tsk, "mc2_task_new() next_replenishment = NULL\n");
27818  }
27819  
27820  static long mc2_reservation_destroy(unsigned int reservation_id, int cpu)
27821 @@ -537,43 +865,71 @@ static long mc2_reservation_destroy(unsigned int reservation_id, int cpu)
27822  	struct sup_reservation_environment *sup_env;
27823  	int found = 0;
27824  	enum crit_level lv = get_task_crit_level(current);
27825 -		
27826 -	state = cpu_state_for(cpu);
27827 -	raw_spin_lock(&state->lock);
27828  	
27829 -//	res = sup_find_by_id(&state->sup_env, reservation_id);
27830 -	sup_env = &state->sup_env;
27831 -	//if (!res) {
27832 -	list_for_each_entry_safe(res, next, &sup_env->depleted_reservations, list) {
27833 -		if (res->id == reservation_id) {
27834 -			if (lv == CRIT_LEVEL_A) {
27835 -				struct table_driven_reservation *tdres;
27836 -				tdres = container_of(res, struct table_driven_reservation, res);
27837 -				kfree(tdres->intervals);
27838 -			}
27839 -			list_del(&res->list);
27840 -			kfree(res);
27841 -			found = 1;
27842 -			ret = 0;
27843 -		}
27844 -	}
27845 -	if (!found) {
27846 -		list_for_each_entry_safe(res, next, &sup_env->inactive_reservations, list) {
27847 +	if (cpu == -1) {
27848 +		raw_spin_lock(&_global_env.lock);
27849 +	
27850 +		list_for_each_entry_safe(res, next, &_global_env.depleted_reservations, list) {
27851  			if (res->id == reservation_id) {
27852 -				if (lv == CRIT_LEVEL_A) {
27853 -					struct table_driven_reservation *tdres;
27854 -					tdres = container_of(res, struct table_driven_reservation, res);
27855 -					kfree(tdres->intervals);
27856 -				}
27857 +				TRACE("DESTROY RES FOUND!!!\n");
27858  				list_del(&res->list);
27859  				kfree(res);
27860  				found = 1;
27861  				ret = 0;
27862  			}
27863  		}
27864 -	}
27865 -	if (!found) {
27866 -		list_for_each_entry_safe(res, next, &sup_env->active_reservations, list) {
27867 +		if (!found) {
27868 +			list_for_each_entry_safe(res, next, &_global_env.inactive_reservations, list) {
27869 +				if (res->id == reservation_id) {
27870 +					TRACE("DESTROY RES FOUND!!!\n");
27871 +					list_del(&res->list);
27872 +					kfree(res);
27873 +					found = 1;
27874 +					ret = 0;
27875 +				}
27876 +			}
27877 +		}
27878 +		if (!found) {
27879 +			list_for_each_entry_safe(res, next, &_global_env.active_reservations, list) {
27880 +				if (res->id == reservation_id) {
27881 +					TRACE("DESTROY RES FOUND!!!\n");
27882 +					list_del(&res->list);
27883 +					kfree(res);
27884 +					found = 1;
27885 +					ret = 0;
27886 +				}
27887 +			}
27888 +		}
27889 +
27890 +/*		
27891 +list_for_each_entry(res, &_global_env.depleted_reservations, list) {
27892 +	TRACE("DEPLETED LIST R%d\n", res->id);
27893 +}
27894 +list_for_each_entry(res, &_global_env.inactive_reservations, list) {
27895 +	TRACE("INACTIVE LIST R%d\n", res->id);
27896 +}
27897 +list_for_each_entry(res, &_global_env.active_reservations, list) {
27898 +	TRACE("ACTIVE LIST R%d\n", res->id);
27899 +}
27900 +*/
27901 +		if (list_empty(&_global_env.active_reservations)) 
27902 +			INIT_LIST_HEAD(&_global_env.active_reservations);
27903 +		if (list_empty(&_global_env.depleted_reservations)) 
27904 +			INIT_LIST_HEAD(&_global_env.depleted_reservations);
27905 +		if (list_empty(&_global_env.inactive_reservations)) 
27906 +			INIT_LIST_HEAD(&_global_env.inactive_reservations);
27907 +		if (list_empty(&_global_env.next_events)) 
27908 +			INIT_LIST_HEAD(&_global_env.next_events);
27909 +		
27910 +		raw_spin_unlock(&_global_env.lock);
27911 +	} else {
27912 +		state = cpu_state_for(cpu);
27913 +		raw_spin_lock(&state->lock);
27914 +		
27915 +	//	res = sup_find_by_id(&state->sup_env, reservation_id);
27916 +		sup_env = &state->sup_env;
27917 +		//if (!res) {
27918 +		list_for_each_entry_safe(res, next, &sup_env->depleted_reservations, list) {
27919  			if (res->id == reservation_id) {
27920  				if (lv == CRIT_LEVEL_A) {
27921  					struct table_driven_reservation *tdres;
27922 @@ -586,10 +942,40 @@ static long mc2_reservation_destroy(unsigned int reservation_id, int cpu)
27923  				ret = 0;
27924  			}
27925  		}
27926 -	}
27927 -	//}
27928 +		if (!found) {
27929 +			list_for_each_entry_safe(res, next, &sup_env->inactive_reservations, list) {
27930 +				if (res->id == reservation_id) {
27931 +					if (lv == CRIT_LEVEL_A) {
27932 +						struct table_driven_reservation *tdres;
27933 +						tdres = container_of(res, struct table_driven_reservation, res);
27934 +						kfree(tdres->intervals);
27935 +					}
27936 +					list_del(&res->list);
27937 +					kfree(res);
27938 +					found = 1;
27939 +					ret = 0;
27940 +				}
27941 +			}
27942 +		}
27943 +		if (!found) {
27944 +			list_for_each_entry_safe(res, next, &sup_env->active_reservations, list) {
27945 +				if (res->id == reservation_id) {
27946 +					if (lv == CRIT_LEVEL_A) {
27947 +						struct table_driven_reservation *tdres;
27948 +						tdres = container_of(res, struct table_driven_reservation, res);
27949 +						kfree(tdres->intervals);
27950 +					}
27951 +					list_del(&res->list);
27952 +					kfree(res);
27953 +					found = 1;
27954 +					ret = 0;
27955 +				}
27956 +			}
27957 +		}
27958 +		//}
27959  
27960 -	raw_spin_unlock(&state->lock);
27961 +		raw_spin_unlock(&state->lock);
27962 +	}
27963  	
27964  	TRACE("RESERVATION_DESTROY ret = %d\n", ret);
27965  	return ret;
27966 @@ -599,10 +985,15 @@ static void mc2_task_exit(struct task_struct *tsk)
27967  {
27968  	unsigned long flags;
27969  	struct mc2_task_state* tinfo = get_mc2_state(tsk);
27970 -	struct mc2_cpu_state *state = cpu_state_for(tinfo->cpu);
27971 +	struct mc2_cpu_state *state;
27972  	enum crit_level lv = tinfo->mc2_param.crit;
27973  	struct crit_entry* ce;	
27974  
27975 +	if (tinfo->cpu != -1)
27976 +		state = cpu_state_for(tinfo->cpu);
27977 +	else
27978 +		state = local_cpu_state();
27979 +	
27980  	raw_spin_lock_irqsave(&state->lock, flags);
27981  
27982  	if (state->scheduled == tsk)
27983 @@ -616,7 +1007,11 @@ static void mc2_task_exit(struct task_struct *tsk)
27984  	if (is_running(tsk)) {
27985  		/* Assumption: litmus_clock() is synchronized across cores
27986  		 * [see comment in pres_task_resume()] */
27987 -		sup_update_time(&state->sup_env, litmus_clock());
27988 +		//if (lv < CRIT_LEVEL_C)
27989 +		//	sup_update_time(&state->sup_env, litmus_clock());
27990 +		raw_spin_lock(&_global_env.lock);
27991 +		mc2_update_time(lv, state, litmus_clock());
27992 +		raw_spin_unlock(&_global_env.lock);
27993  		mc2_update_ghost_state(state);
27994  		task_departs(tsk, 0);
27995  		
27996 @@ -644,6 +1039,8 @@ static void mc2_task_exit(struct task_struct *tsk)
27997  */
27998  	kfree(tsk_rt(tsk)->plugin_state);
27999  	tsk_rt(tsk)->plugin_state = NULL;
28000 +	kfree(tsk_rt(tsk)->mc2_data);
28001 +	tsk_rt(tsk)->mc2_data = NULL;
28002  }
28003  
28004  static long create_polling_reservation(
28005 @@ -685,28 +1082,54 @@ static long create_polling_reservation(
28006  	if (!pres)
28007  		return -ENOMEM;
28008  
28009 -	state = cpu_state_for(config->cpu);
28010 -	raw_spin_lock_irqsave(&state->lock, flags);
28011 +	if (config->cpu != -1) {
28012 +		state = cpu_state_for(config->cpu);
28013 +		raw_spin_lock_irqsave(&state->lock, flags);
28014 +
28015 +		res = sup_find_by_id(&state->sup_env, config->id);
28016 +		if (!res) {
28017 +			polling_reservation_init(pres, use_edf, periodic,
28018 +				config->polling_params.budget,
28019 +				config->polling_params.period,
28020 +				config->polling_params.relative_deadline,
28021 +				config->polling_params.offset);
28022 +			pres->res.id = config->id;
28023 +			pres->res.blocked_by_ghost = 0;
28024 +			pres->res.is_ghost = 0;
28025 +			if (!use_edf)
28026 +				pres->res.priority = config->priority;
28027 +			sup_add_new_reservation(&state->sup_env, &pres->res);
28028 +			err = config->id;
28029 +		} else {
28030 +			err = -EEXIST;
28031 +		}
28032  
28033 -	res = sup_find_by_id(&state->sup_env, config->id);
28034 -	if (!res) {
28035 -		polling_reservation_init(pres, use_edf, periodic,
28036 -			config->polling_params.budget,
28037 -			config->polling_params.period,
28038 -			config->polling_params.relative_deadline,
28039 -			config->polling_params.offset);
28040 -		pres->res.id = config->id;
28041 -		pres->res.blocked_by_ghost = 0;
28042 -		if (!use_edf)
28043 -			pres->res.priority = config->priority;
28044 -		sup_add_new_reservation(&state->sup_env, &pres->res);
28045 -		err = config->id;
28046 +		raw_spin_unlock_irqrestore(&state->lock, flags);
28047  	} else {
28048 -		err = -EEXIST;
28049 +		raw_spin_lock_irqsave(&_global_env.lock, flags);
28050 +		
28051 +		res = gmp_find_by_id(&_global_env, config->id);
28052 +		if (!res) {
28053 +			polling_reservation_init(pres, use_edf, periodic,
28054 +				config->polling_params.budget,
28055 +				config->polling_params.period,
28056 +				config->polling_params.relative_deadline,
28057 +				config->polling_params.offset);
28058 +			pres->res.id = config->id;
28059 +			pres->res.blocked_by_ghost = 0;
28060 +			pres->res.scheduled_on = NO_CPU;
28061 +			pres->res.is_ghost = 0;
28062 +			if (!use_edf)
28063 +				pres->res.priority = config->priority;
28064 +			gmp_add_new_reservation(&_global_env, &pres->res);
28065 +			TRACE("GMP_ADD_NEW_RESERVATION R%d\n", pres->res.id);
28066 +			err = config->id;
28067 +		} else {
28068 +			err = -EEXIST;
28069 +		}
28070 +		raw_spin_unlock_irqrestore(&_global_env.lock, flags);		
28071  	}
28072 -
28073 -	raw_spin_unlock_irqrestore(&state->lock, flags);
28074 -
28075 +	
28076  	if (err < 0)
28077  		kfree(pres);
28078  
28079 @@ -825,10 +1248,12 @@ static long mc2_reservation_create(int res_type, void* __user _config)
28080  	if (copy_from_user(&config, _config, sizeof(config)))
28081  		return -EFAULT;
28082  
28083 -	if (config.cpu < 0 || !cpu_online(config.cpu)) {
28084 -		printk(KERN_ERR "invalid polling reservation (%u): "
28085 -		       "CPU %d offline\n", config.id, config.cpu);
28086 -		return -EINVAL;
28087 +	if (config.cpu != -1) {
28088 +		if (config.cpu < 0 || !cpu_online(config.cpu)) {
28089 +			printk(KERN_ERR "invalid polling reservation (%u): "
28090 +				   "CPU %d offline\n", config.id, config.cpu);
28091 +			return -EINVAL;
28092 +		}
28093  	}
28094  
28095  	switch (res_type) {
28096 @@ -885,19 +1310,30 @@ static long mc2_activate_plugin(void)
28097  {
28098  	int cpu, lv;
28099  	struct mc2_cpu_state *state;
28100 +	struct cpu_entry *ce;
28101  
28102 +	gmp_init(&_global_env);
28103 +	raw_spin_lock_init(&_lowest_prio_cpu.lock);
28104 +	
28105  	for_each_online_cpu(cpu) {
28106  		TRACE("Initializing CPU%d...\n", cpu);
28107  
28108  		state = cpu_state_for(cpu);
28109 +		ce = &_lowest_prio_cpu.cpu_entries[cpu];
28110 +		
28111 +		ce->cpu = cpu;
28112 +		ce->scheduled = NULL;
28113 +		ce->deadline = ULLONG_MAX;
28114 +		ce->lv = NUM_CRIT_LEVELS;
28115 +		ce->will_schedule = false;
28116  
28117  		raw_spin_lock_init(&state->lock);
28118  		state->cpu = cpu;
28119  		state->scheduled = NULL;
28120  		for (lv = 0; lv < NUM_CRIT_LEVELS; lv++) {
28121 -			struct crit_entry *ce = &state->crit_entries[lv];
28122 -			ce->level = lv;
28123 -			ce->running = NULL;
28124 +			struct crit_entry *cr_entry = &state->crit_entries[lv];
28125 +			cr_entry->level = lv;
28126 +			cr_entry->running = NULL;
28127  			//hrtimer_init(&ce->ghost_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
28128  			//ce->ghost_timer.function = on_ghost_timer;
28129  		}
28130 @@ -905,6 +1341,9 @@ static long mc2_activate_plugin(void)
28131  
28132  		hrtimer_init(&state->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
28133  		state->timer.function = on_scheduling_timer;
28134 +		
28135 +		hrtimer_init(&state->g_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
28136 +		state->g_timer.function = on_global_scheduling_timer;
28137  	}
28138  
28139  	mc2_setup_domain_proc();
28140 @@ -912,17 +1351,26 @@ static long mc2_activate_plugin(void)
28141  	return 0;
28142  }
28143  
28144 +static void mc2_finish_switch(struct task_struct *prev)
28145 +{
28146 +	struct mc2_cpu_state *state = local_cpu_state();
28147 +	
28148 +	state->scheduled = is_realtime(current) ? current : NULL;
28149 +}
28150 +
28151  static long mc2_deactivate_plugin(void)
28152  {
28153  	int cpu;
28154  	struct mc2_cpu_state *state;
28155  	struct reservation *res;
28156 +	struct next_timer_event *event;
28157  
28158  	for_each_online_cpu(cpu) {
28159  		state = cpu_state_for(cpu);
28160  		raw_spin_lock(&state->lock);
28161  
28162  		hrtimer_cancel(&state->timer);
28163 +		hrtimer_cancel(&state->g_timer);
28164  
28165  		/* Delete all reservations --- assumes struct reservation
28166  		 * is prefix of containing struct. */
28167 @@ -954,6 +1402,46 @@ static long mc2_deactivate_plugin(void)
28168  		raw_spin_unlock(&state->lock);
28169  	}
28170  
28171 +	raw_spin_lock(&_global_env.lock);
28172 +
28173 +	while (!list_empty(&_global_env.active_reservations)) {
28174 +		TRACE("RES FOUND!!!\n");
28175 +		res = list_first_entry(
28176 +			&_global_env.active_reservations,
28177 +				struct reservation, list);
28178 +		list_del(&res->list);
28179 +		kfree(res);
28180 +	}
28181 +
28182 +	while (!list_empty(&_global_env.inactive_reservations)) {
28183 +		TRACE("RES FOUND!!!\n");
28184 +		res = list_first_entry(
28185 +			&_global_env.inactive_reservations,
28186 +				struct reservation, list);
28187 +		list_del(&res->list);
28188 +		kfree(res);
28189 +	}
28190 +
28191 +	while (!list_empty(&_global_env.depleted_reservations)) {
28192 +		TRACE("RES FOUND!!!\n");
28193 +		res = list_first_entry(
28194 +			&_global_env.depleted_reservations,
28195 +				struct reservation, list);
28196 +		list_del(&res->list);
28197 +		kfree(res);
28198 +	}
28199 +	
28200 +	while (!list_empty(&_global_env.next_events)) {
28201 +		TRACE("EVENT FOUND!!!\n");
28202 +		event = list_first_entry(
28203 +			&_global_env.next_events,
28204 +				struct next_timer_event, list);
28205 +		list_del(&event->list);
28206 +		kfree(event);
28207 +	}
28208 +	
28209 +	raw_spin_unlock(&_global_env.lock);
28210 +	
28211  	destroy_domain_proc_info(&mc2_domain_proc_info);
28212  	return 0;
28213  }
28214 @@ -961,6 +1449,7 @@ static long mc2_deactivate_plugin(void)
28215  static struct sched_plugin mc2_plugin = {
28216  	.plugin_name			= "MC2",
28217  	.schedule				= mc2_schedule,
28218 +	.finish_switch			= mc2_finish_switch,
28219  	.task_wake_up			= mc2_task_resume,
28220  	.admit_task				= mc2_admit_task,
28221  	.task_new				= mc2_task_new,
28222 -- 
28223 1.8.1.2
28224 
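To summarize the preemption path added above for global (Level-C) reservations: whenever a global budget event fires or gmp_update_time() reports schedule_now, the plugin selects a victim CPU with get_lowest_prio_cpu(), taking an idle CPU if one exists, otherwise the CPU whose Level-C task has the latest deadline (CPUs already flagged will_schedule are skipped), and pokes it. Condensed from the patch above as a summary sketch, not additional code:

	cpu = get_lowest_prio_cpu();	/* NO_CPU: every CPU runs Level A/B work */
	if (cpu != NO_CPU) {
		raw_spin_lock(&_lowest_prio_cpu.lock);
		_lowest_prio_cpu.cpu_entries[cpu].will_schedule = true;
		raw_spin_unlock(&_lowest_prio_cpu.lock);
		litmus_reschedule(cpu);	/* the victim then re-runs mc2_schedule() */
	}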
28225 
28226 From f9b8ce9e2c06fe8ecd3141837da910675af238c3 Mon Sep 17 00:00:00 2001
28227 From: ChengYang Fu <chengyangfu@gmail.com>
28228 Date: Tue, 20 Jan 2015 11:08:04 -0500
28229 Subject: [PATCH 087/119] add bank_proc.c for cache and bank coloring
28230 
28231 Conflicts:
28232 	litmus/litmus.c
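
For reference, a minimal sketch (not part of the patch) of the address decoding bank_proc.c performs on this board, assuming 4 KiB pages (PAGE_SHIFT == 12): the cache color is taken from physical-address bits 12-15 (16 colors) and the DRAM bank from bits 12-14 (8 banks). The helper names below are illustrative only:

	#define CACHE_MASK 0x0000f000	/* bits 12-15: 16 cache colors */
	#define BANK_MASK  0x00007000	/* bits 12-14: 8 DRAM banks */
	#define PAGE_SHIFT 12		/* 4 KiB pages (assumed) */

	static unsigned long color_of(unsigned long phys)
	{
		return (phys & CACHE_MASK) >> PAGE_SHIFT;	/* 0..15 */
	}

	static unsigned long bank_of(unsigned long phys)
	{
		return (phys & BANK_MASK) >> PAGE_SHIFT;	/* 0..7 */
	}

With this decoding, every DRAM bank spans exactly two cache colors (banknr and banknr + 8), which the per-bank allocator added in the next patch relies on.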
28233 ---
28234  litmus/Makefile     |   1 +
28235  litmus/bank_proc.c  | 254 ++++++++++++++++++++++++++++++++++++++++++++++++++++
28236  litmus/cache_proc.c |   2 +-
28237  litmus/litmus.c     |   8 +-
28238  4 files changed, 262 insertions(+), 3 deletions(-)
28239  create mode 100644 litmus/bank_proc.c
28240 
28241 diff --git a/litmus/Makefile b/litmus/Makefile
28242 index 997524f..713a14f 100644
28243 --- a/litmus/Makefile
28244 +++ b/litmus/Makefile
28245 @@ -20,6 +20,7 @@ obj-y     = sched_plugin.o litmus.o \
28246  	    ctrldev.o \
28247  	    uncachedev.o \
28248  	    cache_proc.o \
28249 +	    bank_proc.o \
28250  	    sched_gsn_edf.o \
28251  	    sched_psn_edf.o \
28252  	    sched_pfp.o
28253 diff --git a/litmus/bank_proc.c b/litmus/bank_proc.c
28254 new file mode 100644
28255 index 0000000..2c69657
28256 --- /dev/null
28257 +++ b/litmus/bank_proc.c
28258 @@ -0,0 +1,254 @@
28259 +#include <linux/init.h>
28260 +#include <linux/types.h>
28261 +#include <linux/kernel.h>
28262 +#include <linux/module.h>
28263 +#include <linux/sysctl.h>
28264 +#include <linux/slab.h>
28265 +#include <linux/io.h>
28266 +#include <linux/mutex.h>
28267 +#include <linux/mm.h>
28268 +#include <linux/random.h>
28269 +
28270 +#include <litmus/litmus_proc.h>
28271 +#include <litmus/sched_trace.h>
28272 +
28273 +#define LITMUS_LOCKDEP_NAME_MAX_LEN 50
28274 +
28275 +// This is Address Decoding for imx6-sabredsd board
28276 +#define CACHE_MASK 0x0000f000      
28277 +#define BANK_MASK  0x00007000      
28278 +#define OFFSET_SHIFT 12
28279 +
28280 +#define PAGES_PER_COLOR 1024
28281 +
28282 +unsigned long number_banks;
28283 +unsigned long number_cachecolors;
28284 +
28285 +struct color_group {
28286 +	spinlock_t lock;
28287 +	char _lock_name[LITMUS_LOCKDEP_NAME_MAX_LEN];
28288 +	struct list_head list;
28289 +	atomic_t nr_pages;
28290 +};
28291 +
28292 +static struct alloced_pages {
28293 +	spinlock_t lock;
28294 +	struct list_head list;
28295 +} alloced_pages;
28296 +
28297 +struct alloced_page {
28298 +	struct page *page;
28299 +	struct vm_area_struct *vma;
28300 +	struct list_head list;
28301 +};
28302 +
28303 +static struct color_group *color_groups;
28304 +static struct lock_class_key color_lock_keys[16];
28305 +
28306 +//static struct color_group *color_groups;
28307 +
28308 +/* Decoding page color, 0~15 */ 
28309 +static inline unsigned long page_color(struct page *page)
28310 +{
28311 +	return ((page_to_phys(page)& CACHE_MASK) >> PAGE_SHIFT);
28312 +}
28313 +
28314 +/* Decoding page bank number, 0~7 */ 
28315 +static inline unsigned long page_bank(struct page *page)
28316 +{
28317 +	return ((page_to_phys(page)& BANK_MASK) >> PAGE_SHIFT);
28318 +}
28319 +
28320 +static unsigned long smallest_nr_pages(void)
28321 +{
28322 +	unsigned long i, min_pages = -1;
28323 +	struct color_group *cgroup;
28324 +	for (i = 0; i < number_cachecolors; ++i) {
28325 +		cgroup = &color_groups[i];
28326 +		if (atomic_read(&cgroup->nr_pages) < min_pages)
28327 +			min_pages = atomic_read(&cgroup->nr_pages);
28328 +	}
28329 +	return min_pages;
28330 +}
28331 +/*
28332 + * Page's count should be one; it should not be on any LRU list.
28333 + */
28334 +void add_page_to_color_list(struct page *page)
28335 +{
28336 +	const unsigned long color = page_color(page);
28337 +	struct color_group *cgroup = &color_groups[color];
28338 +	BUG_ON(in_list(&page->lru) || PageLRU(page));
28339 +	BUG_ON(page_count(page) > 1);
28340 +	spin_lock(&cgroup->lock);
28341 +	list_add_tail(&page->lru, &cgroup->list);
28342 +	atomic_inc(&cgroup->nr_pages);
28343 +//	SetPageLRU(page);
28344 +	spin_unlock(&cgroup->lock);
28345 +}
28346 +
28347 +static int do_add_pages(void)
28348 +{
28349 +	printk("LITMUS do add pages\n");
28350 +	
28351 +	struct page *page, *page_tmp;
28352 +	LIST_HEAD(free_later);
28353 +	unsigned long color;
28354 +	int ret = 0;
28355 +
28356 +	while (smallest_nr_pages() < PAGES_PER_COLOR) {
28357 +	
28358 +		//page = alloc_page(GFP_HIGHUSER | __GFP_MOVABLE);
28359 +		page = alloc_page(GFP_HIGHUSER_MOVABLE);
28360 +		
28361 +		if (unlikely(!page)) {
28362 +			printk(KERN_WARNING "Could not allocate pages.\n");
28363 +			ret = -ENOMEM;
28364 +			goto out;
28365 +		}
28366 +		color = page_color(page);
28367 +		if (atomic_read(&color_groups[color].nr_pages) < PAGES_PER_COLOR) {
28368 +	//		SetPageReserved(page);
28369 +			add_page_to_color_list(page);
28370 +		} else
28371 +			list_add_tail(&page->lru, &free_later);
28372 +	}
28373 +	list_for_each_entry_safe(page, page_tmp, &free_later, lru) {
28374 +		list_del(&page->lru);
28375 +		__free_page(page);
28376 +	}
28377 +	/* setup the color queue stuff */
28378 +//	ret = setup_flusher_array();
28379 +out:
28380 +	return ret;
28381 +}
28382 +
28383 +
28384 +/*
28385 + * provide pages for replacement 
28386 + * node = 0 for Level A, B tasks in Cpu 0
28387 + * node = 1 for Level A, B tasks in Cpu 1
28388 + * node = 2 for Level A, B tasks in Cpu 2
28389 + * node = 3 for Level A, B tasks in Cpu 3
28390 + * node = 4 for Level C tasks 
28391 + */
28392 + #if 1 
28393 +struct page *new_alloc_page(struct page *page, unsigned long node, int **x)
28394 +{
28395 +	printk("allocate new page node = %d\n", node);	
28396 +//	return alloc_pages_exact_node(node, GFP_HIGHUSER_MOVABLE, 0);
28397 +	struct color_group *cgroup;
28398 +	struct page *rPage = NULL;
28399 +	unsigned int color;
28400 +	get_random_bytes(&color, sizeof(unsigned int));
28401 +	
28402 +	/*
28403 +	if(node ==0){
28404 +		color = (color%2)*8+node;
28405 +	}else if(node == 1){
28406 +		color = (color%2)*8+node;
28407 +	}else if(node == 2){
28408 +		color = (color%2)*8+;
28409 +	}else if(node == 3){
28410 +		color = color%2 + 6;
28411 +	}else if(node == 4){
28412 +		color = color%8 + 8;
28413 +	}else{
28414 +		goto out;
28415 +	}
28416 +	*/
28417 +	
28418 +	switch(node ){
28419 +		case 0:
28420 +		case 1: 
28421 +		case 2: 
28422 +		case 3:
28423 +			color = (color%2) * 8 + node;
28424 +			break;
28425 +		case 4:
28426 +			color = (color%8)+4;
28427 +			if(color >=8)	
28428 +				color+=4;
28429 +			break;
28430 +		default:
28431 +			goto out;
28432 +	}
28433 +
28434 +
28435 +	printk("allocate new page color = %d\n", color);
28436 +		
28437 +	cgroup = &color_groups[color];
28438 +	spin_lock(&cgroup->lock);
28439 +	if (unlikely(!atomic_read(&cgroup->nr_pages))) {
28440 +		TRACE_CUR("No free %lu colored pages.\n", color);
28441 +		printk(KERN_WARNING "no free %lu colored pages.\n", color);
28442 +		goto out_unlock;
28443 +	}
28444 +	rPage = list_first_entry(&cgroup->list, struct page, lru);
28445 +	BUG_ON(page_count(rPage) > 1);
28446 +	get_page(rPage);
28447 +	list_del(&rPage->lru);
28448 +	atomic_dec(&cgroup->nr_pages);
28449 +//	ClearPageLRU(rPage);
28450 +out_unlock:
28451 +	spin_unlock(&cgroup->lock);
28452 +out:
28453 +	do_add_pages();
28454 +	return rPage;
28455 +	//return  alloc_page(GFP_HIGHUSER_MOVABLE);
28456 +}
28457 +#endif
28458 +
28459 +static void __init init_variables(void)
28460 +{
28461 +	number_banks = 1+(BANK_MASK >> PAGE_SHIFT); 
28462 +	number_cachecolors = 1+(CACHE_MASK >> PAGE_SHIFT);
28463 +}
28464 +
28465 +
28466 +
28467 +static int __init init_color_groups(void)
28468 +{
28469 +	struct color_group *cgroup;
28470 +	unsigned long i;
28471 +	int err = 0;
28472 +
28473 +	color_groups = kmalloc(number_cachecolors *
28474 +			sizeof(struct color_group), GFP_KERNEL);
28475 +	if (!color_groups) {
28476 +		printk(KERN_WARNING "Could not allocate color groups.\n");
28477 +		err = -ENOMEM;
28478 +	}else{
28479 +
28480 +		for (i = 0; i < number_cachecolors; ++i) {
28481 +			cgroup = &color_groups[i];
28482 +			atomic_set(&cgroup->nr_pages, 0);
28483 +			INIT_LIST_HEAD(&cgroup->list);
28484 +			spin_lock_init(&cgroup->lock);
28485 +//			LOCKDEP_DYNAMIC_ALLOC(&cgroup->lock, &color_lock_keys[i],
28486 +//					cgroup->_lock_name, "color%lu", i);
28487 +		}
28488 +	}
28489 +	return err;
28490 +}
28491 +
28492 +/*
28493 + * Initialize the color and bank proc module.
28494 + */
28495 +static int __init litmus_color_init(void)
28496 +{
28497 +	int err=0;
28498 +	
28499 +	INIT_LIST_HEAD(&alloced_pages.list);
28500 +	spin_lock_init(&alloced_pages.lock);
28501 +	init_variables();
28502 +	printk("Cache number = %lu , Cache mask = 0x%x\n", number_cachecolors, CACHE_MASK); 
28503 +	printk("Bank number = %lu , Bank mask = 0x%x\n", number_banks, BANK_MASK); 
28504 +	init_color_groups();			
28505 +	do_add_pages();
28506 +
28507 +	printk(KERN_INFO "Registering LITMUS^RT color and bank proc.\n");
28508 +	return err;
28509 +}
28510 +
28511 +module_init(litmus_color_init);
28512 +
28513 diff --git a/litmus/cache_proc.c b/litmus/cache_proc.c
28514 index 4425bfb..cc818b9 100644
28515 --- a/litmus/cache_proc.c
28516 +++ b/litmus/cache_proc.c
28517 @@ -242,4 +242,4 @@ out:
28518  	return ret;
28519  }
28520  
28521 -module_init(litmus_sysctl_init);
28522 \ No newline at end of file
28523 +module_init(litmus_sysctl_init);
28524 diff --git a/litmus/litmus.c b/litmus/litmus.c
28525 index 6034ff8..dcb9ed5 100644
28526 --- a/litmus/litmus.c
28527 +++ b/litmus/litmus.c
28528 @@ -392,11 +392,14 @@ static struct page *walk_page_table(unsigned long addr)
28529  extern int isolate_lru_page(struct page *page);
28530  extern void putback_lru_page(struct page *page);
28531  
28532 +extern struct page *new_alloc_page(struct page *page, unsigned long node, int **x);
28533 +
28534 +#if 0
28535  static struct page *new_alloc_page(struct page *page, unsigned long node, int **x)
28536  {
28537  	return alloc_pages_exact_node(node, GFP_HIGHUSER_MOVABLE, 0);
28538  }
28539 -
28540 +#endif
28541  asmlinkage long sys_set_page_color(int cpu)
28542  {
28543  	long ret = 0;
28544 @@ -442,6 +445,7 @@ asmlinkage long sys_set_page_color(int cpu)
28545  				continue;
28546  			if (!old_page)
28547  				continue;
28548 +
28549  			if (PageReserved(old_page)) {
28550  				put_page(old_page);
28551  				continue;
28552 @@ -482,7 +486,7 @@ asmlinkage long sys_set_page_color(int cpu)
28553  	
28554  	ret = 0;
28555  	if (!list_empty(&pagelist)) {
28556 -		ret = migrate_pages(&pagelist, new_alloc_page, 0, MIGRATE_ASYNC, MR_SYSCALL);
28557 +		ret = migrate_pages(&pagelist, new_alloc_page, 4, MIGRATE_ASYNC, MR_SYSCALL);
28558  		if (ret) {
28559  			printk(KERN_INFO "%ld pages not migrated.\n", ret);
28560  			putback_lru_pages(&pagelist);
28561 -- 
28562 1.8.1.2
28563 
28564 
28565 From d7f8145b8a0525dccd2990fd0739012d04f3d978 Mon Sep 17 00:00:00 2001
28566 From: ChengYang Fu <chengyangfu@gmail.com>
28567 Date: Tue, 27 Jan 2015 12:23:44 -0500
28568 Subject: [PATCH 088/119] Provide interface to let litmus ask new pages by bank
28569  number
28570 
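new_alloc_page_banknr() uses the same (page, private, result) callback signature that litmus.c already hands to migrate_pages(), so a caller can steer a task's pages into a specific DRAM bank by passing the bank number as the private argument; the helper then serves pages from one of the two cache-color groups that live in that bank (banknr and banknr + 8), chosen at random. Illustration only, this call does not appear in the patch:

	/* hypothetical caller: take replacement pages from DRAM bank 3 */
	ret = migrate_pages(&pagelist, new_alloc_page_banknr,
			    3 /* banknr */, MIGRATE_ASYNC, MR_SYSCALL);
	if (ret)
		printk(KERN_INFO "%ld pages not migrated.\n", ret);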
28571 ---
28572  litmus/bank_proc.c | 40 +++++++++++++++++++++++++++++++++++++---
28573  1 file changed, 37 insertions(+), 3 deletions(-)
28574 
28575 diff --git a/litmus/bank_proc.c b/litmus/bank_proc.c
28576 index 2c69657..ec04626 100644
28577 --- a/litmus/bank_proc.c
28578 +++ b/litmus/bank_proc.c
28579 @@ -131,7 +131,6 @@ out:
28580   * node = 3 for Level A, B tasks in Cpu 3
28581   * node = 4 for Level C tasks 
28582   */
28583 - #if 1 
28584  struct page *new_alloc_page(struct page *page, unsigned long node, int **x)
28585  {
28586  	printk("allocate new page node = %d\n", node);	
28587 @@ -194,9 +193,44 @@ out_unlock:
28588  out:
28589  	do_add_pages();
28590  	return rPage;
28591 -	//return  alloc_page(GFP_HIGHUSER_MOVABLE);
28592  }
28593 -#endif
28594 +
28595 +struct page *new_alloc_page_banknr(struct page *page, unsigned long banknr, int **x)
28596 +{
28597 +	printk("allocate new page bank = %d\n", banknr);	
28598 +	struct color_group *cgroup;
28599 +	struct page *rPage = NULL;
28600 +	unsigned int color;
28601 +	get_random_bytes(&color, sizeof(unsigned int));
28602 +	
28603 +	if((banknr<= 7) && (banknr>=0)){
28604 +		color = (color%2) * 8 + banknr;
28605 +	}else{
28606 +		goto out;
28607 +	}
28608 +		
28609 +	cgroup = &color_groups[color];
28610 +	spin_lock(&cgroup->lock);
28611 +	if (unlikely(!atomic_read(&cgroup->nr_pages))) {
28612 +		TRACE_CUR("No free %lu colored pages.\n", color);
28613 +		printk(KERN_WARNING "no free %lu colored pages.\n", color);
28614 +		goto out_unlock;
28615 +	}
28616 +	rPage = list_first_entry(&cgroup->list, struct page, lru);
28617 +	BUG_ON(page_count(rPage) > 1);
28618 +	get_page(rPage);
28619 +	list_del(&rPage->lru);
28620 +	atomic_dec(&cgroup->nr_pages);
28621 +//	ClearPageLRU(rPage);
28622 +out_unlock:
28623 +	spin_unlock(&cgroup->lock);
28624 +out:
28625 +	do_add_pages();
28626 +	return rPage;
28627 +
28628 +
28629 +
28630 +}
28631  
28632  static int __init init_variables(void)
28633  {
28634 -- 
28635 1.8.1.2
28636 
28637 
28638 From a83b6b631b081f9dfeb8134c9aee6aeb866f7231 Mon Sep 17 00:00:00 2001
28639 From: Namhoon Kim <namhoonk@cs.unc.edu>
28640 Date: Wed, 25 Feb 2015 10:42:28 -0500
28641 Subject: [PATCH 089/119] proto type
28642 
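Among other changes, this patch switches new_alloc_page() from the per-bank mapping to a cache-set partition controlled by the new l2_usable_sets knob (default 5): node 0 (Level A/B tasks on CPU 0) draws from the first l2_usable_sets colors, while nodes 1-4 share the remaining 16 - l2_usable_sets colors. A minimal sketch of that mapping, with an illustrative helper name (the real code bails out for nodes above 4):

	static unsigned int pick_color(unsigned long node, unsigned int rnd)
	{
		if (node == 0)
			return rnd % l2_usable_sets;
		return l2_usable_sets + rnd % (16 - l2_usable_sets);
	}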
28643 ---
28644  include/litmus/reservation.h |  18 +-
28645  litmus/bank_proc.c           |  29 +++-
28646  litmus/cache_proc.c          |  40 +++++
28647  litmus/jobs.c                |   1 -
28648  litmus/litmus.c              |  48 +++--
28649  litmus/reservation.c         |  54 +++---
28650  litmus/sched_mc2.c           | 405 ++++++++++++++++++++++++++++---------------
28651  7 files changed, 396 insertions(+), 199 deletions(-)
28652 
28653 diff --git a/include/litmus/reservation.h b/include/litmus/reservation.h
28654 index fc7e319..0b9c08d 100644
28655 --- a/include/litmus/reservation.h
28656 +++ b/include/litmus/reservation.h
28657 @@ -201,19 +201,19 @@ struct reservation* sup_find_by_id(struct sup_reservation_environment* sup_env,
28658  	unsigned int id);
28659  	
28660  /* A global multiprocessor reservation environment. */
28661 -/*
28662 +
28663  typedef enum {
28664  	EVENT_REPLENISH = 0,
28665  	EVENT_DRAIN,
28666  	EVENT_OTHERS,
28667  } event_type_t;
28668 -*/
28669 +
28670  
28671  struct next_timer_event {
28672  	lt_t next_update;
28673  	int timer_armed_on;
28674 -	//unsigned int id;
28675 -	//event_type_t type;
28676 +	unsigned int id;
28677 +	event_type_t type;
28678  	struct list_head list;
28679  };
28680  
28681 @@ -234,7 +234,7 @@ struct gmp_reservation_environment {
28682  	struct list_head next_events;
28683  	
28684  	/* (schedule_now == true) means call gmp_dispatch() now */
28685 -	bool schedule_now;
28686 +	int schedule_now;
28687  	/* set to true if a call to gmp_dispatch() is imminent */
28688  	bool will_schedule;
28689  };
28690 @@ -242,11 +242,11 @@ struct gmp_reservation_environment {
28691  void gmp_init(struct gmp_reservation_environment* gmp_env);
28692  void gmp_add_new_reservation(struct gmp_reservation_environment* gmp_env,
28693  	struct reservation* new_res);
28694 -void gmp_scheduler_update_after(struct gmp_reservation_environment* gmp_env,
28695 -	lt_t timeout);
28696 -bool gmp_update_time(struct gmp_reservation_environment* gmp_env, lt_t now);
28697 +void gmp_add_event_after(struct gmp_reservation_environment* gmp_env,
28698 +	lt_t timeout, unsigned int id, event_type_t type);
28699 +int gmp_update_time(struct gmp_reservation_environment* gmp_env, lt_t now);
28700  struct task_struct* gmp_dispatch(struct gmp_reservation_environment* gmp_env);
28701 -//struct next_timer_event* gmp_find_event_by_id(struct gmp_reservation_environment* gmp_env, unsigned int id);
28702 +struct next_timer_event* gmp_find_event_by_id(struct gmp_reservation_environment* gmp_env, unsigned int id);
28703  struct next_timer_event* gmp_find_event_by_time(struct gmp_reservation_environment* gmp_env, lt_t when);
28704  struct reservation* gmp_find_by_id(struct gmp_reservation_environment* gmp_env,
28705  	unsigned int id);
28706 diff --git a/litmus/bank_proc.c b/litmus/bank_proc.c
28707 index ec04626..07d5728 100644
28708 --- a/litmus/bank_proc.c
28709 +++ b/litmus/bank_proc.c
28710 @@ -88,7 +88,7 @@ void add_page_to_color_list(struct page *page)
28711  
28712  static int do_add_pages(void)
28713  {
28714 -	printk("LITMUS do add pages\n");
28715 +	//printk("LITMUS do add pages\n");
28716  	
28717  	struct page *page, *page_tmp;
28718  	LIST_HEAD(free_later);
28719 @@ -122,6 +122,7 @@ out:
28720  	return ret;
28721  }
28722  
28723 +extern int l2_usable_sets;
28724  
28725  /*
28726   * provide pages for replacement 
28727 @@ -131,9 +132,10 @@ out:
28728   * node = 3 for Level A, B tasks in Cpu 3
28729   * node = 4 for Level C tasks 
28730   */
28731 +#if 1
28732  struct page *new_alloc_page(struct page *page, unsigned long node, int **x)
28733  {
28734 -	printk("allocate new page node = %d\n", node);	
28735 +	//printk("allocate new page node = %d\n", node);	
28736  //	return alloc_pages_exact_node(node, GFP_HIGHUSER_MOVABLE, 0);
28737  	struct color_group *cgroup;
28738  	struct page *rPage = NULL;
28739 @@ -155,7 +157,20 @@ struct page *new_alloc_page(struct page *page, unsigned long node, int **x)
28740  		goto out;
28741  	}
28742  	*/
28743 -	
28744 +	switch(node ){
28745 +		case 0:
28746 +			color = (color % l2_usable_sets);
28747 +			break;
28748 +		case 1: 
28749 +		case 2: 
28750 +		case 3:
28751 +		case 4:
28752 +			color = (color% (16-l2_usable_sets)) + l2_usable_sets;
28753 +			break;
28754 +		default:
28755 +			goto out;
28756 +	}
28757 +	/*
28758  	switch(node ){
28759  		case 0:
28760  		case 1: 
28761 @@ -171,14 +186,15 @@ struct page *new_alloc_page(struct page *page, unsigned long node, int **x)
28762  		default:
28763  			goto out;
28764  	}
28765 +	*/
28766  
28767 -
28768 -	printk("allocate new page color = %d\n", color);
28769 +	//printk("allocate new page color = %d\n", color);
28770 +	//TRACE("allocate new page color = %d\n", color);
28771  		
28772  	cgroup = &color_groups[color];
28773  	spin_lock(&cgroup->lock);
28774  	if (unlikely(!atomic_read(&cgroup->nr_pages))) {
28775 -		TRACE_CUR("No free %lu colored pages.\n", color);
28776 +		//TRACE_CUR("No free %lu colored pages.\n", color);
28777  		printk(KERN_WARNING "no free %lu colored pages.\n", color);
28778  		goto out_unlock;
28779  	}
28780 @@ -194,6 +210,7 @@ out:
28781  	do_add_pages();
28782  	return rPage;
28783  }
28784 +#endif
28785  
28786  struct page *new_alloc_page_banknr(struct page *page, unsigned long banknr, int **x)
28787  {
28788 diff --git a/litmus/cache_proc.c b/litmus/cache_proc.c
28789 index cc818b9..7b48d5c 100644
28790 --- a/litmus/cache_proc.c
28791 +++ b/litmus/cache_proc.c
28792 @@ -49,6 +49,9 @@ struct mutex lockdown_proc;
28793  
28794  static int min_usable_ways = 0;
28795  static int max_usable_ways = 16;
28796 +static int min_usable_sets = 1;
28797 +static int max_usable_sets = 15;
28798 +
28799  static int zero = 0;
28800  static int one = 1;
28801  
28802 @@ -60,6 +63,7 @@ static int one = 1;
28803  			__cpu * L2X0_LOCKDOWN_STRIDE; __v; })
28804  
28805  int l2_usable_ways;
28806 +int l2_usable_sets;
28807  int lock_all;
28808  int nr_lockregs;
28809  
28810 @@ -176,6 +180,7 @@ int l2_usable_ways_handler(struct ctl_table *table, int write, void __user *buff
28811  		goto out;
28812  		
28813  	TRACE_CUR("l2_usable_ways : %d\n", l2_usable_ways);
28814 +	printk("l2_usable_ways : %d\n", l2_usable_ways);
28815  	
28816  	if (write) {
28817  		//for (i = 0; i < nr_lockregs;  i++) {
28818 @@ -190,6 +195,31 @@ out:
28819  	return ret;
28820  }
28821  
28822 +int l2_usable_sets_handler(struct ctl_table *table, int write, void __user *buffer,
28823 +		size_t *lenp, loff_t *ppos)
28824 +{
28825 +	int ret = 0;
28826 +	
28827 +	mutex_lock(&lockdown_proc);
28828 +	
28829 +	flush_cache_all();
28830 +	
28831 +	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
28832 +	if (ret)
28833 +		goto out;
28834 +		
28835 +	TRACE_CUR("l2_usable_sets : %d\n", l2_usable_sets);
28836 +	printk("l2_usable_sets : %d\n", l2_usable_sets);
28837 +	
28838 +	if (write) {
28839 +		;
28840 +	}
28841 +
28842 +out:
28843 +	mutex_unlock(&lockdown_proc);
28844 +	return ret;
28845 +}
28846 +
28847  static struct ctl_table cache_table[] =
28848  {
28849  	{
28850 @@ -202,6 +232,15 @@ static struct ctl_table cache_table[] =
28851  		.extra2		= &max_usable_ways,
28852  	},
28853  	{
28854 +		.procname	= "l2_usable_sets",
28855 +		.mode		= 0666,
28856 +		.proc_handler	= l2_usable_sets_handler,
28857 +		.data		= &l2_usable_sets,
28858 +		.maxlen		= sizeof(l2_usable_sets),
28859 +		.extra1		= &min_usable_sets,
28860 +		.extra2		= &max_usable_sets,
28861 +	},
28862 +	{
28863  		.procname	= "lock_all",
28864  		.mode		= 0666,
28865  		.proc_handler	= lock_all_handler,
28866 @@ -237,6 +276,7 @@ static int __init litmus_sysctl_init(void)
28867  	}
28868  
28869  	l2_usable_ways = 16;
28870 +	l2_usable_sets = 5;
28871  
28872  out:
28873  	return ret;
28874 diff --git a/litmus/jobs.c b/litmus/jobs.c
28875 index e523e29..547222c 100644
28876 --- a/litmus/jobs.c
28877 +++ b/litmus/jobs.c
28878 @@ -45,7 +45,6 @@ void release_at(struct task_struct *t, lt_t start)
28879  {
28880  	BUG_ON(!t);
28881  	setup_release(t, start);
28882 -	TRACE("RELEASE!!\n");
28883  	tsk_rt(t)->completed = 0;
28884  }
28885  
28886 diff --git a/litmus/litmus.c b/litmus/litmus.c
28887 index dcb9ed5..4ff840d 100644
28888 --- a/litmus/litmus.c
28889 +++ b/litmus/litmus.c
28890 @@ -392,14 +392,16 @@ static struct page *walk_page_table(unsigned long addr)
28891  extern int isolate_lru_page(struct page *page);
28892  extern void putback_lru_page(struct page *page);
28893  
28894 -extern struct page *new_alloc_page(struct page *page, unsigned long node, int **x);
28895 -
28896  #if 0
28897  static struct page *new_alloc_page(struct page *page, unsigned long node, int **x)
28898  {
28899 -	return alloc_pages_exact_node(node, GFP_HIGHUSER_MOVABLE, 0);
28900 +	return alloc_pages_exact_node(0, GFP_HIGHUSER_MOVABLE, 0);
28901  }
28902 +#else
28903 +extern struct page *new_alloc_page(struct page *page, unsigned long node, int **x);
28904 +
28905  #endif
28906 +
28907  asmlinkage long sys_set_page_color(int cpu)
28908  {
28909  	long ret = 0;
28910 @@ -408,16 +410,18 @@ asmlinkage long sys_set_page_color(int cpu)
28911  	struct page *page_itr = NULL;
28912  	struct vm_area_struct *vma_itr = NULL;
28913  	//struct task_page *entry = NULL;
28914 -	int nr_pages = 0, nr_shared_pages = 0;
28915 +	int nr_pages = 0, nr_shared_pages = 0, nr_failed = 0;
28916 +	unsigned long node;
28917 +	
28918  	LIST_HEAD(pagelist);
28919  	LIST_HEAD(shared_pagelist);
28920  	
28921  	down_read(&current->mm->mmap_sem);
28922 -	printk(KERN_INFO "SYSCALL set_page_color\n");
28923 +	TRACE_TASK(current, "SYSCALL set_page_color\n");
28924  	vma_itr = current->mm->mmap;
28925  	while (vma_itr != NULL) {
28926  		unsigned int num_pages = 0, i;
28927 -		struct page *new_page = NULL, *old_page = NULL;
28928 +		struct page *old_page = NULL;
28929  		/*
28930  		entry = kmalloc(sizeof(struct task_page), GFP_ATOMIC);
28931  		if (entry == NULL) {
28932 @@ -428,8 +432,8 @@ asmlinkage long sys_set_page_color(int cpu)
28933  		*/
28934  		num_pages = (vma_itr->vm_end - vma_itr->vm_start) / PAGE_SIZE;
28935  		// print vma flags
28936 -		printk(KERN_INFO "flags: 0x%lx\n", vma_itr->vm_flags);
28937 -		printk(KERN_INFO "start - end: 0x%lx - 0x%lx (%lu)\n", vma_itr->vm_start, vma_itr->vm_end, (vma_itr->vm_end - vma_itr->vm_start)/PAGE_SIZE);
28938 +		//printk(KERN_INFO "flags: 0x%lx\n", vma_itr->vm_flags);
28939 +		//printk(KERN_INFO "start - end: 0x%lx - 0x%lx (%lu)\n", vma_itr->vm_start, vma_itr->vm_end, (vma_itr->vm_end - vma_itr->vm_start)/PAGE_SIZE);
28940  		
28941  		for (i = 0; i < num_pages; i++) {
28942  /*
28943 @@ -447,19 +451,22 @@ asmlinkage long sys_set_page_color(int cpu)
28944  				continue;
28945  
28946  			if (PageReserved(old_page)) {
28947 +				TRACE("Reserved Page!\n");
28948  				put_page(old_page);
28949  				continue;
28950  			}
28951  			
28952 -			printk(KERN_INFO "addr: %lu, pfn: %lu, _mapcount: %d, _count: %d\n", vma_itr->vm_start + PAGE_SIZE*i, __page_to_pfn(old_page), page_mapcount(old_page), page_count(old_page));
28953 +			TRACE_TASK(current, "addr: %lu, pfn: %lu, _mapcount: %d, _count: %d\n", vma_itr->vm_start + PAGE_SIZE*i, __page_to_pfn(old_page), page_mapcount(old_page), page_count(old_page));
28954  			
28955 -			if (page_mapcount(old_page) == 1) {
28956 +			if (page_mapcount(old_page) != 0) {
28957  				ret = isolate_lru_page(old_page);
28958 -				//if (pfn_valid(__page_to_pfn(old_page)))
28959  				if (!ret) {
28960  					list_add_tail(&old_page->lru, &pagelist);
28961  					inc_zone_page_state(old_page, NR_ISOLATED_ANON + !PageSwapBacked(old_page));
28962  					nr_pages++;
28963 +				} else {
28964 +					TRACE_TASK(current, "isolate_lru_page failed\n");
28965 +					nr_failed++;
28966  				}
28967  				put_page(old_page);
28968  			}
28969 @@ -468,8 +475,9 @@ asmlinkage long sys_set_page_color(int cpu)
28970  				if (!ret) {
28971  					list_add_tail(&old_page->lru, &shared_pagelist);
28972  					inc_zone_page_state(old_page, NR_ISOLATED_ANON + !PageSwapBacked(old_page));
28973 -					nr_shared_pages++;
28974 -				}					
28975 +					
28976 +				}	
28977 +				nr_shared_pages++;
28978  				put_page(old_page);
28979  			}
28980  		}
28981 @@ -485,10 +493,15 @@ asmlinkage long sys_set_page_color(int cpu)
28982  //	}
28983  	
28984  	ret = 0;
28985 +	if (cpu == -1)
28986 +		node = 4;
28987 +	else
28988 +		node = cpu;
28989 +	
28990  	if (!list_empty(&pagelist)) {
28991 -		ret = migrate_pages(&pagelist, new_alloc_page, 4, MIGRATE_ASYNC, MR_SYSCALL);
28992 +		ret = migrate_pages(&pagelist, new_alloc_page, node, MIGRATE_ASYNC, MR_SYSCALL);
28993 +		TRACE_TASK(current, "%ld pages not migrated.\n", ret);
28994  		if (ret) {
28995 -			printk(KERN_INFO "%ld pages not migrated.\n", ret);
28996  			putback_lru_pages(&pagelist);
28997  		}
28998  	}
28999 @@ -507,7 +520,7 @@ asmlinkage long sys_set_page_color(int cpu)
29000  	up_read(&current->mm->mmap_sem);
29001  
29002  	list_for_each_entry(page_itr, &shared_pagelist, lru) {
29003 -		printk(KERN_INFO "S Anon=%d, pfn = %lu, _mapcount = %d, _count = %d\n", PageAnon(page_itr), __page_to_pfn(page_itr), page_mapcount(page_itr), page_count(page_itr));
29004 +		TRACE("S Anon=%d, pfn = %lu, _mapcount = %d, _count = %d\n", PageAnon(page_itr), __page_to_pfn(page_itr), page_mapcount(page_itr), page_count(page_itr));
29005  	}
29006  	
29007  /*	
29008 @@ -517,7 +530,7 @@ asmlinkage long sys_set_page_color(int cpu)
29009  		kfree(task_page_itr);		
29010  	}
29011  */	
29012 -	printk(KERN_INFO "nr_pages = %d\n", nr_pages);
29013 +	TRACE_TASK(current, "nr_pages = %d nr_failed = %d\n", nr_pages, nr_failed);
29014  	return ret;
29015  }
29016  
29017 @@ -888,6 +901,7 @@ static int __init _init_litmus(void)
29018  #endif
29019  	
29020  	color_mask = ((cache_info_sets << line_size_log) - 1) ^ (PAGE_SIZE - 1);
29021 +	printk("Page color mask %08x\n", color_mask);
29022  	return 0;
29023  }
29024  
29025 diff --git a/litmus/reservation.c b/litmus/reservation.c
29026 index e30892c..b0b13a9 100644
29027 --- a/litmus/reservation.c
29028 +++ b/litmus/reservation.c
29029 @@ -348,7 +348,7 @@ struct reservation* gmp_find_by_id(struct gmp_reservation_environment* gmp_env,
29030  	return NULL;
29031  }
29032  
29033 -/*
29034 +
29035  struct next_timer_event* gmp_find_event_by_id(struct gmp_reservation_environment* gmp_env,
29036  	unsigned int id)
29037  {
29038 @@ -361,7 +361,7 @@ struct next_timer_event* gmp_find_event_by_id(struct gmp_reservation_environment
29039  
29040  	return NULL;
29041  }
29042 -*/
29043 +
29044  
29045  struct next_timer_event* gmp_find_event_by_time(struct gmp_reservation_environment* gmp_env,
29046  	lt_t when)
29047 @@ -415,9 +415,9 @@ static void gmp_scheduler_update_at(
29048  */
29049  #define TIMER_RESOLUTION 100000L
29050  
29051 -static void gmp_scheduler_update_at(
29052 +static void gmp_add_event(
29053  	struct gmp_reservation_environment* gmp_env,
29054 -	lt_t when)
29055 +	lt_t when, unsigned int id, event_type_t type)
29056  {
29057  	struct next_timer_event *nevent, *queued;
29058  	struct list_head *pos;
29059 @@ -426,11 +426,13 @@ static void gmp_scheduler_update_at(
29060  	//when = div64_u64(when, TIMER_RESOLUTION);
29061  	//when *= TIMER_RESOLUTION;
29062  	
29063 -	nevent = gmp_find_event_by_time(gmp_env, when);
29064 +	nevent = gmp_find_event_by_id(gmp_env, id);
29065  	
29066  	if (!nevent) {
29067 -		nevent = kzalloc(sizeof(*nevent), GFP_KERNEL);
29068 +		nevent = kzalloc(sizeof(*nevent), GFP_ATOMIC);
29069  		nevent->next_update = when;
29070 +		nevent->id = id;
29071 +		nevent->type = type;
29072  		nevent->timer_armed_on = NO_CPU;
29073  		
29074  		list_for_each(pos, &gmp_env->next_events) {
29075 @@ -448,14 +450,14 @@ static void gmp_scheduler_update_at(
29076  			TRACE("NEXT_EVENT ADDED at %llu ADDED at HEAD\n", nevent->next_update);
29077  		}
29078  	} else {
29079 -		; //TRACE("EVENT FOUND at %llu, NEW EVENT %llu\n", nevent->next_update, when);
29080 +		TRACE("EVENT FOUND type=%d when=%llu, NEW EVENT type=%d when=%llu\n", nevent->type, nevent->next_update, type, when);
29081  	}
29082  }
29083  
29084 -void gmp_scheduler_update_after(
29085 -	struct gmp_reservation_environment* gmp_env, lt_t timeout)
29086 +void gmp_add_event_after(
29087 +	struct gmp_reservation_environment* gmp_env, lt_t timeout, unsigned int id, event_type_t type)
29088  {
29089 -	gmp_scheduler_update_at(gmp_env, gmp_env->env.current_time + timeout);
29090 +	gmp_add_event(gmp_env, gmp_env->env.current_time + timeout, id, type);
29091  }
29092  
29093  static void gmp_queue_depleted(
29094 @@ -468,7 +470,7 @@ static void gmp_queue_depleted(
29095  
29096  	list_for_each(pos, &gmp_env->depleted_reservations) {
29097  		queued = list_entry(pos, struct reservation, list);
29098 -		if (queued->next_replenishment > res->next_replenishment) {
29099 +		if (queued && queued->next_replenishment > res->next_replenishment) {
29100  			list_add(&res->list, pos->prev);
29101  			found = 1;
29102  		}
29103 @@ -477,7 +479,7 @@ static void gmp_queue_depleted(
29104  	if (!found)
29105  		list_add_tail(&res->list, &gmp_env->depleted_reservations);
29106  
29107 -	gmp_scheduler_update_at(gmp_env, res->next_replenishment);
29108 +	gmp_add_event(gmp_env, res->next_replenishment, res->id, EVENT_REPLENISH);
29109  }
29110  
29111  static void gmp_queue_active(
29112 @@ -502,16 +504,20 @@ static void gmp_queue_active(
29113  		list_add_tail(&res->list, &gmp_env->active_reservations);
29114  
29115  	/* check for possible preemption */
29116 -	if (res->state == RESERVATION_ACTIVE && !check_preempt)
29117 -		gmp_env->schedule_now = true;
29118 +	if (res->state == RESERVATION_ACTIVE && check_preempt)
29119 +		gmp_env->schedule_now++;
29120  	
29121 -	gmp_scheduler_update_after(gmp_env, res->cur_budget);
29122 +	gmp_add_event_after(gmp_env, res->cur_budget, res->id, EVENT_DRAIN);
29123  }
29124  
29125  static void gmp_queue_reservation(
29126  	struct gmp_reservation_environment* gmp_env,
29127  	struct reservation *res)
29128  {
29129 +	if (res == NULL) {
29130 +		BUG();
29131 +		return;
29132 +	}
29133  	switch (res->state) {
29134  		case RESERVATION_INACTIVE:
29135  			list_add(&res->list, &gmp_env->inactive_reservations);
29136 @@ -572,7 +578,7 @@ static void gmp_charge_budget(
29137  			 * its remaining budget */
29138  			 TRACE("requesting gmp_scheduler update for reservation %u in %llu nanoseconds\n",
29139  				res->id, res->cur_budget);
29140 -			 gmp_scheduler_update_after(gmp_env, res->cur_budget);
29141 +			 gmp_add_event_after(gmp_env, res->cur_budget, res->id, EVENT_DRAIN);
29142  		}
29143  		//if (encountered_active == 2)
29144  			/* stop at the first ACTIVE reservation */
29145 @@ -601,26 +607,22 @@ static void gmp_replenish_budgets(struct gmp_reservation_environment* gmp_env)
29146  	res = list_first_entry_or_null(&gmp_env->depleted_reservations,
29147  		struct reservation, list);
29148  	if (res)
29149 -		gmp_scheduler_update_at(gmp_env, res->next_replenishment);
29150 +		gmp_add_event(gmp_env, res->next_replenishment, res->id, EVENT_REPLENISH);
29151  }
29152  
29153  /* return schedule_now */
29154 -bool gmp_update_time(
29155 +int gmp_update_time(
29156  	struct gmp_reservation_environment* gmp_env,
29157  	lt_t now)
29158  {
29159  	lt_t delta;
29160  
29161 -	if (!gmp_env) {
29162 -		TRACE("BUG****************************************\n");
29163 -		return false;
29164 -	}
29165  	/* If the time didn't advance, there is nothing to do.
29166  	 * This check makes it safe to call sup_advance_time() potentially
29167  	 * multiple times (e.g., via different code paths. */
29168  	//TRACE("(sup_update_time) now: %llu, current_time: %llu\n", now, sup_env->env.current_time);
29169  	if (unlikely(now <= gmp_env->env.current_time))
29170 -		return gmp_env->schedule_now;
29171 +		return min(gmp_env->schedule_now, NR_CPUS);
29172  
29173  	delta = now - gmp_env->env.current_time;
29174  	gmp_env->env.current_time = now;
29175 @@ -634,7 +636,7 @@ bool gmp_update_time(
29176  	//TRACE("REPLENISH###\n");
29177  	gmp_replenish_budgets(gmp_env);
29178  	
29179 -	return gmp_env->schedule_now;
29180 +	return min(gmp_env->schedule_now, NR_CPUS);
29181  }
29182  
29183  static void gmp_res_change_state(
29184 @@ -652,7 +654,7 @@ static void gmp_res_change_state(
29185  	list_del(&res->list);
29186  	/* check if we need to reschedule because we lost an active reservation */
29187  	if (res->state == RESERVATION_ACTIVE && !gmp_env->will_schedule)
29188 -		gmp_env->schedule_now = true;
29189 +		gmp_env->schedule_now++;
29190  	res->state = new_state;
29191  	gmp_queue_reservation(gmp_env, res);
29192  }
29193 @@ -668,7 +670,7 @@ void gmp_init(struct gmp_reservation_environment* gmp_env)
29194  
29195  	gmp_env->env.change_state = gmp_res_change_state;
29196  
29197 -	gmp_env->schedule_now = false;
29198 +	gmp_env->schedule_now = 0;
29199  	gmp_env->will_schedule = false;
29200  	
29201  	raw_spin_lock_init(&gmp_env->lock);
29202 diff --git a/litmus/sched_mc2.c b/litmus/sched_mc2.c
29203 index 6dee1ec..79fecd4 100644
29204 --- a/litmus/sched_mc2.c
29205 +++ b/litmus/sched_mc2.c
29206 @@ -1,3 +1,12 @@
29207 +/*
29208 + * litmus/sched_mc2.c
29209 + *
29210 + * Implementation of the Mixed-Criticality on MultiCore scheduler
29211 + *
29212 + * This plugin implements the scheduling algorithm proposed in the
29213 + * paper "Mixed-Criticality Real-Time Scheduling for Multicore System".
29214 + */ 
29215 + 
29216  #include <linux/percpu.h>
29217  #include <linux/slab.h>
29218  #include <asm/uaccess.h>
29219 @@ -10,21 +19,29 @@
29220  #include <litmus/jobs.h>
29221  #include <litmus/budget.h>
29222  #include <litmus/litmus_proc.h>
29223 +#include <litmus/sched_trace.h>
29224  
29225  #include <litmus/mc2_common.h>
29226  #include <litmus/reservation.h>
29227  #include <litmus/polling_reservations.h>
29228  
29229 +/* _global_env - reservation container for level-C tasks */
29230  struct gmp_reservation_environment _global_env;
29231  
29232 +/* cpu_entry - keep track of a running task on a cpu
29233 + * This state is used to decide the lowest priority cpu
29234 + */
29235  struct cpu_entry {
29236  	struct task_struct *scheduled;
29237  	lt_t deadline;
29238  	int cpu;
29239  	enum crit_level lv;
29240 +	/* if will_schedule is true, this cpu has already been selected and
29241 +	   will call mc2_schedule() soon. */
29242  	bool will_schedule;
29243  };
29244  
29245 +/* cpu_priority - a global state for choosing the lowest priority CPU */
29246  struct cpu_priority {
29247  	raw_spinlock_t lock;
29248  	struct cpu_entry cpu_entries[NR_CPUS];
29249 @@ -32,19 +49,26 @@ struct cpu_priority {
29250  
29251  struct cpu_priority _lowest_prio_cpu;
29252  	
29253 +/* mc2_task_state - a task state structure */
29254  struct mc2_task_state {
29255  	struct task_client res_info;
29256 +	/* if cpu == -1, this task is a global task (level C) */
29257  	int cpu;
29258  	bool has_departed;
29259  	struct mc2_task mc2_param;
29260  };
29261  
29262 +/* crit_entry - maintain the logically running job (ghost job) */
29263  struct crit_entry {
29264  	enum crit_level level;
29265  	struct task_struct *running;
29266  	//struct hrtimer ghost_timer;
29267  };
29268  
29269 +/* mc2_cpu_state - maintain the scheduled state and ghost jobs
29270 + * timer : timer for partitioned tasks (level A and B)
29271 + * g_timer : timer for global tasks (level C)
29272 + */
29273  struct mc2_cpu_state {
29274  	raw_spinlock_t lock;
29275  
29276 @@ -62,13 +86,22 @@ static DEFINE_PER_CPU(struct mc2_cpu_state, mc2_cpu_state);
29277  #define cpu_state_for(cpu_id)	(&per_cpu(mc2_cpu_state, cpu_id))
29278  #define local_cpu_state()	(&__get_cpu_var(mc2_cpu_state))
29279  
29280 +/* get_mc2_state - get the task's state */
29281  static struct mc2_task_state* get_mc2_state(struct task_struct *tsk)
29282  {
29283 -	return (struct mc2_task_state*) tsk_rt(tsk)->plugin_state;
29284 +	struct mc2_task_state* tinfo;
29285 +	
29286 +	tinfo = (struct mc2_task_state*)tsk_rt(tsk)->plugin_state;
29287 +	
29288 +	if (tinfo)
29289 +		return tinfo;
29290 +	else
29291 +		return NULL;
29292  }
29293 +
29294 +/* get_task_crit_level - return the criticality level of a task */
29295  static enum crit_level get_task_crit_level(struct task_struct *tsk)
29296  {
29297 -	//struct mc2_task_state *tinfo = get_mc2_state(tsk);
29298  	struct mc2_task *mp;
29299  	
29300  	if (!tsk || !is_realtime(tsk))
29301 @@ -82,7 +115,9 @@ static enum crit_level get_task_crit_level(struct task_struct *tsk)
29302  		return mp->crit;
29303  }
29304  
29305 -static struct reservation* res_find_by_id(struct mc2_cpu_state *state, unsigned int id)
29306 +/* res_find_by_id - find reservation by id */
29307 +static struct reservation* res_find_by_id(struct mc2_cpu_state *state,
29308 +                                          unsigned int id)
29309  {
29310  	struct reservation *res;
29311  
29312 @@ -93,7 +128,12 @@ static struct reservation* res_find_by_id(struct mc2_cpu_state *state, unsigned
29313  	return res;
29314  }
29315  
29316 -static void mc2_update_time(enum crit_level lv, struct mc2_cpu_state *state, lt_t time)
29317 +/* mc2_update_time - update time for a given criticality level. 
29318 + *                   caller must hold a proper lock
29319 + *                   (cpu_state lock or global lock)
29320 + */
29321 +static void mc2_update_time(enum crit_level lv, 
29322 +                            struct mc2_cpu_state *state, lt_t time)
29323  {
29324  	if (lv < CRIT_LEVEL_C)
29325  		sup_update_time(&state->sup_env, time);
29326 @@ -103,6 +143,12 @@ static void mc2_update_time(enum crit_level lv, struct mc2_cpu_state *state, lt_
29327  		TRACE("update_time(): Criticality level error!!!!\n");
29328  }
29329  
29330 +/* task_departs - remove a task from its reservation
29331 + *                If the job has remaining budget, convert it to a ghost job
29332 + *                and update crit_entries[]
29333 + *
29334 + * @job_complete	indicates whether the job has completed
29335 + */
29336  static void task_departs(struct task_struct *tsk, int job_complete)
29337  {
29338  	struct mc2_task_state* tinfo = get_mc2_state(tsk);
29339 @@ -110,28 +156,30 @@ static void task_departs(struct task_struct *tsk, int job_complete)
29340  	struct reservation* res;
29341  	struct reservation_client *client;
29342  
29343 +	BUG_ON(!is_realtime(tsk));
29344 +	
29345  	res    = tinfo->res_info.client.reservation;
29346  	client = &tinfo->res_info.client;
29347  
29348  	res->ops->client_departs(res, client, job_complete);
29349  	tinfo->has_departed = true;
29350  	TRACE_TASK(tsk, "CLIENT DEPART with budget %llu\n", res->cur_budget);
29351 +	
29352  	if (job_complete && res->cur_budget) {
29353  		struct crit_entry* ce;
29354  		enum crit_level lv = tinfo->mc2_param.crit;
29355 -		//lt_t now = litmus_clock();
29356  		
29357  		ce = &state->crit_entries[lv];
29358  		ce->running = tsk;
29359  		res->is_ghost = 1;
29360  		TRACE_TASK(tsk, "BECOME GHOST at %llu\n", litmus_clock());
29361  		
29362 -		//BUG_ON(hrtimer_active(&ce->ghost_timer));
29363 -		//TRACE("setting GHOST timer %llu\n", ns_to_ktime(now + res->cur_budget));
29364 -		//__hrtimer_start_range_ns(&ce->ghost_timer, ns_to_ktime(now + res->cur_budget), 0, HRTIMER_MODE_ABS_PINNED, 0);
29365  	}		
29366  }
29367  
29368 +/* task_arrives - put a task into its reservation
29369 + *               If the job was a ghost job, remove it from crit_entries[]
29370 + */
29371  static void task_arrives(struct mc2_cpu_state *state, struct task_struct *tsk)
29372  {
29373  	struct mc2_task_state* tinfo = get_mc2_state(tsk);
29374 @@ -145,15 +193,22 @@ static void task_arrives(struct mc2_cpu_state *state, struct task_struct *tsk)
29375  	tinfo->has_departed = false;
29376  	res->ops->client_arrives(res, client);
29377  	
29378 +	sched_trace_task_release(tsk);
29379 +	
29380  	if (lv != NUM_CRIT_LEVELS) {
29381  		struct crit_entry *ce;
29382  		ce = &state->crit_entries[lv];
29383 +		/* if the current task is a ghost job, remove it */
29384  		if (ce->running == tsk)
29385  			ce->running = NULL;
29386  	}
29387  }
29388  
29389 -/* return: NO_CPU - all CPUs are running tasks with higher priority than Level C */
29390 +/* get_lowest_prio_cpu - return the lowest priority cpu
29391 + *                       This will be used for scheduling level-C tasks.
29392 + *                       If all CPUs are running tasks that have
29393 + *                       higher priority than level C, return NO_CPU.
29394 + */
29395  static int get_lowest_prio_cpu(void)
29396  {
29397  	struct cpu_entry *ce;
29398 @@ -163,11 +218,15 @@ static int get_lowest_prio_cpu(void)
29399  	raw_spin_lock(&_lowest_prio_cpu.lock);
29400  	for_each_online_cpu(cpu) {
29401  		ce = &_lowest_prio_cpu.cpu_entries[cpu];
29402 +		/* If a CPU will call schedule() in the near future, we don't
29403 +		   return that CPU. */
29404  		if (!ce->will_schedule) {
29405  			if (!ce->scheduled) {
29406 +				/* Idle cpu, return this. */
29407  				raw_spin_unlock(&_lowest_prio_cpu.lock);
29408  				return ce->cpu;
29409 -			} else if (ce->lv == CRIT_LEVEL_C && ce->deadline > latest_deadline) {
29410 +			} else if (ce->lv == CRIT_LEVEL_C && 
29411 +			           ce->deadline > latest_deadline) {
29412  				latest_deadline = ce->deadline;
29413  				ret = ce->cpu;
29414  			}
29415 @@ -180,6 +239,14 @@ static int get_lowest_prio_cpu(void)
29416  }
29417  
29418  /* NOTE: drops state->lock */
29419 +/* mc2_update_timer_and_unlock - set a timer and g_timer and unlock 
29420 + *                               Whenever res_env.current_time is updated,
29421 + *                               we check next_scheduler_update and set 
29422 + *                               a timer.
29423 + *                               If there exists a global event that is
29424 + *                               not armed on any CPU and g_timer is not
29425 + *                               active, set a g_timer for that event.
29426 + */
29427  static void mc2_update_timer_and_unlock(struct mc2_cpu_state *state)
29428  {
29429  	int local;
29430 @@ -211,7 +278,8 @@ static void mc2_update_timer_and_unlock(struct mc2_cpu_state *state)
29431  		/* Reprogram only if not already set correctly. */
29432  		if (!hrtimer_active(&state->timer) ||
29433  		    ktime_to_ns(hrtimer_get_expires(&state->timer)) != update) {
29434 -			TRACE("canceling timer...at %llu\n", ktime_to_ns(hrtimer_get_expires(&state->timer)));
29435 +			TRACE("canceling timer...at %llu\n", 
29436 +			      ktime_to_ns(hrtimer_get_expires(&state->timer)));
29437  			hrtimer_cancel(&state->timer);
29438  			TRACE("setting scheduler timer for %llu\n", update);
29439  			/* We cannot use hrtimer_start() here because the
29440 @@ -246,7 +314,8 @@ static void mc2_update_timer_and_unlock(struct mc2_cpu_state *state)
29441  	raw_spin_lock(&_global_env.lock);
29442  	list_for_each_entry_safe(event, next, &_global_env.next_events, list) {
29443  		if (event->timer_armed_on == NO_CPU) {
29444 -			found_event = 1;
29445 +			/* If the event time has already passed, we call schedule() on
29446 +			   the lowest priority cpu */
29447  			if (event->next_update < litmus_clock()) {
29448  				int cpu = get_lowest_prio_cpu();
29449  				TRACE("GLOBAL EVENT PASSED!! poking CPU %d to reschedule\n", cpu);
29450 @@ -260,7 +329,12 @@ static void mc2_update_timer_and_unlock(struct mc2_cpu_state *state)
29451  				}
29452  			} else if (!hrtimer_active(&state->g_timer)) {
29453  				int ret;
29454 -				TRACE("setting global scheduler timer for %llu\n", event->next_update);
29455 +			
29456 +				raw_spin_unlock(&_global_env.lock);
29457 +				found_event = 1;
29458 +			
29459 +				TRACE("setting global scheduler timer for %llu\n", 
29460 +				       event->next_update);
29461  				ret = __hrtimer_start_range_ns(&state->g_timer,
29462  						ns_to_ktime(event->next_update),
29463  						0 /* timer coalescing slack */,
29464 @@ -268,74 +342,76 @@ static void mc2_update_timer_and_unlock(struct mc2_cpu_state *state)
29465  						0 /* wakeup */);
29466  				if (!ret) {
29467  					event->timer_armed_on = state->cpu;
29468 +					break;
29469  				}
29470  			}				
29471  		}
29472 -	}	
29473 -	raw_spin_unlock(&_global_env.lock);
29474 +	}
29475 +	if (found_event == 0)
29476 +		raw_spin_unlock(&_global_env.lock);
29477  }
29478  
29479 +/* mc2_update_ghost_state - Update crit_entries[] to track ghost jobs
29480 + *                          If the budget of a ghost is exhausted,
29481 + *                          clear is_ghost and reschedule
29482 + */
29483  static void mc2_update_ghost_state(struct mc2_cpu_state *state)
29484  {
29485  	int lv = 0;
29486  	struct crit_entry* ce;
29487  	struct reservation *res;
29488  	struct mc2_task_state *tinfo;
29489 +
29490 +	BUG_ON(!state);
29491  	
29492  	for (lv = 0; lv < NUM_CRIT_LEVELS; lv++) {
29493  		ce = &state->crit_entries[lv];
29494  		if (ce->running != NULL) {
29495  			tinfo = get_mc2_state(ce->running);
29496 -			/*
29497 -			if (lv != CRIT_LEVEL_C)
29498 -				res = sup_find_by_id(&state->sup_env, tinfo->mc2_param.res_id);
29499 -			else
29500 +			if (!tinfo)
29501  				continue;
29502 -			*/
29503 +			
29504  			res = res_find_by_id(state, tinfo->mc2_param.res_id);
29505 -			TRACE("LV %d running id %d budget %llu\n", lv, tinfo->mc2_param.res_id, res->cur_budget);
29506 +			TRACE("LV %d running id %d budget %llu\n", 
29507 +			       lv, tinfo->mc2_param.res_id, res->cur_budget);
29508 +			/* If the budget is exhausted, clear is_ghost and reschedule */
29509  			if (!res->cur_budget) {
29510  				struct sup_reservation_environment* sup_env = &state->sup_env;
29511  				
29512 -				TRACE("GHOST FINISH id %d at %llu\n", tinfo->mc2_param.res_id, litmus_clock());
29513 +				TRACE("GHOST FINISH id %d at %llu\n", 
29514 +				      tinfo->mc2_param.res_id, litmus_clock());
29515  				ce->running = NULL;
29516  				res->is_ghost = 0;
29517 -				res = list_first_entry_or_null(&sup_env->active_reservations, struct reservation, list);
29518 -				if (res)
29519 -					litmus_reschedule_local();
29520 +				
29521 +				if (lv < CRIT_LEVEL_C) {
29522 +					res = list_first_entry_or_null(
29523 +					      &sup_env->active_reservations, 
29524 +						  struct reservation, list);
29525 +					if (res)
29526 +						litmus_reschedule_local();
29527 +				} else {
29528 +					res = list_first_entry_or_null(
29529 +					      &_global_env.active_reservations,
29530 +						  struct reservation, list);
29531 +					if (res)
29532 +						litmus_reschedule(state->cpu);
29533 +				}
29534  			}
29535  		}
29536  	}
29537  }			
29538  
29539 -/*
29540 -static enum hrtimer_restart on_ghost_timer(struct hrtimer *timer)
29541 -{
29542 -	struct crit_entry *ce;
29543 -	struct mc2_cpu_state *state;
29544 -	
29545 -	ce = container_of(timer, struct crit_entry, ghost_timer);
29546 -	state = container_of(ce, struct mc2_cpu_state, crit_entries[ce->level]);
29547 -	
29548 -	TRACE("GHOST_TIMER FIRED at %llu\n", litmus_clock());
29549 -	
29550 -	raw_spin_lock(&state->lock);
29551 -	sup_update_time(&state->sup_env, litmus_clock());
29552 -	mc2_update_ghost_state(state);
29553 -	
29554 -	raw_spin_unlock(&state->lock);
29555 -	
29556 -	return HRTIMER_NORESTART;
29557 -}
29558 -*/
29559 -
29560 +/* update_cpu_prio - Update cpu's priority
29561 + *                   When a cpu picks a new task, call this function
29562 + *                   to update cpu priorities.
29563 + */
29564  static void update_cpu_prio(struct mc2_cpu_state *state)
29565  {
29566  	struct cpu_entry *ce = &_lowest_prio_cpu.cpu_entries[state->cpu];
29567  	enum crit_level lv = get_task_crit_level(state->scheduled);
29568  	
29569  	if (!state->scheduled) {
29570 -		// cpu is idle.
29571 +		/* cpu is idle. */
29572  		ce->scheduled = NULL;
29573  		ce->deadline = ULLONG_MAX;
29574  		ce->lv = NUM_CRIT_LEVELS;
29575 @@ -344,24 +420,31 @@ static void update_cpu_prio(struct mc2_cpu_state *state)
29576  		ce->deadline = get_deadline(state->scheduled);
29577  		ce->lv = lv;
29578  	} else if (lv < CRIT_LEVEL_C) {
29579 +		/* If cpu is running level A or B tasks, it is not eligible
29580 +		   to run level-C tasks */
29581  		ce->scheduled = state->scheduled;
29582  		ce->deadline = 0;
29583  		ce->lv = lv;
29584  	}
29585  };
29586  
29587 +/* on_global_scheduling_timer - Process the budget accounting (replenish
29588 + *                              and charge)
29589 + */								
29590  static enum hrtimer_restart on_global_scheduling_timer(struct hrtimer *timer)
29591  {
29592  	unsigned long flags;
29593  	enum hrtimer_restart restart = HRTIMER_NORESTART;
29594  	struct mc2_cpu_state *state;
29595  	struct next_timer_event *event, *next;
29596 -	bool schedule_now;
29597 +	int schedule_now;
29598  	lt_t update, now;
29599  	int found_event = 0;
29600  
29601  	state = container_of(timer, struct mc2_cpu_state, g_timer);
29602  
29603 +	raw_spin_lock_irqsave(&state->lock, flags);
29604 +	
29605  	/* The scheduling timer should only fire on the local CPU, because
29606  	 * otherwise deadlocks via timer_cancel() are possible.
29607  	 * Note: this does not interfere with dedicated interrupt handling, as
29608 @@ -372,11 +455,13 @@ static enum hrtimer_restart on_global_scheduling_timer(struct hrtimer *timer)
29609  	if (state->cpu != raw_smp_processor_id())
29610  		TRACE("BUG!!!!!!!!!!!!! TIMER FIRED ON THE OTHER CPU\n");
29611  
29612 -	raw_spin_lock_irqsave(&_global_env.lock, flags);
29613 +	raw_spin_lock(&_global_env.lock);
29614  	
29615  	update = litmus_clock();
29616  	TRACE("GLOBAL TIMER FIRED at %llu\n", update);
29617  	
29618 +	/* The event may have been processed by another cpu, so if there are
29619 +	   no events to process, we do nothing */
29620  	list_for_each_entry_safe(event, next, &_global_env.next_events, list) {
29621  		if (event->next_update < update) {
29622  			found_event = 1;
29623 @@ -387,22 +472,21 @@ static enum hrtimer_restart on_global_scheduling_timer(struct hrtimer *timer)
29624  	}			
29625  	
29626  	if (!found_event) {
29627 -		raw_spin_unlock_irqrestore(&_global_env.lock, flags);
29628 -		return restart;
29629 +		goto unlock;
29630  	}
29631  	
29632 +	/* gmp_update_time() returns how many tasks became ACTIVE */
29633  	schedule_now = gmp_update_time(&_global_env, update);
29634  	
29635 -	raw_spin_lock(&state->lock);
29636  	mc2_update_ghost_state(state);
29637 -	raw_spin_unlock(&state->lock);
29638  	
29639  	now = _global_env.env.current_time;
29640  	
29641  	TRACE_CUR("on_global_scheduling_timer at %llu, upd:%llu (for cpu=%d) SCHEDULE_NOW = %d\n",
29642  		now, update, state->cpu, schedule_now);
29643  
29644 -	if (schedule_now) {
29645 +	/* Find the lowest-priority cpu and ask it to reschedule */
29646 +	while (schedule_now--) {
29647  		int cpu = get_lowest_prio_cpu();
29648  		if (cpu != NO_CPU) {
29649  			raw_spin_lock(&_lowest_prio_cpu.lock);
29650 @@ -413,11 +497,15 @@ static enum hrtimer_restart on_global_scheduling_timer(struct hrtimer *timer)
29651  		}
29652  	} 
29653  
29654 -	raw_spin_unlock_irqrestore(&_global_env.lock, flags);
29655 -
29656 +unlock:
29657 +	raw_spin_unlock(&_global_env.lock);
29658 +	raw_spin_unlock_irqrestore(&state->lock, flags);
29659 +	
29660  	return restart;
29661  }
29662  
29663 +/* on_scheduling_timer - timer event for partitioned tasks
29664 + */                       
29665  static enum hrtimer_restart on_scheduling_timer(struct hrtimer *timer)
29666  {
29667  	unsigned long flags;
29668 @@ -438,8 +526,9 @@ static enum hrtimer_restart on_scheduling_timer(struct hrtimer *timer)
29669  	TRACE("TIMER FIRED at %llu\n", litmus_clock());
29670  	raw_spin_lock_irqsave(&state->lock, flags);
29671  	sup_update_time(&state->sup_env, litmus_clock());
29672 +	raw_spin_lock(&_global_env.lock);
29673  	mc2_update_ghost_state(state);
29674 -	
29675 +	raw_spin_unlock(&_global_env.lock);
29676  	update = state->sup_env.next_scheduler_update;
29677  	now = state->sup_env.env.current_time;
29678  
29679 @@ -458,6 +547,8 @@ static enum hrtimer_restart on_scheduling_timer(struct hrtimer *timer)
29680  	return restart;
29681  }
29682  
29683 +/* mc2_dispatch - Select the next task to schedule.
29684 + */
29685  struct task_struct* mc2_dispatch(struct sup_reservation_environment* sup_env, struct mc2_cpu_state* state)
29686  {
29687  	struct reservation *res, *next;
29688 @@ -477,36 +568,38 @@ struct task_struct* mc2_dispatch(struct sup_reservation_environment* sup_env, st
29689  				} else {
29690  					ce = &state->crit_entries[lv];
29691  					if (likely(!ce->running)) {
29692 +						/* If we found the next task, clear all flags */
29693  						sup_scheduler_update_after(sup_env, res->cur_budget);
29694  						res->blocked_by_ghost = 0;
29695  						res->is_ghost = 0;
29696  						return tsk;
29697  					} else {
29698 +						/* We cannot schedule a task of the same criticality
29699 +						   because a ghost job exists. Set the blocked_by_ghost
29700 +						   flag so that no budget is charged */
29701  						res->blocked_by_ghost = 1;
29702  					}
29703  				}
29704  			}
29705  		}
29706  	}
29707 -	// no level A or B tasks
29708  	
29709 +	/* no eligible level A or B task exists */
29710  	list_for_each_entry_safe(res, next, &_global_env.active_reservations, list) {
29711  		if (res->state == RESERVATION_ACTIVE && res->scheduled_on == NO_CPU) {
29712  			tsk = res->ops->dispatch_client(res, &time_slice);
29713  			if (likely(tsk)) {
29714  				lv = get_task_crit_level(tsk);
29715  				if (lv == NUM_CRIT_LEVELS) {
29716 -					gmp_scheduler_update_after(&_global_env, res->cur_budget);
29717 -					//raw_spin_unlock(&_global_env.lock);
29718 +					gmp_add_event_after(&_global_env, res->cur_budget, res->id, EVENT_DRAIN);
29719  					return tsk;
29720  				} else {
29721  					ce = &state->crit_entries[lv];
29722  					if (likely(!ce->running)) {
29723 -						gmp_scheduler_update_after(&_global_env, res->cur_budget);
29724 +						gmp_add_event_after(&_global_env, res->cur_budget, res->id, EVENT_DRAIN);
29725  						res->blocked_by_ghost = 0;
29726  						res->is_ghost = 0;
29727  						res->scheduled_on = state->cpu;
29728 -						//raw_spin_unlock(&_global_env.lock);
29729  						return tsk;
29730  					} else {
29731  						res->blocked_by_ghost = 1;
29732 @@ -519,18 +612,43 @@ struct task_struct* mc2_dispatch(struct sup_reservation_environment* sup_env, st
29733  	return NULL;
29734  }
29735  
29736 +/* not used now */
29737 +static void pre_schedule(struct task_struct *prev)
29738 +{
29739 +	enum crit_level lv;
29740 +	if (!is_realtime(prev) || !prev)
29741 +		return;
29742 +	
29743 +	lv = get_task_crit_level(prev);
29744 +}
29745 +
29746 +/* not used now */
29747 +static void post_schedule(struct task_struct *next)
29748 +{
29749 +	enum crit_level lv;
29750 +	if (!is_realtime(next) || !next)
29751 +		return;
29752 +	
29753 +	lv = get_task_crit_level(next);
29754 +}
29755 +
29756 +/* mc2_schedule - main scheduler function. pick the next task to run
29757 + */
29758  static struct task_struct* mc2_schedule(struct task_struct * prev)
29759  {
29760  	/* next == NULL means "schedule background work". */
29761  	struct mc2_cpu_state *state = local_cpu_state();
29762  	
29763 +	pre_schedule(prev);
29764 +	
29765  	raw_spin_lock(&_lowest_prio_cpu.lock);
29766  	if (_lowest_prio_cpu.cpu_entries[state->cpu].will_schedule == true)
29767  		_lowest_prio_cpu.cpu_entries[state->cpu].will_schedule = false;
29768  	raw_spin_unlock(&_lowest_prio_cpu.lock);
29769  	
29770  	raw_spin_lock(&state->lock);
29771 -
29772 +	raw_spin_lock(&_global_env.lock);
29773 +	
29774  	//BUG_ON(state->scheduled && state->scheduled != prev);
29775  	//BUG_ON(state->scheduled && !is_realtime(prev));
29776  	if (state->scheduled && state->scheduled != prev)
29777 @@ -540,19 +658,16 @@ static struct task_struct* mc2_schedule(struct task_struct * prev)
29778  
29779  	/* update time */
29780  	state->sup_env.will_schedule = true;
29781 -	//TRACE_TASK(prev, "MC2_SCHEDULE sup_update_time ####\n");
29782 +
29783  	sup_update_time(&state->sup_env, litmus_clock());
29784 -	
29785 -	raw_spin_lock(&_global_env.lock);
29786  	gmp_update_time(&_global_env, litmus_clock());
29787  	
29788 -	//TRACE_TASK(prev, "MC2_SCHEDULE sup_update_time !!!!\n");
29789  	mc2_update_ghost_state(state);
29790  	
29791  	/* remove task from reservation if it blocks */
29792  	if (is_realtime(prev) && !is_running(prev))
29793  		task_departs(prev, is_completed(prev));
29794 -
29795 +	
29796  	/* figure out what to schedule next */
29797  	state->scheduled = mc2_dispatch(&state->sup_env, state);
29798  	if (state->scheduled && is_realtime(state->scheduled))
29799 @@ -582,10 +697,10 @@ static struct task_struct* mc2_schedule(struct task_struct * prev)
29800  	}
29801  	if (state->scheduled) {
29802  		TRACE_TASK(state->scheduled, "scheduled.\n");
29803 -		//tinfo = get_mc2_state(state->scheduled);
29804 -		//state->run_level = tinfo->mc2_param.crit;
29805  	}
29806  	
29807 +	post_schedule(state->scheduled);
29808 +	
29809  	return state->scheduled;
29810  }
29811  
29812 @@ -599,13 +714,15 @@ static void resume_legacy_task_model_updates(struct task_struct *tsk)
29813  		 * P-RES scheduler. */
29814  
29815  		now = litmus_clock();
29816 -		if (is_tardy(tsk, now))
29817 +		if (is_tardy(tsk, now)) {
29818  			release_at(tsk, now);
29819 +			sched_trace_task_release(tsk);
29820 +		}
29821  	}
29822  }
29823  
29824 -/* Called when the state of tsk changes back to TASK_RUNNING.
29825 - * We need to requeue the task.
29826 +/* mc2_task_resume - Called when the state of tsk changes back to 
29827 + *                   TASK_RUNNING. We need to requeue the task.
29828   */
29829  static void mc2_task_resume(struct task_struct  *tsk)
29830  {
29831 @@ -624,23 +741,25 @@ static void mc2_task_resume(struct task_struct  *tsk)
29832  	/* Requeue only if self-suspension was already processed. */
29833  	if (tinfo->has_departed)
29834  	{
29835 +		raw_spin_lock(&_global_env.lock);
29836  		/* Assumption: litmus_clock() is synchronized across cores,
29837  		 * since we might not actually be executing on tinfo->cpu
29838  		 * at the moment. */
29839  		if (tinfo->cpu != -1) {
29840  			sup_update_time(&state->sup_env, litmus_clock());
29841  		} else {
29842 -			raw_spin_lock(&_global_env.lock);
29843 +			//raw_spin_lock(&_global_env.lock);
29844  			TRACE("RESUME UPDATE ####\n");
29845  			gmp_update_time(&_global_env, litmus_clock());
29846  			TRACE("RESUME UPDATE $$$$\n");
29847 -			raw_spin_unlock(&_global_env.lock);
29848 +			//raw_spin_unlock(&_global_env.lock);
29849  		}
29850  			
29851  		mc2_update_ghost_state(state);
29852  		task_arrives(state, tsk);
29853  		/* NOTE: drops state->lock */
29854  		TRACE_TASK(tsk, "mc2_resume()\n");
29855 +		raw_spin_unlock(&_global_env.lock);
29856  		mc2_update_timer_and_unlock(state);
29857  		local_irq_restore(flags);
29858  	} else {
29859 @@ -651,7 +770,8 @@ static void mc2_task_resume(struct task_struct  *tsk)
29860  	resume_legacy_task_model_updates(tsk);
29861  }
29862  
29863 -/* syscall backend for job completions */
29864 +/* mc2_complete_job - syscall backend for job completions
29865 + */
29866  static long mc2_complete_job(void)
29867  {
29868  	ktime_t next_release;
29869 @@ -662,6 +782,8 @@ static long mc2_complete_job(void)
29870  
29871  	tsk_rt(current)->completed = 1;
29872  	
29873 +	/* If this is the first job instance, we need to reset the
29874 +	   replenishment time to the next release time */
29875  	if (tsk_rt(current)->sporadic_release) {
29876  		struct mc2_cpu_state *state;
29877  		struct reservation_environment *env;
29878 @@ -670,21 +792,27 @@ static long mc2_complete_job(void)
29879  		unsigned long flags;
29880  
29881  		local_irq_save(flags);
29882 -	
29883 -		state = local_cpu_state();
29884 -		env = &(state->sup_env.env);
29885 +		
29886  		tinfo = get_mc2_state(current);
29887  		
29888 -		res = res_find_by_id(state, tsk_rt(current)->mc2_data->res_id);
29889 +		if (get_task_crit_level(current) < CRIT_LEVEL_C)
29890 +			state = cpu_state_for(tinfo->cpu);
29891 +		else
29892 +			state = local_cpu_state();
29893 +		
29894 +		raw_spin_lock(&state->lock);
29895 +		env = &(state->sup_env.env);
29896 +		
29897 +		res = res_find_by_id(state, tinfo->mc2_param.res_id);
29898  		
29899  		if (get_task_crit_level(current) < CRIT_LEVEL_C) {
29900 -			raw_spin_lock(&state->lock);
29901  			env->time_zero = tsk_rt(current)->sporadic_release_time;
29902  		} else {
29903  			raw_spin_lock(&_global_env.lock);
29904  			_global_env.env.time_zero = tsk_rt(current)->sporadic_release_time;
29905  		}
29906  		
29907 +		/* set next_replenishment to the synchronous release time */
29908  		res->next_replenishment = tsk_rt(current)->sporadic_release_time;
29909  		
29910  		if (get_task_crit_level(current) == CRIT_LEVEL_A) {
29911 @@ -697,34 +825,44 @@ static long mc2_complete_job(void)
29912  		res->cur_budget = 0;
29913  		res->env->change_state(res->env, res, RESERVATION_DEPLETED);
29914  		
29915 -		//TRACE_CUR("CHANGE NEXT_REP = %llu\n NEXT_UPDATE = %llu\n", res->next_replenishment, state->sup_env.next_scheduler_update);
29916 -		if (get_task_crit_level(current) < CRIT_LEVEL_C) {
29917 -			raw_spin_unlock(&state->lock);
29918 -		} else {
29919 +		TRACE_CUR("CHANGE NEXT_REP = %llu\n NEXT_UPDATE = %llu\n", res->next_replenishment, state->sup_env.next_scheduler_update);
29920 +		if (get_task_crit_level(current) == CRIT_LEVEL_C) {
29921  			raw_spin_unlock(&_global_env.lock);
29922  		}
29923 +		raw_spin_unlock(&state->lock);
29924  		
29925  		local_irq_restore(flags);
29926  	}
29927 +	sched_trace_task_completion(current, 0);
29928  	
29929 +	/* update the next release time and deadline */
29930  	prepare_for_next_period(current);
29931 +	
29932  	next_release = ns_to_ktime(get_release(current));
29933  	preempt_disable();
29934  	TRACE_CUR("next_release=%llu\n", get_release(current));
29935  	if (get_release(current) > litmus_clock()) {
29936 +		/* sleep until next_release */
29937  		set_current_state(TASK_INTERRUPTIBLE);
29938  		preempt_enable_no_resched();
29939  		err = schedule_hrtimeout(&next_release, HRTIMER_MODE_ABS);
29940 +		if (get_task_crit_level(current) == CRIT_LEVEL_A)
29941 +			sched_trace_task_release(current);
29942  	} else {
29943 +		/* release the next job immediately */
29944  		err = 0;
29945  		TRACE_CUR("TARDY: release=%llu now=%llu\n", get_release(current), litmus_clock());
29946  		preempt_enable();
29947 +		if (get_task_crit_level(current) == CRIT_LEVEL_A)
29948 +			sched_trace_task_release(current);
29949  	}
29950  
29951  	TRACE_CUR("mc2_complete_job returns at %llu\n", litmus_clock());
29952  	return err;
29953  }
29954  
29955 +/* mc2_admit_task - Setup mc2 task parameters
29956 + */
29957  static long mc2_admit_task(struct task_struct *tsk)
29958  {
29959  	long err = -ESRCH;
29960 @@ -752,15 +890,13 @@ static long mc2_admit_task(struct task_struct *tsk)
29961  
29962  		res = sup_find_by_id(&state->sup_env, mp->res_id);
29963  
29964 -		/* found the appropriate reservation (or vCPU) */
29965 +		/* found the appropriate reservation */
29966  		if (res) {
29967  			TRACE_TASK(tsk, "SUP FOUND RES ID\n");
29968  			tinfo->mc2_param.crit = mp->crit;
29969  			tinfo->mc2_param.res_id = mp->res_id;
29970 -			
29971 -			//kfree(tsk_rt(tsk)->plugin_state);
29972 -			//tsk_rt(tsk)->plugin_state = NULL;
29973 -			
29974 +		
29975 +			/* initial values */
29976  			err = mc2_task_client_init(&tinfo->res_info, &tinfo->mc2_param, tsk, res);
29977  			tinfo->cpu = task_cpu(tsk);
29978  			tinfo->has_departed = true;
29979 @@ -772,7 +908,10 @@ static long mc2_admit_task(struct task_struct *tsk)
29980  
29981  		raw_spin_unlock_irqrestore(&state->lock, flags);
29982  	} else if (lv == CRIT_LEVEL_C) {
29983 -		raw_spin_lock_irqsave(&_global_env.lock, flags);
29984 +		local_irq_save(flags);
29985 +		state = local_cpu_state();
29986 +		raw_spin_lock(&state->lock);
29987 +		raw_spin_lock(&_global_env.lock);
29988  		
29989  		res = gmp_find_by_id(&_global_env, mp->res_id);
29990  
29991 @@ -782,9 +921,7 @@ static long mc2_admit_task(struct task_struct *tsk)
29992  			tinfo->mc2_param.crit = mp->crit;
29993  			tinfo->mc2_param.res_id = mp->res_id;
29994  			
29995 -			//kfree(tsk_rt(tsk)->plugin_state);
29996 -			//tsk_rt(tsk)->plugin_state = NULL;
29997 -			
29998 +			/* initial values */
29999  			err = mc2_task_client_init(&tinfo->res_info, &tinfo->mc2_param, tsk, res);
30000  			tinfo->cpu = -1;
30001  			tinfo->has_departed = true;
30002 @@ -794,8 +931,9 @@ static long mc2_admit_task(struct task_struct *tsk)
30003  			tsk_rt(tsk)->task_params.budget_policy = NO_ENFORCEMENT;
30004  		}
30005  
30006 -		raw_spin_unlock_irqrestore(&_global_env.lock, flags);
30007 -		
30008 +		raw_spin_unlock(&_global_env.lock);
30009 +		raw_spin_unlock(&state->lock);
30010 +		local_irq_restore(flags);
30011  	}
30012  	
30013  	preempt_enable();
30014 @@ -806,6 +944,9 @@ static long mc2_admit_task(struct task_struct *tsk)
30015  	return err;
30016  }
30017  
30018 +/* mc2_task_new - A new real-time task has arrived. Release its next job
30019 + *                at the next reservation replenishment time
30020 + */
30021  static void mc2_task_new(struct task_struct *tsk, int on_runqueue,
30022  			  int is_running)
30023  {
30024 @@ -837,11 +978,12 @@ static void mc2_task_new(struct task_struct *tsk, int on_runqueue,
30025  		 * [see comment in pres_task_resume()] */
30026  		raw_spin_lock(&_global_env.lock);
30027  		mc2_update_time(lv, state, litmus_clock());
30028 -		raw_spin_unlock(&_global_env.lock);
30029  		mc2_update_ghost_state(state);
30030  		task_arrives(state, tsk);
30031  		/* NOTE: drops state->lock */
30032  		TRACE("mc2_new()\n");
30033 +		raw_spin_unlock(&_global_env.lock);
30034 +		
30035  		mc2_update_timer_and_unlock(state);
30036  		local_irq_restore(flags);
30037  	} else
30038 @@ -857,6 +999,8 @@ static void mc2_task_new(struct task_struct *tsk, int on_runqueue,
30039  		TRACE_TASK(tsk, "mc2_task_new() next_replenishment = NULL\n");
30040  }
30041  
30042 +/* mc2_reservation_destroy - reservation_destroy system call backend
30043 + */
30044  static long mc2_reservation_destroy(unsigned int reservation_id, int cpu)
30045  {
30046  	long ret = -EINVAL;
30047 @@ -865,8 +1009,13 @@ static long mc2_reservation_destroy(unsigned int reservation_id, int cpu)
30048  	struct sup_reservation_environment *sup_env;
30049  	int found = 0;
30050  	enum crit_level lv = get_task_crit_level(current);
30051 +	unsigned long flags;
30052  	
30053  	if (cpu == -1) {
30054 +		/* if the reservation is a global reservation */
30055 +		local_irq_save(flags);
30056 +		state = local_cpu_state();
30057 +		raw_spin_lock(&state->lock);
30058  		raw_spin_lock(&_global_env.lock);
30059  	
30060  		list_for_each_entry_safe(res, next, &_global_env.depleted_reservations, list) {
30061 @@ -901,34 +1050,16 @@ static long mc2_reservation_destroy(unsigned int reservation_id, int cpu)
30062  			}
30063  		}
30064  
30065 -/*		
30066 -list_for_each_entry(res, &_global_env.depleted_reservations, list) {
30067 -	TRACE("DEPLETED LIST R%d\n", res->id);
30068 -}
30069 -list_for_each_entry(res, &_global_env.inactive_reservations, list) {
30070 -	TRACE("INACTIVE LIST R%d\n", res->id);
30071 -}
30072 -list_for_each_entry(res, &_global_env.active_reservations, list) {
30073 -	TRACE("ACTIVE LIST R%d\n", res->id);
30074 -}
30075 -*/
30076 -		if (list_empty(&_global_env.active_reservations)) 
30077 -			INIT_LIST_HEAD(&_global_env.active_reservations);
30078 -		if (list_empty(&_global_env.depleted_reservations)) 
30079 -			INIT_LIST_HEAD(&_global_env.depleted_reservations);
30080 -		if (list_empty(&_global_env.inactive_reservations)) 
30081 -			INIT_LIST_HEAD(&_global_env.inactive_reservations);
30082 -		if (list_empty(&_global_env.next_events)) 
30083 -			INIT_LIST_HEAD(&_global_env.next_events);
30084 -		
30085  		raw_spin_unlock(&_global_env.lock);
30086 +		raw_spin_unlock(&state->lock);
30087 +		local_irq_restore(flags);
30088  	} else {
30089 +		/* if the reservation is a partitioned reservation */
30090  		state = cpu_state_for(cpu);
30091 -		raw_spin_lock(&state->lock);
30092 +		raw_spin_lock_irqsave(&state->lock, flags);
30093  		
30094  	//	res = sup_find_by_id(&state->sup_env, reservation_id);
30095  		sup_env = &state->sup_env;
30096 -		//if (!res) {
30097  		list_for_each_entry_safe(res, next, &sup_env->depleted_reservations, list) {
30098  			if (res->id == reservation_id) {
30099  				if (lv == CRIT_LEVEL_A) {
30100 @@ -972,15 +1103,16 @@ list_for_each_entry(res, &_global_env.active_reservations, list) {
30101  				}
30102  			}
30103  		}
30104 -		//}
30105  
30106 -		raw_spin_unlock(&state->lock);
30107 +		raw_spin_unlock_irqrestore(&state->lock, flags);
30108  	}
30109  	
30110  	TRACE("RESERVATION_DESTROY ret = %d\n", ret);
30111  	return ret;
30112  }
30113  
30114 +/* mc2_task_exit - Task becomes a normal (non-real-time) task
30115 + */
30116  static void mc2_task_exit(struct task_struct *tsk)
30117  {
30118  	unsigned long flags;
30119 @@ -1007,42 +1139,30 @@ static void mc2_task_exit(struct task_struct *tsk)
30120  	if (is_running(tsk)) {
30121  		/* Assumption: litmus_clock() is synchronized across cores
30122  		 * [see comment in pres_task_resume()] */
30123 -		//if (lv < CRIT_LEVEL_C)
30124 -		//	sup_update_time(&state->sup_env, litmus_clock());
30125 +		
30126  		raw_spin_lock(&_global_env.lock);
30127 +		/* update both global and partitioned */
30128  		mc2_update_time(lv, state, litmus_clock());
30129 -		raw_spin_unlock(&_global_env.lock);
30130 +		
30131  		mc2_update_ghost_state(state);
30132  		task_departs(tsk, 0);
30133  		
30134  		/* NOTE: drops state->lock */
30135  		TRACE("mc2_exit()\n");
30136 +		raw_spin_unlock(&_global_env.lock);
30137  		mc2_update_timer_and_unlock(state);
30138  		local_irq_restore(flags);
30139  	} else
30140  		raw_spin_unlock_irqrestore(&state->lock, flags);
30141 -/*
30142 -	if (tinfo->mc2_param.crit == CRIT_LEVEL_A) {
30143 -		struct table_driven_reservation *td_res;
30144 -		struct reservation *res;
30145 -		res = sup_find_by_id(&state->sup_env, tinfo->mc2_param.res_id);
30146 -		td_res = container_of(res, struct table_driven_reservation, res);
30147 -		kfree(td_res->intervals);
30148 -		//kfree(td_res);
30149 -	} else if (tinfo->mc2_param.crit == CRIT_LEVEL_B) {
30150 -		struct polling_reservation *pres;
30151 -		struct reservation *res;
30152 -		res = sup_find_by_id(&state->sup_env, tinfo->mc2_param.res_id);
30153 -		pres = container_of(res, struct polling_reservation, res);
30154 -		kfree(pres);
30155 -	}
30156 -*/
30157 +
30158  	kfree(tsk_rt(tsk)->plugin_state);
30159  	tsk_rt(tsk)->plugin_state = NULL;
30160  	kfree(tsk_rt(tsk)->mc2_data);
30161  	tsk_rt(tsk)->mc2_data = NULL;
30162  }
30163  
30164 +/* create_polling_reservation - create a new polling reservation
30165 + */
30166  static long create_polling_reservation(
30167  	int res_type,
30168  	struct reservation_config *config)
30169 @@ -1055,6 +1175,7 @@ static long create_polling_reservation(
30170  	int periodic =  res_type == PERIODIC_POLLING;
30171  	long err = -EINVAL;
30172  
30173 +	/* sanity checks */
30174  	if (config->polling_params.budget >
30175  	    config->polling_params.period) {
30176  		printk(KERN_ERR "invalid polling reservation (%u): "
30177 @@ -1138,6 +1259,8 @@ static long create_polling_reservation(
30178  
30179  #define MAX_INTERVALS 1024
30180  
30181 +/* create_table_driven_reservation - create a table_driven reservation
30182 + */
30183  static long create_table_driven_reservation(
30184  	struct reservation_config *config)
30185  {
30186 @@ -1238,6 +1361,8 @@ static long create_table_driven_reservation(
30187  	return err;
30188  }
30189  
30190 +/* mc2_reservation_create - reservation_create system call backend
30191 + */
30192  static long mc2_reservation_create(int res_type, void* __user _config)
30193  {
30194  	long ret = -EINVAL;
30195 -- 
30196 1.8.1.2
30197 
30198 
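[Editor's note -- not part of the patch series] The switch added to new_alloc_page() in litmus/bank_proc.c above partitions the 16 page colors by allocation node: node 0 draws pages from colors 0 .. l2_usable_sets-1, while nodes 1 through 4 draw from the remaining 16 - l2_usable_sets colors. Below is a minimal, stand-alone user-space sketch of that mapping, under the assumptions that there are 16 colors and that the sysctl bounds (1 .. 15) keep both divisors nonzero; NR_COLORS and partitioned_color() are illustrative names only, and the value 5 mirrors the default installed by litmus_sysctl_init().

#include <stdio.h>

#define NR_COLORS 16	/* assumption: 16 page colors, as implied by the patch */

/* Mirrors the switch in new_alloc_page(): map a raw page color and an
 * allocation node onto the color range that node is allowed to use.
 * Returns -1 for an unknown node (the kernel code jumps to 'out' instead).
 * The sysctl clamps l2_usable_sets to 1..15, so neither modulus is zero. */
static int partitioned_color(int node, int color, int l2_usable_sets)
{
	switch (node) {
	case 0:
		return color % l2_usable_sets;
	case 1:
	case 2:
	case 3:
	case 4:
		return (color % (NR_COLORS - l2_usable_sets)) + l2_usable_sets;
	default:
		return -1;
	}
}

int main(void)
{
	int l2_usable_sets = 5;	/* default set by litmus_sysctl_init() */
	int color;

	/* Show which color bucket each raw color maps to for node 0 and node 4. */
	for (color = 0; color < NR_COLORS; color++)
		printf("color %2d -> node0:%2d node4:%2d\n", color,
		       partitioned_color(0, color, l2_usable_sets),
		       partitioned_color(4, color, l2_usable_sets));
	return 0;
}

Running the sketch makes the split visible: node 0 cycles through colors 0-4, while nodes 1-4 cycle through colors 5-15 for the default l2_usable_sets value.
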
30199 From f34d9982907644ade66b8689460cf0f414e88ce7 Mon Sep 17 00:00:00 2001
30200 From: Namhoon Kim <namhoonk@cs.unc.edu>
30201 Date: Thu, 26 Feb 2015 10:10:13 -0500
30202 Subject: [PATCH 090/119] Guard fec PM ops with CONFIG_PM and tolerate client departure from inactive table-driven reservations
30203 
30204 ---
30205  drivers/net/ethernet/freescale/fec_main.c | 2 ++
30206  litmus/polling_reservations.c             | 2 +-
30207  2 files changed, 3 insertions(+), 1 deletion(-)
30208 
30209 diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
30210 index 7b95428..a05b372 100644
30211 --- a/drivers/net/ethernet/freescale/fec_main.c
30212 +++ b/drivers/net/ethernet/freescale/fec_main.c
30213 @@ -3390,7 +3390,9 @@ static struct platform_driver fec_driver = {
30214  	.driver	= {
30215  		.name	= DRIVER_NAME,
30216  		.owner	= THIS_MODULE,
30217 +#ifdef CONFIG_PM
30218  		.pm	= &fec_pm_ops,
30219 +#endif
30220  		.of_match_table = fec_dt_ids,
30221  	},
30222  	.id_table = fec_devtype,
30223 diff --git a/litmus/polling_reservations.c b/litmus/polling_reservations.c
30224 index d2c54c4..a3125eb 100644
30225 --- a/litmus/polling_reservations.c
30226 +++ b/litmus/polling_reservations.c
30227 @@ -363,7 +363,7 @@ static void td_client_departs(
30228  	switch (res->state) {
30229  		case RESERVATION_INACTIVE:
30230  		case RESERVATION_ACTIVE_IDLE:
30231 -			BUG(); /* INACTIVE or IDLE <=> no client */
30232 +			//BUG(); /* INACTIVE or IDLE <=> no client */
30233  			break;
30234  
30235  		case RESERVATION_ACTIVE:
30236 -- 
30237 1.8.1.2
30238 
30239 
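[Editor's note -- not part of the patch series] gmp_add_event(), introduced in patch 089 above and refined in patch 091 below, keeps the global next_events list sorted by next_update and reuses an already-queued event only when both its id and its type match. The following stand-alone sketch shows that queueing discipline under simplified assumptions: a plain singly linked list and malloc stand in for <linux/list.h> and GFP_ATOMIC, and all names are illustrative only.

#include <stdio.h>
#include <stdlib.h>

typedef unsigned long long lt_t;
typedef enum { EVENT_REPLENISH, EVENT_DRAIN } event_type_t;

struct event {
	lt_t next_update;
	unsigned int id;
	event_type_t type;
	struct event *next;
};

/* Add an event, keeping the queue sorted by next_update; an event with the
 * same id and type is considered already queued and left untouched. */
static struct event *add_event(struct event *head, lt_t when,
			       unsigned int id, event_type_t type)
{
	struct event **pos, *ev;

	for (ev = head; ev; ev = ev->next)
		if (ev->id == id && ev->type == type)
			return head;	/* already queued: nothing to do */

	ev = calloc(1, sizeof(*ev));
	if (!ev)
		return head;
	ev->next_update = when;
	ev->id = id;
	ev->type = type;

	/* insert before the first queued event with a later next_update */
	for (pos = &head; *pos && (*pos)->next_update <= when; pos = &(*pos)->next)
		;
	ev->next = *pos;
	*pos = ev;
	return head;
}

int main(void)
{
	struct event *q = NULL, *ev;

	q = add_event(q, 300, 1, EVENT_DRAIN);
	q = add_event(q, 100, 2, EVENT_REPLENISH);
	q = add_event(q, 200, 1, EVENT_REPLENISH);
	q = add_event(q, 150, 2, EVENT_REPLENISH);	/* same id+type: ignored */

	for (ev = q; ev; ev = ev->next)
		printf("R%u type=%d at %llu\n", ev->id, ev->type, ev->next_update);
	return 0;
}

The sorted order is what lets the scheduler arm its global timer for the head of next_events only, which is the behavior the TRACE lines in gmp_add_event() report.
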
30240 From 0a62a98d4cbd2f1cb0ecee6669f708a3e83afcb3 Mon Sep 17 00:00:00 2001
30241 From: Namhoon Kim <namhoonk@cs.unc.edu>
30242 Date: Sun, 1 Mar 2015 17:58:29 -0500
30243 Subject: [PATCH 091/119] MC2 scheduling infrastructure
30244 
30245 ---
30246  include/litmus/reservation.h |   2 +
30247  litmus/reservation.c         |  78 +++++++++++++----
30248  litmus/sched_mc2.c           | 196 ++++++++++++++++++++++++++++++-------------
30249  3 files changed, 204 insertions(+), 72 deletions(-)
30250 
30251 diff --git a/include/litmus/reservation.h b/include/litmus/reservation.h
30252 index 0b9c08d..7e022b3 100644
30253 --- a/include/litmus/reservation.h
30254 +++ b/include/litmus/reservation.h
30255 @@ -129,6 +129,7 @@ struct reservation {
30256  	
30257  	/* for global env. */
30258  	int scheduled_on;
30259 +	int event_added;
30260  	/* for blocked by ghost. Do not charge budget when ACTIVE */
30261  	int blocked_by_ghost;
30262  	/* ghost_job. If it is clear, do not charge budget when ACTIVE_IDLE */
30263 @@ -244,6 +245,7 @@ void gmp_add_new_reservation(struct gmp_reservation_environment* gmp_env,
30264  	struct reservation* new_res);
30265  void gmp_add_event_after(struct gmp_reservation_environment* gmp_env,
30266  	lt_t timeout, unsigned int id, event_type_t type);
30267 +void gmp_print_events(struct gmp_reservation_environment* gmp_env, lt_t now);
30268  int gmp_update_time(struct gmp_reservation_environment* gmp_env, lt_t now);
30269  struct task_struct* gmp_dispatch(struct gmp_reservation_environment* gmp_env);
30270  struct next_timer_event* gmp_find_event_by_id(struct gmp_reservation_environment* gmp_env, unsigned int id);
30271 diff --git a/litmus/reservation.c b/litmus/reservation.c
30272 index b0b13a9..3ec18a2 100644
30273 --- a/litmus/reservation.c
30274 +++ b/litmus/reservation.c
30275 @@ -428,7 +428,7 @@ static void gmp_add_event(
30276  	
30277  	nevent = gmp_find_event_by_id(gmp_env, id);
30278  	
30279 -	if (!nevent) {
30280 +	if (!nevent || nevent->type != type) {
30281  		nevent = kzalloc(sizeof(*nevent), GFP_ATOMIC);
30282  		nevent->next_update = when;
30283  		nevent->id = id;
30284 @@ -440,18 +440,22 @@ static void gmp_add_event(
30285  			if (queued->next_update > nevent->next_update) {
30286  				list_add(&nevent->list, pos->prev);
30287  				found = 1;
30288 -				TRACE("NEXT_EVENT at %llu ADDED before %llu\n", nevent->next_update, queued->next_update);
30289 +				TRACE("NEXT_EVENT id=%d type=%d update=%llu ADDED before %llu\n", nevent->id, nevent->type, nevent->next_update, queued->next_update);
30290  				break;
30291  			}
30292  		}
30293  		
30294  		if (!found) {
30295  			list_add_tail(&nevent->list, &gmp_env->next_events);
30296 -			TRACE("NEXT_EVENT ADDED at %llu ADDED at HEAD\n", nevent->next_update);
30297 +			TRACE("NEXT_EVENT id=%d type=%d update=%llu ADDED at TAIL\n", nevent->id, nevent->type, nevent->next_update);
30298  		}
30299  	} else {
30300 -		TRACE("EVENT FOUND type=%d when=%llu, NEW EVENT type=%d when=%llu\n", nevent->type, nevent->next_update, type, when);
30301 +		TRACE("EVENT FOUND id = %d type=%d when=%llu, NEW EVENT type=%d when=%llu\n", nevent->id, nevent->type, nevent->next_update, type, when);
30302  	}
30303 +	
30304 +	TRACE("======START PRINTING EVENT LIST======\n");
30305 +	gmp_print_events(gmp_env, litmus_clock());
30306 +	TRACE("======FINISH PRINTING EVENT LIST======\n");
30307  }
30308  
30309  void gmp_add_event_after(
30310 @@ -478,8 +482,9 @@ static void gmp_queue_depleted(
30311  
30312  	if (!found)
30313  		list_add_tail(&res->list, &gmp_env->depleted_reservations);
30314 -
30315 +	TRACE("R%d queued to depleted_list\n", res->id);
30316  	gmp_add_event(gmp_env, res->next_replenishment, res->id, EVENT_REPLENISH);
30317 +	res->event_added = 1;
30318  }
30319  
30320  static void gmp_queue_active(
30321 @@ -508,6 +513,7 @@ static void gmp_queue_active(
30322  		gmp_env->schedule_now++;
30323  	
30324  	gmp_add_event_after(gmp_env, res->cur_budget, res->id, EVENT_DRAIN);
30325 +	res->event_added = 1;
30326  }
30327  
30328  static void gmp_queue_reservation(
30329 @@ -554,12 +560,14 @@ static void gmp_charge_budget(
30330  		/* charge all ACTIVE_IDLE up to the first ACTIVE reservation */
30331  		res = list_entry(pos, struct reservation, list);
30332  		if (res->state == RESERVATION_ACTIVE) {
30333 -			TRACE("gmp_charge_budget ACTIVE R%u drain %llu\n", res->id, delta);
30334 +			TRACE("gmp_charge_budget ACTIVE R%u scheduled_on=%d drain %llu\n", res->id, res->scheduled_on, delta);
30335  			if (res->scheduled_on != NO_CPU && res->blocked_by_ghost == 0) {
30336  				TRACE("DRAIN !!\n");
30337  				drained = 1;
30338  				res->ops->drain_budget(res, delta);
30339 -			}			
30340 +			} else {
30341 +				TRACE("NO DRAIN (not scheduled)!!\n");
30342 +			}
30343  		} else {
30344  			//BUG_ON(res->state != RESERVATION_ACTIVE_IDLE);
30345  			if (res->state != RESERVATION_ACTIVE_IDLE)
30346 @@ -579,6 +587,7 @@ static void gmp_charge_budget(
30347  			 TRACE("requesting gmp_scheduler update for reservation %u in %llu nanoseconds\n",
30348  				res->id, res->cur_budget);
30349  			 gmp_add_event_after(gmp_env, res->cur_budget, res->id, EVENT_DRAIN);
30350 +			 res->event_added = 1;
30351  		}
30352  		//if (encountered_active == 2)
30353  			/* stop at the first ACTIVE reservation */
30354 @@ -601,33 +610,49 @@ static void gmp_replenish_budgets(struct gmp_reservation_environment* gmp_env)
30355  			break;
30356  		}
30357  	}
30358 -	//TRACE("finished replenishing budgets\n");
30359 +	TRACE("finished replenishing budgets\n");
30360  
30361  	/* request a scheduler update at the next replenishment instant */
30362 +	list_for_each_safe(pos, next, &gmp_env->depleted_reservations) {
30363 +		res = list_entry(pos, struct reservation, list);
30364 +		if (res->event_added == 0) {
30365 +			gmp_add_event(gmp_env, res->next_replenishment, res->id, EVENT_REPLENISH);
30366 +			res->event_added = 1;
30367 +		}
30368 +	}	
30369 +	
30370 +/*
30371  	res = list_first_entry_or_null(&gmp_env->depleted_reservations,
30372  		struct reservation, list);
30373 -	if (res)
30374 +	if (res && res->event_added == 0) {
30375  		gmp_add_event(gmp_env, res->next_replenishment, res->id, EVENT_REPLENISH);
30376 +		res->event_added = 1;
30377 +	}
30378 +*/
30379  }
30380  
30381 +#define EPSILON	50
30382 +
30383  /* return schedule_now */
30384  int gmp_update_time(
30385  	struct gmp_reservation_environment* gmp_env,
30386  	lt_t now)
30387  {
30388 -	lt_t delta;
30389 +	struct next_timer_event *event, *next;
30390 +	lt_t delta, ret;
30391  
30392  	/* If the time didn't advance, there is nothing to do.
30393  	 * This check makes it safe to call sup_advance_time() potentially
30394  	 * multiple times (e.g., via different code paths. */
30395 -	//TRACE("(sup_update_time) now: %llu, current_time: %llu\n", now, sup_env->env.current_time);
30396 -	if (unlikely(now <= gmp_env->env.current_time))
30397 -		return min(gmp_env->schedule_now, NR_CPUS);
30398 +	TRACE("(gmp_update_time) now: %llu, current_time: %llu\n", now, gmp_env->env.current_time);
30399 +	if (unlikely(now <= gmp_env->env.current_time + EPSILON))
30400 +		return 0;
30401  
30402  	delta = now - gmp_env->env.current_time;
30403  	gmp_env->env.current_time = now;
30404  
30405  
30406 +	//gmp_print_events(gmp_env, now);
30407  	/* deplete budgets by passage of time */
30408  	//TRACE("CHARGE###\n");
30409  	gmp_charge_budget(gmp_env, delta);
30410 @@ -636,7 +661,30 @@ int gmp_update_time(
30411  	//TRACE("REPLENISH###\n");
30412  	gmp_replenish_budgets(gmp_env);
30413  	
30414 -	return min(gmp_env->schedule_now, NR_CPUS);
30415 +	list_for_each_entry_safe(event, next, &gmp_env->next_events, list) {
30416 +		if (event->next_update < now) {
30417 +			list_del(&event->list);
30418 +			TRACE("EVENT at %llu IS DELETED\n", event->next_update);
30419 +			kfree(event);
30420 +		}
30421 +	}		
30422 +	
30423 +	//gmp_print_events(gmp_env, litmus_clock());
30424 +	
30425 +	ret = min(gmp_env->schedule_now, NR_CPUS);
30426 +	gmp_env->schedule_now = 0;
30427 +	
30428 +	return ret;
30429 +}
30430 +
30431 +void gmp_print_events(struct gmp_reservation_environment* gmp_env, lt_t now)
30432 +{
30433 +	struct next_timer_event *event, *next;
30434 +
30435 +	TRACE("GLOBAL EVENTS now=%llu\n", now);
30436 +	list_for_each_entry_safe(event, next, &gmp_env->next_events, list) {
30437 +		TRACE("at %llu type=%d id=%d armed_on=%d\n", event->next_update, event->type, event->id, event->timer_armed_on);
30438 +	}		
30439  }
30440  
30441  static void gmp_res_change_state(
30442 @@ -653,7 +701,7 @@ static void gmp_res_change_state(
30443  
30444  	list_del(&res->list);
30445  	/* check if we need to reschedule because we lost an active reservation */
30446 -	if (res->state == RESERVATION_ACTIVE && !gmp_env->will_schedule)
30447 +	if (res->state == RESERVATION_ACTIVE)
30448  		gmp_env->schedule_now++;
30449  	res->state = new_state;
30450  	gmp_queue_reservation(gmp_env, res);
30451 diff --git a/litmus/sched_mc2.c b/litmus/sched_mc2.c
30452 index 79fecd4..b3390dc 100644
30453 --- a/litmus/sched_mc2.c
30454 +++ b/litmus/sched_mc2.c
30455 @@ -74,7 +74,7 @@ struct mc2_cpu_state {
30456  
30457  	struct sup_reservation_environment sup_env;
30458  	struct hrtimer timer;
30459 -	struct hrtimer g_timer;
30460 +	//struct hrtimer g_timer;
30461  
30462  	int cpu;
30463  	struct task_struct* scheduled;
30464 @@ -209,21 +209,33 @@ static void task_arrives(struct mc2_cpu_state *state, struct task_struct *tsk)
30465   *                       If all CPUs are running tasks which has
30466   *                       higher priority than level C, return NO_CPU.
30467   */
30468 -static int get_lowest_prio_cpu(void)
30469 +static int get_lowest_prio_cpu(lt_t priority)
30470  {
30471  	struct cpu_entry *ce;
30472  	int cpu, ret = NO_CPU;
30473  	lt_t latest_deadline = 0;
30474  	
30475  	raw_spin_lock(&_lowest_prio_cpu.lock);
30476 +	ce = &_lowest_prio_cpu.cpu_entries[local_cpu_state()->cpu];
30477 +	if (!ce->will_schedule && !ce->scheduled) {
30478 +		raw_spin_unlock(&_lowest_prio_cpu.lock);
30479 +		TRACE("CPU %d (local) is the lowest!\n", ce->cpu);
30480 +		return ce->cpu;
30481 +	}	
30482 +
30483  	for_each_online_cpu(cpu) {
30484  		ce = &_lowest_prio_cpu.cpu_entries[cpu];
30485  		/* If a CPU will call schedule() in the near future, we don't
30486  		   return that CPU. */
30487 +		TRACE("CPU %d will_schedule=%d, scheduled=(%s/%d:%d)\n", cpu, ce->will_schedule,
30488 +	      ce->scheduled ? (ce->scheduled)->comm : "null",
30489 +	      ce->scheduled ? (ce->scheduled)->pid : 0,
30490 +	      ce->scheduled ? (ce->scheduled)->rt_param.job_params.job_no : 0);
30491  		if (!ce->will_schedule) {
30492  			if (!ce->scheduled) {
30493  				/* Idle cpu, return this. */
30494  				raw_spin_unlock(&_lowest_prio_cpu.lock);
30495 +				TRACE("CPU %d is the lowest!\n", ce->cpu);
30496  				return ce->cpu;
30497  			} else if (ce->lv == CRIT_LEVEL_C && 
30498  			           ce->deadline > latest_deadline) {
30499 @@ -234,7 +246,12 @@ static int get_lowest_prio_cpu(void)
30500  	}		
30501  	
30502  	raw_spin_unlock(&_lowest_prio_cpu.lock);
30503 +
30504 +	if (priority >= latest_deadline)
30505 +		ret = NO_CPU;
30506  	
30507 +	TRACE("CPU %d is the lowest!\n", ret);
30508 +
30509  	return ret;
30510  }
30511  
30512 @@ -253,7 +270,6 @@ static void mc2_update_timer_and_unlock(struct mc2_cpu_state *state)
30513  	lt_t update, now;
30514  	enum crit_level lv = get_task_crit_level(state->scheduled);
30515  	struct next_timer_event *event, *next;
30516 -	int found_event = 0;
30517  	
30518  	//TRACE_TASK(state->scheduled, "update_timer!\n");
30519  	if (lv != NUM_CRIT_LEVELS)
30520 @@ -268,10 +284,35 @@ static void mc2_update_timer_and_unlock(struct mc2_cpu_state *state)
30521  	 */
30522  	local = local_cpu_state() == state;
30523  
30524 +	list_for_each_entry_safe(event, next, &_global_env.next_events, list) {
30525 +		/* If the event time is already passed, we call schedule() on
30526 +		   the lowest priority cpu */
30527 +		if (event->next_update < litmus_clock()) {
30528 +			if (event->timer_armed_on == NO_CPU) {
30529 +				struct reservation *res = gmp_find_by_id(&_global_env, event->id);
30530 +				int cpu = get_lowest_prio_cpu(res?res->priority:0);
30531 +				TRACE("GLOBAL EVENT PASSED!! poking CPU %d to reschedule\n", cpu);
30532 +				list_del(&event->list);
30533 +				kfree(event);
30534 +				if (cpu != NO_CPU) {
30535 +					raw_spin_lock(&_lowest_prio_cpu.lock);
30536 +					_lowest_prio_cpu.cpu_entries[cpu].will_schedule = true;
30537 +					raw_spin_unlock(&_lowest_prio_cpu.lock);
30538 +					litmus_reschedule(cpu);
30539 +				}
30540 +			}
30541 +		} else if (event->next_update < update && event->timer_armed_on == NO_CPU) {
30542 +			event->timer_armed_on = state->cpu;
30543 +			update = event->next_update;
30544 +			break;
30545 +		}
30546 +	}
30547 +	
30548  	/* Must drop state lock before calling into hrtimer_start(), which
30549  	 * may raise a softirq, which in turn may wake ksoftirqd. */
30550  	raw_spin_unlock(&state->lock);
30551 -
30552 +	raw_spin_unlock(&_global_env.lock);
30553 +	
30554  	if (update <= now) {
30555  		litmus_reschedule(state->cpu);
30556  	} else if (likely(local && update != SUP_NO_SCHEDULER_UPDATE)) {
30557 @@ -310,7 +351,8 @@ static void mc2_update_timer_and_unlock(struct mc2_cpu_state *state)
30558  			litmus_reschedule(state->cpu);
30559  		}
30560  	}
30561 -	
30562 +
30563 +#if 0	
30564  	raw_spin_lock(&_global_env.lock);
30565  	list_for_each_entry_safe(event, next, &_global_env.next_events, list) {
30566  		if (event->timer_armed_on == NO_CPU) {
30567 @@ -349,6 +391,7 @@ static void mc2_update_timer_and_unlock(struct mc2_cpu_state *state)
30568  	}
30569  	if (found_event == 0)
30570  		raw_spin_unlock(&_global_env.lock);
30571 +#endif	
30572  }
30573  
30574  /* mc2_update_ghost_state - Update crit_entries[] to track ghost jobs
30575 @@ -396,6 +439,9 @@ static void mc2_update_ghost_state(struct mc2_cpu_state *state)
30576  					if (res)
30577  						litmus_reschedule(state->cpu);
30578  				}
30579 +			} else {
30580 +				TRACE("GHOST NOT FINISH id %d budget %llu\n", res->id, res->cur_budget);
30581 +				gmp_add_event_after(&_global_env, res->cur_budget, res->id, EVENT_DRAIN);
30582  			}
30583  		}
30584  	}
30585 @@ -428,6 +474,7 @@ static void update_cpu_prio(struct mc2_cpu_state *state)
30586  	}
30587  };
30588  
30589 +#if 0
30590  /* on_global_scheduling_timer - Process the budget accounting (replenish
30591   *                              and charge)
30592   */								
30593 @@ -503,6 +550,7 @@ unlock:
30594  	
30595  	return restart;
30596  }
30597 +#endif
30598  
30599  /* on_scheduling_timer - timer event for partitioned tasks
30600   */                       
30601 @@ -512,6 +560,7 @@ static enum hrtimer_restart on_scheduling_timer(struct hrtimer *timer)
30602  	enum hrtimer_restart restart = HRTIMER_NORESTART;
30603  	struct mc2_cpu_state *state;
30604  	lt_t update, now;
30605 +	int global_schedule_now;
30606  
30607  	state = container_of(timer, struct mc2_cpu_state, timer);
30608  
30609 @@ -524,16 +573,19 @@ static enum hrtimer_restart on_scheduling_timer(struct hrtimer *timer)
30610  	BUG_ON(state->cpu != raw_smp_processor_id());
30611  
30612  	TRACE("TIMER FIRED at %llu\n", litmus_clock());
30613 -	raw_spin_lock_irqsave(&state->lock, flags);
30614 +	raw_spin_lock_irqsave(&_global_env.lock, flags);
30615 +	raw_spin_lock(&state->lock);
30616 +	
30617  	sup_update_time(&state->sup_env, litmus_clock());
30618 -	raw_spin_lock(&_global_env.lock);
30619 +	global_schedule_now = gmp_update_time(&_global_env, litmus_clock());
30620 +	
30621  	mc2_update_ghost_state(state);
30622 -	raw_spin_unlock(&_global_env.lock);
30623 +	
30624  	update = state->sup_env.next_scheduler_update;
30625  	now = state->sup_env.env.current_time;
30626  
30627 -	TRACE_CUR("on_scheduling_timer at %llu, upd:%llu (for cpu=%d)\n",
30628 -		now, update, state->cpu);
30629 +	TRACE_CUR("on_scheduling_timer at %llu, upd:%llu (for cpu=%d) g_schedule_now:%d\n",
30630 +		now, update, state->cpu, global_schedule_now);
30631  
30632  	if (update <= now) {
30633  		litmus_reschedule_local();
30634 @@ -542,7 +594,20 @@ static enum hrtimer_restart on_scheduling_timer(struct hrtimer *timer)
30635  		restart = HRTIMER_RESTART;
30636  	}
30637  
30638 -	raw_spin_unlock_irqrestore(&state->lock, flags);
30639 +	/* Find the lowest cpu, and call reschedule */
30640 +	while (global_schedule_now--) {
30641 +		int cpu = get_lowest_prio_cpu(0);
30642 +		if (cpu != NO_CPU) {
30643 +			raw_spin_lock(&_lowest_prio_cpu.lock);
30644 +			_lowest_prio_cpu.cpu_entries[cpu].will_schedule = true;
30645 +			raw_spin_unlock(&_lowest_prio_cpu.lock);
30646 +			TRACE("LOWEST CPU = P%d\n", cpu);
30647 +			litmus_reschedule(cpu);
30648 +		}
30649 +	} 
30650 +	
30651 +	raw_spin_unlock(&state->lock);
30652 +	raw_spin_unlock_irqrestore(&_global_env.lock, flags);
30653  
30654  	return restart;
30655  }
30656 @@ -555,7 +620,7 @@ struct task_struct* mc2_dispatch(struct sup_reservation_environment* sup_env, st
30657  	struct task_struct *tsk = NULL;
30658  	struct crit_entry *ce;
30659  	enum crit_level lv;
30660 -	lt_t time_slice;
30661 +	lt_t time_slice, cur_priority;
30662  
30663  	list_for_each_entry_safe(res, next, &sup_env->active_reservations, list) {
30664  		if (res->state == RESERVATION_ACTIVE) {
30665 @@ -578,6 +643,7 @@ struct task_struct* mc2_dispatch(struct sup_reservation_environment* sup_env, st
30666  						   because the ghost job exists. Set blocked_by_ghost
30667  						   flag not to charge budget */
30668  						res->blocked_by_ghost = 1;
30669 +						TRACE_TASK(ce->running, " is GHOST\n");
30670  					}
30671  				}
30672  			}
30673 @@ -585,24 +651,32 @@ struct task_struct* mc2_dispatch(struct sup_reservation_environment* sup_env, st
30674  	}
30675  	
30676  	/* no eligible level A or B tasks exists */
30677 +	cur_priority = _lowest_prio_cpu.cpu_entries[state->cpu].deadline;
30678 +	
30679 +	TRACE("****** ACTIVE LIST ******\n");
30680 +	TRACE_TASK(_lowest_prio_cpu.cpu_entries[state->cpu].scheduled, "** CURRENT JOB deadline %llu **\n", cur_priority);
30681  	list_for_each_entry_safe(res, next, &_global_env.active_reservations, list) {
30682 +		TRACE("R%d deadline=%llu, scheduled_on=%d\n", res->id, res->priority, res->scheduled_on);
30683  		if (res->state == RESERVATION_ACTIVE && res->scheduled_on == NO_CPU) {
30684  			tsk = res->ops->dispatch_client(res, &time_slice);
30685  			if (likely(tsk)) {
30686  				lv = get_task_crit_level(tsk);
30687  				if (lv == NUM_CRIT_LEVELS) {
30688  					gmp_add_event_after(&_global_env, res->cur_budget, res->id, EVENT_DRAIN);
30689 +					res->event_added = 1;
30690  					return tsk;
30691  				} else {
30692  					ce = &state->crit_entries[lv];
30693  					if (likely(!ce->running)) {
30694  						gmp_add_event_after(&_global_env, res->cur_budget, res->id, EVENT_DRAIN);
30695 +						res->event_added = 1;
30696  						res->blocked_by_ghost = 0;
30697  						res->is_ghost = 0;
30698  						res->scheduled_on = state->cpu;
30699  						return tsk;
30700  					} else {
30701  						res->blocked_by_ghost = 1;
30702 +						TRACE_TASK(ce->running, " is GHOST\n");
30703  					}
30704  				}
30705  			}
30706 @@ -641,13 +715,8 @@ static struct task_struct* mc2_schedule(struct task_struct * prev)
30707  	
30708  	pre_schedule(prev);
30709  	
30710 -	raw_spin_lock(&_lowest_prio_cpu.lock);
30711 -	if (_lowest_prio_cpu.cpu_entries[state->cpu].will_schedule == true)
30712 -		_lowest_prio_cpu.cpu_entries[state->cpu].will_schedule = false;
30713 -	raw_spin_unlock(&_lowest_prio_cpu.lock);
30714 -	
30715 -	raw_spin_lock(&state->lock);
30716  	raw_spin_lock(&_global_env.lock);
30717 +	raw_spin_lock(&state->lock);
30718  	
30719  	//BUG_ON(state->scheduled && state->scheduled != prev);
30720  	//BUG_ON(state->scheduled && !is_realtime(prev));
30721 @@ -668,12 +737,14 @@ static struct task_struct* mc2_schedule(struct task_struct * prev)
30722  	if (is_realtime(prev) && !is_running(prev))
30723  		task_departs(prev, is_completed(prev));
30724  	
30725 +	raw_spin_lock(&_lowest_prio_cpu.lock);
30726 +	_lowest_prio_cpu.cpu_entries[state->cpu].will_schedule = false;
30727 +	
30728  	/* figure out what to schedule next */
30729  	state->scheduled = mc2_dispatch(&state->sup_env, state);
30730  	if (state->scheduled && is_realtime(state->scheduled))
30731  		TRACE_TASK(state->scheduled, "mc2_dispatch picked me!\n");
30732  	
30733 -	raw_spin_lock(&_lowest_prio_cpu.lock);
30734  	update_cpu_prio(state);
30735  	raw_spin_unlock(&_lowest_prio_cpu.lock);
30736  	
30737 @@ -682,18 +753,29 @@ static struct task_struct* mc2_schedule(struct task_struct * prev)
30738  
30739  	/* program scheduler timer */
30740  	state->sup_env.will_schedule = false;
30741 -	
30742 -	raw_spin_unlock(&_global_env.lock);
30743 -	
30744 +		
30745  	/* NOTE: drops state->lock */
30746  	mc2_update_timer_and_unlock(state);
30747  
30748 +
30749 +	
30750  	if (prev != state->scheduled && is_realtime(prev)) {
30751  		struct mc2_task_state* tinfo = get_mc2_state(prev);
30752  		struct reservation* res = tinfo->res_info.client.reservation;
30753  		TRACE_TASK(prev, "PREV JOB scheduled_on = P%d\n", res->scheduled_on);
30754  		res->scheduled_on = NO_CPU;
30755  		TRACE_TASK(prev, "descheduled.\n");
30756 +		/* if prev is preempted and a global task, find the lowest cpu and reschedule */
30757 +		if (tinfo->has_departed == false && get_task_crit_level(prev) == CRIT_LEVEL_C) {
30758 +			int cpu = get_lowest_prio_cpu(res?res->priority:0);
30759 +			TRACE("LEVEL-C TASK PREEMPTED!! poking CPU %d to reschedule\n", cpu);
30760 +			if (cpu != NO_CPU) {
30761 +				raw_spin_lock(&_lowest_prio_cpu.lock);
30762 +				_lowest_prio_cpu.cpu_entries[cpu].will_schedule = true;
30763 +				raw_spin_unlock(&_lowest_prio_cpu.lock);
30764 +				litmus_reschedule(cpu);
30765 +			}
30766 +		}
30767  	}
30768  	if (state->scheduled) {
30769  		TRACE_TASK(state->scheduled, "scheduled.\n");
30770 @@ -737,34 +819,31 @@ static void mc2_task_resume(struct task_struct  *tsk)
30771  	else
30772  		state = local_cpu_state();
30773  
30774 -	raw_spin_lock_irqsave(&state->lock, flags);
30775 +	raw_spin_lock_irqsave(&_global_env.lock, flags);
30776  	/* Requeue only if self-suspension was already processed. */
30777  	if (tinfo->has_departed)
30778  	{
30779 -		raw_spin_lock(&_global_env.lock);
30780 +		raw_spin_lock(&state->lock);
30781  		/* Assumption: litmus_clock() is synchronized across cores,
30782  		 * since we might not actually be executing on tinfo->cpu
30783  		 * at the moment. */
30784  		if (tinfo->cpu != -1) {
30785  			sup_update_time(&state->sup_env, litmus_clock());
30786  		} else {
30787 -			//raw_spin_lock(&_global_env.lock);
30788  			TRACE("RESUME UPDATE ####\n");
30789  			gmp_update_time(&_global_env, litmus_clock());
30790  			TRACE("RESUME UPDATE $$$$\n");
30791 -			//raw_spin_unlock(&_global_env.lock);
30792  		}
30793  			
30794  		mc2_update_ghost_state(state);
30795  		task_arrives(state, tsk);
30796  		/* NOTE: drops state->lock */
30797  		TRACE_TASK(tsk, "mc2_resume()\n");
30798 -		raw_spin_unlock(&_global_env.lock);
30799  		mc2_update_timer_and_unlock(state);
30800  		local_irq_restore(flags);
30801  	} else {
30802  		TRACE_TASK(tsk, "resume event ignored, still scheduled\n");
30803 -		raw_spin_unlock_irqrestore(&state->lock, flags);
30804 +		raw_spin_unlock_irqrestore(&_global_env.lock, flags);
30805  	}
30806  
30807  	resume_legacy_task_model_updates(tsk);
30808 @@ -800,7 +879,9 @@ static long mc2_complete_job(void)
30809  		else
30810  			state = local_cpu_state();
30811  		
30812 +		raw_spin_lock(&_global_env.lock);
30813  		raw_spin_lock(&state->lock);
30814 +
30815  		env = &(state->sup_env.env);
30816  		
30817  		res = res_find_by_id(state, tinfo->mc2_param.res_id);
30818 @@ -808,7 +889,6 @@ static long mc2_complete_job(void)
30819  		if (get_task_crit_level(current) < CRIT_LEVEL_C) {
30820  			env->time_zero = tsk_rt(current)->sporadic_release_time;
30821  		} else {
30822 -			raw_spin_lock(&_global_env.lock);
30823  			_global_env.env.time_zero = tsk_rt(current)->sporadic_release_time;
30824  		}
30825  		
30826 @@ -826,10 +906,9 @@ static long mc2_complete_job(void)
30827  		res->env->change_state(res->env, res, RESERVATION_DEPLETED);
30828  		
30829  		TRACE_CUR("CHANGE NEXT_REP = %llu\n NEXT_UPDATE = %llu\n", res->next_replenishment, state->sup_env.next_scheduler_update);
30830 -		if (get_task_crit_level(current) == CRIT_LEVEL_C) {
30831 -			raw_spin_unlock(&_global_env.lock);
30832 -		}
30833 +		
30834  		raw_spin_unlock(&state->lock);
30835 +		raw_spin_unlock(&_global_env.lock);
30836  		
30837  		local_irq_restore(flags);
30838  	}
30839 @@ -908,10 +987,10 @@ static long mc2_admit_task(struct task_struct *tsk)
30840  
30841  		raw_spin_unlock_irqrestore(&state->lock, flags);
30842  	} else if (lv == CRIT_LEVEL_C) {
30843 -		local_irq_save(flags);
30844 +		raw_spin_lock_irqsave(&_global_env.lock, flags);
30845  		state = local_cpu_state();
30846 +		
30847  		raw_spin_lock(&state->lock);
30848 -		raw_spin_lock(&_global_env.lock);
30849  		
30850  		res = gmp_find_by_id(&_global_env, mp->res_id);
30851  
30852 @@ -931,9 +1010,8 @@ static long mc2_admit_task(struct task_struct *tsk)
30853  			tsk_rt(tsk)->task_params.budget_policy = NO_ENFORCEMENT;
30854  		}
30855  
30856 -		raw_spin_unlock(&_global_env.lock);
30857  		raw_spin_unlock(&state->lock);
30858 -		local_irq_restore(flags);
30859 +		raw_spin_unlock_irqrestore(&_global_env.lock, flags);
30860  	}
30861  	
30862  	preempt_enable();
30863 @@ -965,8 +1043,9 @@ static void mc2_task_new(struct task_struct *tsk, int on_runqueue,
30864  		state = cpu_state_for(tinfo->cpu);
30865  	
30866  	/* acquire the lock protecting the state and disable interrupts */
30867 -	raw_spin_lock_irqsave(&state->lock, flags);
30868 -
30869 +	raw_spin_lock_irqsave(&_global_env.lock, flags);
30870 +	raw_spin_lock(&state->lock);
30871 +	
30872  	if (is_running) {
30873  		state->scheduled = tsk;
30874  		/* make sure this task should actually be running */
30875 @@ -976,18 +1055,16 @@ static void mc2_task_new(struct task_struct *tsk, int on_runqueue,
30876  	if (on_runqueue || is_running) {
30877  		/* Assumption: litmus_clock() is synchronized across cores
30878  		 * [see comment in pres_task_resume()] */
30879 -		raw_spin_lock(&_global_env.lock);
30880  		mc2_update_time(lv, state, litmus_clock());
30881  		mc2_update_ghost_state(state);
30882  		task_arrives(state, tsk);
30883  		/* NOTE: drops state->lock */
30884  		TRACE("mc2_new()\n");
30885 -		raw_spin_unlock(&_global_env.lock);
30886  		
30887  		mc2_update_timer_and_unlock(state);
30888  		local_irq_restore(flags);
30889  	} else
30890 -		raw_spin_unlock_irqrestore(&state->lock, flags);
30891 +		raw_spin_unlock_irqrestore(&_global_env.lock, flags);
30892  
30893  	res = res_find_by_id(state, tinfo->mc2_param.res_id);
30894  	
30895 @@ -1015,9 +1092,9 @@ static long mc2_reservation_destroy(unsigned int reservation_id, int cpu)
30896  		/* if the reservation is global reservation */
30897  		local_irq_save(flags);
30898  		state = local_cpu_state();
30899 -		raw_spin_lock(&state->lock);
30900  		raw_spin_lock(&_global_env.lock);
30901 -	
30902 +		raw_spin_lock(&state->lock);
30903 +		
30904  		list_for_each_entry_safe(res, next, &_global_env.depleted_reservations, list) {
30905  			if (res->id == reservation_id) {
30906  				TRACE("DESTROY RES FOUND!!!\n");
30907 @@ -1050,8 +1127,8 @@ static long mc2_reservation_destroy(unsigned int reservation_id, int cpu)
30908  			}
30909  		}
30910  
30911 -		raw_spin_unlock(&_global_env.lock);
30912  		raw_spin_unlock(&state->lock);
30913 +		raw_spin_unlock(&_global_env.lock);
30914  		local_irq_restore(flags);
30915  	} else {
30916  		/* if the reservation is partitioned reservation */
30917 @@ -1125,9 +1202,10 @@ static void mc2_task_exit(struct task_struct *tsk)
30918  		state = cpu_state_for(tinfo->cpu);
30919  	else
30920  		state = local_cpu_state();
30921 +		
30922 +	raw_spin_lock_irqsave(&_global_env.lock, flags);
30923 +	raw_spin_lock(&state->lock);
30924  	
30925 -	raw_spin_lock_irqsave(&state->lock, flags);
30926 -
30927  	if (state->scheduled == tsk)
30928  		state->scheduled = NULL;
30929  
30930 @@ -1140,20 +1218,18 @@ static void mc2_task_exit(struct task_struct *tsk)
30931  		/* Assumption: litmus_clock() is synchronized across cores
30932  		 * [see comment in pres_task_resume()] */
30933  		
30934 -		raw_spin_lock(&_global_env.lock);
30935  		/* update both global and partitioned */
30936  		mc2_update_time(lv, state, litmus_clock());
30937 -		
30938  		mc2_update_ghost_state(state);
30939  		task_departs(tsk, 0);
30940  		
30941  		/* NOTE: drops state->lock */
30942  		TRACE("mc2_exit()\n");
30943 -		raw_spin_unlock(&_global_env.lock);
30944 +
30945  		mc2_update_timer_and_unlock(state);
30946  		local_irq_restore(flags);
30947  	} else
30948 -		raw_spin_unlock_irqrestore(&state->lock, flags);
30949 +		raw_spin_unlock_irqrestore(&_global_env.lock, flags);
30950  
30951  	kfree(tsk_rt(tsk)->plugin_state);
30952  	tsk_rt(tsk)->plugin_state = NULL;
30953 @@ -1204,8 +1280,10 @@ static long create_polling_reservation(
30954  		return -ENOMEM;
30955  
30956  	if (config->cpu != -1) {
30957 +		
30958 +		raw_spin_lock_irqsave(&_global_env.lock, flags);
30959  		state = cpu_state_for(config->cpu);
30960 -		raw_spin_lock_irqsave(&state->lock, flags);
30961 +		raw_spin_lock(&state->lock);
30962  
30963  		res = sup_find_by_id(&state->sup_env, config->id);
30964  		if (!res) {
30965 @@ -1225,7 +1303,9 @@ static long create_polling_reservation(
30966  			err = -EEXIST;
30967  		}
30968  
30969 -		raw_spin_unlock_irqrestore(&state->lock, flags);
30970 +		raw_spin_unlock(&state->lock);
30971 +		raw_spin_unlock_irqrestore(&_global_env.lock, flags);
30972 +
30973  	} else {
30974  		raw_spin_lock_irqsave(&_global_env.lock, flags);
30975  		
30976 @@ -1467,8 +1547,8 @@ static long mc2_activate_plugin(void)
30977  		hrtimer_init(&state->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
30978  		state->timer.function = on_scheduling_timer;
30979  		
30980 -		hrtimer_init(&state->g_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
30981 -		state->g_timer.function = on_global_scheduling_timer;
30982 +//		hrtimer_init(&state->g_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
30983 +//		state->g_timer.function = on_global_scheduling_timer;
30984  	}
30985  
30986  	mc2_setup_domain_proc();
30987 @@ -1481,6 +1561,7 @@ static void mc2_finish_switch(struct task_struct *prev)
30988  	struct mc2_cpu_state *state = local_cpu_state();
30989  	
30990  	state->scheduled = is_realtime(current) ? current : NULL;
30991 +	TRACE_TASK(prev, "FINISH CXS! complete=%d\n", tsk_rt(prev)->completed);
30992  }
30993  
30994  static long mc2_deactivate_plugin(void)
30995 @@ -1490,12 +1571,14 @@ static long mc2_deactivate_plugin(void)
30996  	struct reservation *res;
30997  	struct next_timer_event *event;
30998  
30999 +	raw_spin_lock(&_global_env.lock);
31000 +
31001  	for_each_online_cpu(cpu) {
31002  		state = cpu_state_for(cpu);
31003  		raw_spin_lock(&state->lock);
31004  
31005  		hrtimer_cancel(&state->timer);
31006 -		hrtimer_cancel(&state->g_timer);
31007 +//		hrtimer_cancel(&state->g_timer);
31008  
31009  		/* Delete all reservations --- assumes struct reservation
31010  		 * is prefix of containing struct. */
31011 @@ -1527,8 +1610,7 @@ static long mc2_deactivate_plugin(void)
31012  		raw_spin_unlock(&state->lock);
31013  	}
31014  
31015 -	raw_spin_lock(&_global_env.lock);
31016 -
31017 +	
31018  	while (!list_empty(&_global_env.active_reservations)) {
31019  		TRACE("RES FOUND!!!\n");
31020  		res = list_first_entry(
31021 -- 
31022 1.8.1.2
31023 
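A large part of the sched_mc2.c changes above is a consistent re-ordering of the locks: every path that needs both the shared level-C state and the per-CPU state now takes _global_env.lock first, then state->lock, with _lowest_prio_cpu.lock innermost, and releases them in reverse. The sketch below condenses that discipline into one illustrative function; mc2_locked_path_sketch is a made-up name, and the real code spreads this pattern across on_scheduling_timer(), mc2_schedule(), mc2_task_resume(), mc2_admit_task(), mc2_task_new(), mc2_task_exit(), and mc2_reservation_destroy().

/* Lock ordering established by this patch:
 *	_global_env.lock  >  state->lock  >  _lowest_prio_cpu.lock
 * Taking the locks in this fixed order on every path avoids ABBA
 * deadlocks between CPUs that manipulate the shared level-C reservations.
 */
static void mc2_locked_path_sketch(struct mc2_cpu_state *state)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&_global_env.lock, flags);	/* outermost */
	raw_spin_lock(&state->lock);				/* per-CPU state */

	/* ... advance sup_env/_global_env time, update ghost state ... */

	raw_spin_lock(&_lowest_prio_cpu.lock);			/* innermost */
	update_cpu_prio(state);
	raw_spin_unlock(&_lowest_prio_cpu.lock);

	raw_spin_unlock(&state->lock);
	raw_spin_unlock_irqrestore(&_global_env.lock, flags);
}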
31024 
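Two smaller interface changes in the same patch are worth noting. get_lowest_prio_cpu() now takes the preempting reservation's priority (its deadline) and returns NO_CPU when every level-C CPU is already running something more urgent, and gmp_update_time() now returns how many level-C reservations became schedulable so the caller can poke that many CPUs. The fragment below is condensed from the on_scheduling_timer() hunk above and wrapped in a made-up helper name; tracing, the partitioned (sup_env) path, and the _global_env.lock held by the real caller are omitted.

static void mc2_poke_lowest_cpus_sketch(void)
{
	int global_schedule_now, cpu;

	/* gmp_update_time() reports how many level-C reservations became
	 * runnable since the last update. */
	global_schedule_now = gmp_update_time(&_global_env, litmus_clock());

	/* Wake up that many CPUs, preferring idle ones and otherwise the
	 * CPU running the latest-deadline level-C job. */
	while (global_schedule_now--) {
		/* Passing priority 0 disables the "is the preemptor really
		 * more urgent?" filter inside get_lowest_prio_cpu(). */
		cpu = get_lowest_prio_cpu(0);
		if (cpu != NO_CPU) {
			raw_spin_lock(&_lowest_prio_cpu.lock);
			_lowest_prio_cpu.cpu_entries[cpu].will_schedule = true;
			raw_spin_unlock(&_lowest_prio_cpu.lock);
			litmus_reschedule(cpu);
		}
	}
}
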
31025 From d9f5d5edbda26349cf6bf4e7d371d6e91660fe0f Mon Sep 17 00:00:00 2001
31026 From: Namhoon Kim <namhoonk@cs.unc.edu>
31027 Date: Mon, 2 Mar 2015 15:57:54 -0500
31028 Subject: [PATCH 092/119] MC2: rework ghost-budget timer programming and global dispatch; trim tracing
31029 
31030 ---
31031  kernel/sched/litmus.c         |   2 +-
31032  litmus/polling_reservations.c |  28 ++--
31033  litmus/reservation.c          |  98 ++++--------
31034  litmus/sched_mc2.c            | 337 ++++++++++++++++++------------------------
31035  4 files changed, 187 insertions(+), 278 deletions(-)
31036 
31037 diff --git a/kernel/sched/litmus.c b/kernel/sched/litmus.c
31038 index b84361f..29cd69d 100644
31039 --- a/kernel/sched/litmus.c
31040 +++ b/kernel/sched/litmus.c
31041 @@ -73,7 +73,7 @@ litmus_schedule(struct rq *rq, struct task_struct *prev)
31042  			if (next->rt_param.stack_in_use == NO_CPU)
31043  				TRACE_TASK(next,"descheduled. Proceeding.\n");
31044  
31045 -			if (lt_before(_maybe_deadlock + 1000000000L,
31046 +			if (lt_before(_maybe_deadlock + 5000000000L,
31047  				      litmus_clock())) {
31048  				/* We've been spinning for 1s.
31049  				 * Something can't be right!
31050 diff --git a/litmus/polling_reservations.c b/litmus/polling_reservations.c
31051 index a3125eb..df1aeb0 100644
31052 --- a/litmus/polling_reservations.c
31053 +++ b/litmus/polling_reservations.c
31054 @@ -383,11 +383,7 @@ static lt_t td_time_remaining_until_end(struct table_driven_reservation *tdres)
31055  {
31056  	lt_t now = tdres->res.env->current_time;
31057  	lt_t end = tdres->cur_interval.end;
31058 -	TRACE("td_remaining(%u): start=%llu now=%llu end=%llu state=%d\n",
31059 -		tdres->res.id,
31060 -		tdres->cur_interval.start,
31061 -		now, end,
31062 -		tdres->res.state);
31063 +	//TRACE("td_remaining(%u): start=%llu now=%llu end=%llu state=%d\n", tdres->res.id,	tdres->cur_interval.start, now, end, tdres->res.state);
31064  	if (now >=  end)
31065  		return 0;
31066  	else
31067 @@ -400,24 +396,22 @@ static void td_replenish(
31068  	struct table_driven_reservation *tdres =
31069  		container_of(res, struct table_driven_reservation, res);
31070  
31071 -	TRACE("td_replenish(%u): expected_replenishment=%llu\n", res->id,
31072 -		res->next_replenishment);
31073 +	//TRACE("td_replenish(%u): expected_replenishment=%llu\n", res->id, res->next_replenishment);
31074  
31075  	/* figure out current interval */
31076  	tdres->cur_interval.start = tdres->major_cycle_start +
31077  		tdres->intervals[tdres->next_interval].start;
31078  	tdres->cur_interval.end =  tdres->major_cycle_start +
31079  		tdres->intervals[tdres->next_interval].end;
31080 -	TRACE("major_cycle_start=%llu => [%llu, %llu]\n",
31081 +/*	TRACE("major_cycle_start=%llu => [%llu, %llu]\n",
31082  		tdres->major_cycle_start,
31083  		tdres->cur_interval.start,
31084  		tdres->cur_interval.end);
31085 -
31086 +*/
31087  	/* reset budget */
31088  	res->cur_budget = td_time_remaining_until_end(tdres);
31089  	res->budget_consumed = 0;
31090 -	TRACE("td_replenish(%u): %s budget=%llu\n", res->id,
31091 -		res->cur_budget ? "" : "WARNING", res->cur_budget);
31092 +	//TRACE("td_replenish(%u): %s budget=%llu\n", res->id, res->cur_budget ? "" : "WARNING", res->cur_budget);
31093  
31094  	/* prepare next slot */
31095  	tdres->next_interval = (tdres->next_interval + 1) % tdres->num_intervals;
31096 @@ -428,8 +422,7 @@ static void td_replenish(
31097  	/* determine next time this reservation becomes eligible to execute */
31098  	res->next_replenishment  = tdres->major_cycle_start;
31099  	res->next_replenishment += tdres->intervals[tdres->next_interval].start;
31100 -	TRACE("td_replenish(%u): next_replenishment=%llu\n", res->id,
31101 -		res->next_replenishment);
31102 +	//TRACE("td_replenish(%u): next_replenishment=%llu\n", res->id, res->next_replenishment);
31103  
31104  
31105  	switch (res->state) {
31106 @@ -465,7 +458,7 @@ static void td_drain_budget(
31107  	 * how much time is left in this allocation interval. */
31108  
31109  	/* sanity check: we should never try to drain from future slots */
31110 -	TRACE("TD_DRAIN STATE(%d) [%llu,%llu]  %llu ?\n", res->state, tdres->cur_interval.start, tdres->cur_interval.end, res->env->current_time);
31111 +	//TRACE("TD_DRAIN STATE(%d) [%llu,%llu]  %llu ?\n", res->state, tdres->cur_interval.start, tdres->cur_interval.end, res->env->current_time);
31112  	//BUG_ON(tdres->cur_interval.start > res->env->current_time);
31113  	if (tdres->cur_interval.start > res->env->current_time)
31114  		TRACE("TD_DRAIN BUG!!!!!!!!!!\n");
31115 @@ -480,8 +473,7 @@ static void td_drain_budget(
31116  		case RESERVATION_ACTIVE_IDLE:
31117  		case RESERVATION_ACTIVE:
31118  			res->cur_budget = td_time_remaining_until_end(tdres);
31119 -			TRACE("td_drain_budget(%u): drained to budget=%llu\n",
31120 -				res->id, res->cur_budget);
31121 +			//TRACE("td_drain_budget(%u): drained to budget=%llu\n", res->id, res->cur_budget);
31122  			if (!res->cur_budget) {
31123  				res->env->change_state(res->env, res,
31124  					RESERVATION_DEPLETED);
31125 @@ -489,6 +481,10 @@ static void td_drain_budget(
31126  				/* sanity check budget calculation */
31127  				//BUG_ON(res->env->current_time >= tdres->cur_interval.end);
31128  				//BUG_ON(res->env->current_time < tdres->cur_interval.start);
31129 +				if (res->env->current_time >= tdres->cur_interval.end)
31130 +					printk(KERN_ALERT "TD_DRAIN_BUDGET WARNING1\n");
31131 +				if (res->env->current_time < tdres->cur_interval.start)
31132 +					printk(KERN_ALERT "TD_DRAIN_BUDGET WARNING2\n");
31133  			}
31134  
31135  			break;
31136 diff --git a/litmus/reservation.c b/litmus/reservation.c
31137 index 3ec18a2..86d2f6e 100644
31138 --- a/litmus/reservation.c
31139 +++ b/litmus/reservation.c
31140 @@ -4,6 +4,9 @@
31141  #include <litmus/litmus.h>
31142  #include <litmus/reservation.h>
31143  
31144 +#define TRACE(fmt, args...) do {} while (false)
31145 +#define TRACE_TASK(fmt, args...) do {} while (false)
31146 +
31147  void reservation_init(struct reservation *res)
31148  {
31149  	memset(res, sizeof(*res), 0);
31150 @@ -376,43 +379,6 @@ struct next_timer_event* gmp_find_event_by_time(struct gmp_reservation_environme
31151  	return NULL;
31152  }
31153  
31154 -/*
31155 -static void gmp_scheduler_update_at(
31156 -	struct gmp_reservation_environment* gmp_env, unsigned int id,
31157 -	event_type_t type, lt_t when)
31158 -{
31159 -	struct next_timer_event *nevent, *queued;
31160 -	struct list_head *pos;
31161 -	int found = 0;
31162 -	
31163 -	nevent = gmp_find_event_by_id(gmp_env, id);
31164 -	
31165 -	if (!nevent) {
31166 -		nevent = kzalloc(sizeof(*nevent), GFP_KERNEL);
31167 -		nevent->next_update = when;
31168 -		nevent->id = id;
31169 -		nevent->timer_armed_on = NO_CPU;
31170 -		nevent->type = type;
31171 -		
31172 -		list_for_each(pos, &gmp_env->next_events) {
31173 -			queued = list_entry(pos, struct next_timer_event, list);
31174 -			if (queued->next_update > nevent->next_update) {
31175 -				list_add(&nevent->list, pos->prev);
31176 -				found = 1;
31177 -				TRACE("NEXT_EVENT ADDED after %llu\n", queued->next_update);
31178 -				break;
31179 -			}
31180 -		}
31181 -		
31182 -		if (!found) {
31183 -			list_add_tail(&nevent->list, &gmp_env->next_events);
31184 -			TRACE("NEXT_EVENT ADDED at [0]\n");
31185 -		}
31186 -	} else {
31187 -		TRACE("EVENT FOUND at %llu T(%d), NEW EVENT %llu T(%d)\n", nevent->next_update, nevent->type, when, type);
31188 -	}
31189 -}
31190 -*/
31191  #define TIMER_RESOLUTION 100000L
31192  
31193  static void gmp_add_event(
31194 @@ -425,11 +391,12 @@ static void gmp_add_event(
31195  
31196  	//when = div64_u64(when, TIMER_RESOLUTION);
31197  	//when *= TIMER_RESOLUTION;
31198 -	
31199 +//printk(KERN_ALERT "GMP_ADD id=%d type=%d when=%llu\n", id, type, when);
31200  	nevent = gmp_find_event_by_id(gmp_env, id);
31201  	
31202  	if (!nevent || nevent->type != type) {
31203  		nevent = kzalloc(sizeof(*nevent), GFP_ATOMIC);
31204 +		BUG_ON(!nevent);
31205  		nevent->next_update = when;
31206  		nevent->id = id;
31207  		nevent->type = type;
31208 @@ -450,17 +417,19 @@ static void gmp_add_event(
31209  			TRACE("NEXT_EVENT id=%d type=%d update=%llu ADDED at TAIL\n", nevent->id, nevent->type, nevent->next_update);
31210  		}
31211  	} else {
31212 -		TRACE("EVENT FOUND id = %d type=%d when=%llu, NEW EVENT type=%d when=%llu\n", nevent->id, nevent->type, nevent->next_update, type, when);
31213 +		//TRACE("EVENT FOUND id = %d type=%d when=%llu, NEW EVENT type=%d when=%llu\n", nevent->id, nevent->type, nevent->next_update, type, when);
31214 +; //printk(KERN_ALERT "EVENT FOUND id = %d type=%d when=%llu, NEW EVENT type=%d when=%llu\n", nevent->id, nevent->type, nevent->next_update, type, when);
31215  	}
31216  	
31217 -	TRACE("======START PRINTING EVENT LIST======\n");
31218 -	gmp_print_events(gmp_env, litmus_clock());
31219 -	TRACE("======FINISH PRINTING EVENT LIST======\n");
31220 +	//TRACE("======START PRINTING EVENT LIST======\n");
31221 +	//gmp_print_events(gmp_env, litmus_clock());
31222 +	//TRACE("======FINISH PRINTING EVENT LIST======\n");
31223  }
31224  
31225  void gmp_add_event_after(
31226  	struct gmp_reservation_environment* gmp_env, lt_t timeout, unsigned int id, event_type_t type)
31227  {
31228 +	//printk(KERN_ALERT "ADD_EVENT_AFTER id = %d\n", id);
31229  	gmp_add_event(gmp_env, gmp_env->env.current_time + timeout, id, type);
31230  }
31231  
31232 @@ -472,19 +441,24 @@ static void gmp_queue_depleted(
31233  	struct reservation *queued;
31234  	int found = 0;
31235  
31236 +//printk(KERN_ALERT "R%d request to enqueue depleted_list\n", res->id);
31237 +	
31238  	list_for_each(pos, &gmp_env->depleted_reservations) {
31239  		queued = list_entry(pos, struct reservation, list);
31240 -		if (queued && queued->next_replenishment > res->next_replenishment) {
31241 +		if (queued && (queued->next_replenishment > res->next_replenishment)) {
31242 +//printk(KERN_ALERT "QUEUED R%d %llu\n", queued->id, queued->next_replenishment);
31243  			list_add(&res->list, pos->prev);
31244  			found = 1;
31245 +			break;
31246  		}
31247  	}
31248  
31249  	if (!found)
31250  		list_add_tail(&res->list, &gmp_env->depleted_reservations);
31251 +
31252  	TRACE("R%d queued to depleted_list\n", res->id);
31253 +//printk(KERN_ALERT "R%d queued to depleted_list\n", res->id);
31254  	gmp_add_event(gmp_env, res->next_replenishment, res->id, EVENT_REPLENISH);
31255 -	res->event_added = 1;
31256  }
31257  
31258  static void gmp_queue_active(
31259 @@ -520,10 +494,8 @@ static void gmp_queue_reservation(
31260  	struct gmp_reservation_environment* gmp_env,
31261  	struct reservation *res)
31262  {
31263 -	if (res == NULL) {
31264 -		BUG();
31265 -		return;
31266 -	}
31267 +
31268 +//printk(KERN_ALERT "DEBUG: Passed %s %d %p R%d STATE %d\n",__FUNCTION__,__LINE__, gmp_env, res->id, res->state);
31269  	switch (res->state) {
31270  		case RESERVATION_INACTIVE:
31271  			list_add(&res->list, &gmp_env->inactive_reservations);
31272 @@ -584,8 +556,7 @@ static void gmp_charge_budget(
31273  		{
31274  			/* make sure scheduler is invoked when this reservation expires
31275  			 * its remaining budget */
31276 -			 TRACE("requesting gmp_scheduler update for reservation %u in %llu nanoseconds\n",
31277 -				res->id, res->cur_budget);
31278 +			 TRACE("requesting gmp_scheduler update for reservation %u in %llu nanoseconds\n", res->id, res->cur_budget);
31279  			 gmp_add_event_after(gmp_env, res->cur_budget, res->id, EVENT_DRAIN);
31280  			 res->event_added = 1;
31281  		}
31282 @@ -593,7 +564,7 @@ static void gmp_charge_budget(
31283  			/* stop at the first ACTIVE reservation */
31284  		//	break;
31285  	}
31286 -	//TRACE("finished charging budgets\n");
31287 +	TRACE("finished charging budgets\n");
31288  }
31289  
31290  static void gmp_replenish_budgets(struct gmp_reservation_environment* gmp_env)
31291 @@ -611,24 +582,6 @@ static void gmp_replenish_budgets(struct gmp_reservation_environment* gmp_env)
31292  		}
31293  	}
31294  	TRACE("finished replenishing budgets\n");
31295 -
31296 -	/* request a scheduler update at the next replenishment instant */
31297 -	list_for_each_safe(pos, next, &gmp_env->depleted_reservations) {
31298 -		res = list_entry(pos, struct reservation, list);
31299 -		if (res->event_added == 0) {
31300 -			gmp_add_event(gmp_env, res->next_replenishment, res->id, EVENT_REPLENISH);
31301 -			res->event_added = 1;
31302 -		}
31303 -	}	
31304 -	
31305 -/*
31306 -	res = list_first_entry_or_null(&gmp_env->depleted_reservations,
31307 -		struct reservation, list);
31308 -	if (res && res->event_added == 0) {
31309 -		gmp_add_event(gmp_env, res->next_replenishment, res->id, EVENT_REPLENISH);
31310 -		res->event_added = 1;
31311 -	}
31312 -*/
31313  }
31314  
31315  #define EPSILON	50
31316 @@ -644,7 +597,7 @@ int gmp_update_time(
31317  	/* If the time didn't advance, there is nothing to do.
31318  	 * This check makes it safe to call sup_advance_time() potentially
31319  	 * multiple times (e.g., via different code paths. */
31320 -	TRACE("(gmp_update_time) now: %llu, current_time: %llu\n", now, gmp_env->env.current_time);
31321 +	//TRACE("(gmp_update_time) now: %llu, current_time: %llu\n", now, gmp_env->env.current_time);
31322  	if (unlikely(now <= gmp_env->env.current_time + EPSILON))
31323  		return 0;
31324  
31325 @@ -660,12 +613,15 @@ int gmp_update_time(
31326  	/* check if any budgets where replenished */
31327  	//TRACE("REPLENISH###\n");
31328  	gmp_replenish_budgets(gmp_env);
31329 +
31330  	
31331  	list_for_each_entry_safe(event, next, &gmp_env->next_events, list) {
31332  		if (event->next_update < now) {
31333  			list_del(&event->list);
31334  			TRACE("EVENT at %llu IS DELETED\n", event->next_update);
31335  			kfree(event);
31336 +		} else {
31337 +			break;
31338  		}
31339  	}		
31340  	
31341 @@ -673,7 +629,7 @@ int gmp_update_time(
31342  	
31343  	ret = min(gmp_env->schedule_now, NR_CPUS);
31344  	gmp_env->schedule_now = 0;
31345 -	
31346 +
31347  	return ret;
31348  }
31349  
31350 diff --git a/litmus/sched_mc2.c b/litmus/sched_mc2.c
31351 index b3390dc..f7758f2 100644
31352 --- a/litmus/sched_mc2.c
31353 +++ b/litmus/sched_mc2.c
31354 @@ -62,7 +62,6 @@ struct mc2_task_state {
31355  struct crit_entry {
31356  	enum crit_level level;
31357  	struct task_struct *running;
31358 -	//struct hrtimer ghost_timer;
31359  };
31360  
31361  /* mc2_cpu_state - maintain the scheduled state and ghost jobs
31362 @@ -74,7 +73,6 @@ struct mc2_cpu_state {
31363  
31364  	struct sup_reservation_environment sup_env;
31365  	struct hrtimer timer;
31366 -	//struct hrtimer g_timer;
31367  
31368  	int cpu;
31369  	struct task_struct* scheduled;
31370 @@ -221,7 +219,9 @@ static int get_lowest_prio_cpu(lt_t priority)
31371  		raw_spin_unlock(&_lowest_prio_cpu.lock);
31372  		TRACE("CPU %d (local) is the lowest!\n", ce->cpu);
31373  		return ce->cpu;
31374 -	}	
31375 +	} else {
31376 +		TRACE("Local CPU will_schedule=%d, scheduled=(%s/%d)\n", ce->will_schedule, ce->scheduled ? (ce->scheduled)->comm : "null", ce->scheduled ? (ce->scheduled)->pid : 0);
31377 +	}
31378  
31379  	for_each_online_cpu(cpu) {
31380  		ce = &_lowest_prio_cpu.cpu_entries[cpu];
31381 @@ -287,6 +287,10 @@ static void mc2_update_timer_and_unlock(struct mc2_cpu_state *state)
31382  	list_for_each_entry_safe(event, next, &_global_env.next_events, list) {
31383  		/* If the event time is already passed, we call schedule() on
31384  		   the lowest priority cpu */
31385 +		if (event->next_update >= update) {
31386 +			break;
31387 +		}
31388 +		
31389  		if (event->next_update < litmus_clock()) {
31390  			if (event->timer_armed_on == NO_CPU) {
31391  				struct reservation *res = gmp_find_by_id(&_global_env, event->id);
31392 @@ -351,70 +355,33 @@ static void mc2_update_timer_and_unlock(struct mc2_cpu_state *state)
31393  			litmus_reschedule(state->cpu);
31394  		}
31395  	}
31396 -
31397 -#if 0	
31398 -	raw_spin_lock(&_global_env.lock);
31399 -	list_for_each_entry_safe(event, next, &_global_env.next_events, list) {
31400 -		if (event->timer_armed_on == NO_CPU) {
31401 -			/* If the event time is already passed, we call schedule() on
31402 -			   the lowest priority cpu */
31403 -			if (event->next_update < litmus_clock()) {
31404 -				int cpu = get_lowest_prio_cpu();
31405 -				TRACE("GLOBAL EVENT PASSED!! poking CPU %d to reschedule\n", cpu);
31406 -				list_del(&event->list);
31407 -				kfree(event);
31408 -				if (cpu != NO_CPU) {
31409 -					raw_spin_lock(&_lowest_prio_cpu.lock);
31410 -					_lowest_prio_cpu.cpu_entries[cpu].will_schedule = true;
31411 -					raw_spin_unlock(&_lowest_prio_cpu.lock);
31412 -					litmus_reschedule(cpu);
31413 -				}
31414 -			} else if (!hrtimer_active(&state->g_timer)) {
31415 -				int ret;
31416 -			
31417 -				raw_spin_unlock(&_global_env.lock);
31418 -				found_event = 1;
31419 -			
31420 -				TRACE("setting global scheduler timer for %llu\n", 
31421 -				       event->next_update);
31422 -				ret = __hrtimer_start_range_ns(&state->g_timer,
31423 -						ns_to_ktime(event->next_update),
31424 -						0 /* timer coalescing slack */,
31425 -						HRTIMER_MODE_ABS_PINNED,
31426 -						0 /* wakeup */);
31427 -				if (!ret) {
31428 -					event->timer_armed_on = state->cpu;
31429 -					break;
31430 -				}
31431 -			}				
31432 -		}
31433 -	}
31434 -	if (found_event == 0)
31435 -		raw_spin_unlock(&_global_env.lock);
31436 -#endif	
31437  }
31438  
31439  /* mc2_update_ghost_state - Update crit_entries[] to track ghost jobs
31440   *                          If the budget of a ghost is exhausted,
31441   *                          clear is_ghost and reschedule
31442   */
31443 -static void mc2_update_ghost_state(struct mc2_cpu_state *state)
31444 +static lt_t mc2_update_ghost_state(struct mc2_cpu_state *state)
31445  {
31446  	int lv = 0;
31447  	struct crit_entry* ce;
31448  	struct reservation *res;
31449  	struct mc2_task_state *tinfo;
31450 -
31451 +	lt_t ret = ULLONG_MAX;
31452 +	
31453  	BUG_ON(!state);
31454  	
31455  	for (lv = 0; lv < NUM_CRIT_LEVELS; lv++) {
31456  		ce = &state->crit_entries[lv];
31457  		if (ce->running != NULL) {
31458 +//printk(KERN_ALERT "P%d ce->running : %s/%d\n", state->cpu,  ce->running ? (ce->running)->comm : "null", ce->running ? (ce->running)->pid : 0);
31459  			tinfo = get_mc2_state(ce->running);
31460  			if (!tinfo)
31461  				continue;
31462  			
31463  			res = res_find_by_id(state, tinfo->mc2_param.res_id);
31464 +			BUG_ON(!res);
31465 +//printk(KERN_ALERT "R%d found!\n", res->id);			
31466  			TRACE("LV %d running id %d budget %llu\n", 
31467  			       lv, tinfo->mc2_param.res_id, res->cur_budget);
31468  			/* If the budget is exhausted, clear is_ghost and reschedule */
31469 @@ -432,7 +399,7 @@ static void mc2_update_ghost_state(struct mc2_cpu_state *state)
31470  						  struct reservation, list);
31471  					if (res)
31472  						litmus_reschedule_local();
31473 -				} else {
31474 +				} else if (lv == CRIT_LEVEL_C) {
31475  					res = list_first_entry_or_null(
31476  					      &_global_env.active_reservations,
31477  						  struct reservation, list);
31478 @@ -440,11 +407,16 @@ static void mc2_update_ghost_state(struct mc2_cpu_state *state)
31479  						litmus_reschedule(state->cpu);
31480  				}
31481  			} else {
31482 -				TRACE("GHOST NOT FINISH id %d budget %llu\n", res->id, res->cur_budget);
31483 -				gmp_add_event_after(&_global_env, res->cur_budget, res->id, EVENT_DRAIN);
31484 +				//TRACE("GHOST NOT FINISH id %d budget %llu\n", res->id, res->cur_budget);
31485 +				//gmp_add_event_after(&_global_env, res->cur_budget, res->id, EVENT_DRAIN);
31486 +				if (ret > res->cur_budget) {
31487 +					ret = res->cur_budget;
31488 +				}
31489  			}
31490  		}
31491  	}
31492 +	
31493 +	return ret;
31494  }			
31495  
31496  /* update_cpu_prio - Update cpu's priority
31497 @@ -474,84 +446,6 @@ static void update_cpu_prio(struct mc2_cpu_state *state)
31498  	}
31499  };
31500  
31501 -#if 0
31502 -/* on_global_scheduling_timer - Process the budget accounting (replenish
31503 - *                              and charge)
31504 - */								
31505 -static enum hrtimer_restart on_global_scheduling_timer(struct hrtimer *timer)
31506 -{
31507 -	unsigned long flags;
31508 -	enum hrtimer_restart restart = HRTIMER_NORESTART;
31509 -	struct mc2_cpu_state *state;
31510 -	struct next_timer_event *event, *next;
31511 -	int schedule_now;
31512 -	lt_t update, now;
31513 -	int found_event = 0;
31514 -
31515 -	state = container_of(timer, struct mc2_cpu_state, g_timer);
31516 -
31517 -	raw_spin_lock_irqsave(&state->lock, flags);
31518 -	
31519 -	/* The scheduling timer should only fire on the local CPU, because
31520 -	 * otherwise deadlocks via timer_cancel() are possible.
31521 -	 * Note: this does not interfere with dedicated interrupt handling, as
31522 -	 * even under dedicated interrupt handling scheduling timers for
31523 -	 * budget enforcement must occur locally on each CPU.
31524 -	 */
31525 -	//BUG_ON(state->cpu != raw_smp_processor_id());
31526 -	if (state->cpu != raw_smp_processor_id())
31527 -		TRACE("BUG!!!!!!!!!!!!! TIMER FIRED ON THE OTHER CPU\n");
31528 -
31529 -	raw_spin_lock(&_global_env.lock);
31530 -	
31531 -	update = litmus_clock();
31532 -	TRACE("GLOBAL TIMER FIRED at %llu\n", update);
31533 -	
31534 -	/* The event can be processed by the other cpus. So, if there is no 
31535 -	   events to process, we do nothing */
31536 -	list_for_each_entry_safe(event, next, &_global_env.next_events, list) {
31537 -		if (event->next_update < update) {
31538 -			found_event = 1;
31539 -			list_del(&event->list);
31540 -			TRACE("EVENT at %llu IS DELETED\n", event->next_update);
31541 -			kfree(event);
31542 -		}
31543 -	}			
31544 -	
31545 -	if (!found_event) {
31546 -		goto unlock;
31547 -	}
31548 -	
31549 -	/* gmp_update_timer returns how many tasks become ACTIVE */
31550 -	schedule_now = gmp_update_time(&_global_env, update);
31551 -	
31552 -	mc2_update_ghost_state(state);
31553 -	
31554 -	now = _global_env.env.current_time;
31555 -	
31556 -	TRACE_CUR("on_global_scheduling_timer at %llu, upd:%llu (for cpu=%d) SCHEDULE_NOW = %d\n",
31557 -		now, update, state->cpu, schedule_now);
31558 -
31559 -	/* Find the lowest cpu, and call reschedule */
31560 -	while (schedule_now--) {
31561 -		int cpu = get_lowest_prio_cpu();
31562 -		if (cpu != NO_CPU) {
31563 -			raw_spin_lock(&_lowest_prio_cpu.lock);
31564 -			_lowest_prio_cpu.cpu_entries[cpu].will_schedule = true;
31565 -			raw_spin_unlock(&_lowest_prio_cpu.lock);
31566 -			TRACE("LOWEST CPU = P%d\n", cpu);
31567 -			litmus_reschedule(cpu);
31568 -		}
31569 -	} 
31570 -
31571 -unlock:
31572 -	raw_spin_unlock(&_global_env.lock);
31573 -	raw_spin_unlock_irqrestore(&state->lock, flags);
31574 -	
31575 -	return restart;
31576 -}
31577 -#endif
31578 -
31579  /* on_scheduling_timer - timer event for partitioned tasks
31580   */                       
31581  static enum hrtimer_restart on_scheduling_timer(struct hrtimer *timer)
31582 @@ -561,7 +455,8 @@ static enum hrtimer_restart on_scheduling_timer(struct hrtimer *timer)
31583  	struct mc2_cpu_state *state;
31584  	lt_t update, now;
31585  	int global_schedule_now;
31586 -
31587 +	lt_t remain_budget;
31588 +	
31589  	state = container_of(timer, struct mc2_cpu_state, timer);
31590  
31591  	/* The scheduling timer should only fire on the local CPU, because
31592 @@ -575,18 +470,22 @@ static enum hrtimer_restart on_scheduling_timer(struct hrtimer *timer)
31593  	TRACE("TIMER FIRED at %llu\n", litmus_clock());
31594  	raw_spin_lock_irqsave(&_global_env.lock, flags);
31595  	raw_spin_lock(&state->lock);
31596 -	
31597 -	sup_update_time(&state->sup_env, litmus_clock());
31598 -	global_schedule_now = gmp_update_time(&_global_env, litmus_clock());
31599 -	
31600 -	mc2_update_ghost_state(state);
31601 +//printk(KERN_ALERT "P%d on_scheduling_timer() hold lock %s/%d\n", state->cpu, current ? (current)->comm : "null", current ? (current)->pid : 0);			
31602 +	now = litmus_clock();
31603 +	sup_update_time(&state->sup_env, now);
31604 +	global_schedule_now = gmp_update_time(&_global_env, now);
31605 +//printk(KERN_ALERT "P%d update_time in timer() %s/%d\n", state->cpu, current ? (current)->comm : "null", current ? (current)->pid : 0);			
31606 +	remain_budget = mc2_update_ghost_state(state);
31607  	
31608  	update = state->sup_env.next_scheduler_update;
31609  	now = state->sup_env.env.current_time;
31610  
31611 -	TRACE_CUR("on_scheduling_timer at %llu, upd:%llu (for cpu=%d) g_schedule_now:%d\n",
31612 -		now, update, state->cpu, global_schedule_now);
31613 -
31614 +	if (remain_budget != ULLONG_MAX && update > now + remain_budget) {
31615 +		update = now + remain_budget;
31616 +	}
31617 +	
31618 +	//TRACE_CUR("on_scheduling_timer at %llu, upd:%llu (for cpu=%d) g_schedule_now:%d\n", now, update, state->cpu, global_schedule_now);
31619 +//printk(KERN_ALERT "on_scheduling_timer at %llu, upd:%llu (for cpu=%d) g_schedule_now:%d\n", now, update, state->cpu, global_schedule_now);
31620  	if (update <= now) {
31621  		litmus_reschedule_local();
31622  	} else if (update != SUP_NO_SCHEDULER_UPDATE) {
31623 @@ -594,6 +493,8 @@ static enum hrtimer_restart on_scheduling_timer(struct hrtimer *timer)
31624  		restart = HRTIMER_RESTART;
31625  	}
31626  
31627 +	BUG_ON(global_schedule_now < 0 || global_schedule_now > 4);
31628 +	
31629  	/* Find the lowest cpu, and call reschedule */
31630  	while (global_schedule_now--) {
31631  		int cpu = get_lowest_prio_cpu(0);
31632 @@ -601,14 +502,14 @@ static enum hrtimer_restart on_scheduling_timer(struct hrtimer *timer)
31633  			raw_spin_lock(&_lowest_prio_cpu.lock);
31634  			_lowest_prio_cpu.cpu_entries[cpu].will_schedule = true;
31635  			raw_spin_unlock(&_lowest_prio_cpu.lock);
31636 -			TRACE("LOWEST CPU = P%d\n", cpu);
31637 +			//TRACE("LOWEST CPU = P%d\n", cpu);
31638  			litmus_reschedule(cpu);
31639  		}
31640  	} 
31641  	
31642  	raw_spin_unlock(&state->lock);
31643  	raw_spin_unlock_irqrestore(&_global_env.lock, flags);
31644 -
31645 +//printk(KERN_ALERT "P%d on_scheduling_timer() release lock %s/%d\n", state->cpu, current ? (current)->comm : "null", current ? (current)->pid : 0);	
31646  	return restart;
31647  }
31648  
31649 @@ -651,6 +552,13 @@ struct task_struct* mc2_dispatch(struct sup_reservation_environment* sup_env, st
31650  	}
31651  	
31652  	/* no eligible level A or B tasks exists */
31653 +	/* check the ghost job */
31654 +	ce = &state->crit_entries[CRIT_LEVEL_C];
31655 +	if (ce->running) {
31656 +		TRACE_TASK(ce->running," is GHOST\n");
31657 +		return NULL;
31658 +	}
31659 +	
31660  	cur_priority = _lowest_prio_cpu.cpu_entries[state->cpu].deadline;
31661  	
31662  	TRACE("****** ACTIVE LIST ******\n");
31663 @@ -663,21 +571,24 @@ struct task_struct* mc2_dispatch(struct sup_reservation_environment* sup_env, st
31664  				lv = get_task_crit_level(tsk);
31665  				if (lv == NUM_CRIT_LEVELS) {
31666  					gmp_add_event_after(&_global_env, res->cur_budget, res->id, EVENT_DRAIN);
31667 -					res->event_added = 1;
31668 +					//res->event_added = 1;
31669  					return tsk;
31670 -				} else {
31671 -					ce = &state->crit_entries[lv];
31672 -					if (likely(!ce->running)) {
31673 +				} else if (lv == CRIT_LEVEL_C) {
31674 +					//ce = &state->crit_entries[lv];
31675 +					//if (likely(!ce->running)) {
31676  						gmp_add_event_after(&_global_env, res->cur_budget, res->id, EVENT_DRAIN);
31677  						res->event_added = 1;
31678  						res->blocked_by_ghost = 0;
31679  						res->is_ghost = 0;
31680  						res->scheduled_on = state->cpu;
31681  						return tsk;
31682 -					} else {
31683 -						res->blocked_by_ghost = 1;
31684 -						TRACE_TASK(ce->running, " is GHOST\n");
31685 -					}
31686 +					//} else {
31687 +					//	res->blocked_by_ghost = 1;
31688 +					//	TRACE_TASK(ce->running, " is GHOST\n");
31689 +					//	return NULL;
31690 +					//}
31691 +				} else {
31692 +					BUG();
31693  				}
31694  			}
31695  		}
31696 @@ -711,8 +622,9 @@ static void post_schedule(struct task_struct *next)
31697  static struct task_struct* mc2_schedule(struct task_struct * prev)
31698  {
31699  	/* next == NULL means "schedule background work". */
31700 +	lt_t now;
31701  	struct mc2_cpu_state *state = local_cpu_state();
31702 -	
31703 +
31704  	pre_schedule(prev);
31705  	
31706  	raw_spin_lock(&_global_env.lock);
31707 @@ -721,17 +633,18 @@ static struct task_struct* mc2_schedule(struct task_struct * prev)
31708  	//BUG_ON(state->scheduled && state->scheduled != prev);
31709  	//BUG_ON(state->scheduled && !is_realtime(prev));
31710  	if (state->scheduled && state->scheduled != prev)
31711 -		TRACE("BUG1!!!!!!!!\n");
31712 +		printk(KERN_ALERT "BUG1!!!!!!!! %s %s\n", state->scheduled ? (state->scheduled)->comm : "null", prev ? (prev)->comm : "null");
31713  	if (state->scheduled && !is_realtime(prev))
31714 -		TRACE("BUG2!!!!!!!!\n");
31715 +		printk(KERN_ALERT "BUG2!!!!!!!! \n");
31716  
31717  	/* update time */
31718  	state->sup_env.will_schedule = true;
31719  
31720 -	sup_update_time(&state->sup_env, litmus_clock());
31721 -	gmp_update_time(&_global_env, litmus_clock());
31722 -	
31723 -	mc2_update_ghost_state(state);
31724 +	now = litmus_clock();
31725 +	sup_update_time(&state->sup_env, now);
31726 +	gmp_update_time(&_global_env, now);
31727 +		
31728 +	mc2_update_ghost_state(state);	
31729  	
31730  	/* remove task from reservation if it blocks */
31731  	if (is_realtime(prev) && !is_running(prev))
31732 @@ -767,14 +680,17 @@ static struct task_struct* mc2_schedule(struct task_struct * prev)
31733  		TRACE_TASK(prev, "descheduled.\n");
31734  		/* if prev is preempted and a global task, find the lowest cpu and reschedule */
31735  		if (tinfo->has_departed == false && get_task_crit_level(prev) == CRIT_LEVEL_C) {
31736 -			int cpu = get_lowest_prio_cpu(res?res->priority:0);
31737 -			TRACE("LEVEL-C TASK PREEMPTED!! poking CPU %d to reschedule\n", cpu);
31738 +			int cpu;
31739 +			raw_spin_lock(&_global_env.lock);
31740 +			cpu = get_lowest_prio_cpu(res?res->priority:0);
31741 +			//TRACE("LEVEL-C TASK PREEMPTED!! poking CPU %d to reschedule\n", cpu);
31742  			if (cpu != NO_CPU) {
31743  				raw_spin_lock(&_lowest_prio_cpu.lock);
31744  				_lowest_prio_cpu.cpu_entries[cpu].will_schedule = true;
31745  				raw_spin_unlock(&_lowest_prio_cpu.lock);
31746  				litmus_reschedule(cpu);
31747  			}
31748 +			raw_spin_unlock(&_global_env.lock);
31749  		}
31750  	}
31751  	if (state->scheduled) {
31752 @@ -814,12 +730,14 @@ static void mc2_task_resume(struct task_struct  *tsk)
31753  
31754  	TRACE_TASK(tsk, "thread wakes up at %llu\n", litmus_clock());
31755  
31756 +	local_irq_save(flags);
31757  	if (tinfo->cpu != -1)
31758  		state = cpu_state_for(tinfo->cpu);
31759  	else
31760  		state = local_cpu_state();
31761  
31762 -	raw_spin_lock_irqsave(&_global_env.lock, flags);
31763 +	raw_spin_lock(&_global_env.lock);
31764 +//printk(KERN_ALERT "P%d resume() hold lock\n", state->cpu);	
31765  	/* Requeue only if self-suspension was already processed. */
31766  	if (tinfo->has_departed)
31767  	{
31768 @@ -830,22 +748,25 @@ static void mc2_task_resume(struct task_struct  *tsk)
31769  		if (tinfo->cpu != -1) {
31770  			sup_update_time(&state->sup_env, litmus_clock());
31771  		} else {
31772 -			TRACE("RESUME UPDATE ####\n");
31773 +			//TRACE("RESUME UPDATE ####\n");
31774  			gmp_update_time(&_global_env, litmus_clock());
31775 -			TRACE("RESUME UPDATE $$$$\n");
31776 +			//TRACE("RESUME UPDATE $$$$\n");
31777  		}
31778  			
31779  		mc2_update_ghost_state(state);
31780  		task_arrives(state, tsk);
31781  		/* NOTE: drops state->lock */
31782  		TRACE_TASK(tsk, "mc2_resume()\n");
31783 -		mc2_update_timer_and_unlock(state);
31784 -		local_irq_restore(flags);
31785 +		mc2_update_timer_and_unlock(state);	
31786 +//printk(KERN_ALERT "P%d resume() dropped lock\n", state->cpu);			
31787  	} else {
31788  		TRACE_TASK(tsk, "resume event ignored, still scheduled\n");
31789 -		raw_spin_unlock_irqrestore(&_global_env.lock, flags);
31790 +		raw_spin_unlock(&_global_env.lock);
31791 +//printk(KERN_ALERT "P%d resume() release lock\n", state->cpu);			
31792  	}
31793  
31794 +	local_irq_restore(flags);
31795 +	
31796  	resume_legacy_task_model_updates(tsk);
31797  }
31798  
31799 @@ -870,6 +791,7 @@ static long mc2_complete_job(void)
31800  		struct reservation *res;
31801  		unsigned long flags;
31802  
31803 +		preempt_disable();
31804  		local_irq_save(flags);
31805  		
31806  		tinfo = get_mc2_state(current);
31807 @@ -881,7 +803,7 @@ static long mc2_complete_job(void)
31808  		
31809  		raw_spin_lock(&_global_env.lock);
31810  		raw_spin_lock(&state->lock);
31811 -
31812 +//printk(KERN_ALERT "P%d complete() hold lock\n", state->cpu);
31813  		env = &(state->sup_env.env);
31814  		
31815  		res = res_find_by_id(state, tinfo->mc2_param.res_id);
31816 @@ -905,12 +827,13 @@ static long mc2_complete_job(void)
31817  		res->cur_budget = 0;
31818  		res->env->change_state(res->env, res, RESERVATION_DEPLETED);
31819  		
31820 -		TRACE_CUR("CHANGE NEXT_REP = %llu\n NEXT_UPDATE = %llu\n", res->next_replenishment, state->sup_env.next_scheduler_update);
31821 +		//TRACE_CUR("CHANGE NEXT_REP = %llu\n NEXT_UPDATE = %llu\n", res->next_replenishment, state->sup_env.next_scheduler_update);
31822  		
31823  		raw_spin_unlock(&state->lock);
31824  		raw_spin_unlock(&_global_env.lock);
31825 -		
31826 +//printk(KERN_ALERT "P%d complete() release lock\n", state->cpu);				
31827  		local_irq_restore(flags);
31828 +		preempt_enable();
31829  	}
31830  	sched_trace_task_completion(current, 0);
31831  	
31832 @@ -937,6 +860,7 @@ static long mc2_complete_job(void)
31833  	}
31834  
31835  	TRACE_CUR("mc2_complete_job returns at %llu\n", litmus_clock());
31836 +
31837  	return err;
31838  }
31839  
31840 @@ -988,6 +912,7 @@ static long mc2_admit_task(struct task_struct *tsk)
31841  		raw_spin_unlock_irqrestore(&state->lock, flags);
31842  	} else if (lv == CRIT_LEVEL_C) {
31843  		raw_spin_lock_irqsave(&_global_env.lock, flags);
31844 +//printk(KERN_ALERT "admit() hold lock\n");		
31845  		state = local_cpu_state();
31846  		
31847  		raw_spin_lock(&state->lock);
31848 @@ -1012,6 +937,7 @@ static long mc2_admit_task(struct task_struct *tsk)
31849  
31850  		raw_spin_unlock(&state->lock);
31851  		raw_spin_unlock_irqrestore(&_global_env.lock, flags);
31852 +//printk(KERN_ALERT "admit() release lock\n");		
31853  	}
31854  	
31855  	preempt_enable();
31856 @@ -1033,25 +959,30 @@ static void mc2_task_new(struct task_struct *tsk, int on_runqueue,
31857  	struct mc2_cpu_state *state; // = cpu_state_for(tinfo->cpu);
31858  	struct reservation *res;
31859  	enum crit_level lv = get_task_crit_level(tsk);
31860 +	lt_t release = 0;
31861  
31862  	TRACE_TASK(tsk, "new RT task %llu (on_rq:%d, running:%d)\n",
31863  		   litmus_clock(), on_runqueue, is_running);
31864  
31865 +	local_irq_save(flags);
31866  	if (tinfo->cpu == -1)
31867  		state = local_cpu_state();
31868  	else 
31869  		state = cpu_state_for(tinfo->cpu);
31870  	
31871  	/* acquire the lock protecting the state and disable interrupts */
31872 -	raw_spin_lock_irqsave(&_global_env.lock, flags);
31873 +	raw_spin_lock(&_global_env.lock);
31874  	raw_spin_lock(&state->lock);
31875 -	
31876 +//printk(KERN_ALERT "new() hold lock R%d\n", tinfo->mc2_param.res_id);	
31877  	if (is_running) {
31878  		state->scheduled = tsk;
31879  		/* make sure this task should actually be running */
31880  		litmus_reschedule_local();
31881  	}
31882 -
31883 +	
31884 +	res = res_find_by_id(state, tinfo->mc2_param.res_id);
31885 +	release = res->next_replenishment;
31886 +	
31887  	if (on_runqueue || is_running) {
31888  		/* Assumption: litmus_clock() is synchronized across cores
31889  		 * [see comment in pres_task_resume()] */
31890 @@ -1062,18 +993,20 @@ static void mc2_task_new(struct task_struct *tsk, int on_runqueue,
31891  		TRACE("mc2_new()\n");
31892  		
31893  		mc2_update_timer_and_unlock(state);
31894 -		local_irq_restore(flags);
31895 -	} else
31896 -		raw_spin_unlock_irqrestore(&_global_env.lock, flags);
31897 -
31898 -	res = res_find_by_id(state, tinfo->mc2_param.res_id);
31899 +//printk(KERN_ALERT "new() dropped lock R%d\n",tinfo->mc2_param.res_id);		
31900 +	} else {
31901 +		raw_spin_unlock(&state->lock);
31902 +		raw_spin_unlock(&_global_env.lock);
31903 +//printk(KERN_ALERT "new() release lock R%d\n",tinfo->mc2_param.res_id);		
31904 +	}
31905 +	local_irq_restore(flags);
31906  	
31907 -	if (res) {
31908 -		TRACE_TASK(tsk, "mc2_task_new() next_replenishment = %llu\n", res->next_replenishment);
31909 -		release_at(tsk, res->next_replenishment);
31910 +	if (!release) {
31911 +		TRACE_TASK(tsk, "mc2_task_new() next_release = %llu\n", release);
31912 +		release_at(tsk, release);
31913  	}
31914  	else
31915 -		TRACE_TASK(tsk, "mc2_task_new() next_replenishment = NULL\n");
31916 +		TRACE_TASK(tsk, "mc2_task_new() next_release = NULL\n");
31917  }
31918  
31919  /* mc2_reservation_destroy - reservation_destroy system call backend
31920 @@ -1196,14 +1129,16 @@ static void mc2_task_exit(struct task_struct *tsk)
31921  	struct mc2_task_state* tinfo = get_mc2_state(tsk);
31922  	struct mc2_cpu_state *state;
31923  	enum crit_level lv = tinfo->mc2_param.crit;
31924 -	struct crit_entry* ce;	
31925 +	struct crit_entry* ce;
31926 +	int cpu;
31927  
31928 +	local_irq_save(flags);
31929  	if (tinfo->cpu != -1)
31930  		state = cpu_state_for(tinfo->cpu);
31931  	else
31932  		state = local_cpu_state();
31933  		
31934 -	raw_spin_lock_irqsave(&_global_env.lock, flags);
31935 +	raw_spin_lock(&_global_env.lock);
31936  	raw_spin_lock(&state->lock);
31937  	
31938  	if (state->scheduled == tsk)
31939 @@ -1226,11 +1161,30 @@ static void mc2_task_exit(struct task_struct *tsk)
31940  		/* NOTE: drops state->lock */
31941  		TRACE("mc2_exit()\n");
31942  
31943 -		mc2_update_timer_and_unlock(state);
31944 -		local_irq_restore(flags);
31945 -	} else
31946 -		raw_spin_unlock_irqrestore(&_global_env.lock, flags);
31947 +		mc2_update_timer_and_unlock(state);	
31948 +	} else {
31949 +		raw_spin_unlock(&state->lock);
31950 +		raw_spin_unlock(&_global_env.lock);
31951 +	}
31952  
31953 +	for_each_online_cpu(cpu) {
31954 +		state = cpu_state_for(cpu);
31955 +		if (state == local_cpu_state())
31956 +			continue;
31957 +		raw_spin_lock(&state->lock);
31958 +		
31959 +		if (state->scheduled == tsk)
31960 +			state->scheduled = NULL;
31961 +		
31962 +		ce = &state->crit_entries[lv];
31963 +		if (ce->running == tsk)
31964 +			ce->running = NULL;
31965 +		
31966 +		raw_spin_unlock(&state->lock);
31967 +	}
31968 +	
31969 +	local_irq_restore(flags);
31970 +	
31971  	kfree(tsk_rt(tsk)->plugin_state);
31972  	tsk_rt(tsk)->plugin_state = NULL;
31973  	kfree(tsk_rt(tsk)->mc2_data);
31974 @@ -1539,16 +1493,11 @@ static long mc2_activate_plugin(void)
31975  			struct crit_entry *cr_entry = &state->crit_entries[lv];
31976  			cr_entry->level = lv;
31977  			cr_entry->running = NULL;
31978 -			//hrtimer_init(&ce->ghost_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
31979 -			//ce->ghost_timer.function = on_ghost_timer;
31980  		}
31981  		sup_init(&state->sup_env);
31982  
31983  		hrtimer_init(&state->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
31984  		state->timer.function = on_scheduling_timer;
31985 -		
31986 -//		hrtimer_init(&state->g_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
31987 -//		state->g_timer.function = on_global_scheduling_timer;
31988  	}
31989  
31990  	mc2_setup_domain_proc();
31991 @@ -1561,7 +1510,7 @@ static void mc2_finish_switch(struct task_struct *prev)
31992  	struct mc2_cpu_state *state = local_cpu_state();
31993  	
31994  	state->scheduled = is_realtime(current) ? current : NULL;
31995 -	TRACE_TASK(prev, "FINISH CXS! complete=%d\n", tsk_rt(prev)->completed);
31996 +	TRACE("FINISH CXS! from %s/%d to %s/%d\n", prev ? (prev)->comm : "null", prev ? (prev)->pid : 0, current ? (current)->comm : "null", current ? (current)->pid : 0);
31997  }
31998  
31999  static long mc2_deactivate_plugin(void)
32000 @@ -1570,7 +1519,8 @@ static long mc2_deactivate_plugin(void)
32001  	struct mc2_cpu_state *state;
32002  	struct reservation *res;
32003  	struct next_timer_event *event;
32004 -
32005 +	struct cpu_entry *ce;
32006 +	
32007  	raw_spin_lock(&_global_env.lock);
32008  
32009  	for_each_online_cpu(cpu) {
32010 @@ -1578,7 +1528,14 @@ static long mc2_deactivate_plugin(void)
32011  		raw_spin_lock(&state->lock);
32012  
32013  		hrtimer_cancel(&state->timer);
32014 -//		hrtimer_cancel(&state->g_timer);
32015 +
32016 +		ce = &_lowest_prio_cpu.cpu_entries[cpu];
32017 +		
32018 +		ce->cpu = cpu;
32019 +		ce->scheduled = NULL;
32020 +		ce->deadline = ULLONG_MAX;
32021 +		ce->lv = NUM_CRIT_LEVELS;
32022 +		ce->will_schedule = false;
32023  
32024  		/* Delete all reservations --- assumes struct reservation
32025  		 * is prefix of containing struct. */
32026 -- 
32027 1.8.1.2
32028 
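Note on the preceding sched_mc2.c changes: on_scheduling_timer() now keeps the
remaining ghost-job budget returned by mc2_update_ghost_state() and pulls the next
timer expiry forward to now + remain_budget whenever that comes before the next
scheduler update (ULLONG_MAX acts as "no ghost budget to watch"). A minimal
userspace sketch of that clamping rule follows; the names are illustrative, not
code from the patch.

#include <stdint.h>
#include <stdio.h>

#define NO_GHOST_BUDGET UINT64_MAX  /* stands in for ULLONG_MAX in the patch */

/* Pick the next timer expiry: the scheduler's own update time, pulled
 * forward if a ghost job would exhaust its budget earlier. */
static uint64_t next_timer_expiry(uint64_t now, uint64_t next_update,
                                  uint64_t remain_budget)
{
	if (remain_budget != NO_GHOST_BUDGET && next_update > now + remain_budget)
		return now + remain_budget;
	return next_update;
}

int main(void)
{
	/* ghost budget runs out at t=150, scheduler update not due until t=400 */
	printf("%llu\n", (unsigned long long) next_timer_expiry(100, 400, 50));
	/* no ghost job: keep the scheduler update time (prints 400) */
	printf("%llu\n", (unsigned long long) next_timer_expiry(100, 400, NO_GHOST_BUDGET));
	return 0;
}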
32029 
32030 From 6b091698a8c1575d96e6c4e3dd36252cfa7aabd1 Mon Sep 17 00:00:00 2001
32031 From: ChengYang Fu <chengyangfu@gmail.com>
32032 Date: Mon, 2 Mar 2015 16:11:18 -0500
32033 Subject: [PATCH 093/119] Merge chengyangfu branch to wip-mc2-new
32034 
32035 ---
32036  litmus/bank_proc.c | 210 +++++++++++++++++++++++++++++++----------------------
32037  1 file changed, 123 insertions(+), 87 deletions(-)
32038 
32039 diff --git a/litmus/bank_proc.c b/litmus/bank_proc.c
32040 index 07d5728..295c450 100644
32041 --- a/litmus/bank_proc.c
32042 +++ b/litmus/bank_proc.c
32043 @@ -1,3 +1,9 @@
32044 +/*
32045 + * bank_proc.c -- Implementation of page coloring for cache and bank partitioning.
32046 + *                The file keeps a pool of colored pages. Users can request pages with a
32047 + *                specific color or bank number.
32048 + *                Part of the code is adapted from Jonathan Herman's code.
32049 + */
32050  #include <linux/init.h>
32051  #include <linux/types.h>
32052  #include <linux/kernel.h>
32053 @@ -14,16 +20,23 @@
32054  
32055  #define LITMUS_LOCKDEP_NAME_MAX_LEN 50
32056  
32057 -// This is Address Decoding for imx6-sabredsd board
32058 +// This Address Decoding is used in imx6-sabredsd platform
32059  #define CACHE_MASK 0x0000f000      
32060  #define BANK_MASK  0x00007000      
32061  #define OFFSET_SHIFT 12
32062  
32063  #define PAGES_PER_COLOR 1024
32064  
32065 +unsigned long used_cachecolor;
32066 +unsigned long curr_cachecolor;
32067 +
32068 +
32069  unsigned long number_banks;
32070  unsigned long number_cachecolors;
32071  
32072 +/*
32073 + * Every page list contains a lock, a list head, and a count of how many pages it stores
32074 + */ 
32075  struct color_group {
32076  	spinlock_t lock;
32077  	char _lock_name[LITMUS_LOCKDEP_NAME_MAX_LEN];
32078 @@ -31,6 +44,10 @@ struct color_group {
32079  	atomic_t nr_pages;
32080  };
32081  
32082 +/*
32083 + * This is old code which is not used in current version
32084 + */ 
32085 +/*
32086  static struct alloced_pages {
32087  	spinlock_t lock;
32088  	struct list_head list;
32089 @@ -41,6 +58,7 @@ struct alloced_page {
32090  	struct vm_area_struct *vma;
32091  	struct list_head list;
32092  };
32093 +*/
32094  
32095  static struct color_group *color_groups;
32096  static struct lock_class_key color_lock_keys[16];
32097 @@ -59,6 +77,9 @@ static inline unsigned long page_bank(struct page *page)
32098  	return ((page_to_phys(page)& BANK_MASK) >> PAGE_SHIFT);
32099  }
32100  
32101 +/*
32102 + * Return the smallest number of pages held by any color list. 
32103 + */
32104  static unsigned long smallest_nr_pages(void)
32105  {
32106  	unsigned long i, min_pages = -1;
32107 @@ -70,8 +91,9 @@ static unsigned long smallest_nr_pages(void)
32108  	}
32109  	return min_pages;
32110  }
32111 +
32112  /*
32113 - * Page's count should be one, it sould not be on any LRU list.
32114 + * Add a page to current pool.
32115   */
32116  void add_page_to_color_list(struct page *page)
32117  {
32118 @@ -82,22 +104,26 @@ void add_page_to_color_list(struct page *page)
32119  	spin_lock(&cgroup->lock);
32120  	list_add_tail(&page->lru, &cgroup->list);
32121  	atomic_inc(&cgroup->nr_pages);
32122 -//	SetPageLRU(page);
32123  	spin_unlock(&cgroup->lock);
32124  }
32125  
32126 +/*
32127 + * Replenish the page pool. 
32128 + * If a newly allocated page has a color that is still needed, it is pushed to the matching page list;
32129 + * otherwise, it is freed. 
32130 + */
32131  static int do_add_pages(void)
32132  {
32133 -	//printk("LITMUS do add pages\n");
32134 +	printk("LITMUS do add pages\n");
32135  	
32136  	struct page *page, *page_tmp;
32137  	LIST_HEAD(free_later);
32138  	unsigned long color;
32139  	int ret = 0;
32140  
32141 +	// until all the page lists contain enough pages 
32142  	while (smallest_nr_pages() < PAGES_PER_COLOR) {
32143  	
32144 -		//page = alloc_page(GFP_HIGHUSER | __GFP_MOVABLE);
32145  		page = alloc_page(GFP_HIGHUSER_MOVABLE);
32146  		
32147  		if (unlikely(!page)) {
32148 @@ -107,70 +133,79 @@ static int do_add_pages(void)
32149  		}
32150  		color = page_color(page);
32151  		if (atomic_read(&color_groups[color].nr_pages) < PAGES_PER_COLOR) {
32152 -	//		SetPageReserved(page);
32153  			add_page_to_color_list(page);
32154 -		} else
32155 +		} else{
32156 +			// Pages here will be freed later 
32157  			list_add_tail(&page->lru, &free_later);
32158 +		}
32159  	}
32160 +	// Free the unwanted pages
32161  	list_for_each_entry_safe(page, page_tmp, &free_later, lru) {
32162  		list_del(&page->lru);
32163  		__free_page(page);
32164  	}
32165 -	/* setup the color queue stuff */
32166 -//	ret = setup_flusher_array();
32167  out:
32168  	return ret;
32169  }
32170  
32171 -extern int l2_usable_sets;
32172 +/*
32173 + * Provide pages for replacement according to cache color. 
32174 + * This should be the only allocation implementation here;
32175 + * other code should not call this function directly. 
32176 + * 
32177 + */ 
32178 +static struct  page *new_alloc_page_color( unsigned long color)
32179 +{
32180 +	printk("allocate new page color = %d\n", color);	
32181 +	struct color_group *cgroup;
32182 +	struct page *rPage = NULL;
32183 +		
32184 +	if( (color <0) || (color)>15) {
32185 +		TRACE_CUR("Wrong color %lu\n", color);	
32186 +		printk(KERN_WARNING "Wrong color %lu\n", color);
32187 +		goto out_unlock;
32188 +	}
32189 +
32190 +		
32191 +	cgroup = &color_groups[color];
32192 +	spin_lock(&cgroup->lock);
32193 +	if (unlikely(!atomic_read(&cgroup->nr_pages))) {
32194 +		TRACE_CUR("No free %lu colored pages.\n", color);
32195 +		printk(KERN_WARNING "no free %lu colored pages.\n", color);
32196 +		goto out_unlock;
32197 +	}
32198 +	rPage = list_first_entry(&cgroup->list, struct page, lru);
32199 +	BUG_ON(page_count(rPage) > 1);
32200 +	get_page(rPage);
32201 +	list_del(&rPage->lru);
32202 +	atomic_dec(&cgroup->nr_pages);
32203 +//	ClearPageLRU(rPage);
32204 +out_unlock:
32205 +	spin_unlock(&cgroup->lock);
32206 +out:
32207 +	do_add_pages();
32208 +	return rPage;
32209 +}
32210 +
32211  
32212  /*
32213 - * provide pages for replacement 
32214 + * Provide pages for replacement according to the node value:
32215   * node = 0 for Level A, B tasks in Cpu 0
32216   * node = 1 for Level A, B tasks in Cpu 1
32217   * node = 2 for Level A, B tasks in Cpu 2
32218   * node = 3 for Level A, B tasks in Cpu 3
32219   * node = 4 for Level C tasks 
32220   */
32221 -#if 1
32222  struct page *new_alloc_page(struct page *page, unsigned long node, int **x)
32223  {
32224 -	//printk("allocate new page node = %d\n", node);	
32225 +	printk("allocate new page node = %d\n", node);	
32226  //	return alloc_pages_exact_node(node, GFP_HIGHUSER_MOVABLE, 0);
32227  	struct color_group *cgroup;
32228  	struct page *rPage = NULL;
32229  	unsigned int color;
32230  	get_random_bytes(&color, sizeof(unsigned int));
32231  	
32232 -	/*
32233 -	if(node ==0){
32234 -		color = (color%2)*8+node;
32235 -	}else if(node == 1){
32236 -		color = (color%2)*8+node;
32237 -	}else if(node == 2){
32238 -		color = (color%2)*8+;
32239 -	}else if(node == 3){
32240 -		color = color%2 + 6;
32241 -	}else if(node == 4){
32242 -		color = color%8 + 8;
32243 -	}else{
32244 -		goto out;
32245 -	}
32246 -	*/
32247 -	switch(node ){
32248 -		case 0:
32249 -			color = (color % l2_usable_sets);
32250 -			break;
32251 -		case 1: 
32252 -		case 2: 
32253 -		case 3:
32254 -		case 4:
32255 -			color = (color% (16-l2_usable_sets)) + l2_usable_sets;
32256 -			break;
32257 -		default:
32258 -			goto out;
32259 -	}
32260 -	/*
32261 +	// Decode the node to decide what color pages we should provide
32262  	switch(node ){
32263  		case 0:
32264  		case 1: 
32265 @@ -184,34 +219,22 @@ struct page *new_alloc_page(struct page *page, unsigned long node, int **x)
32266  				color+=4;
32267  			break;
32268  		default:
32269 -			goto out;
32270 +			TRACE_CUR("Wrong color %lu\n", color);	
32271 +			printk(KERN_WARNING "Wrong color %lu\n", color);
32272 +			return rPage;
32273  	}
32274 -	*/
32275  
32276 -	//printk("allocate new page color = %d\n", color);
32277 -	//TRACE("allocate new page color = %d\n", color);
32278 +
32279 +	printk("allocate new page color = %d\n", color);
32280  		
32281 -	cgroup = &color_groups[color];
32282 -	spin_lock(&cgroup->lock);
32283 -	if (unlikely(!atomic_read(&cgroup->nr_pages))) {
32284 -		//TRACE_CUR("No free %lu colored pages.\n", color);
32285 -		printk(KERN_WARNING "no free %lu colored pages.\n", color);
32286 -		goto out_unlock;
32287 -	}
32288 -	rPage = list_first_entry(&cgroup->list, struct page, lru);
32289 -	BUG_ON(page_count(rPage) > 1);
32290 -	get_page(rPage);
32291 -	list_del(&rPage->lru);
32292 -	atomic_dec(&cgroup->nr_pages);
32293 -//	ClearPageLRU(rPage);
32294 -out_unlock:
32295 -	spin_unlock(&cgroup->lock);
32296 -out:
32297 -	do_add_pages();
32298 -	return rPage;
32299 +	rPage =  new_alloc_page_color(color);
32300 +	return rPage; 
32301  }
32302 -#endif
32303  
32304 +/*
32305 + * Provide pages for replacement according to bank number. 
32306 + * This is used with cache way partitioning. 
32307 + */
32308  struct page *new_alloc_page_banknr(struct page *page, unsigned long banknr, int **x)
32309  {
32310  	printk("allocate new page bank = %d\n", banknr);	
32311 @@ -225,30 +248,43 @@ struct page *new_alloc_page_banknr(struct page *page, unsigned long banknr, int
32312  	}else{
32313  		goto out;
32314  	}
32315 +	
32316 +	rPage =  new_alloc_page_color(color);
32317  		
32318 -	cgroup = &color_groups[color];
32319 -	spin_lock(&cgroup->lock);
32320 -	if (unlikely(!atomic_read(&cgroup->nr_pages))) {
32321 -		TRACE_CUR("No free %lu colored pages.\n", color);
32322 -		printk(KERN_WARNING "no free %lu colored pages.\n", color);
32323 -		goto out_unlock;
32324 -	}
32325 -	rPage = list_first_entry(&cgroup->list, struct page, lru);
32326 -	BUG_ON(page_count(rPage) > 1);
32327 -	get_page(rPage);
32328 -	list_del(&rPage->lru);
32329 -	atomic_dec(&cgroup->nr_pages);
32330 -//	ClearPageLRU(rPage);
32331 -out_unlock:
32332 -	spin_unlock(&cgroup->lock);
32333  out:
32334 -	do_add_pages();
32335  	return rPage;
32336 +}
32337  
32338  
32339 +void set_number_of_colors(unsigned long colornr)
32340 +{
32341 +	used_cachecolor = colornr ; 
32342 +	curr_cachecolor = 0;
32343 +}
32344 +
32345  
32346 +/*
32347 + * Provide pages for replacement 
32348 + * This is used to generate experiments 
32349 + */
32350 +struct page *new_alloc_page_predefined(struct page *page,  int **x)
32351 +{
32352 +	unsigned int color = curr_cachecolor; 
32353 +	
32354 +	printk("allocate new page color = %d\n", color);	
32355 +	struct color_group *cgroup;
32356 +	struct page *rPage = NULL;
32357 +	
32358 +	rPage =  new_alloc_page_color(color);
32359 +	color = (color + 1)% used_cachecolor;
32360 +out:
32361 +	return rPage;
32362  }
32363  
32364 +
32365 +/*
32366 + * Initialize the numbers of banks and cache colors 
32367 + */ 
32368  static int __init init_variables(void)
32369  {
32370  	number_banks = 1+(BANK_MASK >> PAGE_SHIFT); 
32371 @@ -256,7 +292,9 @@ static int __init init_variables(void)
32372  }
32373  
32374  
32375 -
32376 +/*
32377 + * Initialize the page pool 
32378 + */
32379  static int __init init_color_groups(void)
32380  {
32381  	struct color_group *cgroup;
32382 @@ -275,22 +313,20 @@ static int __init init_color_groups(void)
32383  			atomic_set(&cgroup->nr_pages, 0);
32384  			INIT_LIST_HEAD(&cgroup->list);
32385  			spin_lock_init(&cgroup->lock);
32386 -//			LOCKDEP_DYNAMIC_ALLOC(&cgroup->lock, &color_lock_keys[i],
32387 -//					cgroup->_lock_name, "color%lu", i);
32388  		}
32389  	}
32390  	return err;
32391  }
32392  
32393  /*
32394 - * Initialzie the this proc 
32395 + * Initialize this proc 
32396   */
32397  static int __init litmus_color_init(void)
32398  {
32399  	int err=0;
32400  	
32401 -	INIT_LIST_HEAD(&alloced_pages.list);
32402 -	spin_lock_init(&alloced_pages.lock);
32403 +	//INIT_LIST_HEAD(&alloced_pages.list);
32404 +	//spin_lock_init(&alloced_pages.lock);
32405  	init_variables();
32406  	printk("Cache number = %d , Cache mask = 0x%lx\n", number_cachecolors, CACHE_MASK); 
32407  	printk("Bank number = %d , Bank mask = 0x%lx\n", number_banks, BANK_MASK); 
32408 -- 
32409 1.8.1.2
32410 
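Note on the preceding bank_proc.c changes: free pages are kept in per-color lists,
a page's color and bank are decoded directly from its physical address with the
CACHE_MASK/BANK_MASK values above, and the node argument of new_alloc_page()
selects which color range a request draws from (nodes 0-3 for the per-CPU
level-A/B tasks, node 4 for level C). The standalone sketch below only reproduces
the address decoding; it is illustrative and not part of the patch.

#include <stdint.h>
#include <stdio.h>

/* Masks from the address-decoding #defines above; PAGE_SHIFT assumed to be 12. */
#define CACHE_MASK 0x0000f000UL
#define BANK_MASK  0x00007000UL
#define PAGE_SHIFT 12

static unsigned long page_color_of(uint64_t phys)
{
	return (unsigned long) ((phys & CACHE_MASK) >> PAGE_SHIFT);
}

static unsigned long page_bank_of(uint64_t phys)
{
	return (unsigned long) ((phys & BANK_MASK) >> PAGE_SHIFT);
}

int main(void)
{
	uint64_t phys = 0x1083a000ULL;  /* arbitrary example frame address */

	printf("colors=%lu banks=%lu\n",
	       1 + (CACHE_MASK >> PAGE_SHIFT),   /* 16 cache colors */
	       1 + (BANK_MASK >> PAGE_SHIFT));   /* 8 banks */
	printf("color=%lu bank=%lu\n", page_color_of(phys), page_bank_of(phys));
	return 0;
}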
32411 
32412 From a27319a7ffe7f72828faec29c6748453297488a8 Mon Sep 17 00:00:00 2001
32413 From: Namhoon Kim <namhoonk@cs.unc.edu>
32414 Date: Wed, 4 Mar 2015 09:58:24 -0500
32415 Subject: [PATCH 094/119] fixed timer
32416 
32417 ---
32418  litmus/reservation.c | 26 ++++++++++++++++++--------
32419  litmus/sched_mc2.c   |  7 ++++---
32420  2 files changed, 22 insertions(+), 11 deletions(-)
32421 
32422 diff --git a/litmus/reservation.c b/litmus/reservation.c
32423 index 86d2f6e..25e838c 100644
32424 --- a/litmus/reservation.c
32425 +++ b/litmus/reservation.c
32426 @@ -4,8 +4,8 @@
32427  #include <litmus/litmus.h>
32428  #include <litmus/reservation.h>
32429  
32430 -#define TRACE(fmt, args...) do {} while (false)
32431 -#define TRACE_TASK(fmt, args...) do {} while (false)
32432 +//#define TRACE(fmt, args...) do {} while (false)
32433 +//#define TRACE_TASK(fmt, args...) do {} while (false)
32434  
32435  void reservation_init(struct reservation *res)
32436  {
32437 @@ -387,15 +387,25 @@ static void gmp_add_event(
32438  {
32439  	struct next_timer_event *nevent, *queued;
32440  	struct list_head *pos;
32441 -	int found = 0;
32442 +	int found = 0, update = 0;
32443  
32444  	//when = div64_u64(when, TIMER_RESOLUTION);
32445  	//when *= TIMER_RESOLUTION;
32446  //printk(KERN_ALERT "GMP_ADD id=%d type=%d when=%llu\n", id, type, when);
32447  	nevent = gmp_find_event_by_id(gmp_env, id);
32448  	
32449 -	if (!nevent || nevent->type != type) {
32450 -		nevent = kzalloc(sizeof(*nevent), GFP_ATOMIC);
32451 +	if (nevent)
32452 +		TRACE("EVENT R%d update prev = %llu, new = %llu\n", nevent->id, nevent->next_update, when);
32453 +	
32454 +	if (nevent && nevent->next_update > when) {
32455 +		list_del(&nevent->list);
32456 +		update = 1;
32457 +		
32458 +	}
32459 +	
32460 +	if (!nevent || nevent->type != type || update == 1) {
32461 +		if (update == 0)
32462 +			nevent = kzalloc(sizeof(*nevent), GFP_ATOMIC);
32463  		BUG_ON(!nevent);
32464  		nevent->next_update = when;
32465  		nevent->id = id;
32466 @@ -421,9 +431,9 @@ static void gmp_add_event(
32467  ; //printk(KERN_ALERT "EVENT FOUND id = %d type=%d when=%llu, NEW EVENT type=%d when=%llu\n", nevent->id, nevent->type, nevent->next_update, type, when);
32468  	}
32469  	
32470 -	//TRACE("======START PRINTING EVENT LIST======\n");
32471 -	//gmp_print_events(gmp_env, litmus_clock());
32472 -	//TRACE("======FINISH PRINTING EVENT LIST======\n");
32473 +	TRACE("======START PRINTING EVENT LIST======\n");
32474 +	gmp_print_events(gmp_env, litmus_clock());
32475 +	TRACE("======FINISH PRINTING EVENT LIST======\n");
32476  }
32477  
32478  void gmp_add_event_after(
32479 diff --git a/litmus/sched_mc2.c b/litmus/sched_mc2.c
32480 index f7758f2..0d378c1 100644
32481 --- a/litmus/sched_mc2.c
32482 +++ b/litmus/sched_mc2.c
32483 @@ -170,6 +170,7 @@ static void task_departs(struct task_struct *tsk, int job_complete)
32484  		ce = &state->crit_entries[lv];
32485  		ce->running = tsk;
32486  		res->is_ghost = 1;
32487 +		gmp_add_event_after(&_global_env, res->cur_budget, res->id, EVENT_DRAIN);
32488  		TRACE_TASK(tsk, "BECOME GHOST at %llu\n", litmus_clock());
32489  		
32490  	}		
32491 @@ -305,7 +306,7 @@ static void mc2_update_timer_and_unlock(struct mc2_cpu_state *state)
32492  					litmus_reschedule(cpu);
32493  				}
32494  			}
32495 -		} else if (event->next_update < update && event->timer_armed_on == NO_CPU) {
32496 +		} else if (event->next_update < update && (event->timer_armed_on == NO_CPU || event->timer_armed_on == state->cpu)) {
32497  			event->timer_armed_on = state->cpu;
32498  			update = event->next_update;
32499  			break;
32500 @@ -484,7 +485,7 @@ static enum hrtimer_restart on_scheduling_timer(struct hrtimer *timer)
32501  		update = now + remain_budget;
32502  	}
32503  	
32504 -	//TRACE_CUR("on_scheduling_timer at %llu, upd:%llu (for cpu=%d) g_schedule_now:%d\n", now, update, state->cpu, global_schedule_now);
32505 +	TRACE_CUR("on_scheduling_timer at %llu, upd:%llu (for cpu=%d) g_schedule_now:%d\n", now, update, state->cpu, global_schedule_now);
32506  //printk(KERN_ALERT "on_scheduling_timer at %llu, upd:%llu (for cpu=%d) g_schedule_now:%d\n", now, update, state->cpu, global_schedule_now);
32507  	if (update <= now) {
32508  		litmus_reschedule_local();
32509 @@ -502,7 +503,7 @@ static enum hrtimer_restart on_scheduling_timer(struct hrtimer *timer)
32510  			raw_spin_lock(&_lowest_prio_cpu.lock);
32511  			_lowest_prio_cpu.cpu_entries[cpu].will_schedule = true;
32512  			raw_spin_unlock(&_lowest_prio_cpu.lock);
32513 -			//TRACE("LOWEST CPU = P%d\n", cpu);
32514 +			TRACE("LOWEST CPU = P%d\n", cpu);
32515  			litmus_reschedule(cpu);
32516  		}
32517  	} 
32518 -- 
32519 1.8.1.2
32520 
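Note on the preceding "fixed timer" changes: gmp_add_event() no longer always
allocates a fresh event node; when an event with the same reservation id already
exists and its next_update lies later than the newly requested expiry, the old
node is unlinked and re-queued at the earlier time, and the timer re-arm test now
also accepts an event already armed on the local CPU. A simplified userspace model
of the "add or pull earlier" list update follows (plain pointers instead of the
kernel's list_head; all names are illustrative).

#include <stdio.h>
#include <stdlib.h>

/* One pending event per reservation id, kept in increasing expiry order. */
struct event {
	unsigned id;
	unsigned long long when;
	struct event *next;
};

static void add_or_update(struct event **head, unsigned id, unsigned long long when)
{
	struct event **pp, *e = NULL;

	/* If the id already has an event, unlink it only when the new expiry is
	 * earlier; a later expiry is ignored (the "next_update > when" test). */
	for (pp = head; *pp; pp = &(*pp)->next) {
		if ((*pp)->id == id) {
			if ((*pp)->when <= when)
				return;
			e = *pp;
			*pp = e->next;
			break;
		}
	}
	if (!e)
		e = malloc(sizeof(*e));
	e->id = id;
	e->when = when;

	/* Re-insert in increasing expiry order. */
	for (pp = head; *pp && (*pp)->when <= when; pp = &(*pp)->next)
		;
	e->next = *pp;
	*pp = e;
}

int main(void)
{
	struct event *head = NULL, *e;

	add_or_update(&head, 1, 300);
	add_or_update(&head, 2, 200);
	add_or_update(&head, 1, 100);  /* pulls event 1 ahead of event 2 */
	for (e = head; e; e = e->next)
		printf("R%u at %llu\n", e->id, e->when);
	return 0;
}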
32521 
32522 From 52cf970884a42f24c583ee9baeab536a6622991e Mon Sep 17 00:00:00 2001
32523 From: ChengYang Fu <chengyangfu@gmail.com>
32524 Date: Mon, 9 Mar 2015 13:33:55 -0400
32525 Subject: [PATCH 095/119] add new functions for bank_proc.c
32526 
32527 ---
32528  litmus/bank_proc.c  | 55 ++++++++++++++++++++++++++++++++++-------------------
32529  litmus/cache_proc.c |  9 +++++++--
32530  2 files changed, 42 insertions(+), 22 deletions(-)
32531 
32532 diff --git a/litmus/bank_proc.c b/litmus/bank_proc.c
32533 index 295c450..844e090 100644
32534 --- a/litmus/bank_proc.c
32535 +++ b/litmus/bank_proc.c
32536 @@ -18,6 +18,7 @@
32537  #include <litmus/litmus_proc.h>
32538  #include <litmus/sched_trace.h>
32539  
32540 +
32541  #define LITMUS_LOCKDEP_NAME_MAX_LEN 50
32542  
32543  // This Address Decoding is used in imx6-sabredsd platform
32544 @@ -27,13 +28,14 @@
32545  
32546  #define PAGES_PER_COLOR 1024
32547  
32548 -unsigned long used_cachecolor;
32549  unsigned long curr_cachecolor;
32550 +int used_cachecolor;
32551  
32552  
32553  unsigned long number_banks;
32554  unsigned long number_cachecolors;
32555  
32556 +
32557  /*
32558   * Every page list contains a lock, a list head, and a count of how many pages it stores
32559   */ 
32560 @@ -183,12 +185,30 @@ static struct  page *new_alloc_page_color( unsigned long color)
32561  out_unlock:
32562  	spin_unlock(&cgroup->lock);
32563  out:
32564 -	do_add_pages();
32565 +	if( smallest_nr_pages() == 0)
32566 +		do_add_pages();
32567  	return rPage;
32568  }
32569  
32570  
32571  /*
32572 + * Provide pages for replacement 
32573 + * This is used to generate experiments 
32574 + */
32575 +struct page *new_alloc_page_predefined(struct page *page,  int **x)
32576 +{
32577 +	unsigned int color = curr_cachecolor; 
32578 +	
32579 +	printk("allocate new page color = %d\n", color);	
32580 +	struct color_group *cgroup;
32581 +	struct page *rPage = NULL;
32582 +	
32583 +	rPage =  new_alloc_page_color(color);
32584 +	curr_cachecolor = (color + 1)% used_cachecolor;
32585 +out:
32586 +	return rPage;
32587 +}
32588 +/*
32589   * Provide pages for replacement according to the node value:
32590   * node = 0 for Level A, B tasks in Cpu 0
32591   * node = 1 for Level A, B tasks in Cpu 1
32592 @@ -196,6 +216,7 @@ out:
32593   * node = 3 for Level A, B tasks in Cpu 3
32594   * node = 4 for Level C tasks 
32595   */
32596 +#if 1
32597  struct page *new_alloc_page(struct page *page, unsigned long node, int **x)
32598  {
32599  	printk("allocate new page node = %d\n", node);	
32600 @@ -230,6 +251,12 @@ struct page *new_alloc_page(struct page *page, unsigned long node, int **x)
32601  	rPage =  new_alloc_page_color(color);
32602  	return rPage; 
32603  }
32604 +#else
32605 +struct page *new_alloc_page(struct page *page, unsigned long node, int **x)
32606 +{
32607 +	return new_alloc_page_predefined(page,  x);
32608 +}
32609 +#endif
32610  
32611  /*
32612   * Provide pages for replacement according to bank number. 
32613 @@ -256,30 +283,14 @@ out:
32614  }
32615  
32616  
32617 -void set_number_of_colors(unsigned long colornr)
32618 +void set_number_of_colors(int colornr)
32619  {
32620 +	printk(KERN_WARNING "Set of colors = %d.\n", colornr);
32621  	used_cachecolor = colornr ; 
32622  	curr_cachecolor = 0;
32623  }
32624  
32625  
32626 -/*
32627 - * Provide pages for replacement 
32628 - * This is used to generate experiments 
32629 - */
32630 -struct page *new_alloc_page_predefined(struct page *page,  int **x)
32631 -{
32632 -	unsigned int color = curr_cachecolor; 
32633 -	
32634 -	printk("allocate new page color = %d\n", color);	
32635 -	struct color_group *cgroup;
32636 -	struct page *rPage = NULL;
32637 -	
32638 -	rPage =  new_alloc_page_color(color);
32639 -	color = (color + 1)% used_cachecolor;
32640 -out:
32641 -	return rPage;
32642 -}
32643  
32644  
32645  /*
32646 @@ -289,6 +300,10 @@ static int __init init_variables(void)
32647  {
32648  	number_banks = 1+(BANK_MASK >> PAGE_SHIFT); 
32649  	number_cachecolors = 1+(CACHE_MASK >> PAGE_SHIFT);
32650 +	used_cachecolor = 16;
32651 +	curr_cachecolor = 0;
32652 +
32653 +	
32654  }
32655  
32656  
32657 diff --git a/litmus/cache_proc.c b/litmus/cache_proc.c
32658 index 7b48d5c..e244616 100644
32659 --- a/litmus/cache_proc.c
32660 +++ b/litmus/cache_proc.c
32661 @@ -13,6 +13,7 @@
32662  #include <asm/hardware/cache-l2x0.h>
32663  #include <asm/cacheflush.h>
32664  
32665 +
32666  #define UNLOCK_ALL	0x00000000 /* allocation in any way */
32667  #define LOCK_ALL        (~UNLOCK_ALL)
32668  #define MAX_NR_WAYS	16
32669 @@ -50,7 +51,7 @@ struct mutex lockdown_proc;
32670  static int min_usable_ways = 0;
32671  static int max_usable_ways = 16;
32672  static int min_usable_sets = 1;
32673 -static int max_usable_sets = 15;
32674 +static int max_usable_sets = 16;
32675  
32676  static int zero = 0;
32677  static int one = 1;
32678 @@ -67,6 +68,9 @@ int l2_usable_sets;
32679  int lock_all;
32680  int nr_lockregs;
32681  
32682 +extern void set_number_of_colors(int colornr);
32683 +
32684 +
32685  static void print_lockdown_registers(void)
32686  {
32687  	int i;
32688 @@ -212,7 +216,7 @@ int l2_usable_sets_handler(struct ctl_table *table, int write, void __user *buff
32689  	printk("l2_usable_sets : %d\n", l2_usable_sets);
32690  	
32691  	if (write) {
32692 -		;
32693 +		set_number_of_colors(l2_usable_sets);
32694  	}
32695  
32696  out:
32697 @@ -277,6 +281,7 @@ static int __init litmus_sysctl_init(void)
32698  
32699  	l2_usable_ways = 16;
32700  	l2_usable_sets = 5;
32701 +	set_number_of_colors(l2_usable_sets);
32702  
32703  out:
32704  	return ret;
32705 -- 
32706 1.8.1.2
32707 
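Note on the preceding bank_proc.c changes: set_number_of_colors(), driven from the
l2_usable_sets handler in cache_proc.c, resets the color cursor, and
new_alloc_page_predefined() then hands out colors round-robin over
[0, used_cachecolor). A small userspace sketch of that rotation, following the
corrected cursor update in this patch (names are illustrative):

#include <stdio.h>

static int used_cachecolor = 16;
static int curr_cachecolor = 0;

static void set_number_of_colors(int colornr)
{
	used_cachecolor = colornr;
	curr_cachecolor = 0;
}

/* Return the next color in round-robin order, as new_alloc_page_predefined()
 * does before handing the color to the per-color allocator. */
static int next_predefined_color(void)
{
	int color = curr_cachecolor;

	curr_cachecolor = (color + 1) % used_cachecolor;
	return color;
}

int main(void)
{
	int i;

	set_number_of_colors(5);  /* e.g. the value written to l2_usable_sets */
	for (i = 0; i < 8; i++)
		printf("%d ", next_predefined_color());  /* 0 1 2 3 4 0 1 2 */
	printf("\n");
	return 0;
}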
32708 
32709 From 987ed4eca956b1d445b796d8c494a1ab2826422e Mon Sep 17 00:00:00 2001
32710 From: ChengYang Fu <chengyangfu@gmail.com>
32711 Date: Mon, 9 Mar 2015 13:38:17 -0400
32712 Subject: [PATCH 096/119] comment all printk in bank_proc.c
32713 
32714 ---
32715  litmus/bank_proc.c | 30 +++++++++++++++---------------
32716  1 file changed, 15 insertions(+), 15 deletions(-)
32717 
32718 diff --git a/litmus/bank_proc.c b/litmus/bank_proc.c
32719 index 844e090..53c20db 100644
32720 --- a/litmus/bank_proc.c
32721 +++ b/litmus/bank_proc.c
32722 @@ -116,7 +116,7 @@ void add_page_to_color_list(struct page *page)
32723   */
32724  static int do_add_pages(void)
32725  {
32726 -	printk("LITMUS do add pages\n");
32727 +//	printk("LITMUS do add pages\n");
32728  	
32729  	struct page *page, *page_tmp;
32730  	LIST_HEAD(free_later);
32731 @@ -129,7 +129,7 @@ static int do_add_pages(void)
32732  		page = alloc_page(GFP_HIGHUSER_MOVABLE);
32733  		
32734  		if (unlikely(!page)) {
32735 -			printk(KERN_WARNING "Could not allocate pages.\n");
32736 +	//		printk(KERN_WARNING "Could not allocate pages.\n");
32737  			ret = -ENOMEM;
32738  			goto out;
32739  		}
32740 @@ -158,13 +158,13 @@ out:
32741   */ 
32742  static struct  page *new_alloc_page_color( unsigned long color)
32743  {
32744 -	printk("allocate new page color = %d\n", color);	
32745 +//	printk("allocate new page color = %d\n", color);	
32746  	struct color_group *cgroup;
32747  	struct page *rPage = NULL;
32748  		
32749  	if( (color <0) || (color)>15) {
32750  		TRACE_CUR("Wrong color %lu\n", color);	
32751 -		printk(KERN_WARNING "Wrong color %lu\n", color);
32752 +//		printk(KERN_WARNING "Wrong color %lu\n", color);
32753  		goto out_unlock;
32754  	}
32755  
32756 @@ -173,7 +173,7 @@ static struct  page *new_alloc_page_color( unsigned long color)
32757  	spin_lock(&cgroup->lock);
32758  	if (unlikely(!atomic_read(&cgroup->nr_pages))) {
32759  		TRACE_CUR("No free %lu colored pages.\n", color);
32760 -		printk(KERN_WARNING "no free %lu colored pages.\n", color);
32761 +//		printk(KERN_WARNING "no free %lu colored pages.\n", color);
32762  		goto out_unlock;
32763  	}
32764  	rPage = list_first_entry(&cgroup->list, struct page, lru);
32765 @@ -199,7 +199,7 @@ struct page *new_alloc_page_predefined(struct page *page,  int **x)
32766  {
32767  	unsigned int color = curr_cachecolor; 
32768  	
32769 -	printk("allocate new page color = %d\n", color);	
32770 +//	printk("allocate new page color = %d\n", color);	
32771  	struct color_group *cgroup;
32772  	struct page *rPage = NULL;
32773  	
32774 @@ -219,7 +219,7 @@ out:
32775  #if 1
32776  struct page *new_alloc_page(struct page *page, unsigned long node, int **x)
32777  {
32778 -	printk("allocate new page node = %d\n", node);	
32779 +//	printk("allocate new page node = %d\n", node);	
32780  //	return alloc_pages_exact_node(node, GFP_HIGHUSER_MOVABLE, 0);
32781  	struct color_group *cgroup;
32782  	struct page *rPage = NULL;
32783 @@ -241,12 +241,12 @@ struct page *new_alloc_page(struct page *page, unsigned long node, int **x)
32784  			break;
32785  		default:
32786  			TRACE_CUR("Wrong color %lu\n", color);	
32787 -			printk(KERN_WARNING "Wrong color %lu\n", color);
32788 +//			printk(KERN_WARNING "Wrong color %lu\n", color);
32789  			return rPage;
32790  	}
32791  
32792  
32793 -	printk("allocate new page color = %d\n", color);
32794 +//	printk("allocate new page color = %d\n", color);
32795  		
32796  	rPage =  new_alloc_page_color(color);
32797  	return rPage; 
32798 @@ -264,7 +264,7 @@ struct page *new_alloc_page(struct page *page, unsigned long node, int **x)
32799   */
32800  struct page *new_alloc_page_banknr(struct page *page, unsigned long banknr, int **x)
32801  {
32802 -	printk("allocate new page bank = %d\n", banknr);	
32803 +//	printk("allocate new page bank = %d\n", banknr);	
32804  	struct color_group *cgroup;
32805  	struct page *rPage = NULL;
32806  	unsigned int color;
32807 @@ -285,7 +285,7 @@ out:
32808  
32809  void set_number_of_colors(int colornr)
32810  {
32811 -	printk(KERN_WARNING "Set of colors = %d.\n", colornr);
32812 +//	printk(KERN_WARNING "Set of colors = %d.\n", colornr);
32813  	used_cachecolor = colornr ; 
32814  	curr_cachecolor = 0;
32815  }
32816 @@ -319,7 +319,7 @@ static int __init init_color_groups(void)
32817  	color_groups = kmalloc(number_cachecolors *
32818  			sizeof(struct color_group), GFP_KERNEL);
32819  	if (!color_groups) {
32820 -		printk(KERN_WARNING "Could not allocate color groups.\n");
32821 +//		printk(KERN_WARNING "Could not allocate color groups.\n");
32822  		err = -ENOMEM;
32823  	}else{
32824  
32825 @@ -343,12 +343,12 @@ static int __init litmus_color_init(void)
32826  	//INIT_LIST_HEAD(&alloced_pages.list);
32827  	//spin_lock_init(&alloced_pages.lock);
32828  	init_variables();
32829 -	printk("Cache number = %d , Cache mask = 0x%lx\n", number_cachecolors, CACHE_MASK); 
32830 -	printk("Bank number = %d , Bank mask = 0x%lx\n", number_banks, BANK_MASK); 
32831 +//	printk("Cache number = %d , Cache mask = 0x%lx\n", number_cachecolors, CACHE_MASK); 
32832 +//	printk("Bank number = %d , Bank mask = 0x%lx\n", number_banks, BANK_MASK); 
32833  	init_color_groups();			
32834  	do_add_pages();
32835  
32836 -	printk(KERN_INFO "Registering LITMUS^RT color and bank proc.\n");
32837 +//	printk(KERN_INFO "Registering LITMUS^RT color and bank proc.\n");
32838  	return err;
32839  }
32840  
32841 -- 
32842 1.8.1.2
32843 
32844 
32845 From fa47a8c03809058a0823cfbeeff5a574eae344f7 Mon Sep 17 00:00:00 2001
32846 From: Namhoon Kim <namhoonk@cs.unc.edu>
32847 Date: Mon, 9 Mar 2015 13:41:28 -0400
32848 Subject: [PATCH 097/119] Add way_partition proc.
32849 
32850 ---
32851  include/litmus/rt_param.h |   1 +
32852  litmus/cache_proc.c       |  54 ++++++++++++++++++++
32853  litmus/ctrldev.c          |   4 +-
32854  litmus/litmus.c           |  73 +++++++++++++--------------
32855  litmus/reservation.c      |   9 ++--
32856  litmus/sched_mc2.c        | 124 ++++++++++++++++++++++++++++------------------
32857  6 files changed, 175 insertions(+), 90 deletions(-)
32858 
32859 diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
32860 index 284b89e..a3dde39 100644
32861 --- a/include/litmus/rt_param.h
32862 +++ b/include/litmus/rt_param.h
32863 @@ -326,6 +326,7 @@ struct rt_param {
32864  	
32865  	/* Mixed-criticality specific data */
32866  	struct mc2_task* mc2_data;
32867 +	unsigned long addr_ctrl_page;
32868  };
32869  
32870  #endif
32871 diff --git a/litmus/cache_proc.c b/litmus/cache_proc.c
32872 index 7b48d5c..13c595c 100644
32873 --- a/litmus/cache_proc.c
32874 +++ b/litmus/cache_proc.c
32875 @@ -9,6 +9,8 @@
32876  
32877  #include <litmus/litmus_proc.h>
32878  #include <litmus/sched_trace.h>
32879 +#include <litmus/cache_proc.h>
32880 +#include <litmus/mc2_common.h>
32881  
32882  #include <asm/hardware/cache-l2x0.h>
32883  #include <asm/cacheflush.h>
32884 @@ -37,6 +39,14 @@ u32 nr_unlocked_way[MAX_NR_WAYS+1]  = {
32885  	0xFFFF0000, /* way ~15 unlocked. usable = 16 */
32886  };
32887  
32888 +u32 way_partitions[5] = {
32889 +	0xFFFFFFFC, /* cpu0 A and B */
32890 +	0xFFFFFFF3, /* cpu1 A and B */
32891 +	0xFFFFFFCF, /* cpu2 A and B */
32892 +	0xFFFFFF3F, /* cpu3 A and B */
32893 +	0xFFFF00FF, /* lv C */
32894 +};
32895 +
32896  static void __iomem *cache_base;
32897  static void __iomem *lockreg_d;
32898  static void __iomem *lockreg_i;
32899 @@ -66,6 +76,7 @@ int l2_usable_ways;
32900  int l2_usable_sets;
32901  int lock_all;
32902  int nr_lockregs;
32903 +int use_way_partition;
32904  
32905  static void print_lockdown_registers(void)
32906  {
32907 @@ -195,6 +206,19 @@ out:
32908  	return ret;
32909  }
32910  
32911 +void do_way_partition(enum crit_level lv, int cpu)
32912 +{
32913 +	if (use_way_partition == 1) {
32914 +		if (lv < CRIT_LEVEL_C) {
32915 +			writel_relaxed(way_partitions[cpu], ld_d_reg(cpu));
32916 +			writel_relaxed(way_partitions[cpu], ld_i_reg(cpu));
32917 +		} else {
32918 +			writel_relaxed(way_partitions[4], ld_d_reg(cpu));
32919 +			writel_relaxed(way_partitions[4], ld_i_reg(cpu));
32920 +		}
32921 +	}
32922 +}
32923 +		
32924  int l2_usable_sets_handler(struct ctl_table *table, int write, void __user *buffer,
32925  		size_t *lenp, loff_t *ppos)
32926  {
32927 @@ -220,6 +244,26 @@ out:
32928  	return ret;
32929  }
32930  
32931 +int use_way_partition_handler(struct ctl_table *table, int write, void __user *buffer,
32932 +		size_t *lenp, loff_t *ppos)
32933 +{
32934 +	int ret = 0;
32935 +	
32936 +	mutex_lock(&lockdown_proc);
32937 +	
32938 +	flush_cache_all();
32939 +	
32940 +	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
32941 +	if (ret)
32942 +		goto out;
32943 +		
32944 +	printk(KERN_INFO "use_way_partition : %d\n", use_way_partition);
32945 +
32946 +out:
32947 +	mutex_unlock(&lockdown_proc);
32948 +	return ret;
32949 +}
32950 +
32951  static struct ctl_table cache_table[] =
32952  {
32953  	{
32954 @@ -249,6 +293,15 @@ static struct ctl_table cache_table[] =
32955  		.extra1		= &zero,
32956  		.extra2		= &one,
32957  	},
32958 +	{
32959 +		.procname	= "use_way_partition",
32960 +		.mode		= 0666,
32961 +		.proc_handler	= use_way_partition_handler,
32962 +		.data		= &use_way_partition,
32963 +		.maxlen		= sizeof(use_way_partition),
32964 +		.extra1		= &zero,
32965 +		.extra2		= &one,
32966 +	},
32967  	{ }
32968  };
32969  
32970 @@ -277,6 +330,7 @@ static int __init litmus_sysctl_init(void)
32971  
32972  	l2_usable_ways = 16;
32973  	l2_usable_sets = 5;
32974 +	use_way_partition = 0;
32975  
32976  out:
32977  	return ret;
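
Note on the cache_proc.c hunks above: each entry of way_partitions[] is a lockdown
mask in which a cleared bit leaves an L2 way usable and a set bit locks it out, so
CPUs 0-3 each keep two private ways for their level-A/B reservations while level C
gets ways 8-15; do_way_partition() writes the chosen mask into the per-CPU data and
instruction lockdown registers when use_way_partition is enabled. The sketch below
only decodes the masks in userspace; it is illustrative and touches no hardware.

#include <stdint.h>
#include <stdio.h>

/* Way-partition masks as in the hunk above: 0 bit = way usable, 1 bit = locked. */
static const uint32_t way_partitions[5] = {
	0xFFFFFFFC, /* cpu0 A and B */
	0xFFFFFFF3, /* cpu1 A and B */
	0xFFFFFFCF, /* cpu2 A and B */
	0xFFFFFF3F, /* cpu3 A and B */
	0xFFFF00FF, /* lv C */
};

static int usable_ways(uint32_t mask)
{
	int i, n = 0;

	for (i = 0; i < 16; i++)  /* the L2 here has 16 ways */
		if (!(mask & (1u << i)))
			n++;
	return n;
}

int main(void)
{
	int i;

	for (i = 0; i < 5; i++)
		printf("partition %d: %d usable ways\n", i, usable_ways(way_partitions[i]));
	/* cpu0..cpu3 get 2 ways each; level C gets the remaining 8 ways */
	return 0;
}
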
32978 diff --git a/litmus/ctrldev.c b/litmus/ctrldev.c
32979 index 877f278..208a212 100644
32980 --- a/litmus/ctrldev.c
32981 +++ b/litmus/ctrldev.c
32982 @@ -43,7 +43,7 @@ static int map_ctrl_page(struct task_struct *t, struct vm_area_struct* vma)
32983  
32984  	if (err)
32985  		TRACE_CUR(CTRL_NAME ": vm_insert_page() failed (%d)\n", err);
32986 -
32987 +	tsk_rt(t)->addr_ctrl_page = vma->vm_start;
32988  	return err;
32989  }
32990  
32991 @@ -104,7 +104,7 @@ static int litmus_ctrl_mmap(struct file* filp, struct vm_area_struct* vma)
32992  	 * don't care if it was touched or not. PAGE_SHARED means RW access, but
32993  	 * not execute, and avoids copy-on-write behavior.
32994  	 * See protection_map in mmap.c.  */
32995 -	vma->vm_page_prot = PAGE_SHARED;
32996 +	//vma->vm_page_prot = PAGE_SHARED;
32997  
32998  	err = alloc_ctrl_page(current);
32999  	if (!err)
33000 diff --git a/litmus/litmus.c b/litmus/litmus.c
33001 index 4ff840d..344c68c 100644
33002 --- a/litmus/litmus.c
33003 +++ b/litmus/litmus.c
33004 @@ -17,6 +17,7 @@
33005  #include <linux/migrate.h>
33006  #include <linux/mm.h>
33007  #include <linux/memcontrol.h>
33008 +#include <linux/mm_inline.h>
33009  
33010  #include <litmus/litmus.h>
33011  #include <litmus/bheap.h>
33012 @@ -346,45 +347,33 @@ static inline unsigned long page_color(struct page *page)
33013  }
33014  
33015  /*
33016 -static struct page *walk_page_table(unsigned long addr)
33017 +static struct page *page_by_address(const struct mm_struct *const mm,
33018 +                             const unsigned long address)
33019  {
33020      pgd_t *pgd;
33021 -    pte_t *ptep, pte;
33022      pud_t *pud;
33023      pmd_t *pmd;
33024 -
33025 +    pte_t *pte;
33026      struct page *page = NULL;
33027 -    struct mm_struct *mm = current->mm;
33028 -
33029 -    pgd = pgd_offset(mm, addr);
33030 -    //if (pgd_none(*pgd) || pgd_bad(*pgd))
33031 -	if (pgd_none_or_clear_bad(pgd))
33032 -        goto out;
33033 -    
33034 -    pud = pud_offset(pgd, addr);
33035 -    //if (pud_none(*pud) || pud_bad(*pud))
33036 -	if (pud_none_or_clear_bad(pud))
33037 -        goto out;
33038 -    
33039 -    pmd = pmd_offset(pud, addr);
33040 -    //if (pmd_none(*pmd) || pmd_bad(*pmd))
33041 -	if (pmd_none_or_clear_bad(pmd))
33042 -        goto out;
33043 -    
33044 -    ptep = pte_offset_map(pmd, addr);
33045 -    if (!ptep)
33046 -        goto out;
33047 -    pte = *ptep;
33048 -
33049 -    page = pte_page(pte);
33050 -    if (pfn_valid(__page_to_pfn(page))) {
33051 -        ;//printk(KERN_INFO "page frame struct is @ %p\n", page);
33052 -		//printk(KERN_INFO "pfn is %lu\n", __page_to_pfn(page));
33053 -	}
33054 -	
33055 -	pte_unmap(ptep);
33056  
33057 - out:
33058 +    pgd = pgd_offset(mm, address);
33059 +    if (!pgd_present(*pgd))
33060 +        goto do_return;
33061 +
33062 +    pud = pud_offset(pgd, address);
33063 +    if (!pud_present(*pud))
33064 +        goto do_return;
33065 +
33066 +    pmd = pmd_offset(pud, address);
33067 +    if (!pmd_present(*pmd))
33068 +        goto do_return;
33069 +
33070 +    pte = pte_offset_kernel(pmd, address);
33071 +    if (!pte_present(*pte))
33072 +        goto do_return;
33073 +
33074 +    page = pte_page(*pte);
33075 +do_return:
33076      return page;
33077  }
33078  */
33079 @@ -392,7 +381,7 @@ static struct page *walk_page_table(unsigned long addr)
33080  extern int isolate_lru_page(struct page *page);
33081  extern void putback_lru_page(struct page *page);
33082  
33083 -#if 0
33084 +#if 1
33085  static struct page *new_alloc_page(struct page *page, unsigned long node, int **x)
33086  {
33087  	return alloc_pages_exact_node(0, GFP_HIGHUSER_MOVABLE, 0);
33088 @@ -456,7 +445,7 @@ asmlinkage long sys_set_page_color(int cpu)
33089  				continue;
33090  			}
33091  			
33092 -			TRACE_TASK(current, "addr: %lu, pfn: %lu, _mapcount: %d, _count: %d\n", vma_itr->vm_start + PAGE_SIZE*i, __page_to_pfn(old_page), page_mapcount(old_page), page_count(old_page));
33093 +			TRACE_TASK(current, "addr: %08x, pfn: %x, _mapcount: %d, _count: %d\n", vma_itr->vm_start + PAGE_SIZE*i, __page_to_pfn(old_page), page_mapcount(old_page), page_count(old_page));
33094  			
33095  			if (page_mapcount(old_page) != 0) {
33096  				ret = isolate_lru_page(old_page);
33097 @@ -466,6 +455,7 @@ asmlinkage long sys_set_page_color(int cpu)
33098  					nr_pages++;
33099  				} else {
33100  					TRACE_TASK(current, "isolate_lru_page failed\n");
33101 +					TRACE_TASK(current, "page_lru = %d PageLRU = %d\n", page_lru(old_page), PageLRU(old_page));
33102  					nr_failed++;
33103  				}
33104  				put_page(old_page);
33105 @@ -506,6 +496,17 @@ asmlinkage long sys_set_page_color(int cpu)
33106  		}
33107  	}
33108  	
33109 +	/* handle sigpage and litmus ctrl_page */
33110 +	vma_itr = current->mm->mmap;
33111 +	while (vma_itr != NULL) {
33112 +		if (vma_itr->vm_start == tsk_rt(current)->addr_ctrl_page) {
33113 +			TRACE("litmus ctrl_page = %08x\n", vma_itr->vm_start);
33114 +			vma_itr->vm_page_prot = PAGE_SHARED;
33115 +			break;
33116 +		}
33117 +		vma_itr = vma_itr->vm_next;
33118 +	}
33119 +	
33120  	/* copy shared pages HERE */
33121  /*	
33122  	ret = 0;
33123 @@ -901,7 +902,7 @@ static int __init _init_litmus(void)
33124  #endif
33125  	
33126  	color_mask = ((cache_info_sets << line_size_log) - 1) ^ (PAGE_SIZE - 1);
33127 -	printk("Page color mask %08x\n", color_mask);
33128 +	printk("Page color mask %lx\n", color_mask);
33129  	return 0;
33130  }
33131  
33132 diff --git a/litmus/reservation.c b/litmus/reservation.c
33133 index 25e838c..8457b4b 100644
33134 --- a/litmus/reservation.c
33135 +++ b/litmus/reservation.c
33136 @@ -555,7 +555,7 @@ static void gmp_charge_budget(
33137  			if (res->state != RESERVATION_ACTIVE_IDLE)
33138  				TRACE("BUG!!!!!!!!!!!! gmp_charge_budget()\n");
33139  			TRACE("gmp_charge_budget INACTIVE R%u drain %llu\n", res->id, delta);
33140 -			//if (res->is_ghost == 1) {
33141 +			//if (res->is_ghost != NO_CPU) {
33142  				TRACE("DRAIN !!\n");
33143  				drained = 1;
33144  				res->ops->drain_budget(res, delta);
33145 @@ -574,7 +574,7 @@ static void gmp_charge_budget(
33146  			/* stop at the first ACTIVE reservation */
33147  		//	break;
33148  	}
33149 -	TRACE("finished charging budgets\n");
33150 +	//TRACE("finished charging budgets\n");
33151  }
33152  
33153  static void gmp_replenish_budgets(struct gmp_reservation_environment* gmp_env)
33154 @@ -586,12 +586,15 @@ static void gmp_replenish_budgets(struct gmp_reservation_environment* gmp_env)
33155  		res = list_entry(pos, struct reservation, list);
33156  		if (res->next_replenishment <= gmp_env->env.current_time) {
33157  			res->ops->replenish(res);
33158 +			if (res->is_ghost != NO_CPU) {
33159 +				TRACE("R%d replenished! scheduled_on=%d\n", res->id, res->scheduled_on);
33160 +			}
33161  		} else {
33162  			/* list is ordered by increasing depletion times */
33163  			break;
33164  		}
33165  	}
33166 -	TRACE("finished replenishing budgets\n");
33167 +	//TRACE("finished replenishing budgets\n");
33168  }
33169  
33170  #define EPSILON	50
33171 diff --git a/litmus/sched_mc2.c b/litmus/sched_mc2.c
33172 index 0d378c1..3aaa88c 100644
33173 --- a/litmus/sched_mc2.c
33174 +++ b/litmus/sched_mc2.c
33175 @@ -20,11 +20,15 @@
33176  #include <litmus/budget.h>
33177  #include <litmus/litmus_proc.h>
33178  #include <litmus/sched_trace.h>
33179 +#include <litmus/cache_proc.h>
33180  
33181  #include <litmus/mc2_common.h>
33182  #include <litmus/reservation.h>
33183  #include <litmus/polling_reservations.h>
33184  
33185 +extern int use_way_partition;
33186 +extern void do_way_partition(enum crit_level lv, int cpu);
33187 +
33188  /* _global_env - reservation container for level-C tasks*/
33189  struct gmp_reservation_environment _global_env;
33190  
33191 @@ -126,21 +130,6 @@ static struct reservation* res_find_by_id(struct mc2_cpu_state *state,
33192  	return res;
33193  }
33194  
33195 -/* mc2_update_time - update time for a given criticality level. 
33196 - *                   caller must hold a proper lock
33197 - *                   (cpu_state lock or global lock)
33198 - */
33199 -static void mc2_update_time(enum crit_level lv, 
33200 -                            struct mc2_cpu_state *state, lt_t time)
33201 -{
33202 -	if (lv < CRIT_LEVEL_C)
33203 -		sup_update_time(&state->sup_env, time);
33204 -	else if (lv == CRIT_LEVEL_C)
33205 -		gmp_update_time(&_global_env, time);
33206 -	else
33207 -		TRACE("update_time(): Criticality level error!!!!\n");
33208 -}
33209 -
33210  /* task_depart - remove a task from its reservation
33211   *               If the job has remaining budget, convert it to a ghost job
33212   *               and update crit_entries[]
33213 @@ -169,7 +158,7 @@ static void task_departs(struct task_struct *tsk, int job_complete)
33214  		
33215  		ce = &state->crit_entries[lv];
33216  		ce->running = tsk;
33217 -		res->is_ghost = 1;
33218 +		res->is_ghost = state->cpu;
33219  		gmp_add_event_after(&_global_env, res->cur_budget, res->id, EVENT_DRAIN);
33220  		TRACE_TASK(tsk, "BECOME GHOST at %llu\n", litmus_clock());
33221  		
33222 @@ -256,6 +245,36 @@ static int get_lowest_prio_cpu(lt_t priority)
33223  	return ret;
33224  }
33225  
33226 +/* mc2_update_time - update time for a given criticality level. 
33227 + *                   caller must hold a proper lock
33228 + *                   (cpu_state lock or global lock)
33229 + */
33230 +static void mc2_update_time(enum crit_level lv, 
33231 +                            struct mc2_cpu_state *state, lt_t time)
33232 +{
33233 +	int global_schedule_now;
33234 +	
33235 +	if (lv < CRIT_LEVEL_C)
33236 +		sup_update_time(&state->sup_env, time);
33237 +	else if (lv == CRIT_LEVEL_C) {
33238 +		global_schedule_now = gmp_update_time(&_global_env, time);
33239 +		while (global_schedule_now--) {
33240 +			int cpu = get_lowest_prio_cpu(0);
33241 +			if (cpu != NO_CPU) {
33242 +				raw_spin_lock(&_lowest_prio_cpu.lock);
33243 +				_lowest_prio_cpu.cpu_entries[cpu].will_schedule = true;
33244 +				raw_spin_unlock(&_lowest_prio_cpu.lock);
33245 +				TRACE("LOWEST CPU = P%d\n", cpu);
33246 +				litmus_reschedule(cpu);
33247 +			}
33248 +		} 
33249 +	}
33250 +	else
33251 +		TRACE("update_time(): Criticality level error!!!!\n");
33252 +	
33253 +	
33254 +}
33255 +
33256  /* NOTE: drops state->lock */
33257  /* mc2_update_timer_and_unlock - set a timer and g_timer and unlock 
33258   *                               Whenever res_env.current_time is updated,
33259 @@ -392,7 +411,7 @@ static lt_t mc2_update_ghost_state(struct mc2_cpu_state *state)
33260  				TRACE("GHOST FINISH id %d at %llu\n", 
33261  				      tinfo->mc2_param.res_id, litmus_clock());
33262  				ce->running = NULL;
33263 -				res->is_ghost = 0;
33264 +				res->is_ghost = NO_CPU;
33265  				
33266  				if (lv < CRIT_LEVEL_C) {
33267  					res = list_first_entry_or_null(
33268 @@ -485,7 +504,7 @@ static enum hrtimer_restart on_scheduling_timer(struct hrtimer *timer)
33269  		update = now + remain_budget;
33270  	}
33271  	
33272 -	TRACE_CUR("on_scheduling_timer at %llu, upd:%llu (for cpu=%d) g_schedule_now:%d\n", now, update, state->cpu, global_schedule_now);
33273 +	TRACE_CUR("on_scheduling_timer at %llu, upd:%llu (for cpu=%d) g_schedule_now:%d remain_budget:%llu\n", now, update, state->cpu, global_schedule_now, remain_budget);
33274  //printk(KERN_ALERT "on_scheduling_timer at %llu, upd:%llu (for cpu=%d) g_schedule_now:%d\n", now, update, state->cpu, global_schedule_now);
33275  	if (update <= now) {
33276  		litmus_reschedule_local();
33277 @@ -534,19 +553,21 @@ struct task_struct* mc2_dispatch(struct sup_reservation_environment* sup_env, st
33278  					return tsk;
33279  				} else {
33280  					ce = &state->crit_entries[lv];
33281 +					sup_scheduler_update_after(sup_env, res->cur_budget);
33282 +					res->blocked_by_ghost = 0;
33283 +					res->is_ghost = NO_CPU;
33284 +					return tsk;
33285 +/*
33286  					if (likely(!ce->running)) {
33287 -						/* If we found the next task, clear all flags */
33288  						sup_scheduler_update_after(sup_env, res->cur_budget);
33289  						res->blocked_by_ghost = 0;
33290 -						res->is_ghost = 0;
33291 +						res->is_ghost = NO_CPU;
33292  						return tsk;
33293  					} else {
33294 -						/* We cannot schedule the same criticality task
33295 -						   because the ghost job exists. Set blocked_by_ghost
33296 -						   flag not to charge budget */
33297  						res->blocked_by_ghost = 1;
33298  						TRACE_TASK(ce->running, " is GHOST\n");
33299  					}
33300 +*/
33301  				}
33302  			}
33303  		}
33304 @@ -562,10 +583,10 @@ struct task_struct* mc2_dispatch(struct sup_reservation_environment* sup_env, st
33305  	
33306  	cur_priority = _lowest_prio_cpu.cpu_entries[state->cpu].deadline;
33307  	
33308 -	TRACE("****** ACTIVE LIST ******\n");
33309 -	TRACE_TASK(_lowest_prio_cpu.cpu_entries[state->cpu].scheduled, "** CURRENT JOB deadline %llu **\n", cur_priority);
33310 +	//TRACE("****** ACTIVE LIST ******\n");
33311 +	//TRACE_TASK(_lowest_prio_cpu.cpu_entries[state->cpu].scheduled, "** CURRENT JOB deadline %llu **\n", cur_priority);
33312  	list_for_each_entry_safe(res, next, &_global_env.active_reservations, list) {
33313 -		TRACE("R%d deadline=%llu, scheduled_on=%d\n", res->id, res->priority, res->scheduled_on);
33314 +		//TRACE("R%d deadline=%llu, scheduled_on=%d\n", res->id, res->priority, res->scheduled_on);
33315  		if (res->state == RESERVATION_ACTIVE && res->scheduled_on == NO_CPU) {
33316  			tsk = res->ops->dispatch_client(res, &time_slice);
33317  			if (likely(tsk)) {
33318 @@ -580,7 +601,7 @@ struct task_struct* mc2_dispatch(struct sup_reservation_environment* sup_env, st
33319  						gmp_add_event_after(&_global_env, res->cur_budget, res->id, EVENT_DRAIN);
33320  						res->event_added = 1;
33321  						res->blocked_by_ghost = 0;
33322 -						res->is_ghost = 0;
33323 +						res->is_ghost = NO_CPU;
33324  						res->scheduled_on = state->cpu;
33325  						return tsk;
33326  					//} else {
33327 @@ -599,23 +620,23 @@ struct task_struct* mc2_dispatch(struct sup_reservation_environment* sup_env, st
33328  }
33329  
33330  /* not used now */
33331 -static void pre_schedule(struct task_struct *prev)
33332 +static void pre_schedule(struct task_struct *prev, int cpu)
33333  {
33334 -	enum crit_level lv;
33335 -	if (!is_realtime(prev) || !prev)
33336 +	if (!prev || !is_realtime(prev))
33337  		return;
33338  	
33339 -	lv = get_task_crit_level(prev);
33340 +	do_way_partition(CRIT_LEVEL_C, cpu);
33341  }
33342  
33343  /* not used now */
33344 -static void post_schedule(struct task_struct *next)
33345 +static void post_schedule(struct task_struct *next, int cpu)
33346  {
33347 -	enum crit_level lv;
33348 -	if (!is_realtime(next) || !next)
33349 +	enum crit_level lev;
33350 +	if (!next || !is_realtime(next))
33351  		return;
33352  	
33353 -	lv = get_task_crit_level(next);
33354 +	lev = get_task_crit_level(next);
33355 +	do_way_partition(lev, cpu);
33356  }
33357  
33358  /* mc2_schedule - main scheduler function. pick the next task to run
33359 @@ -626,7 +647,7 @@ static struct task_struct* mc2_schedule(struct task_struct * prev)
33360  	lt_t now;
33361  	struct mc2_cpu_state *state = local_cpu_state();
33362  
33363 -	pre_schedule(prev);
33364 +	pre_schedule(prev, state->cpu);
33365  	
33366  	raw_spin_lock(&_global_env.lock);
33367  	raw_spin_lock(&state->lock);
33368 @@ -698,7 +719,7 @@ static struct task_struct* mc2_schedule(struct task_struct * prev)
33369  		TRACE_TASK(state->scheduled, "scheduled.\n");
33370  	}
33371  	
33372 -	post_schedule(state->scheduled);
33373 +	post_schedule(state->scheduled, state->cpu);
33374  	
33375  	return state->scheduled;
33376  }
33377 @@ -817,7 +838,7 @@ static long mc2_complete_job(void)
33378  		
33379  		/* set next_replenishtime to synchronous release time */
33380  		res->next_replenishment = tsk_rt(current)->sporadic_release_time;
33381 -		
33382 +/*		
33383  		if (get_task_crit_level(current) == CRIT_LEVEL_A) {
33384  			struct table_driven_reservation *tdres;
33385  			tdres = container_of(res, struct table_driven_reservation, res);
33386 @@ -825,6 +846,7 @@ static long mc2_complete_job(void)
33387  			tdres->major_cycle_start = tsk_rt(current)->sporadic_release_time;
33388  			res->next_replenishment += tdres->intervals[0].start;			
33389  		}
33390 +*/		
33391  		res->cur_budget = 0;
33392  		res->env->change_state(res->env, res, RESERVATION_DEPLETED);
33393  		
33394 @@ -849,15 +871,15 @@ static long mc2_complete_job(void)
33395  		set_current_state(TASK_INTERRUPTIBLE);
33396  		preempt_enable_no_resched();
33397  		err = schedule_hrtimeout(&next_release, HRTIMER_MODE_ABS);
33398 -		if (get_task_crit_level(current) == CRIT_LEVEL_A)
33399 -			sched_trace_task_release(current);
33400 +//		if (get_task_crit_level(current) == CRIT_LEVEL_A)
33401 +//			sched_trace_task_release(current);
33402  	} else {
33403  		/* release the next job immediately */
33404  		err = 0;
33405  		TRACE_CUR("TARDY: release=%llu now=%llu\n", get_release(current), litmus_clock());
33406  		preempt_enable();
33407 -		if (get_task_crit_level(current) == CRIT_LEVEL_A)
33408 -			sched_trace_task_release(current);
33409 +//		if (get_task_crit_level(current) == CRIT_LEVEL_A)
33410 +//			sched_trace_task_release(current);
33411  	}
33412  
33413  	TRACE_CUR("mc2_complete_job returns at %llu\n", litmus_clock());
33414 @@ -1073,11 +1095,13 @@ static long mc2_reservation_destroy(unsigned int reservation_id, int cpu)
33415  		sup_env = &state->sup_env;
33416  		list_for_each_entry_safe(res, next, &sup_env->depleted_reservations, list) {
33417  			if (res->id == reservation_id) {
33418 -				if (lv == CRIT_LEVEL_A) {
33419 +/*
33420 +			if (lv == CRIT_LEVEL_A) {
33421  					struct table_driven_reservation *tdres;
33422  					tdres = container_of(res, struct table_driven_reservation, res);
33423  					kfree(tdres->intervals);
33424 -				}
33425 +			}
33426 +*/
33427  				list_del(&res->list);
33428  				kfree(res);
33429  				found = 1;
33430 @@ -1087,11 +1111,12 @@ static long mc2_reservation_destroy(unsigned int reservation_id, int cpu)
33431  		if (!found) {
33432  			list_for_each_entry_safe(res, next, &sup_env->inactive_reservations, list) {
33433  				if (res->id == reservation_id) {
33434 -					if (lv == CRIT_LEVEL_A) {
33435 +/*					if (lv == CRIT_LEVEL_A) {
33436  						struct table_driven_reservation *tdres;
33437  						tdres = container_of(res, struct table_driven_reservation, res);
33438  						kfree(tdres->intervals);
33439  					}
33440 +*/
33441  					list_del(&res->list);
33442  					kfree(res);
33443  					found = 1;
33444 @@ -1102,11 +1127,12 @@ static long mc2_reservation_destroy(unsigned int reservation_id, int cpu)
33445  		if (!found) {
33446  			list_for_each_entry_safe(res, next, &sup_env->active_reservations, list) {
33447  				if (res->id == reservation_id) {
33448 -					if (lv == CRIT_LEVEL_A) {
33449 +/*					if (lv == CRIT_LEVEL_A) {
33450  						struct table_driven_reservation *tdres;
33451  						tdres = container_of(res, struct table_driven_reservation, res);
33452  						kfree(tdres->intervals);
33453  					}
33454 +*/
33455  					list_del(&res->list);
33456  					kfree(res);
33457  					found = 1;
33458 @@ -1249,7 +1275,7 @@ static long create_polling_reservation(
33459  				config->polling_params.offset);
33460  			pres->res.id = config->id;
33461  			pres->res.blocked_by_ghost = 0;
33462 -			pres->res.is_ghost = 0;
33463 +			pres->res.is_ghost = NO_CPU;
33464  			if (!use_edf)
33465  				pres->res.priority = config->priority;
33466  			sup_add_new_reservation(&state->sup_env, &pres->res);
33467 @@ -1274,7 +1300,7 @@ static long create_polling_reservation(
33468  			pres->res.id = config->id;
33469  			pres->res.blocked_by_ghost = 0;
33470  			pres->res.scheduled_on = NO_CPU;
33471 -			pres->res.is_ghost = 0;
33472 +			pres->res.is_ghost = NO_CPU;
33473  			if (!use_edf)
33474  				pres->res.priority = config->priority;
33475  			gmp_add_new_reservation(&_global_env, &pres->res);
33476 @@ -1511,7 +1537,7 @@ static void mc2_finish_switch(struct task_struct *prev)
33477  	struct mc2_cpu_state *state = local_cpu_state();
33478  	
33479  	state->scheduled = is_realtime(current) ? current : NULL;
33480 -	TRACE("FINISH CXS! from %s/%d to %s/%d\n", prev ? (prev)->comm : "null", prev ? (prev)->pid : 0, current ? (current)->comm : "null", current ? (current)->pid : 0);
33481 +	//TRACE("FINISH CXS! from %s/%d to %s/%d\n", prev ? (prev)->comm : "null", prev ? (prev)->pid : 0, current ? (current)->comm : "null", current ? (current)->pid : 0);
33482  }
33483  
33484  static long mc2_deactivate_plugin(void)
33485 -- 
33486 1.8.1.2
33487 
33488 
33489 From e5c2080e0d7cb2201d021edd7d89f3c2e783744e Mon Sep 17 00:00:00 2001
33490 From: Namhoon Kim <namhoonk@cs.unc.edu>
33491 Date: Tue, 10 Mar 2015 11:42:17 -0400
33492 Subject: [PATCH 098/119] Add use_set_partition and IRQ-mode cache lockdown
33493 
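Add a use_set_partition switch alongside the existing use_way_partition one,
save and restore the per-CPU L2 lockdown registers around interrupt handling
(enter_irq_mode()/exit_irq_mode() in handle_IRQ(), switching to the shared
level-C mask while an IRQ runs), and make sys_set_page_color() return early
when set partitioning is disabled. sys_null_call() now writes to a freshly
allocated buffer instead of flushing the whole cache.

For illustration only (not part of this patch), a minimal userspace sketch
for flipping the new switch; the /proc/sys/litmus path is an assumption
based on how this series registers its sysctl tables:

    /* Hypothetical helper: enable set partitioning at runtime. */
    #include <stdio.h>

    int main(void)
    {
        FILE *f = fopen("/proc/sys/litmus/use_set_partition", "w");

        if (!f) {
            perror("use_set_partition");
            return 1;
        }
        fputs("1\n", f);    /* 0 = off, 1 = on (bounded by extra1/extra2) */
        fclose(f);
        return 0;
    }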
33494 ---
33495  arch/arm/kernel/irq.c       |  4 +++
33496  include/litmus/cache_proc.h |  2 ++
33497  litmus/cache_proc.c         | 68 ++++++++++++++++++++++++++++++++++++++++++++-
33498  litmus/litmus.c             | 20 +++++++++++--
33499  litmus/sched_mc2.c          |  4 +--
33500  5 files changed, 92 insertions(+), 6 deletions(-)
33501 
33502 diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
33503 index 9723d17..ce01835 100644
33504 --- a/arch/arm/kernel/irq.c
33505 +++ b/arch/arm/kernel/irq.c
33506 @@ -42,6 +42,8 @@
33507  #include <asm/mach/irq.h>
33508  #include <asm/mach/time.h>
33509  
33510 +#include <litmus/cache_proc.h>
33511 +
33512  unsigned long irq_err_count;
33513  
33514  int arch_show_interrupts(struct seq_file *p, int prec)
33515 @@ -66,6 +68,7 @@ void handle_IRQ(unsigned int irq, struct pt_regs *regs)
33516  {
33517  	struct pt_regs *old_regs = set_irq_regs(regs);
33518  
33519 +	enter_irq_mode();
33520  	irq_enter();
33521  
33522  	/*
33523 @@ -81,6 +84,7 @@ void handle_IRQ(unsigned int irq, struct pt_regs *regs)
33524  	}
33525  
33526  	irq_exit();
33527 +	exit_irq_mode();
33528  	set_irq_regs(old_regs);
33529  }
33530  
33531 diff --git a/include/litmus/cache_proc.h b/include/litmus/cache_proc.h
33532 index a7a740e..5a66c34 100644
33533 --- a/include/litmus/cache_proc.h
33534 +++ b/include/litmus/cache_proc.h
33535 @@ -4,6 +4,8 @@
33536  #ifdef __KERNEL__
33537  
33538  void litmus_setup_lockdown(void __iomem*, u32);
33539 +void enter_irq_mode(void);
33540 +void exit_irq_mode(void);
33541  
33542  #endif
33543  
33544 diff --git a/litmus/cache_proc.c b/litmus/cache_proc.c
33545 index f315391..bb9d341 100644
33546 --- a/litmus/cache_proc.c
33547 +++ b/litmus/cache_proc.c
33548 @@ -48,6 +48,22 @@ u32 way_partitions[5] = {
33549  	0xFFFF00FF, /* lv C */
33550  };
33551  
33552 +u32 prev_lockdown_d_reg[5] = {
33553 +	0x00000000,
33554 +	0x00000000,
33555 +	0x00000000,
33556 +	0x00000000,
33557 +	0xFFFF00FF, /* share with level-C */
33558 +};
33559 +
33560 +u32 prev_lockdown_i_reg[5] = {
33561 +	0x00000000,
33562 +	0x00000000,
33563 +	0x00000000,
33564 +	0x00000000,
33565 +	0xFFFF00FF, /* share with level-C */
33566 +};
33567 +
33568  static void __iomem *cache_base;
33569  static void __iomem *lockreg_d;
33570  static void __iomem *lockreg_i;
33571 @@ -78,6 +94,7 @@ int l2_usable_sets;
33572  int lock_all;
33573  int nr_lockregs;
33574  int use_way_partition;
33575 +int use_set_partition;
33576  
33577  extern void set_number_of_colors(int colornr);
33578  
33579 @@ -222,7 +239,26 @@ void do_way_partition(enum crit_level lv, int cpu)
33580  		}
33581  	}
33582  }
33583 -		
33584 +
33585 +void enter_irq_mode(void)
33586 +{
33587 +	int cpu = smp_processor_id();
33588 +	
33589 +	prev_lockdown_i_reg[cpu] = readl_relaxed(ld_i_reg(cpu));
33590 +	prev_lockdown_d_reg[cpu] = readl_relaxed(ld_d_reg(cpu));
33591 +	
33592 +	writel_relaxed(prev_lockdown_i_reg[4], ld_i_reg(cpu));
33593 +	writel_relaxed(prev_lockdown_d_reg[4], ld_d_reg(cpu));
33594 +}
33595 +
33596 +void exit_irq_mode(void)
33597 +{
33598 +	int cpu = smp_processor_id();
33599 +	
33600 +	writel_relaxed(prev_lockdown_i_reg[cpu], ld_i_reg(cpu));
33601 +	writel_relaxed(prev_lockdown_d_reg[cpu], ld_d_reg(cpu));	
33602 +}
33603 +
33604  int l2_usable_sets_handler(struct ctl_table *table, int write, void __user *buffer,
33605  		size_t *lenp, loff_t *ppos)
33606  {
33607 @@ -268,6 +304,26 @@ out:
33608  	return ret;
33609  }
33610  
33611 +int use_set_partition_handler(struct ctl_table *table, int write, void __user *buffer,
33612 +		size_t *lenp, loff_t *ppos)
33613 +{
33614 +	int ret = 0;
33615 +	
33616 +	mutex_lock(&lockdown_proc);
33617 +	
33618 +	flush_cache_all();
33619 +	
33620 +	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
33621 +	if (ret)
33622 +		goto out;
33623 +		
33624 +	printk(KERN_INFO "use_set_partition : %d\n", use_set_partition);
33625 +
33626 +out:
33627 +	mutex_unlock(&lockdown_proc);
33628 +	return ret;
33629 +}
33630 +
33631  static struct ctl_table cache_table[] =
33632  {
33633  	{
33634 @@ -306,6 +362,15 @@ static struct ctl_table cache_table[] =
33635  		.extra1		= &zero,
33636  		.extra2		= &one,
33637  	},
33638 +	{
33639 +		.procname	= "use_set_partition",
33640 +		.mode		= 0666,
33641 +		.proc_handler	= use_set_partition_handler,
33642 +		.data		= &use_set_partition,
33643 +		.maxlen		= sizeof(use_set_partition),
33644 +		.extra1		= &zero,
33645 +		.extra2		= &one,
33646 +	},
33647  	{ }
33648  };
33649  
33650 @@ -335,6 +400,7 @@ static int __init litmus_sysctl_init(void)
33651  	l2_usable_ways = 16;
33652  	l2_usable_sets = 5;
33653  	use_way_partition = 0;
33654 +	use_set_partition = 0;
33655  	set_number_of_colors(l2_usable_sets);
33656  
33657  out:
33658 diff --git a/litmus/litmus.c b/litmus/litmus.c
33659 index 344c68c..77c609b 100644
33660 --- a/litmus/litmus.c
33661 +++ b/litmus/litmus.c
33662 @@ -314,8 +314,17 @@ asmlinkage long sys_null_call(cycles_t __user *ts)
33663  		now = litmus_get_cycles();
33664  		ret = put_user(now, ts);
33665  	}
33666 -	else
33667 -		flush_cache_all();
33668 +	else {
33669 +		//flush_cache_all();
33670 +		int *dummy;
33671 +		int size = 20, i, t = 0;
33672 +		dummy = kmalloc(PAGE_SIZE*size, GFP_ATOMIC);
33673 +		for (i = 0; i<PAGE_SIZE*size/sizeof(int); i++) {
33674 +			dummy[i] = t++;
33675 +		}
33676 +		
33677 +		kfree(dummy);
33678 +	}
33679  
33680  	return ret;
33681  }
33682 @@ -391,6 +400,8 @@ extern struct page *new_alloc_page(struct page *page, unsigned long node, int **
33683  
33684  #endif
33685  
33686 +extern int use_set_partition;
33687 +
33688  asmlinkage long sys_set_page_color(int cpu)
33689  {
33690  	long ret = 0;
33691 @@ -401,10 +412,13 @@ asmlinkage long sys_set_page_color(int cpu)
33692  	//struct task_page *entry = NULL;
33693  	int nr_pages = 0, nr_shared_pages = 0, nr_failed = 0;
33694  	unsigned long node;
33695 -	
33696 +		
33697  	LIST_HEAD(pagelist);
33698  	LIST_HEAD(shared_pagelist);
33699  	
33700 +	if (use_set_partition == 0)
33701 +		return 0;
33702 +	
33703  	down_read(&current->mm->mmap_sem);
33704  	TRACE_TASK(current, "SYSCALL set_page_color\n");
33705  	vma_itr = current->mm->mmap;
33706 diff --git a/litmus/sched_mc2.c b/litmus/sched_mc2.c
33707 index 3aaa88c..aa6452a 100644
33708 --- a/litmus/sched_mc2.c
33709 +++ b/litmus/sched_mc2.c
33710 @@ -655,9 +655,9 @@ static struct task_struct* mc2_schedule(struct task_struct * prev)
33711  	//BUG_ON(state->scheduled && state->scheduled != prev);
33712  	//BUG_ON(state->scheduled && !is_realtime(prev));
33713  	if (state->scheduled && state->scheduled != prev)
33714 -		printk(KERN_ALERT "BUG1!!!!!!!! %s %s\n", state->scheduled ? (state->scheduled)->comm : "null", prev ? (prev)->comm : "null");
33715 +		; //printk(KERN_ALERT "BUG1!!!!!!!! %s %s\n", state->scheduled ? (state->scheduled)->comm : "null", prev ? (prev)->comm : "null");
33716  	if (state->scheduled && !is_realtime(prev))
33717 -		printk(KERN_ALERT "BUG2!!!!!!!! \n");
33718 +		; //printk(KERN_ALERT "BUG2!!!!!!!! \n");
33719  
33720  	/* update time */
33721  	state->sup_env.will_schedule = true;
33722 -- 
33723 1.8.1.2
33724 
33725 
33726 From a2db9de00d3e16763c85d8694194f66c53eca8af Mon Sep 17 00:00:00 2001
33727 From: Namhoon Kim <namhoonk@cs.unc.edu>
33728 Date: Fri, 20 Mar 2015 22:11:29 -0400
33729 Subject: [PATCH 099/119] Fix set partition bugs
33730 
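Rework the node-to-color mapping in new_alloc_page(), add a set_partitions[]
table plus a conflict warning to do_way_partition(), print the lockdown
registers unconditionally in lock_all_handler(), comment out the level-C
ghost-job check in mc2_dispatch(), re-enable the task-release trace point
for tardy jobs, and enlarge the sched_task_trace buffer.

For illustration only (not part of this patch), a standalone sketch of the
new color decode for nodes 0-4; the decode_color() wrapper exists only in
this sketch, and the fall-through from the per-CPU cases into case 4 is
benign because the per-CPU result is already in the range 0-15:

    #include <stdio.h>

    /* node 0-3: per-CPU level A/B pools; node 4: shared level-C pool */
    static unsigned int decode_color(unsigned int rnd, unsigned long node)
    {
        unsigned int color = rnd;

        switch (node) {
        case 0:
        case 1:
        case 2:
        case 3:
            color = (color % 4) * 4 + node; /* every 4th color, offset by CPU */
            /* fall through */
        case 4:
            color = color % 16;             /* level C may use any of the 16 colors */
            break;
        }
        return color;
    }

    int main(void)
    {
        unsigned long node;

        for (node = 0; node <= 4; node++)
            printf("node %lu, rnd 7 -> color %u\n", node, decode_color(7, node));
        return 0;
    }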
33731 ---
33732  litmus/bank_proc.c        | 10 +++++++++-
33733  litmus/cache_proc.c       | 18 +++++++++++++++++-
33734  litmus/sched_mc2.c        |  6 ++++--
33735  litmus/sched_task_trace.c |  2 +-
33736  4 files changed, 31 insertions(+), 5 deletions(-)
33737 
33738 diff --git a/litmus/bank_proc.c b/litmus/bank_proc.c
33739 index 53c20db..05c7fc3 100644
33740 --- a/litmus/bank_proc.c
33741 +++ b/litmus/bank_proc.c
33742 @@ -229,6 +229,14 @@ struct page *new_alloc_page(struct page *page, unsigned long node, int **x)
33743  	// Decode the node to decide what color pages we should provide
33744  	switch(node ){
33745  		case 0:
33746 +		case 1:
33747 +		case 2:
33748 +		case 3:
33749 +			color = (color%4) * 4 + node;
33750 +		case 4:
33751 +			color = (color%16);
33752 +/*
33753 +		case 0:
33754  		case 1: 
33755  		case 2: 
33756  		case 3:
33757 @@ -241,8 +249,8 @@ struct page *new_alloc_page(struct page *page, unsigned long node, int **x)
33758  			break;
33759  		default:
33760  			TRACE_CUR("Wrong color %lu\n", color);	
33761 -//			printk(KERN_WARNING "Wrong color %lu\n", color);
33762  			return rPage;
33763 +*/
33764  	}
33765  
33766  
33767 diff --git a/litmus/cache_proc.c b/litmus/cache_proc.c
33768 index bb9d341..68f68bf 100644
33769 --- a/litmus/cache_proc.c
33770 +++ b/litmus/cache_proc.c
33771 @@ -48,6 +48,11 @@ u32 way_partitions[5] = {
33772  	0xFFFF00FF, /* lv C */
33773  };
33774  
33775 +u32 set_partitions[2] = {
33776 +	0xFFFFFF00, /* cpuX A and B */
33777 +	0xFFFF00FF, /* lv C */
33778 +};
33779 +
33780  u32 prev_lockdown_d_reg[5] = {
33781  	0x00000000,
33782  	0x00000000,
33783 @@ -190,8 +195,8 @@ int lock_all_handler(struct ctl_table *table, int write, void __user *buffer,
33784  			writel_relaxed(nr_unlocked_way[0], ld_d_reg(i));
33785  			writel_relaxed(nr_unlocked_way[0], ld_i_reg(i));
33786  		}
33787 -		print_lockdown_registers();
33788  	}
33789 +	print_lockdown_registers();
33790  
33791  out:
33792  	mutex_unlock(&lockdown_proc);
33793 @@ -229,6 +234,9 @@ out:
33794  
33795  void do_way_partition(enum crit_level lv, int cpu)
33796  {
33797 +	if (use_set_partition == 1 && use_way_partition == 1)
33798 +		printk(KERN_ALERT "BOTH SET, WAY ARE SET!!!!\n");
33799 +	
33800  	if (use_way_partition == 1) {
33801  		if (lv < CRIT_LEVEL_C) {
33802  			writel_relaxed(way_partitions[cpu], ld_d_reg(cpu));
33803 @@ -237,6 +245,14 @@ void do_way_partition(enum crit_level lv, int cpu)
33804  			writel_relaxed(way_partitions[4], ld_d_reg(cpu));
33805  			writel_relaxed(way_partitions[4], ld_i_reg(cpu));
33806  		}
33807 +	} else if (use_set_partition == 1) {
33808 +		if (lv < CRIT_LEVEL_C) {
33809 +			writel_relaxed(set_partitions[0], ld_d_reg(cpu));
33810 +			writel_relaxed(set_partitions[0], ld_i_reg(cpu));
33811 +		} else {
33812 +			writel_relaxed(set_partitions[1], ld_d_reg(cpu));
33813 +			writel_relaxed(set_partitions[1], ld_i_reg(cpu));
33814 +		}
33815  	}
33816  }
33817  
33818 diff --git a/litmus/sched_mc2.c b/litmus/sched_mc2.c
33819 index aa6452a..1c7cdfe 100644
33820 --- a/litmus/sched_mc2.c
33821 +++ b/litmus/sched_mc2.c
33822 @@ -575,12 +575,13 @@ struct task_struct* mc2_dispatch(struct sup_reservation_environment* sup_env, st
33823  	
33824  	/* no eligible level A or B tasks exists */
33825  	/* check the ghost job */
33826 +	/*
33827  	ce = &state->crit_entries[CRIT_LEVEL_C];
33828  	if (ce->running) {
33829  		TRACE_TASK(ce->running," is GHOST\n");
33830  		return NULL;
33831  	}
33832 -	
33833 +	*/
33834  	cur_priority = _lowest_prio_cpu.cpu_entries[state->cpu].deadline;
33835  	
33836  	//TRACE("****** ACTIVE LIST ******\n");
33837 @@ -858,6 +859,7 @@ static long mc2_complete_job(void)
33838  		local_irq_restore(flags);
33839  		preempt_enable();
33840  	}
33841 +	
33842  	sched_trace_task_completion(current, 0);
33843  	
33844  	/* update the next release time and deadline */
33845 @@ -879,7 +881,7 @@ static long mc2_complete_job(void)
33846  		TRACE_CUR("TARDY: release=%llu now=%llu\n", get_release(current), litmus_clock());
33847  		preempt_enable();
33848  //		if (get_task_crit_level(current) == CRIT_LEVEL_A)
33849 -//			sched_trace_task_release(current);
33850 +		sched_trace_task_release(current);
33851  	}
33852  
33853  	TRACE_CUR("mc2_complete_job returns at %llu\n", litmus_clock());
33854 diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c
33855 index 933e7e4..6224f8c 100644
33856 --- a/litmus/sched_task_trace.c
33857 +++ b/litmus/sched_task_trace.c
33858 @@ -15,7 +15,7 @@
33859  #include <litmus/feather_trace.h>
33860  #include <litmus/ftdev.h>
33861  
33862 -#define NO_EVENTS		(1 << CONFIG_SCHED_TASK_TRACE_SHIFT)
33863 +#define NO_EVENTS		(1 << (CONFIG_SCHED_TASK_TRACE_SHIFT+3))
33864  
33865  #define now() litmus_clock()
33866  
33867 -- 
33868 1.8.1.2
33869 
33870 
33871 From 623fe6255439add90f416df69b92134fbd01f342 Mon Sep 17 00:00:00 2001
33872 From: Namhoon Kim <namhoonk@cs.unc.edu>
33873 Date: Fri, 20 Mar 2015 22:23:14 -0400
33874 Subject: [PATCH 100/119] Rename do_way_partition() to do_partition()
33875 
33876 ---
33877  litmus/cache_proc.c | 2 +-
33878  litmus/sched_mc2.c  | 7 +++----
33879  2 files changed, 4 insertions(+), 5 deletions(-)
33880 
33881 diff --git a/litmus/cache_proc.c b/litmus/cache_proc.c
33882 index 68f68bf..59f166e 100644
33883 --- a/litmus/cache_proc.c
33884 +++ b/litmus/cache_proc.c
33885 @@ -232,7 +232,7 @@ out:
33886  	return ret;
33887  }
33888  
33889 -void do_way_partition(enum crit_level lv, int cpu)
33890 +void do_partition(enum crit_level lv, int cpu)
33891  {
33892  	if (use_set_partition == 1 && use_way_partition == 1)
33893  		printk(KERN_ALERT "BOTH SET, WAY ARE SET!!!!\n");
33894 diff --git a/litmus/sched_mc2.c b/litmus/sched_mc2.c
33895 index 1c7cdfe..9c6d762 100644
33896 --- a/litmus/sched_mc2.c
33897 +++ b/litmus/sched_mc2.c
33898 @@ -26,8 +26,7 @@
33899  #include <litmus/reservation.h>
33900  #include <litmus/polling_reservations.h>
33901  
33902 -extern int use_way_partition;
33903 -extern void do_way_partition(enum crit_level lv, int cpu);
33904 +extern void do_partition(enum crit_level lv, int cpu);
33905  
33906  /* _global_env - reservation container for level-C tasks*/
33907  struct gmp_reservation_environment _global_env;
33908 @@ -626,7 +625,7 @@ static void pre_schedule(struct task_struct *prev, int cpu)
33909  	if (!prev || !is_realtime(prev))
33910  		return;
33911  	
33912 -	do_way_partition(CRIT_LEVEL_C, cpu);
33913 +	do_partition(CRIT_LEVEL_C, cpu);
33914  }
33915  
33916  /* not used now */
33917 @@ -637,7 +636,7 @@ static void post_schedule(struct task_struct *next, int cpu)
33918  		return;
33919  	
33920  	lev = get_task_crit_level(next);
33921 -	do_way_partition(lev, cpu);
33922 +	do_partition(lev, cpu);
33923  }
33924  
33925  /* mc2_schedule - main scheduler function. pick the next task to run
33926 -- 
33927 1.8.1.2
33928 
33929 
33930 From bf0b4079ab52d1eba4c99dfe404548fefea4b94d Mon Sep 17 00:00:00 2001
33931 From: ChengYang Fu <chengyangfu@gmail.com>
33932 Date: Sun, 22 Mar 2015 15:05:52 -0400
33933 Subject: [PATCH 101/119] Assume interleaving is off; add sysctl variables
33934  to adjust the cache-color and bank partitions
33935 
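Rewrite litmus/bank_proc.c for the i.MX6 SABRE SD address map with DRAM
interleaving disabled: bank bits sit at 27-29 (BANK_MASK 0x38000000) and
cache-color bits at 12-15 (CACHE_MASK 0x0000f000), giving 8 banks x 16
colors = 128 page lists. Per-node set and bank bitmasks are exposed as
sysctls (C0_LA_set ... Call_LC_bank) under the litmus directory, and
new_alloc_page() now walks the set bits of the requesting node's masks
round-robin to pick a page list.

For illustration only (not part of this patch), a standalone, explicitly
typed sketch of the two helpers and of the page-list index computed in
new_alloc_page():

    #include <stdio.h>

    static unsigned int counting_one_set(unsigned int v)
    {
        unsigned int c;

        for (c = 0; v; v >>= 1)
            c += v & 1;                     /* count set bits */
        return c;
    }

    static unsigned int num_by_bitmask_index(unsigned int bitmask, unsigned int index)
    {
        unsigned int pos = 0;

        for (;;) {                          /* position of the index-th set bit */
            if (bitmask & 1) {
                if (index == 0)
                    return pos;
                index--;
            }
            pos++;
            bitmask >>= 1;
        }
    }

    int main(void)
    {
        /* level-C pools from this patch: colors 0x0000ff00, banks 0x0000000f */
        unsigned int set_mask = 0x0000ff00, bank_mask = 0x0000000f;
        unsigned int number_cachecolors = 16;
        unsigned int k;

        for (k = 0; k < 4; k++) {
            unsigned int color = num_by_bitmask_index(set_mask,
                                     k % counting_one_set(set_mask));
            unsigned int bank = num_by_bitmask_index(bank_mask,
                                    k % counting_one_set(bank_mask));
            printf("allocation %u -> color %u, bank %u, page list %u\n",
                   k, color, bank, color + bank * number_cachecolors);
        }
        return 0;
    }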
33936 ---
33937  litmus/bank_proc.c | 548 +++++++++++++++++++++++++++++++++++++++--------------
33938  litmus/litmus.c    |   5 +-
33939  2 files changed, 407 insertions(+), 146 deletions(-)
33940 
33941 diff --git a/litmus/bank_proc.c b/litmus/bank_proc.c
33942 index 05c7fc3..7cf07ee 100644
33943 --- a/litmus/bank_proc.c
33944 +++ b/litmus/bank_proc.c
33945 @@ -18,22 +18,58 @@
33946  #include <litmus/litmus_proc.h>
33947  #include <litmus/sched_trace.h>
33948  
33949 -
33950  #define LITMUS_LOCKDEP_NAME_MAX_LEN 50
33951  
33952  // This Address Decoding is used in imx6-sabredsd platform
33953 -#define CACHE_MASK 0x0000f000      
33954 -#define BANK_MASK  0x00007000      
33955 -#define OFFSET_SHIFT 12
33956 +#define BANK_MASK  0x38000000     
33957 +#define BANK_SHIFT  27
33958 +#define CACHE_MASK  0x0000f000      
33959 +#define CACHE_SHIFT 12
33960 +
33961 +#define PAGES_PER_COLOR 256
33962 +unsigned int NUM_PAGE_LIST;  //8*16
33963 +
33964 +unsigned int number_banks;
33965 +unsigned int number_cachecolors;
33966 +
33967 +unsigned int set_partition_max = 0x0000ffff;
33968 +unsigned int set_partition_min = 0;
33969 +unsigned int bank_partition_max = 0x000000ff;
33970 +unsigned int bank_partition_min = 0;
33971 +
33972 +unsigned int set_partition[9] = {
33973 +        0x00000003,  /* Core 0, and Level A*/
33974 +        0x00000003,  /* Core 0, and Level B*/
33975 +        0x0000000C,  /* Core 1, and Level A*/
33976 +        0x0000000C,  /* Core 1, and Level B*/
33977 +        0x00000030,  /* Core 2, and Level A*/
33978 +        0x00000030,  /* Core 2, and Level B*/
33979 +        0x000000C0,  /* Core 3, and Level A*/
33980 +        0x000000C0,  /* Core 3, and Level B*/
33981 +        0x0000ff00,  /* Level C */
33982 +};
33983  
33984 -#define PAGES_PER_COLOR 1024
33985 +unsigned int bank_partition[9] = {
33986 +        0x00000010,  /* Core 0, and Level A*/
33987 +        0x00000010,  /* Core 0, and Level B*/
33988 +        0x00000020,  /* Core 1, and Level A*/
33989 +        0x00000020,  /* Core 1, and Level B*/
33990 +        0x00000040,  /* Core 2, and Level A*/
33991 +        0x00000040,  /* Core 2, and Level B*/
33992 +        0x00000080,  /* Core 3, and Level A*/
33993 +        0x00000080,  /* Core 3, and Level B*/
33994 +        0x0000000f,  /* Level C */
33995 +};
33996  
33997 -unsigned long curr_cachecolor;
33998 -int used_cachecolor;
33999 +unsigned int set_index[9] = {
34000 +    0, 0, 0, 0, 0, 0, 0, 0, 0
34001 +};
34002  
34003 +unsigned int bank_index[9] = {
34004 +    0, 0, 0, 0, 0, 0, 0, 0, 0
34005 +};
34006  
34007 -unsigned long number_banks;
34008 -unsigned long number_cachecolors;
34009 +struct mutex void_lockdown_proc;
34010  
34011  
34012  /*
34013 @@ -46,39 +82,80 @@ struct color_group {
34014  	atomic_t nr_pages;
34015  };
34016  
34017 +
34018 +static struct color_group *color_groups;
34019 +
34020  /*
34021 - * This is old code which is not used in current version
34022 - */ 
34023 -/*
34024 -static struct alloced_pages {
34025 -	spinlock_t lock;
34026 -	struct list_head list;
34027 -} alloced_pages;
34028 + * Naive function to count the number of 1's
34029 + */
34030 +unsigned int counting_one_set(unsigned int v)
34031 +{
34032 +//    unsigned int v; // count the number of bits set in v
34033 +    unsigned int c; // c accumulates the total bits set in v
34034 +
34035 +    for (c = 0; v; v >>= 1)
34036 +    {
34037 +        c += v & 1;
34038 +    }
34039 +    return c;
34040 +}
34041  
34042 -struct alloced_page {
34043 -	struct page *page;
34044 -	struct vm_area_struct *vma;
34045 -	struct list_head list;
34046 -};
34047 -*/
34048 +unsigned int two_exp(unsigned int e)
34049 +{
34050 +    unsigned int v = 1;
34051 +    for (e; e>0; e-- )
34052 +    {
34053 +        v=v*2;
34054 +    }
34055 +    return v;
34056 +}
34057 +
34058 +unsigned int num_by_bitmask_index(bitmask, index)
34059 +{
34060 +    unsigned int pos = 0;
34061 +
34062 +    while(true)
34063 +    {
34064 +        if(index ==0 && (bitmask & 1)==1)
34065 +        {
34066 +            break;
34067 +        }
34068 +        if(index !=0 && (bitmask & 1)==1){
34069 +            index--;
34070 +        }
34071 +        pos++;
34072 +        bitmask = bitmask >>1;
34073 +
34074 +    }
34075 +    return pos;
34076 +}
34077  
34078 -static struct color_group *color_groups;
34079 -static struct lock_class_key color_lock_keys[16];
34080  
34081 -//static struct color_group *color_groups;
34082  
34083  /* Decoding page color, 0~15 */ 
34084 -static inline unsigned long page_color(struct page *page)
34085 +static inline unsigned int page_color(struct page *page)
34086  {
34087 -	return ((page_to_phys(page)& CACHE_MASK) >> PAGE_SHIFT);
34088 +	return ((page_to_phys(page)& CACHE_MASK) >> CACHE_SHIFT);
34089  }
34090  
34091  /* Decoding page bank number, 0~7 */ 
34092 -static inline unsigned long page_bank(struct page *page)
34093 +static inline unsigned int page_bank(struct page *page)
34094  {
34095 -	return ((page_to_phys(page)& BANK_MASK) >> PAGE_SHIFT);
34096 +	return ((page_to_phys(page)& BANK_MASK) >> BANK_SHIFT);
34097  }
34098  
34099 +static inline unsigned int page_list_index(struct page *page)
34100 +{
34101 +    unsigned int idx;  
34102 +    idx = (page_color(page) + page_bank(page)*(number_cachecolors));
34103 +//    printk("address = %lx, ", page_to_phys(page));
34104 +//    printk("color(%d), bank(%d), indx = %d\n", page_color(page), page_bank(page), idx);
34105 +
34106 +    return idx; 
34107 +}
34108 +
34109 +
34110 +
34111  /*
34112   * It is used to determine the smallest number of page lists. 
34113   */
34114 @@ -86,7 +163,7 @@ static unsigned long smallest_nr_pages(void)
34115  {
34116  	unsigned long i, min_pages = -1;
34117  	struct color_group *cgroup;
34118 -	for (i = 0; i < number_cachecolors; ++i) {
34119 +	for (i = 0; i < NUM_PAGE_LIST; ++i) {
34120  		cgroup = &color_groups[i];
34121  		if (atomic_read(&cgroup->nr_pages) < min_pages)
34122  			min_pages = atomic_read(&cgroup->nr_pages);
34123 @@ -94,12 +171,22 @@ static unsigned long smallest_nr_pages(void)
34124  	return min_pages;
34125  }
34126  
34127 +static void show_nr_pages(void)
34128 +{
34129 +	unsigned long i;
34130 +	struct color_group *cgroup;
34131 +	for (i = 0; i < NUM_PAGE_LIST; ++i) {
34132 +		cgroup = &color_groups[i];
34133 +		printk("i =%d, nr_pages = %d\n", i, atomic_read(&cgroup->nr_pages));
34134 +	}
34135 +}
34136 +
34137  /*
34138   * Add a page to current pool.
34139   */
34140  void add_page_to_color_list(struct page *page)
34141  {
34142 -	const unsigned long color = page_color(page);
34143 +	const unsigned long color = page_list_index(page);
34144  	struct color_group *cgroup = &color_groups[color];
34145  	BUG_ON(in_list(&page->lru) || PageLRU(page));
34146  	BUG_ON(page_count(page) > 1);
34147 @@ -116,38 +203,47 @@ void add_page_to_color_list(struct page *page)
34148   */
34149  static int do_add_pages(void)
34150  {
34151 -//	printk("LITMUS do add pages\n");
34152 +	printk("LITMUS do add pages\n");
34153  	
34154  	struct page *page, *page_tmp;
34155  	LIST_HEAD(free_later);
34156  	unsigned long color;
34157  	int ret = 0;
34158 +	int i = 0;
34159  
34160  	// until all the page lists contain enough pages 
34161 +	//for (i =0; i<5; i++) {
34162  	while (smallest_nr_pages() < PAGES_PER_COLOR) {
34163  	
34164  		page = alloc_page(GFP_HIGHUSER_MOVABLE);
34165 -		
34166  		if (unlikely(!page)) {
34167 -	//		printk(KERN_WARNING "Could not allocate pages.\n");
34168 +			printk(KERN_WARNING "Could not allocate pages.\n");
34169  			ret = -ENOMEM;
34170  			goto out;
34171  		}
34172 -		color = page_color(page);
34173 +		color = page_list_index(page);
34174 +                //show_nr_pages();
34175 +		//printk("before : nr_pages = %d\n", atomic_read(&color_groups[color].nr_pages));
34176  		if (atomic_read(&color_groups[color].nr_pages) < PAGES_PER_COLOR) {
34177  			add_page_to_color_list(page);
34178  		} else{
34179  			// Pages here will be freed later 
34180  			list_add_tail(&page->lru, &free_later);
34181  		}
34182 -	}
34183 +               //show_nr_pages();
34184 +		//printk("after : nr_pages = %d\n", atomic_read(&color_groups[color].nr_pages));
34185 +
34186 +        }
34187 +        show_nr_pages();
34188 +#if 1
34189  	// Free the unwanted pages
34190  	list_for_each_entry_safe(page, page_tmp, &free_later, lru) {
34191  		list_del(&page->lru);
34192  		__free_page(page);
34193  	}
34194 +#endif
34195  out:
34196 -	return ret;
34197 +        return ret;
34198  }
34199  
34200  /*
34201 @@ -162,7 +258,7 @@ static struct  page *new_alloc_page_color( unsigned long color)
34202  	struct color_group *cgroup;
34203  	struct page *rPage = NULL;
34204  		
34205 -	if( (color <0) || (color)>15) {
34206 +	if( (color <0) || (color)>(number_cachecolors*number_banks -1)) {
34207  		TRACE_CUR("Wrong color %lu\n", color);	
34208  //		printk(KERN_WARNING "Wrong color %lu\n", color);
34209  		goto out_unlock;
34210 @@ -192,31 +288,17 @@ out:
34211  
34212  
34213  /*
34214 - * Provide pages for replacement 
34215 - * This is used to generate experiments 
34216 - */
34217 -struct page *new_alloc_page_predefined(struct page *page,  int **x)
34218 -{
34219 -	unsigned int color = curr_cachecolor; 
34220 -	
34221 -//	printk("allocate new page color = %d\n", color);	
34222 -	struct color_group *cgroup;
34223 -	struct page *rPage = NULL;
34224 -	
34225 -	rPage =  new_alloc_page_color(color);
34226 -	curr_cachecolor = (color + 1)% used_cachecolor;
34227 -out:
34228 -	return rPage;
34229 -}
34230 -/*
34231   * provide pages for replacement according to  
34232 - * node = 0 for Level A, B tasks in Cpu 0
34233 - * node = 1 for Level A, B tasks in Cpu 1
34234 - * node = 2 for Level A, B tasks in Cpu 2
34235 - * node = 3 for Level A, B tasks in Cpu 3
34236 - * node = 4 for Level C tasks 
34237 + * node = 0 for Level A tasks in Cpu 0
34238 + * node = 1 for Level B tasks in Cpu 0
34239 + * node = 2 for Level A tasks in Cpu 1
34240 + * node = 3 for Level B tasks in Cpu 1
34241 + * node = 4 for Level A tasks in Cpu 2
34242 + * node = 5 for Level B tasks in Cpu 2
34243 + * node = 6 for Level A tasks in Cpu 3
34244 + * node = 7 for Level B tasks in Cpu 3
34245 + * node = 8 for Level C tasks 
34246   */
34247 -#if 1
34248  struct page *new_alloc_page(struct page *page, unsigned long node, int **x)
34249  {
34250  //	printk("allocate new page node = %d\n", node);	
34251 @@ -224,79 +306,20 @@ struct page *new_alloc_page(struct page *page, unsigned long node, int **x)
34252  	struct color_group *cgroup;
34253  	struct page *rPage = NULL;
34254  	unsigned int color;
34255 -	get_random_bytes(&color, sizeof(unsigned int));
34256  	
34257 -	// Decode the node to decide what color pages we should provide
34258 -	switch(node ){
34259 -		case 0:
34260 -		case 1:
34261 -		case 2:
34262 -		case 3:
34263 -			color = (color%4) * 4 + node;
34264 -		case 4:
34265 -			color = (color%16);
34266 -/*
34267 -		case 0:
34268 -		case 1: 
34269 -		case 2: 
34270 -		case 3:
34271 -			color = (color%2) * 8 + node;
34272 -			break;
34273 -		case 4:
34274 -			color = (color%8)+4;
34275 -			if(color >=8)	
34276 -				color+=4;
34277 -			break;
34278 -		default:
34279 -			TRACE_CUR("Wrong color %lu\n", color);	
34280 -			return rPage;
34281 -*/
34282 -	}
34283  
34284 +        unsigned int idx = 0;
34285 +        idx += num_by_bitmask_index(set_partition[node], set_index[node]);
34286 +        idx += number_cachecolors* num_by_bitmask_index(bank_partition[node], bank_index[node]);
34287 +	printk("node  = %d, idx = %d\n", node, idx);
34288  
34289 -//	printk("allocate new page color = %d\n", color);
34290 -		
34291 -	rPage =  new_alloc_page_color(color);
34292 +	rPage =  new_alloc_page_color(idx);
34293 +        
34294 +            
34295 +        set_index[node] = (set_index[node]+1) % counting_one_set(set_partition[node]);
34296 +        bank_index[node] = (bank_index[node]+1) % counting_one_set(bank_partition[node]);
34297  	return rPage; 
34298  }
34299 -#else
34300 -struct page *new_alloc_page(struct page *page, unsigned long node, int **x)
34301 -{
34302 -	return new_alloc_page_predefined(page,  x);
34303 -}
34304 -#endif
34305 -
34306 -/*
34307 - * Provide pages for replacement according to bank number. 
34308 - * This is used in cache way partition 
34309 - */
34310 -struct page *new_alloc_page_banknr(struct page *page, unsigned long banknr, int **x)
34311 -{
34312 -//	printk("allocate new page bank = %d\n", banknr);	
34313 -	struct color_group *cgroup;
34314 -	struct page *rPage = NULL;
34315 -	unsigned int color;
34316 -	get_random_bytes(&color, sizeof(unsigned int));
34317 -	
34318 -	if((banknr<= 7) && (banknr>=0)){
34319 -		color = (color%2) * 8 + banknr;
34320 -	}else{
34321 -		goto out;
34322 -	}
34323 -	
34324 -	rPage =  new_alloc_page_color(color);
34325 -		
34326 -out:
34327 -	return rPage;
34328 -}
34329 -
34330 -
34331 -void set_number_of_colors(int colornr)
34332 -{
34333 -//	printk(KERN_WARNING "Set of colors = %d.\n", colornr);
34334 -	used_cachecolor = colornr ; 
34335 -	curr_cachecolor = 0;
34336 -}
34337  
34338  
34339  
34340 @@ -306,12 +329,14 @@ void set_number_of_colors(int colornr)
34341   */ 
34342  static int __init init_variables(void)
34343  {
34344 -	number_banks = 1+(BANK_MASK >> PAGE_SHIFT); 
34345 -	number_cachecolors = 1+(CACHE_MASK >> PAGE_SHIFT);
34346 -	used_cachecolor = 16;
34347 -	curr_cachecolor = 0;
34348 -
34349 -	
34350 +	number_banks = counting_one_set(BANK_MASK); 
34351 +	number_banks = two_exp(number_banks); 
34352 +
34353 +	number_cachecolors = counting_one_set(CACHE_MASK);
34354 +	number_cachecolors = two_exp(number_cachecolors);
34355 +	NUM_PAGE_LIST = number_banks * number_cachecolors; 
34356 +        printk(KERN_WARNING "number of banks = %d, number of cachecolors=%d\n", number_banks, number_cachecolors);
34357 +	mutex_init(&void_lockdown_proc);
34358  }
34359  
34360  
34361 @@ -324,39 +349,274 @@ static int __init init_color_groups(void)
34362  	unsigned long i;
34363  	int err = 0;
34364  
34365 -	color_groups = kmalloc(number_cachecolors *
34366 -			sizeof(struct color_group), GFP_KERNEL);
34367 +        printk("NUM_PAGE_LIST = %d\n", NUM_PAGE_LIST);
34368 +        color_groups = kmalloc(NUM_PAGE_LIST *sizeof(struct color_group), GFP_KERNEL);
34369 +
34370  	if (!color_groups) {
34371 -//		printk(KERN_WARNING "Could not allocate color groups.\n");
34372 +		printk(KERN_WARNING "Could not allocate color groups.\n");
34373  		err = -ENOMEM;
34374  	}else{
34375  
34376 -		for (i = 0; i < number_cachecolors; ++i) {
34377 +		for (i = 0; i < NUM_PAGE_LIST; ++i) {
34378  			cgroup = &color_groups[i];
34379  			atomic_set(&cgroup->nr_pages, 0);
34380  			INIT_LIST_HEAD(&cgroup->list);
34381  			spin_lock_init(&cgroup->lock);
34382  		}
34383  	}
34384 -	return err;
34385 +        return err;
34386 +}
34387 +
34388 +int set_partition_handler(struct ctl_table *table, int write, void __user *buffer,
34389 +		size_t *lenp, loff_t *ppos)
34390 +{
34391 +	int ret = 0, i = 0;
34392 +	mutex_lock(&void_lockdown_proc);
34393 +	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
34394 +	if (ret)
34395 +		goto out;
34396 +	if (write) {
34397 +            printk("New set Partition : \n");
34398 +	    for(i =0;i <9;i++)
34399 +            {
34400 +                set_index[i] = 0;
34401 +                printk("set[%d] = %x \n", i, set_partition[i]);
34402 +            }
34403 +	}
34404 +out:
34405 +	mutex_unlock(&void_lockdown_proc);
34406 +	return ret;
34407  }
34408  
34409 +int bank_partition_handler(struct ctl_table *table, int write, void __user *buffer,
34410 +		size_t *lenp, loff_t *ppos)
34411 +{
34412 +	int ret = 0, i = 0;
34413 +	mutex_lock(&void_lockdown_proc);
34414 +	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
34415 +	if (ret)
34416 +		goto out;
34417 +	if (write) {
34418 +	    for(i =0;i <9;i++)
34419 +            {
34420 +                bank_index[i] = 0;
34421 +            }
34422 +	}
34423 +out:
34424 +	mutex_unlock(&void_lockdown_proc);
34425 +	return ret;
34426 +}
34427 +
34428 +
34429 +static struct ctl_table cache_table[] =
34430 +{
34431 +        
34432 +	{
34433 +		.procname	= "C0_LA_set",
34434 +		.mode		= 0666,
34435 +		.proc_handler	= set_partition_handler,
34436 +		.data		= &set_partition[0],
34437 +		.maxlen		= sizeof(set_partition[0]),
34438 +		.extra1		= &set_partition_min,
34439 +		.extra2		= &set_partition_max,
34440 +	},	
34441 +	{
34442 +		.procname	= "C0_LB_set",
34443 +		.mode		= 0666,
34444 +		.proc_handler	= set_partition_handler,
34445 +		.data		= &set_partition[1],
34446 +		.maxlen		= sizeof(set_partition[1]),
34447 +		.extra1		= &set_partition_min,
34448 +		.extra2		= &set_partition_max,
34449 +	},	
34450 +	{
34451 +		.procname	= "C1_LA_set",
34452 +		.mode		= 0666,
34453 +		.proc_handler	= set_partition_handler,
34454 +		.data		= &set_partition[2],
34455 +		.maxlen		= sizeof(set_partition[2]),
34456 +		.extra1		= &set_partition_min,
34457 +		.extra2		= &set_partition_max,
34458 +	},
34459 +	{
34460 +		.procname	= "C1_LB_set",
34461 +		.mode		= 0666,
34462 +		.proc_handler	= set_partition_handler,
34463 +		.data		= &set_partition[3],
34464 +		.maxlen		= sizeof(set_partition[3]),
34465 +		.extra1		= &set_partition_min,
34466 +		.extra2		= &set_partition_max,
34467 +	},
34468 +	{
34469 +		.procname	= "C2_LA_set",
34470 +		.mode		= 0666,
34471 +		.proc_handler	= set_partition_handler,
34472 +		.data		= &set_partition[4],
34473 +		.maxlen		= sizeof(set_partition[4]),
34474 +		.extra1		= &set_partition_min,
34475 +		.extra2		= &set_partition_max,
34476 +	},
34477 +	{
34478 +		.procname	= "C2_LB_set",
34479 +		.mode		= 0666,
34480 +		.proc_handler	= set_partition_handler,
34481 +		.data		= &set_partition[5],
34482 +		.maxlen		= sizeof(set_partition[5]),
34483 +		.extra1		= &set_partition_min,
34484 +		.extra2		= &set_partition_max,
34485 +	},
34486 +	{
34487 +		.procname	= "C3_LA_set",
34488 +		.mode		= 0666,
34489 +		.proc_handler	= set_partition_handler,
34490 +		.data		= &set_partition[6],
34491 +		.maxlen		= sizeof(set_partition[6]),
34492 +		.extra1		= &set_partition_min,
34493 +		.extra2		= &set_partition_max,
34494 +	},
34495 +	{
34496 +		.procname	= "C3_LB_set",
34497 +		.mode		= 0666,
34498 +		.proc_handler	= set_partition_handler,
34499 +		.data		= &set_partition[7],
34500 +		.maxlen		= sizeof(set_partition[7]),
34501 +		.extra1		= &set_partition_min,
34502 +		.extra2		= &set_partition_max,
34503 +	},	
34504 +	{
34505 +		.procname	= "Call_LC_set",
34506 +		.mode		= 0666,
34507 +		.proc_handler	= set_partition_handler,
34508 +		.data		= &set_partition[8],
34509 +		.maxlen		= sizeof(set_partition[8]),
34510 +		.extra1		= &set_partition_min,
34511 +		.extra2		= &set_partition_max,
34512 +	},	
34513 +	{
34514 +		.procname	= "C0_LA_bank",
34515 +		.mode		= 0666,
34516 +		.proc_handler	= bank_partition_handler,
34517 +		.data		= &bank_partition[0],
34518 +		.maxlen		= sizeof(set_partition[0]),
34519 +		.extra1		= &bank_partition_min,
34520 +		.extra2		= &bank_partition_max,
34521 +	},
34522 +	{
34523 +		.procname	= "C0_LB_bank",
34524 +		.mode		= 0666,
34525 +		.proc_handler	= bank_partition_handler,
34526 +		.data		= &bank_partition[1],
34527 +		.maxlen		= sizeof(set_partition[1]),
34528 +		.extra1		= &bank_partition_min,
34529 +		.extra2		= &bank_partition_max,
34530 +	},		
34531 +	{
34532 +		.procname	= "C1_LA_bank",
34533 +		.mode		= 0666,
34534 +		.proc_handler	= bank_partition_handler,
34535 +		.data		= &bank_partition[2],
34536 +		.maxlen		= sizeof(set_partition[2]),
34537 +		.extra1		= &bank_partition_min,
34538 +		.extra2		= &bank_partition_max,
34539 +	},
34540 +	{
34541 +		.procname	= "C1_LB_bank",
34542 +		.mode		= 0666,
34543 +		.proc_handler	= bank_partition_handler,
34544 +		.data		= &bank_partition[3],
34545 +		.maxlen		= sizeof(set_partition[3]),
34546 +		.extra1		= &bank_partition_min,
34547 +		.extra2		= &bank_partition_max,
34548 +	},
34549 +	{
34550 +		.procname	= "C2_LA_bank",
34551 +		.mode		= 0666,
34552 +		.proc_handler	= bank_partition_handler,
34553 +		.data		= &bank_partition[4],
34554 +		.maxlen		= sizeof(set_partition[4]),
34555 +		.extra1		= &bank_partition_min,
34556 +		.extra2		= &bank_partition_max,
34557 +	},	
34558 +	{
34559 +		.procname	= "C2_LB_bank",
34560 +		.mode		= 0666,
34561 +		.proc_handler	= bank_partition_handler,
34562 +		.data		= &bank_partition[5],
34563 +		.maxlen		= sizeof(set_partition[5]),
34564 +		.extra1		= &bank_partition_min,
34565 +		.extra2		= &bank_partition_max,
34566 +	},		
34567 +	{
34568 +		.procname	= "C3_LA_bank",
34569 +		.mode		= 0666,
34570 +		.proc_handler	= bank_partition_handler,
34571 +		.data		= &bank_partition[6],
34572 +		.maxlen		= sizeof(set_partition[6]),
34573 +		.extra1		= &bank_partition_min,
34574 +		.extra2		= &bank_partition_max,
34575 +	},	
34576 +	{
34577 +		.procname	= "C3_LB_bank",
34578 +		.mode		= 0666,
34579 +		.proc_handler	= bank_partition_handler,
34580 +		.data		= &bank_partition[7],
34581 +		.maxlen		= sizeof(set_partition[7]),
34582 +		.extra1		= &bank_partition_min,
34583 +		.extra2		= &bank_partition_max,
34584 +	},	
34585 +	{
34586 +		.procname	= "Call_LC_bank",
34587 +		.mode		= 0666,
34588 +		.proc_handler	= bank_partition_handler,
34589 +		.data		= &bank_partition[8],
34590 +		.maxlen		= sizeof(set_partition[8]),
34591 +		.extra1		= &bank_partition_min,
34592 +		.extra2		= &bank_partition_max,
34593 +	},	
34594 +
34595 +
34596 +	{ }
34597 +};
34598 +
34599 +static struct ctl_table litmus_dir_table[] = {
34600 +	{
34601 +		.procname	= "litmus",
34602 + 		.mode		= 0555,
34603 +		.child		= cache_table,
34604 +	},
34605 +	{ }
34606 +};
34607 +
34608 +
34609 +static struct ctl_table_header *litmus_sysctls;
34610 +
34611 +
34612  /*
34613   * Initialzie this proc 
34614   */
34615  static int __init litmus_color_init(void)
34616  {
34617  	int err=0;
34618 -	
34619 +        printk("Init bankproc.c\n");
34620 +
34621  	//INIT_LIST_HEAD(&alloced_pages.list);
34622  	//spin_lock_init(&alloced_pages.lock);
34623  	init_variables();
34624 -//	printk("Cache number = %d , Cache mask = 0x%lx\n", number_cachecolors, CACHE_MASK); 
34625 -//	printk("Bank number = %d , Bank mask = 0x%lx\n", number_banks, BANK_MASK); 
34626 +
34627 +	printk(KERN_INFO "Registering LITMUS^RT proc color sysctl.\n");
34628 +
34629 +	litmus_sysctls = register_sysctl_table(litmus_dir_table);
34630 +	if (!litmus_sysctls) {
34631 +		printk(KERN_WARNING "Could not register LITMUS^RT color sysctl.\n");
34632 +		err = -EFAULT;
34633 +		goto out;
34634 +	}
34635 +
34636  	init_color_groups();			
34637  	do_add_pages();
34638  
34639 -//	printk(KERN_INFO "Registering LITMUS^RT color and bank proc.\n");
34640 +	printk(KERN_INFO "Registering LITMUS^RT color and bank proc.\n");
34641 +out:
34642  	return err;
34643  }
34644  
34645 diff --git a/litmus/litmus.c b/litmus/litmus.c
34646 index 77c609b..0352079 100644
34647 --- a/litmus/litmus.c
34648 +++ b/litmus/litmus.c
34649 @@ -390,7 +390,7 @@ do_return:
34650  extern int isolate_lru_page(struct page *page);
34651  extern void putback_lru_page(struct page *page);
34652  
34653 -#if 1
34654 +#if 0
34655  static struct page *new_alloc_page(struct page *page, unsigned long node, int **x)
34656  {
34657  	return alloc_pages_exact_node(0, GFP_HIGHUSER_MOVABLE, 0);
34658 @@ -501,7 +501,8 @@ asmlinkage long sys_set_page_color(int cpu)
34659  		node = 4;
34660  	else
34661  		node = cpu;
34662 -	
34663 +
34664 +        //node= 0;
34665  	if (!list_empty(&pagelist)) {
34666  		ret = migrate_pages(&pagelist, new_alloc_page, node, MIGRATE_ASYNC, MR_SYSCALL);
34667  		TRACE_TASK(current, "%ld pages not migrated.\n", ret);
34668 -- 
34669 1.8.1.2
34670 
34671 
34672 From 56a820730e5ba600fa2654db635cf21b6cde5f21 Mon Sep 17 00:00:00 2001
34673 From: Namhoon Kim <namhoonk@cs.unc.edu>
34674 Date: Sun, 22 Mar 2015 17:15:52 -0400
34675 Subject: [PATCH 102/119] Add per-level way-partition /proc interface
34676 
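Replace the coarse l2_usable_ways/l2_usable_sets and use_way_partition/
use_set_partition knobs with nine per-node way masks (C0_LA_way ...
Call_LC_way) in the cache sysctl table, and make do_partition() select the
lockdown mask by criticality level and CPU at schedule time.

way_partitions[] stores the ways a (CPU, level) pair may use; the value
written to the per-CPU PL310 lockdown registers (ld_d_reg()/ld_i_reg()) is
its complement, since a set bit there blocks allocation into that way. For
illustration only (not part of this patch), a small standalone sketch of
that convention using the values from this commit:

    #include <stdio.h>

    static const unsigned int way_partitions[9] = {
        0x00000003, 0x00000003, /* CPU0 level A, level B: ways 0-1 */
        0x0000000C, 0x0000000C, /* CPU1 level A, level B: ways 2-3 */
        0x00000030, 0x00000030, /* CPU2 level A, level B: ways 4-5 */
        0x000000C0, 0x000000C0, /* CPU3 level A, level B: ways 6-7 */
        0x0000FF00,             /* level C (all CPUs):    ways 8-15 */
    };

    int main(void)
    {
        int i;

        for (i = 0; i < 9; i++)
            printf("node %d: usable ways 0x%08X -> lockdown value 0x%08X\n",
                   i, way_partitions[i], ~way_partitions[i]);
        return 0;
    }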
34677 ---
34678  litmus/cache_proc.c       | 258 ++++++++++++++++++++++------------------------
34679  litmus/litmus.c           |   5 -
34680  litmus/reservation.c      |  19 +++-
34681  litmus/sched_mc2.c        |  12 ++-
34682  litmus/sched_task_trace.c |   2 +-
34683  5 files changed, 152 insertions(+), 144 deletions(-)
34684 
34685 diff --git a/litmus/cache_proc.c b/litmus/cache_proc.c
34686 index 59f166e..a2d560a 100644
34687 --- a/litmus/cache_proc.c
34688 +++ b/litmus/cache_proc.c
34689 @@ -40,12 +40,16 @@ u32 nr_unlocked_way[MAX_NR_WAYS+1]  = {
34690  	0xFFFF0000, /* way ~15 unlocked. usable = 16 */
34691  };
34692  
34693 -u32 way_partitions[5] = {
34694 -	0xFFFFFFFC, /* cpu0 A and B */
34695 -	0xFFFFFFF3, /* cpu1 A and B */
34696 -	0xFFFFFFCF, /* cpu2 A and B */
34697 -	0xFFFFFF3F, /* cpu3 A and B */
34698 -	0xFFFF00FF, /* lv C */
34699 +u32 way_partitions[9] = {
34700 +	0x00000003, /* cpu0 A */
34701 +	0x00000003, /* cpu0 B */
34702 +	0x0000000C, /* cpu1 A */
34703 +	0x0000000C, /* cpu1 B */
34704 +	0x00000030, /* cpu2 A */
34705 +	0x00000030, /* cpu2 B */
34706 +	0x000000C0, /* cpu3 A */
34707 +	0x000000C0, /* cpu3 B */
34708 +	0x0000FF00, /* lv C */
34709  };
34710  
34711  u32 set_partitions[2] = {
34712 @@ -78,11 +82,8 @@ static u32 cache_id;
34713  struct mutex actlr_mutex;
34714  struct mutex l2x0_prefetch_mutex;
34715  struct mutex lockdown_proc;
34716 -
34717 -static int min_usable_ways = 0;
34718 -static int max_usable_ways = 16;
34719 -static int min_usable_sets = 1;
34720 -static int max_usable_sets = 16;
34721 +static unsigned int way_partition_min;
34722 +static unsigned int way_partition_max;
34723  
34724  static int zero = 0;
34725  static int one = 1;
34726 @@ -94,15 +95,8 @@ static int one = 1;
34727  			void __iomem *__v = cache_base + L2X0_LOCKDOWN_WAY_I_BASE + \
34728  			__cpu * L2X0_LOCKDOWN_STRIDE; __v; })
34729  
34730 -int l2_usable_ways;
34731 -int l2_usable_sets;
34732  int lock_all;
34733  int nr_lockregs;
34734 -int use_way_partition;
34735 -int use_set_partition;
34736 -
34737 -extern void set_number_of_colors(int colornr);
34738 -
34739  
34740  static void print_lockdown_registers(void)
34741  {
34742 @@ -177,7 +171,8 @@ void litmus_setup_lockdown(void __iomem *base, u32 id)
34743  	
34744  	test_lockdown(NULL);
34745  }
34746 -int lock_all_handler(struct ctl_table *table, int write, void __user *buffer,
34747 +
34748 +int way_partition_handler(struct ctl_table *table, int write, void __user *buffer,
34749  		size_t *lenp, loff_t *ppos)
34750  {
34751  	int ret = 0, i;
34752 @@ -190,10 +185,10 @@ int lock_all_handler(struct ctl_table *table, int write, void __user *buffer,
34753  	if (ret)
34754  		goto out;
34755  	
34756 -	if (write && lock_all == 1) {
34757 -		for (i = 0; i < nr_lockregs;  i++) {
34758 -			writel_relaxed(nr_unlocked_way[0], ld_d_reg(i));
34759 -			writel_relaxed(nr_unlocked_way[0], ld_i_reg(i));
34760 +	if (write) {
34761 +		printk("Way-partition settings:\n");
34762 +		for (i = 0; i < 9; i++) {
34763 +			printk("0x%08X\n", ~way_partitions[i]);
34764  		}
34765  	}
34766  	print_lockdown_registers();
34767 @@ -203,10 +198,10 @@ out:
34768  	return ret;
34769  }
34770  
34771 -int l2_usable_ways_handler(struct ctl_table *table, int write, void __user *buffer,
34772 +int lock_all_handler(struct ctl_table *table, int write, void __user *buffer,
34773  		size_t *lenp, loff_t *ppos)
34774  {
34775 -	int ret = 0, i = 0;
34776 +	int ret = 0, i;
34777  	
34778  	mutex_lock(&lockdown_proc);
34779  	
34780 @@ -215,17 +210,14 @@ int l2_usable_ways_handler(struct ctl_table *table, int write, void __user *buff
34781  	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
34782  	if (ret)
34783  		goto out;
34784 -		
34785 -	TRACE_CUR("l2_usable_ways : %d\n", l2_usable_ways);
34786 -	printk("l2_usable_ways : %d\n", l2_usable_ways);
34787  	
34788 -	if (write) {
34789 -		//for (i = 0; i < nr_lockregs;  i++) {
34790 -			writel_relaxed(nr_unlocked_way[l2_usable_ways], ld_d_reg(i));
34791 -			writel_relaxed(nr_unlocked_way[l2_usable_ways], ld_i_reg(i));
34792 -		//}
34793 -		print_lockdown_registers();
34794 +	if (write && lock_all == 1) {
34795 +		for (i = 0; i < nr_lockregs;  i++) {
34796 +			writel_relaxed(nr_unlocked_way[0], ld_d_reg(i));
34797 +			writel_relaxed(nr_unlocked_way[0], ld_i_reg(i));
34798 +		}
34799  	}
34800 +	print_lockdown_registers();
34801  
34802  out:
34803  	mutex_unlock(&lockdown_proc);
34804 @@ -234,6 +226,28 @@ out:
34805  
34806  void do_partition(enum crit_level lv, int cpu)
34807  {
34808 +	u32 regs;
34809 +	switch(lv) {
34810 +		case CRIT_LEVEL_A:
34811 +			regs = ~way_partitions[cpu*2];
34812 +			writel_relaxed(regs, ld_d_reg(cpu));
34813 +			writel_relaxed(regs, ld_i_reg(cpu));
34814 +			break;
34815 +		case CRIT_LEVEL_B:
34816 +			regs = ~way_partitions[cpu*2+1];
34817 +			writel_relaxed(regs, ld_d_reg(cpu));
34818 +			writel_relaxed(regs, ld_i_reg(cpu));
34819 +			break;
34820 +		case CRIT_LEVEL_C:
34821 +		case NUM_CRIT_LEVELS:
34822 +			regs = ~way_partitions[8];
34823 +			writel_relaxed(regs, ld_d_reg(cpu));
34824 +			writel_relaxed(regs, ld_i_reg(cpu));
34825 +			break;
34826 +		default:
34827 +			BUG();
34828 +	}
34829 +/*
34830  	if (use_set_partition == 1 && use_way_partition == 1)
34831  		printk(KERN_ALERT "BOTH SET, WAY ARE SET!!!!\n");
34832  	
34833 @@ -254,6 +268,7 @@ void do_partition(enum crit_level lv, int cpu)
34834  			writel_relaxed(set_partitions[1], ld_i_reg(cpu));
34835  		}
34836  	}
34837 +*/
34838  }
34839  
34840  void enter_irq_mode(void)
34841 @@ -275,115 +290,95 @@ void exit_irq_mode(void)
34842  	writel_relaxed(prev_lockdown_d_reg[cpu], ld_d_reg(cpu));	
34843  }
34844  
34845 -int l2_usable_sets_handler(struct ctl_table *table, int write, void __user *buffer,
34846 -		size_t *lenp, loff_t *ppos)
34847 -{
34848 -	int ret = 0;
34849 -	
34850 -	mutex_lock(&lockdown_proc);
34851 -	
34852 -	flush_cache_all();
34853 -	
34854 -	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
34855 -	if (ret)
34856 -		goto out;
34857 -		
34858 -	TRACE_CUR("l2_usable_sets : %d\n", l2_usable_sets);
34859 -	printk("l2_usable_sets : %d\n", l2_usable_sets);
34860 -	
34861 -	if (write) {
34862 -		set_number_of_colors(l2_usable_sets);
34863 -	}
34864 -
34865 -out:
34866 -	mutex_unlock(&lockdown_proc);
34867 -	return ret;
34868 -}
34869 -
34870 -int use_way_partition_handler(struct ctl_table *table, int write, void __user *buffer,
34871 -		size_t *lenp, loff_t *ppos)
34872 -{
34873 -	int ret = 0;
34874 -	
34875 -	mutex_lock(&lockdown_proc);
34876 -	
34877 -	flush_cache_all();
34878 -	
34879 -	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
34880 -	if (ret)
34881 -		goto out;
34882 -		
34883 -	printk(KERN_INFO "use_way_partition : %d\n", use_way_partition);
34884 -
34885 -out:
34886 -	mutex_unlock(&lockdown_proc);
34887 -	return ret;
34888 -}
34889 -
34890 -int use_set_partition_handler(struct ctl_table *table, int write, void __user *buffer,
34891 -		size_t *lenp, loff_t *ppos)
34892 -{
34893 -	int ret = 0;
34894 -	
34895 -	mutex_lock(&lockdown_proc);
34896 -	
34897 -	flush_cache_all();
34898 -	
34899 -	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
34900 -	if (ret)
34901 -		goto out;
34902 -		
34903 -	printk(KERN_INFO "use_set_partition : %d\n", use_set_partition);
34904 -
34905 -out:
34906 -	mutex_unlock(&lockdown_proc);
34907 -	return ret;
34908 -}
34909 -
34910  static struct ctl_table cache_table[] =
34911  {
34912  	{
34913 -		.procname	= "l2_usable_ways",
34914 +		.procname	= "C0_LA_way",
34915  		.mode		= 0666,
34916 -		.proc_handler	= l2_usable_ways_handler,
34917 -		.data		= &l2_usable_ways,
34918 -		.maxlen		= sizeof(l2_usable_ways),
34919 -		.extra1		= &min_usable_ways,
34920 -		.extra2		= &max_usable_ways,
34921 +		.proc_handler	= way_partition_handler,
34922 +		.data		= &way_partitions[0],
34923 +		.maxlen		= sizeof(way_partitions[0]),
34924 +		.extra1		= &way_partition_min,
34925 +		.extra2		= &way_partition_max,
34926 +	},	
34927 +	{
34928 +		.procname	= "C0_LB_way",
34929 +		.mode		= 0666,
34930 +		.proc_handler	= way_partition_handler,
34931 +		.data		= &way_partitions[1],
34932 +		.maxlen		= sizeof(way_partitions[1]),
34933 +		.extra1		= &way_partition_min,
34934 +		.extra2		= &way_partition_max,
34935 +	},	
34936 +	{
34937 +		.procname	= "C1_LA_way",
34938 +		.mode		= 0666,
34939 +		.proc_handler	= way_partition_handler,
34940 +		.data		= &way_partitions[2],
34941 +		.maxlen		= sizeof(way_partitions[2]),
34942 +		.extra1		= &way_partition_min,
34943 +		.extra2		= &way_partition_max,
34944  	},
34945  	{
34946 -		.procname	= "l2_usable_sets",
34947 +		.procname	= "C1_LB_way",
34948  		.mode		= 0666,
34949 -		.proc_handler	= l2_usable_sets_handler,
34950 -		.data		= &l2_usable_sets,
34951 -		.maxlen		= sizeof(l2_usable_sets),
34952 -		.extra1		= &min_usable_sets,
34953 -		.extra2		= &max_usable_sets,
34954 +		.proc_handler	= way_partition_handler,
34955 +		.data		= &way_partitions[3],
34956 +		.maxlen		= sizeof(way_partitions[3]),
34957 +		.extra1		= &way_partition_min,
34958 +		.extra2		= &way_partition_max,
34959  	},
34960  	{
34961 -		.procname	= "lock_all",
34962 +		.procname	= "C2_LA_way",
34963  		.mode		= 0666,
34964 -		.proc_handler	= lock_all_handler,
34965 -		.data		= &lock_all,
34966 -		.maxlen		= sizeof(lock_all),
34967 -		.extra1		= &zero,
34968 -		.extra2		= &one,
34969 +		.proc_handler	= way_partition_handler,
34970 +		.data		= &way_partitions[4],
34971 +		.maxlen		= sizeof(way_partitions[4]),
34972 +		.extra1		= &way_partition_min,
34973 +		.extra2		= &way_partition_max,
34974  	},
34975  	{
34976 -		.procname	= "use_way_partition",
34977 +		.procname	= "C2_LB_way",
34978  		.mode		= 0666,
34979 -		.proc_handler	= use_way_partition_handler,
34980 -		.data		= &use_way_partition,
34981 -		.maxlen		= sizeof(use_way_partition),
34982 -		.extra1		= &zero,
34983 -		.extra2		= &one,
34984 +		.proc_handler	= way_partition_handler,
34985 +		.data		= &way_partitions[5],
34986 +		.maxlen		= sizeof(way_partitions[5]),
34987 +		.extra1		= &way_partition_min,
34988 +		.extra2		= &way_partition_max,
34989  	},
34990  	{
34991 -		.procname	= "use_set_partition",
34992 +		.procname	= "C3_LA_way",
34993  		.mode		= 0666,
34994 -		.proc_handler	= use_set_partition_handler,
34995 -		.data		= &use_set_partition,
34996 -		.maxlen		= sizeof(use_set_partition),
34997 +		.proc_handler	= way_partition_handler,
34998 +		.data		= &way_partitions[6],
34999 +		.maxlen		= sizeof(way_partitions[6]),
35000 +		.extra1		= &way_partition_min,
35001 +		.extra2		= &way_partition_max,
35002 +	},
35003 +	{
35004 +		.procname	= "C3_LB_way",
35005 +		.mode		= 0666,
35006 +		.proc_handler	= way_partition_handler,
35007 +		.data		= &way_partitions[7],
35008 +		.maxlen		= sizeof(way_partitions[7]),
35009 +		.extra1		= &way_partition_min,
35010 +		.extra2		= &way_partition_max,
35011 +	},	
35012 +	{
35013 +		.procname	= "Call_LC_way",
35014 +		.mode		= 0666,
35015 +		.proc_handler	= way_partition_handler,
35016 +		.data		= &way_partitions[8],
35017 +		.maxlen		= sizeof(way_partitions[8]),
35018 +		.extra1		= &way_partition_min,
35019 +		.extra2		= &way_partition_max,
35020 +	},		
35021 +	{
35022 +		.procname	= "lock_all",
35023 +		.mode		= 0666,
35024 +		.proc_handler	= lock_all_handler,
35025 +		.data		= &lock_all,
35026 +		.maxlen		= sizeof(lock_all),
35027  		.extra1		= &zero,
35028  		.extra2		= &one,
35029  	},
35030 @@ -413,12 +408,9 @@ static int __init litmus_sysctl_init(void)
35031  		goto out;
35032  	}
35033  
35034 -	l2_usable_ways = 16;
35035 -	l2_usable_sets = 5;
35036 -	use_way_partition = 0;
35037 -	use_set_partition = 0;
35038 -	set_number_of_colors(l2_usable_sets);
35039 -
35040 +	way_partition_min = 0x00000000;
35041 +	way_partition_max = 0x0000FFFF;
35042 +	
35043  out:
35044  	return ret;
35045  }
35046 diff --git a/litmus/litmus.c b/litmus/litmus.c
35047 index 0352079..04c5017 100644
35048 --- a/litmus/litmus.c
35049 +++ b/litmus/litmus.c
35050 @@ -400,8 +400,6 @@ extern struct page *new_alloc_page(struct page *page, unsigned long node, int **
35051  
35052  #endif
35053  
35054 -extern int use_set_partition;
35055 -
35056  asmlinkage long sys_set_page_color(int cpu)
35057  {
35058  	long ret = 0;
35059 @@ -416,9 +414,6 @@ asmlinkage long sys_set_page_color(int cpu)
35060  	LIST_HEAD(pagelist);
35061  	LIST_HEAD(shared_pagelist);
35062  	
35063 -	if (use_set_partition == 0)
35064 -		return 0;
35065 -	
35066  	down_read(&current->mm->mmap_sem);
35067  	TRACE_TASK(current, "SYSCALL set_page_color\n");
35068  	vma_itr = current->mm->mmap;
35069 diff --git a/litmus/reservation.c b/litmus/reservation.c
35070 index 8457b4b..af5a934 100644
35071 --- a/litmus/reservation.c
35072 +++ b/litmus/reservation.c
35073 @@ -7,6 +7,8 @@
35074  //#define TRACE(fmt, args...) do {} while (false)
35075  //#define TRACE_TASK(fmt, args...) do {} while (false)
35076  
35077 +#define BUDGET_ENFORCEMENT_AT_C 1
35078 +	
35079  void reservation_init(struct reservation *res)
35080  {
35081  	memset(res, sizeof(*res), 0);
35082 @@ -495,9 +497,11 @@ static void gmp_queue_active(
35083  	/* check for possible preemption */
35084  	if (res->state == RESERVATION_ACTIVE && check_preempt)
35085  		gmp_env->schedule_now++;
35086 -	
35087 +
35088 +//#if BUDGET_ENFORCEMENT_AT_C	
35089  	gmp_add_event_after(gmp_env, res->cur_budget, res->id, EVENT_DRAIN);
35090 -	res->event_added = 1;
35091 +//#endif
35092 +	res->event_added = 1;	
35093  }
35094  
35095  static void gmp_queue_reservation(
35096 @@ -530,6 +534,7 @@ void gmp_add_new_reservation(
35097  	gmp_queue_reservation(gmp_env, new_res);
35098  }
35099  
35100 +//#if BUDGET_ENFORCEMENT_AT_C
35101  static void gmp_charge_budget(
35102  	struct gmp_reservation_environment* gmp_env,
35103  	lt_t delta)
35104 @@ -576,6 +581,16 @@ static void gmp_charge_budget(
35105  	}
35106  	//TRACE("finished charging budgets\n");
35107  }
35108 +//#else
35109 +/*
35110 +static void gmp_charge_budget(
35111 +	struct gmp_reservation_environment* gmp_env,
35112 +	lt_t delta)
35113 +{
35114 +	return;
35115 +}
35116 +*/
35117 +//#endif
35118  
35119  static void gmp_replenish_budgets(struct gmp_reservation_environment* gmp_env)
35120  {
35121 diff --git a/litmus/sched_mc2.c b/litmus/sched_mc2.c
35122 index 9c6d762..885218e 100644
35123 --- a/litmus/sched_mc2.c
35124 +++ b/litmus/sched_mc2.c
35125 @@ -26,6 +26,8 @@
35126  #include <litmus/reservation.h>
35127  #include <litmus/polling_reservations.h>
35128  
35129 +#define BUDGET_ENFORCEMENT_AT_C 1
35130 +
35131  extern void do_partition(enum crit_level lv, int cpu);
35132  
35133  /* _global_env - reservation container for level-C tasks*/
35134 @@ -158,7 +160,9 @@ static void task_departs(struct task_struct *tsk, int job_complete)
35135  		ce = &state->crit_entries[lv];
35136  		ce->running = tsk;
35137  		res->is_ghost = state->cpu;
35138 +//#if BUDGET_ENFORCEMENT_AT_C		
35139  		gmp_add_event_after(&_global_env, res->cur_budget, res->id, EVENT_DRAIN);
35140 +//#endif
35141  		TRACE_TASK(tsk, "BECOME GHOST at %llu\n", litmus_clock());
35142  		
35143  	}		
35144 @@ -583,10 +587,10 @@ struct task_struct* mc2_dispatch(struct sup_reservation_environment* sup_env, st
35145  	*/
35146  	cur_priority = _lowest_prio_cpu.cpu_entries[state->cpu].deadline;
35147  	
35148 -	//TRACE("****** ACTIVE LIST ******\n");
35149 -	//TRACE_TASK(_lowest_prio_cpu.cpu_entries[state->cpu].scheduled, "** CURRENT JOB deadline %llu **\n", cur_priority);
35150 +	TRACE("****** ACTIVE LIST ******\n");
35151 +	TRACE_TASK(_lowest_prio_cpu.cpu_entries[state->cpu].scheduled, "** CURRENT JOB deadline %llu **\n", cur_priority);
35152  	list_for_each_entry_safe(res, next, &_global_env.active_reservations, list) {
35153 -		//TRACE("R%d deadline=%llu, scheduled_on=%d\n", res->id, res->priority, res->scheduled_on);
35154 +		TRACE("R%d deadline=%llu, scheduled_on=%d\n", res->id, res->priority, res->scheduled_on);
35155  		if (res->state == RESERVATION_ACTIVE && res->scheduled_on == NO_CPU) {
35156  			tsk = res->ops->dispatch_client(res, &time_slice);
35157  			if (likely(tsk)) {
35158 @@ -598,7 +602,9 @@ struct task_struct* mc2_dispatch(struct sup_reservation_environment* sup_env, st
35159  				} else if (lv == CRIT_LEVEL_C) {
35160  					//ce = &state->crit_entries[lv];
35161  					//if (likely(!ce->running)) {
35162 +//#if BUDGET_ENFORCEMENT_AT_C						
35163  						gmp_add_event_after(&_global_env, res->cur_budget, res->id, EVENT_DRAIN);
35164 +//#endif
35165  						res->event_added = 1;
35166  						res->blocked_by_ghost = 0;
35167  						res->is_ghost = NO_CPU;
35168 diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c
35169 index 6224f8c..3a6756d 100644
35170 --- a/litmus/sched_task_trace.c
35171 +++ b/litmus/sched_task_trace.c
35172 @@ -15,7 +15,7 @@
35173  #include <litmus/feather_trace.h>
35174  #include <litmus/ftdev.h>
35175  
35176 -#define NO_EVENTS		(1 << (CONFIG_SCHED_TASK_TRACE_SHIFT+3))
35177 +#define NO_EVENTS		(1 << (CONFIG_SCHED_TASK_TRACE_SHIFT+6))
35178  
35179  #define now() litmus_clock()
35180  
35181 -- 
35182 1.8.1.2
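
For reference, the lockdown values that do_partition() above writes can be reproduced in isolation. The stand-alone sketch below is illustration only, not part of the patch series; the cpu0-cpu2 masks match defaults visible later in the series, while the cpu3 and level-C entries are assumed by analogy.

#include <stdio.h>
#include <stdint.h>

/* Per-(core, criticality level) way masks: bit i set => way i may be used.
 * cpu0-cpu2 values match defaults visible later in this series; the cpu3
 * and level-C entries are assumed by analogy. */
static const uint32_t way_partitions[9] = {
	0x00000003, /* cpu0 A */
	0x00000003, /* cpu0 B */
	0x0000000C, /* cpu1 A */
	0x0000000C, /* cpu1 B */
	0x00000030, /* cpu2 A */
	0x00000030, /* cpu2 B */
	0x000000C0, /* cpu3 A (assumed) */
	0x000000C0, /* cpu3 B (assumed) */
	0x0000FF00, /* level C, shared by all cpus (assumed) */
};

/* PL310 lockdown registers use inverted semantics: a set bit prevents
 * allocation in that way, so do_partition() writes the complement of the
 * partition mask. */
static uint32_t lockdown_value(uint32_t way_mask)
{
	return ~way_mask;
}

int main(void)
{
	int cpu, lv;

	for (cpu = 0; cpu < 4; cpu++)
		for (lv = 0; lv < 2; lv++)
			printf("cpu %d level %c: mask 0x%08X -> lockdown 0x%08X\n",
			       cpu, lv ? 'B' : 'A',
			       way_partitions[cpu * 2 + lv],
			       lockdown_value(way_partitions[cpu * 2 + lv]));
	printf("level C:        mask 0x%08X -> lockdown 0x%08X\n",
	       way_partitions[8], lockdown_value(way_partitions[8]));
	return 0;
}

Setting C0_LA_way to 0x3, for example, therefore results in 0xFFFFFFFC being written to CPU 0's instruction and data lockdown registers whenever a level-A task runs there.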
35183 
35184 
35185 From 0041e83c7994510cebe9f335eb30b6049d8b4c1f Mon Sep 17 00:00:00 2001
35186 From: ChengYang Fu <chengyangfu@gmail.com>
35187 Date: Mon, 23 Mar 2015 23:31:09 -0400
35188 Subject: [PATCH 103/119] Fix a bug in bank_proc.c
35189 
35190 ---
35191  litmus/bank_proc.c | 95 +++++++++++++++++++++++++++++++++++++++++++++---------
35192  1 file changed, 79 insertions(+), 16 deletions(-)
35193 
35194 diff --git a/litmus/bank_proc.c b/litmus/bank_proc.c
35195 index 7cf07ee..3cf9cda 100644
35196 --- a/litmus/bank_proc.c
35197 +++ b/litmus/bank_proc.c
35198 @@ -26,7 +26,7 @@
35199  #define CACHE_MASK  0x0000f000      
35200  #define CACHE_SHIFT 12
35201  
35202 -#define PAGES_PER_COLOR 256
35203 +#define PAGES_PER_COLOR 512
35204  unsigned int NUM_PAGE_LIST;  //8*16
35205  
35206  unsigned int number_banks;
35207 @@ -37,6 +37,8 @@ unsigned int set_partition_min = 0;
35208  unsigned int bank_partition_max = 0x000000ff;
35209  unsigned int bank_partition_min = 0;
35210  
35211 +int show_page_pool = 0;
35212 +
35213  unsigned int set_partition[9] = {
35214          0x00000003,  /* Core 0, and Level A*/
35215          0x00000003,  /* Core 0, and Level B*/
35216 @@ -58,7 +60,7 @@ unsigned int bank_partition[9] = {
35217          0x00000040,  /* Core 2, and Level B*/
35218          0x00000080,  /* Core 3, and Level A*/
35219          0x00000080,  /* Core 3, and Level B*/
35220 -        0x0000000f,  /* Level C */
35221 +        0x0000000c,  /* Level C */
35222  };
35223  
35224  unsigned int set_index[9] = {
35225 @@ -161,9 +163,11 @@ static inline unsigned int page_list_index(struct page *page)
35226   */
35227  static unsigned long smallest_nr_pages(void)
35228  {
35229 -	unsigned long i, min_pages = -1;
35230 +	unsigned long i, min_pages;
35231  	struct color_group *cgroup;
35232 -	for (i = 0; i < NUM_PAGE_LIST; ++i) {
35233 +	cgroup = &color_groups[16*2];
35234 +	min_pages =atomic_read(&cgroup->nr_pages); 
35235 +	for (i = 16*2; i < NUM_PAGE_LIST; ++i) {
35236  		cgroup = &color_groups[i];
35237  		if (atomic_read(&cgroup->nr_pages) < min_pages)
35238  			min_pages = atomic_read(&cgroup->nr_pages);
35239 @@ -175,9 +179,13 @@ static void show_nr_pages(void)
35240  {
35241  	unsigned long i;
35242  	struct color_group *cgroup;
35243 +	printk("show nr pages***************************************\n");
35244  	for (i = 0; i < NUM_PAGE_LIST; ++i) {
35245  		cgroup = &color_groups[i];
35246 -		printk("i =%d, nr_pages = %d\n", i, atomic_read(&cgroup->nr_pages));
35247 +		printk("(%03d) =  %03d, ", i, atomic_read(&cgroup->nr_pages));
35248 +		if((i % 8) ==7){
35249 +		    printk("\n");
35250 +                }
35251  	}
35252  }
35253  
35254 @@ -193,6 +201,7 @@ void add_page_to_color_list(struct page *page)
35255  	spin_lock(&cgroup->lock);
35256  	list_add_tail(&page->lru, &cgroup->list);
35257  	atomic_inc(&cgroup->nr_pages);
35258 +	SetPageLRU(page);
35259  	spin_unlock(&cgroup->lock);
35260  }
35261  
35262 @@ -210,30 +219,65 @@ static int do_add_pages(void)
35263  	unsigned long color;
35264  	int ret = 0;
35265  	int i = 0;
35266 +        int free_counter = 0;
35267 +        unsigned long counter[128]= {0}; 
35268 +        
35269 +        printk("Before refill : \n");
35270 +        show_nr_pages();
35271  
35272  	// until all the page lists contain enough pages 
35273  	//for (i =0; i<5; i++) {
35274 -	while (smallest_nr_pages() < PAGES_PER_COLOR) {
35275 -	
35276 +	for (i=0; i< 1024*100;i++) {
35277 +//	while (smallest_nr_pages() < PAGES_PER_COLOR) {
35278 +       //         printk("smallest = %d\n", smallest_nr_pages());	
35279  		page = alloc_page(GFP_HIGHUSER_MOVABLE);
35280 +	    //    page = alloc_pages_exact_node(0, GFP_HIGHUSER_MOVABLE, 0);
35281 +	
35282  		if (unlikely(!page)) {
35283  			printk(KERN_WARNING "Could not allocate pages.\n");
35284  			ret = -ENOMEM;
35285  			goto out;
35286  		}
35287  		color = page_list_index(page);
35288 +		counter[color]++;
35289 +	//	printk("page(%d) = color %x, bank %x, [color] =%d \n", color, page_color(page), page_bank(page), atomic_read(&color_groups[color].nr_pages));
35290                  //show_nr_pages();
35291 -		//printk("before : nr_pages = %d\n", atomic_read(&color_groups[color].nr_pages));
35292 -		if (atomic_read(&color_groups[color].nr_pages) < PAGES_PER_COLOR) {
35293 +		if (atomic_read(&color_groups[color].nr_pages) < PAGES_PER_COLOR && color>=32) {
35294  			add_page_to_color_list(page);
35295 +	//		printk("add page(%d) = color %x, bank %x\n", color, page_color(page), page_bank(page));
35296  		} else{
35297  			// Pages here will be freed later 
35298  			list_add_tail(&page->lru, &free_later);
35299 +			free_counter++;
35300 +		        //list_del(&page->lru);
35301 +		//        __free_page(page);
35302 +	//		printk("useless page(%d) = color %x, bank %x\n", color,  page_color(page), page_bank(page));
35303  		}
35304                 //show_nr_pages();
35305 -		//printk("after : nr_pages = %d\n", atomic_read(&color_groups[color].nr_pages));
35306 +                /*
35307 +                if(free_counter >= PAGES_PER_COLOR)
35308 +                {
35309 +                    printk("free unwanted page list earlier");
35310 +                    free_counter = 0;
35311 +	            list_for_each_entry_safe(page, page_tmp, &free_later, lru) {
35312 +		        list_del(&page->lru);
35313 +		        __free_page(page);
35314 +	            }
35315 +
35316 +                    show_nr_pages();
35317 +                }
35318 +                */
35319 +        }
35320 +        printk("page counter = \n");
35321 +        for (i=0; i<128; i++)
35322 +        {
35323 +            printk("(%03d) = %4d, ", i , counter[i]);
35324 +            if(i%8 == 7){
35325 +                printk("\n");
35326 +            }
35327  
35328          }
35329 +        printk("After refill : \n");
35330          show_nr_pages();
35331  #if 1
35332  	// Free the unwanted pages
35333 @@ -274,10 +318,10 @@ static struct  page *new_alloc_page_color( unsigned long color)
35334  	}
35335  	rPage = list_first_entry(&cgroup->list, struct page, lru);
35336  	BUG_ON(page_count(rPage) > 1);
35337 -	get_page(rPage);
35338 +//	get_page(rPage);
35339  	list_del(&rPage->lru);
35340  	atomic_dec(&cgroup->nr_pages);
35341 -//	ClearPageLRU(rPage);
35342 +	ClearPageLRU(rPage);
35343  out_unlock:
35344  	spin_unlock(&cgroup->lock);
35345  out:
35346 @@ -311,7 +355,7 @@ struct page *new_alloc_page(struct page *page, unsigned long node, int **x)
35347          unsigned int idx = 0;
35348          idx += num_by_bitmask_index(set_partition[node], set_index[node]);
35349          idx += number_cachecolors* num_by_bitmask_index(bank_partition[node], bank_index[node]);
35350 -	printk("node  = %d, idx = %d\n", node, idx);
35351 +	//printk("node  = %d, idx = %d\n", node, idx);
35352  
35353  	rPage =  new_alloc_page_color(idx);
35354          
35355 @@ -407,7 +451,21 @@ out:
35356  	return ret;
35357  }
35358  
35359 -
35360 +int show_page_pool_handler(struct ctl_table *table, int write, void __user *buffer,
35361 +		size_t *lenp, loff_t *ppos)
35362 +{
35363 +	int ret = 0, i = 0;
35364 +	mutex_lock(&void_lockdown_proc);
35365 +	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
35366 +	if (ret)
35367 +		goto out;
35368 +	if (write) {
35369 +            show_nr_pages();
35370 +	}
35371 +out:
35372 +	mutex_unlock(&void_lockdown_proc);
35373 +	return ret;
35374 +}
35375  static struct ctl_table cache_table[] =
35376  {
35377          
35378 @@ -573,8 +631,13 @@ static struct ctl_table cache_table[] =
35379  		.extra1		= &bank_partition_min,
35380  		.extra2		= &bank_partition_max,
35381  	},	
35382 -
35383 -
35384 +	{
35385 +		.procname	= "show_page_pool",
35386 +		.mode		= 0666,
35387 +		.proc_handler	= show_page_pool_handler,
35388 +		.data		= &show_page_pool,
35389 +		.maxlen		= sizeof(show_page_pool),
35390 +	},	
35391  	{ }
35392  };
35393  
35394 -- 
35395 1.8.1.2
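
The refill policy that this patch settles on in do_add_pages() is easier to see in isolation: allocate a large batch of pages, bucket them by color, keep at most PAGES_PER_COLOR pages per managed color (colors >= 32), and hand everything else back. The simulation below is illustration only, not part of the series; colors are drawn uniformly at random here, whereas the kernel derives them from page_list_index(), and the 128 pools correspond to the 8 banks x 16 cache colors noted at the top of bank_proc.c.

#include <stdio.h>
#include <stdlib.h>

#define NUM_PAGE_LIST   128   /* 8 banks x 16 cache colors */
#define PAGES_PER_COLOR 512
#define FIRST_MANAGED   32    /* only colors >= 32 are pooled after this patch */

int main(void)
{
	unsigned long nr_pages[NUM_PAGE_LIST] = {0};  /* pooled pages per color */
	unsigned long freed_later = 0;                /* pages handed back */
	int i;

	srand(0);
	for (i = 0; i < 1024 * 100; i++) {
		int color = rand() % NUM_PAGE_LIST;   /* stand-in for page_list_index() */

		if (nr_pages[color] < PAGES_PER_COLOR && color >= FIRST_MANAGED)
			nr_pages[color]++;            /* add_page_to_color_list() */
		else
			freed_later++;                /* queued on free_later */
	}

	for (i = 0; i < NUM_PAGE_LIST; i++) {
		printf("(%03d) = %4lu, ", i, nr_pages[i]);
		if (i % 8 == 7)
			printf("\n");
	}
	printf("pages handed back to the allocator: %lu\n", freed_later);
	return 0;
}

In the kernel, the rejected pages sit on the free_later list and are released with __free_page() once the batch is done.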
35396 
35397 
35398 From 6941230b519e96be0ea464206e795046c0938d01 Mon Sep 17 00:00:00 2001
35399 From: ChengYang Fu <chengyangfu@gmail.com>
35400 Date: Tue, 24 Mar 2015 21:28:44 -0400
35401 Subject: [PATCH 104/119] Change the loop condition in the page-refill
35402  process in bank_proc.c
35403 
35404 ---
35405  litmus/bank_proc.c | 3 ++-
35406  1 file changed, 2 insertions(+), 1 deletion(-)
35407 
35408 diff --git a/litmus/bank_proc.c b/litmus/bank_proc.c
35409 index 3cf9cda..2688d79 100644
35410 --- a/litmus/bank_proc.c
35411 +++ b/litmus/bank_proc.c
35412 @@ -242,7 +242,8 @@ static int do_add_pages(void)
35413  		counter[color]++;
35414  	//	printk("page(%d) = color %x, bank %x, [color] =%d \n", color, page_color(page), page_bank(page), atomic_read(&color_groups[color].nr_pages));
35415                  //show_nr_pages();
35416 -		if (atomic_read(&color_groups[color].nr_pages) < PAGES_PER_COLOR && color>=32) {
35417 +		//if (atomic_read(&color_groups[color].nr_pages) < PAGES_PER_COLOR && color>=32) {
35418 +		if ( PAGES_PER_COLOR && color>=32) {
35419  			add_page_to_color_list(page);
35420  	//		printk("add page(%d) = color %x, bank %x\n", color, page_color(page), page_bank(page));
35421  		} else{
35422 -- 
35423 1.8.1.2
35424 
35425 
35426 From f441fa3f923ccb825d9888c8a75c033a67316d91 Mon Sep 17 00:00:00 2001
35427 From: ChengYang Fu <chengyangfu@gmail.com>
35428 Date: Wed, 25 Mar 2015 10:59:55 -0400
35429 Subject: [PATCH 105/119] Add a reclaim_page() function to bank_proc.c to
35430  recycle all pages, and call do_add_pages() only during kernel boot
35431 
35432 ---
35433  litmus/bank_proc.c | 31 +++++++++++++++++++++++++------
35434  1 file changed, 25 insertions(+), 6 deletions(-)
35435 
35436 diff --git a/litmus/bank_proc.c b/litmus/bank_proc.c
35437 index 2688d79..9771529 100644
35438 --- a/litmus/bank_proc.c
35439 +++ b/litmus/bank_proc.c
35440 @@ -38,6 +38,7 @@ unsigned int bank_partition_max = 0x000000ff;
35441  unsigned int bank_partition_min = 0;
35442  
35443  int show_page_pool = 0;
35444 +spinlock_t reclaim_lock;
35445  
35446  unsigned int set_partition[9] = {
35447          0x00000003,  /* Core 0, and Level A*/
35448 @@ -227,7 +228,7 @@ static int do_add_pages(void)
35449  
35450  	// until all the page lists contain enough pages 
35451  	//for (i =0; i<5; i++) {
35452 -	for (i=0; i< 1024*100;i++) {
35453 +	for (i=0; i< 1024*200;i++) {
35454  //	while (smallest_nr_pages() < PAGES_PER_COLOR) {
35455         //         printk("smallest = %d\n", smallest_nr_pages());	
35456  		page = alloc_page(GFP_HIGHUSER_MOVABLE);
35457 @@ -243,7 +244,7 @@ static int do_add_pages(void)
35458  	//	printk("page(%d) = color %x, bank %x, [color] =%d \n", color, page_color(page), page_bank(page), atomic_read(&color_groups[color].nr_pages));
35459                  //show_nr_pages();
35460  		//if (atomic_read(&color_groups[color].nr_pages) < PAGES_PER_COLOR && color>=32) {
35461 -		if ( PAGES_PER_COLOR && color>=32) {
35462 +		if ( PAGES_PER_COLOR && color>=16*2) {
35463  			add_page_to_color_list(page);
35464  	//		printk("add page(%d) = color %x, bank %x\n", color, page_color(page), page_bank(page));
35465  		} else{
35466 @@ -319,7 +320,7 @@ static struct  page *new_alloc_page_color( unsigned long color)
35467  	}
35468  	rPage = list_first_entry(&cgroup->list, struct page, lru);
35469  	BUG_ON(page_count(rPage) > 1);
35470 -//	get_page(rPage);
35471 +	get_page(rPage);
35472  	list_del(&rPage->lru);
35473  	atomic_dec(&cgroup->nr_pages);
35474  	ClearPageLRU(rPage);
35475 @@ -327,7 +328,11 @@ out_unlock:
35476  	spin_unlock(&cgroup->lock);
35477  out:
35478  	if( smallest_nr_pages() == 0)
35479 -		do_add_pages();
35480 +        {
35481 +//		do_add_pages();
35482 +            printk("ERROR(bank_proc.c) = We don't have enough pages in bank_proc.c\n");        
35483 +        
35484 +        }
35485  	return rPage;
35486  }
35487  
35488 @@ -367,6 +372,20 @@ struct page *new_alloc_page(struct page *page, unsigned long node, int **x)
35489  }
35490  
35491  
35492 +/*
35493 + * Reclaim pages.
35494 + */
35495 +void reclaim_page(struct page *page)
35496 +{
35497 +	const unsigned long color = page_list_index(page);
35498 +	unsigned long nr_reclaimed = 0;
35499 +	spin_lock(&reclaim_lock);
35500 +    	put_page(page);
35501 +	add_page_to_color_list(page);
35502 +
35503 +	spin_unlock(&reclaim_lock);
35504 +	printk("Reclaimed page(%d) = color %x, bank %x, [color] =%d \n", color, page_color(page), page_bank(page), atomic_read(&color_groups[color].nr_pages));
35505 +}
35506  
35507  
35508  /*
35509 @@ -382,6 +401,8 @@ static int __init init_variables(void)
35510  	NUM_PAGE_LIST = number_banks * number_cachecolors; 
35511          printk(KERN_WARNING "number of banks = %d, number of cachecolors=%d\n", number_banks, number_cachecolors);
35512  	mutex_init(&void_lockdown_proc);
35513 +	spin_lock_init(&reclaim_lock);
35514 +
35515  }
35516  
35517  
35518 @@ -663,8 +684,6 @@ static int __init litmus_color_init(void)
35519  	int err=0;
35520          printk("Init bankproc.c\n");
35521  
35522 -	//INIT_LIST_HEAD(&alloced_pages.list);
35523 -	//spin_lock_init(&alloced_pages.lock);
35524  	init_variables();
35525  
35526  	printk(KERN_INFO "Registering LITMUS^RT proc color sysctl.\n");
35527 -- 
35528 1.8.1.2
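
The reclaim path above reports each page's color and bank. As a reference for how such an index can be derived from a physical address, here is a stand-alone sketch (illustration only, not part of the series). CACHE_MASK and CACHE_SHIFT match the definitions in bank_proc.c; BANK_MASK and BANK_SHIFT are assumed placeholders, since they are not visible in this excerpt, and the "color + 16 * bank" composition is inferred from how new_alloc_page() builds its pool index.

#include <stdio.h>
#include <stdint.h>

#define CACHE_MASK  0x0000f000UL   /* as in bank_proc.c */
#define CACHE_SHIFT 12
#define BANK_MASK   0x38000000UL   /* assumed: 8 banks selected by bits 29:27 */
#define BANK_SHIFT  27

#define NUM_CACHE_COLORS 16

static unsigned int page_color(uintptr_t phys) { return (phys & CACHE_MASK) >> CACHE_SHIFT; }
static unsigned int page_bank(uintptr_t phys)  { return (phys & BANK_MASK)  >> BANK_SHIFT;  }

/* Combined pool index: cache color plus number_cachecolors * bank. */
static unsigned int page_list_index(uintptr_t phys)
{
	return page_color(phys) + NUM_CACHE_COLORS * page_bank(phys);
}

int main(void)
{
	uintptr_t addr = 0x3941a000UL;  /* arbitrary example physical address */

	printf("color=%u bank=%u index=%u\n",
	       page_color(addr), page_bank(addr), page_list_index(addr));
	return 0;
}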
35529 
35530 
35531 From 09471d13bd498bdc9d6f0874c0e00eba574f5558 Mon Sep 17 00:00:00 2001
35532 From: Namhoon Kim <namhoonk@cs.unc.edu>
35533 Date: Wed, 25 Mar 2015 11:03:29 -0400
35534 Subject: [PATCH 106/119] Add litmus_migrate_pages()
35535 
35536 ---
35537  include/linux/balloon_compaction.h |  14 +++++
35538  include/linux/migrate.h            |   2 +
35539  litmus/litmus.c                    |   2 +-
35540  mm/migrate.c                       | 113 +++++++++++++++++++++++++++++++++++++
35541  4 files changed, 130 insertions(+), 1 deletion(-)
35542 
35543 diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h
35544 index 089743a..1dbef0b 100644
35545 --- a/include/linux/balloon_compaction.h
35546 +++ b/include/linux/balloon_compaction.h
35547 @@ -93,6 +93,20 @@ static inline void balloon_page_free(struct page *page)
35548  	__free_page(page);
35549  }
35550  
35551 +static inline void litmus_balloon_page_free(struct page *page)
35552 +{
35553 +	/*
35554 +	 * Balloon pages always get an extra refcount before being isolated
35555 +	 * and before being dequeued to help on sorting out fortuite colisions
35556 +	 * between a thread attempting to isolate and another thread attempting
35557 +	 * to release the very same balloon page.
35558 +	 *
35559 +	 * Before we handle the page back to Buddy, lets drop its extra refcnt.
35560 +	 */
35561 +	put_page(page);
35562 +	__free_page(page);
35563 +}
35564 +
35565  #ifdef CONFIG_BALLOON_COMPACTION
35566  extern bool balloon_page_isolate(struct page *page);
35567  extern void balloon_page_putback(struct page *page);
35568 diff --git a/include/linux/migrate.h b/include/linux/migrate.h
35569 index a405d3dc..a2a7e25e 100644
35570 --- a/include/linux/migrate.h
35571 +++ b/include/linux/migrate.h
35572 @@ -41,6 +41,8 @@ extern int migrate_page(struct address_space *,
35573  			struct page *, struct page *, enum migrate_mode);
35574  extern int migrate_pages(struct list_head *l, new_page_t x,
35575  		unsigned long private, enum migrate_mode mode, int reason);
35576 +extern int litmus_migrate_pages(struct list_head *l, new_page_t x,
35577 +		unsigned long private, enum migrate_mode mode, int reason);
35578  extern int migrate_huge_page(struct page *, new_page_t x,
35579  		unsigned long private, enum migrate_mode mode);
35580  
35581 diff --git a/litmus/litmus.c b/litmus/litmus.c
35582 index 04c5017..c8ed597 100644
35583 --- a/litmus/litmus.c
35584 +++ b/litmus/litmus.c
35585 @@ -499,7 +499,7 @@ asmlinkage long sys_set_page_color(int cpu)
35586  
35587          //node= 0;
35588  	if (!list_empty(&pagelist)) {
35589 -		ret = migrate_pages(&pagelist, new_alloc_page, node, MIGRATE_ASYNC, MR_SYSCALL);
35590 +		ret = litmus_migrate_pages(&pagelist, new_alloc_page, node, MIGRATE_ASYNC, MR_SYSCALL);
35591  		TRACE_TASK(current, "%ld pages not migrated.\n", ret);
35592  		if (ret) {
35593  			putback_lru_pages(&pagelist);
35594 diff --git a/mm/migrate.c b/mm/migrate.c
35595 index a88c12f..eab459a 100644
35596 --- a/mm/migrate.c
35597 +++ b/mm/migrate.c
35598 @@ -918,6 +918,65 @@ out:
35599  	return rc;
35600  }
35601  
35602 +static int litmus_unmap_and_move(new_page_t get_new_page, unsigned long private,
35603 +			struct page *page, int force, enum migrate_mode mode)
35604 +{
35605 +	int rc = 0;
35606 +	int *result = NULL;
35607 +	struct page *newpage = get_new_page(page, private, &result);
35608 +
35609 +	if (!newpage)
35610 +		return -ENOMEM;
35611 +
35612 +	if (page_count(page) == 1) {
35613 +		/* page was freed from under us. So we are done. */
35614 +		goto out;
35615 +	}
35616 +
35617 +	if (unlikely(PageTransHuge(page)))
35618 +		if (unlikely(split_huge_page(page)))
35619 +			goto out;
35620 +
35621 +	rc = __unmap_and_move(page, newpage, force, mode);
35622 +
35623 +	if (unlikely(rc == MIGRATEPAGE_BALLOON_SUCCESS)) {
35624 +		/*
35625 +		 * A ballooned page has been migrated already.
35626 +		 * Now, it's the time to wrap-up counters,
35627 +		 * handle the page back to Buddy and return.
35628 +		 */
35629 +		dec_zone_page_state(page, NR_ISOLATED_ANON +
35630 +				    page_is_file_cache(page));
35631 +		litmus_balloon_page_free(page);
35632 +		return MIGRATEPAGE_SUCCESS;
35633 +	}
35634 +out:
35635 +	if (rc != -EAGAIN) {
35636 +		/*
35637 +		 * A page that has been migrated has all references
35638 +		 * removed and will be freed. A page that has not been
35639 +		 * migrated will have kepts its references and be
35640 +		 * restored.
35641 +		 */
35642 +		list_del(&page->lru);
35643 +		dec_zone_page_state(page, NR_ISOLATED_ANON +
35644 +				page_is_file_cache(page));
35645 +		putback_lru_page(page);
35646 +	}
35647 +	/*
35648 +	 * Move the new page to the LRU. If migration was not successful
35649 +	 * then this will free the page.
35650 +	 */
35651 +	putback_lru_page(newpage);
35652 +	if (result) {
35653 +		if (rc)
35654 +			*result = rc;
35655 +		else
35656 +			*result = page_to_nid(newpage);
35657 +	}
35658 +	return rc;
35659 +}
35660 +
35661  /*
35662   * Counterpart of unmap_and_move_page() for hugepage migration.
35663   *
35664 @@ -1058,6 +1117,60 @@ out:
35665  	return rc;
35666  }
35667  
35668 +int litmus_migrate_pages(struct list_head *from, new_page_t get_new_page,
35669 +		unsigned long private, enum migrate_mode mode, int reason)
35670 +{
35671 +	int retry = 1;
35672 +	int nr_failed = 0;
35673 +	int nr_succeeded = 0;
35674 +	int pass = 0;
35675 +	struct page *page;
35676 +	struct page *page2;
35677 +	int swapwrite = current->flags & PF_SWAPWRITE;
35678 +	int rc;
35679 +
35680 +	if (!swapwrite)
35681 +		current->flags |= PF_SWAPWRITE;
35682 +
35683 +	for(pass = 0; pass < 10 && retry; pass++) {
35684 +		retry = 0;
35685 +
35686 +		list_for_each_entry_safe(page, page2, from, lru) {
35687 +			cond_resched();
35688 +
35689 +			rc = litmus_unmap_and_move(get_new_page, private,
35690 +						page, pass > 2, mode);
35691 +
35692 +			switch(rc) {
35693 +			case -ENOMEM:
35694 +				goto out;
35695 +			case -EAGAIN:
35696 +				retry++;
35697 +				break;
35698 +			case MIGRATEPAGE_SUCCESS:
35699 +				nr_succeeded++;
35700 +				break;
35701 +			default:
35702 +				/* Permanent failure */
35703 +				nr_failed++;
35704 +				break;
35705 +			}
35706 +		}
35707 +	}
35708 +	rc = nr_failed + retry;
35709 +out:
35710 +	if (nr_succeeded)
35711 +		count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded);
35712 +	if (nr_failed)
35713 +		count_vm_events(PGMIGRATE_FAIL, nr_failed);
35714 +	trace_mm_migrate_pages(nr_succeeded, nr_failed, mode, reason);
35715 +
35716 +	if (!swapwrite)
35717 +		current->flags &= ~PF_SWAPWRITE;
35718 +
35719 +	return rc;
35720 +}
35721 +
35722  int migrate_huge_page(struct page *hpage, new_page_t get_new_page,
35723  		      unsigned long private, enum migrate_mode mode)
35724  {
35725 -- 
35726 1.8.1.2
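
litmus_migrate_pages() mirrors migrate_pages() but feeds pages to the colored allocator via new_alloc_page(). Its control flow is the classic retry loop: up to ten passes over the work list, forcing the move only after the first few passes, and per-result accounting. The sketch below (illustration only) reproduces just that structure with a stub in place of litmus_unmap_and_move(); the result codes are local stand-ins, not the kernel's constants.

#include <stdio.h>

enum { MOVE_SUCCESS = 0, MOVE_EAGAIN = -1, MOVE_ENOMEM = -2, MOVE_FAILED = -3 };

/* Stub for litmus_unmap_and_move(): items divisible by 7 succeed only once
 * the caller sets the force flag. */
static int try_move_one(int item, int force)
{
	return (item % 7 == 0 && !force) ? MOVE_EAGAIN : MOVE_SUCCESS;
}

int main(void)
{
	int done[32] = {0};
	int nr_failed = 0, nr_succeeded = 0;
	int retry = 1, pass, i, rc;

	/* Same shape as litmus_migrate_pages(): at most 10 passes, force the
	 * move once an item has survived a couple of passes (pass > 2). */
	for (pass = 0; pass < 10 && retry; pass++) {
		retry = 0;
		for (i = 0; i < 32; i++) {
			if (done[i])
				continue;
			rc = try_move_one(i, pass > 2);
			switch (rc) {
			case MOVE_ENOMEM:
				goto out;
			case MOVE_EAGAIN:
				retry++;
				break;
			case MOVE_SUCCESS:
				nr_succeeded++;
				done[i] = 1;
				break;
			default:         /* permanent failure */
				nr_failed++;
				done[i] = 1;
				break;
			}
		}
	}
out:
	printf("succeeded=%d failed=%d still pending=%d\n",
	       nr_succeeded, nr_failed, retry);
	return 0;
}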
35727 
35728 
35729 From 0d90fd30739e41acb6b060b7e145fbdf6a946686 Mon Sep 17 00:00:00 2001
35730 From: ChengYang Fu <chengyangfu@gmail.com>
35731 Date: Wed, 25 Mar 2015 23:28:27 -0400
35732 Subject: [PATCH 107/119] Modify the page-refill mechanism in bank_proc.c
35733 
35734 ---
35735  litmus/bank_proc.c | 34 +++++++++++++++++++++++++++++-----
35736  1 file changed, 29 insertions(+), 5 deletions(-)
35737 
35738 diff --git a/litmus/bank_proc.c b/litmus/bank_proc.c
35739 index 9771529..7be55b7 100644
35740 --- a/litmus/bank_proc.c
35741 +++ b/litmus/bank_proc.c
35742 @@ -38,6 +38,7 @@ unsigned int bank_partition_max = 0x000000ff;
35743  unsigned int bank_partition_min = 0;
35744  
35745  int show_page_pool = 0;
35746 +int refill_page_pool = 0;
35747  spinlock_t reclaim_lock;
35748  
35749  unsigned int set_partition[9] = {
35750 @@ -228,7 +229,7 @@ static int do_add_pages(void)
35751  
35752  	// until all the page lists contain enough pages 
35753  	//for (i =0; i<5; i++) {
35754 -	for (i=0; i< 1024*200;i++) {
35755 +	for (i=0; i< 1024*100;i++) {
35756  //	while (smallest_nr_pages() < PAGES_PER_COLOR) {
35757         //         printk("smallest = %d\n", smallest_nr_pages());	
35758  		page = alloc_page(GFP_HIGHUSER_MOVABLE);
35759 @@ -243,8 +244,8 @@ static int do_add_pages(void)
35760  		counter[color]++;
35761  	//	printk("page(%d) = color %x, bank %x, [color] =%d \n", color, page_color(page), page_bank(page), atomic_read(&color_groups[color].nr_pages));
35762                  //show_nr_pages();
35763 -		//if (atomic_read(&color_groups[color].nr_pages) < PAGES_PER_COLOR && color>=32) {
35764 -		if ( PAGES_PER_COLOR && color>=16*2) {
35765 +		if (atomic_read(&color_groups[color].nr_pages) < PAGES_PER_COLOR && color>=32) {
35766 +	//	if ( PAGES_PER_COLOR && color>=16*2) {
35767  			add_page_to_color_list(page);
35768  	//		printk("add page(%d) = color %x, bank %x\n", color, page_color(page), page_bank(page));
35769  		} else{
35770 @@ -329,8 +330,8 @@ out_unlock:
35771  out:
35772  	if( smallest_nr_pages() == 0)
35773          {
35774 -//		do_add_pages();
35775 -            printk("ERROR(bank_proc.c) = We don't have enough pages in bank_proc.c\n");        
35776 +		do_add_pages();
35777 +       //     printk("ERROR(bank_proc.c) = We don't have enough pages in bank_proc.c\n");        
35778          
35779          }
35780  	return rPage;
35781 @@ -488,6 +489,23 @@ out:
35782  	mutex_unlock(&void_lockdown_proc);
35783  	return ret;
35784  }
35785 +
35786 +int refill_page_pool_handler(struct ctl_table *table, int write, void __user *buffer,
35787 +		size_t *lenp, loff_t *ppos)
35788 +{
35789 +	int ret = 0, i = 0;
35790 +	mutex_lock(&void_lockdown_proc);
35791 +	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
35792 +	if (ret)
35793 +		goto out;
35794 +	if (write) {
35795 +            do_add_pages();
35796 +	}
35797 +out:
35798 +	mutex_unlock(&void_lockdown_proc);
35799 +	return ret;
35800 +}
35801 +
35802  static struct ctl_table cache_table[] =
35803  {
35804          
35805 @@ -659,6 +677,12 @@ static struct ctl_table cache_table[] =
35806  		.proc_handler	= show_page_pool_handler,
35807  		.data		= &show_page_pool,
35808  		.maxlen		= sizeof(show_page_pool),
35809 +	},		{
35810 +		.procname	= "refill_page_pool",
35811 +		.mode		= 0666,
35812 +		.proc_handler	= refill_page_pool_handler,
35813 +		.data		= &refill_page_pool,
35814 +		.maxlen		= sizeof(refill_page_pool),
35815  	},	
35816  	{ }
35817  };
35818 -- 
35819 1.8.1.2
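
Both new entries act on write: writing to refill_page_pool calls do_add_pages(), and writing to show_page_pool prints the per-color counts via show_nr_pages(). A small user-space sketch of how they might be poked follows (illustration only; the /proc/sys/litmus/ path is an assumption based on how the sysctl directory is registered elsewhere in the series).

#include <stdio.h>

/* Trigger a refill of the colored page pools, then dump the per-color
 * counts to the kernel log. Any value works; the handlers only check
 * that a write occurred. */
static void poke(const char *path)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return;
	}
	fputs("1\n", f);
	fclose(f);
}

int main(void)
{
	poke("/proc/sys/litmus/refill_page_pool");  /* assumed path */
	poke("/proc/sys/litmus/show_page_pool");    /* assumed path */
	return 0;
}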
35820 
35821 
35822 From bf0775c5b3366443a580874ebf82349b7d185f6f Mon Sep 17 00:00:00 2001
35823 From: Namhoon Kim <namhoonk@cs.unc.edu>
35824 Date: Wed, 25 Mar 2015 23:30:58 -0400
35825 Subject: [PATCH 108/119] Add cache configuration proc filesystem entries
35826 
35827 ---
35828  arch/arm/kernel/irq.c |   4 +-
35829  litmus/cache_proc.c   | 173 +++++++++++++++++++++++++++++++++++++++++++++++++-
35830  2 files changed, 173 insertions(+), 4 deletions(-)
35831 
35832 diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
35833 index ce01835..145f290 100644
35834 --- a/arch/arm/kernel/irq.c
35835 +++ b/arch/arm/kernel/irq.c
35836 @@ -68,7 +68,7 @@ void handle_IRQ(unsigned int irq, struct pt_regs *regs)
35837  {
35838  	struct pt_regs *old_regs = set_irq_regs(regs);
35839  
35840 -	enter_irq_mode();
35841 +	//enter_irq_mode();
35842  	irq_enter();
35843  
35844  	/*
35845 @@ -84,7 +84,7 @@ void handle_IRQ(unsigned int irq, struct pt_regs *regs)
35846  	}
35847  
35848  	irq_exit();
35849 -	exit_irq_mode();
35850 +	//exit_irq_mode();
35851  	set_irq_regs(old_regs);
35852  }
35853  
35854 diff --git a/litmus/cache_proc.c b/litmus/cache_proc.c
35855 index a2d560a..c331dd6 100644
35856 --- a/litmus/cache_proc.c
35857 +++ b/litmus/cache_proc.c
35858 @@ -88,6 +88,11 @@ static unsigned int way_partition_max;
35859  static int zero = 0;
35860  static int one = 1;
35861  
35862 +static int l1_prefetch_proc;
35863 +static int l2_prefetch_hint_proc;
35864 +static int l2_double_linefill_proc;
35865 +static int l2_data_prefetch_proc;
35866 +
35867  #define ld_d_reg(cpu) ({ int __cpu = cpu; \
35868  			void __iomem *__v = cache_base + L2X0_LOCKDOWN_WAY_D_BASE + \
35869  			__cpu * L2X0_LOCKDOWN_STRIDE; __v; })
35870 @@ -185,12 +190,12 @@ int way_partition_handler(struct ctl_table *table, int write, void __user *buffe
35871  	if (ret)
35872  		goto out;
35873  	
35874 -	if (write) {
35875 +	//if (write) {
35876  		printk("Way-partition settings:\n");
35877  		for (i = 0; i < 9; i++) {
35878  			printk("0x%08X\n", ~way_partitions[i]);
35879  		}
35880 -	}
35881 +	//}
35882  	print_lockdown_registers();
35883  
35884  out:
35885 @@ -217,6 +222,12 @@ int lock_all_handler(struct ctl_table *table, int write, void __user *buffer,
35886  			writel_relaxed(nr_unlocked_way[0], ld_i_reg(i));
35887  		}
35888  	}
35889 +	if (write && lock_all == 0) {
35890 +		for (i = 0; i < nr_lockregs;  i++) {
35891 +			writel_relaxed(nr_unlocked_way[16], ld_d_reg(i));
35892 +			writel_relaxed(nr_unlocked_way[16], ld_i_reg(i));
35893 +		}
35894 +	}
35895  	print_lockdown_registers();
35896  
35897  out:
35898 @@ -227,6 +238,9 @@ out:
35899  void do_partition(enum crit_level lv, int cpu)
35900  {
35901  	u32 regs;
35902 +	
35903 +	if (lock_all)
35904 +		return;
35905  	switch(lv) {
35906  		case CRIT_LEVEL_A:
35907  			regs = ~way_partitions[cpu*2];
35908 @@ -290,6 +304,133 @@ void exit_irq_mode(void)
35909  	writel_relaxed(prev_lockdown_d_reg[cpu], ld_d_reg(cpu));	
35910  }
35911  
35912 +/* Operate on the Cortex-A9's ACTLR register */
35913 +#define ACTLR_L2_PREFETCH_HINT	(1 << 1)
35914 +#define ACTLR_L1_PREFETCH	(1 << 2)
35915 +
35916 +/*
35917 + * Change the ACTLR.
35918 + * @mode	- If 1 (0), set (clear) the bit given in @mask in the ACTLR.
35919 + * @mask	- A mask in which one bit is set to operate on the ACTLR.
35920 + */
35921 +static void actlr_change(int mode, int mask)
35922 +{
35923 +	u32 orig_value, new_value, reread_value;
35924 +
35925 +	if (0 != mode && 1 != mode) {
35926 +		printk(KERN_WARNING "Called %s with mode != 0 and mode != 1.\n",
35927 +				__FUNCTION__);
35928 +		return;
35929 +	}
35930 +
35931 +	/* get the original value */
35932 +	asm volatile("mrc p15, 0, %0, c1, c0, 1" : "=r" (orig_value));
35933 +
35934 +	if (0 == mode)
35935 +		new_value = orig_value & ~(mask);
35936 +	else
35937 +		new_value = orig_value | mask;
35938 +
35939 +	asm volatile("mcr p15, 0, %0, c1, c0, 1" : : "r" (new_value));
35940 +	asm volatile("mrc p15, 0, %0, c1, c0, 1" : "=r" (reread_value));
35941 +
35942 +	printk("ACTLR: orig: 0x%8x  wanted: 0x%8x  new: 0x%8x\n",
35943 +			orig_value, new_value, reread_value);
35944 +}
35945 +
35946 +int litmus_l1_prefetch_proc_handler(struct ctl_table *table, int write,
35947 +		void __user *buffer, size_t *lenp, loff_t *ppos)
35948 +{
35949 +	int ret, mode;
35950 +
35951 +	mutex_lock(&actlr_mutex);
35952 +	ret = proc_dointvec(table, write, buffer, lenp, ppos);
35953 +
35954 +	if (!ret && write) {
35955 +		mode = *((int*)table->data);
35956 +		actlr_change(mode, ACTLR_L1_PREFETCH);
35957 +	}
35958 +	mutex_unlock(&actlr_mutex);
35959 +
35960 +	return ret;
35961 +}
35962 +
35963 +int litmus_l2_prefetch_hint_proc_handler(struct ctl_table *table, int write,
35964 +		void __user *buffer, size_t *lenp, loff_t *ppos)
35965 +{
35966 +	int ret, mode;
35967 +
35968 +	mutex_lock(&actlr_mutex);
35969 +	ret = proc_dointvec(table, write, buffer, lenp, ppos);
35970 +	if (!ret && write) {
35971 +		mode = *((int*)table->data);
35972 +		actlr_change(mode, ACTLR_L2_PREFETCH_HINT);
35973 +	}
35974 +	mutex_unlock(&actlr_mutex);
35975 +
35976 +	return ret;
35977 +}
35978 +
35979 +
35980 +/* Operate on the PL-310's Prefetch Control Register, L2X0_PREFETCH_CTRL */
35981 +#define L2X0_PREFETCH_DOUBLE_LINEFILL	(1 << 30)
35982 +#define L2X0_PREFETCH_DATA_PREFETCH	(1 << 28)
35983 +static void l2x0_prefetch_change(int mode, int mask)
35984 +{
35985 +	u32 orig_value, new_value, reread_value;
35986 +
35987 +	if (0 != mode && 1 != mode) {
35988 +		printk(KERN_WARNING "Called %s with mode != 0 and mode != 1.\n",
35989 +				__FUNCTION__);
35990 +		return;
35991 +	}
35992 +
35993 +	orig_value = readl_relaxed(cache_base + L2X0_PREFETCH_CTRL);
35994 +
35995 +	if (0 == mode)
35996 +		new_value = orig_value & ~(mask);
35997 +	else
35998 +		new_value = orig_value | mask;
35999 +
36000 +	writel_relaxed(new_value, cache_base + L2X0_PREFETCH_CTRL);
36001 +	reread_value = readl_relaxed(cache_base + L2X0_PREFETCH_CTRL);
36002 +
36003 +	printk("l2x0 prefetch: orig: 0x%8x  wanted: 0x%8x  new: 0x%8x\n",
36004 +			orig_value, new_value, reread_value);
36005 +}
36006 +
36007 +int litmus_l2_double_linefill_proc_handler(struct ctl_table *table, int write,
36008 +		void __user *buffer, size_t *lenp, loff_t *ppos)
36009 +{
36010 +	int ret, mode;
36011 +
36012 +	mutex_lock(&l2x0_prefetch_mutex);
36013 +	ret = proc_dointvec(table, write, buffer, lenp, ppos);
36014 +	if (!ret && write) {
36015 +		mode = *((int*)table->data);
36016 +		l2x0_prefetch_change(mode, L2X0_PREFETCH_DOUBLE_LINEFILL);
36017 +	}
36018 +	mutex_unlock(&l2x0_prefetch_mutex);
36019 +
36020 +	return ret;
36021 +}
36022 +
36023 +int litmus_l2_data_prefetch_proc_handler(struct ctl_table *table, int write,
36024 +		void __user *buffer, size_t *lenp, loff_t *ppos)
36025 +{
36026 +	int ret, mode;
36027 +
36028 +	mutex_lock(&l2x0_prefetch_mutex);
36029 +	ret = proc_dointvec(table, write, buffer, lenp, ppos);
36030 +	if (!ret && write) {
36031 +		mode = *((int*)table->data);
36032 +		l2x0_prefetch_change(mode, L2X0_PREFETCH_DATA_PREFETCH);
36033 +	}
36034 +	mutex_unlock(&l2x0_prefetch_mutex);
36035 +
36036 +	return ret;
36037 +}
36038 +
36039  static struct ctl_table cache_table[] =
36040  {
36041  	{
36042 @@ -382,6 +523,34 @@ static struct ctl_table cache_table[] =
36043  		.extra1		= &zero,
36044  		.extra2		= &one,
36045  	},
36046 +	{
36047 +		.procname	= "l1_prefetch",
36048 +		.mode		= 0644,
36049 +		.proc_handler	= litmus_l1_prefetch_proc_handler,
36050 +		.data		= &l1_prefetch_proc,
36051 +		.maxlen		= sizeof(l1_prefetch_proc),
36052 +	},
36053 +	{
36054 +		.procname	= "l2_prefetch_hint",
36055 +		.mode		= 0644,
36056 +		.proc_handler	= litmus_l2_prefetch_hint_proc_handler,
36057 +		.data		= &l2_prefetch_hint_proc,
36058 +		.maxlen		= sizeof(l2_prefetch_hint_proc),
36059 +	},
36060 +	{
36061 +		.procname	= "l2_double_linefill",
36062 +		.mode		= 0644,
36063 +		.proc_handler	= litmus_l2_double_linefill_proc_handler,
36064 +		.data		= &l2_double_linefill_proc,
36065 +		.maxlen		= sizeof(l2_double_linefill_proc),
36066 +	},
36067 +	{
36068 +		.procname	= "l2_data_prefetch",
36069 +		.mode		= 0644,
36070 +		.proc_handler	= litmus_l2_data_prefetch_proc_handler,
36071 +		.data		= &l2_data_prefetch_proc,
36072 +		.maxlen		= sizeof(l2_data_prefetch_proc),
36073 +	},
36074  	{ }
36075  };
36076  
36077 -- 
36078 1.8.1.2
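
actlr_change() and l2x0_prefetch_change() above share one read-modify-write pattern: read the register, set or clear a single mask depending on the mode written to the proc entry, write it back, and re-read for the log message. The sketch below (illustration only) applies that pattern to plain variables using the bit definitions from this patch; the starting ACTLR value is arbitrary.

#include <stdio.h>
#include <stdint.h>

/* Bit definitions from the patch. */
#define ACTLR_L2_PREFETCH_HINT        (1u << 1)
#define ACTLR_L1_PREFETCH             (1u << 2)
#define L2X0_PREFETCH_DOUBLE_LINEFILL (1u << 30)
#define L2X0_PREFETCH_DATA_PREFETCH   (1u << 28)

/* mode == 1 sets the mask, mode == 0 clears it, as in actlr_change(). */
static uint32_t change_bit(uint32_t reg, int mode, uint32_t mask)
{
	return mode ? (reg | mask) : (reg & ~mask);
}

int main(void)
{
	uint32_t actlr = 0x00000041;  /* arbitrary example value */
	uint32_t pctrl = 0;

	actlr = change_bit(actlr, 1, ACTLR_L1_PREFETCH);       /* enable L1 prefetch */
	actlr = change_bit(actlr, 0, ACTLR_L2_PREFETCH_HINT);  /* clear L2 prefetch hint */
	printf("ACTLR:               0x%08X\n", actlr);

	pctrl = change_bit(pctrl, 1, L2X0_PREFETCH_DOUBLE_LINEFILL);
	pctrl = change_bit(pctrl, 1, L2X0_PREFETCH_DATA_PREFETCH);
	printf("PL310 prefetch ctrl: 0x%08X\n", pctrl);
	return 0;
}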
36079 
36080 
36081 From 701f70e21800aabf5d5d6042fd105adf531843a4 Mon Sep 17 00:00:00 2001
36082 From: ChengYang Fu <chengyangfu@gmail.com>
36083 Date: Thu, 26 Mar 2015 20:01:33 -0400
36084 Subject: [PATCH 109/119] modify the bank_proc.c
36085 
36086 ---
36087  litmus/bank_proc.c | 2 +-
36088  1 file changed, 1 insertion(+), 1 deletion(-)
36089 
36090 diff --git a/litmus/bank_proc.c b/litmus/bank_proc.c
36091 index 7be55b7..888b6a6 100644
36092 --- a/litmus/bank_proc.c
36093 +++ b/litmus/bank_proc.c
36094 @@ -328,7 +328,7 @@ static struct  page *new_alloc_page_color( unsigned long color)
36095  out_unlock:
36096  	spin_unlock(&cgroup->lock);
36097  out:
36098 -	if( smallest_nr_pages() == 0)
36099 +	while( smallest_nr_pages() == 0)
36100          {
36101  		do_add_pages();
36102         //     printk("ERROR(bank_proc.c) = We don't have enough pages in bank_proc.c\n");        
36103 -- 
36104 1.8.1.2
36105 
36106 
36107 From 1bbe2730b0bd7755253ab8e5471699d3f2297d22 Mon Sep 17 00:00:00 2001
36108 From: ChengYang Fu <chengyangfu@gmail.com>
36109 Date: Fri, 27 Mar 2015 14:27:42 -0400
36110 Subject: [PATCH 110/119] make bank_proc be more robust
36111 
36112 ---
36113  litmus/bank_proc.c | 30 +++++++++++++++++++-----------
36114  1 file changed, 19 insertions(+), 11 deletions(-)
36115 
36116 diff --git a/litmus/bank_proc.c b/litmus/bank_proc.c
36117 index 888b6a6..e1025b5 100644
36118 --- a/litmus/bank_proc.c
36119 +++ b/litmus/bank_proc.c
36120 @@ -97,9 +97,10 @@ unsigned int counting_one_set(unsigned int v)
36121  //    unsigned int v; // count the number of bits set in v
36122      unsigned int c; // c accumulates the total bits set in v
36123  
36124 -    for (c = 0; v; v >>= 1)
36125 +    for (c = 0; v; v = v>>1)
36126      {
36127 -        c += v & 1;
36128 +        if(v&1)
36129 +            c++;
36130      }
36131      return c;
36132  }
36133 @@ -214,7 +215,7 @@ void add_page_to_color_list(struct page *page)
36134   */
36135  static int do_add_pages(void)
36136  {
36137 -	printk("LITMUS do add pages\n");
36138 +//	printk("LITMUS do add pages\n");
36139  	
36140  	struct page *page, *page_tmp;
36141  	LIST_HEAD(free_later);
36142 @@ -236,7 +237,7 @@ static int do_add_pages(void)
36143  	    //    page = alloc_pages_exact_node(0, GFP_HIGHUSER_MOVABLE, 0);
36144  	
36145  		if (unlikely(!page)) {
36146 -			printk(KERN_WARNING "Could not allocate pages.\n");
36147 +	//		printk(KERN_WARNING "Could not allocate pages.\n");
36148  			ret = -ENOMEM;
36149  			goto out;
36150  		}
36151 @@ -271,7 +272,7 @@ static int do_add_pages(void)
36152                  }
36153                  */
36154          }
36155 -        printk("page counter = \n");
36156 +        /*printk("page counter = \n");
36157          for (i=0; i<128; i++)
36158          {
36159              printk("(%03d) = %4d, ", i , counter[i]);
36160 @@ -280,6 +281,7 @@ static int do_add_pages(void)
36161              }
36162  
36163          }
36164 +        */
36165          printk("After refill : \n");
36166          show_nr_pages();
36167  #if 1
36168 @@ -357,18 +359,24 @@ struct page *new_alloc_page(struct page *page, unsigned long node, int **x)
36169  	struct color_group *cgroup;
36170  	struct page *rPage = NULL;
36171  	unsigned int color;
36172 -	
36173 +	unsigned int randvalue;
36174 +	get_random_bytes(&randvalue, sizeof(unsigned int));
36175 +        	
36176  
36177          unsigned int idx = 0;
36178 -        idx += num_by_bitmask_index(set_partition[node], set_index[node]);
36179 -        idx += number_cachecolors* num_by_bitmask_index(bank_partition[node], bank_index[node]);
36180 -	//printk("node  = %d, idx = %d\n", node, idx);
36181 +//        printk("set = %lx, counting %d\n", set_partition[node],  counting_one_set(set_partition[node]));
36182 +  //      printk("bank = %lx, counting %d\n", bank_partition[node],  counting_one_set(bank_partition[node]));
36183 +        
36184 +
36185 +        idx += num_by_bitmask_index(set_partition[node], randvalue % counting_one_set(set_partition[node]));
36186 +        idx += number_cachecolors* num_by_bitmask_index(bank_partition[node],randvalue % counting_one_set(bank_partition[node]) );
36187 +//	printk("node  = %d, idx = %d\n", node, idx);
36188  
36189  	rPage =  new_alloc_page_color(idx);
36190          
36191              
36192 -        set_index[node] = (set_index[node]+1) % counting_one_set(set_partition[node]);
36193 -        bank_index[node] = (bank_index[node]+1) % counting_one_set(bank_partition[node]);
36194 +  //      set_index[node] = (set_index[node]+1) % counting_one_set(set_partition[node]);
36195 +//        bank_index[node] = (bank_index[node]+1) % counting_one_set(bank_partition[node]);
36196  	return rPage; 
36197  }
36198  
36199 -- 
36200 1.8.1.2
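
This patch temporarily replaces the round-robin choice of cache-color and bank index in new_alloc_page() with a random pick inside the allowed bitmasks (round-robin is restored later in the series). The sketch below (illustration only) shows the round-robin variant: counting_one_set() is equivalent to the helper in bank_proc.c, num_by_bitmask_index() is an assumed helper that returns the position of the index-th set bit (its body is not shown in this excerpt), and the example masks (two cache colors, one bank) are borrowed from defaults visible in the series.

#include <stdio.h>

/* Population count, equivalent to counting_one_set() in bank_proc.c. */
static unsigned int counting_one_set(unsigned int v)
{
	unsigned int c;

	for (c = 0; v; v >>= 1)
		c += v & 1;
	return c;
}

/* Assumed behaviour of num_by_bitmask_index(): position of the index-th
 * set bit in mask. */
static unsigned int num_by_bitmask_index(unsigned int mask, unsigned int index)
{
	unsigned int pos;

	for (pos = 0; pos < 32; pos++) {
		if (mask & (1u << pos)) {
			if (index == 0)
				return pos;
			index--;
		}
	}
	return 0;  /* not reached for valid inputs */
}

int main(void)
{
	unsigned int set_partition  = 0x00000003;  /* two allowed cache colors */
	unsigned int bank_partition = 0x00000080;  /* one allowed bank */
	unsigned int number_cachecolors = 16;
	unsigned int set_index = 0, bank_index = 0;
	int i;

	for (i = 0; i < 4; i++) {
		unsigned int idx =
			num_by_bitmask_index(set_partition, set_index) +
			number_cachecolors * num_by_bitmask_index(bank_partition, bank_index);

		printf("allocation %d -> pool index %u\n", i, idx);
		set_index  = (set_index + 1) % counting_one_set(set_partition);
		bank_index = (bank_index + 1) % counting_one_set(bank_partition);
	}
	return 0;
}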
36201 
36202 
36203 From 2e42005ed354304c219b8379a83d403122073a50 Mon Sep 17 00:00:00 2001
36204 From: Namhoon Kim <namhoonk@cs.unc.edu>
36205 Date: Sat, 28 Mar 2015 08:46:09 -0400
36206 Subject: [PATCH 111/119] Added OS isolation interface
36207 
36208 ---
36209  arch/arm/kernel/irq.c |  4 ++--
36210  litmus/cache_proc.c   | 65 ++++++++++++++++++++++++++++++++++++++++-----------
36211  2 files changed, 53 insertions(+), 16 deletions(-)
36212 
36213 diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
36214 index 145f290..ce01835 100644
36215 --- a/arch/arm/kernel/irq.c
36216 +++ b/arch/arm/kernel/irq.c
36217 @@ -68,7 +68,7 @@ void handle_IRQ(unsigned int irq, struct pt_regs *regs)
36218  {
36219  	struct pt_regs *old_regs = set_irq_regs(regs);
36220  
36221 -	//enter_irq_mode();
36222 +	enter_irq_mode();
36223  	irq_enter();
36224  
36225  	/*
36226 @@ -84,7 +84,7 @@ void handle_IRQ(unsigned int irq, struct pt_regs *regs)
36227  	}
36228  
36229  	irq_exit();
36230 -	//exit_irq_mode();
36231 +	exit_irq_mode();
36232  	set_irq_regs(old_regs);
36233  }
36234  
36235 diff --git a/litmus/cache_proc.c b/litmus/cache_proc.c
36236 index c331dd6..c7f39b5 100644
36237 --- a/litmus/cache_proc.c
36238 +++ b/litmus/cache_proc.c
36239 @@ -58,18 +58,18 @@ u32 set_partitions[2] = {
36240  };
36241  
36242  u32 prev_lockdown_d_reg[5] = {
36243 -	0x00000000,
36244 -	0x00000000,
36245 -	0x00000000,
36246 -	0x00000000,
36247 +	0xFFFFFF00,
36248 +	0xFFFFFF00,
36249 +	0xFFFFFF00,
36250 +	0xFFFFFF00,
36251  	0xFFFF00FF, /* share with level-C */
36252  };
36253  
36254  u32 prev_lockdown_i_reg[5] = {
36255 -	0x00000000,
36256 -	0x00000000,
36257 -	0x00000000,
36258 -	0x00000000,
36259 +	0xFFFFFF00,
36260 +	0xFFFFFF00,
36261 +	0xFFFFFF00,
36262 +	0xFFFFFF00,
36263  	0xFFFF00FF, /* share with level-C */
36264  };
36265  
36266 @@ -92,6 +92,7 @@ static int l1_prefetch_proc;
36267  static int l2_prefetch_hint_proc;
36268  static int l2_double_linefill_proc;
36269  static int l2_data_prefetch_proc;
36270 +static int os_isolation;
36271  
36272  #define ld_d_reg(cpu) ({ int __cpu = cpu; \
36273  			void __iomem *__v = cache_base + L2X0_LOCKDOWN_WAY_D_BASE + \
36274 @@ -228,6 +229,7 @@ int lock_all_handler(struct ctl_table *table, int write, void __user *buffer,
36275  			writel_relaxed(nr_unlocked_way[16], ld_i_reg(i));
36276  		}
36277  	}
36278 +	printk("LOCK_ALL HANDLER\n");
36279  	print_lockdown_registers();
36280  
36281  out:
36282 @@ -285,21 +287,49 @@ void do_partition(enum crit_level lv, int cpu)
36283  */
36284  }
36285  
36286 -void enter_irq_mode(void)
36287 +int os_isolation_proc_handler(struct ctl_table *table, int write, void __user *buffer,
36288 +		size_t *lenp, loff_t *ppos)
36289 +{
36290 +	int ret = 0;
36291 +	
36292 +	mutex_lock(&lockdown_proc);
36293 +	
36294 +	flush_cache_all();
36295 +	
36296 +	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
36297 +	if (ret)
36298 +		goto out;
36299 +	
36300 +
36301 +	printk("OS_ISOLATION HANDLER = %d\n", os_isolation);
36302 +
36303 +out:
36304 +	mutex_unlock(&lockdown_proc);
36305 +	return ret;
36306 +}
36307 +
36308 +void inline enter_irq_mode(void)
36309  {
36310  	int cpu = smp_processor_id();
36311 +
36312  	
36313 -	prev_lockdown_i_reg[cpu] = readl_relaxed(ld_i_reg(cpu));
36314 -	prev_lockdown_d_reg[cpu] = readl_relaxed(ld_d_reg(cpu));
36315 +	//prev_lockdown_i_reg[cpu] = readl_relaxed(ld_i_reg(cpu));
36316 +	//prev_lockdown_d_reg[cpu] = readl_relaxed(ld_d_reg(cpu));
36317 +	
36318 +	if (os_isolation == 0)
36319 +		return;	
36320  	
36321  	writel_relaxed(prev_lockdown_i_reg[4], ld_i_reg(cpu));
36322  	writel_relaxed(prev_lockdown_d_reg[4], ld_d_reg(cpu));
36323  }
36324  
36325 -void exit_irq_mode(void)
36326 +void inline exit_irq_mode(void)
36327  {
36328  	int cpu = smp_processor_id();
36329 -	
36330 +
36331 +	if (os_isolation == 0)
36332 +		return;
36333 +
36334  	writel_relaxed(prev_lockdown_i_reg[cpu], ld_i_reg(cpu));
36335  	writel_relaxed(prev_lockdown_d_reg[cpu], ld_d_reg(cpu));	
36336  }
36337 @@ -551,6 +581,13 @@ static struct ctl_table cache_table[] =
36338  		.data		= &l2_data_prefetch_proc,
36339  		.maxlen		= sizeof(l2_data_prefetch_proc),
36340  	},
36341 +	{
36342 +		.procname	= "os_isolation",
36343 +		.mode		= 0644,
36344 +		.proc_handler	= os_isolation_proc_handler,
36345 +		.data		= &os_isolation,
36346 +		.maxlen		= sizeof(os_isolation),
36347 +	},
36348  	{ }
36349  };
36350  
36351 @@ -579,7 +616,7 @@ static int __init litmus_sysctl_init(void)
36352  
36353  	way_partition_min = 0x00000000;
36354  	way_partition_max = 0x0000FFFF;
36355 -	
36356 +	os_isolation = 0;
36357  out:
36358  	return ret;
36359  }
36360 -- 
36361 1.8.1.2
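
With handle_IRQ() again bracketed by enter_irq_mode()/exit_irq_mode(), and os_isolation acting as the on/off switch, every interrupt handler runs confined to the ways shared with level C (entry 4 of prev_lockdown_*_reg), and the CPU's own lockdown value is restored afterwards. The sketch below (illustration only) models the MMIO lockdown registers as plain variables to show the effect of one interrupt.

#include <stdio.h>
#include <stdint.h>

static uint32_t ld_i_reg[4], ld_d_reg[4];  /* stand-ins for the MMIO registers */
static int os_isolation = 1;

/* Values from this patch: entries 0-3 are the per-CPU defaults, entry 4 is
 * the partition shared with level C that interrupts are confined to. */
static const uint32_t prev_lockdown_i_reg[5] = {
	0xFFFFFF00, 0xFFFFFF00, 0xFFFFFF00, 0xFFFFFF00, 0xFFFF00FF,
};
static const uint32_t prev_lockdown_d_reg[5] = {
	0xFFFFFF00, 0xFFFFFF00, 0xFFFFFF00, 0xFFFFFF00, 0xFFFF00FF,
};

static void enter_irq_mode(int cpu)
{
	if (!os_isolation)
		return;
	ld_i_reg[cpu] = prev_lockdown_i_reg[4];
	ld_d_reg[cpu] = prev_lockdown_d_reg[4];
}

static void exit_irq_mode(int cpu)
{
	if (!os_isolation)
		return;
	ld_i_reg[cpu] = prev_lockdown_i_reg[cpu];
	ld_d_reg[cpu] = prev_lockdown_d_reg[cpu];
}

int main(void)
{
	int cpu = 1;

	exit_irq_mode(cpu);  /* start from the CPU's own default */
	printf("before IRQ: I=0x%08X D=0x%08X\n", ld_i_reg[cpu], ld_d_reg[cpu]);
	enter_irq_mode(cpu);
	printf("in IRQ:     I=0x%08X D=0x%08X\n", ld_i_reg[cpu], ld_d_reg[cpu]);
	exit_irq_mode(cpu);
	printf("after IRQ:  I=0x%08X D=0x%08X\n", ld_i_reg[cpu], ld_d_reg[cpu]);
	return 0;
}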
36362 
36363 
36364 From ee66dccb3330c6fdf90040d038fc0e6c5f9f286f Mon Sep 17 00:00:00 2001
36365 From: Namhoon Kim <namhoonk@cs.unc.edu>
36366 Date: Wed, 8 Apr 2015 20:47:40 -0400
36367 Subject: [PATCH 112/119] Added cache flush func. and fixed bug
36368 
36369 ---
36370  arch/arm/mm/cache-l2x0.c    |   6 +-
36371  include/litmus/cache_proc.h |   3 +
36372  litmus/bank_proc.c          |  51 ++--
36373  litmus/cache_proc.c         | 562 +++++++++++++++++++++++++++++++++++++++++---
36374  litmus/litmus.c             | 104 ++++----
36375  litmus/sched_mc2.c          |  20 +-
36376  6 files changed, 624 insertions(+), 122 deletions(-)
36377 
36378 diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
36379 index cff808e..b57810a 100644
36380 --- a/arch/arm/mm/cache-l2x0.c
36381 +++ b/arch/arm/mm/cache-l2x0.c
36382 @@ -148,7 +148,7 @@ static void __l2x0_flush_all(void)
36383  	debug_writel(0x00);
36384  }
36385  
36386 -static void l2x0_flush_all(void)
36387 +void l2x0_flush_all(void)
36388  {
36389  	unsigned long flags;
36390  
36391 @@ -338,7 +338,7 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
36392  	else
36393  		cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID);
36394  	aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
36395 -	
36396 +	printk("AUX READ VALUE = %08x\n", aux);
36397  	cache_type = readl_relaxed(l2x0_base + L2X0_CACHE_TYPE);
36398  
36399  	aux &= aux_mask;
36400 @@ -398,7 +398,7 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
36401  		l2x0_unlock(cache_id);
36402  
36403  		/* l2x0 controller is disabled */
36404 -		//aux |= (1 << 12);
36405 +		//aux |= (1 << 12); // exclusive
36406  		//printk("AUX BIT = %08x\n", aux);
36407  		writel_relaxed(aux, l2x0_base + L2X0_AUX_CTRL);
36408  
36409 diff --git a/include/litmus/cache_proc.h b/include/litmus/cache_proc.h
36410 index 5a66c34..24128d7 100644
36411 --- a/include/litmus/cache_proc.h
36412 +++ b/include/litmus/cache_proc.h
36413 @@ -6,6 +6,9 @@
36414  void litmus_setup_lockdown(void __iomem*, u32);
36415  void enter_irq_mode(void);
36416  void exit_irq_mode(void);
36417 +void flush_cache(void);
36418 +
36419 +extern struct page *new_alloc_page_color(unsigned long color);
36420  
36421  #endif
36422  
36423 diff --git a/litmus/bank_proc.c b/litmus/bank_proc.c
36424 index e1025b5..655eb27 100644
36425 --- a/litmus/bank_proc.c
36426 +++ b/litmus/bank_proc.c
36427 @@ -97,10 +97,9 @@ unsigned int counting_one_set(unsigned int v)
36428  //    unsigned int v; // count the number of bits set in v
36429      unsigned int c; // c accumulates the total bits set in v
36430  
36431 -    for (c = 0; v; v = v>>1)
36432 +    for (c = 0; v; v >>= 1)
36433      {
36434 -        if(v&1)
36435 -            c++;
36436 +        c += v & 1;
36437      }
36438      return c;
36439  }
36440 @@ -215,7 +214,7 @@ void add_page_to_color_list(struct page *page)
36441   */
36442  static int do_add_pages(void)
36443  {
36444 -//	printk("LITMUS do add pages\n");
36445 +	//printk("LITMUS do add pages\n");
36446  	
36447  	struct page *page, *page_tmp;
36448  	LIST_HEAD(free_later);
36449 @@ -225,19 +224,19 @@ static int do_add_pages(void)
36450          int free_counter = 0;
36451          unsigned long counter[128]= {0}; 
36452          
36453 -        printk("Before refill : \n");
36454 -        show_nr_pages();
36455 +        //printk("Before refill : \n");
36456 +        //show_nr_pages();
36457  
36458  	// until all the page lists contain enough pages 
36459  	//for (i =0; i<5; i++) {
36460  	for (i=0; i< 1024*100;i++) {
36461 -//	while (smallest_nr_pages() < PAGES_PER_COLOR) {
36462 +	//while (smallest_nr_pages() < PAGES_PER_COLOR) {
36463         //         printk("smallest = %d\n", smallest_nr_pages());	
36464  		page = alloc_page(GFP_HIGHUSER_MOVABLE);
36465  	    //    page = alloc_pages_exact_node(0, GFP_HIGHUSER_MOVABLE, 0);
36466  	
36467  		if (unlikely(!page)) {
36468 -	//		printk(KERN_WARNING "Could not allocate pages.\n");
36469 +			printk(KERN_WARNING "Could not allocate pages.\n");
36470  			ret = -ENOMEM;
36471  			goto out;
36472  		}
36473 @@ -272,7 +271,7 @@ static int do_add_pages(void)
36474                  }
36475                  */
36476          }
36477 -        /*printk("page counter = \n");
36478 +/*        printk("page counter = \n");
36479          for (i=0; i<128; i++)
36480          {
36481              printk("(%03d) = %4d, ", i , counter[i]);
36482 @@ -281,9 +280,9 @@ static int do_add_pages(void)
36483              }
36484  
36485          }
36486 -        */
36487 -        printk("After refill : \n");
36488 -        show_nr_pages();
36489 +*/	
36490 +        //printk("After refill : \n");
36491 +        //show_nr_pages();
36492  #if 1
36493  	// Free the unwanted pages
36494  	list_for_each_entry_safe(page, page_tmp, &free_later, lru) {
36495 @@ -310,7 +309,7 @@ static struct  page *new_alloc_page_color( unsigned long color)
36496  	if( (color <0) || (color)>(number_cachecolors*number_banks -1)) {
36497  		TRACE_CUR("Wrong color %lu\n", color);	
36498  //		printk(KERN_WARNING "Wrong color %lu\n", color);
36499 -		goto out_unlock;
36500 +		goto out;
36501  	}
36502  
36503  		
36504 @@ -323,14 +322,14 @@ static struct  page *new_alloc_page_color( unsigned long color)
36505  	}
36506  	rPage = list_first_entry(&cgroup->list, struct page, lru);
36507  	BUG_ON(page_count(rPage) > 1);
36508 -	get_page(rPage);
36509 +	//get_page(rPage);
36510  	list_del(&rPage->lru);
36511  	atomic_dec(&cgroup->nr_pages);
36512  	ClearPageLRU(rPage);
36513  out_unlock:
36514  	spin_unlock(&cgroup->lock);
36515  out:
36516 -	while( smallest_nr_pages() == 0)
36517 +	if( smallest_nr_pages() == 0)
36518          {
36519  		do_add_pages();
36520         //     printk("ERROR(bank_proc.c) = We don't have enough pages in bank_proc.c\n");        
36521 @@ -339,6 +338,10 @@ out:
36522  	return rPage;
36523  }
36524  
36525 +struct page* get_colored_page(unsigned long color)
36526 +{
36527 +	return new_alloc_page_color(color);
36528 +}
36529  
36530  /*
36531   * provide pages for replacement according to  
36532 @@ -359,24 +362,18 @@ struct page *new_alloc_page(struct page *page, unsigned long node, int **x)
36533  	struct color_group *cgroup;
36534  	struct page *rPage = NULL;
36535  	unsigned int color;
36536 -	unsigned int randvalue;
36537 -	get_random_bytes(&randvalue, sizeof(unsigned int));
36538 -        	
36539 +	
36540  
36541          unsigned int idx = 0;
36542 -//        printk("set = %lx, counting %d\n", set_partition[node],  counting_one_set(set_partition[node]));
36543 -  //      printk("bank = %lx, counting %d\n", bank_partition[node],  counting_one_set(bank_partition[node]));
36544 -        
36545 -
36546 -        idx += num_by_bitmask_index(set_partition[node], randvalue % counting_one_set(set_partition[node]));
36547 -        idx += number_cachecolors* num_by_bitmask_index(bank_partition[node],randvalue % counting_one_set(bank_partition[node]) );
36548 -//	printk("node  = %d, idx = %d\n", node, idx);
36549 +        idx += num_by_bitmask_index(set_partition[node], set_index[node]);
36550 +        idx += number_cachecolors* num_by_bitmask_index(bank_partition[node], bank_index[node]);
36551 +	//printk("node  = %d, idx = %d\n", node, idx);
36552  
36553  	rPage =  new_alloc_page_color(idx);
36554          
36555              
36556 -  //      set_index[node] = (set_index[node]+1) % counting_one_set(set_partition[node]);
36557 -//        bank_index[node] = (bank_index[node]+1) % counting_one_set(bank_partition[node]);
36558 +        set_index[node] = (set_index[node]+1) % counting_one_set(set_partition[node]);
36559 +        bank_index[node] = (bank_index[node]+1) % counting_one_set(bank_partition[node]);
36560  	return rPage; 
36561  }
36562  
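
The allocation path above now walks the per-node set and bank bitmasks round-robin instead of picking a random bit. A minimal userspace sketch of that index computation follows; it is not part of the patch, counting_one_set() is modeled on the popcount loop at the top of this diff, num_by_bitmask_index() is assumed to return the position of the n-th set bit, and the partition masks and number_cachecolors = 16 are made-up values for illustration.

#include <stdio.h>

/* Model of counting_one_set(): population count, as in the patched loop. */
static unsigned int count_ones(unsigned long v)
{
	unsigned int c;
	for (c = 0; v; v >>= 1)
		c += v & 1;
	return c;
}

/* Assumed semantics of num_by_bitmask_index(): position of the n-th set bit. */
static unsigned int nth_set_bit(unsigned long mask, unsigned int n)
{
	unsigned int pos;

	for (pos = 0; pos < 8 * sizeof(mask); pos++) {
		if (!(mask & (1UL << pos)))
			continue;
		if (n-- == 0)
			return pos;
	}
	return 0;	/* not reached while n < popcount(mask) */
}

int main(void)
{
	/* Hypothetical partition for one node: cache colors {0..3}, banks {2,3}. */
	unsigned long set_partition = 0x0000000f;
	unsigned long bank_partition = 0x0000000c;
	unsigned int number_cachecolors = 16;	/* assumed, matches MAX_NR_COLORS */
	unsigned int set_index = 0, bank_index = 0;
	int i;

	/* Round-robin over the allowed (bank, color) pairs, as new_alloc_page() does. */
	for (i = 0; i < 8; i++) {
		unsigned int idx = nth_set_bit(set_partition, set_index)
			+ number_cachecolors * nth_set_bit(bank_partition, bank_index);
		printf("allocation %d -> page list %u\n", i, idx);
		set_index = (set_index + 1) % count_ones(set_partition);
		bank_index = (bank_index + 1) % count_ones(bank_partition);
	}
	return 0;
}

With these example masks the allocations cycle through page lists 32, 49, 34, 51, 32, ... i.e. bank-major, color-minor indices, which is the layout new_alloc_page_color() expects.
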
36563 diff --git a/litmus/cache_proc.c b/litmus/cache_proc.c
36564 index c7f39b5..0e123fac 100644
36565 --- a/litmus/cache_proc.c
36566 +++ b/litmus/cache_proc.c
36567 @@ -6,6 +6,7 @@
36568  #include <linux/slab.h>
36569  #include <linux/io.h>
36570  #include <linux/mutex.h>
36571 +#include <linux/time.h>
36572  
36573  #include <litmus/litmus_proc.h>
36574  #include <litmus/sched_trace.h>
36575 @@ -19,6 +20,34 @@
36576  #define UNLOCK_ALL	0x00000000 /* allocation in any way */
36577  #define LOCK_ALL        (~UNLOCK_ALL)
36578  #define MAX_NR_WAYS	16
36579 +#define MAX_NR_COLORS	16
36580 +
36581 +void mem_lock(u32 lock_val, int cpu);
36582 +
36583 +/*
36584 + * unlocked_way[i] : allocation can occur in way i
36585 + *
36586 + * 0 = allocation can occur in the corresponding way
36587 + * 1 = allocation cannot occur in the corresponding way
36588 + */
36589 +u32 unlocked_way[MAX_NR_WAYS]  = {
36590 +	0xFFFFFFFE, /* way 0 unlocked */
36591 +	0xFFFFFFFD,
36592 +	0xFFFFFFFB,
36593 +	0xFFFFFFF7,
36594 +	0xFFFFFFEF, /* way 4 unlocked */
36595 +	0xFFFFFFDF,
36596 +	0xFFFFFFBF,
36597 +	0xFFFFFF7F,
36598 +	0xFFFFFEFF, /* way 8 unlocked */
36599 +	0xFFFFFDFF,
36600 +	0xFFFFFBFF,
36601 +	0xFFFFF7FF,
36602 +	0xFFFFEFFF, /* way 12 unlocked */
36603 +	0xFFFFDFFF,
36604 +	0xFFFFBFFF,
36605 +	0xFFFF7FFF,
36606 +};
36607  
36608  u32 nr_unlocked_way[MAX_NR_WAYS+1]  = {
36609  	0xFFFFFFFF, /* all ways are locked. usable = 0*/
36610 @@ -40,16 +69,23 @@ u32 nr_unlocked_way[MAX_NR_WAYS+1]  = {
36611  	0xFFFF0000, /* way ~15 unlocked. usable = 16 */
36612  };
36613  
36614 +u32 way_partition[4] = {
36615 +	0xfffffff0, /* cpu0 */
36616 +	0xffffff0f, /* cpu1 */
36617 +	0xfffff0ff, /* cpu2 */
36618 +	0xffff0fff, /* cpu3 */
36619 +};
36620 +
36621  u32 way_partitions[9] = {
36622 -	0x00000003, /* cpu0 A */
36623 -	0x00000003, /* cpu0 B */
36624 -	0x0000000C, /* cpu1 A */
36625 -	0x0000000C, /* cpu1 B */
36626 -	0x00000030, /* cpu2 A */
36627 -	0x00000030, /* cpu2 B */
36628 -	0x000000C0, /* cpu3 A */
36629 -	0x000000C0, /* cpu3 B */
36630 -	0x0000FF00, /* lv C */
36631 +	0xffff0003, /* cpu0 A */
36632 +	0xffff0003, /* cpu0 B */
36633 +	0xffff000C, /* cpu1 A */
36634 +	0xffff000C, /* cpu1 B */
36635 +	0xffff0030, /* cpu2 A */
36636 +	0xffff0030, /* cpu2 B */
36637 +	0xffff00C0, /* cpu3 A */
36638 +	0xffff00C0, /* cpu3 B */
36639 +	0xffffff00, /* lv C */
36640  };
36641  
36642  u32 set_partitions[2] = {
36643 @@ -82,8 +118,8 @@ static u32 cache_id;
36644  struct mutex actlr_mutex;
36645  struct mutex l2x0_prefetch_mutex;
36646  struct mutex lockdown_proc;
36647 -static unsigned int way_partition_min;
36648 -static unsigned int way_partition_max;
36649 +static u32 way_partition_min;
36650 +static u32 way_partition_max;
36651  
36652  static int zero = 0;
36653  static int one = 1;
36654 @@ -93,6 +129,7 @@ static int l2_prefetch_hint_proc;
36655  static int l2_double_linefill_proc;
36656  static int l2_data_prefetch_proc;
36657  static int os_isolation;
36658 +static int use_part;
36659  
36660  #define ld_d_reg(cpu) ({ int __cpu = cpu; \
36661  			void __iomem *__v = cache_base + L2X0_LOCKDOWN_WAY_D_BASE + \
36662 @@ -103,15 +140,45 @@ static int os_isolation;
36663  
36664  int lock_all;
36665  int nr_lockregs;
36666 +static raw_spinlock_t cache_lock;
36667 +static raw_spinlock_t prefetch_lock;
36668 +static void ***flusher_pages = NULL;
36669 +
36670 +extern void l2x0_flush_all(void);
36671 +
36672 +static inline void cache_wait_way(void __iomem *reg, unsigned long mask)
36673 +{
36674 +	/* wait for cache operation by line or way to complete */
36675 +	while (readl_relaxed(reg) & mask)
36676 +		cpu_relax();
36677 +}
36678 +
36679 +#ifdef CONFIG_CACHE_PL310
36680 +static inline void cache_wait(void __iomem *reg, unsigned long mask)
36681 +{
36682 +	/* cache operations by line are atomic on PL310 */
36683 +}
36684 +#else
36685 +#define cache_wait	cache_wait_way
36686 +#endif
36687 +
36688 +static inline void cache_sync(void)
36689 +{
36690 +	void __iomem *base = cache_base;
36691 +
36692 +	writel_relaxed(0, base + L2X0_CACHE_SYNC);
36693 +	cache_wait(base + L2X0_CACHE_SYNC, 1);
36694 +}
36695  
36696  static void print_lockdown_registers(void)
36697  {
36698  	int i;
36699  
36700 -	for (i = 0; i < nr_lockregs; i++) {
36701 -		printk("Lockdown Data CPU %2d: 0x%8x\n",
36702 +	//for (i = 0; i < nr_lockregs; i++) {
36703 +	for (i = 0; i < 4; i++) {
36704 +		printk("Lockdown Data CPU %2d: 0x%04x\n",
36705  				i, readl_relaxed(ld_d_reg(i)));
36706 -		printk("Lockdown Inst CPU %2d: 0x%8x\n",
36707 +		printk("Lockdown Inst CPU %2d: 0x%04x\n",
36708  				i, readl_relaxed(ld_i_reg(i)));
36709  	}
36710  }
36711 @@ -159,7 +226,7 @@ static void test_lockdown(void *ignore)
36712  
36713  void litmus_setup_lockdown(void __iomem *base, u32 id)
36714  {
36715 -    cache_base = base;
36716 +	cache_base = base;
36717  	cache_id = id;
36718  	lockreg_d = cache_base + L2X0_LOCKDOWN_WAY_D_BASE;
36719  	lockreg_i = cache_base + L2X0_LOCKDOWN_WAY_I_BASE;
36720 @@ -174,6 +241,8 @@ void litmus_setup_lockdown(void __iomem *base, u32 id)
36721  	mutex_init(&actlr_mutex);
36722  	mutex_init(&l2x0_prefetch_mutex);
36723  	mutex_init(&lockdown_proc);
36724 +	raw_spin_lock_init(&cache_lock);
36725 +	raw_spin_lock_init(&prefetch_lock);
36726  	
36727  	test_lockdown(NULL);
36728  }
36729 @@ -185,18 +254,26 @@ int way_partition_handler(struct ctl_table *table, int write, void __user *buffe
36730  	
36731  	mutex_lock(&lockdown_proc);
36732  	
36733 -	flush_cache_all();
36734 +	//flush_cache_all();
36735 +	//cache_sync();
36736 +	l2x0_flush_all();
36737  	
36738  	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
36739  	if (ret)
36740  		goto out;
36741  	
36742 -	//if (write) {
36743 +	if (write) {
36744  		printk("Way-partition settings:\n");
36745  		for (i = 0; i < 9; i++) {
36746 -			printk("0x%08X\n", ~way_partitions[i]);
36747 +			printk("0x%08X\n", way_partitions[i]);
36748 +		}
36749 +		for (i = 0; i < 4; i++) {
36750 +			writel_relaxed(~way_partitions[i*2], cache_base + L2X0_LOCKDOWN_WAY_D_BASE +
36751 +				       i * L2X0_LOCKDOWN_STRIDE);
36752 +			writel_relaxed(~way_partitions[i*2], cache_base + L2X0_LOCKDOWN_WAY_I_BASE +
36753 +				       i * L2X0_LOCKDOWN_STRIDE);
36754  		}
36755 -	//}
36756 +	}
36757  	print_lockdown_registers();
36758  
36759  out:
36760 @@ -211,23 +288,49 @@ int lock_all_handler(struct ctl_table *table, int write, void __user *buffer,
36761  	
36762  	mutex_lock(&lockdown_proc);
36763  	
36764 -	flush_cache_all();
36765 +	//flush_cache_all();
36766 +	//outer_flush_all();
36767 +	//cache_sync();
36768 +	//l2x0_flush_all();
36769 +	flush_cache();
36770  	
36771  	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
36772  	if (ret)
36773  		goto out;
36774  	
36775  	if (write && lock_all == 1) {
36776 +		for (i = 0; i < nr_lockregs; i++) {
36777 +			writel_relaxed(0xFFFF, cache_base + L2X0_LOCKDOWN_WAY_D_BASE +
36778 +				       i * L2X0_LOCKDOWN_STRIDE);
36779 +			writel_relaxed(0xFFFF, cache_base + L2X0_LOCKDOWN_WAY_I_BASE +
36780 +				       i * L2X0_LOCKDOWN_STRIDE);
36781 +		}
36782 +/*		
36783  		for (i = 0; i < nr_lockregs;  i++) {
36784 -			writel_relaxed(nr_unlocked_way[0], ld_d_reg(i));
36785 -			writel_relaxed(nr_unlocked_way[0], ld_i_reg(i));
36786 +			barrier();
36787 +			mem_lock(LOCK_ALL, i);
36788 +			barrier();
36789 +			//writel_relaxed(nr_unlocked_way[0], ld_d_reg(i));
36790 +			//writel_relaxed(nr_unlocked_way[0], ld_i_reg(i));
36791  		}
36792 +*/		
36793  	}
36794  	if (write && lock_all == 0) {
36795 +		for (i = 0; i < nr_lockregs; i++) {
36796 +			writel_relaxed(0x0, cache_base + L2X0_LOCKDOWN_WAY_D_BASE +
36797 +				       i * L2X0_LOCKDOWN_STRIDE);
36798 +			writel_relaxed(0x0, cache_base + L2X0_LOCKDOWN_WAY_I_BASE +
36799 +				       i * L2X0_LOCKDOWN_STRIDE);
36800 +		}
36801 +/*
36802  		for (i = 0; i < nr_lockregs;  i++) {
36803 -			writel_relaxed(nr_unlocked_way[16], ld_d_reg(i));
36804 -			writel_relaxed(nr_unlocked_way[16], ld_i_reg(i));
36805 +			barrier();
36806 +			mem_lock(UNLOCK_ALL, i);
36807 +			barrier();
36808 +			//writel_relaxed(nr_unlocked_way[16], ld_d_reg(i));
36809 +			//writel_relaxed(nr_unlocked_way[16], ld_i_reg(i));
36810  		}
36811 +*/
36812  	}
36813  	printk("LOCK_ALL HANDLER\n");
36814  	print_lockdown_registers();
36815 @@ -237,32 +340,60 @@ out:
36816  	return ret;
36817  }
36818  
36819 +void mem_lock(u32 lock_val, int cpu)
36820 +{
36821 +	unsigned long flags;
36822 +
36823 +	raw_spin_lock_irqsave(&cache_lock, flags);
36824 +
36825 +	__asm__ __volatile__ (
36826 +"	str	%[lockval], [%[dcachereg]]\n"
36827 +"	str	%[lockval], [%[icachereg]]\n"
36828 +	: 
36829 +	: [dcachereg] "r" (ld_d_reg(cpu)),
36830 +	  [icachereg] "r" (ld_i_reg(cpu)),
36831 +	  [lockval] "r" (lock_val)
36832 +	: "cc");
36833 +
36834 +	raw_spin_unlock_irqrestore(&cache_lock, flags);
36835 +}
36836 +
36837  void do_partition(enum crit_level lv, int cpu)
36838  {
36839  	u32 regs;
36840 +	//unsigned long flags;
36841  	
36842 -	if (lock_all)
36843 +	if (lock_all || !use_part)
36844  		return;
36845  	switch(lv) {
36846  		case CRIT_LEVEL_A:
36847  			regs = ~way_partitions[cpu*2];
36848 +			regs |= 0xffff0000;
36849  			writel_relaxed(regs, ld_d_reg(cpu));
36850  			writel_relaxed(regs, ld_i_reg(cpu));
36851  			break;
36852  		case CRIT_LEVEL_B:
36853  			regs = ~way_partitions[cpu*2+1];
36854 +			regs |= 0xffff0000;
36855  			writel_relaxed(regs, ld_d_reg(cpu));
36856  			writel_relaxed(regs, ld_i_reg(cpu));
36857  			break;
36858  		case CRIT_LEVEL_C:
36859  		case NUM_CRIT_LEVELS:
36860  			regs = ~way_partitions[8];
36861 +			regs |= 0xffff0000;
36862  			writel_relaxed(regs, ld_d_reg(cpu));
36863  			writel_relaxed(regs, ld_i_reg(cpu));
36864  			break;
36865  		default:
36866  			BUG();
36867 +
36868  	}
36869 +	//cache_sync();
36870 +//	barrier();
36871 +//	mem_lock(regs, cpu);
36872 +//	barrier();		
36873 +	//print_lockdown_registers();
36874  /*
36875  	if (use_set_partition == 1 && use_way_partition == 1)
36876  		printk(KERN_ALERT "BOTH SET, WAY ARE SET!!!!\n");
36877 @@ -287,6 +418,29 @@ void do_partition(enum crit_level lv, int cpu)
36878  */
36879  }
36880  
36881 +int use_part_proc_handler(struct ctl_table *table, int write, void __user *buffer,
36882 +		size_t *lenp, loff_t *ppos)
36883 +{
36884 +	int ret = 0;
36885 +	
36886 +	mutex_lock(&lockdown_proc);
36887 +	
36888 +	//flush_cache_all();
36889 +	//cache_sync();
36890 +	//l2x0_flush_all();
36891 +	
36892 +	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
36893 +	if (ret)
36894 +		goto out;
36895 +	
36896 +
36897 +	printk("USE_PART HANDLER = %d\n", use_part);
36898 +
36899 +out:
36900 +	mutex_unlock(&lockdown_proc);
36901 +	return ret;
36902 +}
36903 +
36904  int os_isolation_proc_handler(struct ctl_table *table, int write, void __user *buffer,
36905  		size_t *lenp, loff_t *ppos)
36906  {
36907 @@ -294,7 +448,10 @@ int os_isolation_proc_handler(struct ctl_table *table, int write, void __user *b
36908  	
36909  	mutex_lock(&lockdown_proc);
36910  	
36911 -	flush_cache_all();
36912 +	//flush_cache_all();
36913 +	//cache_sync();
36914 +	//l2x0_flush_all();
36915 +	//flush_cache();
36916  	
36917  	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
36918  	if (ret)
36919 @@ -312,9 +469,9 @@ void inline enter_irq_mode(void)
36920  {
36921  	int cpu = smp_processor_id();
36922  
36923 -	
36924 -	//prev_lockdown_i_reg[cpu] = readl_relaxed(ld_i_reg(cpu));
36925 -	//prev_lockdown_d_reg[cpu] = readl_relaxed(ld_d_reg(cpu));
36926 +	return;
36927 +	prev_lockdown_i_reg[cpu] = readl_relaxed(ld_i_reg(cpu));
36928 +	prev_lockdown_d_reg[cpu] = readl_relaxed(ld_d_reg(cpu));
36929  	
36930  	if (os_isolation == 0)
36931  		return;	
36932 @@ -326,7 +483,7 @@ void inline enter_irq_mode(void)
36933  void inline exit_irq_mode(void)
36934  {
36935  	int cpu = smp_processor_id();
36936 -
36937 +	return;
36938  	if (os_isolation == 0)
36939  		return;
36940  
36941 @@ -404,6 +561,7 @@ int litmus_l2_prefetch_hint_proc_handler(struct ctl_table *table, int write,
36942  
36943  /* Operate on the PL-310's Prefetch Control Register, L2X0_PREFETCH_CTRL */
36944  #define L2X0_PREFETCH_DOUBLE_LINEFILL	(1 << 30)
36945 +#define L2X0_PREFETCH_INST_PREFETCH	(1 << 29)
36946  #define L2X0_PREFETCH_DATA_PREFETCH	(1 << 28)
36947  static void l2x0_prefetch_change(int mode, int mask)
36948  {
36949 @@ -454,13 +612,19 @@ int litmus_l2_data_prefetch_proc_handler(struct ctl_table *table, int write,
36950  	ret = proc_dointvec(table, write, buffer, lenp, ppos);
36951  	if (!ret && write) {
36952  		mode = *((int*)table->data);
36953 -		l2x0_prefetch_change(mode, L2X0_PREFETCH_DATA_PREFETCH);
36954 +		l2x0_prefetch_change(mode, L2X0_PREFETCH_DATA_PREFETCH|L2X0_PREFETCH_INST_PREFETCH);
36955  	}
36956  	mutex_unlock(&l2x0_prefetch_mutex);
36957  
36958  	return ret;
36959  }
36960  
36961 +int do_perf_test_proc_handler(struct ctl_table *table, int write,
36962 +		void __user *buffer, size_t *lenp, loff_t *ppos);
36963 +
36964 +int setup_flusher_proc_handler(struct ctl_table *table, int write,
36965 +		void __user *buffer, size_t *lenp, loff_t *ppos);
36966 +		
36967  static struct ctl_table cache_table[] =
36968  {
36969  	{
36970 @@ -588,6 +752,23 @@ static struct ctl_table cache_table[] =
36971  		.data		= &os_isolation,
36972  		.maxlen		= sizeof(os_isolation),
36973  	},
36974 +	{
36975 +		.procname	= "use_part",
36976 +		.mode		= 0644,
36977 +		.proc_handler	= use_part_proc_handler,
36978 +		.data		= &use_part,
36979 +		.maxlen		= sizeof(use_part),
36980 +	},
36981 +	{
36982 +		.procname	= "do_perf_test",
36983 +		.mode		= 0644,
36984 +		.proc_handler	= do_perf_test_proc_handler,
36985 +	},
36986 +	{
36987 +		.procname	= "setup_flusher",
36988 +		.mode		= 0644,
36989 +		.proc_handler	= setup_flusher_proc_handler,
36990 +	},
36991  	{ }
36992  };
36993  
36994 @@ -600,6 +781,321 @@ static struct ctl_table litmus_dir_table[] = {
36995  	{ }
36996  };
36997  
36998 +u32 color_read_in_mem(u32 lock_val, u32 unlock_val, void *start, void *end)
36999 +{
37000 +	u32 v = 0;
37001 +
37002 +	__asm__ __volatile__ (
37003 +"	.align 5\n"
37004 +"	str	%[lockval], [%[cachereg]]\n"
37005 +"1:	ldr	%[val], [%[addr]], #32		@ 32 bytes = 1 cache line\n"
37006 +"	cmp	%[end], %[addr]			@ subtracts addr from end\n"
37007 +"	bgt	1b\n				@ read more, if necessary\n"
37008 +	: [addr] "+r" (start),
37009 +	  [val] "+r" (v)
37010 +	: [end] "r" (end),
37011 +#ifdef CONFIG_CACHE_PL310
37012 +	  [cachereg] "r" (ld_d_reg(raw_smp_processor_id())),
37013 +#else
37014 +	  [cachereg] "r" (lockreg_d),
37015 +#endif
37016 +	  [lockval] "r" (lock_val)
37017 +	: "cc");
37018 +
37019 +	return v;
37020 +}
37021 +
37022 +
37023 +/*
37024 + * Prefetch by reading the first word of each cache line in a page.
37025 + *
37026 + * @lockdown_reg: address of the lockdown register to write
37027 + * @lock_val: value to be written to @lockdown_reg
37028 + * @unlock_val: will unlock the cache to this value
37029 + * @addr: start address to be prefetched
37030 + * @end_addr: end address to prefetch (exclusive)
37031 + *
37032 + * Assumes: addr < end_addr AND addr != end_addr
37033 + */
37034 +u32 color_read_in_mem_lock(u32 lock_val, u32 unlock_val, void *start, void *end)
37035 +{
37036 +#ifndef CONFIG_CACHE_PL310
37037 +	unsigned long flags;
37038 +#endif
37039 +	u32 v = 0;
37040 +
37041 +#ifndef CONFIG_CACHE_PL310
37042 +	raw_spin_lock_irqsave(&prefetch_lock, flags);
37043 +#endif
37044 +
37045 +	__asm__ __volatile__ (
37046 +"	.align 5\n"
37047 +"	str	%[lockval], [%[cachereg]]\n"
37048 +"1:	ldr	%[val], [%[addr]], #32		@ 32 bytes = 1 cache line\n"
37049 +"	cmp	%[end], %[addr]			@ subtracts addr from end\n"
37050 +"	bgt	1b\n				@ read more, if necessary\n"
37051 +"	str	%[unlockval], [%[cachereg]]\n"
37052 +	: [addr] "+r" (start),
37053 +	  [val] "+r" (v)
37054 +	: [end] "r" (end),
37055 +#ifdef CONFIG_CACHE_PL310
37056 +	  [cachereg] "r" (ld_d_reg(raw_smp_processor_id())),
37057 +#else
37058 +	  [cachereg] "r" (lockreg_d),
37059 +#endif
37060 +	  [lockval] "r" (lock_val),
37061 +	  [unlockval] "r" (unlock_val)
37062 +	: "cc");
37063 +
37064 +#ifndef CONFIG_CACHE_PL310
37065 +	raw_spin_unlock_irqrestore(&prefetch_lock, flags);
37066 +#endif
37067 +
37068 +	return v;
37069 +}
37070 +
37071 +static long update_timeval(struct timespec lhs, struct timespec rhs)
37072 +{
37073 +	long val;
37074 +	struct timespec ts;
37075 +
37076 +	ts = timespec_sub(rhs, lhs);
37077 +	val = ts.tv_sec*NSEC_PER_SEC + ts.tv_nsec;
37078 +
37079 +	return val;
37080 +}
37081 +
37082 +extern void v7_flush_kern_dcache_area(void *, size_t);
37083 +
37084 +/*
37085 + * Ensure that this page is not in the L1 or L2 cache.
37086 + * Since the L1 cache is VIPT and the L2 cache is PIPT, we can use either the
37087 + * kernel or user vaddr.
37088 + */
37089 +void color_flush_page(void *vaddr)
37090 +{
37091 +	v7_flush_kern_dcache_area(vaddr, PAGE_SIZE);
37092 +}
37093 +
37094 +extern struct page* get_colored_page(unsigned long color);
37095 +
37096 +int setup_flusher_array(void)
37097 +{
37098 +	int color, way, ret = 0;
37099 +	struct page *page;
37100 +
37101 +	if (flusher_pages != NULL)
37102 +		goto out;
37103 +
37104 +	flusher_pages = (void***) kmalloc(MAX_NR_WAYS
37105 +			* sizeof(*flusher_pages), GFP_KERNEL);
37106 +	if (!flusher_pages) {
37107 +		printk(KERN_WARNING "No memory for flusher array!\n");
37108 +		ret = -EINVAL;
37109 +		goto out;
37110 +	}
37111 +
37112 +	for (way = 0; way < MAX_NR_WAYS; way++) {
37113 +		void **flusher_color_arr;
37114 +		flusher_color_arr = (void**) kmalloc(sizeof(**flusher_pages)
37115 +				* MAX_NR_COLORS, GFP_KERNEL);
37116 +		if (!flusher_color_arr) {
37117 +			printk(KERN_WARNING "No memory for flusher array!\n");
37118 +			ret = -ENOMEM;
37119 +			goto out_free;
37120 +		}
37121 +
37122 +		flusher_pages[way] = flusher_color_arr;
37123 +
37124 +		for (color = 0; color < MAX_NR_COLORS; color++) {
37125 +			int node;
37126 +			switch (color) {
37127 +				case 0:
37128 +					node = 32;
37129 +					break;
37130 +				case 1:
37131 +					node = 33;
37132 +					break;
37133 +				case 2:
37134 +					node = 50;
37135 +					break;
37136 +				case 3:
37137 +					node = 51;
37138 +					break;
37139 +				case 4:
37140 +					node = 68;
37141 +					break;
37142 +				case 5:
37143 +					node = 69;
37144 +					break;
37145 +				case 6:
37146 +					node = 86;
37147 +					break;
37148 +				case 7:
37149 +					node = 87;
37150 +					break;
37151 +				case 8:
37152 +					node = 88;
37153 +					break;
37154 +				case 9:
37155 +					node = 105;
37156 +					break;
37157 +				case 10:
37158 +					node = 106;
37159 +					break;
37160 +				case 11:
37161 +					node = 107;
37162 +					break;
37163 +				case 12:
37164 +					node = 108;
37165 +					break;					
37166 +				case 13:
37167 +					node = 125;
37168 +					break;
37169 +				case 14:
37170 +					node = 126;
37171 +					break;
37172 +				case 15:
37173 +					node = 127;
37174 +					break;
37175 +			}	
37176 +			page = get_colored_page(node);
37177 +			if (!page) {
37178 +				printk(KERN_WARNING "no more colored pages\n");
37179 +				ret = -EINVAL;
37180 +				goto out_free;
37181 +			}
37182 +			flusher_pages[way][color] = page_address(page);
37183 +			if (!flusher_pages[way][color]) {
37184 +				printk(KERN_WARNING "bad page address\n");
37185 +				ret = -EINVAL;
37186 +				goto out_free;
37187 +			}
37188 +		}
37189 +	}
37190 +out:
37191 +	return ret;
37192 +out_free:
37193 +	for (way = 0; way < MAX_NR_WAYS; way++) {
37194 +		for (color = 0; color < MAX_NR_COLORS; color++) {
37195 +			/* not bothering to try and give back colored pages */
37196 +		}
37197 +		kfree(flusher_pages[way]);
37198 +	}
37199 +	kfree(flusher_pages);
37200 +	flusher_pages = NULL;
37201 +	return ret;
37202 +}
37203 +
37204 +void flush_cache(void)
37205 +{
37206 +/*	int *dummy;
37207 +	
37208 +	flush_cache_all();
37209 +	int size = 128, i, t = 0;
37210 +	
37211 +	dummy = kmalloc(PAGE_SIZE*size, GFP_KERNEL);
37212 +	for (i = 0; i<PAGE_SIZE*size/sizeof(int); i++) {
37213 +		dummy[i] = t++;
37214 +	}
37215 +	
37216 +	kfree(dummy);
37217 +*/	
37218 +	int way, color;
37219 +	for (way=0;way<MAX_NR_WAYS;way++) {
37220 +		for (color=0;color<MAX_NR_COLORS;color++) {
37221 +			void *vaddr = flusher_pages[way][color];
37222 +			u32 lvalue  = unlocked_way[way];
37223 +			color_read_in_mem_lock(lvalue, LOCK_ALL,
37224 +					       vaddr, vaddr + PAGE_SIZE);
37225 +		}
37226 +
37227 +	}	
37228 +}
37229 +
37230 +#define TRIALS 1000
37231 +
37232 +static int perf_test(void) {
37233 +	struct timespec before, after;
37234 +	struct page *page;
37235 +	void *vaddr;
37236 +	u32 *data;
37237 +	long time;
37238 +	int i;
37239 +
37240 +	page = alloc_page(__GFP_MOVABLE);
37241 +	if (!page) {
37242 +		printk(KERN_WARNING "No memory\n");
37243 +		return -ENOMEM;
37244 +	}
37245 +
37246 +	vaddr = page_address(page);
37247 +	if (!vaddr)
37248 +		printk(KERN_WARNING "%s: vaddr is null\n", __FUNCTION__);
37249 +	data = (u32*) vaddr;
37250 +
37251 +	getnstimeofday(&before);
37252 +	barrier();
37253 +	for (i = 0; i < TRIALS; i++) {
37254 +		color_flush_page(vaddr);
37255 +	}
37256 +	barrier();
37257 +	getnstimeofday(&after);
37258 +	time = update_timeval(before, after);
37259 +	printk("Average for flushes without re-reading: %ld\n", time / TRIALS);
37260 +
37261 +	color_read_in_mem(unlocked_way[0], UNLOCK_ALL, vaddr, vaddr + PAGE_SIZE);
37262 +	barrier();
37263 +	getnstimeofday(&before);
37264 +	barrier();
37265 +	for (i = 0; i < TRIALS; i++) {
37266 +		color_read_in_mem(unlocked_way[0], UNLOCK_ALL, vaddr, vaddr + PAGE_SIZE);
37267 +	}
37268 +	barrier();
37269 +	getnstimeofday(&after);
37270 +	time = update_timeval(before, after);
37271 +	printk("Average for read in (no flush): %ld\n", time / TRIALS);
37272 +
37273 +	getnstimeofday(&before);
37274 +	barrier();
37275 +	for (i = 0; i < TRIALS; i++) {
37276 +		color_read_in_mem(unlocked_way[0], UNLOCK_ALL, vaddr, vaddr + PAGE_SIZE);
37277 +		color_flush_page(vaddr);
37278 +	}
37279 +	barrier();
37280 +	getnstimeofday(&after);
37281 +	time = update_timeval(before, after);
37282 +	printk("Average for read in and then flush: %ld\n", time / TRIALS);
37283 +
37284 +	free_page((unsigned long)vaddr);
37285 +	return 0;
37286 +}
37287 +
37288 +int do_perf_test_proc_handler(struct ctl_table *table, int write,
37289 +		void __user *buffer, size_t *lenp, loff_t *ppos)
37290 +{
37291 +	int ret = 0;
37292 +
37293 +	if (write) {
37294 +		ret = perf_test();
37295 +	}
37296 +
37297 +	return ret;
37298 +}
37299 +
37300 +int setup_flusher_proc_handler(struct ctl_table *table, int write,
37301 +		void __user *buffer, size_t *lenp, loff_t *ppos)
37302 +{
37303 +	int ret = -EINVAL;
37304 +
37305 +	if (write && flusher_pages == NULL) {
37306 +		ret = setup_flusher_array();
37307 +	}
37308 +	
37309 +	printk(KERN_INFO "setup flusher return: %d\n", ret);
37310 +	return ret;
37311 +}
37312 +
37313  static struct ctl_table_header *litmus_sysctls;
37314  
37315  static int __init litmus_sysctl_init(void)
37316 @@ -614,9 +1110,13 @@ static int __init litmus_sysctl_init(void)
37317  		goto out;
37318  	}
37319  
37320 +	//setup_flusher_array();
37321 +	printk(KERN_INFO "Setup flush_array.\n");
37322  	way_partition_min = 0x00000000;
37323  	way_partition_max = 0x0000FFFF;
37324  	os_isolation = 0;
37325 +	use_part = 0;
37326 +	
37327  out:
37328  	return ret;
37329  }
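
way_partitions[] above stores, for each CPU and criticality level, a bitmap of the ways that partition may allocate into (low 16 bits, 1 = usable), and do_partition() writes its complement to the per-CPU lockdown registers, since a set lockdown bit blocks allocation in that way (see the unlocked_way[] comment). A small standalone sketch of that derivation; the helper name and the contiguous-range parameters are illustrative, not from the patch.

#include <stdio.h>
#include <stdint.h>

/*
 * Compute the PL310 lockdown value that allows allocation only in ways
 * [first, first + count).  A set bit blocks allocation in that way, so the
 * allowed-ways mask is complemented.  Mirrors what do_partition() derives
 * from way_partitions[] in this patch; standalone illustration, not kernel code.
 */
static uint32_t lockdown_for_ways(unsigned int first, unsigned int count)
{
	uint32_t allowed = ((1u << count) - 1) << first;	/* e.g. ways 0-1 -> 0x0003 */
	return 0xffff0000u | (~allowed & 0xffffu);		/* unused upper bits kept set */
}

int main(void)
{
	/* cpu0 level A/B in way_partitions[]: ways 0-1 -> register 0xfffffffc */
	printf("cpu0 A/B: 0x%08x\n", (unsigned)lockdown_for_ways(0, 2));
	/* level C in way_partitions[8]: ways 8-15 -> register 0xffff00ff */
	printf("level C : 0x%08x\n", (unsigned)lockdown_for_ways(8, 8));
	return 0;
}

Running it reproduces the values do_partition() computes from the table: 0xfffffffc for cpu0 at level A/B and 0xffff00ff for the shared level-C partition.
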
37330 diff --git a/litmus/litmus.c b/litmus/litmus.c
37331 index c8ed597..70342e7 100644
37332 --- a/litmus/litmus.c
37333 +++ b/litmus/litmus.c
37334 @@ -26,6 +26,7 @@
37335  #include <litmus/sched_trace.h>
37336  #include <litmus/litmus_proc.h>
37337  #include <litmus/clock.h>
37338 +#include <litmus/cache_proc.h>
37339  
37340  #include <asm/cacheflush.h>
37341  
37342 @@ -46,6 +47,8 @@ atomic_t rt_task_count 		= ATOMIC_INIT(0);
37343  atomic_t release_master_cpu = ATOMIC_INIT(NO_CPU);
37344  #endif
37345  
37346 +extern void l2x0_flush_all(void);
37347 +
37348  static struct kmem_cache * bheap_node_cache;
37349  extern struct kmem_cache * release_heap_cache;
37350  
37351 @@ -339,15 +342,6 @@ asmlinkage long sys_reservation_destroy(unsigned int reservation_id, int cpu)
37352  	return litmus->reservation_destroy(reservation_id, cpu);
37353  }
37354  
37355 -struct task_page {
37356 -	unsigned long vm_start;
37357 -	unsigned long vm_end;
37358 -	struct page* page;
37359 -	struct list_head list;
37360 -};
37361 -
37362 -LIST_HEAD(task_page_list);
37363 -
37364  static unsigned long color_mask;
37365  
37366  static inline unsigned long page_color(struct page *page)
37367 @@ -403,11 +397,8 @@ extern struct page *new_alloc_page(struct page *page, unsigned long node, int **
37368  asmlinkage long sys_set_page_color(int cpu)
37369  {
37370  	long ret = 0;
37371 -	//struct task_page *task_page_itr = NULL;
37372 -	//struct task_page *task_page_itr_next = NULL;
37373  	struct page *page_itr = NULL;
37374  	struct vm_area_struct *vma_itr = NULL;
37375 -	//struct task_page *entry = NULL;
37376  	int nr_pages = 0, nr_shared_pages = 0, nr_failed = 0;
37377  	unsigned long node;
37378  		
37379 @@ -420,27 +411,13 @@ asmlinkage long sys_set_page_color(int cpu)
37380  	while (vma_itr != NULL) {
37381  		unsigned int num_pages = 0, i;
37382  		struct page *old_page = NULL;
37383 -		/*
37384 -		entry = kmalloc(sizeof(struct task_page), GFP_ATOMIC);
37385 -		if (entry == NULL) {
37386 -			return -ENOSPC;
37387 -		}
37388 -		entry->vm_start = vma_itr->vm_start;
37389 -		entry->vm_end = vma_itr->vm_end;
37390 -		*/
37391 +		
37392  		num_pages = (vma_itr->vm_end - vma_itr->vm_start) / PAGE_SIZE;
37393  		// print vma flags
37394  		//printk(KERN_INFO "flags: 0x%lx\n", vma_itr->vm_flags);
37395  		//printk(KERN_INFO "start - end: 0x%lx - 0x%lx (%lu)\n", vma_itr->vm_start, vma_itr->vm_end, (vma_itr->vm_end - vma_itr->vm_start)/PAGE_SIZE);
37396  		
37397  		for (i = 0; i < num_pages; i++) {
37398 -/*
37399 -			new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma_itr, vma_itr->vm_start);
37400 -			if (!new_page)
37401 -				return -ENOSPC;
37402 -			printk(KERN_INFO "PAGE_COLOR: %lu\n", page_color(new_page));
37403 -*/
37404 -			//old_page = walk_page_table(vma_itr->vm_start + PAGE_SIZE*i);
37405  			old_page = follow_page(vma_itr, vma_itr->vm_start + PAGE_SIZE*i, FOLL_GET|FOLL_SPLIT);
37406  			
37407  			if (IS_ERR(old_page))
37408 @@ -481,9 +458,6 @@ asmlinkage long sys_set_page_color(int cpu)
37409  			}
37410  		}
37411  		
37412 -		//INIT_LIST_HEAD(&entry->list);
37413 -		//list_add(&entry->list, &task_page_list);
37414 -		
37415  		vma_itr = vma_itr->vm_next;
37416  	}
37417  
37418 @@ -493,13 +467,13 @@ asmlinkage long sys_set_page_color(int cpu)
37419  	
37420  	ret = 0;
37421  	if (cpu == -1)
37422 -		node = 4;
37423 +		node = 8;
37424  	else
37425  		node = cpu;
37426  
37427          //node= 0;
37428  	if (!list_empty(&pagelist)) {
37429 -		ret = litmus_migrate_pages(&pagelist, new_alloc_page, node, MIGRATE_ASYNC, MR_SYSCALL);
37430 +		ret = migrate_pages(&pagelist, new_alloc_page, node, MIGRATE_ASYNC, MR_SYSCALL);
37431  		TRACE_TASK(current, "%ld pages not migrated.\n", ret);
37432  		if (ret) {
37433  			putback_lru_pages(&pagelist);
37434 @@ -516,32 +490,17 @@ asmlinkage long sys_set_page_color(int cpu)
37435  		}
37436  		vma_itr = vma_itr->vm_next;
37437  	}
37438 -	
37439 -	/* copy shared pages HERE */
37440 -/*	
37441 -	ret = 0;
37442 -	if (!list_empty(&shared_pagelist)) {
37443 -		ret = migrate_shared_pages(&shared_pagelist, new_alloc_page, 0, MIGRATE_ASYNC, MR_SYSCALL);
37444 -		if (ret) {
37445 -			printk(KERN_INFO "%ld shared pages not migrated.\n", ret);
37446 -			putback_lru_pages(&shared_pagelist);
37447 -		}
37448 -	}
37449 -*/
37450 +
37451  	up_read(&current->mm->mmap_sem);
37452  
37453  	list_for_each_entry(page_itr, &shared_pagelist, lru) {
37454  		TRACE("S Anon=%d, pfn = %lu, _mapcount = %d, _count = %d\n", PageAnon(page_itr), __page_to_pfn(page_itr), page_mapcount(page_itr), page_count(page_itr));
37455  	}
37456  	
37457 -/*	
37458 -	list_for_each_entry_safe(task_page_itr, task_page_itr_next, &task_page_list, list) {
37459 -		//printk(KERN_INFO "start - end: 0x%lx - 0x%lx (%lu)\n", task_page_itr->vm_start, task_page_itr->vm_end, (task_page_itr->vm_end - task_page_itr->vm_start)/PAGE_SIZE);
37460 -		list_del(&task_page_itr->list);
37461 -		kfree(task_page_itr);		
37462 -	}
37463 -*/	
37464  	TRACE_TASK(current, "nr_pages = %d nr_failed = %d\n", nr_pages, nr_failed);
37465 +	printk(KERN_INFO "nr_pages = %d nr_failed = %d\n", nr_pages, nr_failed);
37466 +	flush_cache();
37467 +	
37468  	return ret;
37469  }
37470  
37471 @@ -834,6 +793,44 @@ static struct notifier_block shutdown_notifier = {
37472  	.notifier_call = litmus_shutdown_nb,
37473  };
37474  
37475 +static void litmus_nsacr_register(void)
37476 +{
37477 +	u32 val, new_value, reread;
37478 +
37479 +	asm volatile("mrc p15, 0, %0, c1, c1, 2" : "=r" (val));
37480 +	
37481 +	new_value = val | 0x00048c00;
37482 +	
37483 +	asm volatile("mcr p15, 0, %0, c1, c1, 2" : : "r" (new_value));
37484 +	asm volatile("mrc p15, 0, %0, c1, c1, 2" : "=r" (reread));
37485 +	printk("NSACR REGISTER = orig: 0x%08x new: 0x%08x reread: 0x%08x\n", val, new_value, reread);
37486 +	
37487 +	
37488 +	return;
37489 +}
37490 +
37491 +static void litmus_pmu_register(void)
37492 +{
37493 +	u32 val, new_value, reread;
37494 +
37495 +	asm volatile("mrc p15, 0, %0, c1, c1, 1" : "=r" (val));
37496 +	
37497 +	new_value = val | 0x00000003;
37498 +	
37499 +	asm volatile("mcr p15, 0, %0, c1, c1, 1" : : "r" (new_value));
37500 +	asm volatile("mrc p15, 0, %0, c1, c1, 1" : "=r" (reread));
37501 +	printk("SDER REGISTER = orig: 0x%08x new: 0x%08x reread: 0x%08x\n", val, new_value, reread);
37502 +	
37503 +	
37504 +	asm volatile("mrc p15, 5, %0, c15, c7, 2" : "=r" (val));
37505 +	
37506 +	//asm volatile("mcr p15, 0, %0, c1, c1, 1" : : "r" (new_value));
37507 +	//asm volatile("mrc p15, 0, %0, c1, c1, 1" : "=r" (reread));
37508 +	printk("TLB ATTR REGISTER = orig: 0x%08x\n", val);
37509 +	
37510 +	return;
37511 +}
37512 +
37513  #if defined(CONFIG_CPU_V7) && !defined(CONFIG_HW_PERF_EVENTS)
37514  static void __init litmus_enable_perfcounters_v7(void *_ignore)
37515  {
37516 @@ -866,7 +863,7 @@ static void __init litmus_enable_perfcounters_v7(void *_ignore)
37517  	asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(enable_val));
37518  
37519  	/* enables counters (cycle counter and event 1) */
37520 -    asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r"(0x80000001));
37521 +	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r"(0x80000001));
37522  }
37523  
37524  static void __init litmus_enable_perfcounters(void)
37525 @@ -910,7 +907,8 @@ static int __init _init_litmus(void)
37526  #if defined(CONFIG_CPU_V7) && !defined(CONFIG_HW_PERF_EVENTS)	
37527  	litmus_enable_perfcounters();
37528  #endif
37529 -	
37530 +	//litmus_nsacr_register();
37531 +	//litmus_pmu_register();
37532  	color_mask = ((cache_info_sets << line_size_log) - 1) ^ (PAGE_SIZE - 1);
37533  	printk("Page color mask %lx\n", color_mask);
37534  	return 0;
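
_init_litmus() above derives color_mask from the L2 geometry: the physical-address bits that select a cache set but fall outside the page offset identify a page's color. A worked userspace sketch, assuming the i.MX6's PL310 with 1 MiB, 16 ways and 32-byte lines (2048 sets), and assuming the color is the masked physical address shifted down by PAGE_SHIFT (the body of page_color() is outside the hunks shown):

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PAGE_SHIFT	12

int main(void)
{
	/* Assumed PL310 geometry: 1 MiB, 16 ways, 32-byte lines -> 2048 sets. */
	unsigned long cache_info_sets = 2048;
	unsigned long line_size_log = 5;	/* log2(32) */

	/* Same formula as _init_litmus(): set-index bits above the page offset. */
	unsigned long color_mask =
		((cache_info_sets << line_size_log) - 1) ^ (PAGE_SIZE - 1);

	/* Assumed color extraction: masked physical address past the page offset. */
	unsigned long phys = 0x10803000UL;	/* arbitrary example physical address */
	unsigned long color = (phys & color_mask) >> PAGE_SHIFT;

	printf("color_mask = 0x%lx\n", color_mask);
	printf("phys 0x%lx -> color %lu\n", phys, color);
	return 0;
}

With that geometry color_mask comes out as 0xf000, i.e. 16 page colors, which matches MAX_NR_COLORS in cache_proc.c.
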
37535 diff --git a/litmus/sched_mc2.c b/litmus/sched_mc2.c
37536 index 885218e..4800bc4 100644
37537 --- a/litmus/sched_mc2.c
37538 +++ b/litmus/sched_mc2.c
37539 @@ -29,6 +29,7 @@
37540  #define BUDGET_ENFORCEMENT_AT_C 1
37541  
37542  extern void do_partition(enum crit_level lv, int cpu);
37543 +extern void l2x0_flush_all(void);
37544  
37545  /* _global_env - reservation container for level-C tasks*/
37546  struct gmp_reservation_environment _global_env;
37547 @@ -626,7 +627,7 @@ struct task_struct* mc2_dispatch(struct sup_reservation_environment* sup_env, st
37548  }
37549  
37550  /* not used now */
37551 -static void pre_schedule(struct task_struct *prev, int cpu)
37552 +static inline void pre_schedule(struct task_struct *prev, int cpu)
37553  {
37554  	if (!prev || !is_realtime(prev))
37555  		return;
37556 @@ -635,13 +636,15 @@ static void pre_schedule(struct task_struct *prev, int cpu)
37557  }
37558  
37559  /* not used now */
37560 -static void post_schedule(struct task_struct *next, int cpu)
37561 +static inline void post_schedule(struct task_struct *next, int cpu)
37562  {
37563  	enum crit_level lev;
37564 -	if (!next || !is_realtime(next))
37565 +	if (!next) // || !is_realtime(next))
37566  		return;
37567 -	
37568 -	lev = get_task_crit_level(next);
37569 +	if (!is_realtime(next))
37570 +		lev = NUM_CRIT_LEVELS;
37571 +	else
37572 +		lev = get_task_crit_level(next);
37573  	do_partition(lev, cpu);
37574  }
37575  
37576 @@ -653,7 +656,7 @@ static struct task_struct* mc2_schedule(struct task_struct * prev)
37577  	lt_t now;
37578  	struct mc2_cpu_state *state = local_cpu_state();
37579  
37580 -	pre_schedule(prev, state->cpu);
37581 +	//pre_schedule(prev, state->cpu);
37582  	
37583  	raw_spin_lock(&_global_env.lock);
37584  	raw_spin_lock(&state->lock);
37585 @@ -873,6 +876,7 @@ static long mc2_complete_job(void)
37586  	next_release = ns_to_ktime(get_release(current));
37587  	preempt_disable();
37588  	TRACE_CUR("next_release=%llu\n", get_release(current));
37589 +	flush_cache();
37590  	if (get_release(current) > litmus_clock()) {
37591  		/* sleep until next_release */
37592  		set_current_state(TASK_INTERRUPTIBLE);
37593 @@ -888,7 +892,7 @@ static long mc2_complete_job(void)
37594  //		if (get_task_crit_level(current) == CRIT_LEVEL_A)
37595  		sched_trace_task_release(current);
37596  	}
37597 -
37598 +	//l2x0_flush_all();
37599  	TRACE_CUR("mc2_complete_job returns at %llu\n", litmus_clock());
37600  
37601  	return err;
37602 @@ -1048,7 +1052,7 @@ static long mc2_reservation_destroy(unsigned int reservation_id, int cpu)
37603  	struct reservation *res = NULL, *next;
37604  	struct sup_reservation_environment *sup_env;
37605  	int found = 0;
37606 -	enum crit_level lv = get_task_crit_level(current);
37607 +	//enum crit_level lv = get_task_crit_level(current);
37608  	unsigned long flags;
37609  	
37610  	if (cpu == -1) {
37611 -- 
37612 1.8.1.2
37613 
37614 
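flush_cache() in the patch above evicts the L2 by reading colored pages rather than by a maintenance operation: for each way it locks allocation to that single way and then reads one page of every color, so the reads cover every set of that way and push out whatever was cached there. The sketch below only models the iteration and the lockdown values; the MMIO writes and the color_read_in_mem_lock() loads are elided, and a 4 KiB page size is assumed.

#include <stdio.h>
#include <stdint.h>

#define MAX_NR_WAYS	16
#define MAX_NR_COLORS	16
#define PAGE_SIZE	4096UL

int main(void)
{
	unsigned long bytes = 0;
	int way, color;

	/*
	 * Model of flush_cache(): confine allocation to one way at a time
	 * (all other lockdown bits set), then read one page of every color.
	 * No MMIO here, only the iteration pattern and the lockdown values.
	 */
	for (way = 0; way < MAX_NR_WAYS; way++) {
		uint32_t lockval = 0xffffu & ~(1u << way);	/* low 16 bits of unlocked_way[way] */
		for (color = 0; color < MAX_NR_COLORS; color++)
			bytes += PAGE_SIZE;	/* one colored page read per (way, color) */
		printf("way %2d: lockdown 0x%04x, %d pages read\n",
		       way, (unsigned)lockval, MAX_NR_COLORS);
	}
	printf("total read per flush: %lu KiB\n", bytes / 1024);
	return 0;
}

The totals also show why setup_flusher_array() keeps 16 x 16 colored pages: one full pass reads 1 MiB, matching the assumed L2 size.
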
37615 From a2927ecc8f7f3d02d8178eaef3e426fa597d530e Mon Sep 17 00:00:00 2001
37616 From: Namhoon Kim <namhoonk@cs.unc.edu>
37617 Date: Wed, 8 Apr 2015 22:47:25 -0400
37618 Subject: [PATCH 113/119] cache flush bug
37619 
37620 ---
37621  litmus/cache_proc.c | 14 ++++++++++++--
37622  1 file changed, 12 insertions(+), 2 deletions(-)
37623 
37624 diff --git a/litmus/cache_proc.c b/litmus/cache_proc.c
37625 index 0e123fac..e91862f 100644
37626 --- a/litmus/cache_proc.c
37627 +++ b/litmus/cache_proc.c
37628 @@ -1001,7 +1001,7 @@ void flush_cache(void)
37629  	
37630  	kfree(dummy);
37631  */	
37632 -	int way, color;
37633 +	int way, color, i;
37634  	for (way=0;way<MAX_NR_WAYS;way++) {
37635  		for (color=0;color<MAX_NR_COLORS;color++) {
37636  			void *vaddr = flusher_pages[way][color];
37637 @@ -1010,6 +1010,11 @@ void flush_cache(void)
37638  					       vaddr, vaddr + PAGE_SIZE);
37639  		}
37640  
37641 +	}
37642 +
37643 +	for (i = 0; i < nr_lockregs; i++) {
37644 +		writel_relaxed(UNLOCK_ALL, ld_d_reg(i));
37645 +		writel_relaxed(UNLOCK_ALL, ld_i_reg(i));
37646  	}	
37647  }
37648  
37649 @@ -1090,9 +1095,14 @@ int setup_flusher_proc_handler(struct ctl_table *table, int write,
37650  
37651  	if (write && flusher_pages == NULL) {
37652  		ret = setup_flusher_array();
37653 +		printk(KERN_INFO "setup flusher return: %d\n", ret);
37654 +	
37655 +	}
37656 +	else if (flusher_pages) {
37657 +		printk(KERN_INFO "flusher_pages is already set!\n");
37658 +		ret = 0;
37659  	}
37660  	
37661 -	printk(KERN_INFO "setup flusher return: %d\n", ret);
37662  	return ret;
37663  }
37664  
37665 -- 
37666 1.8.1.2
37667 
37668 
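The perf_test() handler added earlier in this series times TRIALS iterations of the flush and read-in primitives with getnstimeofday() and update_timeval() and reports the per-iteration average. A userspace analogue of that measurement pattern, using clock_gettime() in place of the kernel API and a dummy operation in place of the cache primitives:

#include <stdio.h>
#include <time.h>

#define TRIALS 1000

/* Userspace analogue of update_timeval(): elapsed nanoseconds between two timestamps. */
static long elapsed_ns(struct timespec before, struct timespec after)
{
	return (after.tv_sec - before.tv_sec) * 1000000000L
		+ (after.tv_nsec - before.tv_nsec);
}

int main(void)
{
	struct timespec before, after;
	volatile unsigned long sink = 0;
	int i;

	/* Same shape as perf_test(): timestamp, repeat the operation, average. */
	clock_gettime(CLOCK_MONOTONIC, &before);
	for (i = 0; i < TRIALS; i++)
		sink += i;	/* stand-in for color_flush_page()/color_read_in_mem() */
	clock_gettime(CLOCK_MONOTONIC, &after);

	printf("average per trial: %ld ns\n", elapsed_ns(before, after) / TRIALS);
	return 0;
}
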
37669 From c3079b56cfd3b62c08e02684bee671d2361ad9c9 Mon Sep 17 00:00:00 2001
37670 From: Namhoon Kim <namhoonk@cs.unc.edu>
37671 Date: Wed, 8 Apr 2015 23:42:58 -0400
37672 Subject: [PATCH 114/119] fixed flush bug
37673 
37674 ---
37675  litmus/cache_proc.c  | 32 ++++++++++++++++++++++++++++++--
37676  litmus/reservation.c |  2 +-
37677  litmus/sched_mc2.c   | 10 +++++++---
37678  3 files changed, 38 insertions(+), 6 deletions(-)
37679 
37680 diff --git a/litmus/cache_proc.c b/litmus/cache_proc.c
37681 index e91862f..4f7fc00 100644
37682 --- a/litmus/cache_proc.c
37683 +++ b/litmus/cache_proc.c
37684 @@ -109,6 +109,28 @@ u32 prev_lockdown_i_reg[5] = {
37685  	0xFFFF00FF, /* share with level-C */
37686  };
37687  
37688 +u32 prev_lbm_i_reg[8] = {
37689 +	0x00000000,
37690 +	0x00000000,
37691 +	0x00000000,
37692 +	0x00000000,
37693 +	0x00000000,
37694 +	0x00000000,
37695 +	0x00000000,
37696 +	0x00000000,
37697 +};
37698 +
37699 +u32 prev_lbm_d_reg[8] = {
37700 +	0x00000000,
37701 +	0x00000000,
37702 +	0x00000000,
37703 +	0x00000000,
37704 +	0x00000000,
37705 +	0x00000000,
37706 +	0x00000000,
37707 +	0x00000000,
37708 +};
37709 +
37710  static void __iomem *cache_base;
37711  static void __iomem *lockreg_d;
37712  static void __iomem *lockreg_i;
37713 @@ -1002,6 +1024,12 @@ void flush_cache(void)
37714  	kfree(dummy);
37715  */	
37716  	int way, color, i;
37717 +	
37718 +	for (i = 0; i < nr_lockregs; i++) {
37719 +		prev_lbm_i_reg[i] = readl_relaxed(ld_i_reg(i));
37720 +		prev_lbm_d_reg[i] = readl_relaxed(ld_d_reg(i));
37721 +	}
37722 +	
37723  	for (way=0;way<MAX_NR_WAYS;way++) {
37724  		for (color=0;color<MAX_NR_COLORS;color++) {
37725  			void *vaddr = flusher_pages[way][color];
37726 @@ -1013,8 +1041,8 @@ void flush_cache(void)
37727  	}
37728  
37729  	for (i = 0; i < nr_lockregs; i++) {
37730 -		writel_relaxed(UNLOCK_ALL, ld_d_reg(i));
37731 -		writel_relaxed(UNLOCK_ALL, ld_i_reg(i));
37732 +		writel_relaxed(prev_lbm_i_reg[i], ld_i_reg(i));
37733 +		writel_relaxed(prev_lbm_d_reg[i], ld_d_reg(i));
37734  	}	
37735  }
37736  
37737 diff --git a/litmus/reservation.c b/litmus/reservation.c
37738 index af5a934..efd16da 100644
37739 --- a/litmus/reservation.c
37740 +++ b/litmus/reservation.c
37741 @@ -206,7 +206,7 @@ static void sup_charge_budget(
37742  				encountered_active = 1;
37743  			}			
37744  		} else {
37745 -			BUG_ON(res->state != RESERVATION_ACTIVE_IDLE);
37746 +			//BUG_ON(res->state != RESERVATION_ACTIVE_IDLE);
37747  			TRACE("sup_charge_budget INACTIVE R%u drain %llu\n", res->id, delta);
37748  			res->ops->drain_budget(res, delta);
37749  		}
37750 diff --git a/litmus/sched_mc2.c b/litmus/sched_mc2.c
37751 index 4800bc4..1e39362 100644
37752 --- a/litmus/sched_mc2.c
37753 +++ b/litmus/sched_mc2.c
37754 @@ -123,7 +123,7 @@ static enum crit_level get_task_crit_level(struct task_struct *tsk)
37755  static struct reservation* res_find_by_id(struct mc2_cpu_state *state,
37756                                            unsigned int id)
37757  {
37758 -	struct reservation *res;
37759 +	struct reservation *res = NULL;
37760  
37761  	res = sup_find_by_id(&state->sup_env, id);
37762  	if (!res)
37763 @@ -404,8 +404,12 @@ static lt_t mc2_update_ghost_state(struct mc2_cpu_state *state)
37764  				continue;
37765  			
37766  			res = res_find_by_id(state, tinfo->mc2_param.res_id);
37767 -			BUG_ON(!res);
37768 -//printk(KERN_ALERT "R%d found!\n", res->id);			
37769 +			//BUG_ON(!res);
37770 +			if (!res) {
37771 +				printk(KERN_ALERT "mc2_update_ghost_state(): R%d not found!\n", tinfo->mc2_param.res_id);			
37772 +				return 0;
37773 +			}
37774 +			
37775  			TRACE("LV %d running id %d budget %llu\n", 
37776  			       lv, tinfo->mc2_param.res_id, res->cur_budget);
37777  			/* If the budget is exhausted, clear is_ghost and reschedule */
37778 -- 
37779 1.8.1.2
37780 
37781 
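The change above snapshots each lockdown register into prev_lbm_i_reg[]/prev_lbm_d_reg[] and writes those values back after the flush, instead of unconditionally unlocking everything as the previous patch did, so a partition that was active when the flush started is left intact. A trivial standalone model of that save/flush/restore sequence (plain variables stand in for the registers and the masks are invented):

#include <stdio.h>
#include <stdint.h>

#define NR_LOCKREGS 4

static uint32_t ld_d[NR_LOCKREGS] = { 0xfffc, 0xfff3, 0xffcf, 0xff3f };	/* assumed partitions */
static uint32_t prev_lbm_d[NR_LOCKREGS];

static void flush_with_restore(void)
{
	int i;

	for (i = 0; i < NR_LOCKREGS; i++)	/* save the current partitions */
		prev_lbm_d[i] = ld_d[i];

	for (i = 0; i < NR_LOCKREGS; i++)	/* flushing temporarily rewrites lockdown */
		ld_d[i] = 0xfffe;		/* e.g. confined to way 0 while reading */

	for (i = 0; i < NR_LOCKREGS; i++)	/* restore, instead of UNLOCK_ALL as before */
		ld_d[i] = prev_lbm_d[i];
}

int main(void)
{
	flush_with_restore();
	printf("cpu0 lockdown after flush: 0x%04x\n", (unsigned)ld_d[0]);	/* 0xfffc, unchanged */
	return 0;
}
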
37782 From e5d0df8359d1a297b4ffb59ebae18df63d7dab4f Mon Sep 17 00:00:00 2001
37783 From: Namhoon Kim <namhoonk@cs.unc.edu>
37784 Date: Thu, 9 Apr 2015 23:27:52 -0400
37785 Subject: [PATCH 115/119] fix: per-CPU cache flushing and lockdown handling
37786 
37787 ---
37788  litmus/cache_proc.c | 120 ++++++++++++++++++++++------------------------------
37789  litmus/litmus.c     |  14 ++++--
37790  litmus/sched_mc2.c  |  14 +++---
37791  3 files changed, 66 insertions(+), 82 deletions(-)
37792 
37793 diff --git a/litmus/cache_proc.c b/litmus/cache_proc.c
37794 index 4f7fc00..68b451d 100644
37795 --- a/litmus/cache_proc.c
37796 +++ b/litmus/cache_proc.c
37797 @@ -192,24 +192,24 @@ static inline void cache_sync(void)
37798  	cache_wait(base + L2X0_CACHE_SYNC, 1);
37799  }
37800  
37801 -static void print_lockdown_registers(void)
37802 +static void print_lockdown_registers(int cpu)
37803  {
37804  	int i;
37805 -
37806  	//for (i = 0; i < nr_lockregs; i++) {
37807  	for (i = 0; i < 4; i++) {
37808 -		printk("Lockdown Data CPU %2d: 0x%04x\n",
37809 +		printk("P%d Lockdown Data CPU %2d: 0x%04x\n", cpu,
37810  				i, readl_relaxed(ld_d_reg(i)));
37811 -		printk("Lockdown Inst CPU %2d: 0x%04x\n",
37812 +		printk("P%d Lockdown Inst CPU %2d: 0x%04x\n", cpu,
37813  				i, readl_relaxed(ld_i_reg(i)));
37814  	}
37815  }
37816  
37817  static void test_lockdown(void *ignore)
37818  {
37819 -	int i;
37820 +	int i, cpu;
37821  
37822 -	printk("Start lockdown test on CPU %d.\n", smp_processor_id());
37823 +	cpu = smp_processor_id();
37824 +	printk("Start lockdown test on CPU %d.\n", cpu);
37825  
37826  	for (i = 0; i < nr_lockregs; i++) {
37827  		printk("CPU %2d data reg: 0x%8p\n", i, ld_d_reg(i));
37828 @@ -217,7 +217,7 @@ static void test_lockdown(void *ignore)
37829  	}
37830  
37831  	printk("Lockdown initial state:\n");
37832 -	print_lockdown_registers();
37833 +	print_lockdown_registers(cpu);
37834  	printk("---\n");
37835  
37836  	for (i = 0; i < nr_lockregs; i++) {
37837 @@ -225,7 +225,7 @@ static void test_lockdown(void *ignore)
37838  		writel_relaxed(2, ld_i_reg(i));
37839  	}
37840  	printk("Lockdown all data=1 instr=2:\n");
37841 -	print_lockdown_registers();
37842 +	print_lockdown_registers(cpu);
37843  	printk("---\n");
37844  
37845  	for (i = 0; i < nr_lockregs; i++) {
37846 @@ -233,7 +233,7 @@ static void test_lockdown(void *ignore)
37847  		writel_relaxed(((1 << 8) >> i), ld_i_reg(i));
37848  	}
37849  	printk("Lockdown varies:\n");
37850 -	print_lockdown_registers();
37851 +	print_lockdown_registers(cpu);
37852  	printk("---\n");
37853  
37854  	for (i = 0; i < nr_lockregs; i++) {
37855 @@ -241,7 +241,7 @@ static void test_lockdown(void *ignore)
37856  		writel_relaxed(UNLOCK_ALL, ld_i_reg(i));
37857  	}
37858  	printk("Lockdown all zero:\n");
37859 -	print_lockdown_registers();
37860 +	print_lockdown_registers(cpu);
37861  
37862  	printk("End lockdown test.\n");
37863  }
37864 @@ -273,12 +273,14 @@ int way_partition_handler(struct ctl_table *table, int write, void __user *buffe
37865  		size_t *lenp, loff_t *ppos)
37866  {
37867  	int ret = 0, i;
37868 +	unsigned long flags;
37869  	
37870  	mutex_lock(&lockdown_proc);
37871  	
37872  	//flush_cache_all();
37873  	//cache_sync();
37874 -	l2x0_flush_all();
37875 +	//l2x0_flush_all();
37876 +	flush_cache();
37877  	
37878  	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
37879  	if (ret)
37880 @@ -296,8 +298,10 @@ int way_partition_handler(struct ctl_table *table, int write, void __user *buffe
37881  				       i * L2X0_LOCKDOWN_STRIDE);
37882  		}
37883  	}
37884 -	print_lockdown_registers();
37885 -
37886 +	
37887 +	local_irq_save(flags);
37888 +	print_lockdown_registers(smp_processor_id());
37889 +	local_irq_restore(flags);
37890  out:
37891  	mutex_unlock(&lockdown_proc);
37892  	return ret;
37893 @@ -307,6 +311,7 @@ int lock_all_handler(struct ctl_table *table, int write, void __user *buffer,
37894  		size_t *lenp, loff_t *ppos)
37895  {
37896  	int ret = 0, i;
37897 +	unsigned long flags;
37898  	
37899  	mutex_lock(&lockdown_proc);
37900  	
37901 @@ -355,8 +360,9 @@ int lock_all_handler(struct ctl_table *table, int write, void __user *buffer,
37902  */
37903  	}
37904  	printk("LOCK_ALL HANDLER\n");
37905 -	print_lockdown_registers();
37906 -
37907 +	local_irq_save(flags);
37908 +	print_lockdown_registers(smp_processor_id());
37909 +	local_irq_restore(flags);
37910  out:
37911  	mutex_unlock(&lockdown_proc);
37912  	return ret;
37913 @@ -383,27 +389,28 @@ void mem_lock(u32 lock_val, int cpu)
37914  void do_partition(enum crit_level lv, int cpu)
37915  {
37916  	u32 regs;
37917 -	//unsigned long flags;
37918 +	unsigned long flags;
37919  	
37920  	if (lock_all || !use_part)
37921  		return;
37922 +	raw_spin_lock_irqsave(&cache_lock, flags);
37923  	switch(lv) {
37924  		case CRIT_LEVEL_A:
37925  			regs = ~way_partitions[cpu*2];
37926 -			regs |= 0xffff0000;
37927 +			//regs |= 0xffff0000;
37928  			writel_relaxed(regs, ld_d_reg(cpu));
37929  			writel_relaxed(regs, ld_i_reg(cpu));
37930  			break;
37931  		case CRIT_LEVEL_B:
37932  			regs = ~way_partitions[cpu*2+1];
37933 -			regs |= 0xffff0000;
37934 +			//regs |= 0xffff0000;
37935  			writel_relaxed(regs, ld_d_reg(cpu));
37936  			writel_relaxed(regs, ld_i_reg(cpu));
37937  			break;
37938  		case CRIT_LEVEL_C:
37939  		case NUM_CRIT_LEVELS:
37940  			regs = ~way_partitions[8];
37941 -			regs |= 0xffff0000;
37942 +			//regs |= 0xffff0000;
37943  			writel_relaxed(regs, ld_d_reg(cpu));
37944  			writel_relaxed(regs, ld_i_reg(cpu));
37945  			break;
37946 @@ -411,33 +418,16 @@ void do_partition(enum crit_level lv, int cpu)
37947  			BUG();
37948  
37949  	}
37950 +/*	
37951 +	printk(KERN_INFO "P%d lockdown on P%d\n", smp_processor_id(), cpu);
37952 +	printk(KERN_INFO "CRIT_LEVEL %d\n", lv);
37953 +	print_lockdown_registers(smp_processor_id());
37954 +*/	
37955 +	raw_spin_unlock_irqrestore(&cache_lock, flags);
37956  	//cache_sync();
37957  //	barrier();
37958  //	mem_lock(regs, cpu);
37959 -//	barrier();		
37960 -	//print_lockdown_registers();
37961 -/*
37962 -	if (use_set_partition == 1 && use_way_partition == 1)
37963 -		printk(KERN_ALERT "BOTH SET, WAY ARE SET!!!!\n");
37964 -	
37965 -	if (use_way_partition == 1) {
37966 -		if (lv < CRIT_LEVEL_C) {
37967 -			writel_relaxed(way_partitions[cpu], ld_d_reg(cpu));
37968 -			writel_relaxed(way_partitions[cpu], ld_i_reg(cpu));
37969 -		} else {
37970 -			writel_relaxed(way_partitions[4], ld_d_reg(cpu));
37971 -			writel_relaxed(way_partitions[4], ld_i_reg(cpu));
37972 -		}
37973 -	} else if (use_set_partition == 1) {
37974 -		if (lv < CRIT_LEVEL_C) {
37975 -			writel_relaxed(set_partitions[0], ld_d_reg(cpu));
37976 -			writel_relaxed(set_partitions[0], ld_i_reg(cpu));
37977 -		} else {
37978 -			writel_relaxed(set_partitions[1], ld_d_reg(cpu));
37979 -			writel_relaxed(set_partitions[1], ld_i_reg(cpu));
37980 -		}
37981 -	}
37982 -*/
37983 +//	barrier();
37984  }
37985  
37986  int use_part_proc_handler(struct ctl_table *table, int write, void __user *buffer,
37987 @@ -491,21 +481,21 @@ void inline enter_irq_mode(void)
37988  {
37989  	int cpu = smp_processor_id();
37990  
37991 -	return;
37992 +	//return;
37993  	prev_lockdown_i_reg[cpu] = readl_relaxed(ld_i_reg(cpu));
37994  	prev_lockdown_d_reg[cpu] = readl_relaxed(ld_d_reg(cpu));
37995  	
37996  	if (os_isolation == 0)
37997  		return;	
37998  	
37999 -	writel_relaxed(prev_lockdown_i_reg[4], ld_i_reg(cpu));
38000 -	writel_relaxed(prev_lockdown_d_reg[4], ld_d_reg(cpu));
38001 +	writel_relaxed(way_partitions[8], ld_i_reg(cpu));
38002 +	writel_relaxed(way_partitions[8], ld_d_reg(cpu));
38003  }
38004  
38005  void inline exit_irq_mode(void)
38006  {
38007  	int cpu = smp_processor_id();
38008 -	return;
38009 +	//return;
38010  	if (os_isolation == 0)
38011  		return;
38012  
38013 @@ -1011,26 +1001,19 @@ out_free:
38014  
38015  void flush_cache(void)
38016  {
38017 -/*	int *dummy;
38018 -	
38019 -	flush_cache_all();
38020 -	int size = 128, i, t = 0;
38021 -	
38022 -	dummy = kmalloc(PAGE_SIZE*size, GFP_KERNEL);
38023 -	for (i = 0; i<PAGE_SIZE*size/sizeof(int); i++) {
38024 -		dummy[i] = t++;
38025 -	}
38026 -	
38027 -	kfree(dummy);
38028 -*/	
38029 -	int way, color, i;
38030 +	int way, color, cpu;
38031 +	unsigned long flags;
38032  	
38033 -	for (i = 0; i < nr_lockregs; i++) {
38034 -		prev_lbm_i_reg[i] = readl_relaxed(ld_i_reg(i));
38035 -		prev_lbm_d_reg[i] = readl_relaxed(ld_d_reg(i));
38036 -	}
38037 +	raw_spin_lock_irqsave(&cache_lock, flags);
38038 +	cpu = raw_smp_processor_id();
38039  	
38040 +	prev_lbm_i_reg[cpu] = readl_relaxed(ld_i_reg(cpu));
38041 +	prev_lbm_d_reg[cpu] = readl_relaxed(ld_d_reg(cpu));
38042 +	//printk("P%d reg value = 0x%04x\n", cpu, prev_lbm_d_reg[cpu]);
38043  	for (way=0;way<MAX_NR_WAYS;way++) {
38044 +		if ( (0x00000001 << way) & (prev_lbm_d_reg[cpu]) )
38045 +			continue;
38046 +		//printk("P%d flushes way #%d\n", cpu, way);
38047  		for (color=0;color<MAX_NR_COLORS;color++) {
38048  			void *vaddr = flusher_pages[way][color];
38049  			u32 lvalue  = unlocked_way[way];
38050 @@ -1040,10 +1023,9 @@ void flush_cache(void)
38051  
38052  	}
38053  
38054 -	for (i = 0; i < nr_lockregs; i++) {
38055 -		writel_relaxed(prev_lbm_i_reg[i], ld_i_reg(i));
38056 -		writel_relaxed(prev_lbm_d_reg[i], ld_d_reg(i));
38057 -	}	
38058 +	writel_relaxed(prev_lbm_i_reg[cpu], ld_i_reg(cpu));
38059 +	writel_relaxed(prev_lbm_d_reg[cpu], ld_d_reg(cpu));
38060 +	raw_spin_unlock_irqrestore(&cache_lock, flags);
38061  }
38062  
38063  #define TRIALS 1000
38064 @@ -1148,8 +1130,6 @@ static int __init litmus_sysctl_init(void)
38065  		goto out;
38066  	}
38067  
38068 -	//setup_flusher_array();
38069 -	printk(KERN_INFO "Setup flush_array.\n");
38070  	way_partition_min = 0x00000000;
38071  	way_partition_max = 0x0000FFFF;
38072  	os_isolation = 0;
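
The per-CPU flush_cache() above skips any way whose lockdown bit is already set for the calling CPU, since that CPU cannot have allocated into it, and flushes only the allocatable ways while holding cache_lock. A standalone model of the skip test (the lockdown value is an invented example):

#include <stdio.h>
#include <stdint.h>

#define MAX_NR_WAYS 16

int main(void)
{
	/*
	 * Example: a level-A/B task on cpu0 restricted to ways 0-1, so the
	 * saved D-lockdown value is 0xfffc.  Ways with their bit set are
	 * locked for this CPU and hold none of its lines, so they are skipped.
	 */
	uint32_t prev_lbm_d = 0xfffc;	/* assumed current D-lockdown value for this CPU */
	int way, flushed = 0;

	for (way = 0; way < MAX_NR_WAYS; way++) {
		if ((0x00000001u << way) & prev_lbm_d)
			continue;	/* locked for this CPU -> nothing of ours to evict */
		printf("flush way %d\n", way);
		flushed++;
	}
	printf("%d of %d ways flushed\n", flushed, MAX_NR_WAYS);
	return 0;
}

For a two-way level-A/B partition this cuts a flush from 16 ways to 2, i.e. from 256 colored-page reads to 32.
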
38073 diff --git a/litmus/litmus.c b/litmus/litmus.c
38074 index 70342e7..5692905 100644
38075 --- a/litmus/litmus.c
38076 +++ b/litmus/litmus.c
38077 @@ -27,6 +27,7 @@
38078  #include <litmus/litmus_proc.h>
38079  #include <litmus/clock.h>
38080  #include <litmus/cache_proc.h>
38081 +#include <litmus/mc2_common.h>
38082  
38083  #include <asm/cacheflush.h>
38084  
38085 @@ -394,6 +395,8 @@ extern struct page *new_alloc_page(struct page *page, unsigned long node, int **
38086  
38087  #endif
38088  
38089 +//static raw_spinlock_t migrate_lock;
38090 +
38091  asmlinkage long sys_set_page_color(int cpu)
38092  {
38093  	long ret = 0;
38094 @@ -401,10 +404,12 @@ asmlinkage long sys_set_page_color(int cpu)
38095  	struct vm_area_struct *vma_itr = NULL;
38096  	int nr_pages = 0, nr_shared_pages = 0, nr_failed = 0;
38097  	unsigned long node;
38098 +	enum crit_level lv;
38099  		
38100  	LIST_HEAD(pagelist);
38101  	LIST_HEAD(shared_pagelist);
38102  	
38103 +	
38104  	down_read(&current->mm->mmap_sem);
38105  	TRACE_TASK(current, "SYSCALL set_page_color\n");
38106  	vma_itr = current->mm->mmap;
38107 @@ -466,10 +471,11 @@ asmlinkage long sys_set_page_color(int cpu)
38108  //	}
38109  	
38110  	ret = 0;
38111 +	lv = tsk_rt(current)->mc2_data->crit;
38112  	if (cpu == -1)
38113  		node = 8;
38114  	else
38115 -		node = cpu;
38116 +		node = cpu*2 + lv;
38117  
38118          //node= 0;
38119  	if (!list_empty(&pagelist)) {
38120 @@ -492,13 +498,13 @@ asmlinkage long sys_set_page_color(int cpu)
38121  	}
38122  
38123  	up_read(&current->mm->mmap_sem);
38124 -
38125 +	
38126  	list_for_each_entry(page_itr, &shared_pagelist, lru) {
38127  		TRACE("S Anon=%d, pfn = %lu, _mapcount = %d, _count = %d\n", PageAnon(page_itr), __page_to_pfn(page_itr), page_mapcount(page_itr), page_count(page_itr));
38128  	}
38129  	
38130  	TRACE_TASK(current, "nr_pages = %d nr_failed = %d\n", nr_pages, nr_failed);
38131 -	printk(KERN_INFO "nr_pages = %d nr_failed = %d\n", nr_pages, nr_failed);
38132 +	printk(KERN_INFO "node = %ld, nr_pages = %d, nr_failed = %d\n", node, nr_pages, nr_failed);
38133  	flush_cache();
38134  	
38135  	return ret;
38136 @@ -911,6 +917,8 @@ static int __init _init_litmus(void)
38137  	//litmus_pmu_register();
38138  	color_mask = ((cache_info_sets << line_size_log) - 1) ^ (PAGE_SIZE - 1);
38139  	printk("Page color mask %lx\n", color_mask);
38140 +	
38141 +	//raw_spin_lock_init(&migrate_lock);
38142  	return 0;
38143  }
38144  
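
sys_set_page_color() above now folds the task's criticality level into the migration target: each (cpu, level A/B) pair gets its own index and cpu == -1 maps to index 8, which new_alloc_page() then uses to look up set_partition[]/bank_partition[]. A small sketch of that mapping; the numeric enum values and the reading of index 8 as the shared case are assumptions, the enum itself is defined elsewhere in the tree.

#include <stdio.h>

/* Assumed numeric values; the real enum is defined elsewhere in the tree. */
enum crit_level { CRIT_LEVEL_A = 0, CRIT_LEVEL_B, CRIT_LEVEL_C, NUM_CRIT_LEVELS };

/*
 * Model of the node selection in sys_set_page_color() after this patch:
 * each (cpu, level-A/B) pair gets its own partition index; cpu == -1
 * (presumably the shared/level-C case) maps to index 8.
 */
static long color_node(int cpu, enum crit_level lv)
{
	if (cpu == -1)
		return 8;
	return cpu * 2 + lv;
}

int main(void)
{
	printf("cpu0 level A -> node %ld\n", color_node(0, CRIT_LEVEL_A));	/* 0 */
	printf("cpu0 level B -> node %ld\n", color_node(0, CRIT_LEVEL_B));	/* 1 */
	printf("cpu3 level B -> node %ld\n", color_node(3, CRIT_LEVEL_B));	/* 7 */
	printf("cpu -1       -> node %ld\n", color_node(-1, CRIT_LEVEL_C));	/* 8 */
	return 0;
}
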
38145 diff --git a/litmus/sched_mc2.c b/litmus/sched_mc2.c
38146 index 1e39362..e59030f 100644
38147 --- a/litmus/sched_mc2.c
38148 +++ b/litmus/sched_mc2.c
38149 @@ -630,7 +630,6 @@ struct task_struct* mc2_dispatch(struct sup_reservation_environment* sup_env, st
38150  	return NULL;
38151  }
38152  
38153 -/* not used now */
38154  static inline void pre_schedule(struct task_struct *prev, int cpu)
38155  {
38156  	if (!prev || !is_realtime(prev))
38157 @@ -639,16 +638,15 @@ static inline void pre_schedule(struct task_struct *prev, int cpu)
38158  	do_partition(CRIT_LEVEL_C, cpu);
38159  }
38160  
38161 -/* not used now */
38162  static inline void post_schedule(struct task_struct *next, int cpu)
38163  {
38164  	enum crit_level lev;
38165 -	if (!next) // || !is_realtime(next))
38166 +	if ((!next) || !is_realtime(next))
38167  		return;
38168 -	if (!is_realtime(next))
38169 +/*	if (!is_realtime(next))
38170  		lev = NUM_CRIT_LEVELS;
38171 -	else
38172 -		lev = get_task_crit_level(next);
38173 +	else */
38174 +	lev = get_task_crit_level(next);
38175  	do_partition(lev, cpu);
38176  }
38177  
38178 @@ -660,7 +658,7 @@ static struct task_struct* mc2_schedule(struct task_struct * prev)
38179  	lt_t now;
38180  	struct mc2_cpu_state *state = local_cpu_state();
38181  
38182 -	//pre_schedule(prev, state->cpu);
38183 +	pre_schedule(prev, state->cpu);
38184  	
38185  	raw_spin_lock(&_global_env.lock);
38186  	raw_spin_lock(&state->lock);
38187 @@ -705,8 +703,6 @@ static struct task_struct* mc2_schedule(struct task_struct * prev)
38188  	/* NOTE: drops state->lock */
38189  	mc2_update_timer_and_unlock(state);
38190  
38191 -
38192 -	
38193  	if (prev != state->scheduled && is_realtime(prev)) {
38194  		struct mc2_task_state* tinfo = get_mc2_state(prev);
38195  		struct reservation* res = tinfo->res_info.client.reservation;
38196 -- 
38197 1.8.1.2
38198 
38199 
38200 From 8ea8941a1aa8fff86a51fe9d5c7f0b6e80e5c23d Mon Sep 17 00:00:00 2001
38201 From: Namhoon Kim <namhoonk@cs.unc.edu>
38202 Date: Thu, 9 Apr 2015 23:48:27 -0400
38203 Subject: [PATCH 116/119] whitespace fix in do_partition()
38204 
38205 ---
38206  litmus/cache_proc.c | 1 +
38207  1 file changed, 1 insertion(+)
38208 
38209 diff --git a/litmus/cache_proc.c b/litmus/cache_proc.c
38210 index 68b451d..7e2e355 100644
38211 --- a/litmus/cache_proc.c
38212 +++ b/litmus/cache_proc.c
38213 @@ -418,6 +418,7 @@ void do_partition(enum crit_level lv, int cpu)
38214  			BUG();
38215  
38216  	}
38217 +	
38218  /*	
38219  	printk(KERN_INFO "P%d lockdown on P%d\n", smp_processor_id(), cpu);
38220  	printk(KERN_INFO "CRIT_LEVEL %d\n", lv);
38221 -- 
38222 1.8.1.2
38223 
38224 
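The following patch narrows the lockdown tables to the PL310's 16 implemented ways; the nr_unlocked_way[] entries there (and, in their low 16 bits, the original ones earlier in this series) follow a simple closed form: with n usable ways the lowest n lockdown bits are clear and the rest stay set. A standalone check that regenerates the table from that formula, not part of the series:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	unsigned int n;

	/* Entry n unlocks the lowest n ways; the other implemented ways stay locked. */
	for (n = 0; n <= 16; n++) {
		uint32_t mask = 0xffffu & ~((1u << n) - 1u);
		printf("nr_unlocked_way[%2u] = 0x%04x\n", n, (unsigned)mask);
	}
	return 0;
}
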
38225 From 9d6f8815cc53c932a4cf4d0457374b2c5fc100e3 Mon Sep 17 00:00:00 2001
38226 From: Namhoon Kim <namhoonk@cs.unc.edu>
38227 Date: Fri, 10 Apr 2015 10:13:36 -0400
38228 Subject: [PATCH 117/119] fix: 16-bit lockdown masks and flush_cache(all) interface
38229 
38230 ---
38231  include/litmus/cache_proc.h |   2 +-
38232  litmus/cache_proc.c         | 112 ++++++++++++++++++++++----------------------
38233  litmus/litmus.c             |   3 +-
38234  litmus/sched_mc2.c          |   5 +-
38235  4 files changed, 60 insertions(+), 62 deletions(-)
38236 
38237 diff --git a/include/litmus/cache_proc.h b/include/litmus/cache_proc.h
38238 index 24128d7..cf5fb04 100644
38239 --- a/include/litmus/cache_proc.h
38240 +++ b/include/litmus/cache_proc.h
38241 @@ -6,7 +6,7 @@
38242  void litmus_setup_lockdown(void __iomem*, u32);
38243  void enter_irq_mode(void);
38244  void exit_irq_mode(void);
38245 -void flush_cache(void);
38246 +void flush_cache(int all);
38247  
38248  extern struct page *new_alloc_page_color(unsigned long color);
38249  
38250 diff --git a/litmus/cache_proc.c b/litmus/cache_proc.c
38251 index 7e2e355..3c7724d 100644
38252 --- a/litmus/cache_proc.c
38253 +++ b/litmus/cache_proc.c
38254 @@ -50,23 +50,23 @@ u32 unlocked_way[MAX_NR_WAYS]  = {
38255  };
38256  
38257  u32 nr_unlocked_way[MAX_NR_WAYS+1]  = {
38258 -	0xFFFFFFFF, /* all ways are locked. usable = 0*/
38259 -	0xFFFFFFFE, /* way ~0 unlocked. usable = 1 */
38260 -	0xFFFFFFFC,
38261 -	0xFFFFFFF8,
38262 -	0xFFFFFFF0,
38263 -	0xFFFFFFE0,
38264 -	0xFFFFFFC0,
38265 -	0xFFFFFF80,
38266 -	0xFFFFFF00,
38267 -	0xFFFFFE00,
38268 -	0xFFFFFC00,
38269 -	0xFFFFF800,
38270 -	0xFFFFF000,
38271 -	0xFFFFE000,
38272 -	0xFFFFC000,
38273 -	0xFFFF8000,
38274 -	0xFFFF0000, /* way ~15 unlocked. usable = 16 */
38275 +	0x0000FFFF, /* all ways are locked. usable = 0*/
38276 +	0x0000FFFE, /* way ~0 unlocked. usable = 1 */
38277 +	0x0000FFFC,
38278 +	0x0000FFF8,
38279 +	0x0000FFF0,
38280 +	0x0000FFE0,
38281 +	0x0000FFC0,
38282 +	0x0000FF80,
38283 +	0x0000FF00,
38284 +	0x0000FE00,
38285 +	0x0000FC00,
38286 +	0x0000F800,
38287 +	0x0000F000,
38288 +	0x0000E000,
38289 +	0x0000C000,
38290 +	0x00008000,
38291 +	0x00000000, /* way ~15 unlocked. usable = 16 */
38292  };
38293  
38294  u32 way_partition[4] = {
38295 @@ -88,25 +88,20 @@ u32 way_partitions[9] = {
38296  	0xffffff00, /* lv C */
38297  };
38298  
38299 -u32 set_partitions[2] = {
38300 -	0xFFFFFF00, /* cpuX A and B */
38301 -	0xFFFF00FF, /* lv C */
38302 -};
38303 -
38304  u32 prev_lockdown_d_reg[5] = {
38305 -	0xFFFFFF00,
38306 -	0xFFFFFF00,
38307 -	0xFFFFFF00,
38308 -	0xFFFFFF00,
38309 -	0xFFFF00FF, /* share with level-C */
38310 +	0x0000FF00,
38311 +	0x0000FF00,
38312 +	0x0000FF00,
38313 +	0x0000FF00,
38314 +	0x000000FF, /* share with level-C */
38315  };
38316  
38317  u32 prev_lockdown_i_reg[5] = {
38318 -	0xFFFFFF00,
38319 -	0xFFFFFF00,
38320 -	0xFFFFFF00,
38321 -	0xFFFFFF00,
38322 -	0xFFFF00FF, /* share with level-C */
38323 +	0x0000FF00,
38324 +	0x0000FF00,
38325 +	0x0000FF00,
38326 +	0x0000FF00,
38327 +	0x000000FF, /* share with level-C */
38328  };
38329  
38330  u32 prev_lbm_i_reg[8] = {
38331 @@ -280,7 +275,7 @@ int way_partition_handler(struct ctl_table *table, int write, void __user *buffe
38332  	//flush_cache_all();
38333  	//cache_sync();
38334  	//l2x0_flush_all();
38335 -	flush_cache();
38336 +	flush_cache(1);
38337  	
38338  	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
38339  	if (ret)
38340 @@ -319,7 +314,7 @@ int lock_all_handler(struct ctl_table *table, int write, void __user *buffer,
38341  	//outer_flush_all();
38342  	//cache_sync();
38343  	//l2x0_flush_all();
38344 -	flush_cache();
38345 +	flush_cache(1);
38346  	
38347  	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
38348  	if (ret)
38349 @@ -368,11 +363,11 @@ out:
38350  	return ret;
38351  }
38352  
38353 -void mem_lock(u32 lock_val, int cpu)
38354 +void cache_lockdown(u32 lock_val, int cpu)
38355  {
38356 -	unsigned long flags;
38357 +	//unsigned long flags;
38358  
38359 -	raw_spin_lock_irqsave(&cache_lock, flags);
38360 +	//raw_spin_lock_irqsave(&cache_lock, flags);
38361  
38362  	__asm__ __volatile__ (
38363  "	str	%[lockval], [%[dcachereg]]\n"
38364 @@ -383,7 +378,7 @@ void mem_lock(u32 lock_val, int cpu)
38365  	  [lockval] "r" (lock_val)
38366  	: "cc");
38367  
38368 -	raw_spin_unlock_irqrestore(&cache_lock, flags);
38369 +	//raw_spin_unlock_irqrestore(&cache_lock, flags);
38370  }
38371  
38372  void do_partition(enum crit_level lv, int cpu)
38373 @@ -397,34 +392,39 @@ void do_partition(enum crit_level lv, int cpu)
38374  	switch(lv) {
38375  		case CRIT_LEVEL_A:
38376  			regs = ~way_partitions[cpu*2];
38377 -			//regs |= 0xffff0000;
38378 -			writel_relaxed(regs, ld_d_reg(cpu));
38379 -			writel_relaxed(regs, ld_i_reg(cpu));
38380 +			regs &= 0x0000ffff;
38381 +			//writel_relaxed(regs, ld_d_reg(cpu));
38382 +			//writel_relaxed(regs, ld_i_reg(cpu));
38383  			break;
38384  		case CRIT_LEVEL_B:
38385  			regs = ~way_partitions[cpu*2+1];
38386 -			//regs |= 0xffff0000;
38387 -			writel_relaxed(regs, ld_d_reg(cpu));
38388 -			writel_relaxed(regs, ld_i_reg(cpu));
38389 +			regs &= 0x0000ffff;
38390 +			//writel_relaxed(regs, ld_d_reg(cpu));
38391 +			//writel_relaxed(regs, ld_i_reg(cpu));
38392  			break;
38393  		case CRIT_LEVEL_C:
38394  		case NUM_CRIT_LEVELS:
38395  			regs = ~way_partitions[8];
38396 -			//regs |= 0xffff0000;
38397 -			writel_relaxed(regs, ld_d_reg(cpu));
38398 -			writel_relaxed(regs, ld_i_reg(cpu));
38399 +			regs &= 0x0000ffff;
38400 +			//writel_relaxed(regs, ld_d_reg(cpu));
38401 +			//writel_relaxed(regs, ld_i_reg(cpu));
38402  			break;
38403  		default:
38404  			BUG();
38405  
38406  	}
38407 -	
38408 +	barrier();
38409 +	cache_lockdown(regs, cpu);
38410 +	barrier();
38411  /*	
38412  	printk(KERN_INFO "P%d lockdown on P%d\n", smp_processor_id(), cpu);
38413  	printk(KERN_INFO "CRIT_LEVEL %d\n", lv);
38414  	print_lockdown_registers(smp_processor_id());
38415  */	
38416  	raw_spin_unlock_irqrestore(&cache_lock, flags);
38417 +	
38418 +	flush_cache(0);
38419 +	
38420  	//cache_sync();
38421  //	barrier();
38422  //	mem_lock(regs, cpu);
38423 @@ -483,11 +483,11 @@ void inline enter_irq_mode(void)
38424  	int cpu = smp_processor_id();
38425  
38426  	//return;
38427 -	prev_lockdown_i_reg[cpu] = readl_relaxed(ld_i_reg(cpu));
38428 -	prev_lockdown_d_reg[cpu] = readl_relaxed(ld_d_reg(cpu));
38429 -	
38430  	if (os_isolation == 0)
38431  		return;	
38432 +
38433 +	prev_lockdown_i_reg[cpu] = readl_relaxed(ld_i_reg(cpu));
38434 +	prev_lockdown_d_reg[cpu] = readl_relaxed(ld_d_reg(cpu));
38435  	
38436  	writel_relaxed(way_partitions[8], ld_i_reg(cpu));
38437  	writel_relaxed(way_partitions[8], ld_d_reg(cpu));
38438 @@ -499,7 +499,6 @@ void inline exit_irq_mode(void)
38439  	//return;
38440  	if (os_isolation == 0)
38441  		return;
38442 -
38443  	writel_relaxed(prev_lockdown_i_reg[cpu], ld_i_reg(cpu));
38444  	writel_relaxed(prev_lockdown_d_reg[cpu], ld_d_reg(cpu));	
38445  }
38446 @@ -1000,7 +999,7 @@ out_free:
38447  	return ret;
38448  }
38449  
38450 -void flush_cache(void)
38451 +void flush_cache(int all)
38452  {
38453  	int way, color, cpu;
38454  	unsigned long flags;
38455 @@ -1010,11 +1009,12 @@ void flush_cache(void)
38456  	
38457  	prev_lbm_i_reg[cpu] = readl_relaxed(ld_i_reg(cpu));
38458  	prev_lbm_d_reg[cpu] = readl_relaxed(ld_d_reg(cpu));
38459 -	//printk("P%d reg value = 0x%04x\n", cpu, prev_lbm_d_reg[cpu]);
38460 +	printk("P%d reg value = 0x%04x\n", cpu, prev_lbm_d_reg[cpu]);
38461  	for (way=0;way<MAX_NR_WAYS;way++) {
38462 -		if ( (0x00000001 << way) & (prev_lbm_d_reg[cpu]) )
38463 +		if (( (0x00000001 << way) & (prev_lbm_d_reg[cpu]) ) &&
38464 +			!all)
38465  			continue;
38466 -		//printk("P%d flushes way #%d\n", cpu, way);
38467 +		printk("P%d flushes way #%d\n", cpu, way);
38468  		for (color=0;color<MAX_NR_COLORS;color++) {
38469  			void *vaddr = flusher_pages[way][color];
38470  			u32 lvalue  = unlocked_way[way];
38471 diff --git a/litmus/litmus.c b/litmus/litmus.c
38472 index 5692905..d720390 100644
38473 --- a/litmus/litmus.c
38474 +++ b/litmus/litmus.c
38475 @@ -319,7 +319,6 @@ asmlinkage long sys_null_call(cycles_t __user *ts)
38476  		ret = put_user(now, ts);
38477  	}
38478  	else {
38479 -		//flush_cache_all();
38480  		int *dummy;
38481  		int size = 20, i, t = 0;
38482  		dummy = kmalloc(PAGE_SIZE*size, GFP_ATOMIC);
38483 @@ -505,7 +504,7 @@ asmlinkage long sys_set_page_color(int cpu)
38484  	
38485  	TRACE_TASK(current, "nr_pages = %d nr_failed = %d\n", nr_pages, nr_failed);
38486  	printk(KERN_INFO "node = %ld, nr_pages = %d, nr_failed = %d\n", node, nr_pages, nr_failed);
38487 -	flush_cache();
38488 +	flush_cache(1);
38489  	
38490  	return ret;
38491  }
38492 diff --git a/litmus/sched_mc2.c b/litmus/sched_mc2.c
38493 index e59030f..4536556 100644
38494 --- a/litmus/sched_mc2.c
38495 +++ b/litmus/sched_mc2.c
38496 @@ -29,7 +29,6 @@
38497  #define BUDGET_ENFORCEMENT_AT_C 1
38498  
38499  extern void do_partition(enum crit_level lv, int cpu);
38500 -extern void l2x0_flush_all(void);
38501  
38502  /* _global_env - reservation container for level-C tasks*/
38503  struct gmp_reservation_environment _global_env;
38504 @@ -658,7 +657,7 @@ static struct task_struct* mc2_schedule(struct task_struct * prev)
38505  	lt_t now;
38506  	struct mc2_cpu_state *state = local_cpu_state();
38507  
38508 -	pre_schedule(prev, state->cpu);
38509 +	//pre_schedule(prev, state->cpu);
38510  	
38511  	raw_spin_lock(&_global_env.lock);
38512  	raw_spin_lock(&state->lock);
38513 @@ -876,7 +875,7 @@ static long mc2_complete_job(void)
38514  	next_release = ns_to_ktime(get_release(current));
38515  	preempt_disable();
38516  	TRACE_CUR("next_release=%llu\n", get_release(current));
38517 -	flush_cache();
38518 +	//flush_cache();
38519  	if (get_release(current) > litmus_clock()) {
38520  		/* sleep until next_release */
38521  		set_current_state(TASK_INTERRUPTIBLE);
38522 -- 
38523 1.8.1.2
38524 
38525 
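For context on the 0x0000ffff masking introduced above: the lockdown registers written through ld_d_reg()/ld_i_reg() carry one lock bit per way, and the tables in cache_proc.c assume 16 implemented ways, so only bits [15:0] are meaningful and the upper half of the old 0xFFFFxxxx constants had no effect. The sketch below is an editor's illustration, not code from the series; it reproduces the nr_unlocked_way[] entries and the way_partitions[] conversion performed in do_partition().

#include <stdio.h>
#include <stdint.h>

/* nr_unlocked_way[n]: lock every way except the lowest n ways
 * (bit set = way locked, 16 implemented ways). */
static uint32_t nr_unlocked(unsigned int usable_ways)
{
	return 0x0000FFFFu & ~(((uint32_t)1 << usable_ways) - 1u);
}

/* do_partition(): the lockdown value is the complement of the per-level
 * way mask, truncated to the 16 implemented ways. */
static uint32_t lockdown_from_partition(uint32_t allowed_ways)
{
	return ~allowed_ways & 0x0000FFFFu;
}

int main(void)
{
	unsigned int n;

	for (n = 0; n <= 16; n++)
		printf("usable=%2u  nr_unlocked_way=0x%08X\n",
		       n, (unsigned int)nr_unlocked(n));

	/* way_partitions[8] = 0xffffff00 is the level-C mask: ways 8-15
	 * allowed, so ways 0-7 end up locked. */
	printf("level-C lockdown = 0x%08X\n",
	       (unsigned int)lockdown_from_partition(0xffffff00u));
	return 0;
}

Running it reproduces the table above (one usable way gives 0x0000FFFE, sixteen give 0x00000000) and the 0x000000FF value used by the level-C entries of prev_lockdown_d_reg[]/prev_lockdown_i_reg[].
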
38526 From e7fb9c6907dbf764dadf9b05038dc7c80c2aa95b Mon Sep 17 00:00:00 2001
38527 From: Namhoon Kim <namhoonk@cs.unc.edu>
38528 Date: Fri, 17 Apr 2015 12:50:42 -0400
38529 Subject: [PATCH 118/119] cache_proc: comment out printk debugging in flush_cache()
38530 
38531 ---
38532  litmus/cache_proc.c | 4 ++--
38533  1 file changed, 2 insertions(+), 2 deletions(-)
38534 
38535 diff --git a/litmus/cache_proc.c b/litmus/cache_proc.c
38536 index 3c7724d..703b290 100644
38537 --- a/litmus/cache_proc.c
38538 +++ b/litmus/cache_proc.c
38539 @@ -1009,12 +1009,12 @@ void flush_cache(int all)
38540  	
38541  	prev_lbm_i_reg[cpu] = readl_relaxed(ld_i_reg(cpu));
38542  	prev_lbm_d_reg[cpu] = readl_relaxed(ld_d_reg(cpu));
38543 -	printk("P%d reg value = 0x%04x\n", cpu, prev_lbm_d_reg[cpu]);
38544 +	//printk("P%d reg value = 0x%04x\n", cpu, prev_lbm_d_reg[cpu]);
38545  	for (way=0;way<MAX_NR_WAYS;way++) {
38546  		if (( (0x00000001 << way) & (prev_lbm_d_reg[cpu]) ) &&
38547  			!all)
38548  			continue;
38549 -		printk("P%d flushes way #%d\n", cpu, way);
38550 +		//printk("P%d flushes way #%d\n", cpu, way);
38551  		for (color=0;color<MAX_NR_COLORS;color++) {
38552  			void *vaddr = flusher_pages[way][color];
38553  			u32 lvalue  = unlocked_way[way];
38554 -- 
38555 1.8.1.2
38556 
38557 
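The two printk() calls silenced above sit in the per-way loop of flush_cache(int all) introduced by the previous patch. The stand-alone sketch below is an editor's illustration of just that loop's way-selection test; the real function then touches flusher_pages[way][color] under the matching unlocked_way[] lockdown value to evict the selected ways, which is omitted here.

#include <stdio.h>
#include <stdint.h>

#define MAX_NR_WAYS 16

/* A set bit in the current lockdown register means the way is locked for
 * this CPU.  With all == 0 those ways are skipped; with all == 1 every
 * way is flushed regardless of the lockdown state. */
static void show_flushed_ways(uint32_t lockdown_reg, int all)
{
	int way;

	for (way = 0; way < MAX_NR_WAYS; way++) {
		if (((1u << way) & lockdown_reg) && !all)
			continue;	/* locked way: leave its contents alone */
		printf("would flush way %d\n", way);
	}
}

int main(void)
{
	show_flushed_ways(0x0000FF00u, 0);	/* partial flush: ways 0-7 only */
	show_flushed_ways(0x0000FF00u, 1);	/* full flush: all 16 ways */
	return 0;
}
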
38558 From a302acc51e029fcf78e339a9360ba6231caae98d Mon Sep 17 00:00:00 2001
38559 From: Namhoon Kim <namhoonk@cs.unc.edu>
38560 Date: Thu, 4 Jun 2015 17:13:25 -0400
38561 Subject: [PATCH 119/119] Clean up debugging code for submission version
38562 
38563 ---
38564  litmus/bank_proc.c            |  99 ++++++-------------------------
38565  litmus/cache_proc.c           |  67 ++-------------------
38566  litmus/polling_reservations.c |  55 +++---------------
38567  litmus/reservation.c          |  95 +++++-------------------------
38568  litmus/sched_mc2.c            | 131 +++++-------------------------------------
38569  litmus/uncachedev.c           |   4 +-
38570  6 files changed, 59 insertions(+), 392 deletions(-)
38571 
38572 diff --git a/litmus/bank_proc.c b/litmus/bank_proc.c
38573 index 655eb27..932340d 100644
38574 --- a/litmus/bank_proc.c
38575 +++ b/litmus/bank_proc.c
38576 @@ -94,7 +94,6 @@ static struct color_group *color_groups;
38577   */
38578  unsigned int counting_one_set(unsigned int v)
38579  {
38580 -//    unsigned int v; // count the number of bits set in v
38581      unsigned int c; // c accumulates the total bits set in v
38582  
38583      for (c = 0; v; v >>= 1)
38584 @@ -152,8 +151,6 @@ static inline unsigned int page_list_index(struct page *page)
38585  {
38586      unsigned int idx;  
38587      idx = (page_color(page) + page_bank(page)*(number_cachecolors));
38588 -//    printk("address = %lx, ", page_to_phys(page));
38589 -//    printk("color(%d), bank(%d), indx = %d\n", page_color(page), page_bank(page), idx);
38590  
38591      return idx; 
38592  }
38593 @@ -214,26 +211,16 @@ void add_page_to_color_list(struct page *page)
38594   */
38595  static int do_add_pages(void)
38596  {
38597 -	//printk("LITMUS do add pages\n");
38598 -	
38599  	struct page *page, *page_tmp;
38600  	LIST_HEAD(free_later);
38601  	unsigned long color;
38602  	int ret = 0;
38603  	int i = 0;
38604 -        int free_counter = 0;
38605 -        unsigned long counter[128]= {0}; 
38606 -        
38607 -        //printk("Before refill : \n");
38608 -        //show_nr_pages();
38609 +	int free_counter = 0;
38610 +	unsigned long counter[128]= {0}; 
38611  
38612 -	// until all the page lists contain enough pages 
38613 -	//for (i =0; i<5; i++) {
38614  	for (i=0; i< 1024*100;i++) {
38615 -	//while (smallest_nr_pages() < PAGES_PER_COLOR) {
38616 -       //         printk("smallest = %d\n", smallest_nr_pages());	
38617  		page = alloc_page(GFP_HIGHUSER_MOVABLE);
38618 -	    //    page = alloc_pages_exact_node(0, GFP_HIGHUSER_MOVABLE, 0);
38619  	
38620  		if (unlikely(!page)) {
38621  			printk(KERN_WARNING "Could not allocate pages.\n");
38622 @@ -242,47 +229,16 @@ static int do_add_pages(void)
38623  		}
38624  		color = page_list_index(page);
38625  		counter[color]++;
38626 -	//	printk("page(%d) = color %x, bank %x, [color] =%d \n", color, page_color(page), page_bank(page), atomic_read(&color_groups[color].nr_pages));
38627 -                //show_nr_pages();
38628 +
38629  		if (atomic_read(&color_groups[color].nr_pages) < PAGES_PER_COLOR && color>=32) {
38630 -	//	if ( PAGES_PER_COLOR && color>=16*2) {
38631  			add_page_to_color_list(page);
38632 -	//		printk("add page(%d) = color %x, bank %x\n", color, page_color(page), page_bank(page));
38633 -		} else{
38634 +		} else {
38635  			// Pages here will be freed later 
38636  			list_add_tail(&page->lru, &free_later);
38637  			free_counter++;
38638 -		        //list_del(&page->lru);
38639 -		//        __free_page(page);
38640 -	//		printk("useless page(%d) = color %x, bank %x\n", color,  page_color(page), page_bank(page));
38641  		}
38642 -               //show_nr_pages();
38643 -                /*
38644 -                if(free_counter >= PAGES_PER_COLOR)
38645 -                {
38646 -                    printk("free unwanted page list eariler");
38647 -                    free_counter = 0;
38648 -	            list_for_each_entry_safe(page, page_tmp, &free_later, lru) {
38649 -		        list_del(&page->lru);
38650 -		        __free_page(page);
38651 -	            }
38652 -
38653 -                    show_nr_pages();
38654 -                }
38655 -                */
38656 -        }
38657 -/*        printk("page counter = \n");
38658 -        for (i=0; i<128; i++)
38659 -        {
38660 -            printk("(%03d) = %4d, ", i , counter[i]);
38661 -            if(i%8 == 7){
38662 -                printk("\n");
38663 -            }
38664 +	}
38665  
38666 -        }
38667 -*/	
38668 -        //printk("After refill : \n");
38669 -        //show_nr_pages();
38670  #if 1
38671  	// Free the unwanted pages
38672  	list_for_each_entry_safe(page, page_tmp, &free_later, lru) {
38673 @@ -302,13 +258,11 @@ out:
38674   */ 
38675  static struct  page *new_alloc_page_color( unsigned long color)
38676  {
38677 -//	printk("allocate new page color = %d\n", color);	
38678  	struct color_group *cgroup;
38679  	struct page *rPage = NULL;
38680  		
38681  	if( (color <0) || (color)>(number_cachecolors*number_banks -1)) {
38682  		TRACE_CUR("Wrong color %lu\n", color);	
38683 -//		printk(KERN_WARNING "Wrong color %lu\n", color);
38684  		goto out;
38685  	}
38686  
38687 @@ -317,24 +271,19 @@ static struct  page *new_alloc_page_color( unsigned long color)
38688  	spin_lock(&cgroup->lock);
38689  	if (unlikely(!atomic_read(&cgroup->nr_pages))) {
38690  		TRACE_CUR("No free %lu colored pages.\n", color);
38691 -//		printk(KERN_WARNING "no free %lu colored pages.\n", color);
38692  		goto out_unlock;
38693  	}
38694  	rPage = list_first_entry(&cgroup->list, struct page, lru);
38695  	BUG_ON(page_count(rPage) > 1);
38696 -	//get_page(rPage);
38697  	list_del(&rPage->lru);
38698  	atomic_dec(&cgroup->nr_pages);
38699  	ClearPageLRU(rPage);
38700  out_unlock:
38701  	spin_unlock(&cgroup->lock);
38702  out:
38703 -	if( smallest_nr_pages() == 0)
38704 -        {
38705 +	if(smallest_nr_pages() == 0) {
38706  		do_add_pages();
38707 -       //     printk("ERROR(bank_proc.c) = We don't have enough pages in bank_proc.c\n");        
38708 -        
38709 -        }
38710 +	}
38711  	return rPage;
38712  }
38713  
38714 @@ -357,23 +306,17 @@ struct page* get_colored_page(unsigned long color)
38715   */
38716  struct page *new_alloc_page(struct page *page, unsigned long node, int **x)
38717  {
38718 -//	printk("allocate new page node = %d\n", node);	
38719 -//	return alloc_pages_exact_node(node, GFP_HIGHUSER_MOVABLE, 0);
38720  	struct color_group *cgroup;
38721  	struct page *rPage = NULL;
38722  	unsigned int color;
38723 +	unsigned int idx = 0;
38724 +	idx += num_by_bitmask_index(set_partition[node], set_index[node]);
38725 +	idx += number_cachecolors* num_by_bitmask_index(bank_partition[node], bank_index[node]);
38726  	
38727 -
38728 -        unsigned int idx = 0;
38729 -        idx += num_by_bitmask_index(set_partition[node], set_index[node]);
38730 -        idx += number_cachecolors* num_by_bitmask_index(bank_partition[node], bank_index[node]);
38731 -	//printk("node  = %d, idx = %d\n", node, idx);
38732 -
38733  	rPage =  new_alloc_page_color(idx);
38734 -        
38735 -            
38736 -        set_index[node] = (set_index[node]+1) % counting_one_set(set_partition[node]);
38737 -        bank_index[node] = (bank_index[node]+1) % counting_one_set(bank_partition[node]);
38738 +	set_index[node] = (set_index[node]+1) % counting_one_set(set_partition[node]);
38739 +	bank_index[node] = (bank_index[node]+1) % counting_one_set(bank_partition[node]);
38740 +
38741  	return rPage; 
38742  }
38743  
38744 @@ -386,11 +329,10 @@ void reclaim_page(struct page *page)
38745  	const unsigned long color = page_list_index(page);
38746  	unsigned long nr_reclaimed = 0;
38747  	spin_lock(&reclaim_lock);
38748 -    	put_page(page);
38749 +	put_page(page);
38750  	add_page_to_color_list(page);
38751  
38752  	spin_unlock(&reclaim_lock);
38753 -	printk("Reclaimed page(%d) = color %x, bank %x, [color] =%d \n", color, page_color(page), page_bank(page), atomic_read(&color_groups[color].nr_pages));
38754  }
38755  
38756  
38757 @@ -405,10 +347,9 @@ static int __init init_variables(void)
38758  	number_cachecolors = counting_one_set(CACHE_MASK);
38759  	number_cachecolors = two_exp(number_cachecolors);
38760  	NUM_PAGE_LIST = number_banks * number_cachecolors; 
38761 -        printk(KERN_WARNING "number of banks = %d, number of cachecolors=%d\n", number_banks, number_cachecolors);
38762 +
38763  	mutex_init(&void_lockdown_proc);
38764  	spin_lock_init(&reclaim_lock);
38765 -
38766  }
38767  
38768  
38769 @@ -421,14 +362,13 @@ static int __init init_color_groups(void)
38770  	unsigned long i;
38771  	int err = 0;
38772  
38773 -        printk("NUM_PAGE_LIST = %d\n", NUM_PAGE_LIST);
38774 -        color_groups = kmalloc(NUM_PAGE_LIST *sizeof(struct color_group), GFP_KERNEL);
38775 +	printk("NUM_PAGE_LIST = %d\n", NUM_PAGE_LIST);
38776 +	color_groups = kmalloc(NUM_PAGE_LIST *sizeof(struct color_group), GFP_KERNEL);
38777  
38778  	if (!color_groups) {
38779  		printk(KERN_WARNING "Could not allocate color groups.\n");
38780  		err = -ENOMEM;
38781 -	}else{
38782 -
38783 +	} else {
38784  		for (i = 0; i < NUM_PAGE_LIST; ++i) {
38785  			cgroup = &color_groups[i];
38786  			atomic_set(&cgroup->nr_pages, 0);
38787 @@ -436,7 +376,7 @@ static int __init init_color_groups(void)
38788  			spin_lock_init(&cgroup->lock);
38789  		}
38790  	}
38791 -        return err;
38792 +	return err;
38793  }
38794  
38795  int set_partition_handler(struct ctl_table *table, int write, void __user *buffer,
38796 @@ -733,4 +673,3 @@ out:
38797  }
38798  
38799  module_init(litmus_color_init);
38800 -
38801 diff --git a/litmus/cache_proc.c b/litmus/cache_proc.c
38802 index 703b290..e343e73 100644
38803 --- a/litmus/cache_proc.c
38804 +++ b/litmus/cache_proc.c
38805 @@ -190,7 +190,6 @@ static inline void cache_sync(void)
38806  static void print_lockdown_registers(int cpu)
38807  {
38808  	int i;
38809 -	//for (i = 0; i < nr_lockregs; i++) {
38810  	for (i = 0; i < 4; i++) {
38811  		printk("P%d Lockdown Data CPU %2d: 0x%04x\n", cpu,
38812  				i, readl_relaxed(ld_d_reg(i)));
38813 @@ -272,9 +271,6 @@ int way_partition_handler(struct ctl_table *table, int write, void __user *buffe
38814  	
38815  	mutex_lock(&lockdown_proc);
38816  	
38817 -	//flush_cache_all();
38818 -	//cache_sync();
38819 -	//l2x0_flush_all();
38820  	flush_cache(1);
38821  	
38822  	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
38823 @@ -310,10 +306,6 @@ int lock_all_handler(struct ctl_table *table, int write, void __user *buffer,
38824  	
38825  	mutex_lock(&lockdown_proc);
38826  	
38827 -	//flush_cache_all();
38828 -	//outer_flush_all();
38829 -	//cache_sync();
38830 -	//l2x0_flush_all();
38831  	flush_cache(1);
38832  	
38833  	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
38834 @@ -327,15 +319,7 @@ int lock_all_handler(struct ctl_table *table, int write, void __user *buffer,
38835  			writel_relaxed(0xFFFF, cache_base + L2X0_LOCKDOWN_WAY_I_BASE +
38836  				       i * L2X0_LOCKDOWN_STRIDE);
38837  		}
38838 -/*		
38839 -		for (i = 0; i < nr_lockregs;  i++) {
38840 -			barrier();
38841 -			mem_lock(LOCK_ALL, i);
38842 -			barrier();
38843 -			//writel_relaxed(nr_unlocked_way[0], ld_d_reg(i));
38844 -			//writel_relaxed(nr_unlocked_way[0], ld_i_reg(i));
38845 -		}
38846 -*/		
38847 +
38848  	}
38849  	if (write && lock_all == 0) {
38850  		for (i = 0; i < nr_lockregs; i++) {
38851 @@ -344,15 +328,6 @@ int lock_all_handler(struct ctl_table *table, int write, void __user *buffer,
38852  			writel_relaxed(0x0, cache_base + L2X0_LOCKDOWN_WAY_I_BASE +
38853  				       i * L2X0_LOCKDOWN_STRIDE);
38854  		}
38855 -/*
38856 -		for (i = 0; i < nr_lockregs;  i++) {
38857 -			barrier();
38858 -			mem_lock(UNLOCK_ALL, i);
38859 -			barrier();
38860 -			//writel_relaxed(nr_unlocked_way[16], ld_d_reg(i));
38861 -			//writel_relaxed(nr_unlocked_way[16], ld_i_reg(i));
38862 -		}
38863 -*/
38864  	}
38865  	printk("LOCK_ALL HANDLER\n");
38866  	local_irq_save(flags);
38867 @@ -365,10 +340,6 @@ out:
38868  
38869  void cache_lockdown(u32 lock_val, int cpu)
38870  {
38871 -	//unsigned long flags;
38872 -
38873 -	//raw_spin_lock_irqsave(&cache_lock, flags);
38874 -
38875  	__asm__ __volatile__ (
38876  "	str	%[lockval], [%[dcachereg]]\n"
38877  "	str	%[lockval], [%[icachereg]]\n"
38878 @@ -378,7 +349,6 @@ void cache_lockdown(u32 lock_val, int cpu)
38879  	  [lockval] "r" (lock_val)
38880  	: "cc");
38881  
38882 -	//raw_spin_unlock_irqrestore(&cache_lock, flags);
38883  }
38884  
38885  void do_partition(enum crit_level lv, int cpu)
38886 @@ -393,42 +363,26 @@ void do_partition(enum crit_level lv, int cpu)
38887  		case CRIT_LEVEL_A:
38888  			regs = ~way_partitions[cpu*2];
38889  			regs &= 0x0000ffff;
38890 -			//writel_relaxed(regs, ld_d_reg(cpu));
38891 -			//writel_relaxed(regs, ld_i_reg(cpu));
38892  			break;
38893  		case CRIT_LEVEL_B:
38894  			regs = ~way_partitions[cpu*2+1];
38895  			regs &= 0x0000ffff;
38896 -			//writel_relaxed(regs, ld_d_reg(cpu));
38897 -			//writel_relaxed(regs, ld_i_reg(cpu));
38898  			break;
38899  		case CRIT_LEVEL_C:
38900  		case NUM_CRIT_LEVELS:
38901  			regs = ~way_partitions[8];
38902  			regs &= 0x0000ffff;
38903 -			//writel_relaxed(regs, ld_d_reg(cpu));
38904 -			//writel_relaxed(regs, ld_i_reg(cpu));
38905  			break;
38906  		default:
38907  			BUG();
38908 -
38909  	}
38910  	barrier();
38911  	cache_lockdown(regs, cpu);
38912  	barrier();
38913 -/*	
38914 -	printk(KERN_INFO "P%d lockdown on P%d\n", smp_processor_id(), cpu);
38915 -	printk(KERN_INFO "CRIT_LEVEL %d\n", lv);
38916 -	print_lockdown_registers(smp_processor_id());
38917 -*/	
38918 +
38919  	raw_spin_unlock_irqrestore(&cache_lock, flags);
38920  	
38921  	flush_cache(0);
38922 -	
38923 -	//cache_sync();
38924 -//	barrier();
38925 -//	mem_lock(regs, cpu);
38926 -//	barrier();
38927  }
38928  
38929  int use_part_proc_handler(struct ctl_table *table, int write, void __user *buffer,
38930 @@ -438,15 +392,10 @@ int use_part_proc_handler(struct ctl_table *table, int write, void __user *buffe
38931  	
38932  	mutex_lock(&lockdown_proc);
38933  	
38934 -	//flush_cache_all();
38935 -	//cache_sync();
38936 -	//l2x0_flush_all();
38937 -	
38938  	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
38939  	if (ret)
38940  		goto out;
38941  	
38942 -
38943  	printk("USE_PART HANDLER = %d\n", use_part);
38944  
38945  out:
38946 @@ -461,15 +410,9 @@ int os_isolation_proc_handler(struct ctl_table *table, int write, void __user *b
38947  	
38948  	mutex_lock(&lockdown_proc);
38949  	
38950 -	//flush_cache_all();
38951 -	//cache_sync();
38952 -	//l2x0_flush_all();
38953 -	//flush_cache();
38954 -	
38955  	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
38956  	if (ret)
38957  		goto out;
38958 -	
38959  
38960  	printk("OS_ISOLATION HANDLER = %d\n", os_isolation);
38961  
38962 @@ -482,7 +425,6 @@ void inline enter_irq_mode(void)
38963  {
38964  	int cpu = smp_processor_id();
38965  
38966 -	//return;
38967  	if (os_isolation == 0)
38968  		return;	
38969  
38970 @@ -496,7 +438,7 @@ void inline enter_irq_mode(void)
38971  void inline exit_irq_mode(void)
38972  {
38973  	int cpu = smp_processor_id();
38974 -	//return;
38975 +
38976  	if (os_isolation == 0)
38977  		return;
38978  	writel_relaxed(prev_lockdown_i_reg[cpu], ld_i_reg(cpu));
38979 @@ -921,6 +863,7 @@ int setup_flusher_array(void)
38980  
38981  		for (color = 0; color < MAX_NR_COLORS; color++) {
38982  			int node;
38983 +			/* manually assigned node numbers */
38984  			switch (color) {
38985  				case 0:
38986  					node = 32;
38987 @@ -1009,12 +952,10 @@ void flush_cache(int all)
38988  	
38989  	prev_lbm_i_reg[cpu] = readl_relaxed(ld_i_reg(cpu));
38990  	prev_lbm_d_reg[cpu] = readl_relaxed(ld_d_reg(cpu));
38991 -	//printk("P%d reg value = 0x%04x\n", cpu, prev_lbm_d_reg[cpu]);
38992  	for (way=0;way<MAX_NR_WAYS;way++) {
38993  		if (( (0x00000001 << way) & (prev_lbm_d_reg[cpu]) ) &&
38994  			!all)
38995  			continue;
38996 -		//printk("P%d flushes way #%d\n", cpu, way);
38997  		for (color=0;color<MAX_NR_COLORS;color++) {
38998  			void *vaddr = flusher_pages[way][color];
38999  			u32 lvalue  = unlocked_way[way];
39000 diff --git a/litmus/polling_reservations.c b/litmus/polling_reservations.c
39001 index df1aeb0..fa73070 100644
39002 --- a/litmus/polling_reservations.c
39003 +++ b/litmus/polling_reservations.c
39004 @@ -30,11 +30,6 @@ static void periodic_polling_client_arrives(
39005  				instances =  div64_u64(tmp, pres->period);
39006  				res->next_replenishment = res->env->time_zero + instances * pres->period;
39007  			}
39008 -				
39009 -			TRACE("ENV_TIME_ZERO %llu\n", res->env->time_zero);
39010 -			TRACE("pol-res: R%d activate tmp=%llu instances=%llu period=%llu nextrp=%llu cur=%llu\n",
39011 -				res->id, tmp, instances, pres->period, res->next_replenishment,
39012 -				res->env->current_time);
39013  
39014  			res->env->change_state(res->env, res,
39015  				RESERVATION_DEPLETED);
39016 @@ -72,9 +67,6 @@ static void periodic_polling_client_departs(
39017  			if (list_empty(&res->clients)) {
39018  				res->env->change_state(res->env, res,
39019  						RESERVATION_ACTIVE_IDLE);
39020 -//					did_signal_job_completion ?
39021 -//						RESERVATION_DEPLETED :
39022 -//						RESERVATION_ACTIVE_IDLE);
39023  			} /* else: nothing to do, more clients ready */
39024  			break;
39025  
39026 @@ -96,7 +88,6 @@ static void periodic_polling_on_replenishment(
39027  	res->next_replenishment += pres->period;
39028  	res->budget_consumed = 0;
39029  
39030 -	TRACE("polling_replenish(%u): next_replenishment=%llu\n", res->id, res->next_replenishment);
39031  	switch (res->state) {
39032  		case RESERVATION_DEPLETED:
39033  		case RESERVATION_INACTIVE:
39034 @@ -147,8 +138,7 @@ static void common_drain_budget(
39035  	switch (res->state) {
39036  		case RESERVATION_DEPLETED:
39037  		case RESERVATION_INACTIVE:
39038 -			//BUG();
39039 -			TRACE("!!!!!!!!!!!!!!!STATE ERROR R%d STATE(%d)\n", res->id, res->state);
39040 +			BUG();
39041  			break;
39042  
39043  		case RESERVATION_ACTIVE_IDLE:
39044 @@ -282,7 +272,7 @@ void polling_reservation_init(
39045  	pres->period = period;
39046  	pres->deadline = deadline;
39047  	pres->offset = offset;
39048 -	TRACE_TASK(current, "polling_reservation_init: periodic %d, use_edf %d\n", use_periodic_polling, use_edf_prio);
39049 +
39050  	if (use_periodic_polling) {
39051  		if (use_edf_prio)
39052  			pres->res.ops = &periodic_polling_ops_edf;
39053 @@ -363,7 +353,7 @@ static void td_client_departs(
39054  	switch (res->state) {
39055  		case RESERVATION_INACTIVE:
39056  		case RESERVATION_ACTIVE_IDLE:
39057 -			//BUG(); /* INACTIVE or IDLE <=> no client */
39058 +			/* INACTIVE or IDLE <=> no client */
39059  			break;
39060  
39061  		case RESERVATION_ACTIVE:
39062 @@ -383,7 +373,6 @@ static lt_t td_time_remaining_until_end(struct table_driven_reservation *tdres)
39063  {
39064  	lt_t now = tdres->res.env->current_time;
39065  	lt_t end = tdres->cur_interval.end;
39066 -	//TRACE("td_remaining(%u): start=%llu now=%llu end=%llu state=%d\n", tdres->res.id,	tdres->cur_interval.start, now, end, tdres->res.state);
39067  	if (now >=  end)
39068  		return 0;
39069  	else
39070 @@ -396,22 +385,15 @@ static void td_replenish(
39071  	struct table_driven_reservation *tdres =
39072  		container_of(res, struct table_driven_reservation, res);
39073  
39074 -	//TRACE("td_replenish(%u): expected_replenishment=%llu\n", res->id, res->next_replenishment);
39075 -
39076  	/* figure out current interval */
39077  	tdres->cur_interval.start = tdres->major_cycle_start +
39078  		tdres->intervals[tdres->next_interval].start;
39079  	tdres->cur_interval.end =  tdres->major_cycle_start +
39080  		tdres->intervals[tdres->next_interval].end;
39081 -/*	TRACE("major_cycle_start=%llu => [%llu, %llu]\n",
39082 -		tdres->major_cycle_start,
39083 -		tdres->cur_interval.start,
39084 -		tdres->cur_interval.end);
39085 -*/
39086 +
39087  	/* reset budget */
39088  	res->cur_budget = td_time_remaining_until_end(tdres);
39089  	res->budget_consumed = 0;
39090 -	//TRACE("td_replenish(%u): %s budget=%llu\n", res->id, res->cur_budget ? "" : "WARNING", res->cur_budget);
39091  
39092  	/* prepare next slot */
39093  	tdres->next_interval = (tdres->next_interval + 1) % tdres->num_intervals;
39094 @@ -422,8 +404,6 @@ static void td_replenish(
39095  	/* determine next time this reservation becomes eligible to execute */
39096  	res->next_replenishment  = tdres->major_cycle_start;
39097  	res->next_replenishment += tdres->intervals[tdres->next_interval].start;
39098 -	//TRACE("td_replenish(%u): next_replenishment=%llu\n", res->id, res->next_replenishment);
39099 -
39100  
39101  	switch (res->state) {
39102  		case RESERVATION_DEPLETED:
39103 @@ -457,34 +437,22 @@ static void td_drain_budget(
39104  	/* Table-driven scheduling: instead of tracking the budget, we compute
39105  	 * how much time is left in this allocation interval. */
39106  
39107 -	/* sanity check: we should never try to drain from future slots */
39108 -	//TRACE("TD_DRAIN STATE(%d) [%llu,%llu]  %llu ?\n", res->state, tdres->cur_interval.start, tdres->cur_interval.end, res->env->current_time);
39109 -	//BUG_ON(tdres->cur_interval.start > res->env->current_time);
39110 -	if (tdres->cur_interval.start > res->env->current_time)
39111 -		TRACE("TD_DRAIN BUG!!!!!!!!!!\n");
39112 -
39113  	switch (res->state) {
39114  		case RESERVATION_DEPLETED:
39115  		case RESERVATION_INACTIVE:
39116 -			//BUG();
39117 -			TRACE("TD_DRAIN!!!!!!!!! RES_STATE = %d\n", res->state);
39118 +			BUG();
39119  			break;
39120  
39121  		case RESERVATION_ACTIVE_IDLE:
39122  		case RESERVATION_ACTIVE:
39123  			res->cur_budget = td_time_remaining_until_end(tdres);
39124 -			//TRACE("td_drain_budget(%u): drained to budget=%llu\n", res->id, res->cur_budget);
39125  			if (!res->cur_budget) {
39126  				res->env->change_state(res->env, res,
39127  					RESERVATION_DEPLETED);
39128  			} else {
39129  				/* sanity check budget calculation */
39130 -				//BUG_ON(res->env->current_time >= tdres->cur_interval.end);
39131 -				//BUG_ON(res->env->current_time < tdres->cur_interval.start);
39132 -				if (res->env->current_time >= tdres->cur_interval.end)
39133 -					printk(KERN_ALERT "TD_DRAIN_BUDGET WARNING1\n");
39134 -				if (res->env->current_time < tdres->cur_interval.start)
39135 -					printk(KERN_ALERT "TD_DRAIN_BUDGET WARNING2\n");
39136 +				BUG_ON(res->env->current_time >= tdres->cur_interval.end);
39137 +				BUG_ON(res->env->current_time < tdres->cur_interval.start);
39138  			}
39139  
39140  			break;
39141 @@ -502,24 +470,15 @@ static struct task_struct* td_dispatch_client(
39142  	/* usual logic for selecting a client */
39143  	t = default_dispatch_client(res, for_at_most);
39144  
39145 -	TRACE_TASK(t, "td_dispatch_client(%u): selected, budget=%llu\n",
39146 -		res->id, res->cur_budget);
39147 -
39148  	/* check how much budget we have left in this time slot */
39149  	res->cur_budget = td_time_remaining_until_end(tdres);
39150  
39151 -	TRACE_TASK(t, "td_dispatch_client(%u): updated to budget=%llu next=%d\n",
39152 -		res->id, res->cur_budget, tdres->next_interval);
39153 -
39154  	if (unlikely(!res->cur_budget)) {
39155  		/* Unlikely case: if we ran out of budget, the user configured
39156  		 * a broken scheduling table (overlapping table slots).
39157  		 * Not much we can do about this, but we can't dispatch a job
39158  		 * now without causing overload. So let's register this reservation
39159  		 * as depleted and wait for the next allocation. */
39160 -		TRACE("td_dispatch_client(%u): budget unexpectedly depleted "
39161 -			"(check scheduling table for unintended overlap)\n",
39162 -			res->id);
39163  		res->env->change_state(res->env, res,
39164  			RESERVATION_DEPLETED);
39165  		return NULL;
39166 diff --git a/litmus/reservation.c b/litmus/reservation.c
39167 index efd16da..2afb8ee 100644
39168 --- a/litmus/reservation.c
39169 +++ b/litmus/reservation.c
39170 @@ -54,7 +54,6 @@ static void sup_scheduler_update_at(
39171  	struct sup_reservation_environment* sup_env,
39172  	lt_t when)
39173  {
39174 -	//TRACE("SCHEDULER_UPDATE_AT update: %llu > when %llu\n", sup_env->next_scheduler_update, when);
39175  	if (sup_env->next_scheduler_update > when)
39176  		sup_env->next_scheduler_update = when;
39177  }
39178 @@ -199,15 +198,11 @@ static void sup_charge_budget(
39179  		/* charge all ACTIVE_IDLE up to the first ACTIVE reservation */
39180  		res = list_entry(pos, struct reservation, list);
39181  		if (res->state == RESERVATION_ACTIVE) {
39182 -			TRACE("sup_charge_budget ACTIVE R%u drain %llu\n", res->id, delta);
39183  			if (encountered_active == 0 && res->blocked_by_ghost == 0) {
39184 -				TRACE("DRAIN !!\n");
39185  				res->ops->drain_budget(res, delta);
39186  				encountered_active = 1;
39187  			}			
39188  		} else {
39189 -			//BUG_ON(res->state != RESERVATION_ACTIVE_IDLE);
39190 -			TRACE("sup_charge_budget INACTIVE R%u drain %llu\n", res->id, delta);
39191  			res->ops->drain_budget(res, delta);
39192  		}
39193  		if (res->state == RESERVATION_ACTIVE ||
39194 @@ -215,15 +210,9 @@ static void sup_charge_budget(
39195  		{
39196  			/* make sure scheduler is invoked when this reservation expires
39197  			 * its remaining budget */
39198 -			 TRACE("requesting scheduler update for reservation %u in %llu nanoseconds\n",
39199 -				res->id, res->cur_budget);
39200  			 sup_scheduler_update_after(sup_env, res->cur_budget);
39201  		}
39202 -		//if (encountered_active == 2)
39203 -			/* stop at the first ACTIVE reservation */
39204 -		//	break;
39205  	}
39206 -	//TRACE("finished charging budgets\n");
39207  }
39208  
39209  static void sup_replenish_budgets(struct sup_reservation_environment* sup_env)
39210 @@ -240,7 +229,6 @@ static void sup_replenish_budgets(struct sup_reservation_environment* sup_env)
39211  			break;
39212  		}
39213  	}
39214 -	//TRACE("finished replenishing budgets\n");
39215  
39216  	/* request a scheduler update at the next replenishment instant */
39217  	res = list_first_entry_or_null(&sup_env->depleted_reservations,
39218 @@ -258,7 +246,6 @@ void sup_update_time(
39219  	/* If the time didn't advance, there is nothing to do.
39220  	 * This check makes it safe to call sup_advance_time() potentially
39221  	 * multiple times (e.g., via different code paths. */
39222 -	//TRACE("(sup_update_time) now: %llu, current_time: %llu\n", now, sup_env->env.current_time);
39223  	if (unlikely(now <= sup_env->env.current_time))
39224  		return;
39225  
39226 @@ -270,11 +257,9 @@ void sup_update_time(
39227  		sup_env->next_scheduler_update = SUP_NO_SCHEDULER_UPDATE;
39228  
39229  	/* deplete budgets by passage of time */
39230 -	//TRACE("CHARGE###\n");
39231  	sup_charge_budget(sup_env, delta);
39232  
39233  	/* check if any budgets where replenished */
39234 -	//TRACE("REPLENISH###\n");
39235  	sup_replenish_budgets(sup_env);
39236  }
39237  
39238 @@ -308,9 +293,6 @@ static void sup_res_change_state(
39239  
39240  	sup_env = container_of(env, struct sup_reservation_environment, env);
39241  
39242 -	TRACE("reservation R%d state %d->%d at %llu\n",
39243 -		res->id, res->state, new_state, env->current_time);
39244 -
39245  	list_del(&res->list);
39246  	/* check if we need to reschedule because we lost an active reservation */
39247  	if (res->state == RESERVATION_ACTIVE && !sup_env->will_schedule)
39248 @@ -391,18 +373,11 @@ static void gmp_add_event(
39249  	struct list_head *pos;
39250  	int found = 0, update = 0;
39251  
39252 -	//when = div64_u64(when, TIMER_RESOLUTION);
39253 -	//when *= TIMER_RESOLUTION;
39254 -//printk(KERN_ALERT "GMP_ADD id=%d type=%d when=%llu\n", id, type, when);
39255  	nevent = gmp_find_event_by_id(gmp_env, id);
39256  	
39257 -	if (nevent)
39258 -		TRACE("EVENT R%d update prev = %llu, new = %llu\n", nevent->id, nevent->next_update, when);
39259 -	
39260  	if (nevent && nevent->next_update > when) {
39261  		list_del(&nevent->list);
39262  		update = 1;
39263 -		
39264  	}
39265  	
39266  	if (!nevent || nevent->type != type || update == 1) {
39267 @@ -419,29 +394,21 @@ static void gmp_add_event(
39268  			if (queued->next_update > nevent->next_update) {
39269  				list_add(&nevent->list, pos->prev);
39270  				found = 1;
39271 -				TRACE("NEXT_EVENT id=%d type=%d update=%llu ADDED at before %llu\n", nevent->id, nevent->type, nevent->next_update, queued->next_update);
39272  				break;
39273  			}
39274  		}
39275  		
39276  		if (!found) {
39277  			list_add_tail(&nevent->list, &gmp_env->next_events);
39278 -			TRACE("NEXT_EVENT id=%d type=%d update=%llu ADDED at TAIL\n", nevent->id, nevent->type, nevent->next_update);
39279  		}
39280 -	} else {
39281 -		//TRACE("EVENT FOUND id = %d type=%d when=%llu, NEW EVENT type=%d when=%llu\n", nevent->id, nevent->type, nevent->next_update, type, when);
39282 -; //printk(KERN_ALERT "EVENT FOUND id = %d type=%d when=%llu, NEW EVENT type=%d when=%llu\n", nevent->id, nevent->type, nevent->next_update, type, when);
39283  	}
39284  	
39285 -	TRACE("======START PRINTING EVENT LIST======\n");
39286 -	gmp_print_events(gmp_env, litmus_clock());
39287 -	TRACE("======FINISH PRINTING EVENT LIST======\n");
39288 +	/* gmp_print_events(gmp_env, litmus_clock()); */
39289  }
39290  
39291  void gmp_add_event_after(
39292  	struct gmp_reservation_environment* gmp_env, lt_t timeout, unsigned int id, event_type_t type)
39293  {
39294 -	//printk(KERN_ALERT "ADD_EVENT_AFTER id = %d\n", id);
39295  	gmp_add_event(gmp_env, gmp_env->env.current_time + timeout, id, type);
39296  }
39297  
39298 @@ -452,13 +419,10 @@ static void gmp_queue_depleted(
39299  	struct list_head *pos;
39300  	struct reservation *queued;
39301  	int found = 0;
39302 -
39303 -//printk(KERN_ALERT "R%d request to enqueue depleted_list\n", res->id);
39304  	
39305  	list_for_each(pos, &gmp_env->depleted_reservations) {
39306  		queued = list_entry(pos, struct reservation, list);
39307  		if (queued && (queued->next_replenishment > res->next_replenishment)) {
39308 -//printk(KERN_ALERT "QUEUED R%d %llu\n", queued->id, queued->next_replenishment);
39309  			list_add(&res->list, pos->prev);
39310  			found = 1;
39311  			break;
39312 @@ -468,8 +432,6 @@ static void gmp_queue_depleted(
39313  	if (!found)
39314  		list_add_tail(&res->list, &gmp_env->depleted_reservations);
39315  
39316 -	TRACE("R%d queued to depleted_list\n", res->id);
39317 -//printk(KERN_ALERT "R%d queued to depleted_list\n", res->id);
39318  	gmp_add_event(gmp_env, res->next_replenishment, res->id, EVENT_REPLENISH);
39319  }
39320  
39321 @@ -498,9 +460,9 @@ static void gmp_queue_active(
39322  	if (res->state == RESERVATION_ACTIVE && check_preempt)
39323  		gmp_env->schedule_now++;
39324  
39325 -//#if BUDGET_ENFORCEMENT_AT_C	
39326 +#if BUDGET_ENFORCEMENT_AT_C	
39327  	gmp_add_event_after(gmp_env, res->cur_budget, res->id, EVENT_DRAIN);
39328 -//#endif
39329 +#endif
39330  	res->event_added = 1;	
39331  }
39332  
39333 @@ -508,8 +470,6 @@ static void gmp_queue_reservation(
39334  	struct gmp_reservation_environment* gmp_env,
39335  	struct reservation *res)
39336  {
39337 -
39338 -//printk(KERN_ALERT "DEBUG: Passed %s %d %p R%d STATE %d\n",__FUNCTION__,__LINE__, gmp_env, res->id, res->state);
39339  	switch (res->state) {
39340  		case RESERVATION_INACTIVE:
39341  			list_add(&res->list, &gmp_env->inactive_reservations);
39342 @@ -534,7 +494,7 @@ void gmp_add_new_reservation(
39343  	gmp_queue_reservation(gmp_env, new_res);
39344  }
39345  
39346 -//#if BUDGET_ENFORCEMENT_AT_C
39347 +#if BUDGET_ENFORCEMENT_AT_C
39348  static void gmp_charge_budget(
39349  	struct gmp_reservation_environment* gmp_env,
39350  	lt_t delta)
39351 @@ -547,50 +507,35 @@ static void gmp_charge_budget(
39352  		/* charge all ACTIVE_IDLE up to the first ACTIVE reservation */
39353  		res = list_entry(pos, struct reservation, list);
39354  		if (res->state == RESERVATION_ACTIVE) {
39355 -			TRACE("gmp_charge_budget ACTIVE R%u scheduled_on=%d drain %llu\n", res->id, res->scheduled_on, delta);
39356  			if (res->scheduled_on != NO_CPU && res->blocked_by_ghost == 0) {
39357 -				TRACE("DRAIN !!\n");
39358  				drained = 1;
39359  				res->ops->drain_budget(res, delta);
39360  			} else {
39361 -				TRACE("NO DRAIN (not scheduled)!!\n");
39362 +				; /* Do not drain budget (not scheduled) */
39363  			}
39364  		} else {
39365 -			//BUG_ON(res->state != RESERVATION_ACTIVE_IDLE);
39366 -			if (res->state != RESERVATION_ACTIVE_IDLE)
39367 -				TRACE("BUG!!!!!!!!!!!! gmp_charge_budget()\n");
39368 -			TRACE("gmp_charge_budget INACTIVE R%u drain %llu\n", res->id, delta);
39369 -			//if (res->is_ghost != NO_CPU) {
39370 -				TRACE("DRAIN !!\n");
39371 -				drained = 1;
39372 -				res->ops->drain_budget(res, delta);
39373 -			//}
39374 +			BUG_ON(res->state != RESERVATION_ACTIVE_IDLE);
39375 +			drained = 1;
39376 +			res->ops->drain_budget(res, delta);
39377  		}
39378  		if ((res->state == RESERVATION_ACTIVE ||
39379  			res->state == RESERVATION_ACTIVE_IDLE) && (drained == 1))
39380  		{
39381  			/* make sure scheduler is invoked when this reservation expires
39382  			 * its remaining budget */
39383 -			 TRACE("requesting gmp_scheduler update for reservation %u in %llu nanoseconds\n", res->id, res->cur_budget);
39384  			 gmp_add_event_after(gmp_env, res->cur_budget, res->id, EVENT_DRAIN);
39385  			 res->event_added = 1;
39386  		}
39387 -		//if (encountered_active == 2)
39388 -			/* stop at the first ACTIVE reservation */
39389 -		//	break;
39390  	}
39391 -	//TRACE("finished charging budgets\n");
39392  }
39393 -//#else
39394 -/*
39395 +#else
39396  static void gmp_charge_budget(
39397  	struct gmp_reservation_environment* gmp_env,
39398  	lt_t delta)
39399  {
39400  	return;
39401  }
39402 -*/
39403 -//#endif
39404 +#endif
39405  
39406  static void gmp_replenish_budgets(struct gmp_reservation_environment* gmp_env)
39407  {
39408 @@ -601,20 +546,18 @@ static void gmp_replenish_budgets(struct gmp_reservation_environment* gmp_env)
39409  		res = list_entry(pos, struct reservation, list);
39410  		if (res->next_replenishment <= gmp_env->env.current_time) {
39411  			res->ops->replenish(res);
39412 -			if (res->is_ghost != NO_CPU) {
39413 -				TRACE("R%d replenished! scheduled_on=%d\n", res->id, res->scheduled_on);
39414 -			}
39415  		} else {
39416  			/* list is ordered by increasing depletion times */
39417  			break;
39418  		}
39419  	}
39420 -	//TRACE("finished replenishing budgets\n");
39421  }
39422  
39423  #define EPSILON	50
39424  
39425 -/* return schedule_now */
39426 +/* return value: schedule_now (the variable indicates the number of jobs
39427 + *               that need to reschedule.)
39428 + */
39429  int gmp_update_time(
39430  	struct gmp_reservation_environment* gmp_env,
39431  	lt_t now)
39432 @@ -625,35 +568,28 @@ int gmp_update_time(
39433  	/* If the time didn't advance, there is nothing to do.
39434  	 * This check makes it safe to call sup_advance_time() potentially
39435  	 * multiple times (e.g., via different code paths. */
39436 -	//TRACE("(gmp_update_time) now: %llu, current_time: %llu\n", now, gmp_env->env.current_time);
39437  	if (unlikely(now <= gmp_env->env.current_time + EPSILON))
39438  		return 0;
39439  
39440  	delta = now - gmp_env->env.current_time;
39441  	gmp_env->env.current_time = now;
39442  
39443 -
39444 -	//gmp_print_events(gmp_env, now);
39445  	/* deplete budgets by passage of time */
39446 -	//TRACE("CHARGE###\n");
39447  	gmp_charge_budget(gmp_env, delta);
39448  
39449  	/* check if any budgets where replenished */
39450 -	//TRACE("REPLENISH###\n");
39451  	gmp_replenish_budgets(gmp_env);
39452 -
39453  	
39454  	list_for_each_entry_safe(event, next, &gmp_env->next_events, list) {
39455  		if (event->next_update < now) {
39456  			list_del(&event->list);
39457 -			TRACE("EVENT at %llu IS DELETED\n", event->next_update);
39458  			kfree(event);
39459  		} else {
39460  			break;
39461  		}
39462  	}		
39463  	
39464 -	//gmp_print_events(gmp_env, litmus_clock());
39465 +	/* gmp_print_events(gmp_env, litmus_clock()); */
39466  	
39467  	ret = min(gmp_env->schedule_now, NR_CPUS);
39468  	gmp_env->schedule_now = 0;
39469 @@ -680,9 +616,6 @@ static void gmp_res_change_state(
39470  
39471  	gmp_env = container_of(env, struct gmp_reservation_environment, env);
39472  
39473 -	TRACE("GMP reservation R%d state %d->%d at %llu\n",
39474 -		res->id, res->state, new_state, env->current_time);
39475 -
39476  	list_del(&res->list);
39477  	/* check if we need to reschedule because we lost an active reservation */
39478  	if (res->state == RESERVATION_ACTIVE)
39479 diff --git a/litmus/sched_mc2.c b/litmus/sched_mc2.c
39480 index 4536556..2e299d8 100644
39481 --- a/litmus/sched_mc2.c
39482 +++ b/litmus/sched_mc2.c
39483 @@ -160,9 +160,9 @@ static void task_departs(struct task_struct *tsk, int job_complete)
39484  		ce = &state->crit_entries[lv];
39485  		ce->running = tsk;
39486  		res->is_ghost = state->cpu;
39487 -//#if BUDGET_ENFORCEMENT_AT_C		
39488 +#if BUDGET_ENFORCEMENT_AT_C		
39489  		gmp_add_event_after(&_global_env, res->cur_budget, res->id, EVENT_DRAIN);
39490 -//#endif
39491 +#endif
39492  		TRACE_TASK(tsk, "BECOME GHOST at %llu\n", litmus_clock());
39493  		
39494  	}		
39495 @@ -397,13 +397,11 @@ static lt_t mc2_update_ghost_state(struct mc2_cpu_state *state)
39496  	for (lv = 0; lv < NUM_CRIT_LEVELS; lv++) {
39497  		ce = &state->crit_entries[lv];
39498  		if (ce->running != NULL) {
39499 -//printk(KERN_ALERT "P%d ce->running : %s/%d\n", state->cpu,  ce->running ? (ce->running)->comm : "null", ce->running ? (ce->running)->pid : 0);
39500  			tinfo = get_mc2_state(ce->running);
39501  			if (!tinfo)
39502  				continue;
39503  			
39504  			res = res_find_by_id(state, tinfo->mc2_param.res_id);
39505 -			//BUG_ON(!res);
39506  			if (!res) {
39507  				printk(KERN_ALERT "mc2_update_ghost_state(): R%d not found!\n", tinfo->mc2_param.res_id);			
39508  				return 0;
39509 @@ -434,8 +432,6 @@ static lt_t mc2_update_ghost_state(struct mc2_cpu_state *state)
39510  						litmus_reschedule(state->cpu);
39511  				}
39512  			} else {
39513 -				//TRACE("GHOST NOT FINISH id %d budget %llu\n", res->id, res->cur_budget);
39514 -				//gmp_add_event_after(&_global_env, res->cur_budget, res->id, EVENT_DRAIN);
39515  				if (ret > res->cur_budget) {
39516  					ret = res->cur_budget;
39517  				}
39518 @@ -497,11 +493,9 @@ static enum hrtimer_restart on_scheduling_timer(struct hrtimer *timer)
39519  	TRACE("TIMER FIRED at %llu\n", litmus_clock());
39520  	raw_spin_lock_irqsave(&_global_env.lock, flags);
39521  	raw_spin_lock(&state->lock);
39522 -//printk(KERN_ALERT "P%d on_scheduling_timer() hold lock %s/%d\n", state->cpu, current ? (current)->comm : "null", current ? (current)->pid : 0);			
39523  	now = litmus_clock();
39524  	sup_update_time(&state->sup_env, now);
39525  	global_schedule_now = gmp_update_time(&_global_env, now);
39526 -//printk(KERN_ALERT "P%d update_time in timer() %s/%d\n", state->cpu, current ? (current)->comm : "null", current ? (current)->pid : 0);			
39527  	remain_budget = mc2_update_ghost_state(state);
39528  	
39529  	update = state->sup_env.next_scheduler_update;
39530 @@ -512,7 +506,6 @@ static enum hrtimer_restart on_scheduling_timer(struct hrtimer *timer)
39531  	}
39532  	
39533  	TRACE_CUR("on_scheduling_timer at %llu, upd:%llu (for cpu=%d) g_schedule_now:%d remain_budget:%llu\n", now, update, state->cpu, global_schedule_now, remain_budget);
39534 -//printk(KERN_ALERT "on_scheduling_timer at %llu, upd:%llu (for cpu=%d) g_schedule_now:%d\n", now, update, state->cpu, global_schedule_now);
39535  	if (update <= now) {
39536  		litmus_reschedule_local();
39537  	} else if (update != SUP_NO_SCHEDULER_UPDATE) {
39538 @@ -536,7 +529,7 @@ static enum hrtimer_restart on_scheduling_timer(struct hrtimer *timer)
39539  	
39540  	raw_spin_unlock(&state->lock);
39541  	raw_spin_unlock_irqrestore(&_global_env.lock, flags);
39542 -//printk(KERN_ALERT "P%d on_scheduling_timer() release lock %s/%d\n", state->cpu, current ? (current)->comm : "null", current ? (current)->pid : 0);	
39543 +
39544  	return restart;
39545  }
39546  
39547 @@ -564,35 +557,13 @@ struct task_struct* mc2_dispatch(struct sup_reservation_environment* sup_env, st
39548  					res->blocked_by_ghost = 0;
39549  					res->is_ghost = NO_CPU;
39550  					return tsk;
39551 -/*
39552 -					if (likely(!ce->running)) {
39553 -						sup_scheduler_update_after(sup_env, res->cur_budget);
39554 -						res->blocked_by_ghost = 0;
39555 -						res->is_ghost = NO_CPU;
39556 -						return tsk;
39557 -					} else {
39558 -						res->blocked_by_ghost = 1;
39559 -						TRACE_TASK(ce->running, " is GHOST\n");
39560 -					}
39561 -*/
39562  				}
39563  			}
39564  		}
39565  	}
39566  	
39567 -	/* no eligible level A or B tasks exists */
39568 -	/* check the ghost job */
39569 -	/*
39570 -	ce = &state->crit_entries[CRIT_LEVEL_C];
39571 -	if (ce->running) {
39572 -		TRACE_TASK(ce->running," is GHOST\n");
39573 -		return NULL;
39574 -	}
39575 -	*/
39576  	cur_priority = _lowest_prio_cpu.cpu_entries[state->cpu].deadline;
39577  	
39578 -	TRACE("****** ACTIVE LIST ******\n");
39579 -	TRACE_TASK(_lowest_prio_cpu.cpu_entries[state->cpu].scheduled, "** CURRENT JOB deadline %llu **\n", cur_priority);
39580  	list_for_each_entry_safe(res, next, &_global_env.active_reservations, list) {
39581  		TRACE("R%d deadline=%llu, scheduled_on=%d\n", res->id, res->priority, res->scheduled_on);
39582  		if (res->state == RESERVATION_ACTIVE && res->scheduled_on == NO_CPU) {
39583 @@ -601,24 +572,16 @@ struct task_struct* mc2_dispatch(struct sup_reservation_environment* sup_env, st
39584  				lv = get_task_crit_level(tsk);
39585  				if (lv == NUM_CRIT_LEVELS) {
39586  					gmp_add_event_after(&_global_env, res->cur_budget, res->id, EVENT_DRAIN);
39587 -					//res->event_added = 1;
39588  					return tsk;
39589  				} else if (lv == CRIT_LEVEL_C) {
39590 -					//ce = &state->crit_entries[lv];
39591 -					//if (likely(!ce->running)) {
39592 -//#if BUDGET_ENFORCEMENT_AT_C						
39593 +#if BUDGET_ENFORCEMENT_AT_C						
39594  						gmp_add_event_after(&_global_env, res->cur_budget, res->id, EVENT_DRAIN);
39595 -//#endif
39596 +#endif
39597  						res->event_added = 1;
39598  						res->blocked_by_ghost = 0;
39599  						res->is_ghost = NO_CPU;
39600  						res->scheduled_on = state->cpu;
39601  						return tsk;
39602 -					//} else {
39603 -					//	res->blocked_by_ghost = 1;
39604 -					//	TRACE_TASK(ce->running, " is GHOST\n");
39605 -					//	return NULL;
39606 -					//}
39607  				} else {
39608  					BUG();
39609  				}
39610 @@ -631,9 +594,6 @@ struct task_struct* mc2_dispatch(struct sup_reservation_environment* sup_env, st
39611  
39612  static inline void pre_schedule(struct task_struct *prev, int cpu)
39613  {
39614 -	if (!prev || !is_realtime(prev))
39615 -		return;
39616 -	
39617  	do_partition(CRIT_LEVEL_C, cpu);
39618  }
39619  
39620 @@ -642,9 +602,7 @@ static inline void post_schedule(struct task_struct *next, int cpu)
39621  	enum crit_level lev;
39622  	if ((!next) || !is_realtime(next))
39623  		return;
39624 -/*	if (!is_realtime(next))
39625 -		lev = NUM_CRIT_LEVELS;
39626 -	else */
39627 +
39628  	lev = get_task_crit_level(next);
39629  	do_partition(lev, cpu);
39630  }
39631 @@ -657,17 +615,13 @@ static struct task_struct* mc2_schedule(struct task_struct * prev)
39632  	lt_t now;
39633  	struct mc2_cpu_state *state = local_cpu_state();
39634  
39635 -	//pre_schedule(prev, state->cpu);
39636 +	pre_schedule(prev, state->cpu);
39637  	
39638  	raw_spin_lock(&_global_env.lock);
39639  	raw_spin_lock(&state->lock);
39640  	
39641  	//BUG_ON(state->scheduled && state->scheduled != prev);
39642  	//BUG_ON(state->scheduled && !is_realtime(prev));
39643 -	if (state->scheduled && state->scheduled != prev)
39644 -		; //printk(KERN_ALERT "BUG1!!!!!!!! %s %s\n", state->scheduled ? (state->scheduled)->comm : "null", prev ? (prev)->comm : "null");
39645 -	if (state->scheduled && !is_realtime(prev))
39646 -		; //printk(KERN_ALERT "BUG2!!!!!!!! \n");
39647  
39648  	/* update time */
39649  	state->sup_env.will_schedule = true;
39650 @@ -713,7 +667,7 @@ static struct task_struct* mc2_schedule(struct task_struct * prev)
39651  			int cpu;
39652  			raw_spin_lock(&_global_env.lock);
39653  			cpu = get_lowest_prio_cpu(res?res->priority:0);
39654 -			//TRACE("LEVEL-C TASK PREEMPTED!! poking CPU %d to reschedule\n", cpu);
39655 +			TRACE("LEVEL-C TASK PREEMPTED!! poking CPU %d to reschedule\n", cpu);
39656  			if (cpu != NO_CPU) {
39657  				raw_spin_lock(&_lowest_prio_cpu.lock);
39658  				_lowest_prio_cpu.cpu_entries[cpu].will_schedule = true;
39659 @@ -767,7 +721,7 @@ static void mc2_task_resume(struct task_struct  *tsk)
39660  		state = local_cpu_state();
39661  
39662  	raw_spin_lock(&_global_env.lock);
39663 -//printk(KERN_ALERT "P%d resume() hold lock\n", state->cpu);	
39664 +
39665  	/* Requeue only if self-suspension was already processed. */
39666  	if (tinfo->has_departed)
39667  	{
39668 @@ -778,9 +732,7 @@ static void mc2_task_resume(struct task_struct  *tsk)
39669  		if (tinfo->cpu != -1) {
39670  			sup_update_time(&state->sup_env, litmus_clock());
39671  		} else {
39672 -			//TRACE("RESUME UPDATE ####\n");
39673  			gmp_update_time(&_global_env, litmus_clock());
39674 -			//TRACE("RESUME UPDATE $$$$\n");
39675  		}
39676  			
39677  		mc2_update_ghost_state(state);
39678 @@ -788,11 +740,9 @@ static void mc2_task_resume(struct task_struct  *tsk)
39679  		/* NOTE: drops state->lock */
39680  		TRACE_TASK(tsk, "mc2_resume()\n");
39681  		mc2_update_timer_and_unlock(state);	
39682 -//printk(KERN_ALERT "P%d resume() dropped lock\n", state->cpu);			
39683  	} else {
39684  		TRACE_TASK(tsk, "resume event ignored, still scheduled\n");
39685  		raw_spin_unlock(&_global_env.lock);
39686 -//printk(KERN_ALERT "P%d resume() release lock\n", state->cpu);			
39687  	}
39688  
39689  	local_irq_restore(flags);
39690 @@ -833,7 +783,6 @@ static long mc2_complete_job(void)
39691  		
39692  		raw_spin_lock(&_global_env.lock);
39693  		raw_spin_lock(&state->lock);
39694 -//printk(KERN_ALERT "P%d complete() hold lock\n", state->cpu);
39695  		env = &(state->sup_env.env);
39696  		
39697  		res = res_find_by_id(state, tinfo->mc2_param.res_id);
39698 @@ -846,23 +795,11 @@ static long mc2_complete_job(void)
39699  		
39700  		/* set next_replenishtime to synchronous release time */
39701  		res->next_replenishment = tsk_rt(current)->sporadic_release_time;
39702 -/*		
39703 -		if (get_task_crit_level(current) == CRIT_LEVEL_A) {
39704 -			struct table_driven_reservation *tdres;
39705 -			tdres = container_of(res, struct table_driven_reservation, res);
39706 -			tdres->next_interval = 0;
39707 -			tdres->major_cycle_start = tsk_rt(current)->sporadic_release_time;
39708 -			res->next_replenishment += tdres->intervals[0].start;			
39709 -		}
39710 -*/		
39711  		res->cur_budget = 0;
39712  		res->env->change_state(res->env, res, RESERVATION_DEPLETED);
39713 -		
39714 -		//TRACE_CUR("CHANGE NEXT_REP = %llu\n NEXT_UPDATE = %llu\n", res->next_replenishment, state->sup_env.next_scheduler_update);
39715 -		
39716 +
39717  		raw_spin_unlock(&state->lock);
39718  		raw_spin_unlock(&_global_env.lock);
39719 -//printk(KERN_ALERT "P%d complete() release lock\n", state->cpu);				
39720  		local_irq_restore(flags);
39721  		preempt_enable();
39722  	}
39723 @@ -875,23 +812,19 @@ static long mc2_complete_job(void)
39724  	next_release = ns_to_ktime(get_release(current));
39725  	preempt_disable();
39726  	TRACE_CUR("next_release=%llu\n", get_release(current));
39727 -	//flush_cache();
39728 +
39729  	if (get_release(current) > litmus_clock()) {
39730  		/* sleep until next_release */
39731  		set_current_state(TASK_INTERRUPTIBLE);
39732  		preempt_enable_no_resched();
39733  		err = schedule_hrtimeout(&next_release, HRTIMER_MODE_ABS);
39734 -//		if (get_task_crit_level(current) == CRIT_LEVEL_A)
39735 -//			sched_trace_task_release(current);
39736  	} else {
39737  		/* release the next job immediately */
39738  		err = 0;
39739  		TRACE_CUR("TARDY: release=%llu now=%llu\n", get_release(current), litmus_clock());
39740  		preempt_enable();
39741 -//		if (get_task_crit_level(current) == CRIT_LEVEL_A)
39742  		sched_trace_task_release(current);
39743  	}
39744 -	//l2x0_flush_all();
39745  	TRACE_CUR("mc2_complete_job returns at %llu\n", litmus_clock());
39746  
39747  	return err;
39748 @@ -945,7 +878,7 @@ static long mc2_admit_task(struct task_struct *tsk)
39749  		raw_spin_unlock_irqrestore(&state->lock, flags);
39750  	} else if (lv == CRIT_LEVEL_C) {
39751  		raw_spin_lock_irqsave(&_global_env.lock, flags);
39752 -//printk(KERN_ALERT "admit() hold lock\n");		
39753 +
39754  		state = local_cpu_state();
39755  		
39756  		raw_spin_lock(&state->lock);
39757 @@ -954,7 +887,6 @@ static long mc2_admit_task(struct task_struct *tsk)
39758  
39759  		/* found the appropriate reservation (or vCPU) */
39760  		if (res) {
39761 -			TRACE_TASK(tsk, "GMP FOUND RES ID\n");
39762  			tinfo->mc2_param.crit = mp->crit;
39763  			tinfo->mc2_param.res_id = mp->res_id;
39764  			
39765 @@ -970,7 +902,6 @@ static long mc2_admit_task(struct task_struct *tsk)
39766  
39767  		raw_spin_unlock(&state->lock);
39768  		raw_spin_unlock_irqrestore(&_global_env.lock, flags);
39769 -//printk(KERN_ALERT "admit() release lock\n");		
39770  	}
39771  	
39772  	preempt_enable();
39773 @@ -1006,7 +937,7 @@ static void mc2_task_new(struct task_struct *tsk, int on_runqueue,
39774  	/* acquire the lock protecting the state and disable interrupts */
39775  	raw_spin_lock(&_global_env.lock);
39776  	raw_spin_lock(&state->lock);
39777 -//printk(KERN_ALERT "new() hold lock R%d\n", tinfo->mc2_param.res_id);	
39778 +
39779  	if (is_running) {
39780  		state->scheduled = tsk;
39781  		/* make sure this task should actually be running */
39782 @@ -1023,14 +954,10 @@ static void mc2_task_new(struct task_struct *tsk, int on_runqueue,
39783  		mc2_update_ghost_state(state);
39784  		task_arrives(state, tsk);
39785  		/* NOTE: drops state->lock */
39786 -		TRACE("mc2_new()\n");
39787 -		
39788  		mc2_update_timer_and_unlock(state);
39789 -//printk(KERN_ALERT "new() dropped lock R%d\n",tinfo->mc2_param.res_id);		
39790  	} else {
39791  		raw_spin_unlock(&state->lock);
39792  		raw_spin_unlock(&_global_env.lock);
39793 -//printk(KERN_ALERT "new() release lock R%d\n",tinfo->mc2_param.res_id);		
39794  	}
39795  	local_irq_restore(flags);
39796  	
39797 @@ -1051,7 +978,6 @@ static long mc2_reservation_destroy(unsigned int reservation_id, int cpu)
39798  	struct reservation *res = NULL, *next;
39799  	struct sup_reservation_environment *sup_env;
39800  	int found = 0;
39801 -	//enum crit_level lv = get_task_crit_level(current);
39802  	unsigned long flags;
39803  	
39804  	if (cpu == -1) {
39805 @@ -1063,7 +989,6 @@ static long mc2_reservation_destroy(unsigned int reservation_id, int cpu)
39806  		
39807  		list_for_each_entry_safe(res, next, &_global_env.depleted_reservations, list) {
39808  			if (res->id == reservation_id) {
39809 -				TRACE("DESTROY RES FOUND!!!\n");
39810  				list_del(&res->list);
39811  				kfree(res);
39812  				found = 1;
39813 @@ -1073,7 +998,6 @@ static long mc2_reservation_destroy(unsigned int reservation_id, int cpu)
39814  		if (!found) {
39815  			list_for_each_entry_safe(res, next, &_global_env.inactive_reservations, list) {
39816  				if (res->id == reservation_id) {
39817 -					TRACE("DESTROY RES FOUND!!!\n");
39818  					list_del(&res->list);
39819  					kfree(res);
39820  					found = 1;
39821 @@ -1084,7 +1008,6 @@ static long mc2_reservation_destroy(unsigned int reservation_id, int cpu)
39822  		if (!found) {
39823  			list_for_each_entry_safe(res, next, &_global_env.active_reservations, list) {
39824  				if (res->id == reservation_id) {
39825 -					TRACE("DESTROY RES FOUND!!!\n");
39826  					list_del(&res->list);
39827  					kfree(res);
39828  					found = 1;
39829 @@ -1101,17 +1024,9 @@ static long mc2_reservation_destroy(unsigned int reservation_id, int cpu)
39830  		state = cpu_state_for(cpu);
39831  		raw_spin_lock_irqsave(&state->lock, flags);
39832  		
39833 -	//	res = sup_find_by_id(&state->sup_env, reservation_id);
39834  		sup_env = &state->sup_env;
39835  		list_for_each_entry_safe(res, next, &sup_env->depleted_reservations, list) {
39836  			if (res->id == reservation_id) {
39837 -/*
39838 -			if (lv == CRIT_LEVEL_A) {
39839 -					struct table_driven_reservation *tdres;
39840 -					tdres = container_of(res, struct table_driven_reservation, res);
39841 -					kfree(tdres->intervals);
39842 -			}
39843 -*/
39844  				list_del(&res->list);
39845  				kfree(res);
39846  				found = 1;
39847 @@ -1121,12 +1036,6 @@ static long mc2_reservation_destroy(unsigned int reservation_id, int cpu)
39848  		if (!found) {
39849  			list_for_each_entry_safe(res, next, &sup_env->inactive_reservations, list) {
39850  				if (res->id == reservation_id) {
39851 -/*					if (lv == CRIT_LEVEL_A) {
39852 -						struct table_driven_reservation *tdres;
39853 -						tdres = container_of(res, struct table_driven_reservation, res);
39854 -						kfree(tdres->intervals);
39855 -					}
39856 -*/
39857  					list_del(&res->list);
39858  					kfree(res);
39859  					found = 1;
39860 @@ -1137,12 +1046,6 @@ static long mc2_reservation_destroy(unsigned int reservation_id, int cpu)
39861  		if (!found) {
39862  			list_for_each_entry_safe(res, next, &sup_env->active_reservations, list) {
39863  				if (res->id == reservation_id) {
39864 -/*					if (lv == CRIT_LEVEL_A) {
39865 -						struct table_driven_reservation *tdres;
39866 -						tdres = container_of(res, struct table_driven_reservation, res);
39867 -						kfree(tdres->intervals);
39868 -					}
39869 -*/
39870  					list_del(&res->list);
39871  					kfree(res);
39872  					found = 1;
39873 @@ -1154,7 +1057,6 @@ static long mc2_reservation_destroy(unsigned int reservation_id, int cpu)
39874  		raw_spin_unlock_irqrestore(&state->lock, flags);
39875  	}
39876  	
39877 -	TRACE("RESERVATION_DESTROY ret = %d\n", ret);
39878  	return ret;
39879  }
39880  
39881 @@ -1196,8 +1098,6 @@ static void mc2_task_exit(struct task_struct *tsk)
39882  		task_departs(tsk, 0);
39883  		
39884  		/* NOTE: drops state->lock */
39885 -		TRACE("mc2_exit()\n");
39886 -
39887  		mc2_update_timer_and_unlock(state);	
39888  	} else {
39889  		raw_spin_unlock(&state->lock);
39890 @@ -1547,7 +1447,6 @@ static void mc2_finish_switch(struct task_struct *prev)
39891  	struct mc2_cpu_state *state = local_cpu_state();
39892  	
39893  	state->scheduled = is_realtime(current) ? current : NULL;
39894 -	//TRACE("FINISH CXS! from %s/%d to %s/%d\n", prev ? (prev)->comm : "null", prev ? (prev)->pid : 0, current ? (current)->comm : "null", current ? (current)->pid : 0);
39895  }
39896  
39897  static long mc2_deactivate_plugin(void)
39898 @@ -1606,7 +1505,6 @@ static long mc2_deactivate_plugin(void)
39899  
39900  	
39901  	while (!list_empty(&_global_env.active_reservations)) {
39902 -		TRACE("RES FOUND!!!\n");
39903  		res = list_first_entry(
39904  			&_global_env.active_reservations,
39905  				struct reservation, list);
39906 @@ -1615,7 +1513,6 @@ static long mc2_deactivate_plugin(void)
39907  	}
39908  
39909  	while (!list_empty(&_global_env.inactive_reservations)) {
39910 -		TRACE("RES FOUND!!!\n");
39911  		res = list_first_entry(
39912  			&_global_env.inactive_reservations,
39913  				struct reservation, list);
39914 @@ -1624,7 +1521,6 @@ static long mc2_deactivate_plugin(void)
39915  	}
39916  
39917  	while (!list_empty(&_global_env.depleted_reservations)) {
39918 -		TRACE("RES FOUND!!!\n");
39919  		res = list_first_entry(
39920  			&_global_env.depleted_reservations,
39921  				struct reservation, list);
39922 @@ -1633,7 +1529,6 @@ static long mc2_deactivate_plugin(void)
39923  	}
39924  	
39925  	while (!list_empty(&_global_env.next_events)) {
39926 -		TRACE("EVENT FOUND!!!\n");
39927  		event = list_first_entry(
39928  			&_global_env.next_events,
39929  				struct next_timer_event, list);
39930 diff --git a/litmus/uncachedev.c b/litmus/uncachedev.c
39931 index 06a6a7c..cf8217ee 100644
39932 --- a/litmus/uncachedev.c
39933 +++ b/litmus/uncachedev.c
39934 @@ -54,8 +54,8 @@ static int litmus_uncache_mmap(struct file* filp, struct vm_area_struct* vma)
39935  		return -EINVAL;
39936  
39937  	/* you can't share it with anyone */
39938 -	if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
39939 -		return -EINVAL;
39940 +	//if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
39941 +		//return -EINVAL;
39942  
39943  	/* cannot be expanded, and is not a "normal" page. */
39944  	vma->vm_flags |= VM_DONTEXPAND;
39945 -- 
39946 1.8.1.2
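Usage note: the "1.8.1.2" trailer above is the git version recorded by git format-patch, so this attachment is an mbox-style patch series that can be applied on top of a matching LITMUS^RT kernel tree. A minimal sketch, assuming a checkout named litmus-rt with the downloaded attachment saved next to it (both paths are illustrative, not prescribed by this page):

    # Hypothetical paths; adjust to where the kernel tree and patch file actually live.
    cd litmus-rt
    # Apply the whole series as commits, preserving authorship and commit messages:
    git am ../MC2-litmut-rt-imx6-rtss15.patch
    # Alternatively, apply the changes without creating commits:
    #   patch -p1 < ../MC2-litmut-rt-imx6-rtss15.patch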

Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since that URL is subject to change and can break easily.
  • [get | view] (2015-06-04 21:22:59, 244.8 KB) [[attachment:MC2-liblitmus-imx6-rtss15.patch]]
  • [get | view] (2016-05-12 14:35:37, 51.9 KB) [[attachment:MC2-liblitmus-rtss16.patch]]
  • [get | view] (2016-05-12 14:36:06, 190.4 KB) [[attachment:MC2-litmus-rt-rtss16.patch]]
  • [get | view] (2015-07-19 10:27:52, 1119.9 KB) [[attachment:MC2-litmut-rt-imx6-rtss15.patch]]
  • [get | view] (2014-05-27 20:46:19, 58.3 KB) [[attachment:MC2_liblitmus_ipdps15.patch]]
  • [get | view] (2014-05-27 20:45:43, 1044.3 KB) [[attachment:MC2_litmusrt_ipdps15.patch]]
  • [get | view] (2017-04-07 21:48:09, 6099.5 KB) [[attachment:buff_sharing.tar]]
  • [get | view] (2015-01-08 14:20:07, 61.0 KB) [[attachment:feather-trace-patch-against-sched-deadline-v8.patch]]
  • [get | view] (2014-04-01 23:10:10, 38.9 KB) [[attachment:gedf-mp-rtas14.patch]]
  • [get | view] (2012-03-02 20:13:59, 1.9 KB) [[attachment:gpu-klmirqd-liblitmus-rt-ecrts12.patch]]
  • [get | view] (2012-03-02 20:14:25, 389.8 KB) [[attachment:gpu-klmirqd-litmus-rt-ecrts12.patch]]
  • [get | view] (2012-05-26 21:41:34, 418.0 KB) [[attachment:gpusync-rtss12.patch]]
  • [get | view] (2012-05-26 21:42:20, 8.6 KB) [[attachment:gpusync_liblitmus-rtss12.patch]]
  • [get | view] (2013-05-21 15:32:08, 208.6 KB) [[attachment:gpusync_rtss13_liblitmus.patch]]
  • [get | view] (2013-05-21 15:31:32, 779.5 KB) [[attachment:gpusync_rtss13_litmus.patch]]
  • [get | view] (2012-05-26 21:42:41, 71.4 KB) [[attachment:klt_tracker_v1.0.litmus.tgz]]
  • [get | view] (2016-10-13 21:14:05, 19.6 KB) [[attachment:liblitmus-rtas17.patch]]
  • [get | view] (2017-05-01 20:46:22, 90.0 KB) [[attachment:liblitmus-rtns17.patch]]
  • [get | view] (2018-12-11 01:38:53, 49.1 KB) [[attachment:liblitmus-semi-part-with-edfos.patch]]
  • [get | view] (2017-10-09 19:16:09, 304.0 KB) [[attachment:litmus-rt-os-isolation.patch]]
  • [get | view] (2016-10-13 21:13:27, 207.6 KB) [[attachment:litmus-rt-rtas17.patch]]
  • [get | view] (2017-05-01 20:46:40, 207.6 KB) [[attachment:litmus-rt-rtns17.patch]]
  • [get | view] (2018-12-11 01:39:04, 100.5 KB) [[attachment:litmus-rt-semi-part-with-edfos.patch]]
  • [get | view] (2018-06-26 04:31:48, 7.0 KB) [[attachment:mc2_liblitmus_2015.1-rtns18.patch]]
  • [get | view] (2018-06-26 04:31:33, 292.7 KB) [[attachment:mc2_litmus-rt_2015.1-rtns18.patch]]
  • [get | view] (2017-05-01 20:45:10, 2596.9 KB) [[attachment:mcp_study.zip]]
  • [get | view] (2013-07-13 14:11:53, 58.0 KB) [[attachment:omip-ecrts13.patch]]
  • [get | view] (2014-02-19 21:48:33, 17.2 KB) [[attachment:pgmrt-liblitmus-ecrts14.patch]]
  • [get | view] (2014-02-19 21:47:57, 87.8 KB) [[attachment:pgmrt-litmusrt-ecrts14.patch]]
  • [get | view] (2015-01-08 14:22:32, 61.0 KB) [[attachment:sched-deadline-v8-feather-trace-rtas14.patch]]
  • [get | view] (2018-06-26 04:32:13, 2545.1 KB) [[attachment:sched_study_rtns2018.tar.gz]]
  • [get | view] (2017-04-07 21:53:39, 5969.5 KB) [[attachment:seminal.tar]]
  • [get | view] (2017-04-07 21:51:13, 6064.0 KB) [[attachment:shared_libraries.tar]]
  • [get | view] (2013-07-13 13:58:25, 42.7 KB) [[attachment:tracing-and-dflp-rtas13.patch]]