Attachment 'litmus-rt-2012.1.patch'
1 Makefile | 4 +-
2 arch/arm/Kconfig | 8 +
3 arch/arm/include/asm/timex.h | 2 +
4 arch/arm/include/asm/unistd.h | 3 +
5 arch/arm/kernel/calls.S | 12 +
6 arch/arm/kernel/smp.c | 4 +
7 arch/arm/mach-realview/include/mach/timex.h | 27 +
8 arch/x86/Kconfig | 8 +
9 arch/x86/include/asm/entry_arch.h | 1 +
10 arch/x86/include/asm/feather_trace.h | 17 +
11 arch/x86/include/asm/feather_trace_32.h | 79 ++
12 arch/x86/include/asm/feather_trace_64.h | 67 ++
13 arch/x86/include/asm/hw_irq.h | 3 +
14 arch/x86/include/asm/irq_vectors.h | 7 +
15 arch/x86/include/asm/processor.h | 4 +
16 arch/x86/include/asm/unistd_32.h | 6 +-
17 arch/x86/include/asm/unistd_64.h | 4 +
18 arch/x86/kernel/Makefile | 2 +
19 arch/x86/kernel/cpu/intel_cacheinfo.c | 17 +
20 arch/x86/kernel/entry_64.S | 2 +
21 arch/x86/kernel/ft_event.c | 118 +++
22 arch/x86/kernel/irqinit.c | 3 +
23 arch/x86/kernel/smp.c | 31 +
24 arch/x86/kernel/syscall_table_32.S | 12 +
25 fs/exec.c | 13 +-
26 fs/inode.c | 2 +
27 include/linux/completion.h | 1 +
28 include/linux/fs.h | 21 +-
29 include/linux/hardirq.h | 4 +
30 include/linux/hrtimer.h | 32 +
31 include/linux/sched.h | 19 +-
32 include/linux/smp.h | 5 +
33 include/linux/tick.h | 5 +
34 include/litmus/affinity.h | 80 ++
35 include/litmus/bheap.h | 77 ++
36 include/litmus/budget.h | 8 +
37 include/litmus/clustered.h | 44 ++
38 include/litmus/debug_trace.h | 37 +
39 include/litmus/edf_common.h | 25 +
40 include/litmus/fdso.h | 71 ++
41 include/litmus/feather_buffer.h | 94 +++
42 include/litmus/feather_trace.h | 65 ++
43 include/litmus/ftdev.h | 55 ++
44 include/litmus/jobs.h | 9 +
45 include/litmus/litmus.h | 275 +++++++
46 include/litmus/litmus_proc.h | 25 +
47 include/litmus/locking.h | 28 +
48 include/litmus/preempt.h | 164 ++++
49 include/litmus/rt_domain.h | 182 +++++
50 include/litmus/rt_param.h | 209 ++++++
51 include/litmus/sched_plugin.h | 111 +++
52 include/litmus/sched_trace.h | 200 +++++
53 include/litmus/srp.h | 28 +
54 include/litmus/trace.h | 116 +++
55 include/litmus/trace_irq.h | 21 +
56 include/litmus/unistd_32.h | 21 +
57 include/litmus/unistd_64.h | 33 +
58 kernel/exit.c | 4 +
59 kernel/fork.c | 7 +
60 kernel/hrtimer.c | 95 +++
61 kernel/printk.c | 14 +-
62 kernel/sched.c | 137 ++++-
63 kernel/sched_fair.c | 3 +
64 kernel/sched_rt.c | 2 +-
65 kernel/time/tick-sched.c | 47 ++
66 litmus/Kconfig | 218 ++++++
67 litmus/Makefile | 29 +
68 litmus/affinity.c | 42 ++
69 litmus/bheap.c | 314 ++++++++
70 litmus/budget.c | 111 +++
71 litmus/clustered.c | 111 +++
72 litmus/ctrldev.c | 150 ++++
73 litmus/edf_common.c | 118 +++
74 litmus/fdso.c | 293 ++++++++
75 litmus/ft_event.c | 43 ++
76 litmus/ftdev.c | 439 +++++++++++
77 litmus/jobs.c | 43 ++
78 litmus/litmus.c | 564 ++++++++++++++
79 litmus/litmus_proc.c | 347 +++++++++
80 litmus/locking.c | 139 ++++
81 litmus/preempt.c | 133 ++++
82 litmus/rt_domain.c | 357 +++++++++
83 litmus/sched_cedf.c | 863 ++++++++++++++++++++++
84 litmus/sched_gsn_edf.c | 1030 ++++++++++++++++++++++++++
85 litmus/sched_litmus.c | 325 ++++++++
86 litmus/sched_pfair.c | 1067 +++++++++++++++++++++++++++
87 litmus/sched_plugin.c | 227 ++++++
88 litmus/sched_psn_edf.c | 645 ++++++++++++++++
89 litmus/sched_task_trace.c | 241 ++++++
90 litmus/sched_trace.c | 252 +++++++
91 litmus/srp.c | 295 ++++++++
92 litmus/sync.c | 104 +++
93 litmus/trace.c | 225 ++++++
94 93 files changed, 11521 insertions(+), 34 deletions(-)
95
96 diff --git a/Makefile b/Makefile
97 index 6a5bdad..a327725 100644
98 --- a/Makefile
99 +++ b/Makefile
100 @@ -1,7 +1,7 @@
101 VERSION = 3
102 PATCHLEVEL = 0
103 SUBLEVEL = 0
104 -EXTRAVERSION =
105 +EXTRAVERSION =-litmus
106 NAME = Sneaky Weasel
107
108 # *DOCUMENTATION*
109 @@ -708,7 +708,7 @@ export mod_strip_cmd
110
111
112 ifeq ($(KBUILD_EXTMOD),)
113 -core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/
114 +core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ litmus/
115
116 vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \
117 $(core-y) $(core-m) $(drivers-y) $(drivers-m) \
118 diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
119 index 9adc278..fb228ea 100644
120 --- a/arch/arm/Kconfig
121 +++ b/arch/arm/Kconfig
122 @@ -2040,3 +2040,11 @@ source "security/Kconfig"
123 source "crypto/Kconfig"
124
125 source "lib/Kconfig"
126 +
127 +config ARCH_HAS_SEND_PULL_TIMERS
128 + def_bool n
129 +
130 +config ARCH_HAS_FEATHER_TRACE
131 + def_bool n
132 +
133 +source "litmus/Kconfig"
134 diff --git a/arch/arm/include/asm/timex.h b/arch/arm/include/asm/timex.h
135 index 3be8de3..8a102a3 100644
136 --- a/arch/arm/include/asm/timex.h
137 +++ b/arch/arm/include/asm/timex.h
138 @@ -16,9 +16,11 @@
139
140 typedef unsigned long cycles_t;
141
142 +#ifndef get_cycles
143 static inline cycles_t get_cycles (void)
144 {
145 return 0;
146 }
147 +#endif
148
149 #endif
150 diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
151 index 2c04ed5..0196edf 100644
152 --- a/arch/arm/include/asm/unistd.h
153 +++ b/arch/arm/include/asm/unistd.h
154 @@ -403,6 +403,9 @@
155 #define __NR_sendmmsg (__NR_SYSCALL_BASE+374)
156 #define __NR_setns (__NR_SYSCALL_BASE+375)
157
158 +#define __NR_LITMUS (__NR_SYSCALL_BASE+376)
159 +#include <litmus/unistd_32.h>
160 +
161 /*
162 * The following SWIs are ARM private.
163 */
164 diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
165 index 80f7896..ed2ae93 100644
166 --- a/arch/arm/kernel/calls.S
167 +++ b/arch/arm/kernel/calls.S
168 @@ -385,6 +385,18 @@
169 CALL(sys_syncfs)
170 CALL(sys_sendmmsg)
171 /* 375 */ CALL(sys_setns)
172 + CALL(sys_set_rt_task_param)
173 + CALL(sys_get_rt_task_param)
174 + CALL(sys_complete_job)
175 + CALL(sys_od_open)
176 +/* 380 */ CALL(sys_od_close)
177 + CALL(sys_litmus_lock)
178 + CALL(sys_litmus_unlock)
179 + CALL(sys_query_job_no)
180 + CALL(sys_wait_for_job_release)
181 +/* 385 */ CALL(sys_wait_for_ts_release)
182 + CALL(sys_release_ts)
183 + CALL(sys_null_call)
184 #ifndef syscalls_counted
185 .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
186 #define syscalls_counted
187 diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
188 index e7f92a4..5a57429 100644
189 --- a/arch/arm/kernel/smp.c
190 +++ b/arch/arm/kernel/smp.c
191 @@ -40,6 +40,8 @@
192 #include <asm/ptrace.h>
193 #include <asm/localtimer.h>
194
195 +#include <litmus/preempt.h>
196 +
197 /*
198 * as from 2.5, kernels no longer have an init_tasks structure
199 * so we need some other way of telling a new secondary core
200 @@ -572,6 +574,8 @@ asmlinkage void __exception_irq_entry do_IPI(int ipinr, struct pt_regs *regs)
201 break;
202
203 case IPI_RESCHEDULE:
204 + /* LITMUS^RT: take action based on scheduler state */
205 + sched_state_ipi();
206 scheduler_ipi();
207 break;
208
209 diff --git a/arch/arm/mach-realview/include/mach/timex.h b/arch/arm/mach-realview/include/mach/timex.h
210 index 4eeb069..e8bcc40 100644
211 --- a/arch/arm/mach-realview/include/mach/timex.h
212 +++ b/arch/arm/mach-realview/include/mach/timex.h
213 @@ -21,3 +21,30 @@
214 */
215
216 #define CLOCK_TICK_RATE (50000000 / 16)
217 +
218 +#if defined(CONFIG_MACH_REALVIEW_PB11MP) || defined(CONFIG_MACH_REALVIEW_PB1176)
219 +
220 +static inline unsigned long realview_get_arm11_cp15_ccnt(void)
221 +{
222 + unsigned long cycles;
223 + /* Read CP15 CCNT register. */
224 + asm volatile ("mrc p15, 0, %0, c15, c12, 1" : "=r" (cycles));
225 + return cycles;
226 +}
227 +
228 +#define get_cycles realview_get_arm11_cp15_ccnt
229 +
230 +#elif defined(CONFIG_MACH_REALVIEW_PBA8)
231 +
232 +
233 +static inline unsigned long realview_get_a8_cp15_ccnt(void)
234 +{
235 + unsigned long cycles;
236 + /* Read CP15 CCNT register. */
237 + asm volatile ("mrc p15, 0, %0, c9, c13, 0" : "=r" (cycles));
238 + return cycles;
239 +}
240 +
241 +#define get_cycles realview_get_a8_cp15_ccnt
242 +
243 +#endif
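
Illustrative only (not part of the patch): with the CP15-CCNT-backed get_cycles() defined above, cycle-level timing on the RealView boards works the same way as on x86. This sketch assumes the cycle counter has already been enabled and reset (e.g., by board setup or a perf driver), which the patch itself does not show:

	cycles_t start, elapsed;

	start = get_cycles();
	/* ... code under measurement ... */
	elapsed = get_cycles() - start;
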
244 diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
245 index 37357a5..9f5e143 100644
246 --- a/arch/x86/Kconfig
247 +++ b/arch/x86/Kconfig
248 @@ -2166,3 +2166,11 @@ source "crypto/Kconfig"
249 source "arch/x86/kvm/Kconfig"
250
251 source "lib/Kconfig"
252 +
253 +config ARCH_HAS_FEATHER_TRACE
254 + def_bool y
255 +
256 +config ARCH_HAS_SEND_PULL_TIMERS
257 + def_bool y
258 +
259 +source "litmus/Kconfig"
260 diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
261 index 1cd6d26..3b0d7ef 100644
262 --- a/arch/x86/include/asm/entry_arch.h
263 +++ b/arch/x86/include/asm/entry_arch.h
264 @@ -13,6 +13,7 @@
265 BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
266 BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
267 BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
268 +BUILD_INTERRUPT(pull_timers_interrupt,PULL_TIMERS_VECTOR)
269 BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
270 BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR)
271
272 diff --git a/arch/x86/include/asm/feather_trace.h b/arch/x86/include/asm/feather_trace.h
273 new file mode 100644
274 index 0000000..4fd3163
275 --- /dev/null
276 +++ b/arch/x86/include/asm/feather_trace.h
277 @@ -0,0 +1,17 @@
278 +#ifndef _ARCH_FEATHER_TRACE_H
279 +#define _ARCH_FEATHER_TRACE_H
280 +
281 +#include <asm/msr.h>
282 +
283 +static inline unsigned long long ft_timestamp(void)
284 +{
285 + return __native_read_tsc();
286 +}
287 +
288 +#ifdef CONFIG_X86_32
289 +#include "feather_trace_32.h"
290 +#else
291 +#include "feather_trace_64.h"
292 +#endif
293 +
294 +#endif
295 diff --git a/arch/x86/include/asm/feather_trace_32.h b/arch/x86/include/asm/feather_trace_32.h
296 new file mode 100644
297 index 0000000..70202f9
298 --- /dev/null
299 +++ b/arch/x86/include/asm/feather_trace_32.h
300 @@ -0,0 +1,79 @@
301 +/* Do not directly include this file. Include feather_trace.h instead */
302 +
303 +#define feather_callback __attribute__((regparm(0)))
304 +
305 +/*
306 + * make the compiler reload any register that is not saved in
307 + * a cdecl function call
308 + */
309 +#define CLOBBER_LIST "memory", "cc", "eax", "ecx", "edx"
310 +
311 +#define ft_event(id, callback) \
312 + __asm__ __volatile__( \
313 + "1: jmp 2f \n\t" \
314 + " call " #callback " \n\t" \
315 + ".section __event_table, \"aw\" \n\t" \
316 + ".long " #id ", 0, 1b, 2f \n\t" \
317 + ".previous \n\t" \
318 + "2: \n\t" \
319 + : : : CLOBBER_LIST)
320 +
321 +#define ft_event0(id, callback) \
322 + __asm__ __volatile__( \
323 + "1: jmp 2f \n\t" \
324 + " subl $4, %%esp \n\t" \
325 + " movl $" #id ", (%%esp) \n\t" \
326 + " call " #callback " \n\t" \
327 + " addl $4, %%esp \n\t" \
328 + ".section __event_table, \"aw\" \n\t" \
329 + ".long " #id ", 0, 1b, 2f \n\t" \
330 + ".previous \n\t" \
331 + "2: \n\t" \
332 + : : : CLOBBER_LIST)
333 +
334 +#define ft_event1(id, callback, param) \
335 + __asm__ __volatile__( \
336 + "1: jmp 2f \n\t" \
337 + " subl $8, %%esp \n\t" \
338 + " movl %0, 4(%%esp) \n\t" \
339 + " movl $" #id ", (%%esp) \n\t" \
340 + " call " #callback " \n\t" \
341 + " addl $8, %%esp \n\t" \
342 + ".section __event_table, \"aw\" \n\t" \
343 + ".long " #id ", 0, 1b, 2f \n\t" \
344 + ".previous \n\t" \
345 + "2: \n\t" \
346 + : : "r" (param) : CLOBBER_LIST)
347 +
348 +#define ft_event2(id, callback, param, param2) \
349 + __asm__ __volatile__( \
350 + "1: jmp 2f \n\t" \
351 + " subl $12, %%esp \n\t" \
352 + " movl %1, 8(%%esp) \n\t" \
353 + " movl %0, 4(%%esp) \n\t" \
354 + " movl $" #id ", (%%esp) \n\t" \
355 + " call " #callback " \n\t" \
356 + " addl $12, %%esp \n\t" \
357 + ".section __event_table, \"aw\" \n\t" \
358 + ".long " #id ", 0, 1b, 2f \n\t" \
359 + ".previous \n\t" \
360 + "2: \n\t" \
361 + : : "r" (param), "r" (param2) : CLOBBER_LIST)
362 +
363 +
364 +#define ft_event3(id, callback, p, p2, p3) \
365 + __asm__ __volatile__( \
366 + "1: jmp 2f \n\t" \
367 + " subl $16, %%esp \n\t" \
368 + " movl %2, 12(%%esp) \n\t" \
369 + " movl %1, 8(%%esp) \n\t" \
370 + " movl %0, 4(%%esp) \n\t" \
371 + " movl $" #id ", (%%esp) \n\t" \
372 + " call " #callback " \n\t" \
373 + " addl $16, %%esp \n\t" \
374 + ".section __event_table, \"aw\" \n\t" \
375 + ".long " #id ", 0, 1b, 2f \n\t" \
376 + ".previous \n\t" \
377 + "2: \n\t" \
378 + : : "r" (p), "r" (p2), "r" (p3) : CLOBBER_LIST)
379 +
380 diff --git a/arch/x86/include/asm/feather_trace_64.h b/arch/x86/include/asm/feather_trace_64.h
381 new file mode 100644
382 index 0000000..54ac2ae
383 --- /dev/null
384 +++ b/arch/x86/include/asm/feather_trace_64.h
385 @@ -0,0 +1,67 @@
386 +/* Do not directly include this file. Include feather_trace.h instead */
387 +
388 +/* regparm is the default on x86_64 */
389 +#define feather_callback
390 +
391 +# define _EVENT_TABLE(id,from,to) \
392 + ".section __event_table, \"aw\"\n\t" \
393 + ".balign 8\n\t" \
394 + ".quad " #id ", 0, " #from ", " #to " \n\t" \
395 + ".previous \n\t"
396 +
397 +/*
398 + * x86_64 callee only owns rbp, rbx, r12 -> r15
399 + * the called can freely modify the others
400 + */
401 +#define CLOBBER_LIST "memory", "cc", "rdi", "rsi", "rdx", "rcx", \
402 + "r8", "r9", "r10", "r11", "rax"
403 +
404 +#define ft_event(id, callback) \
405 + __asm__ __volatile__( \
406 + "1: jmp 2f \n\t" \
407 + " call " #callback " \n\t" \
408 + _EVENT_TABLE(id,1b,2f) \
409 + "2: \n\t" \
410 + : : : CLOBBER_LIST)
411 +
412 +#define ft_event0(id, callback) \
413 + __asm__ __volatile__( \
414 + "1: jmp 2f \n\t" \
415 + " movq $" #id ", %%rdi \n\t" \
416 + " call " #callback " \n\t" \
417 + _EVENT_TABLE(id,1b,2f) \
418 + "2: \n\t" \
419 + : : : CLOBBER_LIST)
420 +
421 +#define ft_event1(id, callback, param) \
422 + __asm__ __volatile__( \
423 + "1: jmp 2f \n\t" \
424 + " movq %0, %%rsi \n\t" \
425 + " movq $" #id ", %%rdi \n\t" \
426 + " call " #callback " \n\t" \
427 + _EVENT_TABLE(id,1b,2f) \
428 + "2: \n\t" \
429 + : : "r" (param) : CLOBBER_LIST)
430 +
431 +#define ft_event2(id, callback, param, param2) \
432 + __asm__ __volatile__( \
433 + "1: jmp 2f \n\t" \
434 + " movq %1, %%rdx \n\t" \
435 + " movq %0, %%rsi \n\t" \
436 + " movq $" #id ", %%rdi \n\t" \
437 + " call " #callback " \n\t" \
438 + _EVENT_TABLE(id,1b,2f) \
439 + "2: \n\t" \
440 + : : "r" (param), "r" (param2) : CLOBBER_LIST)
441 +
442 +#define ft_event3(id, callback, p, p2, p3) \
443 + __asm__ __volatile__( \
444 + "1: jmp 2f \n\t" \
445 + " movq %2, %%rcx \n\t" \
446 + " movq %1, %%rdx \n\t" \
447 + " movq %0, %%rsi \n\t" \
448 + " movq $" #id ", %%rdi \n\t" \
449 + " call " #callback " \n\t" \
450 + _EVENT_TABLE(id,1b,2f) \
451 + "2: \n\t" \
452 + : : "r" (p), "r" (p2), "r" (p3) : CLOBBER_LIST)
453 diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
454 index bb9efe8..c490d89 100644
455 --- a/arch/x86/include/asm/hw_irq.h
456 +++ b/arch/x86/include/asm/hw_irq.h
457 @@ -77,6 +77,8 @@ extern void threshold_interrupt(void);
458 extern void call_function_interrupt(void);
459 extern void call_function_single_interrupt(void);
460
461 +extern void pull_timers_interrupt(void);
462 +
463 /* IOAPIC */
464 #define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs))
465 extern unsigned long io_apic_irqs;
466 @@ -155,6 +157,7 @@ extern asmlinkage void smp_irq_move_cleanup_interrupt(void);
467 extern void smp_reschedule_interrupt(struct pt_regs *);
468 extern void smp_call_function_interrupt(struct pt_regs *);
469 extern void smp_call_function_single_interrupt(struct pt_regs *);
470 +extern void smp_pull_timers_interrupt(struct pt_regs *);
471 #ifdef CONFIG_X86_32
472 extern void smp_invalidate_interrupt(struct pt_regs *);
473 #else
474 diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
475 index 6e976ee..99a44cf 100644
476 --- a/arch/x86/include/asm/irq_vectors.h
477 +++ b/arch/x86/include/asm/irq_vectors.h
478 @@ -135,6 +135,13 @@
479 #define INVALIDATE_TLB_VECTOR_START \
480 (INVALIDATE_TLB_VECTOR_END-NUM_INVALIDATE_TLB_VECTORS+1)
481
482 +/*
483 + * LITMUS^RT pull timers IRQ vector
484 + * Make sure it's below the above max 32 vectors.
485 + */
486 +#define PULL_TIMERS_VECTOR 0xce
487 +
488 +
489 #define NR_VECTORS 256
490
491 #define FPU_IRQ 13
492 diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
493 index 2193715..b844edc 100644
494 --- a/arch/x86/include/asm/processor.h
495 +++ b/arch/x86/include/asm/processor.h
496 @@ -166,6 +166,10 @@ extern void print_cpu_info(struct cpuinfo_x86 *);
497 extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
498 extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
499 extern unsigned short num_cache_leaves;
500 +#ifdef CONFIG_SYSFS
501 +extern int get_shared_cpu_map(cpumask_var_t mask,
502 + unsigned int cpu, int index);
503 +#endif
504
505 extern void detect_extended_topology(struct cpuinfo_x86 *c);
506 extern void detect_ht(struct cpuinfo_x86 *c);
507 diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
508 index 593485b..2f6e127 100644
509 --- a/arch/x86/include/asm/unistd_32.h
510 +++ b/arch/x86/include/asm/unistd_32.h
511 @@ -353,9 +353,13 @@
512 #define __NR_sendmmsg 345
513 #define __NR_setns 346
514
515 +#define __NR_LITMUS 347
516 +
517 +#include "litmus/unistd_32.h"
518 +
519 #ifdef __KERNEL__
520
521 -#define NR_syscalls 347
522 +#define NR_syscalls 347 + NR_litmus_syscalls
523
524 #define __ARCH_WANT_IPC_PARSE_VERSION
525 #define __ARCH_WANT_OLD_READDIR
526 diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
527 index 705bf13..e347f07 100644
528 --- a/arch/x86/include/asm/unistd_64.h
529 +++ b/arch/x86/include/asm/unistd_64.h
530 @@ -682,6 +682,10 @@ __SYSCALL(__NR_sendmmsg, sys_sendmmsg)
531 #define __NR_setns 308
532 __SYSCALL(__NR_setns, sys_setns)
533
534 +#define __NR_LITMUS 309
535 +
536 +#include "litmus/unistd_64.h"
537 +
538 #ifndef __NO_STUBS
539 #define __ARCH_WANT_OLD_READDIR
540 #define __ARCH_WANT_OLD_STAT
541 diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
542 index 90b06d4..d727f8f 100644
543 --- a/arch/x86/kernel/Makefile
544 +++ b/arch/x86/kernel/Makefile
545 @@ -116,6 +116,8 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
546 obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
547 obj-$(CONFIG_OF) += devicetree.o
548
549 +obj-$(CONFIG_FEATHER_TRACE) += ft_event.o
550 +
551 ###
552 # 64 bit specific files
553 ifeq ($(CONFIG_X86_64),y)
554 diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
555 index c105c53..0bf1264 100644
556 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c
557 +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
558 @@ -747,6 +747,23 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
559 static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
560 #define CPUID4_INFO_IDX(x, y) (&((per_cpu(ici_cpuid4_info, x))[y]))
561
562 +/* returns CPUs that share the index cache with cpu */
563 +int get_shared_cpu_map(cpumask_var_t mask, unsigned int cpu, int index)
564 +{
565 + int ret = 0;
566 + struct _cpuid4_info *this_leaf;
567 +
568 + if (index >= num_cache_leaves) {
569 + index = num_cache_leaves - 1;
570 + ret = index;
571 + }
572 +
573 + this_leaf = CPUID4_INFO_IDX(cpu,index);
574 + cpumask_copy(mask, to_cpumask(this_leaf->shared_cpu_map));
575 +
576 + return ret;
577 +}
578 +
579 #ifdef CONFIG_SMP
580 static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
581 {
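
Illustrative only (not part of the patch): a hypothetical caller of the get_shared_cpu_map() helper added above. Cache index 2 is assumed to correspond to the L2 leaf on typical Intel enumerations; the function returns 0 unless the requested index had to be clamped to the last available cache leaf:

	cpumask_var_t shared;

	if (alloc_cpumask_var(&shared, GFP_KERNEL)) {
		int used = get_shared_cpu_map(shared, smp_processor_id(), 2);

		if (used != 0)
			printk(KERN_INFO "only %d cache leaves; used leaf %d\n",
			       used + 1, used);
		free_cpumask_var(shared);
	}
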
582 diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
583 index 8a445a0..47a4bcd 100644
584 --- a/arch/x86/kernel/entry_64.S
585 +++ b/arch/x86/kernel/entry_64.S
586 @@ -1003,6 +1003,8 @@ apicinterrupt CALL_FUNCTION_VECTOR \
587 call_function_interrupt smp_call_function_interrupt
588 apicinterrupt RESCHEDULE_VECTOR \
589 reschedule_interrupt smp_reschedule_interrupt
590 +apicinterrupt PULL_TIMERS_VECTOR \
591 + pull_timers_interrupt smp_pull_timers_interrupt
592 #endif
593
594 apicinterrupt ERROR_APIC_VECTOR \
595 diff --git a/arch/x86/kernel/ft_event.c b/arch/x86/kernel/ft_event.c
596 new file mode 100644
597 index 0000000..37cc332
598 --- /dev/null
599 +++ b/arch/x86/kernel/ft_event.c
600 @@ -0,0 +1,118 @@
601 +#include <linux/types.h>
602 +
603 +#include <litmus/feather_trace.h>
604 +
605 +/* the feather trace management functions assume
606 + * exclusive access to the event table
607 + */
608 +
609 +#ifndef CONFIG_DEBUG_RODATA
610 +
611 +#define BYTE_JUMP 0xeb
612 +#define BYTE_JUMP_LEN 0x02
613 +
614 +/* for each event, there is an entry in the event table */
615 +struct trace_event {
616 + long id;
617 + long count;
618 + long start_addr;
619 + long end_addr;
620 +};
621 +
622 +extern struct trace_event __start___event_table[];
623 +extern struct trace_event __stop___event_table[];
624 +
625 +/* Workaround: if no events are defined, then the event_table section does not
626 + * exist and the above references cause linker errors. This could probably be
627 + * fixed by adjusting the linker script, but it is easier to maintain for us if
628 + * we simply create a dummy symbol in the event table section.
629 + */
630 +int __event_table_dummy[0] __attribute__ ((section("__event_table")));
631 +
632 +int ft_enable_event(unsigned long id)
633 +{
634 + struct trace_event* te = __start___event_table;
635 + int count = 0;
636 + char* delta;
637 + unsigned char* instr;
638 +
639 + while (te < __stop___event_table) {
640 + if (te->id == id && ++te->count == 1) {
641 + instr = (unsigned char*) te->start_addr;
642 + /* make sure we don't clobber something wrong */
643 + if (*instr == BYTE_JUMP) {
644 + delta = (((unsigned char*) te->start_addr) + 1);
645 + *delta = 0;
646 + }
647 + }
648 + if (te->id == id)
649 + count++;
650 + te++;
651 + }
652 +
653 + printk(KERN_DEBUG "ft_enable_event: enabled %d events\n", count);
654 + return count;
655 +}
656 +
657 +int ft_disable_event(unsigned long id)
658 +{
659 + struct trace_event* te = __start___event_table;
660 + int count = 0;
661 + char* delta;
662 + unsigned char* instr;
663 +
664 + while (te < __stop___event_table) {
665 + if (te->id == id && --te->count == 0) {
666 + instr = (unsigned char*) te->start_addr;
667 + if (*instr == BYTE_JUMP) {
668 + delta = (((unsigned char*) te->start_addr) + 1);
669 + *delta = te->end_addr - te->start_addr -
670 + BYTE_JUMP_LEN;
671 + }
672 + }
673 + if (te->id == id)
674 + count++;
675 + te++;
676 + }
677 +
678 + printk(KERN_DEBUG "ft_disable_event: disabled %d events\n", count);
679 + return count;
680 +}
681 +
682 +int ft_disable_all_events(void)
683 +{
684 + struct trace_event* te = __start___event_table;
685 + int count = 0;
686 + char* delta;
687 + unsigned char* instr;
688 +
689 + while (te < __stop___event_table) {
690 + if (te->count) {
691 + instr = (unsigned char*) te->start_addr;
692 + if (*instr == BYTE_JUMP) {
693 + delta = (((unsigned char*) te->start_addr)
694 + + 1);
695 + *delta = te->end_addr - te->start_addr -
696 + BYTE_JUMP_LEN;
697 + te->count = 0;
698 + count++;
699 + }
700 + }
701 + te++;
702 + }
703 + return count;
704 +}
705 +
706 +int ft_is_event_enabled(unsigned long id)
707 +{
708 + struct trace_event* te = __start___event_table;
709 +
710 + while (te < __stop___event_table) {
711 + if (te->id == id)
712 + return te->count;
713 + te++;
714 + }
715 + return 0;
716 +}
717 +
718 +#endif
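
Illustrative only (not part of the patch): how a static Feather-Trace trigger point pairs up with the jump-rewriting machinery above. The event ID 501 and the function names are made up; until ft_enable_event(501) patches the two-byte jump at the trigger site, ft_event0() falls through without calling the probe:

	#include <linux/kernel.h>
	#include <litmus/feather_trace.h>

	feather_callback void demo_probe(unsigned long id)
	{
		printk(KERN_DEBUG "feather-trace event %lu fired\n", id);
	}

	static void traced_path(void)
	{
		ft_event0(501, demo_probe);	/* no-op until enabled */
	}
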
719 diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
720 index f470e4e..48acf71 100644
721 --- a/arch/x86/kernel/irqinit.c
722 +++ b/arch/x86/kernel/irqinit.c
723 @@ -252,6 +252,9 @@ static void __init smp_intr_init(void)
724 alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
725 call_function_single_interrupt);
726
727 + /* IPI for hrtimer pulling on remote cpus */
728 + alloc_intr_gate(PULL_TIMERS_VECTOR, pull_timers_interrupt);
729 +
730 /* Low priority IPI to cleanup after moving an irq */
731 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
732 set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
733 diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
734 index 013e7eb..ed4c4f5 100644
735 --- a/arch/x86/kernel/smp.c
736 +++ b/arch/x86/kernel/smp.c
737 @@ -23,6 +23,10 @@
738 #include <linux/cpu.h>
739 #include <linux/gfp.h>
740
741 +#include <litmus/preempt.h>
742 +#include <litmus/debug_trace.h>
743 +#include <litmus/trace.h>
744 +
745 #include <asm/mtrr.h>
746 #include <asm/tlbflush.h>
747 #include <asm/mmu_context.h>
748 @@ -118,6 +122,7 @@ static void native_smp_send_reschedule(int cpu)
749 WARN_ON(1);
750 return;
751 }
752 + TS_SEND_RESCHED_START(cpu);
753 apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
754 }
755
756 @@ -147,6 +152,16 @@ void native_send_call_func_ipi(const struct cpumask *mask)
757 free_cpumask_var(allbutself);
758 }
759
760 +/* trigger timers on remote cpu */
761 +void smp_send_pull_timers(int cpu)
762 +{
763 + if (unlikely(cpu_is_offline(cpu))) {
764 + WARN_ON(1);
765 + return;
766 + }
767 + apic->send_IPI_mask(cpumask_of(cpu), PULL_TIMERS_VECTOR);
768 +}
769 +
770 /*
771 * this function calls the 'stop' function on all other CPUs in the system.
772 */
773 @@ -199,8 +214,15 @@ static void native_stop_other_cpus(int wait)
774 void smp_reschedule_interrupt(struct pt_regs *regs)
775 {
776 ack_APIC_irq();
777 + /* LITMUS^RT: this IPI might need to trigger the sched state machine. */
778 + sched_state_ipi();
779 inc_irq_stat(irq_resched_count);
780 + /*
781 + * LITMUS^RT: starting from 3.0, scheduler_ipi() actually does something.
782 + * This may increase IPI latencies compared with previous versions.
783 + */
784 scheduler_ipi();
785 + TS_SEND_RESCHED_END;
786 /*
787 * KVM uses this interrupt to force a cpu out of guest mode
788 */
789 @@ -224,6 +246,15 @@ void smp_call_function_single_interrupt(struct pt_regs *regs)
790 irq_exit();
791 }
792
793 +extern void hrtimer_pull(void);
794 +
795 +void smp_pull_timers_interrupt(struct pt_regs *regs)
796 +{
797 + ack_APIC_irq();
798 + TRACE("pull timer interrupt\n");
799 + hrtimer_pull();
800 +}
801 +
802 struct smp_ops smp_ops = {
803 .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu,
804 .smp_prepare_cpus = native_smp_prepare_cpus,
805 diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
806 index fbb0a04..d012622 100644
807 --- a/arch/x86/kernel/syscall_table_32.S
808 +++ b/arch/x86/kernel/syscall_table_32.S
809 @@ -346,3 +346,15 @@ ENTRY(sys_call_table)
810 .long sys_syncfs
811 .long sys_sendmmsg /* 345 */
812 .long sys_setns
813 + .long sys_set_rt_task_param /* LITMUS^RT 347 */
814 + .long sys_get_rt_task_param
815 + .long sys_complete_job
816 + .long sys_od_open
817 + .long sys_od_close
818 + .long sys_litmus_lock /* +5 */
819 + .long sys_litmus_unlock
820 + .long sys_query_job_no
821 + .long sys_wait_for_job_release
822 + .long sys_wait_for_ts_release
823 + .long sys_release_ts /* +10 */
824 + .long sys_null_call
825 diff --git a/fs/exec.c b/fs/exec.c
826 index 6075a1e..9984562 100644
827 --- a/fs/exec.c
828 +++ b/fs/exec.c
829 @@ -19,7 +19,7 @@
830 * current->executable is only used by the procfs. This allows a dispatch
831 * table to check for several different types of binary formats. We keep
832 * trying until we recognize the file or we run out of supported binary
833 - * formats.
834 + * formats.
835 */
836
837 #include <linux/slab.h>
838 @@ -56,6 +56,8 @@
839 #include <linux/oom.h>
840 #include <linux/compat.h>
841
842 +#include <litmus/litmus.h>
843 +
844 #include <asm/uaccess.h>
845 #include <asm/mmu_context.h>
846 #include <asm/tlb.h>
847 @@ -85,7 +87,7 @@ int __register_binfmt(struct linux_binfmt * fmt, int insert)
848 insert ? list_add(&fmt->lh, &formats) :
849 list_add_tail(&fmt->lh, &formats);
850 write_unlock(&binfmt_lock);
851 - return 0;
852 + return 0;
853 }
854
855 EXPORT_SYMBOL(__register_binfmt);
856 @@ -1160,7 +1162,7 @@ void setup_new_exec(struct linux_binprm * bprm)
857 group */
858
859 current->self_exec_id++;
860 -
861 +
862 flush_signal_handlers(current, 0);
863 flush_old_files(current->files);
864 }
865 @@ -1250,8 +1252,8 @@ int check_unsafe_exec(struct linux_binprm *bprm)
866 return res;
867 }
868
869 -/*
870 - * Fill the binprm structure from the inode.
871 +/*
872 + * Fill the binprm structure from the inode.
873 * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes
874 *
875 * This may be called multiple times for binary chains (scripts for example).
876 @@ -1459,6 +1461,7 @@ static int do_execve_common(const char *filename,
877 goto out_unmark;
878
879 sched_exec();
880 + litmus_exec();
881
882 bprm->file = file;
883 bprm->filename = filename;
884 diff --git a/fs/inode.c b/fs/inode.c
885 index 43566d1..dbf0e76 100644
886 --- a/fs/inode.c
887 +++ b/fs/inode.c
888 @@ -308,6 +308,8 @@ void inode_init_once(struct inode *inode)
889 #ifdef CONFIG_FSNOTIFY
890 INIT_HLIST_HEAD(&inode->i_fsnotify_marks);
891 #endif
892 + INIT_LIST_HEAD(&inode->i_obj_list);
893 + mutex_init(&inode->i_obj_mutex);
894 }
895 EXPORT_SYMBOL(inode_init_once);
896
897 diff --git a/include/linux/completion.h b/include/linux/completion.h
898 index 51494e6..9d72727 100644
899 --- a/include/linux/completion.h
900 +++ b/include/linux/completion.h
901 @@ -90,6 +90,7 @@ extern bool completion_done(struct completion *x);
902
903 extern void complete(struct completion *);
904 extern void complete_all(struct completion *);
905 +extern void complete_n(struct completion *, int n);
906
907 /**
908 * INIT_COMPLETION - reinitialize a completion structure
909 diff --git a/include/linux/fs.h b/include/linux/fs.h
910 index b5b9792..8d5834b 100644
911 --- a/include/linux/fs.h
912 +++ b/include/linux/fs.h
913 @@ -17,8 +17,8 @@
914 * nr_file rlimit, so it's safe to set up a ridiculously high absolute
915 * upper limit on files-per-process.
916 *
917 - * Some programs (notably those using select()) may have to be
918 - * recompiled to take full advantage of the new limits..
919 + * Some programs (notably those using select()) may have to be
920 + * recompiled to take full advantage of the new limits..
921 */
922
923 /* Fixed constants first: */
924 @@ -172,7 +172,7 @@ struct inodes_stat_t {
925 #define SEL_EX 4
926
927 /* public flags for file_system_type */
928 -#define FS_REQUIRES_DEV 1
929 +#define FS_REQUIRES_DEV 1
930 #define FS_BINARY_MOUNTDATA 2
931 #define FS_HAS_SUBTYPE 4
932 #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */
933 @@ -480,7 +480,7 @@ struct iattr {
934 */
935 #include <linux/quota.h>
936
937 -/**
938 +/**
939 * enum positive_aop_returns - aop return codes with specific semantics
940 *
941 * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has
942 @@ -490,7 +490,7 @@ struct iattr {
943 * be a candidate for writeback again in the near
944 * future. Other callers must be careful to unlock
945 * the page if they get this return. Returned by
946 - * writepage();
947 + * writepage();
948 *
949 * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has
950 * unlocked it and the page might have been truncated.
951 @@ -734,6 +734,7 @@ static inline int mapping_writably_mapped(struct address_space *mapping)
952
953 struct posix_acl;
954 #define ACL_NOT_CACHED ((void *)(-1))
955 +struct inode_obj_id_table;
956
957 struct inode {
958 /* RCU path lookup touches following: */
959 @@ -807,6 +808,8 @@ struct inode {
960 struct posix_acl *i_acl;
961 struct posix_acl *i_default_acl;
962 #endif
963 + struct list_head i_obj_list;
964 + struct mutex i_obj_mutex;
965 void *i_private; /* fs or device private pointer */
966 };
967
968 @@ -1032,10 +1035,10 @@ static inline int file_check_writeable(struct file *filp)
969
970 #define MAX_NON_LFS ((1UL<<31) - 1)
971
972 -/* Page cache limit. The filesystems should put that into their s_maxbytes
973 - limits, otherwise bad things can happen in VM. */
974 +/* Page cache limit. The filesystems should put that into their s_maxbytes
975 + limits, otherwise bad things can happen in VM. */
976 #if BITS_PER_LONG==32
977 -#define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1)
978 +#define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1)
979 #elif BITS_PER_LONG==64
980 #define MAX_LFS_FILESIZE 0x7fffffffffffffffUL
981 #endif
982 @@ -2234,7 +2237,7 @@ extern void free_write_pipe(struct file *);
983
984 extern int kernel_read(struct file *, loff_t, char *, unsigned long);
985 extern struct file * open_exec(const char *);
986 -
987 +
988 /* fs/dcache.c -- generic fs support functions */
989 extern int is_subdir(struct dentry *, struct dentry *);
990 extern int path_is_under(struct path *, struct path *);
991 diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
992 index ba36217..e6dd5a4 100644
993 --- a/include/linux/hardirq.h
994 +++ b/include/linux/hardirq.h
995 @@ -6,6 +6,8 @@
996 #include <linux/ftrace_irq.h>
997 #include <asm/hardirq.h>
998
999 +#include <litmus/trace_irq.h>
1000 +
1001 /*
1002 * We put the hardirq and softirq counter into the preemption
1003 * counter. The bitmask has the following meaning:
1004 @@ -186,6 +188,7 @@ extern void rcu_nmi_exit(void);
1005 account_system_vtime(current); \
1006 add_preempt_count(HARDIRQ_OFFSET); \
1007 trace_hardirq_enter(); \
1008 + ft_irq_fired(); \
1009 } while (0)
1010
1011 /*
1012 @@ -216,6 +219,7 @@ extern void irq_exit(void);
1013 lockdep_off(); \
1014 rcu_nmi_enter(); \
1015 trace_hardirq_enter(); \
1016 + ft_irq_fired(); \
1017 } while (0)
1018
1019 #define nmi_exit() \
1020 diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
1021 index fd0dc30..d91bba5 100644
1022 --- a/include/linux/hrtimer.h
1023 +++ b/include/linux/hrtimer.h
1024 @@ -174,6 +174,7 @@ enum hrtimer_base_type {
1025 * @nr_hangs: Total number of hrtimer interrupt hangs
1026 * @max_hang_time: Maximum time spent in hrtimer_interrupt
1027 * @clock_base: array of clock bases for this cpu
1028 + * @to_pull: LITMUS^RT list of timers to be pulled on this cpu
1029 */
1030 struct hrtimer_cpu_base {
1031 raw_spinlock_t lock;
1032 @@ -188,8 +189,32 @@ struct hrtimer_cpu_base {
1033 ktime_t max_hang_time;
1034 #endif
1035 struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES];
1036 + struct list_head to_pull;
1037 };
1038
1039 +#ifdef CONFIG_ARCH_HAS_SEND_PULL_TIMERS
1040 +
1041 +#define HRTIMER_START_ON_INACTIVE 0
1042 +#define HRTIMER_START_ON_QUEUED 1
1043 +
1044 +/*
1045 + * struct hrtimer_start_on_info - save timer info on remote cpu
1046 + * @list: list of hrtimer_start_on_info on remote cpu (to_pull)
1047 + * @timer: timer to be triggered on remote cpu
1048 + * @time: time event
1049 + * @mode: timer mode
1050 + * @state: activity flag
1051 + */
1052 +struct hrtimer_start_on_info {
1053 + struct list_head list;
1054 + struct hrtimer *timer;
1055 + ktime_t time;
1056 + enum hrtimer_mode mode;
1057 + atomic_t state;
1058 +};
1059 +
1060 +#endif
1061 +
1062 static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time)
1063 {
1064 timer->node.expires = time;
1065 @@ -355,6 +380,13 @@ __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
1066 unsigned long delta_ns,
1067 const enum hrtimer_mode mode, int wakeup);
1068
1069 +#ifdef CONFIG_ARCH_HAS_SEND_PULL_TIMERS
1070 +extern void hrtimer_start_on_info_init(struct hrtimer_start_on_info *info);
1071 +extern int hrtimer_start_on(int cpu, struct hrtimer_start_on_info *info,
1072 + struct hrtimer *timer, ktime_t time,
1073 + const enum hrtimer_mode mode);
1074 +#endif
1075 +
1076 extern int hrtimer_cancel(struct hrtimer *timer);
1077 extern int hrtimer_try_to_cancel(struct hrtimer *timer);
1078
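
Illustrative only (not part of the patch): a hypothetical caller that arms a timer on a remote CPU via the hrtimer_start_on() interface declared above (available only with CONFIG_ARCH_HAS_SEND_PULL_TIMERS). The timer and its expiry callback are assumed to have been initialized as for any other hrtimer; the names are made up:

	static struct hrtimer demo_timer;
	static struct hrtimer_start_on_info demo_info;

	static void arm_remote_timer(int cpu, ktime_t when)
	{
		hrtimer_start_on_info_init(&demo_info);
		hrtimer_start_on(cpu, &demo_info, &demo_timer, when,
				 HRTIMER_MODE_ABS);
	}
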
1079 diff --git a/include/linux/sched.h b/include/linux/sched.h
1080 index 14a6c7b..9c990d1 100644
1081 --- a/include/linux/sched.h
1082 +++ b/include/linux/sched.h
1083 @@ -39,6 +39,7 @@
1084 #define SCHED_BATCH 3
1085 /* SCHED_ISO: reserved but not implemented yet */
1086 #define SCHED_IDLE 5
1087 +#define SCHED_LITMUS 6
1088 /* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */
1089 #define SCHED_RESET_ON_FORK 0x40000000
1090
1091 @@ -93,6 +94,9 @@ struct sched_param {
1092
1093 #include <asm/processor.h>
1094
1095 +#include <litmus/rt_param.h>
1096 +#include <litmus/preempt.h>
1097 +
1098 struct exec_domain;
1099 struct futex_pi_state;
1100 struct robust_list_head;
1101 @@ -1209,6 +1213,7 @@ struct sched_rt_entity {
1102 };
1103
1104 struct rcu_node;
1105 +struct od_table_entry;
1106
1107 enum perf_event_task_context {
1108 perf_invalid_context = -1,
1109 @@ -1313,9 +1318,9 @@ struct task_struct {
1110 unsigned long stack_canary;
1111 #endif
1112
1113 - /*
1114 + /*
1115 * pointers to (original) parent process, youngest child, younger sibling,
1116 - * older sibling, respectively. (p->father can be replaced with
1117 + * older sibling, respectively. (p->father can be replaced with
1118 * p->real_parent->pid)
1119 */
1120 struct task_struct *real_parent; /* real parent process */
1121 @@ -1526,6 +1531,13 @@ struct task_struct {
1122 int make_it_fail;
1123 #endif
1124 struct prop_local_single dirties;
1125 +
1126 + /* LITMUS RT parameters and state */
1127 + struct rt_param rt_param;
1128 +
1129 + /* references to PI semaphores, etc. */
1130 + struct od_table_entry *od_table;
1131 +
1132 #ifdef CONFIG_LATENCYTOP
1133 int latency_record_count;
1134 struct latency_record latency_record[LT_SAVECOUNT];
1135 @@ -2136,7 +2148,7 @@ static inline int dequeue_signal_lock(struct task_struct *tsk, sigset_t *mask, s
1136 spin_unlock_irqrestore(&tsk->sighand->siglock, flags);
1137
1138 return ret;
1139 -}
1140 +}
1141
1142 extern void block_all_signals(int (*notifier)(void *priv), void *priv,
1143 sigset_t *mask);
1144 @@ -2446,6 +2458,7 @@ static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag)
1145 static inline void set_tsk_need_resched(struct task_struct *tsk)
1146 {
1147 set_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
1148 + sched_state_will_schedule(tsk);
1149 }
1150
1151 static inline void clear_tsk_need_resched(struct task_struct *tsk)
1152 diff --git a/include/linux/smp.h b/include/linux/smp.h
1153 index 8cc38d3..53b1bee 100644
1154 --- a/include/linux/smp.h
1155 +++ b/include/linux/smp.h
1156 @@ -82,6 +82,11 @@ int smp_call_function_any(const struct cpumask *mask,
1157 smp_call_func_t func, void *info, int wait);
1158
1159 /*
1160 + * sends a 'pull timer' event to a remote CPU
1161 + */
1162 +extern void smp_send_pull_timers(int cpu);
1163 +
1164 +/*
1165 * Generic and arch helpers
1166 */
1167 #ifdef CONFIG_USE_GENERIC_SMP_HELPERS
1168 diff --git a/include/linux/tick.h b/include/linux/tick.h
1169 index b232ccc..1e29bd5 100644
1170 --- a/include/linux/tick.h
1171 +++ b/include/linux/tick.h
1172 @@ -74,6 +74,11 @@ extern int tick_is_oneshot_available(void);
1173 extern struct tick_device *tick_get_device(int cpu);
1174
1175 # ifdef CONFIG_HIGH_RES_TIMERS
1176 +/* LITMUS^RT tick alignment */
1177 +#define LINUX_DEFAULT_TICKS 0
1178 +#define LITMUS_ALIGNED_TICKS 1
1179 +#define LITMUS_STAGGERED_TICKS 2
1180 +
1181 extern int tick_init_highres(void);
1182 extern int tick_program_event(ktime_t expires, int force);
1183 extern void tick_setup_sched_timer(void);
1184 diff --git a/include/litmus/affinity.h b/include/litmus/affinity.h
1185 new file mode 100644
1186 index 0000000..ca2e442
1187 --- /dev/null
1188 +++ b/include/litmus/affinity.h
1189 @@ -0,0 +1,80 @@
1190 +#ifndef __LITMUS_AFFINITY_H
1191 +#define __LITMUS_AFFINITY_H
1192 +
1193 +#include <linux/cpumask.h>
1194 +
1195 +/*
1196 + L1 (instr) = depth 0
1197 + L1 (data) = depth 1
1198 + L2 = depth 2
1199 + L3 = depth 3
1200 + */
1201 +#define NUM_CACHE_LEVELS 4
1202 +
1203 +struct neighborhood
1204 +{
1205 + unsigned int size[NUM_CACHE_LEVELS];
1206 + cpumask_var_t neighbors[NUM_CACHE_LEVELS];
1207 +};
1208 +
1209 +/* topology info is stored redundantly in a big array for fast lookups */
1210 +extern struct neighborhood neigh_info[NR_CPUS];
1211 +
1212 +void init_topology(void); /* called by Litmus module's _init_litmus() */
1213 +
1214 +/* Works like:
1215 +void get_nearest_available_cpu(
1216 + cpu_entry_t **nearest,
1217 + cpu_entry_t *start,
1218 + cpu_entry_t *entries,
1219 + int release_master)
1220 +
1221 +Set release_master = NO_CPU for no Release Master.
1222 +
1223 +We use a macro here to exploit the fact that C-EDF and G-EDF
1224 +have similar structures for their cpu_entry_t structs, even though
1225 +they do not share a common base-struct. The macro allows us to
1226 +avoid code duplication.
1227 +
1228 +TODO: Factor out the job-to-processor linking from C/G-EDF into
1229 +a reusable "processor mapping". (See B.B.'s RTSS'09 paper &
1230 +dissertation.)
1231 + */
1232 +#define get_nearest_available_cpu(nearest, start, entries, release_master) \
1233 +{ \
1234 + (nearest) = NULL; \
1235 + if (!(start)->linked) { \
1236 + (nearest) = (start); \
1237 + } else { \
1238 + int __level; \
1239 + int __cpu; \
1240 + int __release_master = ((release_master) == NO_CPU) ? -1 : (release_master); \
1241 + struct neighborhood *__neighbors = &neigh_info[(start)->cpu]; \
1242 + \
1243 + for (__level = 0; (__level < NUM_CACHE_LEVELS) && !(nearest); ++__level) { \
1244 + if (__neighbors->size[__level] > 1) { \
1245 + for_each_cpu(__cpu, __neighbors->neighbors[__level]) { \
1246 + if (__cpu != __release_master) { \
1247 + cpu_entry_t *__entry = &per_cpu((entries), __cpu); \
1248 + if (!__entry->linked) { \
1249 + (nearest) = __entry; \
1250 + break; \
1251 + } \
1252 + } \
1253 + } \
1254 + } else if (__neighbors->size[__level] == 0) { \
1255 + break; \
1256 + } \
1257 + } \
1258 + } \
1259 + \
1260 + if ((nearest)) { \
1261 + TRACE("P%d is closest available CPU to P%d\n", \
1262 + (nearest)->cpu, (start)->cpu); \
1263 + } else { \
1264 + TRACE("Could not find an available CPU close to P%d\n", \
1265 + (start)->cpu); \
1266 + } \
1267 +}
1268 +
1269 +#endif
1270 diff --git a/include/litmus/bheap.h b/include/litmus/bheap.h
1271 new file mode 100644
1272 index 0000000..cf4864a
1273 --- /dev/null
1274 +++ b/include/litmus/bheap.h
1275 @@ -0,0 +1,77 @@
1276 +/* bheaps.h -- Binomial Heaps
1277 + *
1278 + * (c) 2008, 2009 Bjoern Brandenburg
1279 + */
1280 +
1281 +#ifndef BHEAP_H
1282 +#define BHEAP_H
1283 +
1284 +#define NOT_IN_HEAP UINT_MAX
1285 +
1286 +struct bheap_node {
1287 + struct bheap_node* parent;
1288 + struct bheap_node* next;
1289 + struct bheap_node* child;
1290 +
1291 + unsigned int degree;
1292 + void* value;
1293 + struct bheap_node** ref;
1294 +};
1295 +
1296 +struct bheap {
1297 + struct bheap_node* head;
1298 + /* We cache the minimum of the heap.
1299 + * This speeds up repeated peek operations.
1300 + */
1301 + struct bheap_node* min;
1302 +};
1303 +
1304 +typedef int (*bheap_prio_t)(struct bheap_node* a, struct bheap_node* b);
1305 +
1306 +void bheap_init(struct bheap* heap);
1307 +void bheap_node_init(struct bheap_node** ref_to_bheap_node_ptr, void* value);
1308 +
1309 +static inline int bheap_node_in_heap(struct bheap_node* h)
1310 +{
1311 + return h->degree != NOT_IN_HEAP;
1312 +}
1313 +
1314 +static inline int bheap_empty(struct bheap* heap)
1315 +{
1316 + return heap->head == NULL && heap->min == NULL;
1317 +}
1318 +
1319 +/* insert (and reinitialize) a node into the heap */
1320 +void bheap_insert(bheap_prio_t higher_prio,
1321 + struct bheap* heap,
1322 + struct bheap_node* node);
1323 +
1324 +/* merge addition into target */
1325 +void bheap_union(bheap_prio_t higher_prio,
1326 + struct bheap* target,
1327 + struct bheap* addition);
1328 +
1329 +struct bheap_node* bheap_peek(bheap_prio_t higher_prio,
1330 + struct bheap* heap);
1331 +
1332 +struct bheap_node* bheap_take(bheap_prio_t higher_prio,
1333 + struct bheap* heap);
1334 +
1335 +void bheap_uncache_min(bheap_prio_t higher_prio, struct bheap* heap);
1336 +int bheap_decrease(bheap_prio_t higher_prio, struct bheap_node* node);
1337 +
1338 +void bheap_delete(bheap_prio_t higher_prio,
1339 + struct bheap* heap,
1340 + struct bheap_node* node);
1341 +
1342 +/* allocate from memcache */
1343 +struct bheap_node* bheap_node_alloc(int gfp_flags);
1344 +void bheap_node_free(struct bheap_node* hn);
1345 +
1346 +/* allocate a heap node for value and insert into the heap */
1347 +int bheap_add(bheap_prio_t higher_prio, struct bheap* heap,
1348 + void* value, int gfp_flags);
1349 +
1350 +void* bheap_take_del(bheap_prio_t higher_prio,
1351 + struct bheap* heap);
1352 +#endif
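
Illustrative only (not part of the patch): a minimal use of the binomial-heap API declared above, with a made-up element type and ordering. A non-zero return from the priority callback means the first node has higher priority:

	#include <linux/gfp.h>
	#include <litmus/bheap.h>

	struct item { int key; };

	static int lower_key_first(struct bheap_node *a, struct bheap_node *b)
	{
		return ((struct item *)a->value)->key <
		       ((struct item *)b->value)->key;
	}

	static void bheap_demo(void)
	{
		struct bheap heap;
		struct item a = { .key = 3 }, b = { .key = 1 };
		struct item *min;

		bheap_init(&heap);
		bheap_add(lower_key_first, &heap, &a, GFP_ATOMIC);
		bheap_add(lower_key_first, &heap, &b, GFP_ATOMIC);

		min = bheap_take_del(lower_key_first, &heap);	/* -> &b (key 1) */
	}
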
1353 diff --git a/include/litmus/budget.h b/include/litmus/budget.h
1354 new file mode 100644
1355 index 0000000..732530e
1356 --- /dev/null
1357 +++ b/include/litmus/budget.h
1358 @@ -0,0 +1,8 @@
1359 +#ifndef _LITMUS_BUDGET_H_
1360 +#define _LITMUS_BUDGET_H_
1361 +
1362 +/* Update the per-processor enforcement timer (arm/reprogram/cancel) for
1363 + * the next task. */
1364 +void update_enforcement_timer(struct task_struct* t);
1365 +
1366 +#endif
1367 diff --git a/include/litmus/clustered.h b/include/litmus/clustered.h
1368 new file mode 100644
1369 index 0000000..0c18dcb
1370 --- /dev/null
1371 +++ b/include/litmus/clustered.h
1372 @@ -0,0 +1,44 @@
1373 +#ifndef CLUSTERED_H
1374 +#define CLUSTERED_H
1375 +
1376 +/* Which cache level should be used to group CPUs into clusters?
1377 + * GLOBAL_CLUSTER means that all CPUs form a single cluster (just like under
1378 + * global scheduling).
1379 + */
1380 +enum cache_level {
1381 + GLOBAL_CLUSTER = 0,
1382 + L1_CLUSTER = 1,
1383 + L2_CLUSTER = 2,
1384 + L3_CLUSTER = 3
1385 +};
1386 +
1387 +int parse_cache_level(const char *str, enum cache_level *level);
1388 +const char* cache_level_name(enum cache_level level);
1389 +
1390 +/* expose a cache level in a /proc dir */
1391 +struct proc_dir_entry* create_cluster_file(struct proc_dir_entry* parent,
1392 + enum cache_level* level);
1393 +
1394 +
1395 +
1396 +struct scheduling_cluster {
1397 + unsigned int id;
1398 + /* list of CPUs that are part of this cluster */
1399 + struct list_head cpus;
1400 +};
1401 +
1402 +struct cluster_cpu {
1403 + unsigned int id; /* which CPU is this? */
1404 + struct list_head cluster_list; /* List of the CPUs in this cluster. */
1405 + struct scheduling_cluster* cluster; /* The cluster that this CPU belongs to. */
1406 +};
1407 +
1408 +int get_cluster_size(enum cache_level level);
1409 +
1410 +int assign_cpus_to_clusters(enum cache_level level,
1411 + struct scheduling_cluster* clusters[],
1412 + unsigned int num_clusters,
1413 + struct cluster_cpu* cpus[],
1414 + unsigned int num_cpus);
1415 +
1416 +#endif
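
Illustrative only (not part of the patch): a sketch of turning a cluster-size string into an enum cache_level with the helpers above. The 0-on-success return convention of parse_cache_level() is an assumption; it is not shown in this header:

	enum cache_level level;

	if (parse_cache_level("L2", &level) == 0)	/* assumes 0 == success */
		printk(KERN_INFO "clustering CPUs at %s\n",
		       cache_level_name(level));
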
1417 diff --git a/include/litmus/debug_trace.h b/include/litmus/debug_trace.h
1418 new file mode 100644
1419 index 0000000..48d086d
1420 --- /dev/null
1421 +++ b/include/litmus/debug_trace.h
1422 @@ -0,0 +1,37 @@
1423 +#ifndef LITMUS_DEBUG_TRACE_H
1424 +#define LITMUS_DEBUG_TRACE_H
1425 +
1426 +#ifdef CONFIG_SCHED_DEBUG_TRACE
1427 +void sched_trace_log_message(const char* fmt, ...);
1428 +void dump_trace_buffer(int max);
1429 +#else
1430 +
1431 +#define sched_trace_log_message(fmt, ...)
1432 +
1433 +#endif
1434 +
1435 +extern atomic_t __log_seq_no;
1436 +
1437 +#ifdef CONFIG_SCHED_DEBUG_TRACE_CALLER
1438 +#define TRACE_PREFIX "%d P%d [%s@%s:%d]: "
1439 +#define TRACE_ARGS atomic_add_return(1, &__log_seq_no), \
1440 + raw_smp_processor_id(), \
1441 + __FUNCTION__, __FILE__, __LINE__
1442 +#else
1443 +#define TRACE_PREFIX "%d P%d: "
1444 +#define TRACE_ARGS atomic_add_return(1, &__log_seq_no), \
1445 + raw_smp_processor_id()
1446 +#endif
1447 +
1448 +#define TRACE(fmt, args...) \
1449 + sched_trace_log_message(TRACE_PREFIX fmt, \
1450 + TRACE_ARGS, ## args)
1451 +
1452 +#define TRACE_TASK(t, fmt, args...) \
1453 + TRACE("(%s/%d:%d) " fmt, (t)->comm, (t)->pid, \
1454 + (t)->rt_param.job_params.job_no, ##args)
1455 +
1456 +#define TRACE_CUR(fmt, args...) \
1457 + TRACE_TASK(current, fmt, ## args)
1458 +
1459 +#endif
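
Illustrative only (not part of the patch): typical invocations of the TRACE* helpers above from plugin code. Output reaches the debug trace buffer only when CONFIG_SCHED_DEBUG_TRACE is set; the task pointer 'prev' is hypothetical:

	TRACE("rescheduling on P%d\n", raw_smp_processor_id());
	TRACE_TASK(prev, "blocked, state = %ld\n", prev->state);
	TRACE_CUR("completed a job\n");
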
1460 diff --git a/include/litmus/edf_common.h b/include/litmus/edf_common.h
1461 new file mode 100644
1462 index 0000000..bbaf22e
1463 --- /dev/null
1464 +++ b/include/litmus/edf_common.h
1465 @@ -0,0 +1,25 @@
1466 +/*
1467 + * EDF common data structures and utility functions shared by all EDF
1468 + * based scheduler plugins
1469 + */
1470 +
1471 +/* CLEANUP: Add comments and make it less messy.
1472 + *
1473 + */
1474 +
1475 +#ifndef __UNC_EDF_COMMON_H__
1476 +#define __UNC_EDF_COMMON_H__
1477 +
1478 +#include <litmus/rt_domain.h>
1479 +
1480 +void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
1481 + release_jobs_t release);
1482 +
1483 +int edf_higher_prio(struct task_struct* first,
1484 + struct task_struct* second);
1485 +
1486 +int edf_ready_order(struct bheap_node* a, struct bheap_node* b);
1487 +
1488 +int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t);
1489 +
1490 +#endif
1491 diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h
1492 new file mode 100644
1493 index 0000000..caf2a1e
1494 --- /dev/null
1495 +++ b/include/litmus/fdso.h
1496 @@ -0,0 +1,71 @@
1497 +/* fdso.h - file descriptor attached shared objects
1498 + *
1499 + * (c) 2007 B. Brandenburg, LITMUS^RT project
1500 + */
1501 +
1502 +#ifndef _LINUX_FDSO_H_
1503 +#define _LINUX_FDSO_H_
1504 +
1505 +#include <linux/list.h>
1506 +#include <asm/atomic.h>
1507 +
1508 +#include <linux/fs.h>
1509 +#include <linux/slab.h>
1510 +
1511 +#define MAX_OBJECT_DESCRIPTORS 32
1512 +
1513 +typedef enum {
1514 + MIN_OBJ_TYPE = 0,
1515 +
1516 + FMLP_SEM = 0,
1517 + SRP_SEM = 1,
1518 +
1519 + MAX_OBJ_TYPE = 1
1520 +} obj_type_t;
1521 +
1522 +struct inode_obj_id {
1523 + struct list_head list;
1524 + atomic_t count;
1525 + struct inode* inode;
1526 +
1527 + obj_type_t type;
1528 + void* obj;
1529 + unsigned int id;
1530 +};
1531 +
1532 +struct fdso_ops;
1533 +
1534 +struct od_table_entry {
1535 + unsigned int used;
1536 +
1537 + struct inode_obj_id* obj;
1538 + const struct fdso_ops* class;
1539 +};
1540 +
1541 +struct fdso_ops {
1542 + int (*create)(void** obj_ref, obj_type_t type, void* __user);
1543 + void (*destroy)(obj_type_t type, void*);
1544 + int (*open) (struct od_table_entry*, void* __user);
1545 + int (*close) (struct od_table_entry*);
1546 +};
1547 +
1548 +/* translate a userspace supplied od into the raw table entry
1549 + * returns NULL if od is invalid
1550 + */
1551 +struct od_table_entry* get_entry_for_od(int od);
1552 +
1553 +/* translate a userspace supplied od into the associated object
1554 + * returns NULL if od is invalid
1555 + */
1556 +static inline void* od_lookup(int od, obj_type_t type)
1557 +{
1558 + struct od_table_entry* e = get_entry_for_od(od);
1559 + return e && e->obj->type == type ? e->obj->obj : NULL;
1560 +}
1561 +
1562 +#define lookup_fmlp_sem(od)((struct pi_semaphore*) od_lookup(od, FMLP_SEM))
1563 +#define lookup_srp_sem(od) ((struct srp_semaphore*) od_lookup(od, SRP_SEM))
1564 +#define lookup_ics(od) ((struct ics*) od_lookup(od, ICS_ID))
1565 +
1566 +
1567 +#endif
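
Illustrative only (not part of the patch): resolving a userspace-supplied object descriptor inside a system call with the helpers above. The descriptor value 'od' is assumed to be what sys_od_open() handed back to the task:

	struct srp_semaphore *sem = lookup_srp_sem(od);

	if (!sem)
		return -EINVAL;
	/* ... operate on the SRP semaphore ... */
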
1568 diff --git a/include/litmus/feather_buffer.h b/include/litmus/feather_buffer.h
1569 new file mode 100644
1570 index 0000000..6c18277
1571 --- /dev/null
1572 +++ b/include/litmus/feather_buffer.h
1573 @@ -0,0 +1,94 @@
1574 +#ifndef _FEATHER_BUFFER_H_
1575 +#define _FEATHER_BUFFER_H_
1576 +
1577 +/* requires UINT_MAX and memcpy */
1578 +
1579 +#define SLOT_FREE 0
1580 +#define SLOT_BUSY 1
1581 +#define SLOT_READY 2
1582 +
1583 +struct ft_buffer {
1584 + unsigned int slot_count;
1585 + unsigned int slot_size;
1586 +
1587 + int free_count;
1588 + unsigned int write_idx;
1589 + unsigned int read_idx;
1590 +
1591 + char* slots;
1592 + void* buffer_mem;
1593 + unsigned int failed_writes;
1594 +};
1595 +
1596 +static inline int init_ft_buffer(struct ft_buffer* buf,
1597 + unsigned int slot_count,
1598 + unsigned int slot_size,
1599 + char* slots,
1600 + void* buffer_mem)
1601 +{
1602 + int i = 0;
1603 + if (!slot_count || UINT_MAX % slot_count != slot_count - 1) {
1604 + /* The slot count must divide UINT_MAX + 1 so that when it
1605 + * wraps around the index correctly points to 0.
1606 + */
1607 + return 0;
1608 + } else {
1609 + buf->slot_count = slot_count;
1610 + buf->slot_size = slot_size;
1611 + buf->slots = slots;
1612 + buf->buffer_mem = buffer_mem;
1613 + buf->free_count = slot_count;
1614 + buf->write_idx = 0;
1615 + buf->read_idx = 0;
1616 + buf->failed_writes = 0;
1617 + for (i = 0; i < slot_count; i++)
1618 + buf->slots[i] = SLOT_FREE;
1619 + return 1;
1620 + }
1621 +}
1622 +
1623 +static inline int ft_buffer_start_write(struct ft_buffer* buf, void **ptr)
1624 +{
1625 + int free = fetch_and_dec(&buf->free_count);
1626 + unsigned int idx;
1627 + if (free <= 0) {
1628 + fetch_and_inc(&buf->free_count);
1629 + *ptr = 0;
1630 + fetch_and_inc(&buf->failed_writes);
1631 + return 0;
1632 + } else {
1633 + idx = fetch_and_inc((int*) &buf->write_idx) % buf->slot_count;
1634 + buf->slots[idx] = SLOT_BUSY;
1635 + *ptr = ((char*) buf->buffer_mem) + idx * buf->slot_size;
1636 + return 1;
1637 + }
1638 +}
1639 +
1640 +static inline void ft_buffer_finish_write(struct ft_buffer* buf, void *ptr)
1641 +{
1642 + unsigned int idx = ((char*) ptr - (char*) buf->buffer_mem) / buf->slot_size;
1643 + buf->slots[idx] = SLOT_READY;
1644 +}
1645 +
1646 +
1647 +/* exclusive reader access is assumed */
1648 +static inline int ft_buffer_read(struct ft_buffer* buf, void* dest)
1649 +{
1650 + unsigned int idx;
1651 + if (buf->free_count == buf->slot_count)
1652 + /* nothing available */
1653 + return 0;
1654 + idx = buf->read_idx % buf->slot_count;
1655 + if (buf->slots[idx] == SLOT_READY) {
1656 + memcpy(dest, ((char*) buf->buffer_mem) + idx * buf->slot_size,
1657 + buf->slot_size);
1658 + buf->slots[idx] = SLOT_FREE;
1659 + buf->read_idx++;
1660 + fetch_and_inc(&buf->free_count);
1661 + return 1;
1662 + } else
1663 + return 0;
1664 +}
1665 +
1666 +
1667 +#endif
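
Illustrative only (not part of the patch): the intended write/read protocol for struct ft_buffer above. The slot count must be a power of two so that it divides UINT_MAX + 1; writers coordinate through the fetch_and_*() helpers from feather_trace.h, while the read side assumes a single consumer. All names are made up:

	#include <linux/kernel.h>
	#include <linux/smp.h>
	#include <litmus/feather_trace.h>
	#include <litmus/feather_buffer.h>

	struct demo_rec { unsigned long long ts; int cpu; };

	static char demo_slots[64];
	static struct demo_rec demo_mem[64];
	static struct ft_buffer demo_buf;

	static void demo_log_one(void)
	{
		struct demo_rec *rec, out;

		/* one-time setup shown inline for brevity */
		init_ft_buffer(&demo_buf, 64, sizeof(struct demo_rec),
			       demo_slots, demo_mem);

		if (ft_buffer_start_write(&demo_buf, (void **)&rec)) {
			rec->ts = ft_timestamp();
			rec->cpu = raw_smp_processor_id();
			ft_buffer_finish_write(&demo_buf, rec);
		}

		if (ft_buffer_read(&demo_buf, &out))
			printk(KERN_DEBUG "read ts=%llu from P%d\n",
			       out.ts, out.cpu);
	}
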
1668 diff --git a/include/litmus/feather_trace.h b/include/litmus/feather_trace.h
1669 new file mode 100644
1670 index 0000000..028dfb2
1671 --- /dev/null
1672 +++ b/include/litmus/feather_trace.h
1673 @@ -0,0 +1,65 @@
1674 +#ifndef _FEATHER_TRACE_H_
1675 +#define _FEATHER_TRACE_H_
1676 +
1677 +#include <asm/atomic.h>
1678 +
1679 +int ft_enable_event(unsigned long id);
1680 +int ft_disable_event(unsigned long id);
1681 +int ft_is_event_enabled(unsigned long id);
1682 +int ft_disable_all_events(void);
1683 +
1684 +/* atomic_* functions are inline anyway */
1685 +static inline int fetch_and_inc(int *val)
1686 +{
1687 + return atomic_add_return(1, (atomic_t*) val) - 1;
1688 +}
1689 +
1690 +static inline int fetch_and_dec(int *val)
1691 +{
1692 + return atomic_sub_return(1, (atomic_t*) val) + 1;
1693 +}
1694 +
1695 +/* Don't use rewriting implementation if kernel text pages are read-only.
1696 + * Ftrace gets around this by using the identity mapping, but that's more
1697 + * effort than is warranted right now for Feather-Trace.
1698 + * Eventually, it may make sense to replace Feather-Trace with ftrace.
1699 + */
1700 +#if defined(CONFIG_ARCH_HAS_FEATHER_TRACE) && !defined(CONFIG_DEBUG_RODATA)
1701 +
1702 +#include <asm/feather_trace.h>
1703 +
1704 +#else /* !__ARCH_HAS_FEATHER_TRACE */
1705 +
1706 +/* provide default implementation */
1707 +
1708 +#include <asm/timex.h> /* for get_cycles() */
1709 +
1710 +static inline unsigned long long ft_timestamp(void)
1711 +{
1712 + return get_cycles();
1713 +}
1714 +
1715 +#define feather_callback
1716 +
1717 +#define MAX_EVENTS 1024
1718 +
1719 +extern int ft_events[MAX_EVENTS];
1720 +
1721 +#define ft_event(id, callback) \
1722 + if (ft_events[id]) callback();
1723 +
1724 +#define ft_event0(id, callback) \
1725 + if (ft_events[id]) callback(id);
1726 +
1727 +#define ft_event1(id, callback, param) \
1728 + if (ft_events[id]) callback(id, param);
1729 +
1730 +#define ft_event2(id, callback, param, param2) \
1731 + if (ft_events[id]) callback(id, param, param2);
1732 +
1733 +#define ft_event3(id, callback, p, p2, p3) \
1734 + if (ft_events[id]) callback(id, p, p2, p3);
1735 +
1736 +#endif /* __ARCH_HAS_FEATHER_TRACE */
1737 +
1738 +#endif
1739 diff --git a/include/litmus/ftdev.h b/include/litmus/ftdev.h
1740 new file mode 100644
1741 index 0000000..0b95987
1742 --- /dev/null
1743 +++ b/include/litmus/ftdev.h
1744 @@ -0,0 +1,55 @@
1745 +#ifndef _LITMUS_FTDEV_H_
1746 +#define _LITMUS_FTDEV_H_
1747 +
1748 +#include <litmus/feather_trace.h>
1749 +#include <litmus/feather_buffer.h>
1750 +#include <linux/mutex.h>
1751 +#include <linux/cdev.h>
1752 +
1753 +#define FTDEV_ENABLE_CMD 0
1754 +#define FTDEV_DISABLE_CMD 1
1755 +
1756 +struct ftdev;
1757 +
1758 +/* return 0 if buffer can be opened, otherwise -$REASON */
1759 +typedef int (*ftdev_can_open_t)(struct ftdev* dev, unsigned int buf_no);
1760 +/* return 0 on success, otherwise -$REASON */
1761 +typedef int (*ftdev_alloc_t)(struct ftdev* dev, unsigned int buf_no);
1762 +typedef void (*ftdev_free_t)(struct ftdev* dev, unsigned int buf_no);
1763 +/* Let devices handle writes from userspace. No synchronization provided. */
1764 +typedef ssize_t (*ftdev_write_t)(struct ft_buffer* buf, size_t len, const char __user *from);
1765 +
1766 +struct ftdev_event;
1767 +
1768 +struct ftdev_minor {
1769 + struct ft_buffer* buf;
1770 + unsigned int readers;
1771 + struct mutex lock;
1772 + /* FIXME: filter for authorized events */
1773 + struct ftdev_event* events;
1774 + struct device* device;
1775 + struct ftdev* ftdev;
1776 +};
1777 +
1778 +struct ftdev {
1779 + dev_t major;
1780 + struct cdev cdev;
1781 + struct class* class;
1782 + const char* name;
1783 + struct ftdev_minor* minor;
1784 + unsigned int minor_cnt;
1785 + ftdev_alloc_t alloc;
1786 + ftdev_free_t free;
1787 + ftdev_can_open_t can_open;
1788 + ftdev_write_t write;
1789 +};
1790 +
1791 +struct ft_buffer* alloc_ft_buffer(unsigned int count, size_t size);
1792 +void free_ft_buffer(struct ft_buffer* buf);
1793 +
1794 +int ftdev_init( struct ftdev* ftdev, struct module* owner,
1795 + const int minor_cnt, const char* name);
1796 +void ftdev_exit(struct ftdev* ftdev);
1797 +int register_ftdev(struct ftdev* ftdev);
1798 +
1799 +#endif
1800 diff --git a/include/litmus/jobs.h b/include/litmus/jobs.h
1801 new file mode 100644
1802 index 0000000..9bd361e
1803 --- /dev/null
1804 +++ b/include/litmus/jobs.h
1805 @@ -0,0 +1,9 @@
1806 +#ifndef __LITMUS_JOBS_H__
1807 +#define __LITMUS_JOBS_H__
1808 +
1809 +void prepare_for_next_period(struct task_struct *t);
1810 +void release_at(struct task_struct *t, lt_t start);
1811 +long complete_job(void);
1812 +
1813 +#endif
1814 +
1815 diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h
1816 new file mode 100644
1817 index 0000000..12af222
1818 --- /dev/null
1819 +++ b/include/litmus/litmus.h
1820 @@ -0,0 +1,275 @@
1821 +/*
1822 + * Constant definitions related to
1823 + * scheduling policy.
1824 + */
1825 +
1826 +#ifndef _LINUX_LITMUS_H_
1827 +#define _LINUX_LITMUS_H_
1828 +
1829 +#include <litmus/debug_trace.h>
1830 +
1831 +#ifdef CONFIG_RELEASE_MASTER
1832 +extern atomic_t release_master_cpu;
1833 +#endif
1834 +
1835 +/* in_list - is a given list_head queued on some list?
1836 + */
1837 +static inline int in_list(struct list_head* list)
1838 +{
1839 + return !( /* case 1: deleted */
1840 + (list->next == LIST_POISON1 &&
1841 + list->prev == LIST_POISON2)
1842 + ||
1843 + /* case 2: initialized */
1844 + (list->next == list &&
1845 + list->prev == list)
1846 + );
1847 +}
1848 +
1849 +struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq);
1850 +
1851 +#define NO_CPU 0xffffffff
1852 +
1853 +void litmus_fork(struct task_struct *tsk);
1854 +void litmus_exec(void);
1855 +/* clean up real-time state of a task */
1856 +void exit_litmus(struct task_struct *dead_tsk);
1857 +
1858 +long litmus_admit_task(struct task_struct *tsk);
1859 +void litmus_exit_task(struct task_struct *tsk);
1860 +
1861 +#define is_realtime(t) ((t)->policy == SCHED_LITMUS)
1862 +#define rt_transition_pending(t) \
1863 + ((t)->rt_param.transition_pending)
1864 +
1865 +#define tsk_rt(t) (&(t)->rt_param)
1866 +
1867 +/* Realtime utility macros */
1868 +#define get_rt_flags(t) (tsk_rt(t)->flags)
1869 +#define set_rt_flags(t,f) (tsk_rt(t)->flags=(f))
1870 +#define get_exec_cost(t) (tsk_rt(t)->task_params.exec_cost)
1871 +#define get_exec_time(t) (tsk_rt(t)->job_params.exec_time)
1872 +#define get_rt_period(t) (tsk_rt(t)->task_params.period)
1873 +#define get_rt_phase(t) (tsk_rt(t)->task_params.phase)
1874 +#define get_partition(t) (tsk_rt(t)->task_params.cpu)
1875 +#define get_deadline(t) (tsk_rt(t)->job_params.deadline)
1876 +#define get_release(t) (tsk_rt(t)->job_params.release)
1877 +#define get_class(t) (tsk_rt(t)->task_params.cls)
1878 +
1879 +#define is_priority_boosted(t) (tsk_rt(t)->priority_boosted)
1880 +#define get_boost_start(t) (tsk_rt(t)->boost_start_time)
1881 +
1882 +inline static int budget_exhausted(struct task_struct* t)
1883 +{
1884 + return get_exec_time(t) >= get_exec_cost(t);
1885 +}
1886 +
1887 +inline static lt_t budget_remaining(struct task_struct* t)
1888 +{
1889 + if (!budget_exhausted(t))
1890 + return get_exec_cost(t) - get_exec_time(t);
1891 + else
1892 + /* avoid overflow */
1893 + return 0;
1894 +}
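
Because lt_t is unsigned, subtracting exec_time from exec_cost after an overrun would wrap around to a huge value; budget_remaining() guards against that by checking budget_exhausted() first. A minimal user-space illustration of the same logic (type name reused purely for clarity):

#include <stdio.h>

typedef unsigned long long lt_t;

static lt_t budget_remaining(lt_t exec_cost, lt_t exec_time)
{
	if (exec_time < exec_cost)
		return exec_cost - exec_time;
	else
		return 0; /* avoid unsigned wrap-around on overrun */
}

int main(void)
{
	printf("%llu\n", budget_remaining(10000, 2500));  /* 7500 ns left */
	printf("%llu\n", budget_remaining(10000, 12000)); /* overran: 0, not ~2^64 */
	return 0;
}
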
1895 +
1896 +#define budget_enforced(t) (tsk_rt(t)->task_params.budget_policy != NO_ENFORCEMENT)
1897 +
1898 +#define budget_precisely_enforced(t) (tsk_rt(t)->task_params.budget_policy \
1899 + == PRECISE_ENFORCEMENT)
1900 +
1901 +#define is_hrt(t) \
1902 + (tsk_rt(t)->task_params.cls == RT_CLASS_HARD)
1903 +#define is_srt(t) \
1904 + (tsk_rt(t)->task_params.cls == RT_CLASS_SOFT)
1905 +#define is_be(t) \
1906 + (tsk_rt(t)->task_params.cls == RT_CLASS_BEST_EFFORT)
1907 +
1908 +/* Our notion of time within LITMUS: kernel monotonic time. */
1909 +static inline lt_t litmus_clock(void)
1910 +{
1911 + return ktime_to_ns(ktime_get());
1912 +}
1913 +
1914 +/* A macro to convert from nanoseconds to ktime_t. */
1915 +#define ns_to_ktime(t) ktime_add_ns(ktime_set(0, 0), t)
1916 +
1917 +#define get_domain(t) (tsk_rt(t)->domain)
1918 +
1919 +/* Honor the flag in the preempt_count variable that is set
1920 + * when scheduling is in progress.
1921 + */
1922 +#define is_running(t) \
1923 + ((t)->state == TASK_RUNNING || \
1924 + task_thread_info(t)->preempt_count & PREEMPT_ACTIVE)
1925 +
1926 +#define is_blocked(t) \
1927 + (!is_running(t))
1928 +#define is_released(t, now) \
1929 + (lt_before_eq(get_release(t), now))
1930 +#define is_tardy(t, now) \
1931 + (lt_before_eq(tsk_rt(t)->job_params.deadline, now))
1932 +
1933 +/* real-time comparison macros */
1934 +#define earlier_deadline(a, b) (lt_before(\
1935 + (a)->rt_param.job_params.deadline,\
1936 + (b)->rt_param.job_params.deadline))
1937 +#define earlier_release(a, b) (lt_before(\
1938 + (a)->rt_param.job_params.release,\
1939 + (b)->rt_param.job_params.release))
1940 +
1941 +void preempt_if_preemptable(struct task_struct* t, int on_cpu);
1942 +
1943 +#ifdef CONFIG_LITMUS_LOCKING
1944 +void srp_ceiling_block(void);
1945 +#else
1946 +#define srp_ceiling_block() /* nothing */
1947 +#endif
1948 +
1949 +#define bheap2task(hn) ((struct task_struct*) hn->value)
1950 +
1951 +#ifdef CONFIG_NP_SECTION
1952 +
1953 +static inline int is_kernel_np(struct task_struct *t)
1954 +{
1955 + return tsk_rt(t)->kernel_np;
1956 +}
1957 +
1958 +static inline int is_user_np(struct task_struct *t)
1959 +{
1960 + return tsk_rt(t)->ctrl_page ? tsk_rt(t)->ctrl_page->sched.np.flag : 0;
1961 +}
1962 +
1963 +static inline void request_exit_np(struct task_struct *t)
1964 +{
1965 + if (is_user_np(t)) {
1966 + /* Set the flag that tells user space to call
1967 + * into the kernel at the end of a critical section. */
1968 + if (likely(tsk_rt(t)->ctrl_page)) {
1969 + TRACE_TASK(t, "setting delayed_preemption flag\n");
1970 + tsk_rt(t)->ctrl_page->sched.np.preempt = 1;
1971 + }
1972 + }
1973 +}
1974 +
1975 +static inline void make_np(struct task_struct *t)
1976 +{
1977 + tsk_rt(t)->kernel_np++;
1978 +}
1979 +
1980 +/* Caller should check if preemption is necessary when
1981 + * the function returns 0.
1982 + */
1983 +static inline int take_np(struct task_struct *t)
1984 +{
1985 + return --tsk_rt(t)->kernel_np;
1986 +}
1987 +
1988 +/* returns 0 if remote CPU needs an IPI to preempt, 1 if no IPI is required */
1989 +static inline int request_exit_np_atomic(struct task_struct *t)
1990 +{
1991 + union np_flag old, new;
1992 +
1993 + if (tsk_rt(t)->ctrl_page) {
1994 + old.raw = tsk_rt(t)->ctrl_page->sched.raw;
1995 + if (old.np.flag == 0) {
1996 + /* no longer non-preemptive */
1997 + return 0;
1998 + } else if (old.np.preempt) {
1999 + /* already set, nothing for us to do */
2000 + return 1;
2001 + } else {
2002 +			/* non-preemptive and flag not set */
2003 + new.raw = old.raw;
2004 + new.np.preempt = 1;
2005 + /* if we get old back, then we atomically set the flag */
2006 + return cmpxchg(&tsk_rt(t)->ctrl_page->sched.raw, old.raw, new.raw) == old.raw;
2007 + /* If we raced with a concurrent change, then so be
2008 + * it. Deliver it by IPI. We don't want an unbounded
2009 + * retry loop here since tasks might exploit that to
2010 + * keep the kernel busy indefinitely. */
2011 + }
2012 + } else
2013 + return 0;
2014 +}
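
request_exit_np_atomic() must set the preempt bit without clobbering a concurrent update of the flag bits by user space, hence the single compare-and-swap on the raw 32-bit word instead of a read-modify-write or a retry loop. A hedged user-space sketch of the same idea, using the GCC/Clang __sync builtin in place of the kernel's cmpxchg():

#include <stdint.h>
#include <stdio.h>

/* same layout as union np_flag in rt_param.h */
union np_flag {
	uint32_t raw;
	struct {
		uint32_t flag:31;   /* task is in a non-preemptive section? */
		uint32_t preempt:1; /* should it call into the scheduler? */
	} np;
};

/* returns 1 if the delayed-preemption request was delivered without an IPI */
static int request_exit_np_atomic(union np_flag *sched)
{
	union np_flag old, new;

	old.raw = sched->raw;
	if (old.np.flag == 0)
		return 0;           /* no longer non-preemptive */
	if (old.np.preempt)
		return 1;           /* already requested */
	new.raw = old.raw;
	new.np.preempt = 1;
	/* single attempt: if we race, fall back to an IPI instead of looping */
	return __sync_val_compare_and_swap(&sched->raw, old.raw, new.raw) == old.raw;
}

int main(void)
{
	union np_flag ctrl = { .raw = 0 };

	ctrl.np.flag = 1;   /* task announces a non-preemptive section */
	printf("delivered without IPI: %d\n", request_exit_np_atomic(&ctrl));
	printf("preempt bit now set:   %u\n", ctrl.np.preempt);
	return 0;
}
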
2015 +
2016 +#else
2017 +
2018 +static inline int is_kernel_np(struct task_struct* t)
2019 +{
2020 + return 0;
2021 +}
2022 +
2023 +static inline int is_user_np(struct task_struct* t)
2024 +{
2025 + return 0;
2026 +}
2027 +
2028 +static inline void request_exit_np(struct task_struct *t)
2029 +{
2030 + /* request_exit_np() shouldn't be called if !CONFIG_NP_SECTION */
2031 + BUG();
2032 +}
2033 +
2034 +static inline int request_exit_np_atomic(struct task_struct *t)
2035 +{
2036 + return 0;
2037 +}
2038 +
2039 +#endif
2040 +
2041 +static inline void clear_exit_np(struct task_struct *t)
2042 +{
2043 + if (likely(tsk_rt(t)->ctrl_page))
2044 + tsk_rt(t)->ctrl_page->sched.np.preempt = 0;
2045 +}
2046 +
2047 +static inline int is_np(struct task_struct *t)
2048 +{
2049 +#ifdef CONFIG_SCHED_DEBUG_TRACE
2050 + int kernel, user;
2051 + kernel = is_kernel_np(t);
2052 + user = is_user_np(t);
2053 + if (kernel || user)
2054 +		TRACE_TASK(t, " is non-preemptive: kernel=%d user=%d\n",
2055 +			   kernel, user);
2056 +
2057 + return kernel || user;
2058 +#else
2059 + return unlikely(is_kernel_np(t) || is_user_np(t));
2060 +#endif
2061 +}
2062 +
2063 +static inline int is_present(struct task_struct* t)
2064 +{
2065 + return t && tsk_rt(t)->present;
2066 +}
2067 +
2068 +
2069 +/* make the unit explicit */
2070 +typedef unsigned long quanta_t;
2071 +
2072 +enum round {
2073 + FLOOR,
2074 + CEIL
2075 +};
2076 +
2077 +
2078 +/* Tick period is used to convert ns-specified execution
2079 + * costs and periods into tick-based equivalents.
2080 + */
2081 +extern ktime_t tick_period;
2082 +
2083 +static inline quanta_t time2quanta(lt_t time, enum round round)
2084 +{
2085 + s64 quantum_length = ktime_to_ns(tick_period);
2086 +
2087 + if (do_div(time, quantum_length) && round == CEIL)
2088 + time++;
2089 + return (quanta_t) time;
2090 +}
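
time2quanta() divides a nanosecond value by the tick period and bumps the result when a CEIL conversion would otherwise truncate a partial quantum. A small user-space check of the same arithmetic; do_div() is replaced by plain division/modulo, and the 1 ms quantum is just an example value, not the kernel's tick_period:

#include <stdio.h>

typedef unsigned long long lt_t;
typedef unsigned long quanta_t;

enum round { FLOOR, CEIL };

static quanta_t time2quanta(lt_t time_ns, lt_t quantum_ns, enum round round)
{
	quanta_t q = time_ns / quantum_ns;

	if ((time_ns % quantum_ns) && round == CEIL)
		q++; /* partial quantum counts as a full one when rounding up */
	return q;
}

int main(void)
{
	lt_t quantum = 1000000; /* assume a 1 ms scheduling quantum */

	printf("%lu\n", time2quanta(2500000, quantum, FLOOR)); /* 2 */
	printf("%lu\n", time2quanta(2500000, quantum, CEIL));  /* 3 */
	printf("%lu\n", time2quanta(3000000, quantum, CEIL));  /* exactly 3, no bump */
	return 0;
}
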
2091 +
2092 +/* By how much is cpu staggered behind CPU 0? */
2093 +u64 cpu_stagger_offset(int cpu);
2094 +
2095 +#endif
2096 diff --git a/include/litmus/litmus_proc.h b/include/litmus/litmus_proc.h
2097 new file mode 100644
2098 index 0000000..6800e72
2099 --- /dev/null
2100 +++ b/include/litmus/litmus_proc.h
2101 @@ -0,0 +1,25 @@
2102 +#include <litmus/sched_plugin.h>
2103 +#include <linux/proc_fs.h>
2104 +
2105 +int __init init_litmus_proc(void);
2106 +void exit_litmus_proc(void);
2107 +
2108 +/*
2109 + * On success, returns 0 and sets the pointer to the location of the new
2110 + * proc dir entry, otherwise returns an error code and sets pde to NULL.
2111 + */
2112 +long make_plugin_proc_dir(struct sched_plugin* plugin,
2113 + struct proc_dir_entry** pde);
2114 +
2115 +/*
2116 + * Plugins should deallocate all child proc directory entries before
2117 + * calling this, to avoid memory leaks.
2118 + */
2119 +void remove_plugin_proc_dir(struct sched_plugin* plugin);
2120 +
2121 +
2122 +/* Copy at most ksize-1 bytes from ubuf into kbuf, null-terminate kbuf, and
2123 + * remove a '\n' if present. Returns the number of bytes that were read or
2124 + * -EFAULT. */
2125 +int copy_and_chomp(char *kbuf, unsigned long ksize,
2126 + __user const char* ubuf, unsigned long ulength);
2127 diff --git a/include/litmus/locking.h b/include/litmus/locking.h
2128 new file mode 100644
2129 index 0000000..4d7b870
2130 --- /dev/null
2131 +++ b/include/litmus/locking.h
2132 @@ -0,0 +1,28 @@
2133 +#ifndef LITMUS_LOCKING_H
2134 +#define LITMUS_LOCKING_H
2135 +
2136 +struct litmus_lock_ops;
2137 +
2138 +/* Generic base struct for LITMUS^RT userspace semaphores.
2139 + * This structure should be embedded in protocol-specific semaphores.
2140 + */
2141 +struct litmus_lock {
2142 + struct litmus_lock_ops *ops;
2143 + int type;
2144 +};
2145 +
2146 +struct litmus_lock_ops {
2147 + /* Current task tries to obtain / drop a reference to a lock.
2148 + * Optional methods, allowed by default. */
2149 + int (*open)(struct litmus_lock*, void* __user);
2150 + int (*close)(struct litmus_lock*);
2151 +
2152 +	/* Current task tries to lock/unlock this lock (mandatory methods). */
2153 + int (*lock)(struct litmus_lock*);
2154 + int (*unlock)(struct litmus_lock*);
2155 +
2156 + /* The lock is no longer being referenced (mandatory method). */
2157 + void (*deallocate)(struct litmus_lock*);
2158 +};
2159 +
2160 +#endif
2161 diff --git a/include/litmus/preempt.h b/include/litmus/preempt.h
2162 new file mode 100644
2163 index 0000000..380b886
2164 --- /dev/null
2165 +++ b/include/litmus/preempt.h
2166 @@ -0,0 +1,164 @@
2167 +#ifndef LITMUS_PREEMPT_H
2168 +#define LITMUS_PREEMPT_H
2169 +
2170 +#include <linux/types.h>
2171 +#include <linux/cache.h>
2172 +#include <linux/percpu.h>
2173 +#include <asm/atomic.h>
2174 +
2175 +#include <litmus/debug_trace.h>
2176 +
2177 +extern DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, resched_state);
2178 +
2179 +#ifdef CONFIG_PREEMPT_STATE_TRACE
2180 +const char* sched_state_name(int s);
2181 +#define TRACE_STATE(fmt, args...) TRACE("SCHED_STATE " fmt, args)
2182 +#else
2183 +#define TRACE_STATE(fmt, args...) /* ignore */
2184 +#endif
2185 +
2186 +#define VERIFY_SCHED_STATE(x) \
2187 + do { int __s = get_sched_state(); \
2188 + if ((__s & (x)) == 0) \
2189 + TRACE_STATE("INVALID s=0x%x (%s) not " \
2190 + "in 0x%x (%s) [%s]\n", \
2191 + __s, sched_state_name(__s), \
2192 + (x), #x, __FUNCTION__); \
2193 +	} while (0)
2194 +
2195 +#define TRACE_SCHED_STATE_CHANGE(x, y, cpu) \
2196 + TRACE_STATE("[P%d] 0x%x (%s) -> 0x%x (%s)\n", \
2197 + cpu, (x), sched_state_name(x), \
2198 + (y), sched_state_name(y))
2199 +
2200 +
2201 +typedef enum scheduling_state {
2202 + TASK_SCHEDULED = (1 << 0), /* The currently scheduled task is the one that
2203 + * should be scheduled, and the processor does not
2204 + * plan to invoke schedule(). */
2205 + SHOULD_SCHEDULE = (1 << 1), /* A remote processor has determined that the
2206 + * processor should reschedule, but this has not
2207 + * been communicated yet (IPI still pending). */
2208 + WILL_SCHEDULE = (1 << 2), /* The processor has noticed that it has to
2209 + * reschedule and will do so shortly. */
2210 + TASK_PICKED = (1 << 3), /* The processor is currently executing schedule(),
2211 + * has selected a new task to schedule, but has not
2212 + * yet performed the actual context switch. */
2213 + PICKED_WRONG_TASK = (1 << 4), /* The processor has not yet performed the context
2214 + * switch, but a remote processor has already
2215 + * determined that a higher-priority task became
2216 + * eligible after the task was picked. */
2217 +} sched_state_t;
2218 +
2219 +static inline sched_state_t get_sched_state_on(int cpu)
2220 +{
2221 + return atomic_read(&per_cpu(resched_state, cpu));
2222 +}
2223 +
2224 +static inline sched_state_t get_sched_state(void)
2225 +{
2226 + return atomic_read(&__get_cpu_var(resched_state));
2227 +}
2228 +
2229 +static inline int is_in_sched_state(int possible_states)
2230 +{
2231 + return get_sched_state() & possible_states;
2232 +}
2233 +
2234 +static inline int cpu_is_in_sched_state(int cpu, int possible_states)
2235 +{
2236 + return get_sched_state_on(cpu) & possible_states;
2237 +}
2238 +
2239 +static inline void set_sched_state(sched_state_t s)
2240 +{
2241 + TRACE_SCHED_STATE_CHANGE(get_sched_state(), s, smp_processor_id());
2242 + atomic_set(&__get_cpu_var(resched_state), s);
2243 +}
2244 +
2245 +static inline int sched_state_transition(sched_state_t from, sched_state_t to)
2246 +{
2247 + sched_state_t old_state;
2248 +
2249 + old_state = atomic_cmpxchg(&__get_cpu_var(resched_state), from, to);
2250 + if (old_state == from) {
2251 + TRACE_SCHED_STATE_CHANGE(from, to, smp_processor_id());
2252 + return 1;
2253 + } else
2254 + return 0;
2255 +}
2256 +
2257 +static inline int sched_state_transition_on(int cpu,
2258 + sched_state_t from,
2259 + sched_state_t to)
2260 +{
2261 + sched_state_t old_state;
2262 +
2263 + old_state = atomic_cmpxchg(&per_cpu(resched_state, cpu), from, to);
2264 + if (old_state == from) {
2265 + TRACE_SCHED_STATE_CHANGE(from, to, cpu);
2266 + return 1;
2267 + } else
2268 + return 0;
2269 +}
2270 +
2271 +/* Plugins must call this function after they have decided which job to
2272 + * schedule next. IMPORTANT: this function must be called while still holding
2273 + * the lock that is used to serialize scheduling decisions.
2274 + *
2275 + * (Ideally, we would like to use runqueue locks for this purpose, but that
2276 + * would lead to deadlocks with the migration code.)
2277 + */
2278 +static inline void sched_state_task_picked(void)
2279 +{
2280 + VERIFY_SCHED_STATE(WILL_SCHEDULE);
2281 +
2282 +	/* WILL_SCHEDULE has only a local transition => simple store is ok */
2283 + set_sched_state(TASK_PICKED);
2284 +}
2285 +
2286 +static inline void sched_state_entered_schedule(void)
2287 +{
2288 + /* Update state for the case that we entered schedule() not due to
2289 + * set_tsk_need_resched() */
2290 + set_sched_state(WILL_SCHEDULE);
2291 +}
2292 +
2293 +/* Called by schedule() to check if the scheduling decision is still valid
2294 + * after a context switch. Returns 1 if the CPU needs to reschedule. */
2295 +static inline int sched_state_validate_switch(void)
2296 +{
2297 + int left_state_ok = 0;
2298 +
2299 + VERIFY_SCHED_STATE(PICKED_WRONG_TASK | TASK_PICKED);
2300 +
2301 + if (is_in_sched_state(TASK_PICKED)) {
2302 + /* Might be good; let's try to transition out of this
2303 + * state. This must be done atomically since remote processors
2304 + * may try to change the state, too. */
2305 + left_state_ok = sched_state_transition(TASK_PICKED, TASK_SCHEDULED);
2306 + }
2307 +
2308 + if (!left_state_ok) {
2309 + /* We raced with a higher-priority task arrival => not
2310 + * valid. The CPU needs to reschedule. */
2311 + set_sched_state(WILL_SCHEDULE);
2312 + return 1;
2313 + } else
2314 + return 0;
2315 +}
2316 +
2317 +/* State transition events. See litmus/preempt.c for details. */
2318 +void sched_state_will_schedule(struct task_struct* tsk);
2319 +void sched_state_ipi(void);
2320 +/* Cause a CPU (remote or local) to reschedule. */
2321 +void litmus_reschedule(int cpu);
2322 +void litmus_reschedule_local(void);
2323 +
2324 +#ifdef CONFIG_DEBUG_KERNEL
2325 +void sched_state_plugin_check(void);
2326 +#else
2327 +#define sched_state_plugin_check() /* no check */
2328 +#endif
2329 +
2330 +#endif
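
The per-CPU scheduling state above is a small state machine: schedule() only commits its pick by atomically moving TASK_PICKED to TASK_SCHEDULED, and any remote CPU that invalidated the pick in the meantime forces another pass through the scheduler. The user-space sketch below replays that hand-off with a GCC/Clang atomic builtin; the state values are copied from the enum, everything else is illustrative:

#include <stdio.h>

#define TASK_SCHEDULED    (1 << 0)
#define SHOULD_SCHEDULE   (1 << 1)
#define WILL_SCHEDULE     (1 << 2)
#define TASK_PICKED       (1 << 3)
#define PICKED_WRONG_TASK (1 << 4)

static int resched_state; /* stands in for the per-CPU variable */

/* returns 1 if the CPU must go around the scheduling loop again */
static int sched_state_validate_switch(void)
{
	int left_state_ok = 0;

	if (resched_state & TASK_PICKED)
		left_state_ok = __sync_bool_compare_and_swap(&resched_state,
						TASK_PICKED, TASK_SCHEDULED);
	if (!left_state_ok) {
		resched_state = WILL_SCHEDULE;
		return 1;
	}
	return 0;
}

int main(void)
{
	/* uncontended case: the pick survives and becomes TASK_SCHEDULED */
	resched_state = TASK_PICKED;
	printf("reschedule needed: %d\n", sched_state_validate_switch()); /* 0 */

	/* a "remote CPU" invalidated the pick before the context switch */
	resched_state = PICKED_WRONG_TASK;
	printf("reschedule needed: %d\n", sched_state_validate_switch()); /* 1 */
	return 0;
}
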
2331 diff --git a/include/litmus/rt_domain.h b/include/litmus/rt_domain.h
2332 new file mode 100644
2333 index 0000000..ac24929
2334 --- /dev/null
2335 +++ b/include/litmus/rt_domain.h
2336 @@ -0,0 +1,182 @@
2337 +/* CLEANUP: Add comments and make it less messy.
2338 + *
2339 + */
2340 +
2341 +#ifndef __UNC_RT_DOMAIN_H__
2342 +#define __UNC_RT_DOMAIN_H__
2343 +
2344 +#include <litmus/bheap.h>
2345 +
2346 +#define RELEASE_QUEUE_SLOTS 127 /* prime */
2347 +
2348 +struct _rt_domain;
2349 +
2350 +typedef int (*check_resched_needed_t)(struct _rt_domain *rt);
2351 +typedef void (*release_jobs_t)(struct _rt_domain *rt, struct bheap* tasks);
2352 +
2353 +struct release_queue {
2354 + /* each slot maintains a list of release heaps sorted
2355 + * by release time */
2356 + struct list_head slot[RELEASE_QUEUE_SLOTS];
2357 +};
2358 +
2359 +typedef struct _rt_domain {
2360 + /* runnable rt tasks are in here */
2361 + raw_spinlock_t ready_lock;
2362 + struct bheap ready_queue;
2363 +
2364 + /* real-time tasks waiting for release are in here */
2365 + raw_spinlock_t release_lock;
2366 + struct release_queue release_queue;
2367 +
2368 +#ifdef CONFIG_RELEASE_MASTER
2369 + int release_master;
2370 +#endif
2371 +
2372 + /* for moving tasks to the release queue */
2373 + raw_spinlock_t tobe_lock;
2374 + struct list_head tobe_released;
2375 +
2376 + /* how do we check if we need to kick another CPU? */
2377 + check_resched_needed_t check_resched;
2378 +
2379 + /* how do we release jobs? */
2380 + release_jobs_t release_jobs;
2381 +
2382 + /* how are tasks ordered in the ready queue? */
2383 + bheap_prio_t order;
2384 +} rt_domain_t;
2385 +
2386 +struct release_heap {
2387 + /* list_head for per-time-slot list */
2388 + struct list_head list;
2389 + lt_t release_time;
2390 + /* all tasks to be released at release_time */
2391 + struct bheap heap;
2392 + /* used to trigger the release */
2393 + struct hrtimer timer;
2394 +
2395 +#ifdef CONFIG_RELEASE_MASTER
2396 + /* used to delegate releases */
2397 + struct hrtimer_start_on_info info;
2398 +#endif
2399 + /* required for the timer callback */
2400 + rt_domain_t* dom;
2401 +};
2402 +
2403 +
2404 +static inline struct task_struct* __next_ready(rt_domain_t* rt)
2405 +{
2406 + struct bheap_node *hn = bheap_peek(rt->order, &rt->ready_queue);
2407 + if (hn)
2408 + return bheap2task(hn);
2409 + else
2410 + return NULL;
2411 +}
2412 +
2413 +void rt_domain_init(rt_domain_t *rt, bheap_prio_t order,
2414 + check_resched_needed_t check,
2415 +		    release_jobs_t release);
2416 +
2417 +void __add_ready(rt_domain_t* rt, struct task_struct *new);
2418 +void __merge_ready(rt_domain_t* rt, struct bheap *tasks);
2419 +void __add_release(rt_domain_t* rt, struct task_struct *task);
2420 +
2421 +static inline struct task_struct* __take_ready(rt_domain_t* rt)
2422 +{
2423 + struct bheap_node* hn = bheap_take(rt->order, &rt->ready_queue);
2424 + if (hn)
2425 + return bheap2task(hn);
2426 + else
2427 + return NULL;
2428 +}
2429 +
2430 +static inline struct task_struct* __peek_ready(rt_domain_t* rt)
2431 +{
2432 + struct bheap_node* hn = bheap_peek(rt->order, &rt->ready_queue);
2433 + if (hn)
2434 + return bheap2task(hn);
2435 + else
2436 + return NULL;
2437 +}
2438 +
2439 +static inline int is_queued(struct task_struct *t)
2440 +{
2441 + BUG_ON(!tsk_rt(t)->heap_node);
2442 + return bheap_node_in_heap(tsk_rt(t)->heap_node);
2443 +}
2444 +
2445 +static inline void remove(rt_domain_t* rt, struct task_struct *t)
2446 +{
2447 + bheap_delete(rt->order, &rt->ready_queue, tsk_rt(t)->heap_node);
2448 +}
2449 +
2450 +static inline void add_ready(rt_domain_t* rt, struct task_struct *new)
2451 +{
2452 + unsigned long flags;
2453 + /* first we need the write lock for rt_ready_queue */
2454 + raw_spin_lock_irqsave(&rt->ready_lock, flags);
2455 + __add_ready(rt, new);
2456 + raw_spin_unlock_irqrestore(&rt->ready_lock, flags);
2457 +}
2458 +
2459 +static inline void merge_ready(rt_domain_t* rt, struct bheap* tasks)
2460 +{
2461 + unsigned long flags;
2462 + raw_spin_lock_irqsave(&rt->ready_lock, flags);
2463 + __merge_ready(rt, tasks);
2464 + raw_spin_unlock_irqrestore(&rt->ready_lock, flags);
2465 +}
2466 +
2467 +static inline struct task_struct* take_ready(rt_domain_t* rt)
2468 +{
2469 + unsigned long flags;
2470 + struct task_struct* ret;
2471 + /* first we need the write lock for rt_ready_queue */
2472 + raw_spin_lock_irqsave(&rt->ready_lock, flags);
2473 + ret = __take_ready(rt);
2474 + raw_spin_unlock_irqrestore(&rt->ready_lock, flags);
2475 + return ret;
2476 +}
2477 +
2478 +
2479 +static inline void add_release(rt_domain_t* rt, struct task_struct *task)
2480 +{
2481 + unsigned long flags;
2482 + raw_spin_lock_irqsave(&rt->tobe_lock, flags);
2483 + __add_release(rt, task);
2484 + raw_spin_unlock_irqrestore(&rt->tobe_lock, flags);
2485 +}
2486 +
2487 +#ifdef CONFIG_RELEASE_MASTER
2488 +void __add_release_on(rt_domain_t* rt, struct task_struct *task,
2489 + int target_cpu);
2490 +
2491 +static inline void add_release_on(rt_domain_t* rt,
2492 + struct task_struct *task,
2493 + int target_cpu)
2494 +{
2495 + unsigned long flags;
2496 + raw_spin_lock_irqsave(&rt->tobe_lock, flags);
2497 + __add_release_on(rt, task, target_cpu);
2498 + raw_spin_unlock_irqrestore(&rt->tobe_lock, flags);
2499 +}
2500 +#endif
2501 +
2502 +static inline int __jobs_pending(rt_domain_t* rt)
2503 +{
2504 + return !bheap_empty(&rt->ready_queue);
2505 +}
2506 +
2507 +static inline int jobs_pending(rt_domain_t* rt)
2508 +{
2509 + unsigned long flags;
2510 + int ret;
2511 + /* first we need the write lock for rt_ready_queue */
2512 + raw_spin_lock_irqsave(&rt->ready_lock, flags);
2513 + ret = !bheap_empty(&rt->ready_queue);
2514 + raw_spin_unlock_irqrestore(&rt->ready_lock, flags);
2515 + return ret;
2516 +}
2517 +
2518 +#endif
2519 diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
2520 new file mode 100644
2521 index 0000000..d6d7991
2522 --- /dev/null
2523 +++ b/include/litmus/rt_param.h
2524 @@ -0,0 +1,209 @@
2525 +/*
2526 + * Definition of the scheduler plugin interface.
2527 + *
2528 + */
2529 +#ifndef _LINUX_RT_PARAM_H_
2530 +#define _LINUX_RT_PARAM_H_
2531 +
2532 +/* Litmus time type. */
2533 +typedef unsigned long long lt_t;
2534 +
2535 +static inline int lt_after(lt_t a, lt_t b)
2536 +{
2537 + return ((long long) b) - ((long long) a) < 0;
2538 +}
2539 +#define lt_before(a, b) lt_after(b, a)
2540 +
2541 +static inline int lt_after_eq(lt_t a, lt_t b)
2542 +{
2543 + return ((long long) a) - ((long long) b) >= 0;
2544 +}
2545 +#define lt_before_eq(a, b) lt_after_eq(b, a)
2546 +
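
lt_after() compares two unsigned 64-bit timestamps by looking at the sign of their signed difference, so the ordering stays correct even if the clock value should ever wrap around. A quick user-space check of that property:

#include <stdio.h>
#include <stdint.h>

typedef unsigned long long lt_t;

static int lt_after(lt_t a, lt_t b)
{
	return ((long long) b) - ((long long) a) < 0;
}
#define lt_before(a, b) lt_after(b, a)

int main(void)
{
	lt_t near_wrap = UINT64_MAX - 10; /* pretend the clock is about to wrap */
	lt_t wrapped   = 5;               /* a slightly later instant, post-wrap */

	printf("%d\n", lt_after(2000, 1000));          /* 1: plain case */
	printf("%d\n", lt_before(near_wrap, wrapped)); /* 1: still ordered correctly,
	                                                * unlike a naive '<' compare */
	return 0;
}
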
2547 +/* different types of clients */
2548 +typedef enum {
2549 + RT_CLASS_HARD,
2550 + RT_CLASS_SOFT,
2551 + RT_CLASS_BEST_EFFORT
2552 +} task_class_t;
2553 +
2554 +typedef enum {
2555 + NO_ENFORCEMENT, /* job may overrun unhindered */
2556 + QUANTUM_ENFORCEMENT, /* budgets are only checked on quantum boundaries */
2557 + PRECISE_ENFORCEMENT /* budgets are enforced with hrtimers */
2558 +} budget_policy_t;
2559 +
2560 +struct rt_task {
2561 + lt_t exec_cost;
2562 + lt_t period;
2563 + lt_t phase;
2564 + unsigned int cpu;
2565 + task_class_t cls;
2566 + budget_policy_t budget_policy; /* ignored by pfair */
2567 +};
2568 +
2569 +union np_flag {
2570 + uint32_t raw;
2571 + struct {
2572 + /* Is the task currently in a non-preemptive section? */
2573 + uint32_t flag:31;
2574 + /* Should the task call into the scheduler? */
2575 + uint32_t preempt:1;
2576 + } np;
2577 +};
2578 +
2579 +/* The definition of the data that is shared between the kernel and real-time
2580 + * tasks via a shared page (see litmus/ctrldev.c).
2581 + *
2582 + * WARNING: User space can write to this, so don't trust
2583 + * the correctness of the fields!
2584 + *
2585 + * This serves two purposes: to enable efficient signaling
2586 + * of non-preemptive sections (user->kernel) and
2587 + * delayed preemptions (kernel->user), and to export
2588 + * some real-time relevant statistics such as preemption and
2589 + * migration data to user space. We can't use a device to export
2590 + * statistics because we want to avoid system call overhead when
2591 + * determining preemption/migration overheads.
2592 + */
2593 +struct control_page {
2594 + volatile union np_flag sched;
2595 +
2596 + /* to be extended */
2597 +};
2598 +
2599 +/* don't export internal data structures to user space (liblitmus) */
2600 +#ifdef __KERNEL__
2601 +
2602 +struct _rt_domain;
2603 +struct bheap_node;
2604 +struct release_heap;
2605 +
2606 +struct rt_job {
2607 +	/* Time instant the job was or will be released. */
2608 + lt_t release;
2609 + /* What is the current deadline? */
2610 + lt_t deadline;
2611 +
2612 + /* How much service has this job received so far? */
2613 + lt_t exec_time;
2614 +
2615 + /* Which job is this. This is used to let user space
2616 + * specify which job to wait for, which is important if jobs
2617 + * overrun. If we just call sys_sleep_next_period() then we
2618 + * will unintentionally miss jobs after an overrun.
2619 + *
2620 + * Increase this sequence number when a job is released.
2621 + */
2622 + unsigned int job_no;
2623 +};
2624 +
2625 +struct pfair_param;
2626 +
2627 +/* RT task parameters for scheduling extensions
2628 + * These parameters are inherited during clone and therefore must
2629 + * be explicitly set up before the task set is launched.
2630 + */
2631 +struct rt_param {
2632 + /* is the task sleeping? */
2633 + unsigned int flags:8;
2634 +
2635 + /* do we need to check for srp blocking? */
2636 + unsigned int srp_non_recurse:1;
2637 +
2638 + /* is the task present? (true if it can be scheduled) */
2639 + unsigned int present:1;
2640 +
2641 +#ifdef CONFIG_LITMUS_LOCKING
2642 + /* Is the task being priority-boosted by a locking protocol? */
2643 + unsigned int priority_boosted:1;
2644 + /* If so, when did this start? */
2645 + lt_t boost_start_time;
2646 +#endif
2647 +
2648 + /* user controlled parameters */
2649 + struct rt_task task_params;
2650 +
2651 + /* timing parameters */
2652 + struct rt_job job_params;
2653 +
2654 + /* task representing the current "inherited" task
2655 + * priority, assigned by inherit_priority and
2656 +	 * return_priority in the scheduler plugins.
2657 +	 * Could point to self if PI does not result in
2658 + * an increased task priority.
2659 + */
2660 + struct task_struct* inh_task;
2661 +
2662 +#ifdef CONFIG_NP_SECTION
2663 + /* For the FMLP under PSN-EDF, it is required to make the task
2664 + * non-preemptive from kernel space. In order not to interfere with
2665 + * user space, this counter indicates the kernel space np setting.
2666 + * kernel_np > 0 => task is non-preemptive
2667 + */
2668 + unsigned int kernel_np;
2669 +#endif
2670 +
2671 + /* This field can be used by plugins to store where the task
2672 + * is currently scheduled. It is the responsibility of the
2673 + * plugin to avoid race conditions.
2674 + *
2675 +	 * This is used by GSN-EDF and PFAIR.
2676 + */
2677 + volatile int scheduled_on;
2678 +
2679 + /* Is the stack of the task currently in use? This is updated by
2680 + * the LITMUS core.
2681 + *
2682 + * Be careful to avoid deadlocks!
2683 + */
2684 + volatile int stack_in_use;
2685 +
2686 + /* This field can be used by plugins to store where the task
2687 + * is currently linked. It is the responsibility of the plugin
2688 + * to avoid race conditions.
2689 + *
2690 + * Used by GSN-EDF.
2691 + */
2692 + volatile int linked_on;
2693 +
2694 + /* PFAIR/PD^2 state. Allocated on demand. */
2695 + struct pfair_param* pfair;
2696 +
2697 + /* Fields saved before BE->RT transition.
2698 + */
2699 + int old_policy;
2700 + int old_prio;
2701 +
2702 + /* ready queue for this task */
2703 + struct _rt_domain* domain;
2704 +
2705 + /* heap element for this task
2706 + *
2707 + * Warning: Don't statically allocate this node. The heap
2708 + * implementation swaps these between tasks, thus after
2709 + * dequeuing from a heap you may end up with a different node
2710 +	 * than the one you had when enqueuing the task. For the same
2711 + * reason, don't obtain and store references to this node
2712 + * other than this pointer (which is updated by the heap
2713 + * implementation).
2714 + */
2715 + struct bheap_node* heap_node;
2716 + struct release_heap* rel_heap;
2717 +
2718 + /* Used by rt_domain to queue task in release list.
2719 + */
2720 + struct list_head list;
2721 +
2722 + /* Pointer to the page shared between userspace and kernel. */
2723 + struct control_page * ctrl_page;
2724 +};
2725 +
2726 +/* Possible RT flags */
2727 +#define RT_F_RUNNING 0x00000000
2728 +#define RT_F_SLEEP 0x00000001
2729 +#define RT_F_EXIT_SEM 0x00000008
2730 +
2731 +#endif
2732 +
2733 +#endif
2734 diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h
2735 new file mode 100644
2736 index 0000000..6e7cabd
2737 --- /dev/null
2738 +++ b/include/litmus/sched_plugin.h
2739 @@ -0,0 +1,111 @@
2740 +/*
2741 + * Definition of the scheduler plugin interface.
2742 + *
2743 + */
2744 +#ifndef _LINUX_SCHED_PLUGIN_H_
2745 +#define _LINUX_SCHED_PLUGIN_H_
2746 +
2747 +#include <linux/sched.h>
2748 +
2749 +#ifdef CONFIG_LITMUS_LOCKING
2750 +#include <litmus/locking.h>
2751 +#endif
2752 +
2753 +/************************ setup/tear down ********************/
2754 +
2755 +typedef long (*activate_plugin_t) (void);
2756 +typedef long (*deactivate_plugin_t) (void);
2757 +
2758 +
2759 +
2760 +/********************* scheduler invocation ******************/
2761 +
2762 +/* Plugin-specific realtime tick handler */
2763 +typedef void (*scheduler_tick_t) (struct task_struct *cur);
2764 +/* Plugin's main scheduling decision function */
2765 +typedef struct task_struct* (*schedule_t)(struct task_struct * prev);
2766 +/* Clean up after the task switch has occurred.
2767 + * This function is called after every (even non-rt) task switch.
2768 + */
2769 +typedef void (*finish_switch_t)(struct task_struct *prev);
2770 +
2771 +
2772 +/********************* task state changes ********************/
2773 +
2774 +/* Called to setup a new real-time task.
2775 + * Release the first job, enqueue, etc.
2776 + * Task may already be running.
2777 + */
2778 +typedef void (*task_new_t) (struct task_struct *task,
2779 + int on_rq,
2780 + int running);
2781 +
2782 +/* Called to re-introduce a task after blocking.
2783 + * Can potentially be called multiple times.
2784 + */
2785 +typedef void (*task_wake_up_t) (struct task_struct *task);
2786 +/* Called to notify the plugin of a blocking real-time task.
2787 + * It will only be called for real-time tasks and before schedule() is called. */
2788 +typedef void (*task_block_t) (struct task_struct *task);
2789 +/* Called when a real-time task exits or changes to a different scheduling
2790 + * class.
2791 + * Free any allocated resources
2792 + */
2793 +typedef void (*task_exit_t) (struct task_struct *);
2794 +
2795 +/* Called when the current task attempts to create a new lock of a given
2796 + * protocol type. */
2797 +typedef long (*allocate_lock_t) (struct litmus_lock **lock, int type,
2798 + void* __user config);
2799 +
2800 +
2801 +/********************* sys call backends ********************/
2802 +/* This function causes the caller to sleep until the next release */
2803 +typedef long (*complete_job_t) (void);
2804 +
2805 +typedef long (*admit_task_t)(struct task_struct* tsk);
2806 +
2807 +typedef void (*release_at_t)(struct task_struct *t, lt_t start);
2808 +
2809 +struct sched_plugin {
2810 + struct list_head list;
2811 + /* basic info */
2812 + char *plugin_name;
2813 +
2814 + /* setup */
2815 + activate_plugin_t activate_plugin;
2816 + deactivate_plugin_t deactivate_plugin;
2817 +
2818 + /* scheduler invocation */
2819 + scheduler_tick_t tick;
2820 + schedule_t schedule;
2821 + finish_switch_t finish_switch;
2822 +
2823 + /* syscall backend */
2824 + complete_job_t complete_job;
2825 + release_at_t release_at;
2826 +
2827 + /* task state changes */
2828 + admit_task_t admit_task;
2829 +
2830 + task_new_t task_new;
2831 + task_wake_up_t task_wake_up;
2832 + task_block_t task_block;
2833 + task_exit_t task_exit;
2834 +
2835 +#ifdef CONFIG_LITMUS_LOCKING
2836 + /* locking protocols */
2837 + allocate_lock_t allocate_lock;
2838 +#endif
2839 +} __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
2840 +
2841 +
2842 +extern struct sched_plugin *litmus;
2843 +
2844 +int register_sched_plugin(struct sched_plugin* plugin);
2845 +struct sched_plugin* find_sched_plugin(const char* name);
2846 +int print_sched_plugins(char* buf, int max);
2847 +
2848 +extern struct sched_plugin linux_sched_plugin;
2849 +
2850 +#endif
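
Every scheduler in this patch is an instance of struct sched_plugin: a named bundle of callbacks that the core looks up by name, activates, and then dispatches into on every scheduling event. The self-contained user-space sketch below reproduces only that lookup-and-dispatch shape; the struct fields, names, and list handling here are illustrative and deliberately simplified, not the kernel implementation:

#include <stdio.h>
#include <string.h>
#include <stddef.h>

struct demo_plugin {
	const char *name;
	long (*activate)(void);                    /* optional setup hook */
	const char *(*schedule)(const char *prev); /* mandatory decision hook */
};

static long edf_activate(void) { return 0; }

static const char *edf_schedule(const char *prev)
{
	(void) prev;
	return "highest-priority job";
}

static struct demo_plugin plugins[] = {
	{ .name = "GSN-EDF", .activate = edf_activate, .schedule = edf_schedule },
};

static struct demo_plugin *find_plugin(const char *name)
{
	size_t i;

	for (i = 0; i < sizeof(plugins) / sizeof(plugins[0]); i++)
		if (!strcmp(plugins[i].name, name))
			return &plugins[i];
	return NULL;
}

int main(void)
{
	struct demo_plugin *p = find_plugin("GSN-EDF");

	if (p) {
		if (p->activate)
			p->activate();  /* run the switch-time setup hook, if any */
		printf("%s picked: %s\n", p->name, p->schedule("idle"));
	}
	return 0;
}
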
2851 diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h
2852 new file mode 100644
2853 index 0000000..7ca34cb
2854 --- /dev/null
2855 +++ b/include/litmus/sched_trace.h
2856 @@ -0,0 +1,200 @@
2857 +/*
2858 + * sched_trace.h -- record scheduler events to a byte stream for offline analysis.
2859 + */
2860 +#ifndef _LINUX_SCHED_TRACE_H_
2861 +#define _LINUX_SCHED_TRACE_H_
2862 +
2863 +/* all times in nanoseconds */
2864 +
2865 +struct st_trace_header {
2866 + u8 type; /* Of what type is this record? */
2867 + u8 cpu; /* On which CPU was it recorded? */
2868 + u16 pid; /* PID of the task. */
2869 + u32 job; /* The job sequence number. */
2870 +};
2871 +
2872 +#define ST_NAME_LEN 16
2873 +struct st_name_data {
2874 + char cmd[ST_NAME_LEN];/* The name of the executable of this process. */
2875 +};
2876 +
2877 +struct st_param_data { /* regular params */
2878 + u32 wcet;
2879 + u32 period;
2880 + u32 phase;
2881 + u8 partition;
2882 + u8 class;
2883 + u8 __unused[2];
2884 +};
2885 +
2886 +struct st_release_data { /* A job was/is going to be released. */
2887 + u64 release; /* What's the release time? */
2888 + u64 deadline; /* By when must it finish? */
2889 +};
2890 +
2891 +struct st_assigned_data { /* A job was assigned to a CPU. */
2892 + u64 when;
2893 + u8 target; /* Where should it execute? */
2894 + u8 __unused[7];
2895 +};
2896 +
2897 +struct st_switch_to_data { /* A process was switched to on a given CPU. */
2898 + u64 when; /* When did this occur? */
2899 + u32 exec_time; /* Time the current job has executed. */
2900 + u8 __unused[4];
2901 +
2902 +};
2903 +
2904 +struct st_switch_away_data { /* A process was switched away from on a given CPU. */
2905 + u64 when;
2906 + u64 exec_time;
2907 +};
2908 +
2909 +struct st_completion_data { /* A job completed. */
2910 + u64 when;
2911 + u8 forced:1; /* Set to 1 if job overran and kernel advanced to the
2912 + * next task automatically; set to 0 otherwise.
2913 + */
2914 + u8 __uflags:7;
2915 + u8 __unused[7];
2916 +};
2917 +
2918 +struct st_block_data { /* A task blocks. */
2919 + u64 when;
2920 + u64 __unused;
2921 +};
2922 +
2923 +struct st_resume_data { /* A task resumes. */
2924 + u64 when;
2925 + u64 __unused;
2926 +};
2927 +
2928 +struct st_action_data {
2929 + u64 when;
2930 + u8 action;
2931 + u8 __unused[7];
2932 +};
2933 +
2934 +struct st_sys_release_data {
2935 + u64 when;
2936 + u64 release;
2937 +};
2938 +
2939 +#define DATA(x) struct st_ ## x ## _data x;
2940 +
2941 +typedef enum {
2942 + ST_NAME = 1, /* Start at one, so that we can spot
2943 + * uninitialized records. */
2944 + ST_PARAM,
2945 + ST_RELEASE,
2946 + ST_ASSIGNED,
2947 + ST_SWITCH_TO,
2948 + ST_SWITCH_AWAY,
2949 + ST_COMPLETION,
2950 + ST_BLOCK,
2951 + ST_RESUME,
2952 + ST_ACTION,
2953 + ST_SYS_RELEASE
2954 +} st_event_record_type_t;
2955 +
2956 +struct st_event_record {
2957 + struct st_trace_header hdr;
2958 + union {
2959 + u64 raw[2];
2960 +
2961 + DATA(name);
2962 + DATA(param);
2963 + DATA(release);
2964 + DATA(assigned);
2965 + DATA(switch_to);
2966 + DATA(switch_away);
2967 + DATA(completion);
2968 + DATA(block);
2969 + DATA(resume);
2970 + DATA(action);
2971 + DATA(sys_release);
2972 + } data;
2973 +};
2974 +
2975 +#undef DATA
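
Each trace record is a fixed 24 bytes: an 8-byte header followed by a 16-byte payload union, which is what lets user-space tools stride through the byte stream without any length field. A user-space replica of the layout that checks those sizes; stdint types stand in for the kernel's u8/u16/u32/u64, and only one payload variant is reproduced:

#include <stdio.h>
#include <stdint.h>

struct st_trace_header {
	uint8_t  type;
	uint8_t  cpu;
	uint16_t pid;
	uint32_t job;
};

struct st_release_data {
	uint64_t release;
	uint64_t deadline;
};

struct st_event_record {
	struct st_trace_header hdr;
	union {
		uint64_t raw[2];
		struct st_release_data release;
	} data;
};

int main(void)
{
	printf("header: %zu bytes\n", sizeof(struct st_trace_header)); /* 8  */
	printf("record: %zu bytes\n", sizeof(struct st_event_record)); /* 24 */
	return 0;
}
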
2976 +
2977 +#ifdef __KERNEL__
2978 +
2979 +#include <linux/sched.h>
2980 +#include <litmus/feather_trace.h>
2981 +
2982 +#ifdef CONFIG_SCHED_TASK_TRACE
2983 +
2984 +#define SCHED_TRACE(id, callback, task) \
2985 + ft_event1(id, callback, task)
2986 +#define SCHED_TRACE2(id, callback, task, xtra) \
2987 + ft_event2(id, callback, task, xtra)
2988 +
2989 +/* provide prototypes; needed on sparc64 */
2990 +#ifndef NO_TASK_TRACE_DECLS
2991 +feather_callback void do_sched_trace_task_name(unsigned long id,
2992 + struct task_struct* task);
2993 +feather_callback void do_sched_trace_task_param(unsigned long id,
2994 + struct task_struct* task);
2995 +feather_callback void do_sched_trace_task_release(unsigned long id,
2996 + struct task_struct* task);
2997 +feather_callback void do_sched_trace_task_switch_to(unsigned long id,
2998 + struct task_struct* task);
2999 +feather_callback void do_sched_trace_task_switch_away(unsigned long id,
3000 + struct task_struct* task);
3001 +feather_callback void do_sched_trace_task_completion(unsigned long id,
3002 + struct task_struct* task,
3003 + unsigned long forced);
3004 +feather_callback void do_sched_trace_task_block(unsigned long id,
3005 + struct task_struct* task);
3006 +feather_callback void do_sched_trace_task_resume(unsigned long id,
3007 + struct task_struct* task);
3008 +feather_callback void do_sched_trace_action(unsigned long id,
3009 + struct task_struct* task,
3010 + unsigned long action);
3011 +feather_callback void do_sched_trace_sys_release(unsigned long id,
3012 + lt_t* start);
3013 +
3014 +#endif
3015 +
3016 +#else
3017 +
3018 +#define SCHED_TRACE(id, callback, task) /* no tracing */
3019 +#define SCHED_TRACE2(id, callback, task, xtra) /* no tracing */
3020 +
3021 +#endif
3022 +
3023 +
3024 +#define SCHED_TRACE_BASE_ID 500
3025 +
3026 +
3027 +#define sched_trace_task_name(t) \
3028 + SCHED_TRACE(SCHED_TRACE_BASE_ID + 1, do_sched_trace_task_name, t)
3029 +#define sched_trace_task_param(t) \
3030 + SCHED_TRACE(SCHED_TRACE_BASE_ID + 2, do_sched_trace_task_param, t)
3031 +#define sched_trace_task_release(t) \
3032 + SCHED_TRACE(SCHED_TRACE_BASE_ID + 3, do_sched_trace_task_release, t)
3033 +#define sched_trace_task_switch_to(t) \
3034 + SCHED_TRACE(SCHED_TRACE_BASE_ID + 4, do_sched_trace_task_switch_to, t)
3035 +#define sched_trace_task_switch_away(t) \
3036 + SCHED_TRACE(SCHED_TRACE_BASE_ID + 5, do_sched_trace_task_switch_away, t)
3037 +#define sched_trace_task_completion(t, forced) \
3038 + SCHED_TRACE2(SCHED_TRACE_BASE_ID + 6, do_sched_trace_task_completion, t, \
3039 + (unsigned long) forced)
3040 +#define sched_trace_task_block(t) \
3041 + SCHED_TRACE(SCHED_TRACE_BASE_ID + 7, do_sched_trace_task_block, t)
3042 +#define sched_trace_task_resume(t) \
3043 + SCHED_TRACE(SCHED_TRACE_BASE_ID + 8, do_sched_trace_task_resume, t)
3044 +#define sched_trace_action(t, action) \
3045 + SCHED_TRACE2(SCHED_TRACE_BASE_ID + 9, do_sched_trace_action, t, \
3046 + (unsigned long) action);
3047 +/* when is a pointer, it does not need an explicit cast to unsigned long */
3048 +#define sched_trace_sys_release(when) \
3049 + SCHED_TRACE(SCHED_TRACE_BASE_ID + 10, do_sched_trace_sys_release, when)
3050 +
3051 +
3052 +#define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */
3053 +
3054 +#endif /* __KERNEL__ */
3055 +
3056 +#endif
3057 diff --git a/include/litmus/srp.h b/include/litmus/srp.h
3058 new file mode 100644
3059 index 0000000..c9a4552
3060 --- /dev/null
3061 +++ b/include/litmus/srp.h
3062 @@ -0,0 +1,28 @@
3063 +#ifndef LITMUS_SRP_H
3064 +#define LITMUS_SRP_H
3065 +
3066 +struct srp_semaphore;
3067 +
3068 +struct srp_priority {
3069 + struct list_head list;
3070 + unsigned int priority;
3071 + pid_t pid;
3072 +};
3073 +#define list2prio(l) list_entry(l, struct srp_priority, list)
3074 +
3075 +/* struct for uniprocessor SRP "semaphore" */
3076 +struct srp_semaphore {
3077 + struct litmus_lock litmus_lock;
3078 + struct srp_priority ceiling;
3079 + struct task_struct* owner;
3080 + int cpu; /* cpu associated with this "semaphore" and resource */
3081 +};
3082 +
3083 +/* map a task to its SRP preemption level priority */
3084 +typedef unsigned int (*srp_prioritization_t)(struct task_struct* t);
3085 +/* Must be updated by each plugin that uses SRP.*/
3086 +extern srp_prioritization_t get_srp_prio;
3087 +
3088 +struct srp_semaphore* allocate_srp_semaphore(void);
3089 +
3090 +#endif
3091 diff --git a/include/litmus/trace.h b/include/litmus/trace.h
3092 new file mode 100644
3093 index 0000000..e809376
3094 --- /dev/null
3095 +++ b/include/litmus/trace.h
3096 @@ -0,0 +1,116 @@
3097 +#ifndef _SYS_TRACE_H_
3098 +#define _SYS_TRACE_H_
3099 +
3100 +#ifdef CONFIG_SCHED_OVERHEAD_TRACE
3101 +
3102 +#include <litmus/feather_trace.h>
3103 +#include <litmus/feather_buffer.h>
3104 +
3105 +
3106 +/*********************** TIMESTAMPS ************************/
3107 +
3108 +enum task_type_marker {
3109 + TSK_BE,
3110 + TSK_RT,
3111 + TSK_UNKNOWN
3112 +};
3113 +
3114 +struct timestamp {
3115 + uint64_t timestamp;
3116 + uint32_t seq_no;
3117 + uint8_t cpu;
3118 + uint8_t event;
3119 + uint8_t task_type:2;
3120 + uint8_t irq_flag:1;
3121 + uint8_t irq_count:5;
3122 +};
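
Overhead samples are likewise fixed-size: an 8-byte cycle counter, a sequence number, and one byte that packs the task type, an IRQ-interference flag, and a 5-bit IRQ count, for a 16-byte record. A small user-space replica that verifies the packing; stdint types are substituted for the kernel's, and the sample values are made up:

#include <stdio.h>
#include <stdint.h>

struct timestamp {
	uint64_t timestamp;  /* cycle counter value */
	uint32_t seq_no;
	uint8_t  cpu;
	uint8_t  event;
	uint8_t  task_type:2;
	uint8_t  irq_flag:1;
	uint8_t  irq_count:5;
};

int main(void)
{
	struct timestamp ts = {
		.timestamp = 123456789ULL,
		.seq_no    = 1,
		.cpu       = 0,
		.event     = 100,   /* e.g. the TS_SCHED_START id */
		.task_type = 1,     /* TSK_RT */
		.irq_flag  = 1,     /* an interrupt fired during the measurement */
		.irq_count = 3,
	};

	printf("record size: %zu bytes\n", sizeof(ts)); /* 16 */
	printf("event %u, irqs seen: %u\n", ts.event, ts.irq_count);
	return 0;
}
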
3123 +
3124 +/* tracing callbacks */
3125 +feather_callback void save_timestamp(unsigned long event);
3126 +feather_callback void save_timestamp_def(unsigned long event, unsigned long type);
3127 +feather_callback void save_timestamp_task(unsigned long event, unsigned long t_ptr);
3128 +feather_callback void save_timestamp_cpu(unsigned long event, unsigned long cpu);
3129 +feather_callback void save_task_latency(unsigned long event, unsigned long when_ptr);
3130 +
3131 +#define TIMESTAMP(id) ft_event0(id, save_timestamp)
3132 +
3133 +#define DTIMESTAMP(id, def) ft_event1(id, save_timestamp_def, (unsigned long) def)
3134 +
3135 +#define TTIMESTAMP(id, task) \
3136 + ft_event1(id, save_timestamp_task, (unsigned long) task)
3137 +
3138 +#define CTIMESTAMP(id, cpu) \
3139 + ft_event1(id, save_timestamp_cpu, (unsigned long) cpu)
3140 +
3141 +#define LTIMESTAMP(id, task) \
3142 + ft_event1(id, save_task_latency, (unsigned long) task)
3143 +
3144 +#else /* !CONFIG_SCHED_OVERHEAD_TRACE */
3145 +
3146 +#define TIMESTAMP(id) /* no tracing */
3147 +
3148 +#define DTIMESTAMP(id, def) /* no tracing */
3149 +
3150 +#define TTIMESTAMP(id, task) /* no tracing */
3151 +
3152 +#define CTIMESTAMP(id, cpu) /* no tracing */
3153 +
3154 +#define LTIMESTAMP(id, when_ptr) /* no tracing */
3155 +
3156 +#endif
3157 +
3158 +
3159 +/* Convention for timestamps
3160 + * =========================
3161 + *
3162 + * In order to process the trace files with a common tool, we use the following
3163 + * convention to measure execution times: The end time id of a code segment is
3164 + * always the next number after the start time event id.
3165 + */
3166 +
3167 +
3168 +
3169 +#define TS_SCHED_START			DTIMESTAMP(100, TSK_UNKNOWN)
3170 +					/* at SCHED_START the next task is not yet
3171 +					 * known, hence TSK_UNKNOWN; we only care
3172 +					 * about the task recorded at SCHED_END */
3173 +#define TS_SCHED_END(t) TTIMESTAMP(101, t)
3174 +#define TS_SCHED2_START(t) TTIMESTAMP(102, t)
3175 +#define TS_SCHED2_END(t) TTIMESTAMP(103, t)
3176 +
3177 +#define TS_CXS_START(t) TTIMESTAMP(104, t)
3178 +#define TS_CXS_END(t) TTIMESTAMP(105, t)
3179 +
3180 +#define TS_RELEASE_START DTIMESTAMP(106, TSK_RT)
3181 +#define TS_RELEASE_END DTIMESTAMP(107, TSK_RT)
3182 +
3183 +#define TS_TICK_START(t) TTIMESTAMP(110, t)
3184 +#define TS_TICK_END(t) TTIMESTAMP(111, t)
3185 +
3186 +
3187 +#define TS_PLUGIN_SCHED_START /* TIMESTAMP(120) */ /* currently unused */
3188 +#define TS_PLUGIN_SCHED_END /* TIMESTAMP(121) */
3189 +
3190 +#define TS_PLUGIN_TICK_START /* TIMESTAMP(130) */
3191 +#define TS_PLUGIN_TICK_END /* TIMESTAMP(131) */
3192 +
3193 +#define TS_ENTER_NP_START TIMESTAMP(140)
3194 +#define TS_ENTER_NP_END TIMESTAMP(141)
3195 +
3196 +#define TS_EXIT_NP_START TIMESTAMP(150)
3197 +#define TS_EXIT_NP_END TIMESTAMP(151)
3198 +
3199 +#define TS_LOCK_START TIMESTAMP(170)
3200 +#define TS_LOCK_SUSPEND TIMESTAMP(171)
3201 +#define TS_LOCK_RESUME TIMESTAMP(172)
3202 +#define TS_LOCK_END TIMESTAMP(173)
3203 +
3204 +#define TS_UNLOCK_START TIMESTAMP(180)
3205 +#define TS_UNLOCK_END TIMESTAMP(181)
3206 +
3207 +#define TS_SEND_RESCHED_START(c) CTIMESTAMP(190, c)
3208 +#define TS_SEND_RESCHED_END DTIMESTAMP(191, TSK_UNKNOWN)
3209 +
3210 +#define TS_RELEASE_LATENCY(when) LTIMESTAMP(208, &(when))
3211 +
3212 +#endif /* !_SYS_TRACE_H_ */
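
The "end id = start id + 1" convention above means post-processing tools can pair samples purely by event id and subtract timestamps to get per-segment overheads. A toy user-space example of that pairing; the ids match TS_CXS_START/TS_CXS_END from the defines above, but the sample values are made up:

#include <stdio.h>
#include <stdint.h>

struct sample { uint8_t event; uint64_t cycles; };

int main(void)
{
	/* TS_CXS_START = 104, TS_CXS_END = 105 */
	struct sample trace[] = {
		{ 104, 1000 }, { 105, 1420 },
		{ 104, 5000 }, { 105, 5310 },
	};
	size_t i, n = sizeof(trace) / sizeof(trace[0]);

	for (i = 0; i + 1 < n; i += 2) {
		/* an end event always carries the id right after its start event */
		if (trace[i + 1].event == trace[i].event + 1)
			printf("context-switch overhead: %llu cycles\n",
			       (unsigned long long)
			       (trace[i + 1].cycles - trace[i].cycles));
	}
	return 0;
}
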
3213 diff --git a/include/litmus/trace_irq.h b/include/litmus/trace_irq.h
3214 new file mode 100644
3215 index 0000000..f18b127
3216 --- /dev/null
3217 +++ b/include/litmus/trace_irq.h
3218 @@ -0,0 +1,21 @@
3219 +#ifndef _LITMUS_TRACE_IRQ_H_
3220 +#define _LITMUS_TRACE_IRQ_H_
3221 +
3222 +#ifdef CONFIG_SCHED_OVERHEAD_TRACE
3223 +
3224 +extern DEFINE_PER_CPU(atomic_t, irq_fired_count);
3225 +
3226 +static inline void ft_irq_fired(void)
3227 +{
3228 + /* Only called with preemptions disabled. */
3229 + atomic_inc(&__get_cpu_var(irq_fired_count));
3230 +}
3231 +
3232 +
3233 +#else
3234 +
3235 +#define ft_irq_fired() /* nothing to do */
3236 +
3237 +#endif
3238 +
3239 +#endif
3240 diff --git a/include/litmus/unistd_32.h b/include/litmus/unistd_32.h
3241 new file mode 100644
3242 index 0000000..94264c2
3243 --- /dev/null
3244 +++ b/include/litmus/unistd_32.h
3245 @@ -0,0 +1,21 @@
3246 +/*
3247 + * included from arch/x86/include/asm/unistd_32.h
3248 + *
3249 + * LITMUS^RT syscalls with "relative" numbers
3250 + */
3251 +#define __LSC(x) (__NR_LITMUS + x)
3252 +
3253 +#define __NR_set_rt_task_param __LSC(0)
3254 +#define __NR_get_rt_task_param __LSC(1)
3255 +#define __NR_complete_job __LSC(2)
3256 +#define __NR_od_open __LSC(3)
3257 +#define __NR_od_close __LSC(4)
3258 +#define __NR_litmus_lock __LSC(5)
3259 +#define __NR_litmus_unlock __LSC(6)
3260 +#define __NR_query_job_no __LSC(7)
3261 +#define __NR_wait_for_job_release __LSC(8)
3262 +#define __NR_wait_for_ts_release __LSC(9)
3263 +#define __NR_release_ts __LSC(10)
3264 +#define __NR_null_call __LSC(11)
3265 +
3266 +#define NR_litmus_syscalls 12
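
The LITMUS^RT syscall numbers are defined relative to __NR_LITMUS, which the patched arch headers set to the first number after the native Linux syscalls, so this table stays valid when the base moves between kernel versions or architectures. A user-space illustration of the scheme; the base value 347 below is made up for the example and is not the real base on any particular arch:

#include <stdio.h>

#define __NR_LITMUS 347          /* hypothetical base, for illustration only */
#define __LSC(x) (__NR_LITMUS + x)

#define __NR_set_rt_task_param  __LSC(0)
#define __NR_complete_job       __LSC(2)
#define __NR_null_call          __LSC(11)

int main(void)
{
	printf("set_rt_task_param -> %d\n", __NR_set_rt_task_param); /* 347 */
	printf("complete_job      -> %d\n", __NR_complete_job);      /* 349 */
	printf("null_call         -> %d\n", __NR_null_call);         /* 358 */
	return 0;
}
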
3267 diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h
3268 new file mode 100644
3269 index 0000000..d5ced0d
3270 --- /dev/null
3271 +++ b/include/litmus/unistd_64.h
3272 @@ -0,0 +1,33 @@
3273 +/*
3274 + * included from arch/x86/include/asm/unistd_64.h
3275 + *
3276 + * LITMUS^RT syscalls with "relative" numbers
3277 + */
3278 +#define __LSC(x) (__NR_LITMUS + x)
3279 +
3280 +#define __NR_set_rt_task_param __LSC(0)
3281 +__SYSCALL(__NR_set_rt_task_param, sys_set_rt_task_param)
3282 +#define __NR_get_rt_task_param __LSC(1)
3283 +__SYSCALL(__NR_get_rt_task_param, sys_get_rt_task_param)
3284 +#define __NR_complete_job __LSC(2)
3285 +__SYSCALL(__NR_complete_job, sys_complete_job)
3286 +#define __NR_od_open __LSC(3)
3287 +__SYSCALL(__NR_od_open, sys_od_open)
3288 +#define __NR_od_close __LSC(4)
3289 +__SYSCALL(__NR_od_close, sys_od_close)
3290 +#define __NR_litmus_lock __LSC(5)
3291 +__SYSCALL(__NR_litmus_lock, sys_litmus_lock)
3292 +#define __NR_litmus_unlock __LSC(6)
3293 +__SYSCALL(__NR_litmus_unlock, sys_litmus_unlock)
3294 +#define __NR_query_job_no __LSC(7)
3295 +__SYSCALL(__NR_query_job_no, sys_query_job_no)
3296 +#define __NR_wait_for_job_release __LSC(8)
3297 +__SYSCALL(__NR_wait_for_job_release, sys_wait_for_job_release)
3298 +#define __NR_wait_for_ts_release __LSC(9)
3299 +__SYSCALL(__NR_wait_for_ts_release, sys_wait_for_ts_release)
3300 +#define __NR_release_ts __LSC(10)
3301 +__SYSCALL(__NR_release_ts, sys_release_ts)
3302 +#define __NR_null_call __LSC(11)
3303 +__SYSCALL(__NR_null_call, sys_null_call)
3304 +
3305 +#define NR_litmus_syscalls 12
3306 diff --git a/kernel/exit.c b/kernel/exit.c
3307 index f2b321b..64879bd 100644
3308 --- a/kernel/exit.c
3309 +++ b/kernel/exit.c
3310 @@ -57,6 +57,8 @@
3311 #include <asm/pgtable.h>
3312 #include <asm/mmu_context.h>
3313
3314 +extern void exit_od_table(struct task_struct *t);
3315 +
3316 static void exit_mm(struct task_struct * tsk);
3317
3318 static void __unhash_process(struct task_struct *p, bool group_dead)
3319 @@ -980,6 +982,8 @@ NORET_TYPE void do_exit(long code)
3320 if (unlikely(tsk->audit_context))
3321 audit_free(tsk);
3322
3323 + exit_od_table(tsk);
3324 +
3325 tsk->exit_code = code;
3326 taskstats_exit(tsk, group_dead);
3327
3328 diff --git a/kernel/fork.c b/kernel/fork.c
3329 index 0276c30..25c6111 100644
3330 --- a/kernel/fork.c
3331 +++ b/kernel/fork.c
3332 @@ -77,6 +77,9 @@
3333
3334 #include <trace/events/sched.h>
3335
3336 +#include <litmus/litmus.h>
3337 +#include <litmus/sched_plugin.h>
3338 +
3339 /*
3340 * Protected counters by write_lock_irq(&tasklist_lock)
3341 */
3342 @@ -191,6 +194,7 @@ void __put_task_struct(struct task_struct *tsk)
3343 WARN_ON(atomic_read(&tsk->usage));
3344 WARN_ON(tsk == current);
3345
3346 + exit_litmus(tsk);
3347 exit_creds(tsk);
3348 delayacct_tsk_free(tsk);
3349 put_signal_struct(tsk->signal);
3350 @@ -275,6 +279,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
3351
3352 tsk->stack = ti;
3353
3354 + /* Don't let the new task be a real-time task. */
3355 + litmus_fork(tsk);
3356 +
3357 err = prop_local_init_single(&tsk->dirties);
3358 if (err)
3359 goto out;
3360 diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
3361 index a9205e3..11e8969 100644
3362 --- a/kernel/hrtimer.c
3363 +++ b/kernel/hrtimer.c
3364 @@ -46,6 +46,8 @@
3365 #include <linux/sched.h>
3366 #include <linux/timer.h>
3367
3368 +#include <litmus/litmus.h>
3369 +
3370 #include <asm/uaccess.h>
3371
3372 #include <trace/events/timer.h>
3373 @@ -1026,6 +1028,98 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
3374 }
3375 EXPORT_SYMBOL_GPL(hrtimer_start);
3376
3377 +#ifdef CONFIG_ARCH_HAS_SEND_PULL_TIMERS
3378 +
3379 +/**
3380 + * hrtimer_start_on_info_init - Initialize hrtimer_start_on_info
3381 + */
3382 +void hrtimer_start_on_info_init(struct hrtimer_start_on_info *info)
3383 +{
3384 + memset(info, 0, sizeof(struct hrtimer_start_on_info));
3385 + atomic_set(&info->state, HRTIMER_START_ON_INACTIVE);
3386 +}
3387 +
3388 +/**
3389 + * hrtimer_pull - PULL_TIMERS_VECTOR callback on remote cpu
3390 + */
3391 +void hrtimer_pull(void)
3392 +{
3393 + struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases);
3394 + struct hrtimer_start_on_info *info;
3395 + struct list_head *pos, *safe, list;
3396 +
3397 + raw_spin_lock(&base->lock);
3398 + list_replace_init(&base->to_pull, &list);
3399 + raw_spin_unlock(&base->lock);
3400 +
3401 + list_for_each_safe(pos, safe, &list) {
3402 + info = list_entry(pos, struct hrtimer_start_on_info, list);
3403 + TRACE("pulled timer 0x%x\n", info->timer);
3404 + list_del(pos);
3405 + hrtimer_start(info->timer, info->time, info->mode);
3406 + }
3407 +}
3408 +
3409 +/**
3410 + * hrtimer_start_on - trigger timer arming on remote cpu
3411 + * @cpu: remote cpu
3412 + * @info: save timer information for enqueuing on remote cpu
3413 + * @timer: timer to be pulled
3414 + * @time: expire time
3415 + * @mode: timer mode
3416 + */
3417 +int hrtimer_start_on(int cpu, struct hrtimer_start_on_info* info,
3418 + struct hrtimer *timer, ktime_t time,
3419 + const enum hrtimer_mode mode)
3420 +{
3421 + unsigned long flags;
3422 + struct hrtimer_cpu_base* base;
3423 + int in_use = 0, was_empty;
3424 +
3425 + /* serialize access to info through the timer base */
3426 + lock_hrtimer_base(timer, &flags);
3427 +
3428 + in_use = (atomic_read(&info->state) != HRTIMER_START_ON_INACTIVE);
3429 + if (!in_use) {
3430 + INIT_LIST_HEAD(&info->list);
3431 + info->timer = timer;
3432 + info->time = time;
3433 + info->mode = mode;
3434 + /* mark as in use */
3435 + atomic_set(&info->state, HRTIMER_START_ON_QUEUED);
3436 + }
3437 +
3438 + unlock_hrtimer_base(timer, &flags);
3439 +
3440 + if (!in_use) {
3441 + /* initiate pull */
3442 + preempt_disable();
3443 + if (cpu == smp_processor_id()) {
3444 + /* start timer locally; we may get called
3445 + * with rq->lock held, do not wake up anything
3446 + */
3447 + TRACE("hrtimer_start_on: starting on local CPU\n");
3448 + __hrtimer_start_range_ns(info->timer, info->time,
3449 + 0, info->mode, 0);
3450 + } else {
3451 + TRACE("hrtimer_start_on: pulling to remote CPU\n");
3452 + base = &per_cpu(hrtimer_bases, cpu);
3453 + raw_spin_lock_irqsave(&base->lock, flags);
3454 + was_empty = list_empty(&base->to_pull);
3455 + list_add(&info->list, &base->to_pull);
3456 + raw_spin_unlock_irqrestore(&base->lock, flags);
3457 + if (was_empty)
3458 +			/* only send IPI if no one else
3459 + * has done so already
3460 + */
3461 + smp_send_pull_timers(cpu);
3462 + }
3463 + preempt_enable();
3464 + }
3465 + return in_use;
3466 +}
3467 +
3468 +#endif
3469
3470 /**
3471 * hrtimer_try_to_cancel - try to deactivate a timer
3472 @@ -1625,6 +1719,7 @@ static void __cpuinit init_hrtimers_cpu(int cpu)
3473 }
3474
3475 hrtimer_init_hres(cpu_base);
3476 + INIT_LIST_HEAD(&cpu_base->to_pull);
3477 }
3478
3479 #ifdef CONFIG_HOTPLUG_CPU
3480 diff --git a/kernel/printk.c b/kernel/printk.c
3481 index 3518539..b799a2e 100644
3482 --- a/kernel/printk.c
3483 +++ b/kernel/printk.c
3484 @@ -70,6 +70,13 @@ int console_printk[4] = {
3485 };
3486
3487 /*
3488 + * divert printk() messages when there is a LITMUS^RT debug listener
3489 + */
3490 +#include <litmus/litmus.h>
3491 +int trace_override = 0;
3492 +int trace_recurse = 0;
3493 +
3494 +/*
3495 * Low level drivers may need that to know if they can schedule in
3496 * their unblank() callback or not. So let's export it.
3497 */
3498 @@ -871,6 +878,9 @@ asmlinkage int vprintk(const char *fmt, va_list args)
3499 /* Emit the output into the temporary buffer */
3500 printed_len += vscnprintf(printk_buf + printed_len,
3501 sizeof(printk_buf) - printed_len, fmt, args);
3502 + /* if LITMUS^RT tracer is active divert printk() msgs */
3503 + if (trace_override && !trace_recurse)
3504 + TRACE("%s", printk_buf);
3505
3506 p = printk_buf;
3507
3508 @@ -947,7 +957,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
3509 * Try to acquire and then immediately release the
3510 * console semaphore. The release will do all the
3511 * actual magic (print out buffers, wake up klogd,
3512 - * etc).
3513 + * etc).
3514 *
3515 * The console_trylock_for_printk() function
3516 * will release 'logbuf_lock' regardless of whether it
3517 @@ -1220,7 +1230,7 @@ int printk_needs_cpu(int cpu)
3518
3519 void wake_up_klogd(void)
3520 {
3521 - if (waitqueue_active(&log_wait))
3522 + if (!trace_override && waitqueue_active(&log_wait))
3523 this_cpu_write(printk_pending, 1);
3524 }
3525
3526 diff --git a/kernel/sched.c b/kernel/sched.c
3527 index fde6ff9..baaca61 100644
3528 --- a/kernel/sched.c
3529 +++ b/kernel/sched.c
3530 @@ -80,6 +80,11 @@
3531 #include "workqueue_sched.h"
3532 #include "sched_autogroup.h"
3533
3534 +#include <litmus/sched_trace.h>
3535 +#include <litmus/trace.h>
3536 +
3537 +static void litmus_tick(struct rq*, struct task_struct*);
3538 +
3539 #define CREATE_TRACE_POINTS
3540 #include <trace/events/sched.h>
3541
3542 @@ -410,6 +415,12 @@ struct rt_rq {
3543 #endif
3544 };
3545
3546 +/* Litmus related fields in a runqueue */
3547 +struct litmus_rq {
3548 + unsigned long nr_running;
3549 + struct task_struct *prev;
3550 +};
3551 +
3552 #ifdef CONFIG_SMP
3553
3554 /*
3555 @@ -475,6 +486,7 @@ struct rq {
3556
3557 struct cfs_rq cfs;
3558 struct rt_rq rt;
3559 + struct litmus_rq litmus;
3560
3561 #ifdef CONFIG_FAIR_GROUP_SCHED
3562 /* list of leaf cfs_rq on this cpu: */
3563 @@ -1045,6 +1057,7 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
3564 raw_spin_lock(&rq->lock);
3565 update_rq_clock(rq);
3566 rq->curr->sched_class->task_tick(rq, rq->curr, 1);
3567 + litmus_tick(rq, rq->curr);
3568 raw_spin_unlock(&rq->lock);
3569
3570 return HRTIMER_NORESTART;
3571 @@ -1773,7 +1786,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
3572
3573 static const struct sched_class rt_sched_class;
3574
3575 -#define sched_class_highest (&stop_sched_class)
3576 +#define sched_class_highest (&litmus_sched_class)
3577 #define for_each_class(class) \
3578 for (class = sched_class_highest; class; class = class->next)
3579
3580 @@ -2031,6 +2044,7 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
3581 #include "sched_rt.c"
3582 #include "sched_autogroup.c"
3583 #include "sched_stoptask.c"
3584 +#include "../litmus/sched_litmus.c"
3585 #ifdef CONFIG_SCHED_DEBUG
3586 # include "sched_debug.c"
3587 #endif
3588 @@ -2153,6 +2167,10 @@ static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
3589 * A queue event has occurred, and we're going to schedule. In
3590 * this case, we can save a useless back to back clock update.
3591 */
3592 + /* LITMUS^RT:
3593 + * The "disable-clock-update" approach was buggy in Linux 2.6.36.
3594 + * The issue has been solved in 2.6.37.
3595 + */
3596 if (rq->curr->on_rq && test_tsk_need_resched(rq->curr))
3597 rq->skip_clock_update = 1;
3598 }
3599 @@ -2643,7 +2661,12 @@ static void ttwu_queue(struct task_struct *p, int cpu)
3600 struct rq *rq = cpu_rq(cpu);
3601
3602 #if defined(CONFIG_SMP)
3603 - if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) {
3604 + /*
3605 + * LITMUS^RT: whether to send an IPI to the remote CPU
3606 + * is plugin specific.
3607 + */
3608 + if (!is_realtime(p) &&
3609 + sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) {
3610 sched_clock_cpu(cpu); /* sync clocks x-cpu */
3611 ttwu_queue_remote(p, cpu);
3612 return;
3613 @@ -2676,6 +2699,9 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
3614 unsigned long flags;
3615 int cpu, success = 0;
3616
3617 + if (is_realtime(p))
3618 + TRACE_TASK(p, "try_to_wake_up() state:%d\n", p->state);
3619 +
3620 smp_wmb();
3621 raw_spin_lock_irqsave(&p->pi_lock, flags);
3622 if (!(p->state & state))
3623 @@ -2712,6 +2738,12 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
3624 */
3625 smp_rmb();
3626
3627 + /* LITMUS^RT: once the task can be safely referenced by this
3628 + * CPU, don't interfere with Linux load balancing.
3629 + */
3630 + if (is_realtime(p))
3631 + goto litmus_out_activate;
3632 +
3633 p->sched_contributes_to_load = !!task_contributes_to_load(p);
3634 p->state = TASK_WAKING;
3635
3636 @@ -2723,12 +2755,16 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
3637 wake_flags |= WF_MIGRATED;
3638 set_task_cpu(p, cpu);
3639 }
3640 +
3641 +litmus_out_activate:
3642 #endif /* CONFIG_SMP */
3643
3644 ttwu_queue(p, cpu);
3645 stat:
3646 ttwu_stat(p, cpu, wake_flags);
3647 out:
3648 + if (is_realtime(p))
3649 + TRACE_TASK(p, "try_to_wake_up() done state:%d\n", p->state);
3650 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
3651
3652 return success;
3653 @@ -2839,7 +2875,8 @@ void sched_fork(struct task_struct *p)
3654 * Revert to default priority/policy on fork if requested.
3655 */
3656 if (unlikely(p->sched_reset_on_fork)) {
3657 - if (p->policy == SCHED_FIFO || p->policy == SCHED_RR) {
3658 + if (p->policy == SCHED_FIFO || p->policy == SCHED_RR ||
3659 + p->policy == SCHED_LITMUS) {
3660 p->policy = SCHED_NORMAL;
3661 p->normal_prio = p->static_prio;
3662 }
3663 @@ -3050,6 +3087,8 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
3664 */
3665 prev_state = prev->state;
3666 finish_arch_switch(prev);
3667 + litmus->finish_switch(prev);
3668 + prev->rt_param.stack_in_use = NO_CPU;
3669 #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
3670 local_irq_disable();
3671 #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
3672 @@ -3079,6 +3118,15 @@ static inline void pre_schedule(struct rq *rq, struct task_struct *prev)
3673 {
3674 if (prev->sched_class->pre_schedule)
3675 prev->sched_class->pre_schedule(rq, prev);
3676 +
3677 + /* LITMUS^RT: not a very clean hack -- we need to save the prev task
3678 + * as our scheduling decisions rely on it (once we drop the rq lock,
3679 + * something in prev can change...); there is no way to avoid
3680 + * this hack apart from modifying pick_next_task(rq, _prev_) or
3681 + * falling back on the previous solution of decoupling
3682 + * scheduling decisions.
3683 + */
3684 + rq->litmus.prev = prev;
3685 }
3686
3687 /* rq->lock is NOT held, but preemption is disabled */
3688 @@ -4094,18 +4142,26 @@ void scheduler_tick(void)
3689
3690 sched_clock_tick();
3691
3692 + TS_TICK_START(current);
3693 +
3694 raw_spin_lock(&rq->lock);
3695 update_rq_clock(rq);
3696 update_cpu_load_active(rq);
3697 curr->sched_class->task_tick(rq, curr, 0);
3698 +
3699 + /* litmus_tick may force current to resched */
3700 + litmus_tick(rq, curr);
3701 +
3702 raw_spin_unlock(&rq->lock);
3703
3704 perf_event_task_tick();
3705
3706 #ifdef CONFIG_SMP
3707 rq->idle_at_tick = idle_cpu(cpu);
3708 - trigger_load_balance(rq, cpu);
3709 + if (!is_realtime(current))
3710 + trigger_load_balance(rq, cpu);
3711 #endif
3712 + TS_TICK_END(current);
3713 }
3714
3715 notrace unsigned long get_parent_ip(unsigned long addr)
3716 @@ -4225,12 +4281,20 @@ pick_next_task(struct rq *rq)
3717 /*
3718 * Optimization: we know that if all tasks are in
3719 * the fair class we can call that function directly:
3720 - */
3721 - if (likely(rq->nr_running == rq->cfs.nr_running)) {
3722 +
3723 + * NOT IN LITMUS^RT!
3724 +
3725 + * This breaks many assumptions in the plugins.
3726 + * Do not uncomment without thinking long and hard
3727 + * about how this affects global plugins such as GSN-EDF.
3728 +
3729 + if (rq->nr_running == rq->cfs.nr_running) {
3730 + TRACE("taking shortcut in pick_next_task()\n");
3731 p = fair_sched_class.pick_next_task(rq);
3732 if (likely(p))
3733 return p;
3734 }
3735 + */
3736
3737 for_each_class(class) {
3738 p = class->pick_next_task(rq);
3739 @@ -4253,11 +4317,19 @@ asmlinkage void __sched schedule(void)
3740
3741 need_resched:
3742 preempt_disable();
3743 + sched_state_entered_schedule();
3744 cpu = smp_processor_id();
3745 rq = cpu_rq(cpu);
3746 rcu_note_context_switch(cpu);
3747 prev = rq->curr;
3748
3749 + /* LITMUS^RT: quickly re-evaluate the scheduling decision
3750 + * if the previous one is no longer valid after the context switch.
3751 + */
3752 +litmus_need_resched_nonpreemptible:
3753 + TS_SCHED_START;
3754 + sched_trace_task_switch_away(prev);
3755 +
3756 schedule_debug(prev);
3757
3758 if (sched_feat(HRTICK))
3759 @@ -4314,7 +4386,10 @@ need_resched:
3760 rq->curr = next;
3761 ++*switch_count;
3762
3763 + TS_SCHED_END(next);
3764 + TS_CXS_START(next);
3765 context_switch(rq, prev, next); /* unlocks the rq */
3766 + TS_CXS_END(current);
3767 /*
3768 * The context switch have flipped the stack from under us
3769 * and restored the local variables which were saved when
3770 @@ -4323,14 +4398,23 @@ need_resched:
3771 */
3772 cpu = smp_processor_id();
3773 rq = cpu_rq(cpu);
3774 - } else
3775 + } else {
3776 + TS_SCHED_END(prev);
3777 raw_spin_unlock_irq(&rq->lock);
3778 + }
3779 +
3780 + sched_trace_task_switch_to(current);
3781
3782 post_schedule(rq);
3783
3784 + if (sched_state_validate_switch())
3785 + goto litmus_need_resched_nonpreemptible;
3786 +
3787 preempt_enable_no_resched();
3788 if (need_resched())
3789 goto need_resched;
3790 +
3791 + srp_ceiling_block();
3792 }
3793 EXPORT_SYMBOL(schedule);
3794
3795 @@ -4600,6 +4684,17 @@ void complete_all(struct completion *x)
3796 }
3797 EXPORT_SYMBOL(complete_all);
3798
3799 +void complete_n(struct completion *x, int n)
3800 +{
3801 + unsigned long flags;
3802 +
3803 + spin_lock_irqsave(&x->wait.lock, flags);
3804 + x->done += n;
3805 + __wake_up_common(&x->wait, TASK_NORMAL, n, 0, NULL);
3806 + spin_unlock_irqrestore(&x->wait.lock, flags);
3807 +}
3808 +EXPORT_SYMBOL(complete_n);
3809 +
3810 static inline long __sched
3811 do_wait_for_common(struct completion *x, long timeout, int state)
3812 {
3813 @@ -5039,7 +5134,9 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
3814 p->normal_prio = normal_prio(p);
3815 /* we are holding p->pi_lock already */
3816 p->prio = rt_mutex_getprio(p);
3817 - if (rt_prio(p->prio))
3818 + if (p->policy == SCHED_LITMUS)
3819 + p->sched_class = &litmus_sched_class;
3820 + else if (rt_prio(p->prio))
3821 p->sched_class = &rt_sched_class;
3822 else
3823 p->sched_class = &fair_sched_class;
3824 @@ -5087,7 +5184,7 @@ recheck:
3825
3826 if (policy != SCHED_FIFO && policy != SCHED_RR &&
3827 policy != SCHED_NORMAL && policy != SCHED_BATCH &&
3828 - policy != SCHED_IDLE)
3829 + policy != SCHED_IDLE && policy != SCHED_LITMUS)
3830 return -EINVAL;
3831 }
3832
3833 @@ -5102,6 +5199,8 @@ recheck:
3834 return -EINVAL;
3835 if (rt_policy(policy) != (param->sched_priority != 0))
3836 return -EINVAL;
3837 + if (policy == SCHED_LITMUS && policy == p->policy)
3838 + return -EINVAL;
3839
3840 /*
3841 * Allow unprivileged RT tasks to decrease priority:
3842 @@ -5145,6 +5244,12 @@ recheck:
3843 return retval;
3844 }
3845
3846 + if (policy == SCHED_LITMUS) {
3847 + retval = litmus_admit_task(p);
3848 + if (retval)
3849 + return retval;
3850 + }
3851 +
3852 /*
3853 * make sure no PI-waiters arrive (or leave) while we are
3854 * changing the priority of the task:
3855 @@ -5203,10 +5308,19 @@ recheck:
3856
3857 p->sched_reset_on_fork = reset_on_fork;
3858
3859 + if (p->policy == SCHED_LITMUS)
3860 + litmus_exit_task(p);
3861 +
3862 oldprio = p->prio;
3863 prev_class = p->sched_class;
3864 __setscheduler(rq, p, policy, param->sched_priority);
3865
3866 + if (policy == SCHED_LITMUS) {
3867 + p->rt_param.stack_in_use = running ? rq->cpu : NO_CPU;
3868 + p->rt_param.present = running;
3869 + litmus->task_new(p, on_rq, running);
3870 + }
3871 +
3872 if (running)
3873 p->sched_class->set_curr_task(rq);
3874 if (on_rq)
3875 @@ -5374,10 +5488,11 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
3876 rcu_read_lock();
3877
3878 p = find_process_by_pid(pid);
3879 - if (!p) {
3880 + /* Don't set affinity if the task was not found or is a LITMUS^RT task */
3881 + if (!p || is_realtime(p)) {
3882 rcu_read_unlock();
3883 put_online_cpus();
3884 - return -ESRCH;
3885 + return p ? -EPERM : -ESRCH;
3886 }
3887
3888 /* Prevent p going away */
3889 diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
3890 index c768588..334eb47 100644
3891 --- a/kernel/sched_fair.c
3892 +++ b/kernel/sched_fair.c
3893 @@ -1890,6 +1890,9 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
3894 int scale = cfs_rq->nr_running >= sched_nr_latency;
3895 int next_buddy_marked = 0;
3896
3897 + if (unlikely(rt_prio(p->prio)) || p->policy == SCHED_LITMUS)
3898 + goto preempt;
3899 +
3900 if (unlikely(se == pse))
3901 return;
3902
3903 diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
3904 index 10d0182..58cf5d1 100644
3905 --- a/kernel/sched_rt.c
3906 +++ b/kernel/sched_rt.c
3907 @@ -1078,7 +1078,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
3908 */
3909 static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags)
3910 {
3911 - if (p->prio < rq->curr->prio) {
3912 + if (p->prio < rq->curr->prio || p->policy == SCHED_LITMUS) {
3913 resched_task(rq->curr);
3914 return;
3915 }
3916 diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
3917 index d5097c4..0c0e02f 100644
3918 --- a/kernel/time/tick-sched.c
3919 +++ b/kernel/time/tick-sched.c
3920 @@ -766,12 +766,53 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
3921 }
3922
3923 /**
3924 + * tick_set_quanta_type - get the quanta type as a boot option
3925 + * Default is the standard setup with ticks staggered over the first
3926 + * half of the tick period.
3927 + */
3928 +int quanta_type = LINUX_DEFAULT_TICKS;
3929 +static int __init tick_set_quanta_type(char *str)
3930 +{
3931 + if (strcmp("aligned", str) == 0) {
3932 + quanta_type = LITMUS_ALIGNED_TICKS;
3933 + printk(KERN_INFO "LITMUS^RT: setting aligned quanta\n");
3934 + }
3935 + else if (strcmp("staggered", str) == 0) {
3936 + quanta_type = LITMUS_STAGGERED_TICKS;
3937 + printk(KERN_INFO "LITMUS^RT: setting staggered quanta\n");
3938 + }
3939 + return 1;
3940 +}
3941 +__setup("quanta=", tick_set_quanta_type);
3942 +
3943 +u64 cpu_stagger_offset(int cpu)
3944 +{
3945 + u64 offset = 0;
3946 + switch (quanta_type) {
3947 + case LITMUS_ALIGNED_TICKS:
3948 + offset = 0;
3949 + break;
3950 + case LITMUS_STAGGERED_TICKS:
3951 + offset = ktime_to_ns(tick_period);
3952 + do_div(offset, num_possible_cpus());
3953 + offset *= cpu;
3954 + break;
3955 + default:
3956 + offset = ktime_to_ns(tick_period) >> 1;
3957 + do_div(offset, num_possible_cpus());
3958 + offset *= cpu;
3959 + }
3960 + return offset;
3961 +}
3962 +
3963 +/**
3964 * tick_setup_sched_timer - setup the tick emulation timer
3965 */
3966 void tick_setup_sched_timer(void)
3967 {
3968 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
3969 ktime_t now = ktime_get();
3970 + u64 offset;
3971
3972 /*
3973 * Emulate tick processing via per-CPU hrtimers:
3974 @@ -782,6 +823,12 @@ void tick_setup_sched_timer(void)
3975 /* Get the next period (per cpu) */
3976 hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
3977
3978 + /* Offset must be set correctly to achieve desired quanta type. */
3979 + offset = cpu_stagger_offset(smp_processor_id());
3980 +
3981 + /* Add the correct offset to expiration time */
3982 + hrtimer_add_expires_ns(&ts->sched_timer, offset);
3983 +
3984 for (;;) {
3985 hrtimer_forward(&ts->sched_timer, now, tick_period);
3986 hrtimer_start_expires(&ts->sched_timer,
3987 diff --git a/litmus/Kconfig b/litmus/Kconfig
3988 new file mode 100644
3989 index 0000000..94b48e1
3990 --- /dev/null
3991 +++ b/litmus/Kconfig
3992 @@ -0,0 +1,218 @@
3993 +menu "LITMUS^RT"
3994 +
3995 +menu "Scheduling"
3996 +
3997 +config PLUGIN_CEDF
3998 + bool "Clustered-EDF"
3999 + depends on X86 && SYSFS
4000 + default y
4001 + help
4002 + Include the Clustered EDF (C-EDF) plugin in the kernel.
4003 + This is appropriate for large platforms with shared caches.
4004 + On smaller platforms (e.g., ARM PB11MPCore), using C-EDF
4005 + makes little sense since there aren't any shared caches.
4006 +
4007 +config PLUGIN_PFAIR
4008 + bool "PFAIR"
4009 + depends on HIGH_RES_TIMERS && !NO_HZ
4010 + default y
4011 + help
4012 + Include the PFAIR plugin (i.e., the PD^2 scheduler) in the kernel.
4013 + The PFAIR plugin requires high resolution timers (for staggered quanta)
4014 + and does not support NO_HZ (quanta could be missed when the system is idle).
4015 +
4016 + If unsure, say Yes.
4017 +
4018 +config RELEASE_MASTER
4019 + bool "Release-master Support"
4020 + depends on ARCH_HAS_SEND_PULL_TIMERS
4021 + default n
4022 + help
4023 + Allow one processor to act as a dedicated interrupt processor
4024 + that services all timer interrupts, but that does not schedule
4025 + real-time tasks. See RTSS'09 paper for details
4026 + (http://www.cs.unc.edu/~anderson/papers.html).
4027 + Currently only supported by GSN-EDF.
4028 +
4029 +endmenu
4030 +
4031 +menu "Real-Time Synchronization"
4032 +
4033 +config NP_SECTION
4034 + bool "Non-preemptive section support"
4035 + default n
4036 + help
4037 + Allow tasks to become non-preemptable.
4038 + Note that plugins still need to explicitly support non-preemptivity.
4039 + Currently, only GSN-EDF and PSN-EDF have such support.
4040 +
4041 + This is required to support locking protocols such as the FMLP.
4042 + If disabled, all tasks will be considered preemptable at all times.
4043 +
4044 +config LITMUS_LOCKING
4045 + bool "Support for real-time locking protocols"
4046 + depends on NP_SECTION
4047 + default n
4048 + help
4049 + Enable LITMUS^RT's deterministic multiprocessor real-time
4050 + locking protocols.
4051 +
4052 + Say Yes if you want to include locking protocols such as the FMLP and
4053 + Baker's SRP.
4054 +
4055 +endmenu
4056 +
4057 +menu "Performance Enhancements"
4058 +
4059 +config SCHED_CPU_AFFINITY
4060 + bool "Local Migration Affinity"
4061 + depends on X86
4062 + default y
4063 + help
4064 + Rescheduled tasks prefer CPUs near their previously used CPU. This
4065 + may improve performance by preserving cache affinity.
4066 +
4067 + Warning: May make bugs harder to find since tasks may migrate less often.
4068 +
4069 + NOTES:
4070 + * Feature is not utilized by PFair/PD^2.
4071 +
4072 + Say Yes if unsure.
4073 +
4074 +endmenu
4075 +
4076 +menu "Tracing"
4077 +
4078 +config FEATHER_TRACE
4079 + bool "Feather-Trace Infrastructure"
4080 + default y
4081 + help
4082 + Feather-Trace basic tracing infrastructure. Includes device file
4083 + driver and instrumentation point support.
4084 +
4085 + There are actually two implementations of Feather-Trace.
4086 + 1) A slower, but portable, default implementation.
4087 + 2) Architecture-specific implementations that rewrite kernel .text at runtime.
4088 +
4089 + If enabled, Feather-Trace will be based on 2) if available (currently only for x86).
4090 + However, if DEBUG_RODATA=y, then Feather-Trace will choose option 1) in any case
4091 + to avoid problems with write-protected .text pages.
4092 +
4093 + Bottom line: to avoid increased overheads, choose DEBUG_RODATA=n.
4094 +
4095 + Note that this option only enables the basic Feather-Trace infrastructure;
4096 + you still need to enable SCHED_TASK_TRACE and/or SCHED_OVERHEAD_TRACE to
4097 + actually enable any events.
4098 +
4099 +config SCHED_TASK_TRACE
4100 + bool "Trace real-time tasks"
4101 + depends on FEATHER_TRACE
4102 + default y
4103 + help
4104 + Include support for the sched_trace_XXX() tracing functions. This
4105 + allows the collection of real-time task events such as job
4106 + completions, job releases, early completions, etc. This results in a
4107 + small overhead in the scheduling code. Disable if the overhead is not
4108 + acceptable (e.g., benchmarking).
4109 +
4110 + Say Yes for debugging.
4111 + Say No for overhead tracing.
4112 +
4113 +config SCHED_TASK_TRACE_SHIFT
4114 + int "Buffer size for sched_trace_xxx() events"
4115 + depends on SCHED_TASK_TRACE
4116 + range 8 13
4117 + default 9
4118 + help
4119 +
4120 + Select the buffer size of sched_trace_xxx() events as a power of two.
4121 + These buffers are statically allocated as per-CPU data. Each event
4122 + requires 24 bytes storage plus one additional flag byte. Too large
4123 + buffers can cause issues with the per-cpu allocator (and waste
4124 + memory). Too small buffers can cause scheduling events to be lost. The
4125 + "right" size is workload dependent and depends on the number of tasks,
4126 + each task's period, each task's number of suspensions, and how often
4127 + the buffer is flushed.
4128 +
4129 + Examples: 12 => 4k events
4130 + 10 => 1k events
4131 + 8 => 512 events
4132 +
4133 +config SCHED_OVERHEAD_TRACE
4134 + bool "Record timestamps for overhead measurements"
4135 + depends on FEATHER_TRACE
4136 + default n
4137 + help
4138 + Export event stream for overhead tracing.
4139 + Say Yes for overhead tracing.
4140 +
4141 +config SCHED_DEBUG_TRACE
4142 + bool "TRACE() debugging"
4143 + default y
4144 + help
4145 + Include support for sched_trace_log_message(), which is used to
4146 + implement TRACE(). If disabled, no TRACE() messages will be included
4147 + in the kernel, and no overheads due to debugging statements will be
4148 + incurred by the scheduler. Disable if the overhead is not acceptable
4149 + (e.g., benchmarking).
4150 +
4151 + Say Yes for debugging.
4152 + Say No for overhead tracing.
4153 +
4154 +config SCHED_DEBUG_TRACE_SHIFT
4155 + int "Buffer size for TRACE() buffer"
4156 + depends on SCHED_DEBUG_TRACE
4157 + range 14 22
4158 + default 18
4159 + help
4160 +
4161 + Select the amount of memory needed for the TRACE() buffer, as a
4162 + power of two. The TRACE() buffer is global and statically allocated. If
4163 + the buffer is too small, there will be holes in the TRACE() log if the
4164 + buffer-flushing task is starved.
4165 +
4166 + The default should be sufficient for most systems. Increase the buffer
4167 + size if the log contains holes. Reduce the buffer size when running on
4168 + a memory-constrained system.
4169 +
4170 + Examples: 14 => 16KB
4171 + 18 => 256KB
4172 + 20 => 1MB
4173 +
4174 + This buffer is exported to userspace using a misc device as
4175 + 'litmus/log'. On a system with default udev rules, a corresponding
4176 + character device node should be created at /dev/litmus/log. The buffer
4177 + can be flushed using cat, e.g., 'cat /dev/litmus/log > my_log_file.txt'.
4178 +
4179 +config SCHED_DEBUG_TRACE_CALLER
4180 + bool "Include [function@file:line] tag in TRACE() log"
4181 + depends on SCHED_DEBUG_TRACE
4182 + default n
4183 + help
4184 + With this option enabled, TRACE() prepends
4185 +
4186 + "[<function name>@<filename>:<line number>]"
4187 +
4188 + to each message in the debug log. Enable this to aid in figuring out
4189 + what was called in which order. The downside is that it adds a lot of
4190 + clutter.
4191 +
4192 + If unsure, say No.
4193 +
4194 +config PREEMPT_STATE_TRACE
4195 + bool "Trace preemption state machine transitions"
4196 + depends on SCHED_DEBUG_TRACE
4197 + default n
4198 + help
4199 + With this option enabled, each CPU will log when it transitions
4200 + states in the preemption state machine. This state machine is
4201 + used to determine how to react to IPIs (avoid races with in-flight IPIs).
4202 +
4203 + Warning: this creates a lot of information in the debug trace. Only
4204 + recommended when you are debugging preemption-related races.
4205 +
4206 + If unsure, say No.
4207 +
4208 +endmenu
4209 +
4210 +endmenu
4211 diff --git a/litmus/Makefile b/litmus/Makefile
4212 new file mode 100644
4213 index 0000000..7338180
4214 --- /dev/null
4215 +++ b/litmus/Makefile
4216 @@ -0,0 +1,29 @@
4217 +#
4218 +# Makefile for LITMUS^RT
4219 +#
4220 +
4221 +obj-y = sched_plugin.o litmus.o \
4222 + preempt.o \
4223 + litmus_proc.o \
4224 + budget.o \
4225 + clustered.o \
4226 + jobs.o \
4227 + sync.o \
4228 + rt_domain.o \
4229 + edf_common.o \
4230 + fdso.o \
4231 + locking.o \
4232 + srp.o \
4233 + bheap.o \
4234 + ctrldev.o \
4235 + sched_gsn_edf.o \
4236 + sched_psn_edf.o
4237 +
4238 +obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
4239 +obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o
4240 +obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o
4241 +
4242 +obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
4243 +obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o
4244 +obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o
4245 +obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o
4246 diff --git a/litmus/affinity.c b/litmus/affinity.c
4247 new file mode 100644
4248 index 0000000..3fa6dd7
4249 --- /dev/null
4250 +++ b/litmus/affinity.c
4251 @@ -0,0 +1,42 @@
4252 +#include <linux/cpu.h>
4253 +
4254 +#include <litmus/affinity.h>
4255 +
4256 +struct neighborhood neigh_info[NR_CPUS];
4257 +
4258 +/* called by _init_litmus() */
4259 +void init_topology(void) {
4260 + int cpu;
4261 + int i;
4262 + int chk;
4263 + int depth = num_cache_leaves;
4264 +
4265 + if (depth > NUM_CACHE_LEVELS)
4266 + depth = NUM_CACHE_LEVELS;
4267 +
4268 + for_each_online_cpu(cpu) {
4269 + for (i = 0; i < depth; ++i) {
4270 + chk = get_shared_cpu_map((struct cpumask *)&neigh_info[cpu].neighbors[i], cpu, i);
4271 + if (chk) {
4272 + /* failed */
4273 + neigh_info[cpu].size[i] = 0;
4274 + } else {
4275 + /* size = num bits in mask */
4276 + neigh_info[cpu].size[i] =
4277 + cpumask_weight((struct cpumask *)&neigh_info[cpu].neighbors[i]);
4278 + }
4279 + printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n",
4280 + cpu, neigh_info[cpu].size[i], i,
4281 + *cpumask_bits(neigh_info[cpu].neighbors[i]));
4282 + }
4283 +
4284 + /* set data for non-existent levels */
4285 + for (; i < NUM_CACHE_LEVELS; ++i) {
4286 + neigh_info[cpu].size[i] = 0;
4287 +
4288 + printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n",
4289 + cpu, neigh_info[cpu].size[i], i, 0lu);
4290 + }
4291 + }
4292 +}
4293 +
4294 diff --git a/litmus/bheap.c b/litmus/bheap.c
4295 new file mode 100644
4296 index 0000000..528af97
4297 --- /dev/null
4298 +++ b/litmus/bheap.c
4299 @@ -0,0 +1,314 @@
4300 +#include <linux/kernel.h>
4301 +#include <litmus/bheap.h>
4302 +
4303 +void bheap_init(struct bheap* heap)
4304 +{
4305 + heap->head = NULL;
4306 + heap->min = NULL;
4307 +}
4308 +
4309 +void bheap_node_init(struct bheap_node** _h, void* value)
4310 +{
4311 + struct bheap_node* h = *_h;
4312 + h->parent = NULL;
4313 + h->next = NULL;
4314 + h->child = NULL;
4315 + h->degree = NOT_IN_HEAP;
4316 + h->value = value;
4317 + h->ref = _h;
4318 +}
4319 +
4320 +
4321 +/* make child a subtree of root */
4322 +static void __bheap_link(struct bheap_node* root,
4323 + struct bheap_node* child)
4324 +{
4325 + child->parent = root;
4326 + child->next = root->child;
4327 + root->child = child;
4328 + root->degree++;
4329 +}
4330 +
4331 +/* merge root lists */
4332 +static struct bheap_node* __bheap_merge(struct bheap_node* a,
4333 + struct bheap_node* b)
4334 +{
4335 + struct bheap_node* head = NULL;
4336 + struct bheap_node** pos = &head;
4337 +
4338 + while (a && b) {
4339 + if (a->degree < b->degree) {
4340 + *pos = a;
4341 + a = a->next;
4342 + } else {
4343 + *pos = b;
4344 + b = b->next;
4345 + }
4346 + pos = &(*pos)->next;
4347 + }
4348 + if (a)
4349 + *pos = a;
4350 + else
4351 + *pos = b;
4352 + return head;
4353 +}
4354 +
4355 +/* reverse a linked list of nodes. also clears parent pointer */
4356 +static struct bheap_node* __bheap_reverse(struct bheap_node* h)
4357 +{
4358 + struct bheap_node* tail = NULL;
4359 + struct bheap_node* next;
4360 +
4361 + if (!h)
4362 + return h;
4363 +
4364 + h->parent = NULL;
4365 + while (h->next) {
4366 + next = h->next;
4367 + h->next = tail;
4368 + tail = h;
4369 + h = next;
4370 + h->parent = NULL;
4371 + }
4372 + h->next = tail;
4373 + return h;
4374 +}
4375 +
4376 +static void __bheap_min(bheap_prio_t higher_prio, struct bheap* heap,
4377 + struct bheap_node** prev, struct bheap_node** node)
4378 +{
4379 + struct bheap_node *_prev, *cur;
4380 + *prev = NULL;
4381 +
4382 + if (!heap->head) {
4383 + *node = NULL;
4384 + return;
4385 + }
4386 +
4387 + *node = heap->head;
4388 + _prev = heap->head;
4389 + cur = heap->head->next;
4390 + while (cur) {
4391 + if (higher_prio(cur, *node)) {
4392 + *node = cur;
4393 + *prev = _prev;
4394 + }
4395 + _prev = cur;
4396 + cur = cur->next;
4397 + }
4398 +}
4399 +
4400 +static void __bheap_union(bheap_prio_t higher_prio, struct bheap* heap,
4401 + struct bheap_node* h2)
4402 +{
4403 + struct bheap_node* h1;
4404 + struct bheap_node *prev, *x, *next;
4405 + if (!h2)
4406 + return;
4407 + h1 = heap->head;
4408 + if (!h1) {
4409 + heap->head = h2;
4410 + return;
4411 + }
4412 + h1 = __bheap_merge(h1, h2);
4413 + prev = NULL;
4414 + x = h1;
4415 + next = x->next;
4416 + while (next) {
4417 + if (x->degree != next->degree ||
4418 + (next->next && next->next->degree == x->degree)) {
4419 + /* nothing to do, advance */
4420 + prev = x;
4421 + x = next;
4422 + } else if (higher_prio(x, next)) {
4423 + /* x becomes the root of next */
4424 + x->next = next->next;
4425 + __bheap_link(x, next);
4426 + } else {
4427 + /* next becomes the root of x */
4428 + if (prev)
4429 + prev->next = next;
4430 + else
4431 + h1 = next;
4432 + __bheap_link(next, x);
4433 + x = next;
4434 + }
4435 + next = x->next;
4436 + }
4437 + heap->head = h1;
4438 +}
4439 +
4440 +static struct bheap_node* __bheap_extract_min(bheap_prio_t higher_prio,
4441 + struct bheap* heap)
4442 +{
4443 + struct bheap_node *prev, *node;
4444 + __bheap_min(higher_prio, heap, &prev, &node);
4445 + if (!node)
4446 + return NULL;
4447 + if (prev)
4448 + prev->next = node->next;
4449 + else
4450 + heap->head = node->next;
4451 + __bheap_union(higher_prio, heap, __bheap_reverse(node->child));
4452 + return node;
4453 +}
4454 +
4455 +/* insert (and reinitialize) a node into the heap */
4456 +void bheap_insert(bheap_prio_t higher_prio, struct bheap* heap,
4457 + struct bheap_node* node)
4458 +{
4459 + struct bheap_node *min;
4460 + node->child = NULL;
4461 + node->parent = NULL;
4462 + node->next = NULL;
4463 + node->degree = 0;
4464 + if (heap->min && higher_prio(node, heap->min)) {
4465 + /* swap min cache */
4466 + min = heap->min;
4467 + min->child = NULL;
4468 + min->parent = NULL;
4469 + min->next = NULL;
4470 + min->degree = 0;
4471 + __bheap_union(higher_prio, heap, min);
4472 + heap->min = node;
4473 + } else
4474 + __bheap_union(higher_prio, heap, node);
4475 +}
4476 +
4477 +void bheap_uncache_min(bheap_prio_t higher_prio, struct bheap* heap)
4478 +{
4479 + struct bheap_node* min;
4480 + if (heap->min) {
4481 + min = heap->min;
4482 + heap->min = NULL;
4483 + bheap_insert(higher_prio, heap, min);
4484 + }
4485 +}
4486 +
4487 +/* merge addition into target */
4488 +void bheap_union(bheap_prio_t higher_prio,
4489 + struct bheap* target, struct bheap* addition)
4490 +{
4491 + /* first insert any cached minima, if necessary */
4492 + bheap_uncache_min(higher_prio, target);
4493 + bheap_uncache_min(higher_prio, addition);
4494 + __bheap_union(higher_prio, target, addition->head);
4495 + /* this is a destructive merge */
4496 + addition->head = NULL;
4497 +}
4498 +
4499 +struct bheap_node* bheap_peek(bheap_prio_t higher_prio,
4500 + struct bheap* heap)
4501 +{
4502 + if (!heap->min)
4503 + heap->min = __bheap_extract_min(higher_prio, heap);
4504 + return heap->min;
4505 +}
4506 +
4507 +struct bheap_node* bheap_take(bheap_prio_t higher_prio,
4508 + struct bheap* heap)
4509 +{
4510 + struct bheap_node *node;
4511 + if (!heap->min)
4512 + heap->min = __bheap_extract_min(higher_prio, heap);
4513 + node = heap->min;
4514 + heap->min = NULL;
4515 + if (node)
4516 + node->degree = NOT_IN_HEAP;
4517 + return node;
4518 +}
4519 +
4520 +int bheap_decrease(bheap_prio_t higher_prio, struct bheap_node* node)
4521 +{
4522 + struct bheap_node *parent;
4523 + struct bheap_node** tmp_ref;
4524 + void* tmp;
4525 +
4526 + /* bubble up */
4527 + parent = node->parent;
4528 + while (parent && higher_prio(node, parent)) {
4529 + /* swap parent and node */
4530 + tmp = parent->value;
4531 + parent->value = node->value;
4532 + node->value = tmp;
4533 + /* swap references */
4534 + *(parent->ref) = node;
4535 + *(node->ref) = parent;
4536 + tmp_ref = parent->ref;
4537 + parent->ref = node->ref;
4538 + node->ref = tmp_ref;
4539 + /* step up */
4540 + node = parent;
4541 + parent = node->parent;
4542 + }
4543 +
4544 + return parent != NULL;
4545 +}
4546 +
4547 +void bheap_delete(bheap_prio_t higher_prio, struct bheap* heap,
4548 + struct bheap_node* node)
4549 +{
4550 + struct bheap_node *parent, *prev, *pos;
4551 + struct bheap_node** tmp_ref;
4552 + void* tmp;
4553 +
4554 + if (heap->min != node) {
4555 + /* bubble up */
4556 + parent = node->parent;
4557 + while (parent) {
4558 + /* swap parent and node */
4559 + tmp = parent->value;
4560 + parent->value = node->value;
4561 + node->value = tmp;
4562 + /* swap references */
4563 + *(parent->ref) = node;
4564 + *(node->ref) = parent;
4565 + tmp_ref = parent->ref;
4566 + parent->ref = node->ref;
4567 + node->ref = tmp_ref;
4568 + /* step up */
4569 + node = parent;
4570 + parent = node->parent;
4571 + }
4572 + /* now delete:
4573 + * first find prev */
4574 + prev = NULL;
4575 + pos = heap->head;
4576 + while (pos != node) {
4577 + prev = pos;
4578 + pos = pos->next;
4579 + }
4580 + /* we have prev, now remove node */
4581 + if (prev)
4582 + prev->next = node->next;
4583 + else
4584 + heap->head = node->next;
4585 + __bheap_union(higher_prio, heap, __bheap_reverse(node->child));
4586 + } else
4587 + heap->min = NULL;
4588 + node->degree = NOT_IN_HEAP;
4589 +}
4590 +
4591 +/* allocate a heap node for value and insert into the heap */
4592 +int bheap_add(bheap_prio_t higher_prio, struct bheap* heap,
4593 + void* value, int gfp_flags)
4594 +{
4595 + struct bheap_node* hn = bheap_node_alloc(gfp_flags);
4596 + if (likely(hn)) {
4597 + bheap_node_init(&hn, value);
4598 + bheap_insert(higher_prio, heap, hn);
4599 + }
4600 + return hn != NULL;
4601 +}
4602 +
4603 +void* bheap_take_del(bheap_prio_t higher_prio,
4604 + struct bheap* heap)
4605 +{
4606 + struct bheap_node* hn = bheap_take(higher_prio, heap);
4607 + void* ret = NULL;
4608 + if (hn) {
4609 + ret = hn->value;
4610 + bheap_node_free(hn);
4611 + }
4612 + return ret;
4613 +}
4614 diff --git a/litmus/budget.c b/litmus/budget.c
4615 new file mode 100644
4616 index 0000000..310e9a3
4617 --- /dev/null
4618 +++ b/litmus/budget.c
4619 @@ -0,0 +1,111 @@
4620 +#include <linux/sched.h>
4621 +#include <linux/percpu.h>
4622 +#include <linux/hrtimer.h>
4623 +
4624 +#include <litmus/litmus.h>
4625 +#include <litmus/preempt.h>
4626 +
4627 +struct enforcement_timer {
4628 + /* The enforcement timer is used to accurately police
4629 + * slice budgets. */
4630 + struct hrtimer timer;
4631 + int armed;
4632 +};
4633 +
4634 +DEFINE_PER_CPU(struct enforcement_timer, budget_timer);
4635 +
4636 +static enum hrtimer_restart on_enforcement_timeout(struct hrtimer *timer)
4637 +{
4638 + struct enforcement_timer* et = container_of(timer,
4639 + struct enforcement_timer,
4640 + timer);
4641 + unsigned long flags;
4642 +
4643 + local_irq_save(flags);
4644 + TRACE("enforcement timer fired.\n");
4645 + et->armed = 0;
4646 + /* activate scheduler */
4647 + litmus_reschedule_local();
4648 + local_irq_restore(flags);
4649 +
4650 + return HRTIMER_NORESTART;
4651 +}
4652 +
4653 +/* assumes called with IRQs off */
4654 +static void cancel_enforcement_timer(struct enforcement_timer* et)
4655 +{
4656 + int ret;
4657 +
4658 + TRACE("cancelling enforcement timer.\n");
4659 +
4660 + /* Since interrupts are disabled and et->armed is only
4661 + * modified locally, we do not need any locks.
4662 + */
4663 +
4664 + if (et->armed) {
4665 + ret = hrtimer_try_to_cancel(&et->timer);
4666 + /* Should never be inactive. */
4667 + BUG_ON(ret == 0);
4668 + /* Should never be running concurrently. */
4669 + BUG_ON(ret == -1);
4670 +
4671 + et->armed = 0;
4672 + }
4673 +}
4674 +
4675 +/* assumes called with IRQs off */
4676 +static void arm_enforcement_timer(struct enforcement_timer* et,
4677 + struct task_struct* t)
4678 +{
4679 + lt_t when_to_fire;
4680 + TRACE_TASK(t, "arming enforcement timer.\n");
4681 +
4682 + /* Calling this when there is no budget left for the task
4683 + * makes no sense, unless the task is non-preemptive. */
4684 + BUG_ON(budget_exhausted(t) && (!is_np(t)));
4685 +
4686 + /* __hrtimer_start_range_ns() cancels the timer
4687 + * anyway, so we don't have to check whether it is still armed */
4688 +
4689 + if (likely(!is_np(t))) {
4690 + when_to_fire = litmus_clock() + budget_remaining(t);
4691 + __hrtimer_start_range_ns(&et->timer,
4692 + ns_to_ktime(when_to_fire),
4693 + 0 /* delta */,
4694 + HRTIMER_MODE_ABS_PINNED,
4695 + 0 /* no wakeup */);
4696 + et->armed = 1;
4697 + }
4698 +}
4699 +
4700 +
4701 +/* expects to be called with IRQs off */
4702 +void update_enforcement_timer(struct task_struct* t)
4703 +{
4704 + struct enforcement_timer* et = &__get_cpu_var(budget_timer);
4705 +
4706 + if (t && budget_precisely_enforced(t)) {
4707 + /* Make sure we call into the scheduler when this budget
4708 + * expires. */
4709 + arm_enforcement_timer(et, t);
4710 + } else if (et->armed) {
4711 + /* Make sure we don't cause unnecessary interrupts. */
4712 + cancel_enforcement_timer(et);
4713 + }
4714 +}
4715 +
4716 +
4717 +static int __init init_budget_enforcement(void)
4718 +{
4719 + int cpu;
4720 + struct enforcement_timer* et;
4721 +
4722 + for (cpu = 0; cpu < NR_CPUS; cpu++) {
4723 + et = &per_cpu(budget_timer, cpu);
4724 + hrtimer_init(&et->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
4725 + et->timer.function = on_enforcement_timeout;
4726 + }
4727 + return 0;
4728 +}
4729 +
4730 +module_init(init_budget_enforcement);
4731 diff --git a/litmus/clustered.c b/litmus/clustered.c
4732 new file mode 100644
4733 index 0000000..6fe1b51
4734 --- /dev/null
4735 +++ b/litmus/clustered.c
4736 @@ -0,0 +1,111 @@
4737 +#include <linux/gfp.h>
4738 +#include <linux/cpumask.h>
4739 +#include <linux/list.h>
4740 +
4741 +#include <litmus/clustered.h>
4742 +
4743 +#ifndef CONFIG_X86
4744 +/* fake get_shared_cpu_map() on non-x86 architectures */
4745 +
4746 +int get_shared_cpu_map(cpumask_var_t mask, unsigned int cpu, int index)
4747 +{
4748 + if (index != 1)
4749 + return 1;
4750 + else {
4751 + /* Fake L1: CPU is all by itself. */
4752 + cpumask_clear(mask);
4753 + cpumask_set_cpu(cpu, mask);
4754 + return 0;
4755 + }
4756 +}
4757 +
4758 +#endif
4759 +
4760 +int get_cluster_size(enum cache_level level)
4761 +{
4762 + cpumask_var_t mask;
4763 + int ok;
4764 + int num_cpus;
4765 +
4766 + if (level == GLOBAL_CLUSTER)
4767 + return num_online_cpus();
4768 + else {
4769 + if (!zalloc_cpumask_var(&mask, GFP_ATOMIC))
4770 + return -ENOMEM;
4771 + /* assumes CPU 0 is representative of all CPUs */
4772 + ok = get_shared_cpu_map(mask, 0, level);
4773 + /* ok == 0 means we got the map; otherwise it's an invalid cache level */
4774 + if (ok == 0)
4775 + num_cpus = cpumask_weight(mask);
4776 + free_cpumask_var(mask);
4777 +
4778 + if (ok == 0)
4779 + return num_cpus;
4780 + else
4781 + return -EINVAL;
4782 + }
4783 +}
4784 +
4785 +int assign_cpus_to_clusters(enum cache_level level,
4786 + struct scheduling_cluster* clusters[],
4787 + unsigned int num_clusters,
4788 + struct cluster_cpu* cpus[],
4789 + unsigned int num_cpus)
4790 +{
4791 + cpumask_var_t mask;
4792 + unsigned int i, free_cluster = 0, low_cpu;
4793 + int err = 0;
4794 +
4795 + if (!zalloc_cpumask_var(&mask, GFP_ATOMIC))
4796 + return -ENOMEM;
4797 +
4798 + /* clear cluster pointers */
4799 + for (i = 0; i < num_cpus; i++) {
4800 + cpus[i]->id = i;
4801 + cpus[i]->cluster = NULL;
4802 + }
4803 +
4804 + /* initialize clusters */
4805 + for (i = 0; i < num_clusters; i++) {
4806 + clusters[i]->id = i;
4807 + INIT_LIST_HEAD(&clusters[i]->cpus);
4808 + }
4809 +
4810 + /* Assign each CPU. Two assumptions are made:
4811 + * 1) The index of a cpu in cpus corresponds to its processor id (i.e., the index in a cpu mask).
4812 + * 2) All cpus that belong to some cluster are online.
4813 + */
4814 + for_each_online_cpu(i) {
4815 + /* get lowest-id CPU in cluster */
4816 + if (level != GLOBAL_CLUSTER) {
4817 + err = get_shared_cpu_map(mask, cpus[i]->id, level);
4818 + if (err != 0) {
4819 + /* ugh... wrong cache level? Either caller screwed up
4820 + * or the CPU topology is weird. */
4821 + printk(KERN_ERR "Could not set up clusters for L%d sharing (max: L%d).\n",
4822 + level, err);
4823 + err = -EINVAL;
4824 + goto out;
4825 + }
4826 + low_cpu = cpumask_first(mask);
4827 + } else
4828 + low_cpu = 0;
4829 + if (low_cpu == i) {
4830 + /* caller must provide an appropriate number of clusters */
4831 + BUG_ON(free_cluster >= num_clusters);
4832 +
4833 + /* create new cluster */
4834 + cpus[i]->cluster = clusters[free_cluster++];
4835 + } else {
4836 + /* low_cpu points to the right cluster
4837 + * Assumption: low_cpu is actually online and was processed earlier. */
4838 + cpus[i]->cluster = cpus[low_cpu]->cluster;
4839 + }
4840 + /* enqueue in cpus list */
4841 + list_add_tail(&cpus[i]->cluster_list, &cpus[i]->cluster->cpus);
4842 + printk(KERN_INFO "Assigning CPU%u to cluster %u.\n", i, cpus[i]->cluster->id);
4843 + }
4844 +out:
4845 + free_cpumask_var(mask);
4846 + return err;
4847 +}
4848 diff --git a/litmus/ctrldev.c b/litmus/ctrldev.c
4849 new file mode 100644
4850 index 0000000..6677a67
4851 --- /dev/null
4852 +++ b/litmus/ctrldev.c
4853 @@ -0,0 +1,150 @@
4854 +#include <linux/sched.h>
4855 +#include <linux/mm.h>
4856 +#include <linux/fs.h>
4857 +#include <linux/miscdevice.h>
4858 +#include <linux/module.h>
4859 +
4860 +#include <litmus/litmus.h>
4861 +
4862 +/* only one page for now, but we might want to add a RO version at some point */
4863 +
4864 +#define CTRL_NAME "litmus/ctrl"
4865 +
4866 +/* allocate t->rt_param.ctrl_page*/
4867 +static int alloc_ctrl_page(struct task_struct *t)
4868 +{
4869 + int err = 0;
4870 +
4871 + /* only allocate if the task doesn't have one yet */
4872 + if (!tsk_rt(t)->ctrl_page) {
4873 + tsk_rt(t)->ctrl_page = (void*) get_zeroed_page(GFP_KERNEL);
4874 + if (!tsk_rt(t)->ctrl_page)
4875 + err = -ENOMEM;
4876 + /* will get de-allocated in task teardown */
4877 + TRACE_TASK(t, "%s ctrl_page = %p\n", __FUNCTION__,
4878 + tsk_rt(t)->ctrl_page);
4879 + }
4880 + return err;
4881 +}
4882 +
4883 +static int map_ctrl_page(struct task_struct *t, struct vm_area_struct* vma)
4884 +{
4885 + int err;
4886 + unsigned long pfn;
4887 +
4888 + struct page* ctrl = virt_to_page(tsk_rt(t)->ctrl_page);
4889 +
4890 + /* Increase ref count. Is decreased when vma is destroyed. */
4891 + get_page(ctrl);
4892 +
4893 + /* compute page frame number */
4894 + pfn = page_to_pfn(ctrl);
4895 +
4896 + TRACE_CUR(CTRL_NAME
4897 + ": mapping %p (pfn:%lx, %lx) to 0x%lx (prot:%lx)\n",
4898 + tsk_rt(t)->ctrl_page, pfn, page_to_pfn(ctrl), vma->vm_start,
4899 + vma->vm_page_prot);
4900 +
4901 + /* Map it into the vma. Make sure to use PAGE_SHARED, otherwise
4902 + * userspace actually gets a copy-on-write page. */
4903 + err = remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE, PAGE_SHARED);
4904 +
4905 + if (err)
4906 + TRACE_CUR(CTRL_NAME ": remap_pfn_range() failed (%d)\n", err);
4907 +
4908 + return err;
4909 +}
4910 +
4911 +static void litmus_ctrl_vm_close(struct vm_area_struct* vma)
4912 +{
4913 + TRACE_CUR("%s flags=0x%x prot=0x%x\n", __FUNCTION__,
4914 + vma->vm_flags, vma->vm_page_prot);
4915 +
4916 + TRACE_CUR(CTRL_NAME
4917 + ": %p:%p vma:%p vma->vm_private_data:%p closed by %s/%d.\n",
4918 + (void*) vma->vm_start, (void*) vma->vm_end, vma,
4919 + vma->vm_private_data, current->comm,
4920 + current->pid);
4921 +}
4922 +
4923 +static int litmus_ctrl_vm_fault(struct vm_area_struct* vma,
4924 + struct vm_fault* vmf)
4925 +{
4926 + /* This function should never be called, since
4927 + * all pages should have been mapped by mmap()
4928 + * already. */
4929 + TRACE_CUR("%s flags=0x%x\n", __FUNCTION__, vma->vm_flags);
4930 +
4931 + /* nope, you only get one page */
4932 + return VM_FAULT_SIGBUS;
4933 +}
4934 +
4935 +static struct vm_operations_struct litmus_ctrl_vm_ops = {
4936 + .close = litmus_ctrl_vm_close,
4937 + .fault = litmus_ctrl_vm_fault,
4938 +};
4939 +
4940 +static int litmus_ctrl_mmap(struct file* filp, struct vm_area_struct* vma)
4941 +{
4942 + int err = 0;
4943 +
4944 + /* first make sure mapper knows what he's doing */
4945 +
4946 + /* you can only get one page */
4947 + if (vma->vm_end - vma->vm_start != PAGE_SIZE)
4948 + return -EINVAL;
4949 +
4950 + /* you can only map the "first" page */
4951 + if (vma->vm_pgoff != 0)
4952 + return -EINVAL;
4953 +
4954 + /* you can't share it with anyone */
4955 + if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
4956 + return -EINVAL;
4957 +
4958 + vma->vm_ops = &litmus_ctrl_vm_ops;
4959 + /* this mapping should not be kept across forks,
4960 + * and cannot be expanded */
4961 + vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
4962 +
4963 + err = alloc_ctrl_page(current);
4964 + if (!err)
4965 + err = map_ctrl_page(current, vma);
4966 +
4967 + TRACE_CUR("%s flags=0x%x prot=0x%lx\n",
4968 + __FUNCTION__, vma->vm_flags, vma->vm_page_prot);
4969 +
4970 + return err;
4971 +}
4972 +
4973 +static struct file_operations litmus_ctrl_fops = {
4974 + .owner = THIS_MODULE,
4975 + .mmap = litmus_ctrl_mmap,
4976 +};
4977 +
4978 +static struct miscdevice litmus_ctrl_dev = {
4979 + .name = CTRL_NAME,
4980 + .minor = MISC_DYNAMIC_MINOR,
4981 + .fops = &litmus_ctrl_fops,
4982 +};
4983 +
4984 +static int __init init_litmus_ctrl_dev(void)
4985 +{
4986 + int err;
4987 +
4988 + BUILD_BUG_ON(sizeof(struct control_page) > PAGE_SIZE);
4989 +
4990 + printk("Initializing LITMUS^RT control device.\n");
4991 + err = misc_register(&litmus_ctrl_dev);
4992 + if (err)
4993 + printk("Could not allocate %s device (%d).\n", CTRL_NAME, err);
4994 + return err;
4995 +}
4996 +
4997 +static void __exit exit_litmus_ctrl_dev(void)
4998 +{
4999 + misc_deregister(&litmus_ctrl_dev);
5000 +}
5001 +
5002 +module_init(init_litmus_ctrl_dev);
5003 +module_exit(exit_litmus_ctrl_dev);
5004 diff --git a/litmus/edf_common.c b/litmus/edf_common.c
5005 new file mode 100644
5006 index 0000000..9b44dc2
5007 --- /dev/null
5008 +++ b/litmus/edf_common.c
5009 @@ -0,0 +1,118 @@
5010 +/*
5011 + * kernel/edf_common.c
5012 + *
5013 + * Common functions for EDF based scheduler.
5014 + */
5015 +
5016 +#include <linux/percpu.h>
5017 +#include <linux/sched.h>
5018 +#include <linux/list.h>
5019 +
5020 +#include <litmus/litmus.h>
5021 +#include <litmus/sched_plugin.h>
5022 +#include <litmus/sched_trace.h>
5023 +
5024 +#include <litmus/edf_common.h>
5025 +
5026 +/* edf_higher_prio - returns true if first has a higher EDF priority
5027 + * than second. Deadline ties are broken by PID.
5028 + *
5029 + * both first and second may be NULL
5030 + */
5031 +int edf_higher_prio(struct task_struct* first,
5032 + struct task_struct* second)
5033 +{
5034 + struct task_struct *first_task = first;
5035 + struct task_struct *second_task = second;
5036 +
5037 + /* There is no point in comparing a task to itself. */
5038 + if (first && first == second) {
5039 + TRACE_TASK(first,
5040 + "WARNING: pointless edf priority comparison.\n");
5041 + return 0;
5042 + }
5043 +
5044 +
5045 + /* check for NULL tasks */
5046 + if (!first || !second)
5047 + return first && !second;
5048 +
5049 +#ifdef CONFIG_LITMUS_LOCKING
5050 +
5051 + /* Check for inherited priorities. Change task
5052 + * used for comparison in such a case.
5053 + */
5054 + if (unlikely(first->rt_param.inh_task))
5055 + first_task = first->rt_param.inh_task;
5056 + if (unlikely(second->rt_param.inh_task))
5057 + second_task = second->rt_param.inh_task;
5058 +
5059 + /* Check for priority boosting. Tie-break by start of boosting.
5060 + */
5061 + if (unlikely(is_priority_boosted(first_task))) {
5062 + /* first_task is boosted, how about second_task? */
5063 + if (!is_priority_boosted(second_task) ||
5064 + lt_before(get_boost_start(first_task),
5065 + get_boost_start(second_task)))
5066 + return 1;
5067 + else
5068 + return 0;
5069 + } else if (unlikely(is_priority_boosted(second_task)))
5070 + /* second_task is boosted, first is not */
5071 + return 0;
5072 +
5073 +#endif
5074 +
5075 +
5076 + return !is_realtime(second_task) ||
5077 +
5078 + /* is the deadline of the first task earlier?
5079 + * Then it has higher priority.
5080 + */
5081 + earlier_deadline(first_task, second_task) ||
5082 +
5083 + /* Do we have a deadline tie?
5084 + * Then break by PID.
5085 + */
5086 + (get_deadline(first_task) == get_deadline(second_task) &&
5087 + (first_task->pid < second_task->pid ||
5088 +
5089 + /* If the PIDs are the same then the task with the inherited
5090 + * priority wins.
5091 + */
5092 + (first_task->pid == second_task->pid &&
5093 + !second->rt_param.inh_task)));
5094 +}
5095 +
5096 +int edf_ready_order(struct bheap_node* a, struct bheap_node* b)
5097 +{
5098 + return edf_higher_prio(bheap2task(a), bheap2task(b));
5099 +}
5100 +
5101 +void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
5102 + release_jobs_t release)
5103 +{
5104 + rt_domain_init(rt, edf_ready_order, resched, release);
5105 +}
5106 +
5107 +/* need_to_preempt - check whether the task t needs to be preempted
5108 + * call only with irqs disabled and with ready_lock acquired
5109 + * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT!
5110 + */
5111 +int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t)
5112 +{
5113 + /* we need the read lock for edf_ready_queue */
5114 + /* no need to preempt if there is nothing pending */
5115 + if (!__jobs_pending(rt))
5116 + return 0;
5117 + /* we need to reschedule if t doesn't exist */
5118 + if (!t)
5119 + return 1;
5120 +
5121 + /* NOTE: We cannot check for non-preemptibility since we
5122 + * don't know what address space we're currently in.
5123 + */
5124 +
5125 + /* make sure to get non-rt stuff out of the way */
5126 + return !is_realtime(t) || edf_higher_prio(__next_ready(rt), t);
5127 +}
5128 diff --git a/litmus/fdso.c b/litmus/fdso.c
5129 new file mode 100644
5130 index 0000000..aa7b384
5131 --- /dev/null
5132 +++ b/litmus/fdso.c
5133 @@ -0,0 +1,293 @@
5134 +/* fdso.c - file descriptor attached shared objects
5135 + *
5136 + * (c) 2007 B. Brandenburg, LITMUS^RT project
5137 + *
5138 + * Notes:
5139 + * - object descriptor (OD) tables are not cloned during a fork.
5140 + * - objects are created on-demand, and freed after the last reference
5141 + * is dropped.
5142 + * - for now, object types are hard coded.
5143 + * - As long as we have live objects, we keep a reference to the inode.
5144 + */
5145 +
5146 +#include <linux/errno.h>
5147 +#include <linux/sched.h>
5148 +#include <linux/mutex.h>
5149 +#include <linux/file.h>
5150 +#include <asm/uaccess.h>
5151 +
5152 +#include <litmus/fdso.h>
5153 +
5154 +extern struct fdso_ops generic_lock_ops;
5155 +
5156 +static const struct fdso_ops* fdso_ops[] = {
5157 + &generic_lock_ops, /* FMLP_SEM */
5158 + &generic_lock_ops, /* SRP_SEM */
5159 +};
5160 +
5161 +static int fdso_create(void** obj_ref, obj_type_t type, void* __user config)
5162 +{
5163 + if (fdso_ops[type]->create)
5164 + return fdso_ops[type]->create(obj_ref, type, config);
5165 + else
5166 + return -EINVAL;
5167 +}
5168 +
5169 +static void fdso_destroy(obj_type_t type, void* obj)
5170 +{
5171 + fdso_ops[type]->destroy(type, obj);
5172 +}
5173 +
5174 +static int fdso_open(struct od_table_entry* entry, void* __user config)
5175 +{
5176 + if (fdso_ops[entry->obj->type]->open)
5177 + return fdso_ops[entry->obj->type]->open(entry, config);
5178 + else
5179 + return 0;
5180 +}
5181 +
5182 +static int fdso_close(struct od_table_entry* entry)
5183 +{
5184 + if (fdso_ops[entry->obj->type]->close)
5185 + return fdso_ops[entry->obj->type]->close(entry);
5186 + else
5187 + return 0;
5188 +}
5189 +
5190 +/* inode must be locked already */
5191 +static int alloc_inode_obj(struct inode_obj_id** obj_ref,
5192 + struct inode* inode,
5193 + obj_type_t type,
5194 + unsigned int id,
5195 + void* __user config)
5196 +{
5197 + struct inode_obj_id* obj;
5198 + void* raw_obj;
5199 + int err;
5200 +
5201 + obj = kmalloc(sizeof(*obj), GFP_KERNEL);
5202 + if (!obj) {
5203 + return -ENOMEM;
5204 + }
5205 +
5206 + err = fdso_create(&raw_obj, type, config);
5207 + if (err != 0) {
5208 + kfree(obj);
5209 + return err;
5210 + }
5211 +
5212 + INIT_LIST_HEAD(&obj->list);
5213 + atomic_set(&obj->count, 1);
5214 + obj->type = type;
5215 + obj->id = id;
5216 + obj->obj = raw_obj;
5217 + obj->inode = inode;
5218 +
5219 + list_add(&obj->list, &inode->i_obj_list);
5220 + atomic_inc(&inode->i_count);
5221 +
5222 + printk(KERN_DEBUG "alloc_inode_obj(%p, %d, %d): object created\n", inode, type, id);
5223 +
5224 + *obj_ref = obj;
5225 + return 0;
5226 +}
5227 +
5228 +/* inode must be locked already */
5229 +static struct inode_obj_id* get_inode_obj(struct inode* inode,
5230 + obj_type_t type,
5231 + unsigned int id)
5232 +{
5233 + struct list_head* pos;
5234 + struct inode_obj_id* obj = NULL;
5235 +
5236 + list_for_each(pos, &inode->i_obj_list) {
5237 + obj = list_entry(pos, struct inode_obj_id, list);
5238 + if (obj->id == id && obj->type == type) {
5239 + atomic_inc(&obj->count);
5240 + return obj;
5241 + }
5242 + }
5243 + printk(KERN_DEBUG "get_inode_obj(%p, %d, %d): couldn't find object\n", inode, type, id);
5244 + return NULL;
5245 +}
5246 +
5247 +
5248 +static void put_inode_obj(struct inode_obj_id* obj)
5249 +{
5250 + struct inode* inode;
5251 + int let_go = 0;
5252 +
5253 + inode = obj->inode;
5254 + if (atomic_dec_and_test(&obj->count)) {
5255 +
5256 + mutex_lock(&inode->i_obj_mutex);
5257 + /* no new references can be obtained */
5258 + if (!atomic_read(&obj->count)) {
5259 + list_del(&obj->list);
5260 + fdso_destroy(obj->type, obj->obj);
5261 + kfree(obj);
5262 + let_go = 1;
5263 + }
5264 + mutex_unlock(&inode->i_obj_mutex);
5265 + if (let_go)
5266 + iput(inode);
5267 + }
5268 +}
5269 +
5270 +static struct od_table_entry* get_od_entry(struct task_struct* t)
5271 +{
5272 + struct od_table_entry* table;
5273 + int i;
5274 +
5275 +
5276 + table = t->od_table;
5277 + if (!table) {
5278 + table = kzalloc(sizeof(*table) * MAX_OBJECT_DESCRIPTORS,
5279 + GFP_KERNEL);
5280 + t->od_table = table;
5281 + }
5282 +
5283 + for (i = 0; table && i < MAX_OBJECT_DESCRIPTORS; i++)
5284 + if (!table[i].used) {
5285 + table[i].used = 1;
5286 + return table + i;
5287 + }
5288 + return NULL;
5289 +}
5290 +
5291 +static int put_od_entry(struct od_table_entry* od)
5292 +{
5293 + put_inode_obj(od->obj);
5294 + od->used = 0;
5295 + return 0;
5296 +}
5297 +
5298 +void exit_od_table(struct task_struct* t)
5299 +{
5300 + int i;
5301 +
5302 + if (t->od_table) {
5303 + for (i = 0; i < MAX_OBJECT_DESCRIPTORS; i++)
5304 + if (t->od_table[i].used)
5305 + put_od_entry(t->od_table + i);
5306 + kfree(t->od_table);
5307 + t->od_table = NULL;
5308 + }
5309 +}
5310 +
5311 +static int do_sys_od_open(struct file* file, obj_type_t type, int id,
5312 + void* __user config)
5313 +{
5314 + int idx = 0, err = 0;
5315 + struct inode* inode;
5316 + struct inode_obj_id* obj = NULL;
5317 + struct od_table_entry* entry;
5318 +
5319 + inode = file->f_dentry->d_inode;
5320 +
5321 + entry = get_od_entry(current);
5322 + if (!entry)
5323 + return -ENOMEM;
5324 +
5325 + mutex_lock(&inode->i_obj_mutex);
5326 + obj = get_inode_obj(inode, type, id);
5327 + if (!obj)
5328 + err = alloc_inode_obj(&obj, inode, type, id, config);
5329 + if (err != 0) {
5330 + obj = NULL;
5331 + idx = err;
5332 + entry->used = 0;
5333 + } else {
5334 + entry->obj = obj;
5335 + entry->class = fdso_ops[type];
5336 + idx = entry - current->od_table;
5337 + }
5338 +
5339 + mutex_unlock(&inode->i_obj_mutex);
5340 +
5341 + /* open only if creation succeeded */
5342 + if (!err)
5343 + err = fdso_open(entry, config);
5344 + if (err < 0) {
5345 + /* The class rejected the open call.
5346 + * We need to clean up and tell user space.
5347 + */
5348 + if (obj)
5349 + put_od_entry(entry);
5350 + idx = err;
5351 + }
5352 +
5353 + return idx;
5354 +}
5355 +
5356 +
5357 +struct od_table_entry* get_entry_for_od(int od)
5358 +{
5359 + struct task_struct *t = current;
5360 +
5361 + if (!t->od_table)
5362 + return NULL;
5363 + if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS)
5364 + return NULL;
5365 + if (!t->od_table[od].used)
5366 + return NULL;
5367 + return t->od_table + od;
5368 +}
5369 +
5370 +
5371 +asmlinkage long sys_od_open(int fd, int type, int obj_id, void* __user config)
5372 +{
5373 + int ret = 0;
5374 + struct file* file;
5375 +
5376 + /*
5377 + 1) get file from fd, get inode from file
5378 + 2) lock inode
5379 + 3) try to lookup object
5380 + 4) if not present create and enqueue object, inc inode refcnt
5381 + 5) increment refcnt of object
5382 + 6) alloc od_table_entry, setup ptrs
5383 + 7) unlock inode
5384 + 8) return offset in od_table as OD
5385 + */
5386 +
5387 + if (type < MIN_OBJ_TYPE || type > MAX_OBJ_TYPE) {
5388 + ret = -EINVAL;
5389 + goto out;
5390 + }
5391 +
5392 + file = fget(fd);
5393 + if (!file) {
5394 + ret = -EBADF;
5395 + goto out;
5396 + }
5397 +
5398 + ret = do_sys_od_open(file, type, obj_id, config);
5399 +
5400 + fput(file);
5401 +
5402 +out:
5403 + return ret;
5404 +}
5405 +
5406 +
5407 +asmlinkage long sys_od_close(int od)
5408 +{
5409 + int ret = -EINVAL;
5410 + struct task_struct *t = current;
5411 +
5412 + if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS)
5413 + return ret;
5414 +
5415 + if (!t->od_table || !t->od_table[od].used)
5416 + return ret;
5417 +
5418 +
5419 + /* give the class a chance to reject the close
5420 + */
5421 + ret = fdso_close(t->od_table + od);
5422 + if (ret == 0)
5423 + ret = put_od_entry(t->od_table + od);
5424 +
5425 + return ret;
5426 +}
5427 diff --git a/litmus/ft_event.c b/litmus/ft_event.c
5428 new file mode 100644
5429 index 0000000..399a07b
5430 --- /dev/null
5431 +++ b/litmus/ft_event.c
5432 @@ -0,0 +1,43 @@
5433 +#include <linux/types.h>
5434 +
5435 +#include <litmus/feather_trace.h>
5436 +
5437 +#if !defined(CONFIG_ARCH_HAS_FEATHER_TRACE) || defined(CONFIG_DEBUG_RODATA)
5438 +/* provide dummy implementation */
5439 +
5440 +int ft_events[MAX_EVENTS];
5441 +
5442 +int ft_enable_event(unsigned long id)
5443 +{
5444 + if (id < MAX_EVENTS) {
5445 + ft_events[id]++;
5446 + return 1;
5447 + } else
5448 + return 0;
5449 +}
5450 +
5451 +int ft_disable_event(unsigned long id)
5452 +{
5453 + if (id < MAX_EVENTS && ft_events[id]) {
5454 + ft_events[id]--;
5455 + return 1;
5456 + } else
5457 + return 0;
5458 +}
5459 +
5460 +int ft_disable_all_events(void)
5461 +{
5462 + int i;
5463 +
5464 + for (i = 0; i < MAX_EVENTS; i++)
5465 + ft_events[i] = 0;
5466 +
5467 + return MAX_EVENTS;
5468 +}
5469 +
5470 +int ft_is_event_enabled(unsigned long id)
5471 +{
5472 + return id < MAX_EVENTS && ft_events[id];
5473 +}
5474 +
5475 +#endif
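One detail worth noting: the dummy implementation above gives enable/disable nesting semantics, i.e. an event stays enabled until every ft_enable_event() has been balanced by an ft_disable_event(). A tiny illustration (event id 7 is arbitrary, assuming it is below MAX_EVENTS):

	ft_enable_event(7);
	ft_enable_event(7);
	ft_disable_event(7);
	/* ft_is_event_enabled(7) is still non-zero: one enable is outstanding */
	ft_disable_event(7);
	/* now ft_is_event_enabled(7) == 0 */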
5476 diff --git a/litmus/ftdev.c b/litmus/ftdev.c
5477 new file mode 100644
5478 index 0000000..06fcf4c
5479 --- /dev/null
5480 +++ b/litmus/ftdev.c
5481 @@ -0,0 +1,439 @@
5482 +#include <linux/sched.h>
5483 +#include <linux/fs.h>
5484 +#include <linux/slab.h>
5485 +#include <linux/cdev.h>
5486 +#include <asm/uaccess.h>
5487 +#include <linux/module.h>
5488 +#include <linux/device.h>
5489 +
5490 +#include <litmus/litmus.h>
5491 +#include <litmus/feather_trace.h>
5492 +#include <litmus/ftdev.h>
5493 +
5494 +struct ft_buffer* alloc_ft_buffer(unsigned int count, size_t size)
5495 +{
5496 + struct ft_buffer* buf;
5497 + size_t total = (size + 1) * count;
5498 + char* mem;
5499 + int order = 0, pages = 1;
5500 +
5501 + buf = kmalloc(sizeof(*buf), GFP_KERNEL);
5502 + if (!buf)
5503 + return NULL;
5504 +
5505 + total = (total / PAGE_SIZE) + (total % PAGE_SIZE != 0);
5506 + while (pages < total) {
5507 + order++;
5508 + pages *= 2;
5509 + }
5510 +
5511 + mem = (char*) __get_free_pages(GFP_KERNEL, order);
5512 + if (!mem) {
5513 + kfree(buf);
5514 + return NULL;
5515 + }
5516 +
5517 + if (!init_ft_buffer(buf, count, size,
5518 + mem + (count * size), /* markers at the end */
5519 + mem)) { /* buffer objects */
5520 + free_pages((unsigned long) mem, order);
5521 + kfree(buf);
5522 + return NULL;
5523 + }
5524 + return buf;
5525 +}
5526 +
5527 +void free_ft_buffer(struct ft_buffer* buf)
5528 +{
5529 + int order = 0, pages = 1;
5530 + size_t total;
5531 +
5532 + if (buf) {
5533 + total = (buf->slot_size + 1) * buf->slot_count;
5534 + total = (total / PAGE_SIZE) + (total % PAGE_SIZE != 0);
5535 + while (pages < total) {
5536 + order++;
5537 + pages *= 2;
5538 + }
5539 + free_pages((unsigned long) buf->buffer_mem, order);
5540 + kfree(buf);
5541 + }
5542 +}
5543 +
5544 +struct ftdev_event {
5545 + int id;
5546 + struct ftdev_event* next;
5547 +};
5548 +
5549 +static int activate(struct ftdev_event** chain, int id)
5550 +{
5551 + struct ftdev_event* ev = kmalloc(sizeof(*ev), GFP_KERNEL);
5552 + if (ev) {
5553 + printk(KERN_INFO
5554 + "Enabling feather-trace event %d.\n", (int) id);
5555 + ft_enable_event(id);
5556 + ev->id = id;
5557 + ev->next = *chain;
5558 + *chain = ev;
5559 + }
5560 + return ev ? 0 : -ENOMEM;
5561 +}
5562 +
5563 +static void deactivate(struct ftdev_event** chain, int id)
5564 +{
5565 + struct ftdev_event **cur = chain;
5566 + struct ftdev_event *nxt;
5567 + while (*cur) {
5568 + if ((*cur)->id == id) {
5569 + nxt = (*cur)->next;
5570 + kfree(*cur);
5571 + *cur = nxt;
5572 + printk(KERN_INFO
5573 + "Disabling feather-trace event %d.\n", (int) id);
5574 + ft_disable_event(id);
5575 + break;
5576 + }
5577 + cur = &(*cur)->next;
5578 + }
5579 +}
5580 +
5581 +static int ftdev_open(struct inode *in, struct file *filp)
5582 +{
5583 + struct ftdev* ftdev;
5584 + struct ftdev_minor* ftdm;
5585 + unsigned int buf_idx = iminor(in);
5586 + int err = 0;
5587 +
5588 + ftdev = container_of(in->i_cdev, struct ftdev, cdev);
5589 +
5590 + if (buf_idx >= ftdev->minor_cnt) {
5591 + err = -ENODEV;
5592 + goto out;
5593 + }
5594 + if (ftdev->can_open && (err = ftdev->can_open(ftdev, buf_idx)))
5595 + goto out;
5596 +
5597 + ftdm = ftdev->minor + buf_idx;
5598 + ftdm->ftdev = ftdev;
5599 + filp->private_data = ftdm;
5600 +
5601 + if (mutex_lock_interruptible(&ftdm->lock)) {
5602 + err = -ERESTARTSYS;
5603 + goto out;
5604 + }
5605 +
5606 + if (!ftdm->readers && ftdev->alloc)
5607 + err = ftdev->alloc(ftdev, buf_idx);
5608 + if (0 == err)
5609 + ftdm->readers++;
5610 +
5611 + mutex_unlock(&ftdm->lock);
5612 +out:
5613 + return err;
5614 +}
5615 +
5616 +static int ftdev_release(struct inode *in, struct file *filp)
5617 +{
5618 + struct ftdev* ftdev;
5619 + struct ftdev_minor* ftdm;
5620 + unsigned int buf_idx = iminor(in);
5621 + int err = 0;
5622 +
5623 + ftdev = container_of(in->i_cdev, struct ftdev, cdev);
5624 +
5625 + if (buf_idx >= ftdev->minor_cnt) {
5626 + err = -ENODEV;
5627 + goto out;
5628 + }
5629 + ftdm = ftdev->minor + buf_idx;
5630 +
5631 + if (mutex_lock_interruptible(&ftdm->lock)) {
5632 + err = -ERESTARTSYS;
5633 + goto out;
5634 + }
5635 +
5636 + if (ftdm->readers == 1) {
5637 + while (ftdm->events)
5638 + deactivate(&ftdm->events, ftdm->events->id);
5639 +
5640 + /* wait for any pending events to complete */
5641 + set_current_state(TASK_UNINTERRUPTIBLE);
5642 + schedule_timeout(HZ);
5643 +
5644 + printk(KERN_ALERT "Failed trace writes: %u\n",
5645 + ftdm->buf->failed_writes);
5646 +
5647 + if (ftdev->free)
5648 + ftdev->free(ftdev, buf_idx);
5649 + }
5650 +
5651 + ftdm->readers--;
5652 + mutex_unlock(&ftdm->lock);
5653 +out:
5654 + return err;
5655 +}
5656 +
5657 +/* based on ft_buffer_read
5658 + * @returns < 0 : page fault
5659 + * = 0 : no data available
5660 + * = 1 : one slot copied
5661 + */
5662 +static int ft_buffer_copy_to_user(struct ft_buffer* buf, char __user *dest)
5663 +{
5664 + unsigned int idx;
5665 + int err = 0;
5666 + if (buf->free_count != buf->slot_count) {
5667 + /* data available */
5668 + idx = buf->read_idx % buf->slot_count;
5669 + if (buf->slots[idx] == SLOT_READY) {
5670 + err = copy_to_user(dest, ((char*) buf->buffer_mem) +
5671 + idx * buf->slot_size,
5672 + buf->slot_size);
5673 + if (err == 0) {
5674 + /* copy ok */
5675 + buf->slots[idx] = SLOT_FREE;
5676 + buf->read_idx++;
5677 + fetch_and_inc(&buf->free_count);
5678 + err = 1;
5679 + }
5680 + }
5681 + }
5682 + return err;
5683 +}
5684 +
5685 +static ssize_t ftdev_read(struct file *filp,
5686 + char __user *to, size_t len, loff_t *f_pos)
5687 +{
5688 + /* we ignore f_pos, this is strictly sequential */
5689 +
5690 + ssize_t err = 0;
5691 + size_t chunk;
5692 + int copied;
5693 + struct ftdev_minor* ftdm = filp->private_data;
5694 +
5695 + if (mutex_lock_interruptible(&ftdm->lock)) {
5696 + err = -ERESTARTSYS;
5697 + goto out;
5698 + }
5699 +
5700 +
5701 + chunk = ftdm->buf->slot_size;
5702 + while (len >= chunk) {
5703 + copied = ft_buffer_copy_to_user(ftdm->buf, to);
5704 + if (copied == 1) {
5705 + len -= chunk;
5706 + to += chunk;
5707 + err += chunk;
5708 + } else if (err == 0 && copied == 0 && ftdm->events) {
5709 + /* Only wait if there are any events enabled and only
5710 + * if we haven't copied some data yet. We cannot wait
5711 + * here with copied data because that data would get
5712 + * lost if the task is interrupted (e.g., killed).
5713 + */
5714 + set_current_state(TASK_INTERRUPTIBLE);
5715 + schedule_timeout(50);
5716 + if (signal_pending(current)) {
5717 + if (err == 0)
5718 + /* nothing read yet, signal problem */
5719 + err = -ERESTARTSYS;
5720 + break;
5721 + }
5722 + } else if (copied < 0) {
5723 + /* page fault */
5724 + err = copied;
5725 + break;
5726 + } else
5727 + /* nothing left to get, return to user space */
5728 + break;
5729 + }
5730 + mutex_unlock(&ftdm->lock);
5731 +out:
5732 + return err;
5733 +}
5734 +
5735 +static long ftdev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
5736 +{
5737 + long err = -ENOIOCTLCMD;
5738 + struct ftdev_minor* ftdm = filp->private_data;
5739 +
5740 + if (mutex_lock_interruptible(&ftdm->lock)) {
5741 + err = -ERESTARTSYS;
5742 + goto out;
5743 + }
5744 +
5745 + /* FIXME: check id against list of acceptable events */
5746 +
5747 + switch (cmd) {
5748 + case FTDEV_ENABLE_CMD:
5749 + if (activate(&ftdm->events, arg))
5750 + err = -ENOMEM;
5751 + else
5752 + err = 0;
5753 + break;
5754 +
5755 + case FTDEV_DISABLE_CMD:
5756 + deactivate(&ftdm->events, arg);
5757 + err = 0;
5758 + break;
5759 +
5760 + default:
5761 + printk(KERN_DEBUG "ftdev: strange ioctl (%u, %lu)\n", cmd, arg);
5762 +	}
5763 +
5764 + mutex_unlock(&ftdm->lock);
5765 +out:
5766 + return err;
5767 +}
5768 +
5769 +static ssize_t ftdev_write(struct file *filp, const char __user *from,
5770 + size_t len, loff_t *f_pos)
5771 +{
5772 + struct ftdev_minor* ftdm = filp->private_data;
5773 + ssize_t err = -EINVAL;
5774 + struct ftdev* ftdev = ftdm->ftdev;
5775 +
5776 + /* dispatch write to buffer-specific code, if available */
5777 + if (ftdev->write)
5778 + err = ftdev->write(ftdm->buf, len, from);
5779 +
5780 + return err;
5781 +}
5782 +
5783 +struct file_operations ftdev_fops = {
5784 + .owner = THIS_MODULE,
5785 + .open = ftdev_open,
5786 + .release = ftdev_release,
5787 + .write = ftdev_write,
5788 + .read = ftdev_read,
5789 + .unlocked_ioctl = ftdev_ioctl,
5790 +};
5791 +
5792 +int ftdev_init( struct ftdev* ftdev, struct module* owner,
5793 + const int minor_cnt, const char* name)
5794 +{
5795 + int i, err;
5796 +
5797 + BUG_ON(minor_cnt < 1);
5798 +
5799 + cdev_init(&ftdev->cdev, &ftdev_fops);
5800 + ftdev->name = name;
5801 + ftdev->minor_cnt = minor_cnt;
5802 + ftdev->cdev.owner = owner;
5803 + ftdev->cdev.ops = &ftdev_fops;
5804 + ftdev->alloc = NULL;
5805 + ftdev->free = NULL;
5806 + ftdev->can_open = NULL;
5807 + ftdev->write = NULL;
5808 +
5809 + ftdev->minor = kcalloc(ftdev->minor_cnt, sizeof(*ftdev->minor),
5810 + GFP_KERNEL);
5811 + if (!ftdev->minor) {
5812 + printk(KERN_WARNING "ftdev(%s): Could not allocate memory\n",
5813 + ftdev->name);
5814 + err = -ENOMEM;
5815 + goto err_out;
5816 + }
5817 +
5818 + for (i = 0; i < ftdev->minor_cnt; i++) {
5819 + mutex_init(&ftdev->minor[i].lock);
5820 + ftdev->minor[i].readers = 0;
5821 + ftdev->minor[i].buf = NULL;
5822 + ftdev->minor[i].events = NULL;
5823 + }
5824 +
5825 + ftdev->class = class_create(owner, ftdev->name);
5826 + if (IS_ERR(ftdev->class)) {
5827 + err = PTR_ERR(ftdev->class);
5828 + printk(KERN_WARNING "ftdev(%s): "
5829 + "Could not create device class.\n", ftdev->name);
5830 + goto err_dealloc;
5831 + }
5832 +
5833 + return 0;
5834 +
5835 +err_dealloc:
5836 + kfree(ftdev->minor);
5837 +err_out:
5838 + return err;
5839 +}
5840 +
5841 +/*
5842 + * Destroy minor devices up to, but not including, up_to.
5843 + */
5844 +static void ftdev_device_destroy(struct ftdev* ftdev, unsigned int up_to)
5845 +{
5846 + dev_t minor_cntr;
5847 +
5848 + if (up_to < 1)
5849 + up_to = (ftdev->minor_cnt < 1) ? 0 : ftdev->minor_cnt;
5850 +
5851 + for (minor_cntr = 0; minor_cntr < up_to; ++minor_cntr)
5852 + device_destroy(ftdev->class, MKDEV(ftdev->major, minor_cntr));
5853 +}
5854 +
5855 +void ftdev_exit(struct ftdev* ftdev)
5856 +{
5857 + printk("ftdev(%s): Exiting\n", ftdev->name);
5858 +	ftdev_device_destroy(ftdev, 0); /* 0: destroy all allocated minors */
5859 + cdev_del(&ftdev->cdev);
5860 + unregister_chrdev_region(MKDEV(ftdev->major, 0), ftdev->minor_cnt);
5861 + class_destroy(ftdev->class);
5862 + kfree(ftdev->minor);
5863 +}
5864 +
5865 +int register_ftdev(struct ftdev* ftdev)
5866 +{
5867 + struct device **device;
5868 + dev_t trace_dev_tmp, minor_cntr;
5869 + int err;
5870 +
5871 + err = alloc_chrdev_region(&trace_dev_tmp, 0, ftdev->minor_cnt,
5872 + ftdev->name);
5873 + if (err) {
5874 + printk(KERN_WARNING "ftdev(%s): "
5875 + "Could not allocate char. device region (%d minors)\n",
5876 + ftdev->name, ftdev->minor_cnt);
5877 + goto err_out;
5878 + }
5879 +
5880 + ftdev->major = MAJOR(trace_dev_tmp);
5881 +
5882 + err = cdev_add(&ftdev->cdev, trace_dev_tmp, ftdev->minor_cnt);
5883 + if (err) {
5884 + printk(KERN_WARNING "ftdev(%s): "
5885 + "Could not add cdev for major %u with %u minor(s).\n",
5886 + ftdev->name, ftdev->major, ftdev->minor_cnt);
5887 + goto err_unregister;
5888 + }
5889 +
5890 + /* create the minor device(s) */
5891 + for (minor_cntr = 0; minor_cntr < ftdev->minor_cnt; ++minor_cntr)
5892 + {
5893 + trace_dev_tmp = MKDEV(ftdev->major, minor_cntr);
5894 + device = &ftdev->minor[minor_cntr].device;
5895 +
5896 + *device = device_create(ftdev->class, NULL, trace_dev_tmp, NULL,
5897 + "litmus/%s%d", ftdev->name, minor_cntr);
5898 + if (IS_ERR(*device)) {
5899 + err = PTR_ERR(*device);
5900 + printk(KERN_WARNING "ftdev(%s): "
5901 + "Could not create device major/minor number "
5902 + "%u/%u\n", ftdev->name, ftdev->major,
5903 + minor_cntr);
5904 + printk(KERN_WARNING "ftdev(%s): "
5905 + "will attempt deletion of allocated devices.\n",
5906 + ftdev->name);
5907 + goto err_minors;
5908 + }
5909 + }
5910 +
5911 + return 0;
5912 +
5913 +err_minors:
5914 + ftdev_device_destroy(ftdev, minor_cntr);
5915 + cdev_del(&ftdev->cdev);
5916 +err_unregister:
5917 + unregister_chrdev_region(MKDEV(ftdev->major, 0), ftdev->minor_cnt);
5918 +err_out:
5919 + return err;
5920 +}
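For orientation, here is a hedged sketch of how a client is expected to wire up this API: initialize the ftdev, install per-minor alloc/free hooks that manage the ft_buffer, and register the device. The names my_ftdev/my_alloc/my_free and the buffer dimensions are illustrative, not part of the patch; the callback signatures are assumed to match the way ftdev_open()/ftdev_release() invoke them above.

	static struct ftdev my_ftdev;

	static int my_alloc(struct ftdev *dev, unsigned int idx)
	{
		/* 4096 slots of 16 bytes each -- sizes are placeholders */
		dev->minor[idx].buf = alloc_ft_buffer(4096, 16);
		return dev->minor[idx].buf ? 0 : -ENOMEM;
	}

	static void my_free(struct ftdev *dev, unsigned int idx)
	{
		free_ft_buffer(dev->minor[idx].buf);
		dev->minor[idx].buf = NULL;
	}

	static int __init my_init(void)
	{
		int err = ftdev_init(&my_ftdev, THIS_MODULE, 1, "my_trace");
		if (err)
			return err;
		my_ftdev.alloc = my_alloc;
		my_ftdev.free  = my_free;
		/* creates /dev/litmus/my_trace0 via device_create() above */
		return register_ftdev(&my_ftdev);
	}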
5921 diff --git a/litmus/jobs.c b/litmus/jobs.c
5922 new file mode 100644
5923 index 0000000..36e3146
5924 --- /dev/null
5925 +++ b/litmus/jobs.c
5926 @@ -0,0 +1,43 @@
5927 +/* litmus/jobs.c - common job control code
5928 + */
5929 +
5930 +#include <linux/sched.h>
5931 +
5932 +#include <litmus/litmus.h>
5933 +#include <litmus/jobs.h>
5934 +
5935 +void prepare_for_next_period(struct task_struct *t)
5936 +{
5937 + BUG_ON(!t);
5938 + /* prepare next release */
5939 + t->rt_param.job_params.release = t->rt_param.job_params.deadline;
5940 + t->rt_param.job_params.deadline += get_rt_period(t);
5941 + t->rt_param.job_params.exec_time = 0;
5942 + /* update job sequence number */
5943 + t->rt_param.job_params.job_no++;
5944 +
5945 + /* don't confuse Linux */
5946 + t->rt.time_slice = 1;
5947 +}
5948 +
5949 +void release_at(struct task_struct *t, lt_t start)
5950 +{
5951 + t->rt_param.job_params.deadline = start;
5952 + prepare_for_next_period(t);
5953 + set_rt_flags(t, RT_F_RUNNING);
5954 +}
5955 +
5956 +
5957 +/*
5958 + * Deactivate current task until the beginning of the next period.
5959 + */
5960 +long complete_job(void)
5961 +{
5962 + /* Mark that we do not excute anymore */
5963 +	/* Mark that we do not execute anymore */
5964 +	/* Call schedule(); this returns when a new job arrives.
5965 +	 * It also takes care of preparing for the next release.
5966 +	 */
5967 + schedule();
5968 + return 0;
5969 +}
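To make the arithmetic in prepare_for_next_period() and release_at() concrete, a small worked example (the numbers are purely illustrative):

	/* Suppose release_at(t, 100ms) is called for a task with a 10ms period.
	 * release_at() sets deadline = 100ms and calls prepare_for_next_period():
	 *   job 1: release = 100ms, deadline = 110ms
	 * Every later call to prepare_for_next_period() shifts the window by
	 * one period:
	 *   job 2: release = 110ms, deadline = 120ms
	 *   job 3: release = 120ms, deadline = 130ms
	 * i.e., jobs have implicit deadlines one period after their release.
	 */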
5970 diff --git a/litmus/litmus.c b/litmus/litmus.c
5971 new file mode 100644
5972 index 0000000..3013901
5973 --- /dev/null
5974 +++ b/litmus/litmus.c
5975 @@ -0,0 +1,564 @@
5976 +/*
5977 + * litmus.c -- Implementation of the LITMUS syscalls,
5978 + *            the LITMUS initialization code,
5979 + *            and the procfs interface.
5980 + */
5981 +#include <asm/uaccess.h>
5982 +#include <linux/uaccess.h>
5983 +#include <linux/sysrq.h>
5984 +#include <linux/sched.h>
5985 +#include <linux/module.h>
5986 +#include <linux/slab.h>
5987 +
5988 +#include <litmus/litmus.h>
5989 +#include <litmus/bheap.h>
5990 +#include <litmus/trace.h>
5991 +#include <litmus/rt_domain.h>
5992 +#include <litmus/litmus_proc.h>
5993 +#include <litmus/sched_trace.h>
5994 +
5995 +#ifdef CONFIG_SCHED_CPU_AFFINITY
5996 +#include <litmus/affinity.h>
5997 +#endif
5998 +
5999 +/* Number of RT tasks that exist in the system */
6000 +atomic_t rt_task_count = ATOMIC_INIT(0);
6001 +static DEFINE_RAW_SPINLOCK(task_transition_lock);
6002 +/* synchronize plugin switching */
6003 +atomic_t cannot_use_plugin = ATOMIC_INIT(0);
6004 +
6005 +/* Give log messages sequential IDs. */
6006 +atomic_t __log_seq_no = ATOMIC_INIT(0);
6007 +
6008 +#ifdef CONFIG_RELEASE_MASTER
6009 +/* current master CPU for handling timer IRQs */
6010 +atomic_t release_master_cpu = ATOMIC_INIT(NO_CPU);
6011 +#endif
6012 +
6013 +static struct kmem_cache * bheap_node_cache;
6014 +extern struct kmem_cache * release_heap_cache;
6015 +
6016 +struct bheap_node* bheap_node_alloc(int gfp_flags)
6017 +{
6018 + return kmem_cache_alloc(bheap_node_cache, gfp_flags);
6019 +}
6020 +
6021 +void bheap_node_free(struct bheap_node* hn)
6022 +{
6023 + kmem_cache_free(bheap_node_cache, hn);
6024 +}
6025 +
6026 +struct release_heap* release_heap_alloc(int gfp_flags);
6027 +void release_heap_free(struct release_heap* rh);
6028 +
6029 +/*
6030 + * sys_set_rt_task_param
6031 + * @pid: Pid of the task whose scheduling parameters are to be changed
6032 + * @param: New real-time extension parameters such as the execution cost and
6033 + *         period
6034 + * Syscall for manipulating a task's rt extension params
6035 + * Returns EFAULT if param is NULL.
6036 + *         ESRCH  if pid does not correspond
6037 + *                to a valid task.
6038 + *         EINVAL if either period or execution cost is <= 0
6039 + *         EBUSY  if pid refers to a task that is already a real-time task
6040 + *         0      if successful
6041 + *
6042 + * Only non-real-time tasks may be configured with this system call
6043 + * to avoid races with the scheduler. In practice, this means that a
6044 + * task's parameters must be set _before_ calling sys_prepare_rt_task()
6045 + *
6046 + * find_task_by_vpid() assumes that we are in the same namespace as the
6047 + * target.
6048 + */
6049 +asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param)
6050 +{
6051 + struct rt_task tp;
6052 + struct task_struct *target;
6053 + int retval = -EINVAL;
6054 +
6055 + printk("Setting up rt task parameters for process %d.\n", pid);
6056 +
6057 + if (pid < 0 || param == 0) {
6058 + goto out;
6059 + }
6060 + if (copy_from_user(&tp, param, sizeof(tp))) {
6061 + retval = -EFAULT;
6062 + goto out;
6063 + }
6064 +
6065 + /* Task search and manipulation must be protected */
6066 + read_lock_irq(&tasklist_lock);
6067 + if (!(target = find_task_by_vpid(pid))) {
6068 + retval = -ESRCH;
6069 + goto out_unlock;
6070 + }
6071 +
6072 + if (is_realtime(target)) {
6073 + /* The task is already a real-time task.
6074 +		 * We do not allow parameter changes at this point.
6075 + */
6076 + retval = -EBUSY;
6077 + goto out_unlock;
6078 + }
6079 +
6080 + if (tp.exec_cost <= 0)
6081 + goto out_unlock;
6082 + if (tp.period <= 0)
6083 + goto out_unlock;
6084 + if (!cpu_online(tp.cpu))
6085 + goto out_unlock;
6086 + if (tp.period < tp.exec_cost)
6087 + {
6088 + printk(KERN_INFO "litmus: real-time task %d rejected "
6089 + "because wcet > period\n", pid);
6090 + goto out_unlock;
6091 + }
6092 + if ( tp.cls != RT_CLASS_HARD &&
6093 + tp.cls != RT_CLASS_SOFT &&
6094 + tp.cls != RT_CLASS_BEST_EFFORT)
6095 + {
6096 + printk(KERN_INFO "litmus: real-time task %d rejected "
6097 + "because its class is invalid\n", pid);
6098 + goto out_unlock;
6099 + }
6100 + if (tp.budget_policy != NO_ENFORCEMENT &&
6101 + tp.budget_policy != QUANTUM_ENFORCEMENT &&
6102 + tp.budget_policy != PRECISE_ENFORCEMENT)
6103 + {
6104 + printk(KERN_INFO "litmus: real-time task %d rejected "
6105 + "because unsupported budget enforcement policy "
6106 + "specified (%d)\n",
6107 + pid, tp.budget_policy);
6108 + goto out_unlock;
6109 + }
6110 +
6111 + target->rt_param.task_params = tp;
6112 +
6113 + retval = 0;
6114 + out_unlock:
6115 + read_unlock_irq(&tasklist_lock);
6116 + out:
6117 + return retval;
6118 +}
6119 +
6120 +/*
6121 + * Getter of task's RT params
6122 + * returns EINVAL if param or pid is NULL
6123 + * returns ESRCH if pid does not correspond to a valid task
6124 + * returns EFAULT if copying of parameters has failed.
6125 + *
6126 + * find_task_by_vpid() assumes that we are in the same namespace as the
6127 + * target.
6128 + */
6129 +asmlinkage long sys_get_rt_task_param(pid_t pid, struct rt_task __user * param)
6130 +{
6131 + int retval = -EINVAL;
6132 + struct task_struct *source;
6133 + struct rt_task lp;
6134 + if (param == 0 || pid < 0)
6135 + goto out;
6136 + read_lock(&tasklist_lock);
6137 + if (!(source = find_task_by_vpid(pid))) {
6138 + retval = -ESRCH;
6139 + goto out_unlock;
6140 + }
6141 + lp = source->rt_param.task_params;
6142 + read_unlock(&tasklist_lock);
6143 + /* Do copying outside the lock */
6144 + retval =
6145 + copy_to_user(param, &lp, sizeof(lp)) ? -EFAULT : 0;
6146 + return retval;
6147 + out_unlock:
6148 + read_unlock(&tasklist_lock);
6149 + out:
6150 + return retval;
6151 +
6152 +}
6153 +
6154 +/*
6155 + * This is the crucial function for the periodic task implementation.
6156 + * It checks whether the task is periodic, whether such a sleep
6157 + * is permitted, and calls the plugin-specific sleep, which puts the
6158 + * task into a wait array.
6159 + * returns 0 on successful wakeup
6160 + * returns EPERM if current conditions do not permit such sleep
6161 + * returns EINVAL if current task is not able to go to sleep
6162 + */
6163 +asmlinkage long sys_complete_job(void)
6164 +{
6165 + int retval = -EPERM;
6166 + if (!is_realtime(current)) {
6167 + retval = -EINVAL;
6168 + goto out;
6169 + }
6170 + /* Task with negative or zero period cannot sleep */
6171 + if (get_rt_period(current) <= 0) {
6172 + retval = -EINVAL;
6173 + goto out;
6174 + }
6175 + /* The plugin has to put the task into an
6176 + * appropriate queue and call schedule
6177 + */
6178 + retval = litmus->complete_job();
6179 + out:
6180 + return retval;
6181 +}
6182 +
6183 +/* This is an "improved" version of sys_complete_job that
6184 + * addresses the problem of unintentionally missing a job after
6185 + * an overrun.
6186 + *
6187 + * returns 0 on successful wakeup
6188 + * returns EPERM if current conditions do not permit such sleep
6189 + * returns EINVAL if current task is not able to go to sleep
6190 + */
6191 +asmlinkage long sys_wait_for_job_release(unsigned int job)
6192 +{
6193 + int retval = -EPERM;
6194 + if (!is_realtime(current)) {
6195 + retval = -EINVAL;
6196 + goto out;
6197 + }
6198 +
6199 + /* Task with negative or zero period cannot sleep */
6200 + if (get_rt_period(current) <= 0) {
6201 + retval = -EINVAL;
6202 + goto out;
6203 + }
6204 +
6205 + retval = 0;
6206 +
6207 + /* first wait until we have "reached" the desired job
6208 + *
6209 + * This implementation has at least two problems:
6210 + *
6211 + * 1) It doesn't gracefully handle the wrap around of
6212 + * job_no. Since LITMUS is a prototype, this is not much
6213 + * of a problem right now.
6214 + *
6215 + * 2) It is theoretically racy if a job release occurs
6216 + * between checking job_no and calling sleep_next_period().
6217 + * A proper solution would require adding another callback
6218 + * in the plugin structure and testing the condition with
6219 + * interrupts disabled.
6220 + *
6221 + * FIXME: At least problem 2 should be taken care of eventually.
6222 + */
6223 + while (!retval && job > current->rt_param.job_params.job_no)
6224 + /* If the last job overran then job <= job_no and we
6225 + * don't send the task to sleep.
6226 + */
6227 + retval = litmus->complete_job();
6228 + out:
6229 + return retval;
6230 +}
6231 +
6232 +/* This is a helper syscall to query the current job sequence number.
6233 + *
6234 + * returns 0 on successful query
6235 + * returns EPERM if task is not a real-time task.
6236 + * returns EFAULT if &job is not a valid pointer.
6237 + */
6238 +asmlinkage long sys_query_job_no(unsigned int __user *job)
6239 +{
6240 + int retval = -EPERM;
6241 + if (is_realtime(current))
6242 + retval = put_user(current->rt_param.job_params.job_no, job);
6243 +
6244 + return retval;
6245 +}
6246 +
6247 +/* sys_null_call() is only used for determining raw system call
6248 + * overheads (kernel entry, kernel exit). It has no useful side effects.
6249 + * If ts is non-NULL, then the current Feather-Trace time is recorded.
6250 + */
6251 +asmlinkage long sys_null_call(cycles_t __user *ts)
6252 +{
6253 + long ret = 0;
6254 + cycles_t now;
6255 +
6256 + if (ts) {
6257 + now = get_cycles();
6258 + ret = put_user(now, ts);
6259 + }
6260 +
6261 + return ret;
6262 +}
6263 +
6264 +/* p is a real-time task. Re-init its state as a best-effort task. */
6265 +static void reinit_litmus_state(struct task_struct* p, int restore)
6266 +{
6267 + struct rt_task user_config = {};
6268 + void* ctrl_page = NULL;
6269 +
6270 + if (restore) {
6271 +		/* Save user-space provided configuration data
6272 +		 * and the allocated page. */
6273 + user_config = p->rt_param.task_params;
6274 + ctrl_page = p->rt_param.ctrl_page;
6275 + }
6276 +
6277 + /* We probably should not be inheriting any task's priority
6278 + * at this point in time.
6279 + */
6280 + WARN_ON(p->rt_param.inh_task);
6281 +
6282 + /* Cleanup everything else. */
6283 + memset(&p->rt_param, 0, sizeof(p->rt_param));
6284 +
6285 + /* Restore preserved fields. */
6286 + if (restore) {
6287 + p->rt_param.task_params = user_config;
6288 + p->rt_param.ctrl_page = ctrl_page;
6289 + }
6290 +}
6291 +
6292 +long litmus_admit_task(struct task_struct* tsk)
6293 +{
6294 + long retval = 0;
6295 + unsigned long flags;
6296 +
6297 + BUG_ON(is_realtime(tsk));
6298 +
6299 + if (get_rt_period(tsk) == 0 ||
6300 + get_exec_cost(tsk) > get_rt_period(tsk)) {
6301 + TRACE_TASK(tsk, "litmus admit: invalid task parameters "
6302 + "(%lu, %lu)\n",
6303 + get_exec_cost(tsk), get_rt_period(tsk));
6304 + retval = -EINVAL;
6305 + goto out;
6306 + }
6307 +
6308 + if (!cpu_online(get_partition(tsk))) {
6309 + TRACE_TASK(tsk, "litmus admit: cpu %d is not online\n",
6310 + get_partition(tsk));
6311 + retval = -EINVAL;
6312 + goto out;
6313 + }
6314 +
6315 + INIT_LIST_HEAD(&tsk_rt(tsk)->list);
6316 +
6317 + /* avoid scheduler plugin changing underneath us */
6318 + raw_spin_lock_irqsave(&task_transition_lock, flags);
6319 +
6320 + /* allocate heap node for this task */
6321 + tsk_rt(tsk)->heap_node = bheap_node_alloc(GFP_ATOMIC);
6322 + tsk_rt(tsk)->rel_heap = release_heap_alloc(GFP_ATOMIC);
6323 +
6324 + if (!tsk_rt(tsk)->heap_node || !tsk_rt(tsk)->rel_heap) {
6325 + printk(KERN_WARNING "litmus: no more heap node memory!?\n");
6326 +
6327 + bheap_node_free(tsk_rt(tsk)->heap_node);
6328 + release_heap_free(tsk_rt(tsk)->rel_heap);
6329 +
6330 + retval = -ENOMEM;
6331 + goto out_unlock;
6332 + } else {
6333 + bheap_node_init(&tsk_rt(tsk)->heap_node, tsk);
6334 + }
6335 +
6336 + retval = litmus->admit_task(tsk);
6337 +
6338 + if (!retval) {
6339 + sched_trace_task_name(tsk);
6340 + sched_trace_task_param(tsk);
6341 + atomic_inc(&rt_task_count);
6342 + }
6343 +
6344 +out_unlock:
6345 + raw_spin_unlock_irqrestore(&task_transition_lock, flags);
6346 +out:
6347 + return retval;
6348 +}
6349 +
6350 +void litmus_exit_task(struct task_struct* tsk)
6351 +{
6352 + if (is_realtime(tsk)) {
6353 + sched_trace_task_completion(tsk, 1);
6354 +
6355 + litmus->task_exit(tsk);
6356 +
6357 + BUG_ON(bheap_node_in_heap(tsk_rt(tsk)->heap_node));
6358 + bheap_node_free(tsk_rt(tsk)->heap_node);
6359 + release_heap_free(tsk_rt(tsk)->rel_heap);
6360 +
6361 + atomic_dec(&rt_task_count);
6362 + reinit_litmus_state(tsk, 1);
6363 + }
6364 +}
6365 +
6366 +/* IPI callback to synchronize plugin switching */
6367 +static void synch_on_plugin_switch(void* info)
6368 +{
6369 + atomic_inc(&cannot_use_plugin);
6370 + while (atomic_read(&cannot_use_plugin) > 0)
6371 + cpu_relax();
6372 +}
6373 +
6374 +/* Switching a plugin in use is tricky.
6375 + * We must watch out that no real-time tasks exist
6376 + * (and that none is created in parallel) and that the plugin is not
6377 + * currently in use on any processor (in theory).
6378 + */
6379 +int switch_sched_plugin(struct sched_plugin* plugin)
6380 +{
6381 + unsigned long flags;
6382 + int ret = 0;
6383 +
6384 + BUG_ON(!plugin);
6385 +
6386 + /* forbid other cpus to use the plugin */
6387 + atomic_set(&cannot_use_plugin, 1);
6388 + /* send IPI to force other CPUs to synch with us */
6389 + smp_call_function(synch_on_plugin_switch, NULL, 0);
6390 +
6391 + /* wait until all other CPUs have started synch */
6392 + while (atomic_read(&cannot_use_plugin) < num_online_cpus())
6393 + cpu_relax();
6394 +
6395 + /* stop task transitions */
6396 + raw_spin_lock_irqsave(&task_transition_lock, flags);
6397 +
6398 + /* don't switch if there are active real-time tasks */
6399 + if (atomic_read(&rt_task_count) == 0) {
6400 + ret = litmus->deactivate_plugin();
6401 + if (0 != ret)
6402 + goto out;
6403 + ret = plugin->activate_plugin();
6404 + if (0 != ret) {
6405 + printk(KERN_INFO "Can't activate %s (%d).\n",
6406 + plugin->plugin_name, ret);
6407 + plugin = &linux_sched_plugin;
6408 + }
6409 + printk(KERN_INFO "Switching to LITMUS^RT plugin %s.\n", plugin->plugin_name);
6410 + litmus = plugin;
6411 + } else
6412 + ret = -EBUSY;
6413 +out:
6414 + raw_spin_unlock_irqrestore(&task_transition_lock, flags);
6415 + atomic_set(&cannot_use_plugin, 0);
6416 + return ret;
6417 +}
6418 +
6419 +/* Called upon fork.
6420 + * p is the newly forked task.
6421 + */
6422 +void litmus_fork(struct task_struct* p)
6423 +{
6424 + if (is_realtime(p)) {
6425 + /* clean out any litmus related state, don't preserve anything */
6426 + reinit_litmus_state(p, 0);
6427 + /* Don't let the child be a real-time task. */
6428 + p->sched_reset_on_fork = 1;
6429 + } else
6430 + /* non-rt tasks might have ctrl_page set */
6431 + tsk_rt(p)->ctrl_page = NULL;
6432 +
6433 + /* od tables are never inherited across a fork */
6434 + p->od_table = NULL;
6435 +}
6436 +
6437 +/* Called upon execve().
6438 + * current is doing the exec.
6439 + * Don't let address space specific stuff leak.
6440 + */
6441 +void litmus_exec(void)
6442 +{
6443 + struct task_struct* p = current;
6444 +
6445 + if (is_realtime(p)) {
6446 + WARN_ON(p->rt_param.inh_task);
6447 + if (tsk_rt(p)->ctrl_page) {
6448 + free_page((unsigned long) tsk_rt(p)->ctrl_page);
6449 + tsk_rt(p)->ctrl_page = NULL;
6450 + }
6451 + }
6452 +}
6453 +
6454 +void exit_litmus(struct task_struct *dead_tsk)
6455 +{
6456 +	/* We also allow non-RT tasks to
6457 +	 * allocate control pages so that
6458 +	 * measurements with non-RT tasks are possible.
6459 +	 * Thus, check in any case whether the
6460 +	 * page needs to be freed.
6461 + */
6462 + if (tsk_rt(dead_tsk)->ctrl_page) {
6463 + TRACE_TASK(dead_tsk,
6464 + "freeing ctrl_page %p\n",
6465 + tsk_rt(dead_tsk)->ctrl_page);
6466 + free_page((unsigned long) tsk_rt(dead_tsk)->ctrl_page);
6467 + }
6468 +
6469 + /* main cleanup only for RT tasks */
6470 + if (is_realtime(dead_tsk))
6471 + litmus_exit_task(dead_tsk);
6472 +}
6473 +
6474 +
6475 +#ifdef CONFIG_MAGIC_SYSRQ
6476 +int sys_kill(int pid, int sig);
6477 +
6478 +static void sysrq_handle_kill_rt_tasks(int key)
6479 +{
6480 + struct task_struct *t;
6481 + read_lock(&tasklist_lock);
6482 + for_each_process(t) {
6483 + if (is_realtime(t)) {
6484 + sys_kill(t->pid, SIGKILL);
6485 + }
6486 + }
6487 + read_unlock(&tasklist_lock);
6488 +}
6489 +
6490 +static struct sysrq_key_op sysrq_kill_rt_tasks_op = {
6491 + .handler = sysrq_handle_kill_rt_tasks,
6492 + .help_msg = "quit-rt-tasks(X)",
6493 + .action_msg = "sent SIGKILL to all LITMUS^RT real-time tasks",
6494 +};
6495 +#endif
6496 +
6497 +extern struct sched_plugin linux_sched_plugin;
6498 +
6499 +static int __init _init_litmus(void)
6500 +{
6501 + /* Common initializers,
6502 + * mode change lock is used to enforce single mode change
6503 + * operation.
6504 + */
6505 + printk("Starting LITMUS^RT kernel\n");
6506 +
6507 + BUILD_BUG_ON(sizeof(union np_flag) != sizeof(uint32_t));
6508 +
6509 + register_sched_plugin(&linux_sched_plugin);
6510 +
6511 + bheap_node_cache = KMEM_CACHE(bheap_node, SLAB_PANIC);
6512 + release_heap_cache = KMEM_CACHE(release_heap, SLAB_PANIC);
6513 +
6514 +#ifdef CONFIG_MAGIC_SYSRQ
6515 + /* offer some debugging help */
6516 + if (!register_sysrq_key('x', &sysrq_kill_rt_tasks_op))
6517 + printk("Registered kill rt tasks magic sysrq.\n");
6518 + else
6519 + printk("Could not register kill rt tasks magic sysrq.\n");
6520 +#endif
6521 +
6522 + init_litmus_proc();
6523 +
6524 +#ifdef CONFIG_SCHED_CPU_AFFINITY
6525 + init_topology();
6526 +#endif
6527 +
6528 + return 0;
6529 +}
6530 +
6531 +static void _exit_litmus(void)
6532 +{
6533 + exit_litmus_proc();
6534 + kmem_cache_destroy(bheap_node_cache);
6535 + kmem_cache_destroy(release_heap_cache);
6536 +}
6537 +
6538 +module_init(_init_litmus);
6539 +module_exit(_exit_litmus);
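As a reference point for the admission checks in sys_set_rt_task_param() above, here is a hedged user-space sketch of how a task might announce its parameters before becoming a real-time task. The __NR_set_rt_task_param constant is assumed to be the number assigned in include/litmus/unistd_*.h, and the time values assume exec_cost/period are expressed in nanoseconds; only fields that the syscall actually validates are shown.

	struct rt_task params = {
		.exec_cost     =  10000000ULL,	/*  10 ms worst-case execution */
		.period        = 100000000ULL,	/* 100 ms period */
		.cpu           = 0,
		.cls           = RT_CLASS_SOFT,
		.budget_policy = NO_ENFORCEMENT,
	};

	if (syscall(__NR_set_rt_task_param, getpid(), &params) != 0)
		perror("set_rt_task_param");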
6540 diff --git a/litmus/litmus_proc.c b/litmus/litmus_proc.c
6541 new file mode 100644
6542 index 0000000..4bf725a
6543 --- /dev/null
6544 +++ b/litmus/litmus_proc.c
6545 @@ -0,0 +1,347 @@
6546 +/*
6547 + * litmus_proc.c -- Implementation of the /proc/litmus directory tree.
6548 + */
6549 +
6550 +#include <linux/sched.h>
6551 +#include <linux/uaccess.h>
6552 +
6553 +#include <litmus/litmus.h>
6554 +#include <litmus/litmus_proc.h>
6555 +
6556 +#include <litmus/clustered.h>
6557 +
6558 +/* in litmus/litmus.c */
6559 +extern atomic_t rt_task_count;
6560 +
6561 +static struct proc_dir_entry *litmus_dir = NULL,
6562 + *curr_file = NULL,
6563 + *stat_file = NULL,
6564 + *plugs_dir = NULL,
6565 +#ifdef CONFIG_RELEASE_MASTER
6566 + *release_master_file = NULL,
6567 +#endif
6568 + *plugs_file = NULL;
6569 +
6570 +/* in litmus/sync.c */
6571 +int count_tasks_waiting_for_release(void);
6572 +
6573 +static int proc_read_stats(char *page, char **start,
6574 + off_t off, int count,
6575 + int *eof, void *data)
6576 +{
6577 + int len;
6578 +
6579 + len = snprintf(page, PAGE_SIZE,
6580 + "real-time tasks = %d\n"
6581 + "ready for release = %d\n",
6582 + atomic_read(&rt_task_count),
6583 + count_tasks_waiting_for_release());
6584 + return len;
6585 +}
6586 +
6587 +static int proc_read_plugins(char *page, char **start,
6588 + off_t off, int count,
6589 + int *eof, void *data)
6590 +{
6591 + int len;
6592 +
6593 + len = print_sched_plugins(page, PAGE_SIZE);
6594 + return len;
6595 +}
6596 +
6597 +static int proc_read_curr(char *page, char **start,
6598 + off_t off, int count,
6599 + int *eof, void *data)
6600 +{
6601 + int len;
6602 +
6603 + len = snprintf(page, PAGE_SIZE, "%s\n", litmus->plugin_name);
6604 + return len;
6605 +}
6606 +
6607 +/* in litmus/litmus.c */
6608 +int switch_sched_plugin(struct sched_plugin*);
6609 +
6610 +static int proc_write_curr(struct file *file,
6611 + const char *buffer,
6612 + unsigned long count,
6613 + void *data)
6614 +{
6615 + int len, ret;
6616 + char name[65];
6617 + struct sched_plugin* found;
6618 +
6619 + len = copy_and_chomp(name, sizeof(name), buffer, count);
6620 + if (len < 0)
6621 + return len;
6622 +
6623 + found = find_sched_plugin(name);
6624 +
6625 + if (found) {
6626 + ret = switch_sched_plugin(found);
6627 + if (ret != 0)
6628 + printk(KERN_INFO "Could not switch plugin: %d\n", ret);
6629 + } else
6630 + printk(KERN_INFO "Plugin '%s' is unknown.\n", name);
6631 +
6632 + return len;
6633 +}
6634 +
6635 +#ifdef CONFIG_RELEASE_MASTER
6636 +static int proc_read_release_master(char *page, char **start,
6637 + off_t off, int count,
6638 + int *eof, void *data)
6639 +{
6640 + int len, master;
6641 + master = atomic_read(&release_master_cpu);
6642 + if (master == NO_CPU)
6643 + len = snprintf(page, PAGE_SIZE, "NO_CPU\n");
6644 + else
6645 + len = snprintf(page, PAGE_SIZE, "%d\n", master);
6646 + return len;
6647 +}
6648 +
6649 +static int proc_write_release_master(struct file *file,
6650 + const char *buffer,
6651 + unsigned long count,
6652 + void *data)
6653 +{
6654 + int cpu, err, len, online = 0;
6655 + char msg[64];
6656 +
6657 + len = copy_and_chomp(msg, sizeof(msg), buffer, count);
6658 +
6659 + if (len < 0)
6660 + return len;
6661 +
6662 + if (strcmp(msg, "NO_CPU") == 0)
6663 + atomic_set(&release_master_cpu, NO_CPU);
6664 + else {
6665 + err = sscanf(msg, "%d", &cpu);
6666 + if (err == 1 && cpu >= 0 && (online = cpu_online(cpu))) {
6667 + atomic_set(&release_master_cpu, cpu);
6668 + } else {
6669 + TRACE("invalid release master: '%s' "
6670 + "(err:%d cpu:%d online:%d)\n",
6671 + msg, err, cpu, online);
6672 + len = -EINVAL;
6673 + }
6674 + }
6675 + return len;
6676 +}
6677 +#endif
6678 +
6679 +int __init init_litmus_proc(void)
6680 +{
6681 + litmus_dir = proc_mkdir("litmus", NULL);
6682 + if (!litmus_dir) {
6683 + printk(KERN_ERR "Could not allocate LITMUS^RT procfs entry.\n");
6684 + return -ENOMEM;
6685 + }
6686 +
6687 + curr_file = create_proc_entry("active_plugin",
6688 + 0644, litmus_dir);
6689 + if (!curr_file) {
6690 + printk(KERN_ERR "Could not allocate active_plugin "
6691 + "procfs entry.\n");
6692 + return -ENOMEM;
6693 + }
6694 + curr_file->read_proc = proc_read_curr;
6695 + curr_file->write_proc = proc_write_curr;
6696 +
6697 +#ifdef CONFIG_RELEASE_MASTER
6698 + release_master_file = create_proc_entry("release_master",
6699 + 0644, litmus_dir);
6700 + if (!release_master_file) {
6701 + printk(KERN_ERR "Could not allocate release_master "
6702 + "procfs entry.\n");
6703 + return -ENOMEM;
6704 + }
6705 + release_master_file->read_proc = proc_read_release_master;
6706 + release_master_file->write_proc = proc_write_release_master;
6707 +#endif
6708 +
6709 + stat_file = create_proc_read_entry("stats", 0444, litmus_dir,
6710 + proc_read_stats, NULL);
6711 +
6712 + plugs_dir = proc_mkdir("plugins", litmus_dir);
6713 + if (!plugs_dir){
6714 + printk(KERN_ERR "Could not allocate plugins directory "
6715 + "procfs entry.\n");
6716 + return -ENOMEM;
6717 + }
6718 +
6719 + plugs_file = create_proc_read_entry("loaded", 0444, plugs_dir,
6720 + proc_read_plugins, NULL);
6721 +
6722 + return 0;
6723 +}
6724 +
6725 +void exit_litmus_proc(void)
6726 +{
6727 + if (plugs_file)
6728 + remove_proc_entry("loaded", plugs_dir);
6729 + if (plugs_dir)
6730 + remove_proc_entry("plugins", litmus_dir);
6731 + if (stat_file)
6732 + remove_proc_entry("stats", litmus_dir);
6733 + if (curr_file)
6734 + remove_proc_entry("active_plugin", litmus_dir);
6735 +#ifdef CONFIG_RELEASE_MASTER
6736 + if (release_master_file)
6737 + remove_proc_entry("release_master", litmus_dir);
6738 +#endif
6739 + if (litmus_dir)
6740 + remove_proc_entry("litmus", NULL);
6741 +}
6742 +
6743 +long make_plugin_proc_dir(struct sched_plugin* plugin,
6744 + struct proc_dir_entry** pde_in)
6745 +{
6746 + struct proc_dir_entry *pde_new = NULL;
6747 + long rv;
6748 +
6749 + if (!plugin || !plugin->plugin_name){
6750 + printk(KERN_ERR "Invalid plugin struct passed to %s.\n",
6751 + __func__);
6752 + rv = -EINVAL;
6753 + goto out_no_pde;
6754 + }
6755 +
6756 + if (!plugs_dir){
6757 + printk(KERN_ERR "Could not make plugin sub-directory, because "
6758 + "/proc/litmus/plugins does not exist.\n");
6759 + rv = -ENOENT;
6760 + goto out_no_pde;
6761 + }
6762 +
6763 + pde_new = proc_mkdir(plugin->plugin_name, plugs_dir);
6764 + if (!pde_new){
6765 + printk(KERN_ERR "Could not make plugin sub-directory: "
6766 +		       "out of memory?\n");
6767 + rv = -ENOMEM;
6768 + goto out_no_pde;
6769 + }
6770 +
6771 + rv = 0;
6772 + *pde_in = pde_new;
6773 + goto out_ok;
6774 +
6775 +out_no_pde:
6776 + *pde_in = NULL;
6777 +out_ok:
6778 + return rv;
6779 +}
6780 +
6781 +void remove_plugin_proc_dir(struct sched_plugin* plugin)
6782 +{
6783 + if (!plugin || !plugin->plugin_name){
6784 + printk(KERN_ERR "Invalid plugin struct passed to %s.\n",
6785 + __func__);
6786 + return;
6787 + }
6788 + remove_proc_entry(plugin->plugin_name, plugs_dir);
6789 +}
6790 +
6791 +
6792 +
6793 +/* misc. I/O helper functions */
6794 +
6795 +int copy_and_chomp(char *kbuf, unsigned long ksize,
6796 + __user const char* ubuf, unsigned long ulength)
6797 +{
6798 + /* caller must provide buffer space */
6799 + BUG_ON(!ksize);
6800 +
6801 + ksize--; /* leave space for null byte */
6802 +
6803 + if (ksize > ulength)
6804 + ksize = ulength;
6805 +
6806 + if(copy_from_user(kbuf, ubuf, ksize))
6807 + return -EFAULT;
6808 +
6809 + kbuf[ksize] = '\0';
6810 +
6811 + /* chomp kbuf */
6812 + if (ksize > 0 && kbuf[ksize - 1] == '\n')
6813 + kbuf[ksize - 1] = '\0';
6814 +
6815 + return ksize;
6816 +}
6817 +
6818 +/* helper functions for clustered plugins */
6819 +static const char* cache_level_names[] = {
6820 + "ALL",
6821 + "L1",
6822 + "L2",
6823 + "L3",
6824 +};
6825 +
6826 +int parse_cache_level(const char *cache_name, enum cache_level *level)
6827 +{
6828 + int err = -EINVAL;
6829 + int i;
6830 + /* do a quick and dirty comparison to find the cluster size */
6831 + for (i = GLOBAL_CLUSTER; i <= L3_CLUSTER; i++)
6832 + if (!strcmp(cache_name, cache_level_names[i])) {
6833 + *level = (enum cache_level) i;
6834 + err = 0;
6835 + break;
6836 + }
6837 + return err;
6838 +}
6839 +
6840 +const char* cache_level_name(enum cache_level level)
6841 +{
6842 + int idx = level;
6843 +
6844 + if (idx >= GLOBAL_CLUSTER && idx <= L3_CLUSTER)
6845 + return cache_level_names[idx];
6846 + else
6847 + return "INVALID";
6848 +}
6849 +
6850 +
6851 +/* proc file interface to configure the cluster size */
6852 +static int proc_read_cluster_size(char *page, char **start,
6853 + off_t off, int count,
6854 + int *eof, void *data)
6855 +{
6856 + return snprintf(page, PAGE_SIZE, "%s\n",
6857 + cache_level_name(*((enum cache_level*) data)));;
6858 +			cache_level_name(*((enum cache_level*) data)));
6859 +
6860 +static int proc_write_cluster_size(struct file *file,
6861 + const char *buffer,
6862 + unsigned long count,
6863 + void *data)
6864 +{
6865 + int len;
6866 + char cache_name[8];
6867 +
6868 + len = copy_and_chomp(cache_name, sizeof(cache_name), buffer, count);
6869 +
6870 + if (len > 0 && parse_cache_level(cache_name, (enum cache_level*) data))
6871 + printk(KERN_INFO "Cluster '%s' is unknown.\n", cache_name);
6872 +
6873 + return len;
6874 +}
6875 +
6876 +struct proc_dir_entry* create_cluster_file(struct proc_dir_entry* parent,
6877 + enum cache_level* level)
6878 +{
6879 + struct proc_dir_entry* cluster_file;
6880 +
6881 + cluster_file = create_proc_entry("cluster", 0644, parent);
6882 + if (!cluster_file) {
6883 + printk(KERN_ERR "Could not allocate %s/cluster "
6884 + "procfs entry.\n", parent->name);
6885 + } else {
6886 + cluster_file->read_proc = proc_read_cluster_size;
6887 + cluster_file->write_proc = proc_write_cluster_size;
6888 + cluster_file->data = level;
6889 + }
6890 + return cluster_file;
6891 +}
6892 +
6893 diff --git a/litmus/locking.c b/litmus/locking.c
6894 new file mode 100644
6895 index 0000000..0c1aa6a
6896 --- /dev/null
6897 +++ b/litmus/locking.c
6898 @@ -0,0 +1,139 @@
6899 +#include <litmus/fdso.h>
6900 +
6901 +#ifdef CONFIG_LITMUS_LOCKING
6902 +
6903 +#include <litmus/sched_plugin.h>
6904 +#include <litmus/trace.h>
6905 +
6906 +static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg);
6907 +static int open_generic_lock(struct od_table_entry* entry, void* __user arg);
6908 +static int close_generic_lock(struct od_table_entry* entry);
6909 +static void destroy_generic_lock(obj_type_t type, void* sem);
6910 +
6911 +struct fdso_ops generic_lock_ops = {
6912 + .create = create_generic_lock,
6913 + .open = open_generic_lock,
6914 + .close = close_generic_lock,
6915 + .destroy = destroy_generic_lock
6916 +};
6917 +
6918 +static inline bool is_lock(struct od_table_entry* entry)
6919 +{
6920 + return entry->class == &generic_lock_ops;
6921 +}
6922 +
6923 +static inline struct litmus_lock* get_lock(struct od_table_entry* entry)
6924 +{
6925 + BUG_ON(!is_lock(entry));
6926 + return (struct litmus_lock*) entry->obj->obj;
6927 +}
6928 +
6929 +static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg)
6930 +{
6931 + struct litmus_lock* lock;
6932 + int err;
6933 +
6934 + err = litmus->allocate_lock(&lock, type, arg);
6935 + if (err == 0)
6936 + *obj_ref = lock;
6937 + return err;
6938 +}
6939 +
6940 +static int open_generic_lock(struct od_table_entry* entry, void* __user arg)
6941 +{
6942 + struct litmus_lock* lock = get_lock(entry);
6943 + if (lock->ops->open)
6944 + return lock->ops->open(lock, arg);
6945 + else
6946 + return 0; /* default: any task can open it */
6947 +}
6948 +
6949 +static int close_generic_lock(struct od_table_entry* entry)
6950 +{
6951 + struct litmus_lock* lock = get_lock(entry);
6952 + if (lock->ops->close)
6953 + return lock->ops->close(lock);
6954 + else
6955 + return 0; /* default: closing succeeds */
6956 +}
6957 +
6958 +static void destroy_generic_lock(obj_type_t type, void* obj)
6959 +{
6960 + struct litmus_lock* lock = (struct litmus_lock*) obj;
6961 + lock->ops->deallocate(lock);
6962 +}
6963 +
6964 +asmlinkage long sys_litmus_lock(int lock_od)
6965 +{
6966 + long err = -EINVAL;
6967 + struct od_table_entry* entry;
6968 + struct litmus_lock* l;
6969 +
6970 + TS_LOCK_START;
6971 +
6972 + entry = get_entry_for_od(lock_od);
6973 + if (entry && is_lock(entry)) {
6974 + l = get_lock(entry);
6975 + TRACE_CUR("attempts to lock 0x%p\n", l);
6976 + err = l->ops->lock(l);
6977 + }
6978 +
6979 +	/* Note: task may have been suspended or preempted in between!  Take
6980 + * this into account when computing overheads. */
6981 + TS_LOCK_END;
6982 +
6983 + return err;
6984 +}
6985 +
6986 +asmlinkage long sys_litmus_unlock(int lock_od)
6987 +{
6988 + long err = -EINVAL;
6989 + struct od_table_entry* entry;
6990 + struct litmus_lock* l;
6991 +
6992 + TS_UNLOCK_START;
6993 +
6994 + entry = get_entry_for_od(lock_od);
6995 + if (entry && is_lock(entry)) {
6996 + l = get_lock(entry);
6997 + TRACE_CUR("attempts to unlock 0x%p\n", l);
6998 + err = l->ops->unlock(l);
6999 + }
7000 +
7001 +	/* Note: task may have been preempted in between!  Take this into
7002 + * account when computing overheads. */
7003 + TS_UNLOCK_END;
7004 +
7005 + return err;
7006 +}
7007 +
7008 +struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq)
7009 +{
7010 + wait_queue_t* q;
7011 + struct task_struct* t = NULL;
7012 +
7013 + if (waitqueue_active(wq)) {
7014 + q = list_entry(wq->task_list.next,
7015 + wait_queue_t, task_list);
7016 + t = (struct task_struct*) q->private;
7017 + __remove_wait_queue(wq, q);
7018 + }
7019 + return(t);
7020 +}
7021 +
7022 +
7023 +#else
7024 +
7025 +struct fdso_ops generic_lock_ops = {};
7026 +
7027 +asmlinkage long sys_litmus_lock(int sem_od)
7028 +{
7029 + return -ENOSYS;
7030 +}
7031 +
7032 +asmlinkage long sys_litmus_unlock(int sem_od)
7033 +{
7034 + return -ENOSYS;
7035 +}
7036 +
7037 +#endif
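Putting the pieces together, the expected user-space call sequence around these two syscalls looks roughly as follows (a hedged sketch in comment form; the wrapper names are hypothetical and the real syscall numbers live in include/litmus/unistd_*.h):

	/*
	 *   od = od_open(fd, LOCK_TYPE, resource_id, NULL); // create/attach lock
	 *   litmus_lock(od);     // sys_litmus_lock: may block until granted
	 *   ... critical section ...
	 *   litmus_unlock(od);   // sys_litmus_unlock
	 *   od_close(od);        // drops the descriptor, possibly freeing the lock
	 */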
7038 diff --git a/litmus/preempt.c b/litmus/preempt.c
7039 new file mode 100644
7040 index 0000000..5704d0b
7041 --- /dev/null
7042 +++ b/litmus/preempt.c
7043 @@ -0,0 +1,133 @@
7044 +#include <linux/sched.h>
7045 +
7046 +#include <litmus/litmus.h>
7047 +#include <litmus/preempt.h>
7048 +
7049 +/* The rescheduling state of each processor.
7050 + */
7051 +DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, resched_state);
7052 +
7053 +void sched_state_will_schedule(struct task_struct* tsk)
7054 +{
7055 + /* Litmus hack: we only care about processor-local invocations of
7056 + * set_tsk_need_resched(). We can't reliably set the flag remotely
7057 + * since it might race with other updates to the scheduling state. We
7058 + * can't rely on the runqueue lock protecting updates to the sched
7059 + * state since processors do not acquire the runqueue locks for all
7060 + * updates to the sched state (to avoid acquiring two runqueue locks at
7061 + * the same time). Further, if tsk is residing on a remote processor,
7062 + * then that processor doesn't actually know yet that it is going to
7063 + * reschedule; it still must receive an IPI (unless a local invocation
7064 + * races).
7065 + */
7066 + if (likely(task_cpu(tsk) == smp_processor_id())) {
7067 + VERIFY_SCHED_STATE(TASK_SCHEDULED | SHOULD_SCHEDULE | TASK_PICKED | WILL_SCHEDULE);
7068 + if (is_in_sched_state(TASK_PICKED | PICKED_WRONG_TASK))
7069 + set_sched_state(PICKED_WRONG_TASK);
7070 + else
7071 + set_sched_state(WILL_SCHEDULE);
7072 + } else
7073 + /* Litmus tasks should never be subject to a remote
7074 + * set_tsk_need_resched(). */
7075 + BUG_ON(is_realtime(tsk));
7076 +#ifdef CONFIG_PREEMPT_STATE_TRACE
7077 + TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n",
7078 + __builtin_return_address(0));
7079 +#endif
7080 +}
7081 +
7082 +/* Called by the IPI handler after another CPU called smp_send_resched(). */
7083 +void sched_state_ipi(void)
7084 +{
7085 + /* If the IPI was slow, we might be in any state right now. The IPI is
7086 + * only meaningful if we are in SHOULD_SCHEDULE. */
7087 + if (is_in_sched_state(SHOULD_SCHEDULE)) {
7088 + /* Cause scheduler to be invoked.
7089 + * This will cause a transition to WILL_SCHEDULE. */
7090 + set_tsk_need_resched(current);
7091 + TRACE_STATE("IPI -> set_tsk_need_resched(%s/%d)\n",
7092 + current->comm, current->pid);
7093 + } else {
7094 + /* ignore */
7095 + TRACE_STATE("ignoring IPI in state %x (%s)\n",
7096 + get_sched_state(),
7097 + sched_state_name(get_sched_state()));
7098 + }
7099 +}
7100 +
7101 +/* Called by plugins to cause a CPU to reschedule. IMPORTANT: the caller must
7102 + * hold the lock that is used to serialize scheduling decisions. */
7103 +void litmus_reschedule(int cpu)
7104 +{
7105 + int picked_transition_ok = 0;
7106 + int scheduled_transition_ok = 0;
7107 +
7108 + /* The (remote) CPU could be in any state. */
7109 +
7110 + /* The critical states are TASK_PICKED and TASK_SCHEDULED, as the CPU
7111 + * is not aware of the need to reschedule at this point. */
7112 +
7113 + /* is a context switch in progress? */
7114 + if (cpu_is_in_sched_state(cpu, TASK_PICKED))
7115 + picked_transition_ok = sched_state_transition_on(
7116 + cpu, TASK_PICKED, PICKED_WRONG_TASK);
7117 +
7118 + if (!picked_transition_ok &&
7119 + cpu_is_in_sched_state(cpu, TASK_SCHEDULED)) {
7120 + /* We either raced with the end of the context switch, or the
7121 + * CPU was in TASK_SCHEDULED anyway. */
7122 + scheduled_transition_ok = sched_state_transition_on(
7123 + cpu, TASK_SCHEDULED, SHOULD_SCHEDULE);
7124 + }
7125 +
7126 + /* If the CPU was in state TASK_SCHEDULED, then we need to cause the
7127 + * scheduler to be invoked. */
7128 + if (scheduled_transition_ok) {
7129 + if (smp_processor_id() == cpu)
7130 + set_tsk_need_resched(current);
7131 + else
7132 + smp_send_reschedule(cpu);
7133 + }
7134 +
7135 + TRACE_STATE("%s picked-ok:%d sched-ok:%d\n",
7136 + __FUNCTION__,
7137 + picked_transition_ok,
7138 + scheduled_transition_ok);
7139 +}
7140 +
7141 +void litmus_reschedule_local(void)
7142 +{
7143 + if (is_in_sched_state(TASK_PICKED))
7144 + set_sched_state(PICKED_WRONG_TASK);
7145 + else if (is_in_sched_state(TASK_SCHEDULED | SHOULD_SCHEDULE)) {
7146 + set_sched_state(WILL_SCHEDULE);
7147 + set_tsk_need_resched(current);
7148 + }
7149 +}
7150 +
7151 +#ifdef CONFIG_DEBUG_KERNEL
7152 +
7153 +void sched_state_plugin_check(void)
7154 +{
7155 + if (!is_in_sched_state(TASK_PICKED | PICKED_WRONG_TASK)) {
7156 +		TRACE("!!!! plugin did not call sched_state_task_picked()! "
7157 + "Calling sched_state_task_picked() is mandatory---fix this.\n");
7158 + set_sched_state(TASK_PICKED);
7159 + }
7160 +}
7161 +
7162 +#define NAME_CHECK(x) case x: return #x
7163 +const char* sched_state_name(int s)
7164 +{
7165 + switch (s) {
7166 + NAME_CHECK(TASK_SCHEDULED);
7167 + NAME_CHECK(SHOULD_SCHEDULE);
7168 + NAME_CHECK(WILL_SCHEDULE);
7169 + NAME_CHECK(TASK_PICKED);
7170 + NAME_CHECK(PICKED_WRONG_TASK);
7171 + default:
7172 + return "UNKNOWN";
7173 +	}
7174 +}
7175 +
7176 +#endif
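For context, a hedged sketch of how a scheduler plugin is expected to interact with this state machine; my_lock, pick_highest_priority_task() and my_plugin_schedule() are illustrative names only.

	/*
	 * static struct task_struct* my_plugin_schedule(struct task_struct *prev)
	 * {
	 *	struct task_struct *next;
	 *
	 *	raw_spin_lock(&my_lock);
	 *	next = pick_highest_priority_task();
	 *	sched_state_task_picked();  // mandatory, see sched_state_plugin_check()
	 *	raw_spin_unlock(&my_lock);
	 *	return next;
	 * }
	 *
	 * To preempt a remote CPU, the plugin calls litmus_reschedule(cpu) while
	 * holding the lock that serializes its scheduling decisions, as noted above.
	 */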
7177 diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c
7178 new file mode 100644
7179 index 0000000..d405854
7180 --- /dev/null
7181 +++ b/litmus/rt_domain.c
7182 @@ -0,0 +1,357 @@
7183 +/*
7184 + * litmus/rt_domain.c
7185 + *
7186 + * LITMUS real-time infrastructure. This file contains the
7187 + * functions that manipulate RT domains. RT domains are an abstraction
7188 + * of a ready queue and a release queue.
7189 + */
7190 +
7191 +#include <linux/percpu.h>
7192 +#include <linux/sched.h>
7193 +#include <linux/list.h>
7194 +#include <linux/slab.h>
7195 +
7196 +#include <litmus/litmus.h>
7197 +#include <litmus/sched_plugin.h>
7198 +#include <litmus/sched_trace.h>
7199 +
7200 +#include <litmus/rt_domain.h>
7201 +
7202 +#include <litmus/trace.h>
7203 +
7204 +#include <litmus/bheap.h>
7205 +
7206 +/* Uncomment when debugging timer races... */
7207 +#if 0
7208 +#define VTRACE_TASK TRACE_TASK
7209 +#define VTRACE TRACE
7210 +#else
7211 +#define VTRACE_TASK(t, fmt, args...) /* shut up */
7212 +#define VTRACE(fmt, args...) /* be quiet already */
7213 +#endif
7214 +
7215 +static int dummy_resched(rt_domain_t *rt)
7216 +{
7217 + return 0;
7218 +}
7219 +
7220 +static int dummy_order(struct bheap_node* a, struct bheap_node* b)
7221 +{
7222 + return 0;
7223 +}
7224 +
7225 +/* default implementation: use default lock */
7226 +static void default_release_jobs(rt_domain_t* rt, struct bheap* tasks)
7227 +{
7228 + merge_ready(rt, tasks);
7229 +}
7230 +
7231 +static unsigned int time2slot(lt_t time)
7232 +{
7233 + return (unsigned int) time2quanta(time, FLOOR) % RELEASE_QUEUE_SLOTS;
7234 +}
7235 +
7236 +static enum hrtimer_restart on_release_timer(struct hrtimer *timer)
7237 +{
7238 + unsigned long flags;
7239 + struct release_heap* rh;
7240 + rh = container_of(timer, struct release_heap, timer);
7241 +
7242 + TS_RELEASE_LATENCY(rh->release_time);
7243 +
7244 + VTRACE("on_release_timer(0x%p) starts.\n", timer);
7245 +
7246 + TS_RELEASE_START;
7247 +
7248 +
7249 + raw_spin_lock_irqsave(&rh->dom->release_lock, flags);
7250 + VTRACE("CB has the release_lock 0x%p\n", &rh->dom->release_lock);
7251 + /* remove from release queue */
7252 + list_del(&rh->list);
7253 + raw_spin_unlock_irqrestore(&rh->dom->release_lock, flags);
7254 + VTRACE("CB returned release_lock 0x%p\n", &rh->dom->release_lock);
7255 +
7256 + /* call release callback */
7257 + rh->dom->release_jobs(rh->dom, &rh->heap);
7258 + /* WARNING: rh can be referenced from other CPUs from now on. */
7259 +
7260 + TS_RELEASE_END;
7261 +
7262 + VTRACE("on_release_timer(0x%p) ends.\n", timer);
7263 +
7264 + return HRTIMER_NORESTART;
7265 +}
7266 +
7267 +/* allocated in litmus.c */
7268 +struct kmem_cache * release_heap_cache;
7269 +
7270 +struct release_heap* release_heap_alloc(int gfp_flags)
7271 +{
7272 + struct release_heap* rh;
7273 +	rh = kmem_cache_alloc(release_heap_cache, gfp_flags);
7274 + if (rh) {
7275 + /* initialize timer */
7276 + hrtimer_init(&rh->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
7277 + rh->timer.function = on_release_timer;
7278 + }
7279 + return rh;
7280 +}
7281 +
7282 +void release_heap_free(struct release_heap* rh)
7283 +{
7284 + /* make sure timer is no longer in use */
7285 + hrtimer_cancel(&rh->timer);
7286 + kmem_cache_free(release_heap_cache, rh);
7287 +}
7288 +
7289 +/* Caller must hold release lock.
7290 + * Will return heap for given time. If no such heap exists prior to
7291 + * the invocation it will be created.
7292 + */
7293 +static struct release_heap* get_release_heap(rt_domain_t *rt,
7294 + struct task_struct* t,
7295 + int use_task_heap)
7296 +{
7297 + struct list_head* pos;
7298 + struct release_heap* heap = NULL;
7299 + struct release_heap* rh;
7300 + lt_t release_time = get_release(t);
7301 + unsigned int slot = time2slot(release_time);
7302 +
7303 + /* initialize pos for the case that the list is empty */
7304 + pos = rt->release_queue.slot[slot].next;
7305 + list_for_each(pos, &rt->release_queue.slot[slot]) {
7306 + rh = list_entry(pos, struct release_heap, list);
7307 + if (release_time == rh->release_time) {
7308 + /* perfect match -- this happens on hyperperiod
7309 + * boundaries
7310 + */
7311 + heap = rh;
7312 + break;
7313 + } else if (lt_before(release_time, rh->release_time)) {
7314 + /* we need to insert a new node since rh is
7315 + * already in the future
7316 + */
7317 + break;
7318 + }
7319 + }
7320 + if (!heap && use_task_heap) {
7321 + /* use pre-allocated release heap */
7322 + rh = tsk_rt(t)->rel_heap;
7323 +
7324 + rh->dom = rt;
7325 + rh->release_time = release_time;
7326 +
7327 + /* add to release queue */
7328 + list_add(&rh->list, pos->prev);
7329 + heap = rh;
7330 + }
7331 + return heap;
7332 +}
7333 +
7334 +static void reinit_release_heap(struct task_struct* t)
7335 +{
7336 + struct release_heap* rh;
7337 +
7338 + /* use pre-allocated release heap */
7339 + rh = tsk_rt(t)->rel_heap;
7340 +
7341 + /* Make sure it is safe to use. The timer callback could still
7342 + * be executing on another CPU; hrtimer_cancel() will wait
7343 + * until the timer callback has completed. However, under no
7344 + * circumstances should the timer be active (= yet to be
7345 + * triggered).
7346 + *
7347 + * WARNING: If the CPU still holds the release_lock at this point,
7348 + * deadlock may occur!
7349 + */
7350 + BUG_ON(hrtimer_cancel(&rh->timer));
7351 +
7352 + /* initialize */
7353 + bheap_init(&rh->heap);
7354 +#ifdef CONFIG_RELEASE_MASTER
7355 + atomic_set(&rh->info.state, HRTIMER_START_ON_INACTIVE);
7356 +#endif
7357 +}
7358 +/* arm_release_timer() - start local release timer or trigger
7359 + * remote timer (pull timer)
7360 + *
7361 + * Called by add_release() with:
7362 + * - tobe_lock taken
7363 + * - IRQ disabled
7364 + */
7365 +#ifdef CONFIG_RELEASE_MASTER
7366 +#define arm_release_timer(t) arm_release_timer_on((t), NO_CPU)
7367 +static void arm_release_timer_on(rt_domain_t *_rt , int target_cpu)
7368 +#else
7369 +static void arm_release_timer(rt_domain_t *_rt)
7370 +#endif
7371 +{
7372 + rt_domain_t *rt = _rt;
7373 + struct list_head list;
7374 + struct list_head *pos, *safe;
7375 + struct task_struct* t;
7376 + struct release_heap* rh;
7377 +
7378 + VTRACE("arm_release_timer() at %llu\n", litmus_clock());
7379 + list_replace_init(&rt->tobe_released, &list);
7380 +
7381 + list_for_each_safe(pos, safe, &list) {
7382 + /* pick task off the work list */
7383 + t = list_entry(pos, struct task_struct, rt_param.list);
7384 + sched_trace_task_release(t);
7385 + list_del(pos);
7386 +
7387 + /* put into release heap while holding release_lock */
7388 + raw_spin_lock(&rt->release_lock);
7389 + VTRACE_TASK(t, "I have the release_lock 0x%p\n", &rt->release_lock);
7390 +
7391 + rh = get_release_heap(rt, t, 0);
7392 + if (!rh) {
7393 + /* need to use our own, but drop lock first */
7394 + raw_spin_unlock(&rt->release_lock);
7395 + VTRACE_TASK(t, "Dropped release_lock 0x%p\n",
7396 + &rt->release_lock);
7397 +
7398 + reinit_release_heap(t);
7399 + VTRACE_TASK(t, "release_heap ready\n");
7400 +
7401 + raw_spin_lock(&rt->release_lock);
7402 + VTRACE_TASK(t, "Re-acquired release_lock 0x%p\n",
7403 + &rt->release_lock);
7404 +
7405 + rh = get_release_heap(rt, t, 1);
7406 + }
7407 + bheap_insert(rt->order, &rh->heap, tsk_rt(t)->heap_node);
7408 + VTRACE_TASK(t, "arm_release_timer(): added to release heap\n");
7409 +
7410 + raw_spin_unlock(&rt->release_lock);
7411 + VTRACE_TASK(t, "Returned the release_lock 0x%p\n", &rt->release_lock);
7412 +
7413 + /* To avoid arming the timer multiple times, we only let the
7414 + * owner do the arming (which is the "first" task to reference
7415 + * this release_heap anyway).
7416 + */
7417 + if (rh == tsk_rt(t)->rel_heap) {
7418 + VTRACE_TASK(t, "arming timer 0x%p\n", &rh->timer);
7419 + /* we cannot arm the timer using hrtimer_start()
7420 + * as it may deadlock on rq->lock
7421 + *
7422 + * PINNED mode is ok on both local and remote CPU
7423 + */
7424 +#ifdef CONFIG_RELEASE_MASTER
7425 + if (rt->release_master == NO_CPU &&
7426 + target_cpu == NO_CPU)
7427 +#endif
7428 + __hrtimer_start_range_ns(&rh->timer,
7429 + ns_to_ktime(rh->release_time),
7430 + 0, HRTIMER_MODE_ABS_PINNED, 0);
7431 +#ifdef CONFIG_RELEASE_MASTER
7432 + else
7433 + hrtimer_start_on(
7434 + /* target_cpu overrides release master */
7435 + (target_cpu != NO_CPU ?
7436 + target_cpu : rt->release_master),
7437 + &rh->info, &rh->timer,
7438 + ns_to_ktime(rh->release_time),
7439 + HRTIMER_MODE_ABS_PINNED);
7440 +#endif
7441 + } else
7442 + VTRACE_TASK(t, "0x%p is not my timer\n", &rh->timer);
7443 + }
7444 +}
7445 +
7446 +void rt_domain_init(rt_domain_t *rt,
7447 + bheap_prio_t order,
7448 + check_resched_needed_t check,
7449 + release_jobs_t release
7450 + )
7451 +{
7452 + int i;
7453 +
7454 + BUG_ON(!rt);
7455 + if (!check)
7456 + check = dummy_resched;
7457 + if (!release)
7458 + release = default_release_jobs;
7459 + if (!order)
7460 + order = dummy_order;
7461 +
7462 +#ifdef CONFIG_RELEASE_MASTER
7463 + rt->release_master = NO_CPU;
7464 +#endif
7465 +
7466 + bheap_init(&rt->ready_queue);
7467 + INIT_LIST_HEAD(&rt->tobe_released);
7468 + for (i = 0; i < RELEASE_QUEUE_SLOTS; i++)
7469 + INIT_LIST_HEAD(&rt->release_queue.slot[i]);
7470 +
7471 + raw_spin_lock_init(&rt->ready_lock);
7472 + raw_spin_lock_init(&rt->release_lock);
7473 + raw_spin_lock_init(&rt->tobe_lock);
7474 +
7475 + rt->check_resched = check;
7476 + rt->release_jobs = release;
7477 + rt->order = order;
7478 +}
7479 +
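
For orientation, a plugin that wants EDF ordering would set up its domain roughly as sketched below. This is illustrative only: my_domain, my_release_jobs, and my_activate_plugin are hypothetical names, edf_domain_init() in litmus/edf_common.c presumably wraps an equivalent call, and passing NULL for the check_resched callback falls back to the dummy_resched default installed above.

	static rt_domain_t my_domain;

	/* release callback: invoked from the release timer with no locks held,
	 * so take the ready lock before merging the released jobs */
	static void my_release_jobs(rt_domain_t *rt, struct bheap *tasks)
	{
		unsigned long flags;

		raw_spin_lock_irqsave(&rt->ready_lock, flags);
		__merge_ready(rt, tasks);
		/* ...check whether a preemption is now required... */
		raw_spin_unlock_irqrestore(&rt->ready_lock, flags);
	}

	static long my_activate_plugin(void)
	{
		/* EDF-ordered ready queue; edf_ready_order is declared in
		 * <litmus/edf_common.h> */
		rt_domain_init(&my_domain, edf_ready_order, NULL, my_release_jobs);
		return 0;
	}
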
7480 +/* add_ready - add a real-time task to the rt ready queue. It must be runnable.
7481 + * @new: the newly released task
7482 + */
7483 +void __add_ready(rt_domain_t* rt, struct task_struct *new)
7484 +{
7485 + TRACE("rt: adding %s/%d (%llu, %llu) rel=%llu to ready queue at %llu\n",
7486 + new->comm, new->pid, get_exec_cost(new), get_rt_period(new),
7487 + get_release(new), litmus_clock());
7488 +
7489 + BUG_ON(bheap_node_in_heap(tsk_rt(new)->heap_node));
7490 +
7491 + bheap_insert(rt->order, &rt->ready_queue, tsk_rt(new)->heap_node);
7492 + rt->check_resched(rt);
7493 +}
7494 +
7495 +/* merge_ready - Add a sorted set of tasks to the rt ready queue. They must be runnable.
7496 + * @tasks - the newly released tasks
7497 + */
7498 +void __merge_ready(rt_domain_t* rt, struct bheap* tasks)
7499 +{
7500 + bheap_union(rt->order, &rt->ready_queue, tasks);
7501 + rt->check_resched(rt);
7502 +}
7503 +
7504 +
7505 +#ifdef CONFIG_RELEASE_MASTER
7506 +void __add_release_on(rt_domain_t* rt, struct task_struct *task,
7507 + int target_cpu)
7508 +{
7509 + TRACE_TASK(task, "add_release_on(), rel=%llu, target=%d\n",
7510 + get_release(task), target_cpu);
7511 + list_add(&tsk_rt(task)->list, &rt->tobe_released);
7512 + task->rt_param.domain = rt;
7513 +
7514 + /* start release timer */
7515 + TS_SCHED2_START(task);
7516 +
7517 + arm_release_timer_on(rt, target_cpu);
7518 +
7519 + TS_SCHED2_END(task);
7520 +}
7521 +#endif
7522 +
7523 +/* add_release - add a real-time task to the rt release queue.
7524 + * @task: the sleeping task
7525 + */
7526 +void __add_release(rt_domain_t* rt, struct task_struct *task)
7527 +{
7528 + TRACE_TASK(task, "add_release(), rel=%llu\n", get_release(task));
7529 + list_add(&tsk_rt(task)->list, &rt->tobe_released);
7530 + task->rt_param.domain = rt;
7531 +
7532 + /* start release timer */
7533 + TS_SCHED2_START(task);
7534 +
7535 + arm_release_timer(rt);
7536 +
7537 + TS_SCHED2_END(task);
7538 +}
7539 +
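
The tobe_lock/IRQs-off precondition documented above arm_release_timer() is presumably established by thin inline wrappers in include/litmus/rt_domain.h rather than in this file; a minimal sketch of such a wrapper, under that assumption:

	static inline void add_release(rt_domain_t *rt, struct task_struct *task)
	{
		unsigned long flags;

		raw_spin_lock_irqsave(&rt->tobe_lock, flags);
		__add_release(rt, task);	/* queue task and arm the timer */
		raw_spin_unlock_irqrestore(&rt->tobe_lock, flags);
	}
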
7540 diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
7541 new file mode 100644
7542 index 0000000..480c62b
7543 --- /dev/null
7544 +++ b/litmus/sched_cedf.c
7545 @@ -0,0 +1,863 @@
7546 +/*
7547 + * litmus/sched_cedf.c
7548 + *
7549 + * Implementation of the C-EDF scheduling algorithm.
7550 + *
7551 + * This implementation is based on G-EDF:
7552 + * - CPUs are clustered around L2 or L3 caches.
7553 + * - Cluster topology is automatically detected (this is arch dependent
7554 + *   and currently works only on x86 --- and only with modern
7555 + *   CPUs that export cpuid4 information)
7556 + * - The plugin _does not_ attempt to put tasks in the right cluster, i.e.,
7557 + *   the programmer needs to be aware of the topology to place tasks
7558 + *   in the desired cluster
7559 + * - the default clustering is GLOBAL, i.e., all online CPUs form a single
7560 + *   cluster; supported configurations are L1 (private cache: pedf), L2,
7561 + *   L3, and ALL (all online CPUs placed in a single cluster).
7562 + *
7563 + * For details on functions, take a look at sched_gsn_edf.c
7564 + *
7565 + * Currently, we do not support changes in the number of online cpus.
7566 + * If the num_online_cpus() dynamically changes, the plugin is broken.
7567 + *
7568 + * This version uses the simple approach and serializes all scheduling
7569 + * decisions by the use of a queue lock. This is probably not the
7570 + * best way to do it, but it should suffice for now.
7571 + */
7572 +
7573 +#include <linux/spinlock.h>
7574 +#include <linux/percpu.h>
7575 +#include <linux/sched.h>
7576 +#include <linux/slab.h>
7577 +
7578 +#include <linux/module.h>
7579 +
7580 +#include <litmus/litmus.h>
7581 +#include <litmus/jobs.h>
7582 +#include <litmus/preempt.h>
7583 +#include <litmus/sched_plugin.h>
7584 +#include <litmus/edf_common.h>
7585 +#include <litmus/sched_trace.h>
7586 +
7587 +#include <litmus/clustered.h>
7588 +
7589 +#include <litmus/bheap.h>
7590 +
7591 +#ifdef CONFIG_SCHED_CPU_AFFINITY
7592 +#include <litmus/affinity.h>
7593 +#endif
7594 +
7595 +/* to configure the cluster size */
7596 +#include <litmus/litmus_proc.h>
7597 +#include <linux/uaccess.h>
7598 +
7599 +/* Reference configuration variable. Determines which cache level is used to
7600 + * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that
7601 + * all CPUs form a single cluster (just like GSN-EDF).
7602 + */
7603 +static enum cache_level cluster_config = GLOBAL_CLUSTER;
7604 +
7605 +struct clusterdomain;
7606 +
7607 +/* cpu_entry_t - maintain the linked and scheduled state
7608 + *
7609 + * A cpu also contains a pointer to the cedf_domain_t cluster
7610 + * that owns it (struct clusterdomain*)
7611 + */
7612 +typedef struct {
7613 + int cpu;
7614 + struct clusterdomain* cluster; /* owning cluster */
7615 + struct task_struct* linked; /* only RT tasks */
7616 + struct task_struct* scheduled; /* only RT tasks */
7617 + atomic_t will_schedule; /* prevent unneeded IPIs */
7618 + struct bheap_node* hn;
7619 +} cpu_entry_t;
7620 +
7621 +/* one cpu_entry_t per CPU */
7622 +DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries);
7623 +
7624 +#define set_will_schedule() \
7625 + (atomic_set(&__get_cpu_var(cedf_cpu_entries).will_schedule, 1))
7626 +#define clear_will_schedule() \
7627 + (atomic_set(&__get_cpu_var(cedf_cpu_entries).will_schedule, 0))
7628 +#define test_will_schedule(cpu) \
7629 + (atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule))
7630 +
7631 +/*
7632 + * In C-EDF there is a cedf domain _per_ cluster
7633 + * The number of clusters is determined dynamically according to the
7634 + * total CPU count and the cluster size
7635 + */
7636 +typedef struct clusterdomain {
7637 + /* rt_domain for this cluster */
7638 + rt_domain_t domain;
7639 + /* cpus in this cluster */
7640 + cpu_entry_t* *cpus;
7641 + /* map of this cluster cpus */
7642 + cpumask_var_t cpu_map;
7643 + /* the cpus queue themselves according to priority in here */
7644 + struct bheap_node *heap_node;
7645 + struct bheap cpu_heap;
7646 + /* lock for this cluster */
7647 +#define cluster_lock domain.ready_lock
7648 +} cedf_domain_t;
7649 +
7650 +/* a cedf_domain per cluster; allocation is done at init/activation time */
7651 +cedf_domain_t *cedf;
7652 +
7653 +#define remote_cluster(cpu) ((cedf_domain_t *) per_cpu(cedf_cpu_entries, cpu).cluster)
7654 +#define task_cpu_cluster(task) remote_cluster(get_partition(task))
7655 +
7656 +/* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling
7657 + * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose
7658 + * information during the initialization of the plugin (e.g., topology)
7659 +#define WANT_ALL_SCHED_EVENTS
7660 + */
7661 +#define VERBOSE_INIT
7662 +
7663 +static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
7664 +{
7665 + cpu_entry_t *a, *b;
7666 + a = _a->value;
7667 + b = _b->value;
7668 + /* Note that a and b are inverted: we want the lowest-priority CPU at
7669 + * the top of the heap.
7670 + */
7671 + return edf_higher_prio(b->linked, a->linked);
7672 +}
7673 +
7674 +/* update_cpu_position - Move the cpu entry to the correct place to maintain
7675 + * order in the cpu queue. Caller must hold cedf lock.
7676 + */
7677 +static void update_cpu_position(cpu_entry_t *entry)
7678 +{
7679 + cedf_domain_t *cluster = entry->cluster;
7680 +
7681 + if (likely(bheap_node_in_heap(entry->hn)))
7682 + bheap_delete(cpu_lower_prio,
7683 + &cluster->cpu_heap,
7684 + entry->hn);
7685 +
7686 + bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn);
7687 +}
7688 +
7689 +/* caller must hold cedf lock */
7690 +static cpu_entry_t* lowest_prio_cpu(cedf_domain_t *cluster)
7691 +{
7692 + struct bheap_node* hn;
7693 + hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap);
7694 + return hn->value;
7695 +}
7696 +
7697 +
7698 +/* link_task_to_cpu - Update the link of a CPU.
7699 + * Handles the case where the to-be-linked task is already
7700 + * scheduled on a different CPU.
7701 + */
7702 +static noinline void link_task_to_cpu(struct task_struct* linked,
7703 + cpu_entry_t *entry)
7704 +{
7705 + cpu_entry_t *sched;
7706 + struct task_struct* tmp;
7707 + int on_cpu;
7708 +
7709 + BUG_ON(linked && !is_realtime(linked));
7710 +
7711 + /* Currently linked task is set to be unlinked. */
7712 + if (entry->linked) {
7713 + entry->linked->rt_param.linked_on = NO_CPU;
7714 + }
7715 +
7716 + /* Link new task to CPU. */
7717 + if (linked) {
7718 + set_rt_flags(linked, RT_F_RUNNING);
7719 + /* handle the case that the task is already scheduled somewhere! */
7720 + on_cpu = linked->rt_param.scheduled_on;
7721 + if (on_cpu != NO_CPU) {
7722 + sched = &per_cpu(cedf_cpu_entries, on_cpu);
7723 + /* this should only happen if not linked already */
7724 + BUG_ON(sched->linked == linked);
7725 +
7726 + /* If we are already scheduled on the CPU to which we
7727 + * wanted to link, we don't need to do the swap --
7728 + * we just link ourselves to the CPU and depend on
7729 + * the caller to get things right.
7730 + */
7731 + if (entry != sched) {
7732 + TRACE_TASK(linked,
7733 + "already scheduled on %d, updating link.\n",
7734 + sched->cpu);
7735 + tmp = sched->linked;
7736 + linked->rt_param.linked_on = sched->cpu;
7737 + sched->linked = linked;
7738 + update_cpu_position(sched);
7739 + linked = tmp;
7740 + }
7741 + }
7742 + if (linked) /* might be NULL due to swap */
7743 + linked->rt_param.linked_on = entry->cpu;
7744 + }
7745 + entry->linked = linked;
7746 +#ifdef WANT_ALL_SCHED_EVENTS
7747 + if (linked)
7748 + TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
7749 + else
7750 + TRACE("NULL linked to %d.\n", entry->cpu);
7751 +#endif
7752 + update_cpu_position(entry);
7753 +}
7754 +
7755 +/* unlink - Make sure a task is not linked any longer to an entry
7756 + * where it was linked before. Must hold cedf_lock.
7757 + */
7758 +static noinline void unlink(struct task_struct* t)
7759 +{
7760 + cpu_entry_t *entry;
7761 +
7762 + if (t->rt_param.linked_on != NO_CPU) {
7763 + /* unlink */
7764 + entry = &per_cpu(cedf_cpu_entries, t->rt_param.linked_on);
7765 + t->rt_param.linked_on = NO_CPU;
7766 + link_task_to_cpu(NULL, entry);
7767 + } else if (is_queued(t)) {
7768 + /* This is an interesting situation: t is scheduled,
7769 + * but was just recently unlinked. It cannot be
7770 + * linked anywhere else (because then it would have
7771 + * been relinked to this CPU), thus it must be in some
7772 + * queue. We must remove it from the list in this
7773 + * case.
7774 + *
7775 + * In the C-EDF case it should be somewhere in the queue of
7776 + * its domain, so we can obtain the domain using
7777 + * task_cpu_cluster().
7778 + */
7779 + remove(&(task_cpu_cluster(t))->domain, t);
7780 + }
7781 +}
7782 +
7783 +
7784 +/* preempt - force a CPU to reschedule
7785 + */
7786 +static void preempt(cpu_entry_t *entry)
7787 +{
7788 + preempt_if_preemptable(entry->scheduled, entry->cpu);
7789 +}
7790 +
7791 +/* requeue - Put an unlinked task into its cluster's domain.
7792 + * Caller must hold cedf_lock.
7793 + */
7794 +static noinline void requeue(struct task_struct* task)
7795 +{
7796 + cedf_domain_t *cluster = task_cpu_cluster(task);
7797 + BUG_ON(!task);
7798 + /* sanity check before insertion */
7799 + BUG_ON(is_queued(task));
7800 +
7801 + if (is_released(task, litmus_clock()))
7802 + __add_ready(&cluster->domain, task);
7803 + else {
7804 + /* it has got to wait */
7805 + add_release(&cluster->domain, task);
7806 + }
7807 +}
7808 +
7809 +#ifdef CONFIG_SCHED_CPU_AFFINITY
7810 +static cpu_entry_t* cedf_get_nearest_available_cpu(
7811 + cedf_domain_t *cluster, cpu_entry_t *start)
7812 +{
7813 + cpu_entry_t *affinity;
7814 +
7815 + get_nearest_available_cpu(affinity, start, cedf_cpu_entries,
7816 +#ifdef CONFIG_RELEASE_MASTER
7817 + cluster->domain.release_master
7818 +#else
7819 + NO_CPU
7820 +#endif
7821 + );
7822 +
7823 + /* make sure CPU is in our cluster */
7824 + if (affinity && cpu_isset(affinity->cpu, *cluster->cpu_map))
7825 + return(affinity);
7826 + else
7827 + return(NULL);
7828 +}
7829 +#endif
7830 +
7831 +
7832 +/* check for any necessary preemptions */
7833 +static void check_for_preemptions(cedf_domain_t *cluster)
7834 +{
7835 + struct task_struct *task;
7836 + cpu_entry_t *last;
7837 +
7838 + for(last = lowest_prio_cpu(cluster);
7839 + edf_preemption_needed(&cluster->domain, last->linked);
7840 + last = lowest_prio_cpu(cluster)) {
7841 + /* preemption necessary */
7842 + task = __take_ready(&cluster->domain);
7843 + TRACE("check_for_preemptions: attempting to link task %d to %d\n",
7844 + task->pid, last->cpu);
7845 +#ifdef CONFIG_SCHED_CPU_AFFINITY
7846 + {
7847 + cpu_entry_t *affinity =
7848 + cedf_get_nearest_available_cpu(cluster,
7849 + &per_cpu(cedf_cpu_entries, task_cpu(task)));
7850 + if(affinity)
7851 + last = affinity;
7852 + else if(last->linked)
7853 + requeue(last->linked);
7854 + }
7855 +#else
7856 + if (last->linked)
7857 + requeue(last->linked);
7858 +#endif
7859 + link_task_to_cpu(task, last);
7860 + preempt(last);
7861 + }
7862 +}
7863 +
7864 +/* cedf_job_arrival: task is either resumed or released */
7865 +static noinline void cedf_job_arrival(struct task_struct* task)
7866 +{
7867 + cedf_domain_t *cluster = task_cpu_cluster(task);
7868 + BUG_ON(!task);
7869 +
7870 + requeue(task);
7871 + check_for_preemptions(cluster);
7872 +}
7873 +
7874 +static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
7875 +{
7876 + cedf_domain_t* cluster = container_of(rt, cedf_domain_t, domain);
7877 + unsigned long flags;
7878 +
7879 + raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
7880 +
7881 + __merge_ready(&cluster->domain, tasks);
7882 + check_for_preemptions(cluster);
7883 +
7884 + raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
7885 +}
7886 +
7887 +/* caller holds cedf_lock */
7888 +static noinline void job_completion(struct task_struct *t, int forced)
7889 +{
7890 + BUG_ON(!t);
7891 +
7892 + sched_trace_task_completion(t, forced);
7893 +
7894 + TRACE_TASK(t, "job_completion().\n");
7895 +
7896 + /* set flags */
7897 + set_rt_flags(t, RT_F_SLEEP);
7898 + /* prepare for next period */
7899 + prepare_for_next_period(t);
7900 + if (is_released(t, litmus_clock()))
7901 + sched_trace_task_release(t);
7902 + /* unlink */
7903 + unlink(t);
7904 + /* requeue
7905 + * But don't requeue a blocking task. */
7906 + if (is_running(t))
7907 + cedf_job_arrival(t);
7908 +}
7909 +
7910 +/* cedf_tick - this function is called for every local timer
7911 + * interrupt.
7912 + *
7913 + * checks whether the current task's budget has expired and, if so,
7914 + * triggers a preemption (delayed if the task is non-preemptable)
7915 + */
7916 +static void cedf_tick(struct task_struct* t)
7917 +{
7918 + if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
7919 + if (!is_np(t)) {
7920 + /* np tasks will be preempted when they become
7921 + * preemptable again
7922 + */
7923 + litmus_reschedule_local();
7924 + set_will_schedule();
7925 + TRACE("cedf_scheduler_tick: "
7926 + "%d is preemptable "
7927 + " => FORCE_RESCHED\n", t->pid);
7928 + } else if (is_user_np(t)) {
7929 + TRACE("cedf_scheduler_tick: "
7930 + "%d is non-preemptable, "
7931 + "preemption delayed.\n", t->pid);
7932 + request_exit_np(t);
7933 + }
7934 + }
7935 +}
7936 +
7937 +/* Getting schedule() right is a bit tricky. schedule() may not make any
7938 + * assumptions on the state of the current task since it may be called for a
7939 + * number of reasons. The reasons include that a scheduler_tick() determined it
7940 + * was necessary, that sys_exit_np() was called, that some Linux
7941 + * subsystem determined so, or even (in the worst case) that there is a bug
7942 + * hidden somewhere. Thus, we must take extreme care to determine what the
7943 + * current state is.
7944 + *
7945 + * The CPU could currently be scheduling a task (or not), be linked (or not).
7946 + *
7947 + * The following assertions for the scheduled task could hold:
7948 + *
7949 + * - !is_running(scheduled) // the job blocks
7950 + * - scheduled->timeslice == 0 // the job completed (forcefully)
7951 + * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall)
7952 + * - linked != scheduled // we need to reschedule (for any reason)
7953 + * - is_np(scheduled) // rescheduling must be delayed,
7954 + * sys_exit_np must be requested
7955 + *
7956 + * Any of these can occur together.
7957 + */
7958 +static struct task_struct* cedf_schedule(struct task_struct * prev)
7959 +{
7960 + cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries);
7961 + cedf_domain_t *cluster = entry->cluster;
7962 + int out_of_time, sleep, preempt, np, exists, blocks;
7963 + struct task_struct* next = NULL;
7964 +
7965 +#ifdef CONFIG_RELEASE_MASTER
7966 + /* Bail out early if we are the release master.
7967 + * The release master never schedules any real-time tasks.
7968 + */
7969 + if (unlikely(cluster->domain.release_master == entry->cpu)) {
7970 + sched_state_task_picked();
7971 + return NULL;
7972 + }
7973 +#endif
7974 +
7975 + raw_spin_lock(&cluster->cluster_lock);
7976 + clear_will_schedule();
7977 +
7978 + /* sanity checking */
7979 + BUG_ON(entry->scheduled && entry->scheduled != prev);
7980 + BUG_ON(entry->scheduled && !is_realtime(prev));
7981 + BUG_ON(is_realtime(prev) && !entry->scheduled);
7982 +
7983 + /* (0) Determine state */
7984 + exists = entry->scheduled != NULL;
7985 + blocks = exists && !is_running(entry->scheduled);
7986 + out_of_time = exists &&
7987 + budget_enforced(entry->scheduled) &&
7988 + budget_exhausted(entry->scheduled);
7989 + np = exists && is_np(entry->scheduled);
7990 + sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
7991 + preempt = entry->scheduled != entry->linked;
7992 +
7993 +#ifdef WANT_ALL_SCHED_EVENTS
7994 + TRACE_TASK(prev, "invoked cedf_schedule.\n");
7995 +#endif
7996 +
7997 + if (exists)
7998 + TRACE_TASK(prev,
7999 + "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
8000 + "state:%d sig:%d\n",
8001 + blocks, out_of_time, np, sleep, preempt,
8002 + prev->state, signal_pending(prev));
8003 + if (entry->linked && preempt)
8004 + TRACE_TASK(prev, "will be preempted by %s/%d\n",
8005 + entry->linked->comm, entry->linked->pid);
8006 +
8007 +
8008 + /* If a task blocks we have no choice but to reschedule.
8009 + */
8010 + if (blocks)
8011 + unlink(entry->scheduled);
8012 +
8013 + /* Request a sys_exit_np() call if we would like to preempt but cannot.
8014 + * We need to make sure to update the link structure anyway in case
8015 + * that we are still linked. Multiple calls to request_exit_np() don't
8016 + * hurt.
8017 + */
8018 + if (np && (out_of_time || preempt || sleep)) {
8019 + unlink(entry->scheduled);
8020 + request_exit_np(entry->scheduled);
8021 + }
8022 +
8023 + /* Any task that is preemptable and either exhausts its execution
8024 + * budget or wants to sleep completes. We may have to reschedule after
8025 + * this. Don't do a job completion if we block (can't have timers running
8026 + * for blocked jobs). Preemptions go first for the same reason.
8027 + */
8028 + if (!np && (out_of_time || sleep) && !blocks && !preempt)
8029 + job_completion(entry->scheduled, !sleep);
8030 +
8031 + /* Link pending task if we became unlinked.
8032 + */
8033 + if (!entry->linked)
8034 + link_task_to_cpu(__take_ready(&cluster->domain), entry);
8035 +
8036 + /* The final scheduling decision. Do we need to switch for some reason?
8037 + * If linked is different from scheduled, then select linked as next.
8038 + */
8039 + if ((!np || blocks) &&
8040 + entry->linked != entry->scheduled) {
8041 + /* Schedule a linked job? */
8042 + if (entry->linked) {
8043 + entry->linked->rt_param.scheduled_on = entry->cpu;
8044 + next = entry->linked;
8045 + }
8046 + if (entry->scheduled) {
8047 + /* not gonna be scheduled soon */
8048 + entry->scheduled->rt_param.scheduled_on = NO_CPU;
8049 + TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
8050 + }
8051 + } else
8052 + /* Only override Linux scheduler if we have a real-time task
8053 + * scheduled that needs to continue.
8054 + */
8055 + if (exists)
8056 + next = prev;
8057 +
8058 + sched_state_task_picked();
8059 + raw_spin_unlock(&cluster->cluster_lock);
8060 +
8061 +#ifdef WANT_ALL_SCHED_EVENTS
8062 + TRACE("cedf_lock released, next=0x%p\n", next);
8063 +
8064 + if (next)
8065 + TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
8066 + else if (exists && !next)
8067 + TRACE("becomes idle at %llu.\n", litmus_clock());
8068 +#endif
8069 +
8070 +
8071 + return next;
8072 +}
8073 +
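To make the decision logic above concrete, consider one common case: the scheduled job exhausts its budget while preemptable, does not block, and is still the job linked to this CPU, i.e. exists = 1, out_of_time = 1, and blocks = np = sleep = preempt = 0. job_completion() then runs with forced = 1, which unlinks the job and, since it is still runnable, requeues it for its next period; the highest-priority ready job (possibly the same task again) is subsequently linked to this CPU and returned as next.
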
8074 +
8075 +/* _finish_switch - we just finished the switch away from prev
8076 + */
8077 +static void cedf_finish_switch(struct task_struct *prev)
8078 +{
8079 + cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries);
8080 +
8081 + entry->scheduled = is_realtime(current) ? current : NULL;
8082 +#ifdef WANT_ALL_SCHED_EVENTS
8083 + TRACE_TASK(prev, "switched away from\n");
8084 +#endif
8085 +}
8086 +
8087 +
8088 +/* Prepare a task for running in RT mode
8089 + */
8090 +static void cedf_task_new(struct task_struct * t, int on_rq, int running)
8091 +{
8092 + unsigned long flags;
8093 + cpu_entry_t* entry;
8094 + cedf_domain_t* cluster;
8095 +
8096 + TRACE("C-EDF: task new %d\n", t->pid);
8097 +
8098 + /* the cluster doesn't change even if t is running */
8099 + cluster = task_cpu_cluster(t);
8100 +
8101 + raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
8102 +
8103 + /* setup job params */
8104 + release_at(t, litmus_clock());
8105 +
8106 + if (running) {
8107 + entry = &per_cpu(cedf_cpu_entries, task_cpu(t));
8108 + BUG_ON(entry->scheduled);
8109 +
8110 +#ifdef CONFIG_RELEASE_MASTER
8111 + if (entry->cpu != cluster->domain.release_master) {
8112 +#endif
8113 + entry->scheduled = t;
8114 + tsk_rt(t)->scheduled_on = task_cpu(t);
8115 +#ifdef CONFIG_RELEASE_MASTER
8116 + } else {
8117 + /* do not schedule on release master */
8118 + preempt(entry); /* force resched */
8119 + tsk_rt(t)->scheduled_on = NO_CPU;
8120 + }
8121 +#endif
8122 + } else {
8123 + t->rt_param.scheduled_on = NO_CPU;
8124 + }
8125 + t->rt_param.linked_on = NO_CPU;
8126 +
8127 + cedf_job_arrival(t);
8128 + raw_spin_unlock_irqrestore(&(cluster->cluster_lock), flags);
8129 +}
8130 +
8131 +static void cedf_task_wake_up(struct task_struct *task)
8132 +{
8133 + unsigned long flags;
8134 + lt_t now;
8135 + cedf_domain_t *cluster;
8136 +
8137 + TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
8138 +
8139 + cluster = task_cpu_cluster(task);
8140 +
8141 + raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
8142 + /* We need to take suspensions because of semaphores into
8143 + * account! If a job resumes after being suspended due to acquiring
8144 + * a semaphore, it should never be treated as a new job release.
8145 + */
8146 + if (get_rt_flags(task) == RT_F_EXIT_SEM) {
8147 + set_rt_flags(task, RT_F_RUNNING);
8148 + } else {
8149 + now = litmus_clock();
8150 + if (is_tardy(task, now)) {
8151 + /* new sporadic release */
8152 + release_at(task, now);
8153 + sched_trace_task_release(task);
8154 + }
8155 + else {
8156 + if (task->rt.time_slice) {
8157 + /* came back in time before deadline
8158 + */
8159 + set_rt_flags(task, RT_F_RUNNING);
8160 + }
8161 + }
8162 + }
8163 + cedf_job_arrival(task);
8164 + raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
8165 +}
8166 +
8167 +static void cedf_task_block(struct task_struct *t)
8168 +{
8169 + unsigned long flags;
8170 + cedf_domain_t *cluster;
8171 +
8172 + TRACE_TASK(t, "block at %llu\n", litmus_clock());
8173 +
8174 + cluster = task_cpu_cluster(t);
8175 +
8176 + /* unlink if necessary */
8177 + raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
8178 + unlink(t);
8179 + raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
8180 +
8181 + BUG_ON(!is_realtime(t));
8182 +}
8183 +
8184 +
8185 +static void cedf_task_exit(struct task_struct * t)
8186 +{
8187 + unsigned long flags;
8188 + cedf_domain_t *cluster = task_cpu_cluster(t);
8189 +
8190 + /* unlink if necessary */
8191 + raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
8192 + unlink(t);
8193 + if (tsk_rt(t)->scheduled_on != NO_CPU) {
8194 + cpu_entry_t *cpu;
8195 + cpu = &per_cpu(cedf_cpu_entries, tsk_rt(t)->scheduled_on);
8196 + cpu->scheduled = NULL;
8197 + tsk_rt(t)->scheduled_on = NO_CPU;
8198 + }
8199 + raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
8200 +
8201 + BUG_ON(!is_realtime(t));
8202 + TRACE_TASK(t, "RIP\n");
8203 +}
8204 +
8205 +static long cedf_admit_task(struct task_struct* tsk)
8206 +{
8207 + return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
8208 +}
8209 +
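In other words, admission requires that the task is already executing on the CPU named in its task_params.cpu field; as the header comment notes, it is the programmer's job to pick a CPU in the desired cluster and to migrate the task there (e.g., via its CPU affinity mask) before it becomes a real-time task.
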
8210 +/* total number of clusters */
8211 +static int num_clusters;
8212 +/* we do not support clusters of different sizes */
8213 +static unsigned int cluster_size;
8214 +
8215 +#ifdef VERBOSE_INIT
8216 +static void print_cluster_topology(cpumask_var_t mask, int cpu)
8217 +{
8218 + int chk;
8219 + char buf[255];
8220 +
8221 + chk = cpulist_scnprintf(buf, 254, mask);
8222 + buf[chk] = '\0';
8223 + printk(KERN_INFO "CPU = %d, shared cpu(s) = %s\n", cpu, buf);
8224 +
8225 +}
8226 +#endif
8227 +
8228 +static int clusters_allocated = 0;
8229 +
8230 +static void cleanup_cedf(void)
8231 +{
8232 + int i;
8233 +
8234 + if (clusters_allocated) {
8235 + for (i = 0; i < num_clusters; i++) {
8236 + kfree(cedf[i].cpus);
8237 + kfree(cedf[i].heap_node);
8238 + free_cpumask_var(cedf[i].cpu_map);
8239 + }
8240 +
8241 + kfree(cedf);
8242 + }
8243 +}
8244 +
8245 +static long cedf_activate_plugin(void)
8246 +{
8247 + int i, j, cpu, ccpu, cpu_count;
8248 + cpu_entry_t *entry;
8249 +
8250 + cpumask_var_t mask;
8251 + int chk = 0;
8252 +
8253 + /* de-allocate old clusters, if any */
8254 + cleanup_cedf();
8255 +
8256 + printk(KERN_INFO "C-EDF: Activate Plugin, cluster configuration = %d\n",
8257 + cluster_config);
8258 +
8259 + /* need to get cluster_size first */
8260 + if(!zalloc_cpumask_var(&mask, GFP_ATOMIC))
8261 + return -ENOMEM;
8262 +
8263 + if (unlikely(cluster_config == GLOBAL_CLUSTER)) {
8264 + cluster_size = num_online_cpus();
8265 + } else {
8266 + chk = get_shared_cpu_map(mask, 0, cluster_config);
8267 + if (chk) {
8268 + /* if chk != 0 then it is the max allowed index */
8269 + printk(KERN_INFO "C-EDF: Cluster configuration = %d "
8270 + "is not supported on this hardware.\n",
8271 + cluster_config);
8272 + /* User should notice that the configuration failed, so
8273 + * let's bail out. */
8274 + return -EINVAL;
8275 + }
8276 +
8277 + cluster_size = cpumask_weight(mask);
8278 + }
8279 +
8280 + if ((num_online_cpus() % cluster_size) != 0) {
8281 + /* this can't be right, some cpus are left out */
8282 + printk(KERN_ERR "C-EDF: Trying to group %d cpus in %d!\n",
8283 + num_online_cpus(), cluster_size);
8284 + return -1;
8285 + }
8286 +
8287 + num_clusters = num_online_cpus() / cluster_size;
8288 + printk(KERN_INFO "C-EDF: %d cluster(s) of size = %d\n",
8289 + num_clusters, cluster_size);
8290 +
8291 + /* initialize clusters */
8292 + cedf = kmalloc(num_clusters * sizeof(cedf_domain_t), GFP_ATOMIC);
8293 + for (i = 0; i < num_clusters; i++) {
8294 +
8295 + cedf[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t),
8296 + GFP_ATOMIC);
8297 + cedf[i].heap_node = kmalloc(
8298 + cluster_size * sizeof(struct bheap_node),
8299 + GFP_ATOMIC);
8300 + bheap_init(&(cedf[i].cpu_heap));
8301 + edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs);
8302 +
8303 + if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC))
8304 + return -ENOMEM;
8305 +#ifdef CONFIG_RELEASE_MASTER
8306 + cedf[i].domain.release_master = atomic_read(&release_master_cpu);
8307 +#endif
8308 + }
8309 +
8310 + /* cycle through clusters and add CPUs to them */
8311 + for (i = 0; i < num_clusters; i++) {
8312 +
8313 + for_each_online_cpu(cpu) {
8314 + /* check if the cpu is already in a cluster */
8315 + for (j = 0; j < num_clusters; j++)
8316 + if (cpumask_test_cpu(cpu, cedf[j].cpu_map))
8317 + break;
8318 + /* if it is in a cluster go to next cpu */
8319 + if (j < num_clusters &&
8320 + cpumask_test_cpu(cpu, cedf[j].cpu_map))
8321 + continue;
8322 +
8323 + /* this cpu isn't in any cluster */
8324 + /* get the shared cpus */
8325 + if (unlikely(cluster_config == GLOBAL_CLUSTER))
8326 + cpumask_copy(mask, cpu_online_mask);
8327 + else
8328 + get_shared_cpu_map(mask, cpu, cluster_config);
8329 +
8330 + cpumask_copy(cedf[i].cpu_map, mask);
8331 +#ifdef VERBOSE_INIT
8332 + print_cluster_topology(mask, cpu);
8333 +#endif
8334 + /* add cpus to current cluster and init cpu_entry_t */
8335 + cpu_count = 0;
8336 + for_each_cpu(ccpu, cedf[i].cpu_map) {
8337 +
8338 + entry = &per_cpu(cedf_cpu_entries, ccpu);
8339 + cedf[i].cpus[cpu_count] = entry;
8340 + atomic_set(&entry->will_schedule, 0);
8341 + entry->cpu = ccpu;
8342 + entry->cluster = &cedf[i];
8343 + entry->hn = &(cedf[i].heap_node[cpu_count]);
8344 + bheap_node_init(&entry->hn, entry);
8345 +
8346 + cpu_count++;
8347 +
8348 + entry->linked = NULL;
8349 + entry->scheduled = NULL;
8350 +#ifdef CONFIG_RELEASE_MASTER
8351 + /* only add CPUs that should schedule jobs */
8352 + if (entry->cpu != entry->cluster->domain.release_master)
8353 +#endif
8354 + update_cpu_position(entry);
8355 + }
8356 + /* done with this cluster */
8357 + break;
8358 + }
8359 + }
8360 +
8361 + free_cpumask_var(mask);
8362 + clusters_allocated = 1;
8363 + return 0;
8364 +}
8365 +
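As a worked example of the sizing logic above (a hypothetical topology, purely for illustration): on a machine with 8 online CPUs where each pair of cores shares an L2 cache, selecting the L2 level yields

	cluster_size = cpumask_weight(mask) = 2
	num_clusters = num_online_cpus() / cluster_size = 8 / 2 = 4

so four cedf_domain_t instances are allocated and each ends up owning the two CPUs of one cache domain. The divisibility check rejects mismatched topologies, e.g. 6 online CPUs with a detected cluster size of 4.
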
8366 +/* Plugin object */
8367 +static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = {
8368 + .plugin_name = "C-EDF",
8369 + .finish_switch = cedf_finish_switch,
8370 + .tick = cedf_tick,
8371 + .task_new = cedf_task_new,
8372 + .complete_job = complete_job,
8373 + .task_exit = cedf_task_exit,
8374 + .schedule = cedf_schedule,
8375 + .task_wake_up = cedf_task_wake_up,
8376 + .task_block = cedf_task_block,
8377 + .admit_task = cedf_admit_task,
8378 + .activate_plugin = cedf_activate_plugin,
8379 +};
8380 +
8381 +static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL;
8382 +
8383 +static int __init init_cedf(void)
8384 +{
8385 + int err, fs;
8386 +
8387 + err = register_sched_plugin(&cedf_plugin);
8388 + if (!err) {
8389 + fs = make_plugin_proc_dir(&cedf_plugin, &cedf_dir);
8390 + if (!fs)
8391 + cluster_file = create_cluster_file(cedf_dir, &cluster_config);
8392 + else
8393 + printk(KERN_ERR "Could not allocate C-EDF procfs dir.\n");
8394 + }
8395 + return err;
8396 +}
8397 +
8398 +static void clean_cedf(void)
8399 +{
8400 + cleanup_cedf();
8401 + if (cluster_file)
8402 + remove_proc_entry("cluster", cedf_dir);
8403 + if (cedf_dir)
8404 + remove_plugin_proc_dir(&cedf_plugin);
8405 +}
8406 +
8407 +module_init(init_cedf);
8408 +module_exit(clean_cedf);
8409 diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c
8410 new file mode 100644
8411 index 0000000..6ed504f
8412 --- /dev/null
8413 +++ b/litmus/sched_gsn_edf.c
8414 @@ -0,0 +1,1030 @@
8415 +/*
8416 + * litmus/sched_gsn_edf.c
8417 + *
8418 + * Implementation of the GSN-EDF scheduling algorithm.
8419 + *
8420 + * This version uses the simple approach and serializes all scheduling
8421 + * decisions by the use of a queue lock. This is probably not the
8422 + * best way to do it, but it should suffice for now.
8423 + */
8424 +
8425 +#include <linux/spinlock.h>
8426 +#include <linux/percpu.h>
8427 +#include <linux/sched.h>
8428 +#include <linux/slab.h>
8429 +
8430 +#include <litmus/litmus.h>
8431 +#include <litmus/jobs.h>
8432 +#include <litmus/sched_plugin.h>
8433 +#include <litmus/edf_common.h>
8434 +#include <litmus/sched_trace.h>
8435 +#include <litmus/trace.h>
8436 +
8437 +#include <litmus/preempt.h>
8438 +
8439 +#include <litmus/bheap.h>
8440 +
8441 +#ifdef CONFIG_SCHED_CPU_AFFINITY
8442 +#include <litmus/affinity.h>
8443 +#endif
8444 +
8445 +#include <linux/module.h>
8446 +
8447 +/* Overview of GSN-EDF operations.
8448 + *
8449 + * For a detailed explanation of GSN-EDF have a look at the FMLP paper. This
8450 + * description only covers how the individual operations are implemented in
8451 + * LITMUS.
8452 + *
8453 + * link_task_to_cpu(T, cpu) - Low-level operation to update the linkage
8454 + * structure (NOT the actually scheduled
8455 + * task). If there is another linked task To
8456 + * already it will set To->linked_on = NO_CPU
8457 + * (thereby removing its association with this
8458 + * CPU). However, it will not requeue the
8459 + * previously linked task (if any). It will set
8460 + * T's state to RT_F_RUNNING and check whether
8461 + * it is already running somewhere else. If T
8462 + * is scheduled somewhere else it will link
8463 + * it to that CPU instead (and pull the linked
8464 + * task to cpu). T may be NULL.
8465 + *
8466 + * unlink(T) - Unlink removes T from all scheduler data
8467 + * structures. If it is linked to some CPU it
8468 + * will link NULL to that CPU. If it is
8469 + * currently queued in the gsnedf queue it will
8470 + * be removed from the rt_domain. It is safe to
8471 + * call unlink(T) if T is not linked. T may not
8472 + * be NULL.
8473 + *
8474 + * requeue(T) - Requeue will insert T into the appropriate
8475 + * queue. If the system is in real-time mode and
8476 + * the T is released already, it will go into the
8477 + * ready queue. If the system is not in
8478 + * T is released already, it will go into the
8479 + * ready queue. If the system is not in
8480 + * real-time mode, then T will go into the
8481 + * queue. That means that T's release time/job
8482 + * no/etc. has to be updated before requeu(T) is
8483 + * no/etc. has to be updated before requeue(T) is
8484 + * when T is already queued. T may not be NULL.
8485 + *
8486 + * gsnedf_job_arrival(T) - This is the catch all function when T enters
8487 + * the system after either a suspension or at a
8488 + * job release. It will queue T (which means it
8489 + * is not safe to call gsnedf_job_arrival(T) if
8490 + * T is already queued) and then check whether a
8491 + * preemption is necessary. If a preemption is
8492 + * necessary it will update the linkage
8493 + * accordingly and cause schedule() to be called
8494 + * (either with an IPI or need_resched). It is
8495 + * safe to call gsnedf_job_arrival(T) if T's
8496 + * next job has not been actually released yet
8497 + * (release time in the future). T will be put
8498 + * on the release queue in that case.
8499 + *
8500 + * job_completion(T) - Take care of everything that needs to be done
8501 + * to prepare T for its next release and place
8502 + * it in the right queue with
8503 + * gsnedf_job_arrival().
8504 + *
8505 + *
8506 + * When we know that T is linked to a CPU, then link_task_to_cpu(NULL, CPU) is
8507 + * equivalent to unlink(T). Note that if you unlink a task from a CPU none of
8508 + * the functions will automatically propagate a pending task from the ready queue
8509 + * to a linked task. This is the job of the calling function (by means of
8510 + * __take_ready).
8511 + */
8512 +
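Putting these operations together, the resume path described above reduces to a small pattern; the sketch below merely mirrors gsnedf_task_wake_up() further down in this file and introduces no new behavior (example_resume_path is a hypothetical name):

	static void example_resume_path(struct task_struct *task)
	{
		unsigned long flags;

		raw_spin_lock_irqsave(&gsnedf_lock, flags);
		/* ...decide whether this counts as a new sporadic release... */
		gsnedf_job_arrival(task);	/* requeue() + check_for_preemptions() */
		raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
	}
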
8513 +
8514 +/* cpu_entry_t - maintain the linked and scheduled state
8515 + */
8516 +typedef struct {
8517 + int cpu;
8518 + struct task_struct* linked; /* only RT tasks */
8519 + struct task_struct* scheduled; /* only RT tasks */
8520 + struct bheap_node* hn;
8521 +} cpu_entry_t;
8522 +DEFINE_PER_CPU(cpu_entry_t, gsnedf_cpu_entries);
8523 +
8524 +cpu_entry_t* gsnedf_cpus[NR_CPUS];
8525 +
8526 +/* the cpus queue themselves according to priority in here */
8527 +static struct bheap_node gsnedf_heap_node[NR_CPUS];
8528 +static struct bheap gsnedf_cpu_heap;
8529 +
8530 +static rt_domain_t gsnedf;
8531 +#define gsnedf_lock (gsnedf.ready_lock)
8532 +
8533 +
8534 +/* Uncomment this if you want to see all scheduling decisions in the
8535 + * TRACE() log.
8536 +#define WANT_ALL_SCHED_EVENTS
8537 + */
8538 +
8539 +static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
8540 +{
8541 + cpu_entry_t *a, *b;
8542 + a = _a->value;
8543 + b = _b->value;
8544 + /* Note that a and b are inverted: we want the lowest-priority CPU at
8545 + * the top of the heap.
8546 + */
8547 + return edf_higher_prio(b->linked, a->linked);
8548 +}
8549 +
8550 +/* update_cpu_position - Move the cpu entry to the correct place to maintain
8551 + * order in the cpu queue. Caller must hold gsnedf lock.
8552 + */
8553 +static void update_cpu_position(cpu_entry_t *entry)
8554 +{
8555 + if (likely(bheap_node_in_heap(entry->hn)))
8556 + bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn);
8557 + bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn);
8558 +}
8559 +
8560 +/* caller must hold gsnedf lock */
8561 +static cpu_entry_t* lowest_prio_cpu(void)
8562 +{
8563 + struct bheap_node* hn;
8564 + hn = bheap_peek(cpu_lower_prio, &gsnedf_cpu_heap);
8565 + return hn->value;
8566 +}
8567 +
8568 +
8569 +/* link_task_to_cpu - Update the link of a CPU.
8570 + * Handles the case where the to-be-linked task is already
8571 + * scheduled on a different CPU.
8572 + */
8573 +static noinline void link_task_to_cpu(struct task_struct* linked,
8574 + cpu_entry_t *entry)
8575 +{
8576 + cpu_entry_t *sched;
8577 + struct task_struct* tmp;
8578 + int on_cpu;
8579 +
8580 + BUG_ON(linked && !is_realtime(linked));
8581 +
8582 + /* Currently linked task is set to be unlinked. */
8583 + if (entry->linked) {
8584 + entry->linked->rt_param.linked_on = NO_CPU;
8585 + }
8586 +
8587 + /* Link new task to CPU. */
8588 + if (linked) {
8589 + set_rt_flags(linked, RT_F_RUNNING);
8590 + /* handle the case that the task is already scheduled somewhere! */
8591 + on_cpu = linked->rt_param.scheduled_on;
8592 + if (on_cpu != NO_CPU) {
8593 + sched = &per_cpu(gsnedf_cpu_entries, on_cpu);
8594 + /* this should only happen if not linked already */
8595 + BUG_ON(sched->linked == linked);
8596 +
8597 + /* If we are already scheduled on the CPU to which we
8598 + * wanted to link, we don't need to do the swap --
8599 + * we just link ourselves to the CPU and depend on
8600 + * the caller to get things right.
8601 + */
8602 + if (entry != sched) {
8603 + TRACE_TASK(linked,
8604 + "already scheduled on %d, updating link.\n",
8605 + sched->cpu);
8606 + tmp = sched->linked;
8607 + linked->rt_param.linked_on = sched->cpu;
8608 + sched->linked = linked;
8609 + update_cpu_position(sched);
8610 + linked = tmp;
8611 + }
8612 + }
8613 + if (linked) /* might be NULL due to swap */
8614 + linked->rt_param.linked_on = entry->cpu;
8615 + }
8616 + entry->linked = linked;
8617 +#ifdef WANT_ALL_SCHED_EVENTS
8618 + if (linked)
8619 + TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
8620 + else
8621 + TRACE("NULL linked to %d.\n", entry->cpu);
8622 +#endif
8623 + update_cpu_position(entry);
8624 +}
8625 +
8626 +/* unlink - Make sure a task is not linked any longer to an entry
8627 + * where it was linked before. Must hold gsnedf_lock.
8628 + */
8629 +static noinline void unlink(struct task_struct* t)
8630 +{
8631 + cpu_entry_t *entry;
8632 +
8633 + if (t->rt_param.linked_on != NO_CPU) {
8634 + /* unlink */
8635 + entry = &per_cpu(gsnedf_cpu_entries, t->rt_param.linked_on);
8636 + t->rt_param.linked_on = NO_CPU;
8637 + link_task_to_cpu(NULL, entry);
8638 + } else if (is_queued(t)) {
8639 + /* This is an interesting situation: t is scheduled,
8640 + * but was just recently unlinked. It cannot be
8641 + * linked anywhere else (because then it would have
8642 + * been relinked to this CPU), thus it must be in some
8643 + * queue. We must remove it from the list in this
8644 + * case.
8645 + */
8646 + remove(&gsnedf, t);
8647 + }
8648 +}
8649 +
8650 +
8651 +/* preempt - force a CPU to reschedule
8652 + */
8653 +static void preempt(cpu_entry_t *entry)
8654 +{
8655 + preempt_if_preemptable(entry->scheduled, entry->cpu);
8656 +}
8657 +
8658 +/* requeue - Put an unlinked task into gsn-edf domain.
8659 + * Caller must hold gsnedf_lock.
8660 + */
8661 +static noinline void requeue(struct task_struct* task)
8662 +{
8663 + BUG_ON(!task);
8664 + /* sanity check before insertion */
8665 + BUG_ON(is_queued(task));
8666 +
8667 + if (is_released(task, litmus_clock()))
8668 + __add_ready(&gsnedf, task);
8669 + else {
8670 + /* it has got to wait */
8671 + add_release(&gsnedf, task);
8672 + }
8673 +}
8674 +
8675 +#ifdef CONFIG_SCHED_CPU_AFFINITY
8676 +static cpu_entry_t* gsnedf_get_nearest_available_cpu(cpu_entry_t *start)
8677 +{
8678 + cpu_entry_t *affinity;
8679 +
8680 + get_nearest_available_cpu(affinity, start, gsnedf_cpu_entries,
8681 +#ifdef CONFIG_RELEASE_MASTER
8682 + gsnedf.release_master
8683 +#else
8684 + NO_CPU
8685 +#endif
8686 + );
8687 +
8688 + return(affinity);
8689 +}
8690 +#endif
8691 +
8692 +/* check for any necessary preemptions */
8693 +static void check_for_preemptions(void)
8694 +{
8695 + struct task_struct *task;
8696 + cpu_entry_t *last;
8697 +
8698 + for (last = lowest_prio_cpu();
8699 + edf_preemption_needed(&gsnedf, last->linked);
8700 + last = lowest_prio_cpu()) {
8701 + /* preemption necessary */
8702 + task = __take_ready(&gsnedf);
8703 + TRACE("check_for_preemptions: attempting to link task %d to %d\n",
8704 + task->pid, last->cpu);
8705 +
8706 +#ifdef CONFIG_SCHED_CPU_AFFINITY
8707 + {
8708 + cpu_entry_t *affinity =
8709 + gsnedf_get_nearest_available_cpu(
8710 + &per_cpu(gsnedf_cpu_entries, task_cpu(task)));
8711 + if (affinity)
8712 + last = affinity;
8713 + else if (last->linked)
8714 + requeue(last->linked);
8715 + }
8716 +#else
8717 + if (last->linked)
8718 + requeue(last->linked);
8719 +#endif
8720 +
8721 + link_task_to_cpu(task, last);
8722 + preempt(last);
8723 + }
8724 +}
8725 +
8726 +/* gsnedf_job_arrival: task is either resumed or released */
8727 +static noinline void gsnedf_job_arrival(struct task_struct* task)
8728 +{
8729 + BUG_ON(!task);
8730 +
8731 + requeue(task);
8732 + check_for_preemptions();
8733 +}
8734 +
8735 +static void gsnedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
8736 +{
8737 + unsigned long flags;
8738 +
8739 + raw_spin_lock_irqsave(&gsnedf_lock, flags);
8740 +
8741 + __merge_ready(rt, tasks);
8742 + check_for_preemptions();
8743 +
8744 + raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
8745 +}
8746 +
8747 +/* caller holds gsnedf_lock */
8748 +static noinline void job_completion(struct task_struct *t, int forced)
8749 +{
8750 + BUG_ON(!t);
8751 +
8752 + sched_trace_task_completion(t, forced);
8753 +
8754 + TRACE_TASK(t, "job_completion().\n");
8755 +
8756 + /* set flags */
8757 + set_rt_flags(t, RT_F_SLEEP);
8758 + /* prepare for next period */
8759 + prepare_for_next_period(t);
8760 + if (is_released(t, litmus_clock()))
8761 + sched_trace_task_release(t);
8762 + /* unlink */
8763 + unlink(t);
8764 + /* requeue
8765 + * But don't requeue a blocking task. */
8766 + if (is_running(t))
8767 + gsnedf_job_arrival(t);
8768 +}
8769 +
8770 +/* gsnedf_tick - this function is called for every local timer
8771 + * interrupt.
8772 + *
8773 + * checks whether the current task's budget has expired and, if so,
8774 + * triggers a preemption (delayed if the task is non-preemptable)
8775 + */
8776 +static void gsnedf_tick(struct task_struct* t)
8777 +{
8778 + if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
8779 + if (!is_np(t)) {
8780 + /* np tasks will be preempted when they become
8781 + * preemptable again
8782 + */
8783 + litmus_reschedule_local();
8784 + TRACE("gsnedf_scheduler_tick: "
8785 + "%d is preemptable "
8786 + " => FORCE_RESCHED\n", t->pid);
8787 + } else if (is_user_np(t)) {
8788 + TRACE("gsnedf_scheduler_tick: "
8789 + "%d is non-preemptable, "
8790 + "preemption delayed.\n", t->pid);
8791 + request_exit_np(t);
8792 + }
8793 + }
8794 +}
8795 +
8796 +/* Getting schedule() right is a bit tricky. schedule() may not make any
8797 + * assumptions on the state of the current task since it may be called for a
8798 + * number of reasons. The reasons include that a scheduler_tick() determined it
8799 + * was necessary, that sys_exit_np() was called, that some Linux
8800 + * subsystem determined so, or even (in the worst case) that there is a bug
8801 + * hidden somewhere. Thus, we must take extreme care to determine what the
8802 + * current state is.
8803 + *
8804 + * The CPU could currently be scheduling a task (or not), be linked (or not).
8805 + *
8806 + * The following assertions for the scheduled task could hold:
8807 + *
8808 + * - !is_running(scheduled) // the job blocks
8809 + * - scheduled->timeslice == 0 // the job completed (forcefully)
8810 + * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall)
8811 + * - linked != scheduled // we need to reschedule (for any reason)
8812 + * - is_np(scheduled) // rescheduling must be delayed,
8813 + * sys_exit_np must be requested
8814 + *
8815 + * Any of these can occur together.
8816 + */
8817 +static struct task_struct* gsnedf_schedule(struct task_struct * prev)
8818 +{
8819 + cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
8820 + int out_of_time, sleep, preempt, np, exists, blocks;
8821 + struct task_struct* next = NULL;
8822 +
8823 +#ifdef CONFIG_RELEASE_MASTER
8824 + /* Bail out early if we are the release master.
8825 + * The release master never schedules any real-time tasks.
8826 + */
8827 + if (unlikely(gsnedf.release_master == entry->cpu)) {
8828 + sched_state_task_picked();
8829 + return NULL;
8830 + }
8831 +#endif
8832 +
8833 + raw_spin_lock(&gsnedf_lock);
8834 +
8835 + /* sanity checking */
8836 + BUG_ON(entry->scheduled && entry->scheduled != prev);
8837 + BUG_ON(entry->scheduled && !is_realtime(prev));
8838 + BUG_ON(is_realtime(prev) && !entry->scheduled);
8839 +
8840 + /* (0) Determine state */
8841 + exists = entry->scheduled != NULL;
8842 + blocks = exists && !is_running(entry->scheduled);
8843 + out_of_time = exists &&
8844 + budget_enforced(entry->scheduled) &&
8845 + budget_exhausted(entry->scheduled);
8846 + np = exists && is_np(entry->scheduled);
8847 + sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
8848 + preempt = entry->scheduled != entry->linked;
8849 +
8850 +#ifdef WANT_ALL_SCHED_EVENTS
8851 + TRACE_TASK(prev, "invoked gsnedf_schedule.\n");
8852 +#endif
8853 +
8854 + if (exists)
8855 + TRACE_TASK(prev,
8856 + "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
8857 + "state:%d sig:%d\n",
8858 + blocks, out_of_time, np, sleep, preempt,
8859 + prev->state, signal_pending(prev));
8860 + if (entry->linked && preempt)
8861 + TRACE_TASK(prev, "will be preempted by %s/%d\n",
8862 + entry->linked->comm, entry->linked->pid);
8863 +
8864 +
8865 + /* If a task blocks we have no choice but to reschedule.
8866 + */
8867 + if (blocks)
8868 + unlink(entry->scheduled);
8869 +
8870 + /* Request a sys_exit_np() call if we would like to preempt but cannot.
8871 + * We need to make sure to update the link structure anyway in case
8872 + * that we are still linked. Multiple calls to request_exit_np() don't
8873 + * hurt.
8874 + */
8875 + if (np && (out_of_time || preempt || sleep)) {
8876 + unlink(entry->scheduled);
8877 + request_exit_np(entry->scheduled);
8878 + }
8879 +
8880 + /* Any task that is preemptable and either exhausts its execution
8881 + * budget or wants to sleep completes. We may have to reschedule after
8882 + * this. Don't do a job completion if we block (can't have timers running
8883 + * for blocked jobs). Preemptions go first for the same reason.
8884 + */
8885 + if (!np && (out_of_time || sleep) && !blocks && !preempt)
8886 + job_completion(entry->scheduled, !sleep);
8887 +
8888 + /* Link pending task if we became unlinked.
8889 + */
8890 + if (!entry->linked)
8891 + link_task_to_cpu(__take_ready(&gsnedf), entry);
8892 +
8893 + /* The final scheduling decision. Do we need to switch for some reason?
8894 + * If linked is different from scheduled, then select linked as next.
8895 + */
8896 + if ((!np || blocks) &&
8897 + entry->linked != entry->scheduled) {
8898 + /* Schedule a linked job? */
8899 + if (entry->linked) {
8900 + entry->linked->rt_param.scheduled_on = entry->cpu;
8901 + next = entry->linked;
8902 + TRACE_TASK(next, "scheduled_on = P%d\n", smp_processor_id());
8903 + }
8904 + if (entry->scheduled) {
8905 + /* not gonna be scheduled soon */
8906 + entry->scheduled->rt_param.scheduled_on = NO_CPU;
8907 + TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
8908 + }
8909 + } else
8910 + /* Only override Linux scheduler if we have a real-time task
8911 + * scheduled that needs to continue.
8912 + */
8913 + if (exists)
8914 + next = prev;
8915 +
8916 + sched_state_task_picked();
8917 +
8918 + raw_spin_unlock(&gsnedf_lock);
8919 +
8920 +#ifdef WANT_ALL_SCHED_EVENTS
8921 + TRACE("gsnedf_lock released, next=0x%p\n", next);
8922 +
8923 + if (next)
8924 + TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
8925 + else if (exists && !next)
8926 + TRACE("becomes idle at %llu.\n", litmus_clock());
8927 +#endif
8928 +
8929 +
8930 + return next;
8931 +}
8932 +
8933 +
8934 +/* _finish_switch - we just finished the switch away from prev
8935 + */
8936 +static void gsnedf_finish_switch(struct task_struct *prev)
8937 +{
8938 + cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
8939 +
8940 + entry->scheduled = is_realtime(current) ? current : NULL;
8941 +#ifdef WANT_ALL_SCHED_EVENTS
8942 + TRACE_TASK(prev, "switched away from\n");
8943 +#endif
8944 +}
8945 +
8946 +
8947 +/* Prepare a task for running in RT mode
8948 + */
8949 +static void gsnedf_task_new(struct task_struct * t, int on_rq, int running)
8950 +{
8951 + unsigned long flags;
8952 + cpu_entry_t* entry;
8953 +
8954 + TRACE("gsn edf: task new %d\n", t->pid);
8955 +
8956 + raw_spin_lock_irqsave(&gsnedf_lock, flags);
8957 +
8958 + /* setup job params */
8959 + release_at(t, litmus_clock());
8960 +
8961 + if (running) {
8962 + entry = &per_cpu(gsnedf_cpu_entries, task_cpu(t));
8963 + BUG_ON(entry->scheduled);
8964 +
8965 +#ifdef CONFIG_RELEASE_MASTER
8966 + if (entry->cpu != gsnedf.release_master) {
8967 +#endif
8968 + entry->scheduled = t;
8969 + tsk_rt(t)->scheduled_on = task_cpu(t);
8970 +#ifdef CONFIG_RELEASE_MASTER
8971 + } else {
8972 + /* do not schedule on release master */
8973 + preempt(entry); /* force resched */
8974 + tsk_rt(t)->scheduled_on = NO_CPU;
8975 + }
8976 +#endif
8977 + } else {
8978 + t->rt_param.scheduled_on = NO_CPU;
8979 + }
8980 + t->rt_param.linked_on = NO_CPU;
8981 +
8982 + gsnedf_job_arrival(t);
8983 + raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
8984 +}
8985 +
8986 +static void gsnedf_task_wake_up(struct task_struct *task)
8987 +{
8988 + unsigned long flags;
8989 + lt_t now;
8990 +
8991 + TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
8992 +
8993 + raw_spin_lock_irqsave(&gsnedf_lock, flags);
8994 + /* We need to take suspensions because of semaphores into
8995 + * account! If a job resumes after being suspended due to acquiring
8996 + * a semaphore, it should never be treated as a new job release.
8997 + */
8998 + if (get_rt_flags(task) == RT_F_EXIT_SEM) {
8999 + set_rt_flags(task, RT_F_RUNNING);
9000 + } else {
9001 + now = litmus_clock();
9002 + if (is_tardy(task, now)) {
9003 + /* new sporadic release */
9004 + release_at(task, now);
9005 + sched_trace_task_release(task);
9006 + }
9007 + else {
9008 + if (task->rt.time_slice) {
9009 + /* came back in time before deadline
9010 + */
9011 + set_rt_flags(task, RT_F_RUNNING);
9012 + }
9013 + }
9014 + }
9015 + gsnedf_job_arrival(task);
9016 + raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
9017 +}
9018 +
9019 +static void gsnedf_task_block(struct task_struct *t)
9020 +{
9021 + unsigned long flags;
9022 +
9023 + TRACE_TASK(t, "block at %llu\n", litmus_clock());
9024 +
9025 + /* unlink if necessary */
9026 + raw_spin_lock_irqsave(&gsnedf_lock, flags);
9027 + unlink(t);
9028 + raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
9029 +
9030 + BUG_ON(!is_realtime(t));
9031 +}
9032 +
9033 +
9034 +static void gsnedf_task_exit(struct task_struct * t)
9035 +{
9036 + unsigned long flags;
9037 +
9038 + /* unlink if necessary */
9039 + raw_spin_lock_irqsave(&gsnedf_lock, flags);
9040 + unlink(t);
9041 + if (tsk_rt(t)->scheduled_on != NO_CPU) {
9042 + gsnedf_cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL;
9043 + tsk_rt(t)->scheduled_on = NO_CPU;
9044 + }
9045 + raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
9046 +
9047 + BUG_ON(!is_realtime(t));
9048 + TRACE_TASK(t, "RIP\n");
9049 +}
9050 +
9051 +
9052 +static long gsnedf_admit_task(struct task_struct* tsk)
9053 +{
9054 + return 0;
9055 +}
9056 +
9057 +#ifdef CONFIG_LITMUS_LOCKING
9058 +
9059 +#include <litmus/fdso.h>
9060 +
9061 +/* called with IRQs off */
9062 +static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
9063 +{
9064 + int linked_on;
9065 + int check_preempt = 0;
9066 +
9067 + raw_spin_lock(&gsnedf_lock);
9068 +
9069 + TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid);
9070 + tsk_rt(t)->inh_task = prio_inh;
9071 +
9072 + linked_on = tsk_rt(t)->linked_on;
9073 +
9074 + /* If it is scheduled, then we need to reorder the CPU heap. */
9075 + if (linked_on != NO_CPU) {
9076 + TRACE_TASK(t, "%s: linked on %d\n",
9077 + __FUNCTION__, linked_on);
9078 + /* Holder is scheduled; need to re-order CPUs.
9079 + * We can't use heap_decrease() here since
9080 + * the cpu_heap is ordered in reverse direction, so
9081 + * it is actually an increase. */
9082 + bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap,
9083 + gsnedf_cpus[linked_on]->hn);
9084 + bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap,
9085 + gsnedf_cpus[linked_on]->hn);
9086 + } else {
9087 + /* holder may be queued: first stop queue changes */
9088 + raw_spin_lock(&gsnedf.release_lock);
9089 + if (is_queued(t)) {
9090 + TRACE_TASK(t, "%s: is queued\n",
9091 + __FUNCTION__);
9092 + /* We need to update the position of holder in some
9093 +			 * heap. Note that this could be a release heap if
9094 + * budget enforcement is used and this job overran. */
9095 + check_preempt =
9096 + !bheap_decrease(edf_ready_order,
9097 + tsk_rt(t)->heap_node);
9098 + } else {
9099 + /* Nothing to do: if it is not queued and not linked
9100 + * then it is either sleeping or currently being moved
9101 + * by other code (e.g., a timer interrupt handler) that
9102 + * will use the correct priority when enqueuing the
9103 + * task. */
9104 + TRACE_TASK(t, "%s: is NOT queued => Done.\n",
9105 + __FUNCTION__);
9106 + }
9107 + raw_spin_unlock(&gsnedf.release_lock);
9108 +
9109 + /* If holder was enqueued in a release heap, then the following
9110 + * preemption check is pointless, but we can't easily detect
9111 + * that case. If you want to fix this, then consider that
9112 + * simply adding a state flag requires O(n) time to update when
9113 + * releasing n tasks, which conflicts with the goal to have
9114 + * O(log n) merges. */
9115 + if (check_preempt) {
9116 + /* heap_decrease() hit the top level of the heap: make
9117 + * sure preemption checks get the right task, not the
9118 + * potentially stale cache. */
9119 + bheap_uncache_min(edf_ready_order,
9120 + &gsnedf.ready_queue);
9121 + check_for_preemptions();
9122 + }
9123 + }
9124 +
9125 + raw_spin_unlock(&gsnedf_lock);
9126 +}
9127 +
9128 +/* called with IRQs off */
9129 +static void clear_priority_inheritance(struct task_struct* t)
9130 +{
9131 + raw_spin_lock(&gsnedf_lock);
9132 +
9133 + /* A job only stops inheriting a priority when it releases a
9134 + * resource. Thus we can make the following assumption.*/
9135 + BUG_ON(tsk_rt(t)->scheduled_on == NO_CPU);
9136 +
9137 + TRACE_TASK(t, "priority restored\n");
9138 + tsk_rt(t)->inh_task = NULL;
9139 +
9140 + /* Check if rescheduling is necessary. We can't use heap_decrease()
9141 + * since the priority was effectively lowered. */
9142 + unlink(t);
9143 + gsnedf_job_arrival(t);
9144 +
9145 + raw_spin_unlock(&gsnedf_lock);
9146 +}
9147 +
9148 +
9149 +/* ******************** FMLP support ********************** */
9150 +
9151 +/* struct for semaphore with priority inheritance */
9152 +struct fmlp_semaphore {
9153 + struct litmus_lock litmus_lock;
9154 +
9155 + /* current resource holder */
9156 + struct task_struct *owner;
9157 +
9158 + /* highest-priority waiter */
9159 + struct task_struct *hp_waiter;
9160 +
9161 + /* FIFO queue of waiting tasks */
9162 + wait_queue_head_t wait;
9163 +};
9164 +
9165 +static inline struct fmlp_semaphore* fmlp_from_lock(struct litmus_lock* lock)
9166 +{
9167 + return container_of(lock, struct fmlp_semaphore, litmus_lock);
9168 +}
9169 +
9170 +/* caller is responsible for locking */
9171 +struct task_struct* find_hp_waiter(struct fmlp_semaphore *sem,
9172 + struct task_struct* skip)
9173 +{
9174 + struct list_head *pos;
9175 + struct task_struct *queued, *found = NULL;
9176 +
9177 + list_for_each(pos, &sem->wait.task_list) {
9178 + queued = (struct task_struct*) list_entry(pos, wait_queue_t,
9179 + task_list)->private;
9180 +
9181 + /* Compare task prios, find high prio task. */
9182 + if (queued != skip && edf_higher_prio(queued, found))
9183 + found = queued;
9184 + }
9185 + return found;
9186 +}
9187 +
9188 +int gsnedf_fmlp_lock(struct litmus_lock* l)
9189 +{
9190 + struct task_struct* t = current;
9191 + struct fmlp_semaphore *sem = fmlp_from_lock(l);
9192 + wait_queue_t wait;
9193 + unsigned long flags;
9194 +
9195 + if (!is_realtime(t))
9196 + return -EPERM;
9197 +
9198 + spin_lock_irqsave(&sem->wait.lock, flags);
9199 +
9200 + if (sem->owner) {
9201 + /* resource is not free => must suspend and wait */
9202 +
9203 + init_waitqueue_entry(&wait, t);
9204 +
9205 + /* FIXME: interruptible would be nice some day */
9206 + set_task_state(t, TASK_UNINTERRUPTIBLE);
9207 +
9208 + __add_wait_queue_tail_exclusive(&sem->wait, &wait);
9209 +
9210 + /* check if we need to activate priority inheritance */
9211 + if (edf_higher_prio(t, sem->hp_waiter)) {
9212 + sem->hp_waiter = t;
9213 + if (edf_higher_prio(t, sem->owner))
9214 + set_priority_inheritance(sem->owner, sem->hp_waiter);
9215 + }
9216 +
9217 + TS_LOCK_SUSPEND;
9218 +
9219 + /* release lock before sleeping */
9220 + spin_unlock_irqrestore(&sem->wait.lock, flags);
9221 +
9222 + /* We depend on the FIFO order. Thus, we don't need to recheck
9223 + * when we wake up; we are guaranteed to have the lock since
9224 + * there is only one wake up per release.
9225 + */
9226 +
9227 + schedule();
9228 +
9229 + TS_LOCK_RESUME;
9230 +
9231 + /* Since we hold the lock, no other task will change
9232 + * ->owner. We can thus check it without acquiring the spin
9233 + * lock. */
9234 + BUG_ON(sem->owner != t);
9235 + } else {
9236 + /* it's ours now */
9237 + sem->owner = t;
9238 +
9239 + spin_unlock_irqrestore(&sem->wait.lock, flags);
9240 + }
9241 +
9242 + return 0;
9243 +}
9244 +
9245 +int gsnedf_fmlp_unlock(struct litmus_lock* l)
9246 +{
9247 + struct task_struct *t = current, *next;
9248 + struct fmlp_semaphore *sem = fmlp_from_lock(l);
9249 + unsigned long flags;
9250 + int err = 0;
9251 +
9252 + spin_lock_irqsave(&sem->wait.lock, flags);
9253 +
9254 + if (sem->owner != t) {
9255 + err = -EINVAL;
9256 + goto out;
9257 + }
9258 +
9259 + /* check if there are jobs waiting for this resource */
9260 + next = __waitqueue_remove_first(&sem->wait);
9261 + if (next) {
9262 +		/* next becomes the resource holder */
9263 + sem->owner = next;
9264 + TRACE_CUR("lock ownership passed to %s/%d\n", next->comm, next->pid);
9265 +
9266 + /* determine new hp_waiter if necessary */
9267 + if (next == sem->hp_waiter) {
9268 + TRACE_TASK(next, "was highest-prio waiter\n");
9269 + /* next has the highest priority --- it doesn't need to
9270 + * inherit. However, we need to make sure that the
9271 + * next-highest priority in the queue is reflected in
9272 + * hp_waiter. */
9273 + sem->hp_waiter = find_hp_waiter(sem, next);
9274 + if (sem->hp_waiter)
9275 + TRACE_TASK(sem->hp_waiter, "is new highest-prio waiter\n");
9276 + else
9277 + TRACE("no further waiters\n");
9278 + } else {
9279 + /* Well, if next is not the highest-priority waiter,
9280 + * then it ought to inherit the highest-priority
9281 + * waiter's priority. */
9282 + set_priority_inheritance(next, sem->hp_waiter);
9283 + }
9284 +
9285 + /* wake up next */
9286 + wake_up_process(next);
9287 + } else
9288 + /* becomes available */
9289 + sem->owner = NULL;
9290 +
9291 + /* we lose the benefit of priority inheritance (if any) */
9292 + if (tsk_rt(t)->inh_task)
9293 + clear_priority_inheritance(t);
9294 +
9295 +out:
9296 + spin_unlock_irqrestore(&sem->wait.lock, flags);
9297 +
9298 + return err;
9299 +}
9300 +
9301 +int gsnedf_fmlp_close(struct litmus_lock* l)
9302 +{
9303 + struct task_struct *t = current;
9304 + struct fmlp_semaphore *sem = fmlp_from_lock(l);
9305 + unsigned long flags;
9306 +
9307 + int owner;
9308 +
9309 + spin_lock_irqsave(&sem->wait.lock, flags);
9310 +
9311 + owner = sem->owner == t;
9312 +
9313 + spin_unlock_irqrestore(&sem->wait.lock, flags);
9314 +
9315 + if (owner)
9316 + gsnedf_fmlp_unlock(l);
9317 +
9318 + return 0;
9319 +}
9320 +
9321 +void gsnedf_fmlp_free(struct litmus_lock* lock)
9322 +{
9323 + kfree(fmlp_from_lock(lock));
9324 +}
9325 +
9326 +static struct litmus_lock_ops gsnedf_fmlp_lock_ops = {
9327 + .close = gsnedf_fmlp_close,
9328 + .lock = gsnedf_fmlp_lock,
9329 + .unlock = gsnedf_fmlp_unlock,
9330 + .deallocate = gsnedf_fmlp_free,
9331 +};
9332 +
9333 +static struct litmus_lock* gsnedf_new_fmlp(void)
9334 +{
9335 + struct fmlp_semaphore* sem;
9336 +
9337 + sem = kmalloc(sizeof(*sem), GFP_KERNEL);
9338 + if (!sem)
9339 + return NULL;
9340 +
9341 + sem->owner = NULL;
9342 + sem->hp_waiter = NULL;
9343 + init_waitqueue_head(&sem->wait);
9344 + sem->litmus_lock.ops = &gsnedf_fmlp_lock_ops;
9345 +
9346 + return &sem->litmus_lock;
9347 +}
9348 +
9349 +/* **** lock constructor **** */
9350 +
9351 +
9352 +static long gsnedf_allocate_lock(struct litmus_lock **lock, int type,
9353 + void* __user unused)
9354 +{
9355 + int err = -ENXIO;
9356 +
9357 + /* GSN-EDF currently only supports the FMLP for global resources. */
9358 + switch (type) {
9359 +
9360 + case FMLP_SEM:
9361 + /* Flexible Multiprocessor Locking Protocol */
9362 + *lock = gsnedf_new_fmlp();
9363 + if (*lock)
9364 + err = 0;
9365 + else
9366 + err = -ENOMEM;
9367 + break;
9368 +
9369 + };
9370 +
9371 + return err;
9372 +}
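+/* Illustrative sketch of how this allocator is reached: a real-time task gets
+ * to gsnedf_allocate_lock() through the generic FDSO layer when it opens an
+ * object of type FMLP_SEM.  Assuming the liblitmus helpers of this release
+ * keep their usual names (open_fmlp_sem(), litmus_lock(), litmus_unlock()),
+ * the user-space side would look roughly like:
+ *
+ *	int fd = open("/tmp/shared_ns", O_RDONLY | O_CREAT, 0600);
+ *	int od = open_fmlp_sem(fd, 0);	// -> gsnedf_new_fmlp()
+ *	litmus_lock(od);		// -> gsnedf_fmlp_lock()
+ *	// critical section; priority inheritance applies while it is held
+ *	litmus_unlock(od);		// -> gsnedf_fmlp_unlock()
+ */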
9373 +
9374 +#endif
9375 +
9376 +
9377 +static long gsnedf_activate_plugin(void)
9378 +{
9379 + int cpu;
9380 + cpu_entry_t *entry;
9381 +
9382 + bheap_init(&gsnedf_cpu_heap);
9383 +#ifdef CONFIG_RELEASE_MASTER
9384 + gsnedf.release_master = atomic_read(&release_master_cpu);
9385 +#endif
9386 +
9387 + for_each_online_cpu(cpu) {
9388 + entry = &per_cpu(gsnedf_cpu_entries, cpu);
9389 + bheap_node_init(&entry->hn, entry);
9390 + entry->linked = NULL;
9391 + entry->scheduled = NULL;
9392 +#ifdef CONFIG_RELEASE_MASTER
9393 + if (cpu != gsnedf.release_master) {
9394 +#endif
9395 + TRACE("GSN-EDF: Initializing CPU #%d.\n", cpu);
9396 + update_cpu_position(entry);
9397 +#ifdef CONFIG_RELEASE_MASTER
9398 + } else {
9399 + TRACE("GSN-EDF: CPU %d is release master.\n", cpu);
9400 + }
9401 +#endif
9402 + }
9403 + return 0;
9404 +}
9405 +
9406 +/* Plugin object */
9407 +static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = {
9408 + .plugin_name = "GSN-EDF",
9409 + .finish_switch = gsnedf_finish_switch,
9410 + .tick = gsnedf_tick,
9411 + .task_new = gsnedf_task_new,
9412 + .complete_job = complete_job,
9413 + .task_exit = gsnedf_task_exit,
9414 + .schedule = gsnedf_schedule,
9415 + .task_wake_up = gsnedf_task_wake_up,
9416 + .task_block = gsnedf_task_block,
9417 + .admit_task = gsnedf_admit_task,
9418 + .activate_plugin = gsnedf_activate_plugin,
9419 +#ifdef CONFIG_LITMUS_LOCKING
9420 + .allocate_lock = gsnedf_allocate_lock,
9421 +#endif
9422 +};
9423 +
9424 +
9425 +static int __init init_gsn_edf(void)
9426 +{
9427 + int cpu;
9428 + cpu_entry_t *entry;
9429 +
9430 + bheap_init(&gsnedf_cpu_heap);
9431 + /* initialize CPU state */
9432 + for (cpu = 0; cpu < NR_CPUS; cpu++) {
9433 + entry = &per_cpu(gsnedf_cpu_entries, cpu);
9434 + gsnedf_cpus[cpu] = entry;
9435 + entry->cpu = cpu;
9436 + entry->hn = &gsnedf_heap_node[cpu];
9437 + bheap_node_init(&entry->hn, entry);
9438 + }
9439 + edf_domain_init(&gsnedf, NULL, gsnedf_release_jobs);
9440 + return register_sched_plugin(&gsn_edf_plugin);
9441 +}
9442 +
9443 +
9444 +module_init(init_gsn_edf);
9445 diff --git a/litmus/sched_litmus.c b/litmus/sched_litmus.c
9446 new file mode 100644
9447 index 0000000..5a15ce9
9448 --- /dev/null
9449 +++ b/litmus/sched_litmus.c
9450 @@ -0,0 +1,325 @@
9451 +/* This file is included from kernel/sched.c */
9452 +
9453 +#include <litmus/litmus.h>
9454 +#include <litmus/budget.h>
9455 +#include <litmus/sched_plugin.h>
9456 +#include <litmus/preempt.h>
9457 +
9458 +static void update_time_litmus(struct rq *rq, struct task_struct *p)
9459 +{
9460 + u64 delta = rq->clock - p->se.exec_start;
9461 + if (unlikely((s64)delta < 0))
9462 + delta = 0;
9463 + /* per job counter */
9464 + p->rt_param.job_params.exec_time += delta;
9465 + /* task counter */
9466 + p->se.sum_exec_runtime += delta;
9467 + /* sched_clock() */
9468 + p->se.exec_start = rq->clock;
9469 + cpuacct_charge(p, delta);
9470 +}
9471 +
9472 +static void double_rq_lock(struct rq *rq1, struct rq *rq2);
9473 +static void double_rq_unlock(struct rq *rq1, struct rq *rq2);
9474 +
9475 +/*
9476 + * litmus_tick gets called by scheduler_tick() with HZ freq
9477 + * Interrupts are disabled
9478 + */
9479 +static void litmus_tick(struct rq *rq, struct task_struct *p)
9480 +{
9481 + TS_PLUGIN_TICK_START;
9482 +
9483 + if (is_realtime(p))
9484 + update_time_litmus(rq, p);
9485 +
9486 + /* plugin tick */
9487 + litmus->tick(p);
9488 +
9489 + TS_PLUGIN_TICK_END;
9490 +
9491 + return;
9492 +}
9493 +
9494 +static struct task_struct *
9495 +litmus_schedule(struct rq *rq, struct task_struct *prev)
9496 +{
9497 + struct rq* other_rq;
9498 + struct task_struct *next;
9499 +
9500 + long was_running;
9501 + lt_t _maybe_deadlock = 0;
9502 +
9503 + /* let the plugin schedule */
9504 + next = litmus->schedule(prev);
9505 +
9506 + sched_state_plugin_check();
9507 +
9508 + /* check if a global plugin pulled a task from a different RQ */
9509 + if (next && task_rq(next) != rq) {
9510 + /* we need to migrate the task */
9511 + other_rq = task_rq(next);
9512 + TRACE_TASK(next, "migrate from %d\n", other_rq->cpu);
9513 +
9514 + /* while we drop the lock, the prev task could change its
9515 + * state
9516 + */
9517 + was_running = is_running(prev);
9518 + mb();
9519 + raw_spin_unlock(&rq->lock);
9520 +
9521 + /* Don't race with a concurrent switch. This could deadlock in
9522 + * the case of cross or circular migrations. It's the job of
9523 + * the plugin to make sure that doesn't happen.
9524 + */
9525 + TRACE_TASK(next, "stack_in_use=%d\n",
9526 + next->rt_param.stack_in_use);
9527 + if (next->rt_param.stack_in_use != NO_CPU) {
9528 + TRACE_TASK(next, "waiting to deschedule\n");
9529 + _maybe_deadlock = litmus_clock();
9530 + }
9531 + while (next->rt_param.stack_in_use != NO_CPU) {
9532 + cpu_relax();
9533 + mb();
9534 + if (next->rt_param.stack_in_use == NO_CPU)
9535 + TRACE_TASK(next,"descheduled. Proceeding.\n");
9536 +
9537 + if (lt_before(_maybe_deadlock + 10000000,
9538 + litmus_clock())) {
9539 + /* We've been spinning for 10ms.
9540 + * Something can't be right!
9541 + * Let's abandon the task and bail out; at least
9542 + * we will have debug info instead of a hard
9543 + * deadlock.
9544 + */
9545 + TRACE_TASK(next,"stack too long in use. "
9546 + "Deadlock?\n");
9547 + next = NULL;
9548 +
9549 + /* bail out */
9550 + raw_spin_lock(&rq->lock);
9551 + return next;
9552 + }
9553 + }
9554 +#ifdef __ARCH_WANT_UNLOCKED_CTXSW
9555 + if (next->oncpu)
9556 + TRACE_TASK(next, "waiting for !oncpu");
9557 + while (next->oncpu) {
9558 + cpu_relax();
9559 + mb();
9560 + }
9561 +#endif
9562 + double_rq_lock(rq, other_rq);
9563 + mb();
9564 + if (is_realtime(prev) && is_running(prev) != was_running) {
9565 + TRACE_TASK(prev,
9566 + "state changed while we dropped"
9567 + " the lock: is_running=%d, was_running=%d\n",
9568 + is_running(prev), was_running);
9569 + if (is_running(prev) && !was_running) {
9570 + /* prev task became unblocked
9571 + * we need to simulate normal sequence of events
9572 + * to scheduler plugins.
9573 + */
9574 + litmus->task_block(prev);
9575 + litmus->task_wake_up(prev);
9576 + }
9577 + }
9578 +
9579 + set_task_cpu(next, smp_processor_id());
9580 +
9581 + /* DEBUG: now that we have the lock we need to make sure a
9582 + * couple of things still hold:
9583 + * - it is still a real-time task
9584 + * - it is still runnable (could have been stopped)
9585 + * If either is violated, then the active plugin is
9586 + * doing something wrong.
9587 + */
9588 + if (!is_realtime(next) || !is_running(next)) {
9589 + /* BAD BAD BAD */
9590 + TRACE_TASK(next,"BAD: migration invariant FAILED: "
9591 + "rt=%d running=%d\n",
9592 + is_realtime(next),
9593 + is_running(next));
9594 + /* drop the task */
9595 + next = NULL;
9596 + }
9597 + /* release the other CPU's runqueue, but keep ours */
9598 + raw_spin_unlock(&other_rq->lock);
9599 + }
9600 + if (next) {
9601 + next->rt_param.stack_in_use = rq->cpu;
9602 + next->se.exec_start = rq->clock;
9603 + }
9604 +
9605 + update_enforcement_timer(next);
9606 + return next;
9607 +}
9608 +
9609 +static void enqueue_task_litmus(struct rq *rq, struct task_struct *p,
9610 + int flags)
9611 +{
9612 + if (flags & ENQUEUE_WAKEUP) {
9613 + sched_trace_task_resume(p);
9614 + tsk_rt(p)->present = 1;
9615 + /* LITMUS^RT plugins need to update the state
9616 + * _before_ making it available in global structures.
9617 + * Linux gets away with being lazy about the task state
9618 + * update. We can't do that, hence we update the task
9619 + * state already here.
9620 + *
9621 + * WARNING: this needs to be re-evaluated when porting
9622 + * to newer kernel versions.
9623 + */
9624 + p->state = TASK_RUNNING;
9625 + litmus->task_wake_up(p);
9626 +
9627 + rq->litmus.nr_running++;
9628 + } else
9629 + TRACE_TASK(p, "ignoring an enqueue, not a wake up.\n");
9630 +}
9631 +
9632 +static void dequeue_task_litmus(struct rq *rq, struct task_struct *p,
9633 + int flags)
9634 +{
9635 + if (flags & DEQUEUE_SLEEP) {
9636 + litmus->task_block(p);
9637 + tsk_rt(p)->present = 0;
9638 + sched_trace_task_block(p);
9639 +
9640 + rq->litmus.nr_running--;
9641 + } else
9642 + TRACE_TASK(p, "ignoring a dequeue, not going to sleep.\n");
9643 +}
9644 +
9645 +static void yield_task_litmus(struct rq *rq)
9646 +{
9647 + BUG_ON(rq->curr != current);
9648 + /* sched_yield() is called to trigger delayed preemptions.
9649 + * Thus, mark the current task as needing to be rescheduled.
9650 + * This will cause the scheduler plugin to be invoked, which can
9651 + * then determine if a preemption is still required.
9652 + */
9653 + clear_exit_np(current);
9654 + litmus_reschedule_local();
9655 +}
9656 +
9657 +/* Plugins are responsible for this.
9658 + */
9659 +static void check_preempt_curr_litmus(struct rq *rq, struct task_struct *p, int flags)
9660 +{
9661 +}
9662 +
9663 +static void put_prev_task_litmus(struct rq *rq, struct task_struct *p)
9664 +{
9665 +}
9666 +
9667 +static void pre_schedule_litmus(struct rq *rq, struct task_struct *prev)
9668 +{
9669 + update_time_litmus(rq, prev);
9670 + if (!is_running(prev))
9671 + tsk_rt(prev)->present = 0;
9672 +}
9673 +
9674 +/* pick_next_task_litmus() - litmus_schedule() function
9675 + *
9676 + * return the next task to be scheduled
9677 + */
9678 +static struct task_struct *pick_next_task_litmus(struct rq *rq)
9679 +{
9680 + /* get the to-be-switched-out task (prev) */
9681 + struct task_struct *prev = rq->litmus.prev;
9682 + struct task_struct *next;
9683 +
9684 + /* if not called from schedule() but from somewhere
9685 + * else (e.g., migration), return now!
9686 + */
9687 + if(!rq->litmus.prev)
9688 + return NULL;
9689 +
9690 + rq->litmus.prev = NULL;
9691 +
9692 + TS_PLUGIN_SCHED_START;
9693 + next = litmus_schedule(rq, prev);
9694 + TS_PLUGIN_SCHED_END;
9695 +
9696 + return next;
9697 +}
9698 +
9699 +static void task_tick_litmus(struct rq *rq, struct task_struct *p, int queued)
9700 +{
9701 + /* nothing to do; tick related tasks are done by litmus_tick() */
9702 + return;
9703 +}
9704 +
9705 +static void switched_to_litmus(struct rq *rq, struct task_struct *p)
9706 +{
9707 +}
9708 +
9709 +static void prio_changed_litmus(struct rq *rq, struct task_struct *p,
9710 + int oldprio)
9711 +{
9712 +}
9713 +
9714 +unsigned int get_rr_interval_litmus(struct rq *rq, struct task_struct *p)
9715 +{
9716 + /* return infinity */
9717 + return 0;
9718 +}
9719 +
9720 +/* This is called when a task became a real-time task, either due to a SCHED_*
9721 + * class transition or due to PI mutex inheritance. We don't handle Linux PI
9722 + * mutex inheritance yet (and probably never will). Use LITMUS provided
9723 + * synchronization primitives instead.
9724 + */
9725 +static void set_curr_task_litmus(struct rq *rq)
9726 +{
9727 + rq->curr->se.exec_start = rq->clock;
9728 +}
9729 +
9730 +
9731 +#ifdef CONFIG_SMP
9732 +/* execve tries to rebalance task in this scheduling domain.
9733 + * We don't care about the scheduling domain; it can get called from
9734 + * exec, fork, wakeup.
9735 + */
9736 +static int
9737 +select_task_rq_litmus(struct task_struct *p, int sd_flag, int flags)
9738 +{
9739 + /* preemption is already disabled.
9740 + * We don't want to change cpu here
9741 + */
9742 + return task_cpu(p);
9743 +}
9744 +#endif
9745 +
9746 +static const struct sched_class litmus_sched_class = {
9747 + /* From 34f971f6 the stop/migrate worker threads have a class on
9748 + * their own, which is the highest prio class. We don't support
9749 +	 * cpu-hotplug or cpu throttling. This allows LITMUS^RT to use up to 1.0
9750 + * CPU capacity.
9751 + */
9752 + .next = &stop_sched_class,
9753 + .enqueue_task = enqueue_task_litmus,
9754 + .dequeue_task = dequeue_task_litmus,
9755 + .yield_task = yield_task_litmus,
9756 +
9757 + .check_preempt_curr = check_preempt_curr_litmus,
9758 +
9759 + .pick_next_task = pick_next_task_litmus,
9760 + .put_prev_task = put_prev_task_litmus,
9761 +
9762 +#ifdef CONFIG_SMP
9763 + .select_task_rq = select_task_rq_litmus,
9764 +
9765 + .pre_schedule = pre_schedule_litmus,
9766 +#endif
9767 +
9768 + .set_curr_task = set_curr_task_litmus,
9769 + .task_tick = task_tick_litmus,
9770 +
9771 + .get_rr_interval = get_rr_interval_litmus,
9772 +
9773 + .prio_changed = prio_changed_litmus,
9774 + .switched_to = switched_to_litmus,
9775 +};
9776 diff --git a/litmus/sched_pfair.c b/litmus/sched_pfair.c
9777 new file mode 100644
9778 index 0000000..16f1065
9779 --- /dev/null
9780 +++ b/litmus/sched_pfair.c
9781 @@ -0,0 +1,1067 @@
9782 +/*
9783 + * kernel/sched_pfair.c
9784 + *
9785 + * Implementation of the PD^2 pfair scheduling algorithm. This
9786 + * implementation realizes "early releasing," i.e., it is work-conserving.
9787 + *
9788 + */
9789 +
9790 +#include <asm/div64.h>
9791 +#include <linux/delay.h>
9792 +#include <linux/module.h>
9793 +#include <linux/spinlock.h>
9794 +#include <linux/percpu.h>
9795 +#include <linux/sched.h>
9796 +#include <linux/list.h>
9797 +#include <linux/slab.h>
9798 +
9799 +#include <litmus/litmus.h>
9800 +#include <litmus/jobs.h>
9801 +#include <litmus/preempt.h>
9802 +#include <litmus/rt_domain.h>
9803 +#include <litmus/sched_plugin.h>
9804 +#include <litmus/sched_trace.h>
9805 +
9806 +#include <litmus/bheap.h>
9807 +
9808 +/* to configure the cluster size */
9809 +#include <litmus/litmus_proc.h>
9810 +
9811 +#include <litmus/clustered.h>
9812 +
9813 +static enum cache_level pfair_cluster_level = GLOBAL_CLUSTER;
9814 +
9815 +struct subtask {
9816 + /* measured in quanta relative to job release */
9817 + quanta_t release;
9818 + quanta_t deadline;
9819 + quanta_t overlap; /* called "b bit" by PD^2 */
9820 + quanta_t group_deadline;
9821 +};
9822 +
9823 +struct pfair_param {
9824 + quanta_t quanta; /* number of subtasks */
9825 + quanta_t cur; /* index of current subtask */
9826 +
9827 + quanta_t release; /* in quanta */
9828 + quanta_t period; /* in quanta */
9829 +
9830 + quanta_t last_quantum; /* when scheduled last */
9831 + int last_cpu; /* where scheduled last */
9832 +
9833 + struct pfair_cluster* cluster; /* where this task is scheduled */
9834 +
9835 + struct subtask subtasks[0]; /* allocate together with pfair_param */
9836 +};
9837 +
9838 +#define tsk_pfair(tsk) ((tsk)->rt_param.pfair)
9839 +
9840 +struct pfair_state {
9841 + struct cluster_cpu topology;
9842 +
9843 + volatile quanta_t cur_tick; /* updated by the CPU that is advancing
9844 + * the time */
9845 + volatile quanta_t local_tick; /* What tick is the local CPU currently
9846 + * executing? Updated only by the local
9847 + * CPU. In QEMU, this may lag behind the
9848 + * current tick. In a real system, with
9849 + * proper timers and aligned quanta,
9850 + * that should only be the case for a
9851 + * very short time after the time
9852 + * advanced. With staggered quanta, it
9853 + * will lag for the duration of the
9854 + * offset.
9855 + */
9856 +
9857 + struct task_struct* linked; /* the task that should be executing */
9858 + struct task_struct* local; /* the local copy of linked */
9859 + struct task_struct* scheduled; /* what is actually scheduled */
9860 +
9861 + lt_t offset; /* stagger offset */
9862 + unsigned int missed_updates;
9863 + unsigned int missed_quanta;
9864 +};
9865 +
9866 +struct pfair_cluster {
9867 + struct scheduling_cluster topology;
9868 +
9869 + /* The "global" time in this cluster. */
9870 + quanta_t pfair_time; /* the "official" PFAIR clock */
9871 +
9872 + /* The ready queue for this cluster. */
9873 + rt_domain_t pfair;
9874 +
9875 + /* The set of jobs that should have their release enacted at the next
9876 + * quantum boundary.
9877 + */
9878 + struct bheap release_queue;
9879 + raw_spinlock_t release_lock;
9880 +};
9881 +
9882 +#define RT_F_REQUEUE 0x2
9883 +
9884 +static inline struct pfair_cluster* cpu_cluster(struct pfair_state* state)
9885 +{
9886 + return container_of(state->topology.cluster, struct pfair_cluster, topology);
9887 +}
9888 +
9889 +static inline int cpu_id(struct pfair_state* state)
9890 +{
9891 + return state->topology.id;
9892 +}
9893 +
9894 +static inline struct pfair_state* from_cluster_list(struct list_head* pos)
9895 +{
9896 + return list_entry(pos, struct pfair_state, topology.cluster_list);
9897 +}
9898 +
9899 +static inline struct pfair_cluster* from_domain(rt_domain_t* rt)
9900 +{
9901 + return container_of(rt, struct pfair_cluster, pfair);
9902 +}
9903 +
9904 +static inline raw_spinlock_t* cluster_lock(struct pfair_cluster* cluster)
9905 +{
9906 + /* The ready_lock is used to serialize all scheduling events. */
9907 + return &cluster->pfair.ready_lock;
9908 +}
9909 +
9910 +static inline raw_spinlock_t* cpu_lock(struct pfair_state* state)
9911 +{
9912 + return cluster_lock(cpu_cluster(state));
9913 +}
9914 +
9915 +DEFINE_PER_CPU(struct pfair_state, pfair_state);
9916 +struct pfair_state* *pstate; /* short cut */
9917 +
9918 +static struct pfair_cluster* pfair_clusters;
9919 +static int num_pfair_clusters;
9920 +
9921 +/* Enable for lots of trace info.
9922 + * #define PFAIR_DEBUG
9923 + */
9924 +
9925 +#ifdef PFAIR_DEBUG
9926 +#define PTRACE_TASK(t, f, args...) TRACE_TASK(t, f, ## args)
9927 +#define PTRACE(f, args...) TRACE(f, ## args)
9928 +#else
9929 +#define PTRACE_TASK(t, f, args...)
9930 +#define PTRACE(f, args...)
9931 +#endif
9932 +
9933 +/* gcc will inline all of these accessor functions... */
9934 +static struct subtask* cur_subtask(struct task_struct* t)
9935 +{
9936 + return tsk_pfair(t)->subtasks + tsk_pfair(t)->cur;
9937 +}
9938 +
9939 +static quanta_t cur_deadline(struct task_struct* t)
9940 +{
9941 + return cur_subtask(t)->deadline + tsk_pfair(t)->release;
9942 +}
9943 +
9944 +static quanta_t cur_release(struct task_struct* t)
9945 +{
9946 + /* This is early releasing: only the release of the first subtask
9947 + * counts. */
9948 + return tsk_pfair(t)->release;
9949 +}
9950 +
9951 +static quanta_t cur_overlap(struct task_struct* t)
9952 +{
9953 + return cur_subtask(t)->overlap;
9954 +}
9955 +
9956 +static quanta_t cur_group_deadline(struct task_struct* t)
9957 +{
9958 + quanta_t gdl = cur_subtask(t)->group_deadline;
9959 + if (gdl)
9960 + return gdl + tsk_pfair(t)->release;
9961 + else
9962 + return gdl;
9963 +}
9964 +
9965 +
9966 +static int pfair_higher_prio(struct task_struct* first,
9967 + struct task_struct* second)
9968 +{
9969 + return /* first task must exist */
9970 + first && (
9971 + /* Does the second task exist and is it a real-time task? If
9972 + * not, the first task (which is a RT task) has higher
9973 + * priority.
9974 + */
9975 + !second || !is_realtime(second) ||
9976 +
9977 + /* Is the (subtask) deadline of the first task earlier?
9978 + * Then it has higher priority.
9979 + */
9980 + time_before(cur_deadline(first), cur_deadline(second)) ||
9981 +
9982 + /* Do we have a deadline tie?
9983 + * Then break by B-bit.
9984 + */
9985 + (cur_deadline(first) == cur_deadline(second) &&
9986 + (cur_overlap(first) > cur_overlap(second) ||
9987 +
9988 + /* Do we have a B-bit tie?
9989 + * Then break by group deadline.
9990 + */
9991 + (cur_overlap(first) == cur_overlap(second) &&
9992 + (time_after(cur_group_deadline(first),
9993 + cur_group_deadline(second)) ||
9994 +
9995 + /* Do we have a group deadline tie?
9996 + * Then break by PID, which are unique.
9997 + */
9998 + (cur_group_deadline(first) ==
9999 + cur_group_deadline(second) &&
10000 + first->pid < second->pid))))));
10001 +}
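+/* A small worked example of the ordering above, using made-up subtask
+ * parameters: (deadline 3, b=0) beats (deadline 4, b=1) because the earlier
+ * deadline always wins; (deadline 4, b=1) beats (deadline 4, b=0) because a
+ * deadline tie is broken in favor of the larger b-bit; and with deadlines and
+ * b-bits equal, (group deadline 5) beats (group deadline 4) because
+ * time_after() prefers the *later* group deadline.  Only a complete tie falls
+ * through to the PID comparison.
+ */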
10002 +
10003 +int pfair_ready_order(struct bheap_node* a, struct bheap_node* b)
10004 +{
10005 + return pfair_higher_prio(bheap2task(a), bheap2task(b));
10006 +}
10007 +
10008 +static void pfair_release_jobs(rt_domain_t* rt, struct bheap* tasks)
10009 +{
10010 + struct pfair_cluster* cluster = from_domain(rt);
10011 + unsigned long flags;
10012 +
10013 + raw_spin_lock_irqsave(&cluster->release_lock, flags);
10014 +
10015 + bheap_union(pfair_ready_order, &cluster->release_queue, tasks);
10016 +
10017 + raw_spin_unlock_irqrestore(&cluster->release_lock, flags);
10018 +}
10019 +
10020 +static void prepare_release(struct task_struct* t, quanta_t at)
10021 +{
10022 + tsk_pfair(t)->release = at;
10023 + tsk_pfair(t)->cur = 0;
10024 +}
10025 +
10026 +/* pull released tasks from the release queue */
10027 +static void poll_releases(struct pfair_cluster* cluster)
10028 +{
10029 + raw_spin_lock(&cluster->release_lock);
10030 + __merge_ready(&cluster->pfair, &cluster->release_queue);
10031 + raw_spin_unlock(&cluster->release_lock);
10032 +}
10033 +
10034 +static void check_preempt(struct task_struct* t)
10035 +{
10036 + int cpu = NO_CPU;
10037 + if (tsk_rt(t)->linked_on != tsk_rt(t)->scheduled_on &&
10038 + tsk_rt(t)->present) {
10039 + /* the task can be scheduled and
10040 + * is not scheduled where it ought to be scheduled
10041 + */
10042 + cpu = tsk_rt(t)->linked_on != NO_CPU ?
10043 + tsk_rt(t)->linked_on :
10044 + tsk_rt(t)->scheduled_on;
10045 + PTRACE_TASK(t, "linked_on:%d, scheduled_on:%d\n",
10046 + tsk_rt(t)->linked_on, tsk_rt(t)->scheduled_on);
10047 + /* preempt */
10048 + litmus_reschedule(cpu);
10049 + }
10050 +}
10051 +
10052 +/* caller must hold pfair.ready_lock */
10053 +static void drop_all_references(struct task_struct *t)
10054 +{
10055 + int cpu;
10056 + struct pfair_state* s;
10057 + struct pfair_cluster* cluster;
10058 + if (bheap_node_in_heap(tsk_rt(t)->heap_node)) {
10059 + /* It must be in the ready queue; drop references isn't called
10060 + * when the job is in a release queue. */
10061 + cluster = tsk_pfair(t)->cluster;
10062 + bheap_delete(pfair_ready_order, &cluster->pfair.ready_queue,
10063 + tsk_rt(t)->heap_node);
10064 + }
10065 + for (cpu = 0; cpu < num_online_cpus(); cpu++) {
10066 + s = &per_cpu(pfair_state, cpu);
10067 + if (s->linked == t)
10068 + s->linked = NULL;
10069 + if (s->local == t)
10070 + s->local = NULL;
10071 + if (s->scheduled == t)
10072 + s->scheduled = NULL;
10073 + }
10074 + /* make sure we don't have a stale linked_on field */
10075 + tsk_rt(t)->linked_on = NO_CPU;
10076 +}
10077 +
10078 +static void pfair_prepare_next_period(struct task_struct* t)
10079 +{
10080 + struct pfair_param* p = tsk_pfair(t);
10081 +
10082 + prepare_for_next_period(t);
10083 + get_rt_flags(t) = RT_F_RUNNING;
10084 + p->release += p->period;
10085 +}
10086 +
10087 +/* returns 1 if the task needs to go the release queue */
10088 +static int advance_subtask(quanta_t time, struct task_struct* t, int cpu)
10089 +{
10090 + struct pfair_param* p = tsk_pfair(t);
10091 + int to_relq;
10092 + p->cur = (p->cur + 1) % p->quanta;
10093 + if (!p->cur) {
10094 + if (tsk_rt(t)->present) {
10095 + /* The job overran; we start a new budget allocation. */
10096 + pfair_prepare_next_period(t);
10097 + } else {
10098 + /* remove task from system until it wakes */
10099 + drop_all_references(t);
10100 + tsk_rt(t)->flags = RT_F_REQUEUE;
10101 + TRACE_TASK(t, "on %d advanced to subtask %lu (not present)\n",
10102 + cpu, p->cur);
10103 + return 0;
10104 + }
10105 + }
10106 + to_relq = time_after(cur_release(t), time);
10107 + TRACE_TASK(t, "on %d advanced to subtask %lu -> to_relq=%d (cur_release:%lu time:%lu)\n",
10108 + cpu, p->cur, to_relq, cur_release(t), time);
10109 + return to_relq;
10110 +}
10111 +
10112 +static void advance_subtasks(struct pfair_cluster *cluster, quanta_t time)
10113 +{
10114 + struct task_struct* l;
10115 + struct pfair_param* p;
10116 + struct list_head* pos;
10117 + struct pfair_state* cpu;
10118 +
10119 + list_for_each(pos, &cluster->topology.cpus) {
10120 + cpu = from_cluster_list(pos);
10121 + l = cpu->linked;
10122 + cpu->missed_updates += cpu->linked != cpu->local;
10123 + if (l) {
10124 + p = tsk_pfair(l);
10125 + p->last_quantum = time;
10126 + p->last_cpu = cpu_id(cpu);
10127 + if (advance_subtask(time, l, cpu_id(cpu))) {
10128 + //cpu->linked = NULL;
10129 + PTRACE_TASK(l, "should go to release queue. "
10130 + "scheduled_on=%d present=%d\n",
10131 + tsk_rt(l)->scheduled_on,
10132 + tsk_rt(l)->present);
10133 + }
10134 + }
10135 + }
10136 +}
10137 +
10138 +static int target_cpu(quanta_t time, struct task_struct* t, int default_cpu)
10139 +{
10140 + int cpu;
10141 + if (tsk_rt(t)->scheduled_on != NO_CPU) {
10142 + /* always observe scheduled_on linkage */
10143 + default_cpu = tsk_rt(t)->scheduled_on;
10144 + } else if (tsk_pfair(t)->last_quantum == time - 1) {
10145 + /* back2back quanta */
10146 + /* Only observe last_quantum if no scheduled_on is in the way.
10147 + * This should only kick in if a CPU missed quanta, and that
10148 + * *should* only happen in QEMU.
10149 + */
10150 + cpu = tsk_pfair(t)->last_cpu;
10151 + if (!pstate[cpu]->linked ||
10152 + tsk_rt(pstate[cpu]->linked)->scheduled_on != cpu) {
10153 + default_cpu = cpu;
10154 + }
10155 + }
10156 + return default_cpu;
10157 +}
10158 +
10159 +/* returns one if linking was redirected */
10160 +static int pfair_link(quanta_t time, int cpu,
10161 + struct task_struct* t)
10162 +{
10163 + int target = target_cpu(time, t, cpu);
10164 + struct task_struct* prev = pstate[cpu]->linked;
10165 + struct task_struct* other;
10166 + struct pfair_cluster* cluster = cpu_cluster(pstate[cpu]);
10167 +
10168 + if (target != cpu) {
10169 + BUG_ON(pstate[target]->topology.cluster != pstate[cpu]->topology.cluster);
10170 + other = pstate[target]->linked;
10171 + pstate[target]->linked = t;
10172 + tsk_rt(t)->linked_on = target;
10173 + if (!other)
10174 + /* linked ok, but reschedule this CPU */
10175 + return 1;
10176 + if (target < cpu) {
10177 + /* link other to cpu instead */
10178 + tsk_rt(other)->linked_on = cpu;
10179 + pstate[cpu]->linked = other;
10180 + if (prev) {
10181 + /* prev got pushed back into the ready queue */
10182 + tsk_rt(prev)->linked_on = NO_CPU;
10183 + __add_ready(&cluster->pfair, prev);
10184 + }
10185 + /* we are done with this cpu */
10186 + return 0;
10187 + } else {
10188 +			/* re-add other, its original CPU was not considered yet */
10189 + tsk_rt(other)->linked_on = NO_CPU;
10190 + __add_ready(&cluster->pfair, other);
10191 + /* reschedule this CPU */
10192 + return 1;
10193 + }
10194 + } else {
10195 + pstate[cpu]->linked = t;
10196 + tsk_rt(t)->linked_on = cpu;
10197 + if (prev) {
10198 + /* prev got pushed back into the ready queue */
10199 + tsk_rt(prev)->linked_on = NO_CPU;
10200 + __add_ready(&cluster->pfair, prev);
10201 + }
10202 + /* we are done with this CPU */
10203 + return 0;
10204 + }
10205 +}
10206 +
10207 +static void schedule_subtasks(struct pfair_cluster *cluster, quanta_t time)
10208 +{
10209 + int retry;
10210 + struct list_head *pos;
10211 + struct pfair_state *cpu_state;
10212 +
10213 + list_for_each(pos, &cluster->topology.cpus) {
10214 + cpu_state = from_cluster_list(pos);
10215 + retry = 1;
10216 +#ifdef CONFIG_RELEASE_MASTER
10217 + /* skip release master */
10218 + if (cluster->pfair.release_master == cpu_id(cpu_state))
10219 + continue;
10220 +#endif
10221 + while (retry) {
10222 + if (pfair_higher_prio(__peek_ready(&cluster->pfair),
10223 + cpu_state->linked))
10224 + retry = pfair_link(time, cpu_id(cpu_state),
10225 + __take_ready(&cluster->pfair));
10226 + else
10227 + retry = 0;
10228 + }
10229 + }
10230 +}
10231 +
10232 +static void schedule_next_quantum(struct pfair_cluster *cluster, quanta_t time)
10233 +{
10234 + struct pfair_state *cpu;
10235 + struct list_head* pos;
10236 +
10237 + /* called with interrupts disabled */
10238 + PTRACE("--- Q %lu at %llu PRE-SPIN\n",
10239 + time, litmus_clock());
10240 + raw_spin_lock(cluster_lock(cluster));
10241 + PTRACE("<<< Q %lu at %llu\n",
10242 + time, litmus_clock());
10243 +
10244 + sched_trace_quantum_boundary();
10245 +
10246 + advance_subtasks(cluster, time);
10247 + poll_releases(cluster);
10248 + schedule_subtasks(cluster, time);
10249 +
10250 + list_for_each(pos, &cluster->topology.cpus) {
10251 + cpu = from_cluster_list(pos);
10252 + if (cpu->linked)
10253 + PTRACE_TASK(cpu->linked,
10254 + " linked on %d.\n", cpu_id(cpu));
10255 + else
10256 + PTRACE("(null) linked on %d.\n", cpu_id(cpu));
10257 + }
10258 + /* We are done. Advance time. */
10259 + mb();
10260 + list_for_each(pos, &cluster->topology.cpus) {
10261 + cpu = from_cluster_list(pos);
10262 + if (cpu->local_tick != cpu->cur_tick) {
10263 + TRACE("BAD Quantum not acked on %d "
10264 + "(l:%lu c:%lu p:%lu)\n",
10265 + cpu_id(cpu),
10266 + cpu->local_tick,
10267 + cpu->cur_tick,
10268 + cluster->pfair_time);
10269 + cpu->missed_quanta++;
10270 + }
10271 + cpu->cur_tick = time;
10272 + }
10273 + PTRACE(">>> Q %lu at %llu\n",
10274 + time, litmus_clock());
10275 + raw_spin_unlock(cluster_lock(cluster));
10276 +}
10277 +
10278 +static noinline void wait_for_quantum(quanta_t q, struct pfair_state* state)
10279 +{
10280 + quanta_t loc;
10281 +
10282 + goto first; /* skip mb() on first iteration */
10283 + do {
10284 + cpu_relax();
10285 + mb();
10286 + first: loc = state->cur_tick;
10287 + /* FIXME: what if loc > cur? */
10288 + } while (time_before(loc, q));
10289 + PTRACE("observed cur_tick:%lu >= q:%lu\n",
10290 + loc, q);
10291 +}
10292 +
10293 +static quanta_t current_quantum(struct pfair_state* state)
10294 +{
10295 + lt_t t = litmus_clock() - state->offset;
10296 + return time2quanta(t, FLOOR);
10297 +}
10298 +
10299 +static void catchup_quanta(quanta_t from, quanta_t target,
10300 + struct pfair_state* state)
10301 +{
10302 + quanta_t cur = from, time;
10303 + TRACE("+++< BAD catching up quanta from %lu to %lu\n",
10304 + from, target);
10305 + while (time_before(cur, target)) {
10306 + wait_for_quantum(cur, state);
10307 + cur++;
10308 + time = cmpxchg(&cpu_cluster(state)->pfair_time,
10309 + cur - 1, /* expected */
10310 + cur /* next */
10311 + );
10312 + if (time == cur - 1)
10313 + schedule_next_quantum(cpu_cluster(state), cur);
10314 + }
10315 + TRACE("+++> catching up done\n");
10316 +}
10317 +
10318 +/* pfair_tick - this function is called for every local timer
10319 + * interrupt.
10320 + */
10321 +static void pfair_tick(struct task_struct* t)
10322 +{
10323 + struct pfair_state* state = &__get_cpu_var(pfair_state);
10324 + quanta_t time, cur;
10325 + int retry = 10;
10326 +
10327 + do {
10328 + cur = current_quantum(state);
10329 + PTRACE("q %lu at %llu\n", cur, litmus_clock());
10330 +
10331 + /* Attempt to advance time. First CPU to get here
10332 + * will prepare the next quantum.
10333 + */
10334 + time = cmpxchg(&cpu_cluster(state)->pfair_time,
10335 + cur - 1, /* expected */
10336 + cur /* next */
10337 + );
10338 + if (time == cur - 1) {
10339 + /* exchange succeeded */
10340 + wait_for_quantum(cur - 1, state);
10341 + schedule_next_quantum(cpu_cluster(state), cur);
10342 + retry = 0;
10343 + } else if (time_before(time, cur - 1)) {
10344 + /* the whole system missed a tick !? */
10345 + catchup_quanta(time, cur, state);
10346 + retry--;
10347 + } else if (time_after(time, cur)) {
10348 + /* our timer lagging behind!? */
10349 + TRACE("BAD pfair_time:%lu > cur:%lu\n", time, cur);
10350 + retry--;
10351 + } else {
10352 + /* Some other CPU already started scheduling
10353 + * this quantum. Let it do its job and then update.
10354 + */
10355 + retry = 0;
10356 + }
10357 + } while (retry);
10358 +
10359 + /* Spin locally until time advances. */
10360 + wait_for_quantum(cur, state);
10361 +
10362 + /* copy assignment */
10363 + /* FIXME: what if we race with a future update? Corrupted state? */
10364 + state->local = state->linked;
10365 + /* signal that we are done */
10366 + mb();
10367 + state->local_tick = state->cur_tick;
10368 +
10369 + if (state->local != current
10370 + && (is_realtime(current) || is_present(state->local)))
10371 + litmus_reschedule_local();
10372 +}
10373 +
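+/* Concrete example of the time-advance protocol in pfair_tick() (a sketch
+ * following the code above): suppose cluster->pfair_time is 41 and a CPU
+ * computes cur = 42.  The first CPU to run cmpxchg(&pfair_time, 41, 42) sees
+ * the old value 41 returned, so it waits for quantum 41 to be fully acked and
+ * then calls schedule_next_quantum() for quantum 42.  CPUs that arrive later
+ * see 42 returned (neither before cur - 1 nor after cur), take the "some
+ * other CPU already started" branch, and simply spin in wait_for_quantum()
+ * until the winner publishes the new assignments.
+ */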
10374 +static int safe_to_schedule(struct task_struct* t, int cpu)
10375 +{
10376 + int where = tsk_rt(t)->scheduled_on;
10377 + if (where != NO_CPU && where != cpu) {
10378 + TRACE_TASK(t, "BAD: can't be scheduled on %d, "
10379 + "scheduled already on %d.\n", cpu, where);
10380 + return 0;
10381 + } else
10382 + return tsk_rt(t)->present && get_rt_flags(t) == RT_F_RUNNING;
10383 +}
10384 +
10385 +static struct task_struct* pfair_schedule(struct task_struct * prev)
10386 +{
10387 + struct pfair_state* state = &__get_cpu_var(pfair_state);
10388 + struct pfair_cluster* cluster = cpu_cluster(state);
10389 + int blocks, completion, out_of_time;
10390 + struct task_struct* next = NULL;
10391 +
10392 +#ifdef CONFIG_RELEASE_MASTER
10393 + /* Bail out early if we are the release master.
10394 + * The release master never schedules any real-time tasks.
10395 + */
10396 + if (unlikely(cluster->pfair.release_master == cpu_id(state))) {
10397 + sched_state_task_picked();
10398 + return NULL;
10399 + }
10400 +#endif
10401 +
10402 + raw_spin_lock(cpu_lock(state));
10403 +
10404 + blocks = is_realtime(prev) && !is_running(prev);
10405 + completion = is_realtime(prev) && get_rt_flags(prev) == RT_F_SLEEP;
10406 + out_of_time = is_realtime(prev) && time_after(cur_release(prev),
10407 + state->local_tick);
10408 +
10409 + if (is_realtime(prev))
10410 + PTRACE_TASK(prev, "blocks:%d completion:%d out_of_time:%d\n",
10411 + blocks, completion, out_of_time);
10412 +
10413 + if (completion) {
10414 + sched_trace_task_completion(prev, 0);
10415 + pfair_prepare_next_period(prev);
10416 + prepare_release(prev, cur_release(prev));
10417 + }
10418 +
10419 + if (!blocks && (completion || out_of_time)) {
10420 + drop_all_references(prev);
10421 + sched_trace_task_release(prev);
10422 + add_release(&cluster->pfair, prev);
10423 + }
10424 +
10425 + if (state->local && safe_to_schedule(state->local, cpu_id(state)))
10426 + next = state->local;
10427 +
10428 + if (prev != next) {
10429 + tsk_rt(prev)->scheduled_on = NO_CPU;
10430 + if (next)
10431 + tsk_rt(next)->scheduled_on = cpu_id(state);
10432 + }
10433 + sched_state_task_picked();
10434 + raw_spin_unlock(cpu_lock(state));
10435 +
10436 + if (next)
10437 + TRACE_TASK(next, "scheduled rel=%lu at %lu (%llu)\n",
10438 + tsk_pfair(next)->release, cpu_cluster(state)->pfair_time, litmus_clock());
10439 + else if (is_realtime(prev))
10440 + TRACE("Becomes idle at %lu (%llu)\n", cpu_cluster(state)->pfair_time, litmus_clock());
10441 +
10442 + return next;
10443 +}
10444 +
10445 +static void pfair_task_new(struct task_struct * t, int on_rq, int running)
10446 +{
10447 + unsigned long flags;
10448 + struct pfair_cluster* cluster;
10449 +
10450 + TRACE("pfair: task new %d state:%d\n", t->pid, t->state);
10451 +
10452 + cluster = tsk_pfair(t)->cluster;
10453 +
10454 + raw_spin_lock_irqsave(cluster_lock(cluster), flags);
10455 +
10456 + prepare_release(t, cluster->pfair_time + 1);
10457 +
10458 + t->rt_param.scheduled_on = NO_CPU;
10459 +
10460 + if (running) {
10461 +#ifdef CONFIG_RELEASE_MASTER
10462 + if (task_cpu(t) != cluster->pfair.release_master)
10463 +#endif
10464 + t->rt_param.scheduled_on = task_cpu(t);
10465 + __add_ready(&cluster->pfair, t);
10466 + }
10467 +
10468 + check_preempt(t);
10469 +
10470 + raw_spin_unlock_irqrestore(cluster_lock(cluster), flags);
10471 +}
10472 +
10473 +static void pfair_task_wake_up(struct task_struct *t)
10474 +{
10475 + unsigned long flags;
10476 + lt_t now;
10477 + int requeue = 0;
10478 + struct pfair_cluster* cluster;
10479 +
10480 + cluster = tsk_pfair(t)->cluster;
10481 +
10482 + TRACE_TASK(t, "wakes at %llu, release=%lu, pfair_time:%lu\n",
10483 + litmus_clock(), cur_release(t), cluster->pfair_time);
10484 +
10485 + raw_spin_lock_irqsave(cluster_lock(cluster), flags);
10486 +
10487 + /* If a task blocks and wakes before its next job release,
10488 + * then it may resume if it is currently linked somewhere
10489 + * (as if it never blocked at all). Otherwise, we have a
10490 + * new sporadic job release.
10491 + */
10492 + requeue = tsk_rt(t)->flags == RT_F_REQUEUE;
10493 + now = litmus_clock();
10494 + if (lt_before(get_deadline(t), now)) {
10495 + TRACE_TASK(t, "sporadic release!\n");
10496 + release_at(t, now);
10497 + prepare_release(t, time2quanta(now, CEIL));
10498 + sched_trace_task_release(t);
10499 + }
10500 +
10501 + /* only add to ready queue if the task isn't still linked somewhere */
10502 + if (requeue) {
10503 + TRACE_TASK(t, "requeueing required\n");
10504 + tsk_rt(t)->flags = RT_F_RUNNING;
10505 + __add_ready(&cluster->pfair, t);
10506 + }
10507 +
10508 + check_preempt(t);
10509 +
10510 + raw_spin_unlock_irqrestore(cluster_lock(cluster), flags);
10511 + TRACE_TASK(t, "wake up done at %llu\n", litmus_clock());
10512 +}
10513 +
10514 +static void pfair_task_block(struct task_struct *t)
10515 +{
10516 + BUG_ON(!is_realtime(t));
10517 + TRACE_TASK(t, "blocks at %llu, state:%d\n",
10518 + litmus_clock(), t->state);
10519 +}
10520 +
10521 +static void pfair_task_exit(struct task_struct * t)
10522 +{
10523 + unsigned long flags;
10524 + struct pfair_cluster *cluster;
10525 +
10526 + BUG_ON(!is_realtime(t));
10527 +
10528 + cluster = tsk_pfair(t)->cluster;
10529 +
10530 +	/* Remove task from release or ready queue, and ensure
10531 + * that it is not the scheduled task for ANY CPU. We
10532 +	 * do this blanket check because occasionally when
10533 + * tasks exit while blocked, the task_cpu of the task
10534 + * might not be the same as the CPU that the PFAIR scheduler
10535 + * has chosen for it.
10536 + */
10537 + raw_spin_lock_irqsave(cluster_lock(cluster), flags);
10538 +
10539 + TRACE_TASK(t, "RIP, state:%d\n", t->state);
10540 + drop_all_references(t);
10541 +
10542 + raw_spin_unlock_irqrestore(cluster_lock(cluster), flags);
10543 +
10544 + kfree(t->rt_param.pfair);
10545 + t->rt_param.pfair = NULL;
10546 +}
10547 +
10548 +
10549 +static void pfair_release_at(struct task_struct* task, lt_t start)
10550 +{
10551 + unsigned long flags;
10552 + quanta_t release;
10553 +
10554 + struct pfair_cluster *cluster;
10555 +
10556 + cluster = tsk_pfair(task)->cluster;
10557 +
10558 + BUG_ON(!is_realtime(task));
10559 +
10560 + raw_spin_lock_irqsave(cluster_lock(cluster), flags);
10561 + release_at(task, start);
10562 + release = time2quanta(start, CEIL);
10563 +
10564 + TRACE_TASK(task, "sys release at %lu\n", release);
10565 +
10566 + drop_all_references(task);
10567 + prepare_release(task, release);
10568 + add_release(&cluster->pfair, task);
10569 +
10570 + raw_spin_unlock_irqrestore(cluster_lock(cluster), flags);
10571 +}
10572 +
10573 +static void init_subtask(struct subtask* sub, unsigned long i,
10574 + lt_t quanta, lt_t period)
10575 +{
10576 + /* since i is zero-based, the formulas are shifted by one */
10577 + lt_t tmp;
10578 +
10579 + /* release */
10580 + tmp = period * i;
10581 + do_div(tmp, quanta); /* floor */
10582 + sub->release = (quanta_t) tmp;
10583 +
10584 + /* deadline */
10585 + tmp = period * (i + 1);
10586 + if (do_div(tmp, quanta)) /* ceil */
10587 + tmp++;
10588 + sub->deadline = (quanta_t) tmp;
10589 +
10590 + /* next release */
10591 + tmp = period * (i + 1);
10592 + do_div(tmp, quanta); /* floor */
10593 + sub->overlap = sub->deadline - (quanta_t) tmp;
10594 +
10595 + /* Group deadline.
10596 + * Based on the formula given in Uma's thesis.
10597 + */
10598 + if (2 * quanta >= period) {
10599 + /* heavy */
10600 + tmp = (sub->deadline - (i + 1)) * period;
10601 + if (period > quanta &&
10602 + do_div(tmp, (period - quanta))) /* ceil */
10603 + tmp++;
10604 + sub->group_deadline = (quanta_t) tmp;
10605 + } else
10606 + sub->group_deadline = 0;
10607 +}
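+/* Worked example for the subtask formulas above (a sketch; the values follow
+ * directly from the code, all relative to the job release): a task with
+ * quanta=3 and period=5 (weight 0.6, "heavy" since 2*3 >= 5) gets
+ *
+ *	i   release   deadline   b-bit   group deadline
+ *	0      0          2        1           3
+ *	1      1          4        1           5
+ *	2      3          5        0           5
+ *
+ * e.g. for i=1: release = floor(1*5/3) = 1, deadline = ceil(2*5/3) = 4,
+ * b-bit = 4 - floor(2*5/3) = 1, group deadline = ceil((4-2)*5/(5-3)) = 5.
+ */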
10608 +
10609 +static void dump_subtasks(struct task_struct* t)
10610 +{
10611 + unsigned long i;
10612 + for (i = 0; i < t->rt_param.pfair->quanta; i++)
10613 + TRACE_TASK(t, "SUBTASK %lu: rel=%lu dl=%lu bbit:%lu gdl:%lu\n",
10614 + i + 1,
10615 + t->rt_param.pfair->subtasks[i].release,
10616 + t->rt_param.pfair->subtasks[i].deadline,
10617 + t->rt_param.pfair->subtasks[i].overlap,
10618 + t->rt_param.pfair->subtasks[i].group_deadline);
10619 +}
10620 +
10621 +static long pfair_admit_task(struct task_struct* t)
10622 +{
10623 + lt_t quanta;
10624 + lt_t period;
10625 + s64 quantum_length = ktime_to_ns(tick_period);
10626 + struct pfair_param* param;
10627 + unsigned long i;
10628 +
10629 + /* first check that the task is in the right cluster */
10630 + if (cpu_cluster(pstate[tsk_rt(t)->task_params.cpu]) !=
10631 + cpu_cluster(pstate[task_cpu(t)]))
10632 + return -EINVAL;
10633 +
10634 + /* Pfair is a tick-based method, so the time
10635 + * of interest is jiffies. Calculate tick-based
10636 + * times for everything.
10637 + * (Ceiling of exec cost, floor of period.)
10638 + */
10639 +
10640 + quanta = get_exec_cost(t);
10641 + period = get_rt_period(t);
10642 +
10643 + quanta = time2quanta(get_exec_cost(t), CEIL);
10644 +
10645 + if (do_div(period, quantum_length))
10646 + printk(KERN_WARNING
10647 + "The period of %s/%d is not a multiple of %llu.\n",
10648 + t->comm, t->pid, (unsigned long long) quantum_length);
10649 +
10650 + if (quanta == period) {
10651 + /* special case: task has weight 1.0 */
10652 + printk(KERN_INFO
10653 + "Admitting weight 1.0 task. (%s/%d, %llu, %llu).\n",
10654 + t->comm, t->pid, quanta, period);
10655 + quanta = 1;
10656 + period = 1;
10657 + }
10658 +
10659 + param = kmalloc(sizeof(*param) +
10660 + quanta * sizeof(struct subtask), GFP_ATOMIC);
10661 +
10662 + if (!param)
10663 + return -ENOMEM;
10664 +
10665 + param->quanta = quanta;
10666 + param->cur = 0;
10667 + param->release = 0;
10668 + param->period = period;
10669 +
10670 + param->cluster = cpu_cluster(pstate[tsk_rt(t)->task_params.cpu]);
10671 +
10672 + for (i = 0; i < quanta; i++)
10673 + init_subtask(param->subtasks + i, i, quanta, period);
10674 +
10675 + if (t->rt_param.pfair)
10676 + /* get rid of stale allocation */
10677 + kfree(t->rt_param.pfair);
10678 +
10679 + t->rt_param.pfair = param;
10680 +
10681 + /* spew out some debug info */
10682 + dump_subtasks(t);
10683 +
10684 + return 0;
10685 +}
10686 +
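+/* Example of the admission arithmetic above (a sketch, assuming HZ=1000 so
+ * that the quantum length is 1 ms): a task with exec cost 2.3 ms and period
+ * 10 ms is admitted with quanta = ceil(2.3 / 1) = 3 and period = 10, i.e. as
+ * a weight-0.3 Pfair task.  A period of 10.5 ms would leave a remainder in
+ * do_div() and therefore trigger the "not a multiple" warning, with the
+ * period silently truncated to 10 quanta.
+ */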
10687 +static void pfair_init_cluster(struct pfair_cluster* cluster)
10688 +{
10689 + rt_domain_init(&cluster->pfair, pfair_ready_order, NULL, pfair_release_jobs);
10690 + bheap_init(&cluster->release_queue);
10691 + raw_spin_lock_init(&cluster->release_lock);
10692 + INIT_LIST_HEAD(&cluster->topology.cpus);
10693 +}
10694 +
10695 +static void cleanup_clusters(void)
10696 +{
10697 + int i;
10698 +
10699 + if (num_pfair_clusters)
10700 + kfree(pfair_clusters);
10701 + pfair_clusters = NULL;
10702 + num_pfair_clusters = 0;
10703 +
10704 + /* avoid stale pointers */
10705 + for (i = 0; i < num_online_cpus(); i++) {
10706 + pstate[i]->topology.cluster = NULL;
10707 + printk("P%d missed %u updates and %u quanta.\n", cpu_id(pstate[i]),
10708 + pstate[i]->missed_updates, pstate[i]->missed_quanta);
10709 + }
10710 +}
10711 +
10712 +static long pfair_activate_plugin(void)
10713 +{
10714 + int err, i;
10715 + struct pfair_state* state;
10716 + struct pfair_cluster* cluster ;
10717 + quanta_t now;
10718 + int cluster_size;
10719 + struct cluster_cpu* cpus[NR_CPUS];
10720 + struct scheduling_cluster* clust[NR_CPUS];
10721 +
10722 + cluster_size = get_cluster_size(pfair_cluster_level);
10723 +
10724 + if (cluster_size <= 0 || num_online_cpus() % cluster_size != 0)
10725 + return -EINVAL;
10726 +
10727 + num_pfair_clusters = num_online_cpus() / cluster_size;
10728 +
10729 + pfair_clusters = kzalloc(num_pfair_clusters * sizeof(struct pfair_cluster), GFP_ATOMIC);
10730 + if (!pfair_clusters) {
10731 + num_pfair_clusters = 0;
10732 + printk(KERN_ERR "Could not allocate Pfair clusters!\n");
10733 + return -ENOMEM;
10734 + }
10735 +
10736 + state = &__get_cpu_var(pfair_state);
10737 + now = current_quantum(state);
10738 + TRACE("Activating PFAIR at q=%lu\n", now);
10739 +
10740 + for (i = 0; i < num_pfair_clusters; i++) {
10741 + cluster = &pfair_clusters[i];
10742 + pfair_init_cluster(cluster);
10743 + cluster->pfair_time = now;
10744 + clust[i] = &cluster->topology;
10745 +#ifdef CONFIG_RELEASE_MASTER
10746 + cluster->pfair.release_master = atomic_read(&release_master_cpu);
10747 +#endif
10748 + }
10749 +
10750 + for (i = 0; i < num_online_cpus(); i++) {
10751 + state = &per_cpu(pfair_state, i);
10752 + state->cur_tick = now;
10753 + state->local_tick = now;
10754 + state->missed_quanta = 0;
10755 + state->missed_updates = 0;
10756 + state->offset = cpu_stagger_offset(i);
10757 + printk(KERN_ERR "cpus[%d] set; %d\n", i, num_online_cpus());
10758 + cpus[i] = &state->topology;
10759 + }
10760 +
10761 + err = assign_cpus_to_clusters(pfair_cluster_level, clust, num_pfair_clusters,
10762 + cpus, num_online_cpus());
10763 +
10764 + if (err < 0)
10765 + cleanup_clusters();
10766 +
10767 + return err;
10768 +}
10769 +
10770 +static long pfair_deactivate_plugin(void)
10771 +{
10772 + cleanup_clusters();
10773 + return 0;
10774 +}
10775 +
10776 +/* Plugin object */
10777 +static struct sched_plugin pfair_plugin __cacheline_aligned_in_smp = {
10778 + .plugin_name = "PFAIR",
10779 + .tick = pfair_tick,
10780 + .task_new = pfair_task_new,
10781 + .task_exit = pfair_task_exit,
10782 + .schedule = pfair_schedule,
10783 + .task_wake_up = pfair_task_wake_up,
10784 + .task_block = pfair_task_block,
10785 + .admit_task = pfair_admit_task,
10786 + .release_at = pfair_release_at,
10787 + .complete_job = complete_job,
10788 + .activate_plugin = pfair_activate_plugin,
10789 + .deactivate_plugin = pfair_deactivate_plugin,
10790 +};
10791 +
10792 +
10793 +static struct proc_dir_entry *cluster_file = NULL, *pfair_dir = NULL;
10794 +
10795 +static int __init init_pfair(void)
10796 +{
10797 + int cpu, err, fs;
10798 + struct pfair_state *state;
10799 +
10800 + /*
10801 + * initialize short_cut for per-cpu pfair state;
10802 + * there may be a problem here if someone removes a cpu
10803 + * while we are doing this initialization... and if cpus
10804 + * are added / removed later... but we don't support CPU hotplug atm anyway.
10805 + */
10806 + pstate = kmalloc(sizeof(struct pfair_state*) * num_online_cpus(), GFP_KERNEL);
10807 +
10808 + /* initialize CPU state */
10809 + for (cpu = 0; cpu < num_online_cpus(); cpu++) {
10810 + state = &per_cpu(pfair_state, cpu);
10811 + state->topology.id = cpu;
10812 + state->cur_tick = 0;
10813 + state->local_tick = 0;
10814 + state->linked = NULL;
10815 + state->local = NULL;
10816 + state->scheduled = NULL;
10817 + state->missed_quanta = 0;
10818 + state->offset = cpu_stagger_offset(cpu);
10819 + pstate[cpu] = state;
10820 + }
10821 +
10822 + pfair_clusters = NULL;
10823 + num_pfair_clusters = 0;
10824 +
10825 + err = register_sched_plugin(&pfair_plugin);
10826 + if (!err) {
10827 + fs = make_plugin_proc_dir(&pfair_plugin, &pfair_dir);
10828 + if (!fs)
10829 + cluster_file = create_cluster_file(pfair_dir, &pfair_cluster_level);
10830 + else
10831 + printk(KERN_ERR "Could not allocate PFAIR procfs dir.\n");
10832 + }
10833 +
10834 + return err;
10835 +}
10836 +
10837 +static void __exit clean_pfair(void)
10838 +{
10839 + kfree(pstate);
10840 +
10841 + if (cluster_file)
10842 + remove_proc_entry("cluster", pfair_dir);
10843 + if (pfair_dir)
10844 + remove_plugin_proc_dir(&pfair_plugin);
10845 +}
10846 +
10847 +module_init(init_pfair);
10848 +module_exit(clean_pfair);
10849 diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
10850 new file mode 100644
10851 index 0000000..00a1900
10852 --- /dev/null
10853 +++ b/litmus/sched_plugin.c
10854 @@ -0,0 +1,227 @@
10855 +/* sched_plugin.c -- core infrastructure for the scheduler plugin system
10856 + *
10857 + * This file includes the initialization of the plugin system, the no-op Linux
10858 + * scheduler plugin, some dummy functions, and some helper functions.
10859 + */
10860 +
10861 +#include <linux/list.h>
10862 +#include <linux/spinlock.h>
10863 +#include <linux/sched.h>
10864 +
10865 +#include <litmus/litmus.h>
10866 +#include <litmus/sched_plugin.h>
10867 +#include <litmus/preempt.h>
10868 +#include <litmus/jobs.h>
10869 +
10870 +/*
10871 + * Generic function to trigger preemption on either local or remote cpu
10872 + * from scheduler plugins. The key feature is that this function is
10873 + * non-preemptive section aware and does not invoke the scheduler / send
10874 + * IPIs if the to-be-preempted task is actually non-preemptive.
10875 + */
10876 +void preempt_if_preemptable(struct task_struct* t, int cpu)
10877 +{
10878 +	/* t is the real-time task executing on CPU cpu. If t is NULL, then
10879 +	 * CPU cpu is currently scheduling background work.
10880 + */
10881 +
10882 + int reschedule = 0;
10883 +
10884 + if (!t)
10885 + /* move non-real-time task out of the way */
10886 + reschedule = 1;
10887 + else {
10888 + if (smp_processor_id() == cpu) {
10889 + /* local CPU case */
10890 + /* check if we need to poke userspace */
10891 + if (is_user_np(t))
10892 + /* Yes, poke it. This doesn't have to be atomic since
10893 + * the task is definitely not executing. */
10894 + request_exit_np(t);
10895 + else if (!is_kernel_np(t))
10896 + /* only if we are allowed to preempt the
10897 + * currently-executing task */
10898 + reschedule = 1;
10899 + } else {
10900 + /* Remote CPU case. Only notify if it's not a kernel
10901 + * NP section and if we didn't set the userspace
10902 + * flag. */
10903 + reschedule = !(is_kernel_np(t) || request_exit_np_atomic(t));
10904 + }
10905 + }
10906 + if (likely(reschedule))
10907 + litmus_reschedule(cpu);
10908 +}
10909 +
10910 +
10911 +/*************************************************************
10912 + * Dummy plugin functions *
10913 + *************************************************************/
10914 +
10915 +static void litmus_dummy_finish_switch(struct task_struct * prev)
10916 +{
10917 +}
10918 +
10919 +static struct task_struct* litmus_dummy_schedule(struct task_struct * prev)
10920 +{
10921 + sched_state_task_picked();
10922 + return NULL;
10923 +}
10924 +
10925 +static void litmus_dummy_tick(struct task_struct* tsk)
10926 +{
10927 +}
10928 +
10929 +static long litmus_dummy_admit_task(struct task_struct* tsk)
10930 +{
10931 + printk(KERN_CRIT "LITMUS^RT: Linux plugin rejects %s/%d.\n",
10932 + tsk->comm, tsk->pid);
10933 + return -EINVAL;
10934 +}
10935 +
10936 +static void litmus_dummy_task_new(struct task_struct *t, int on_rq, int running)
10937 +{
10938 +}
10939 +
10940 +static void litmus_dummy_task_wake_up(struct task_struct *task)
10941 +{
10942 +}
10943 +
10944 +static void litmus_dummy_task_block(struct task_struct *task)
10945 +{
10946 +}
10947 +
10948 +static void litmus_dummy_task_exit(struct task_struct *task)
10949 +{
10950 +}
10951 +
10952 +static long litmus_dummy_complete_job(void)
10953 +{
10954 + return -ENOSYS;
10955 +}
10956 +
10957 +static long litmus_dummy_activate_plugin(void)
10958 +{
10959 + return 0;
10960 +}
10961 +
10962 +static long litmus_dummy_deactivate_plugin(void)
10963 +{
10964 + return 0;
10965 +}
10966 +
10967 +#ifdef CONFIG_LITMUS_LOCKING
10968 +
10969 +static long litmus_dummy_allocate_lock(struct litmus_lock **lock, int type,
10970 + void* __user config)
10971 +{
10972 + return -ENXIO;
10973 +}
10974 +
10975 +#endif
10976 +
10977 +
10978 +/* The default scheduler plugin. It doesn't do anything and lets Linux do its
10979 + * job.
10980 + */
10981 +struct sched_plugin linux_sched_plugin = {
10982 + .plugin_name = "Linux",
10983 + .tick = litmus_dummy_tick,
10984 + .task_new = litmus_dummy_task_new,
10985 + .task_exit = litmus_dummy_task_exit,
10986 + .task_wake_up = litmus_dummy_task_wake_up,
10987 + .task_block = litmus_dummy_task_block,
10988 + .complete_job = litmus_dummy_complete_job,
10989 + .schedule = litmus_dummy_schedule,
10990 + .finish_switch = litmus_dummy_finish_switch,
10991 + .activate_plugin = litmus_dummy_activate_plugin,
10992 + .deactivate_plugin = litmus_dummy_deactivate_plugin,
10993 +#ifdef CONFIG_LITMUS_LOCKING
10994 + .allocate_lock = litmus_dummy_allocate_lock,
10995 +#endif
10996 + .admit_task = litmus_dummy_admit_task
10997 +};
10998 +
10999 +/*
11000 + * Reference to the current plugin used to schedule tasks in the system.
11001 + * It stores references to the actual function implementations and
11002 + * should be initialized by calling "init_***_plugin()".
11003 + */
11004 +struct sched_plugin *litmus = &linux_sched_plugin;
11005 +
11006 +/* the list of registered scheduling plugins */
11007 +static LIST_HEAD(sched_plugins);
11008 +static DEFINE_RAW_SPINLOCK(sched_plugins_lock);
11009 +
11010 +#define CHECK(func) {\
11011 + if (!plugin->func) \
11012 + plugin->func = litmus_dummy_ ## func;}
11013 +
11014 +/* FIXME: get reference to module */
11015 +int register_sched_plugin(struct sched_plugin* plugin)
11016 +{
11017 + printk(KERN_INFO "Registering LITMUS^RT plugin %s.\n",
11018 + plugin->plugin_name);
11019 +
11020 + /* make sure we don't trip over null pointers later */
11021 + CHECK(finish_switch);
11022 + CHECK(schedule);
11023 + CHECK(tick);
11024 + CHECK(task_wake_up);
11025 + CHECK(task_exit);
11026 + CHECK(task_block);
11027 + CHECK(task_new);
11028 + CHECK(complete_job);
11029 + CHECK(activate_plugin);
11030 + CHECK(deactivate_plugin);
11031 +#ifdef CONFIG_LITMUS_LOCKING
11032 + CHECK(allocate_lock);
11033 +#endif
11034 + CHECK(admit_task);
11035 +
11036 + if (!plugin->release_at)
11037 + plugin->release_at = release_at;
11038 +
11039 + raw_spin_lock(&sched_plugins_lock);
11040 + list_add(&plugin->list, &sched_plugins);
11041 + raw_spin_unlock(&sched_plugins_lock);
11042 +
11043 + return 0;
11044 +}
11045 +
11046 +
11047 +/* FIXME: reference counting, etc. */
11048 +struct sched_plugin* find_sched_plugin(const char* name)
11049 +{
11050 + struct list_head *pos;
11051 + struct sched_plugin *plugin;
11052 +
11053 + raw_spin_lock(&sched_plugins_lock);
11054 + list_for_each(pos, &sched_plugins) {
11055 + plugin = list_entry(pos, struct sched_plugin, list);
11056 + if (!strcmp(plugin->plugin_name, name))
11057 + goto out_unlock;
11058 + }
11059 + plugin = NULL;
11060 +
11061 +out_unlock:
11062 + raw_spin_unlock(&sched_plugins_lock);
11063 + return plugin;
11064 +}
11065 +
11066 +int print_sched_plugins(char* buf, int max)
11067 +{
11068 + int count = 0;
11069 + struct list_head *pos;
11070 + struct sched_plugin *plugin;
11071 +
11072 + raw_spin_lock(&sched_plugins_lock);
11073 + list_for_each(pos, &sched_plugins) {
11074 + plugin = list_entry(pos, struct sched_plugin, list);
11075 + count += snprintf(buf + count, max - count, "%s\n", plugin->plugin_name);
11076 + if (max - count <= 0)
11077 + break;
11078 + }
11079 + raw_spin_unlock(&sched_plugins_lock);
11080 + return count;
11081 +}
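
Taken together, the litmus_dummy_*() callbacks and the CHECK() macro mean a plugin only has to provide the hooks it actually implements; register_sched_plugin() fills in no-op defaults and falls back to the generic release_at(). A minimal sketch of a hypothetical plugin built on this interface (the "DEMO" name and demo_* identifiers are illustrative only):

#include <linux/module.h>
#include <linux/sched.h>

#include <litmus/preempt.h>
#include <litmus/sched_plugin.h>

static struct task_struct* demo_schedule(struct task_struct *prev)
{
	/* required handshake with the preemption state machine */
	sched_state_task_picked();
	return NULL;	/* never selects a real-time task */
}

static long demo_admit_task(struct task_struct *tsk)
{
	return -EINVAL;	/* rejects every task, like the Linux plugin */
}

static struct sched_plugin demo_plugin = {
	.plugin_name	= "DEMO",
	.schedule	= demo_schedule,
	.admit_task	= demo_admit_task,
	/* all other hooks stay NULL and receive the litmus_dummy_*() defaults */
};

static int __init init_demo(void)
{
	return register_sched_plugin(&demo_plugin);
}
module_init(init_demo);
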
11082 diff --git a/litmus/sched_psn_edf.c b/litmus/sched_psn_edf.c
11083 new file mode 100644
11084 index 0000000..8e4a22d
11085 --- /dev/null
11086 +++ b/litmus/sched_psn_edf.c
11087 @@ -0,0 +1,645 @@
11088 +/*
11089 + * kernel/sched_psn_edf.c
11090 + *
11091 + * Implementation of the PSN-EDF scheduler plugin.
11092 + * Based on kern/sched_part_edf.c and kern/sched_gsn_edf.c.
11093 + *
11094 + * Suspensions and non-preemptable sections are supported.
11095 + * Priority inheritance is not supported.
11096 + */
11097 +
11098 +#include <linux/percpu.h>
11099 +#include <linux/sched.h>
11100 +#include <linux/list.h>
11101 +#include <linux/spinlock.h>
11102 +#include <linux/module.h>
11103 +
11104 +#include <litmus/litmus.h>
11105 +#include <litmus/jobs.h>
11106 +#include <litmus/preempt.h>
11107 +#include <litmus/sched_plugin.h>
11108 +#include <litmus/edf_common.h>
11109 +#include <litmus/sched_trace.h>
11110 +#include <litmus/trace.h>
11111 +
11112 +typedef struct {
11113 + rt_domain_t domain;
11114 + int cpu;
11115 + struct task_struct* scheduled; /* only RT tasks */
11116 +/*
11117 + * scheduling lock slock
11118 + * protects the domain and serializes scheduling decisions
11119 + */
11120 +#define slock domain.ready_lock
11121 +
11122 +} psnedf_domain_t;
11123 +
11124 +DEFINE_PER_CPU(psnedf_domain_t, psnedf_domains);
11125 +
11126 +#define local_edf (&__get_cpu_var(psnedf_domains).domain)
11127 +#define local_pedf (&__get_cpu_var(psnedf_domains))
11128 +#define remote_edf(cpu) (&per_cpu(psnedf_domains, cpu).domain)
11129 +#define remote_pedf(cpu) (&per_cpu(psnedf_domains, cpu))
11130 +#define task_edf(task) remote_edf(get_partition(task))
11131 +#define task_pedf(task) remote_pedf(get_partition(task))
11132 +
11133 +
11134 +static void psnedf_domain_init(psnedf_domain_t* pedf,
11135 + check_resched_needed_t check,
11136 + release_jobs_t release,
11137 + int cpu)
11138 +{
11139 + edf_domain_init(&pedf->domain, check, release);
11140 + pedf->cpu = cpu;
11141 + pedf->scheduled = NULL;
11142 +}
11143 +
11144 +static void requeue(struct task_struct* t, rt_domain_t *edf)
11145 +{
11146 + if (t->state != TASK_RUNNING)
11147 + TRACE_TASK(t, "requeue: !TASK_RUNNING\n");
11148 +
11149 + set_rt_flags(t, RT_F_RUNNING);
11150 + if (is_released(t, litmus_clock()))
11151 + __add_ready(edf, t);
11152 + else
11153 + add_release(edf, t); /* it has got to wait */
11154 +}
11155 +
11156 +/* we assume the lock is being held */
11157 +static void preempt(psnedf_domain_t *pedf)
11158 +{
11159 + preempt_if_preemptable(pedf->scheduled, pedf->cpu);
11160 +}
11161 +
11162 +#ifdef CONFIG_LITMUS_LOCKING
11163 +
11164 +static void boost_priority(struct task_struct* t)
11165 +{
11166 + unsigned long flags;
11167 + psnedf_domain_t* pedf = task_pedf(t);
11168 + lt_t now;
11169 +
11170 + raw_spin_lock_irqsave(&pedf->slock, flags);
11171 + now = litmus_clock();
11172 +
11173 + TRACE_TASK(t, "priority boosted at %llu\n", now);
11174 +
11175 + tsk_rt(t)->priority_boosted = 1;
11176 + tsk_rt(t)->boost_start_time = now;
11177 +
11178 + if (pedf->scheduled != t) {
11179 + /* holder may be queued: first stop queue changes */
11180 + raw_spin_lock(&pedf->domain.release_lock);
11181 + if (is_queued(t) &&
11182 + /* If it is queued, then we need to re-order. */
11183 + bheap_decrease(edf_ready_order, tsk_rt(t)->heap_node) &&
11184 + /* If we bubbled to the top, then we need to check for preemptions. */
11185 + edf_preemption_needed(&pedf->domain, pedf->scheduled))
11186 + preempt(pedf);
11187 + raw_spin_unlock(&pedf->domain.release_lock);
11188 + } /* else: nothing to do since the job is not queued while scheduled */
11189 +
11190 + raw_spin_unlock_irqrestore(&pedf->slock, flags);
11191 +}
11192 +
11193 +static void unboost_priority(struct task_struct* t)
11194 +{
11195 + unsigned long flags;
11196 + psnedf_domain_t* pedf = task_pedf(t);
11197 + lt_t now;
11198 +
11199 + raw_spin_lock_irqsave(&pedf->slock, flags);
11200 + now = litmus_clock();
11201 +
11202 + /* assumption: this only happens when the job is scheduled */
11203 + BUG_ON(pedf->scheduled != t);
11204 +
11205 + TRACE_TASK(t, "priority restored at %llu\n", now);
11206 +
11207 + /* priority boosted jobs must be scheduled */
11208 + BUG_ON(pedf->scheduled != t);
11209 +
11210 + tsk_rt(t)->priority_boosted = 0;
11211 + tsk_rt(t)->boost_start_time = 0;
11212 +
11213 + /* check if this changes anything */
11214 + if (edf_preemption_needed(&pedf->domain, pedf->scheduled))
11215 + preempt(pedf);
11216 +
11217 + raw_spin_unlock_irqrestore(&pedf->slock, flags);
11218 +}
11219 +
11220 +#endif
11221 +
11222 +/* This check is trivial in partitioned systems as we only have to consider
11223 + * the CPU of the partition.
11224 + */
11225 +static int psnedf_check_resched(rt_domain_t *edf)
11226 +{
11227 + psnedf_domain_t *pedf = container_of(edf, psnedf_domain_t, domain);
11228 +
11229 + /* because this is a callback from rt_domain_t we already hold
11230 + * the necessary lock for the ready queue
11231 + */
11232 + if (edf_preemption_needed(edf, pedf->scheduled)) {
11233 + preempt(pedf);
11234 + return 1;
11235 + } else
11236 + return 0;
11237 +}
11238 +
11239 +static void job_completion(struct task_struct* t, int forced)
11240 +{
11241 + sched_trace_task_completion(t,forced);
11242 + TRACE_TASK(t, "job_completion().\n");
11243 +
11244 + set_rt_flags(t, RT_F_SLEEP);
11245 + prepare_for_next_period(t);
11246 +}
11247 +
11248 +static void psnedf_tick(struct task_struct *t)
11249 +{
11250 + psnedf_domain_t *pedf = local_pedf;
11251 +
11252 + /* Check for inconsistency. We don't need the lock for this since
11253 + * ->scheduled is only changed in schedule, which obviously is not
11254 + * executing in parallel on this CPU
11255 + */
11256 + BUG_ON(is_realtime(t) && t != pedf->scheduled);
11257 +
11258 + if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
11259 + if (!is_np(t)) {
11260 + litmus_reschedule_local();
11261 + TRACE("psnedf_scheduler_tick: "
11262 + "%d is preemptable "
11263 + " => FORCE_RESCHED\n", t->pid);
11264 + } else if (is_user_np(t)) {
11265 + TRACE("psnedf_scheduler_tick: "
11266 + "%d is non-preemptable, "
11267 + "preemption delayed.\n", t->pid);
11268 + request_exit_np(t);
11269 + }
11270 + }
11271 +}
11272 +
11273 +static struct task_struct* psnedf_schedule(struct task_struct * prev)
11274 +{
11275 + psnedf_domain_t* pedf = local_pedf;
11276 + rt_domain_t* edf = &pedf->domain;
11277 + struct task_struct* next;
11278 +
11279 + int out_of_time, sleep, preempt,
11280 + np, exists, blocks, resched;
11281 +
11282 + raw_spin_lock(&pedf->slock);
11283 +
11284 + /* sanity checking
11285 +	 * unlike GSN-EDF, when a task exits (dies),
11286 +	 * pedf->scheduled may be NULL while prev _is_ real-time
11287 + */
11288 + BUG_ON(pedf->scheduled && pedf->scheduled != prev);
11289 + BUG_ON(pedf->scheduled && !is_realtime(prev));
11290 +
11291 + /* (0) Determine state */
11292 + exists = pedf->scheduled != NULL;
11293 + blocks = exists && !is_running(pedf->scheduled);
11294 + out_of_time = exists &&
11295 + budget_enforced(pedf->scheduled) &&
11296 + budget_exhausted(pedf->scheduled);
11297 + np = exists && is_np(pedf->scheduled);
11298 + sleep = exists && get_rt_flags(pedf->scheduled) == RT_F_SLEEP;
11299 + preempt = edf_preemption_needed(edf, prev);
11300 +
11301 + /* If we need to preempt do so.
11302 + * The following checks set resched to 1 in case of special
11303 + * circumstances.
11304 + */
11305 + resched = preempt;
11306 +
11307 + /* If a task blocks we have no choice but to reschedule.
11308 + */
11309 + if (blocks)
11310 + resched = 1;
11311 +
11312 + /* Request a sys_exit_np() call if we would like to preempt but cannot.
11313 + * Multiple calls to request_exit_np() don't hurt.
11314 + */
11315 + if (np && (out_of_time || preempt || sleep))
11316 + request_exit_np(pedf->scheduled);
11317 +
11318 + /* Any task that is preemptable and either exhausts its execution
11319 + * budget or wants to sleep completes. We may have to reschedule after
11320 + * this.
11321 + */
11322 + if (!np && (out_of_time || sleep) && !blocks) {
11323 + job_completion(pedf->scheduled, !sleep);
11324 + resched = 1;
11325 + }
11326 +
11327 + /* The final scheduling decision. Do we need to switch for some reason?
11328 + * Switch if we are in RT mode and have no task or if we need to
11329 + * resched.
11330 + */
11331 + next = NULL;
11332 + if ((!np || blocks) && (resched || !exists)) {
11333 + /* When preempting a task that does not block, then
11334 + * re-insert it into either the ready queue or the
11335 + * release queue (if it completed). requeue() picks
11336 + * the appropriate queue.
11337 + */
11338 + if (pedf->scheduled && !blocks)
11339 + requeue(pedf->scheduled, edf);
11340 + next = __take_ready(edf);
11341 + } else
11342 + /* Only override Linux scheduler if we have a real-time task
11343 + * scheduled that needs to continue.
11344 + */
11345 + if (exists)
11346 + next = prev;
11347 +
11348 + if (next) {
11349 + TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
11350 + set_rt_flags(next, RT_F_RUNNING);
11351 + } else {
11352 + TRACE("becoming idle at %llu\n", litmus_clock());
11353 + }
11354 +
11355 + pedf->scheduled = next;
11356 + sched_state_task_picked();
11357 + raw_spin_unlock(&pedf->slock);
11358 +
11359 + return next;
11360 +}
11361 +
11362 +
11363 +/* Prepare a task for running in RT mode
11364 + */
11365 +static void psnedf_task_new(struct task_struct * t, int on_rq, int running)
11366 +{
11367 + rt_domain_t* edf = task_edf(t);
11368 + psnedf_domain_t* pedf = task_pedf(t);
11369 + unsigned long flags;
11370 +
11371 + TRACE_TASK(t, "psn edf: task new, cpu = %d\n",
11372 + t->rt_param.task_params.cpu);
11373 +
11374 + /* setup job parameters */
11375 + release_at(t, litmus_clock());
11376 +
11377 + /* The task should be running in the queue, otherwise signal
11378 + * code will try to wake it up with fatal consequences.
11379 + */
11380 + raw_spin_lock_irqsave(&pedf->slock, flags);
11381 + if (running) {
11382 + /* there shouldn't be anything else running at the time */
11383 + BUG_ON(pedf->scheduled);
11384 + pedf->scheduled = t;
11385 + } else {
11386 + requeue(t, edf);
11387 + /* maybe we have to reschedule */
11388 + preempt(pedf);
11389 + }
11390 + raw_spin_unlock_irqrestore(&pedf->slock, flags);
11391 +}
11392 +
11393 +static void psnedf_task_wake_up(struct task_struct *task)
11394 +{
11395 + unsigned long flags;
11396 + psnedf_domain_t* pedf = task_pedf(task);
11397 + rt_domain_t* edf = task_edf(task);
11398 + lt_t now;
11399 +
11400 + TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
11401 + raw_spin_lock_irqsave(&pedf->slock, flags);
11402 + BUG_ON(is_queued(task));
11403 + now = litmus_clock();
11404 + if (is_tardy(task, now)
11405 +#ifdef CONFIG_LITMUS_LOCKING
11406 + /* We need to take suspensions because of semaphores into
11407 + * account! If a job resumes after being suspended due to acquiring
11408 + * a semaphore, it should never be treated as a new job release.
11409 + */
11410 + && !is_priority_boosted(task)
11411 +#endif
11412 + ) {
11413 + /* new sporadic release */
11414 + release_at(task, now);
11415 + sched_trace_task_release(task);
11416 + }
11417 +
11418 + /* Only add to ready queue if it is not the currently-scheduled
11419 + * task. This could be the case if a task was woken up concurrently
11420 + * on a remote CPU before the executing CPU got around to actually
11421 + * de-scheduling the task, i.e., wake_up() raced with schedule()
11422 + * and won.
11423 + */
11424 + if (pedf->scheduled != task)
11425 + requeue(task, edf);
11426 +
11427 + raw_spin_unlock_irqrestore(&pedf->slock, flags);
11428 + TRACE_TASK(task, "wake up done\n");
11429 +}
11430 +
11431 +static void psnedf_task_block(struct task_struct *t)
11432 +{
11433 + /* only running tasks can block, thus t is in no queue */
11434 + TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state);
11435 +
11436 + BUG_ON(!is_realtime(t));
11437 + BUG_ON(is_queued(t));
11438 +}
11439 +
11440 +static void psnedf_task_exit(struct task_struct * t)
11441 +{
11442 + unsigned long flags;
11443 + psnedf_domain_t* pedf = task_pedf(t);
11444 + rt_domain_t* edf;
11445 +
11446 + raw_spin_lock_irqsave(&pedf->slock, flags);
11447 + if (is_queued(t)) {
11448 + /* dequeue */
11449 + edf = task_edf(t);
11450 + remove(edf, t);
11451 + }
11452 + if (pedf->scheduled == t)
11453 + pedf->scheduled = NULL;
11454 +
11455 + TRACE_TASK(t, "RIP, now reschedule\n");
11456 +
11457 + preempt(pedf);
11458 + raw_spin_unlock_irqrestore(&pedf->slock, flags);
11459 +}
11460 +
11461 +#ifdef CONFIG_LITMUS_LOCKING
11462 +
11463 +#include <litmus/fdso.h>
11464 +#include <litmus/srp.h>
11465 +
11466 +/* ******************** SRP support ************************ */
11467 +
11468 +static unsigned int psnedf_get_srp_prio(struct task_struct* t)
11469 +{
11470 + /* assumes implicit deadlines */
11471 + return get_rt_period(t);
11472 +}
11473 +
11474 +/* ******************** FMLP support ********************** */
11475 +
11476 +/* struct for semaphore with priority inheritance */
11477 +struct fmlp_semaphore {
11478 + struct litmus_lock litmus_lock;
11479 +
11480 + /* current resource holder */
11481 + struct task_struct *owner;
11482 +
11483 + /* FIFO queue of waiting tasks */
11484 + wait_queue_head_t wait;
11485 +};
11486 +
11487 +static inline struct fmlp_semaphore* fmlp_from_lock(struct litmus_lock* lock)
11488 +{
11489 + return container_of(lock, struct fmlp_semaphore, litmus_lock);
11490 +}
11491 +int psnedf_fmlp_lock(struct litmus_lock* l)
11492 +{
11493 + struct task_struct* t = current;
11494 + struct fmlp_semaphore *sem = fmlp_from_lock(l);
11495 + wait_queue_t wait;
11496 + unsigned long flags;
11497 +
11498 + if (!is_realtime(t))
11499 + return -EPERM;
11500 +
11501 + spin_lock_irqsave(&sem->wait.lock, flags);
11502 +
11503 + if (sem->owner) {
11504 + /* resource is not free => must suspend and wait */
11505 +
11506 + init_waitqueue_entry(&wait, t);
11507 +
11508 + /* FIXME: interruptible would be nice some day */
11509 + set_task_state(t, TASK_UNINTERRUPTIBLE);
11510 +
11511 + __add_wait_queue_tail_exclusive(&sem->wait, &wait);
11512 +
11513 + TS_LOCK_SUSPEND;
11514 +
11515 + /* release lock before sleeping */
11516 + spin_unlock_irqrestore(&sem->wait.lock, flags);
11517 +
11518 + /* We depend on the FIFO order. Thus, we don't need to recheck
11519 + * when we wake up; we are guaranteed to have the lock since
11520 + * there is only one wake up per release.
11521 + */
11522 +
11523 + schedule();
11524 +
11525 + TS_LOCK_RESUME;
11526 +
11527 + /* Since we hold the lock, no other task will change
11528 + * ->owner. We can thus check it without acquiring the spin
11529 + * lock. */
11530 + BUG_ON(sem->owner != t);
11531 + } else {
11532 + /* it's ours now */
11533 + sem->owner = t;
11534 +
11535 + /* mark the task as priority-boosted. */
11536 + boost_priority(t);
11537 +
11538 + spin_unlock_irqrestore(&sem->wait.lock, flags);
11539 + }
11540 +
11541 + return 0;
11542 +}
11543 +
11544 +int psnedf_fmlp_unlock(struct litmus_lock* l)
11545 +{
11546 + struct task_struct *t = current, *next;
11547 + struct fmlp_semaphore *sem = fmlp_from_lock(l);
11548 + unsigned long flags;
11549 + int err = 0;
11550 +
11551 + spin_lock_irqsave(&sem->wait.lock, flags);
11552 +
11553 + if (sem->owner != t) {
11554 + err = -EINVAL;
11555 + goto out;
11556 + }
11557 +
11558 + /* we lose the benefit of priority boosting */
11559 +
11560 + unboost_priority(t);
11561 +
11562 + /* check if there are jobs waiting for this resource */
11563 + next = __waitqueue_remove_first(&sem->wait);
11564 + if (next) {
11565 + /* boost next job */
11566 + boost_priority(next);
11567 +
11568 +		/* next becomes the resource holder */
11569 + sem->owner = next;
11570 +
11571 + /* wake up next */
11572 + wake_up_process(next);
11573 + } else
11574 + /* resource becomes available */
11575 + sem->owner = NULL;
11576 +
11577 +out:
11578 + spin_unlock_irqrestore(&sem->wait.lock, flags);
11579 + return err;
11580 +}
11581 +
11582 +int psnedf_fmlp_close(struct litmus_lock* l)
11583 +{
11584 + struct task_struct *t = current;
11585 + struct fmlp_semaphore *sem = fmlp_from_lock(l);
11586 + unsigned long flags;
11587 +
11588 + int owner;
11589 +
11590 + spin_lock_irqsave(&sem->wait.lock, flags);
11591 +
11592 + owner = sem->owner == t;
11593 +
11594 + spin_unlock_irqrestore(&sem->wait.lock, flags);
11595 +
11596 + if (owner)
11597 + psnedf_fmlp_unlock(l);
11598 +
11599 + return 0;
11600 +}
11601 +
11602 +void psnedf_fmlp_free(struct litmus_lock* lock)
11603 +{
11604 + kfree(fmlp_from_lock(lock));
11605 +}
11606 +
11607 +static struct litmus_lock_ops psnedf_fmlp_lock_ops = {
11608 + .close = psnedf_fmlp_close,
11609 + .lock = psnedf_fmlp_lock,
11610 + .unlock = psnedf_fmlp_unlock,
11611 + .deallocate = psnedf_fmlp_free,
11612 +};
11613 +
11614 +static struct litmus_lock* psnedf_new_fmlp(void)
11615 +{
11616 + struct fmlp_semaphore* sem;
11617 +
11618 + sem = kmalloc(sizeof(*sem), GFP_KERNEL);
11619 + if (!sem)
11620 + return NULL;
11621 +
11622 + sem->owner = NULL;
11623 + init_waitqueue_head(&sem->wait);
11624 + sem->litmus_lock.ops = &psnedf_fmlp_lock_ops;
11625 +
11626 + return &sem->litmus_lock;
11627 +}
11628 +
11629 +/* **** lock constructor **** */
11630 +
11631 +
11632 +static long psnedf_allocate_lock(struct litmus_lock **lock, int type,
11633 + void* __user unused)
11634 +{
11635 + int err = -ENXIO;
11636 + struct srp_semaphore* srp;
11637 +
11638 + /* PSN-EDF currently supports the SRP for local resources and the FMLP
11639 + * for global resources. */
11640 + switch (type) {
11641 + case FMLP_SEM:
11642 + /* Flexible Multiprocessor Locking Protocol */
11643 + *lock = psnedf_new_fmlp();
11644 + if (*lock)
11645 + err = 0;
11646 + else
11647 + err = -ENOMEM;
11648 + break;
11649 +
11650 + case SRP_SEM:
11651 + /* Baker's Stack Resource Policy */
11652 + srp = allocate_srp_semaphore();
11653 + if (srp) {
11654 + *lock = &srp->litmus_lock;
11655 + err = 0;
11656 + } else
11657 + err = -ENOMEM;
11658 + break;
11659 + };
11660 +
11661 + return err;
11662 +}
11663 +
11664 +#endif
11665 +
11666 +
11667 +static long psnedf_activate_plugin(void)
11668 +{
11669 +#ifdef CONFIG_RELEASE_MASTER
11670 + int cpu;
11671 +
11672 + for_each_online_cpu(cpu) {
11673 + remote_edf(cpu)->release_master = atomic_read(&release_master_cpu);
11674 + }
11675 +#endif
11676 +
11677 +#ifdef CONFIG_LITMUS_LOCKING
11678 + get_srp_prio = psnedf_get_srp_prio;
11679 +#endif
11680 +
11681 + return 0;
11682 +}
11683 +
11684 +static long psnedf_admit_task(struct task_struct* tsk)
11685 +{
11686 + if (task_cpu(tsk) == tsk->rt_param.task_params.cpu
11687 +#ifdef CONFIG_RELEASE_MASTER
11688 + /* don't allow tasks on release master CPU */
11689 + && task_cpu(tsk) != remote_edf(task_cpu(tsk))->release_master
11690 +#endif
11691 + )
11692 + return 0;
11693 + else
11694 + return -EINVAL;
11695 +}
11696 +
11697 +/* Plugin object */
11698 +static struct sched_plugin psn_edf_plugin __cacheline_aligned_in_smp = {
11699 + .plugin_name = "PSN-EDF",
11700 + .tick = psnedf_tick,
11701 + .task_new = psnedf_task_new,
11702 + .complete_job = complete_job,
11703 + .task_exit = psnedf_task_exit,
11704 + .schedule = psnedf_schedule,
11705 + .task_wake_up = psnedf_task_wake_up,
11706 + .task_block = psnedf_task_block,
11707 + .admit_task = psnedf_admit_task,
11708 + .activate_plugin = psnedf_activate_plugin,
11709 +#ifdef CONFIG_LITMUS_LOCKING
11710 + .allocate_lock = psnedf_allocate_lock,
11711 +#endif
11712 +};
11713 +
11714 +
11715 +static int __init init_psn_edf(void)
11716 +{
11717 + int i;
11718 +
11719 +	/* We do not really want to support CPU hotplug, do we? ;)
11720 +	 * However, if we ever did, we could not rely on
11721 +	 * num_online_cpus() here.
11722 +	 */
11723 + for (i = 0; i < num_online_cpus(); i++) {
11724 + psnedf_domain_init(remote_pedf(i),
11725 + psnedf_check_resched,
11726 + NULL, i);
11727 + }
11728 + return register_sched_plugin(&psn_edf_plugin);
11729 +}
11730 +
11731 +module_init(init_psn_edf);
11732 +
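
psnedf_allocate_lock() is only ever reached through the plugin's allocate_lock hook. A hedged sketch of how the generic locking layer dispatches into it (litmus is the active plugin pointer from sched_plugin.c and FMLP_SEM comes from litmus/fdso.h; the surrounding error handling is schematic):

	struct litmus_lock *lock;
	long err;

	/* ask the active plugin for a lock object of the requested type;
	 * under PSN-EDF this yields an FMLP or SRP semaphore */
	err = litmus->allocate_lock(&lock, FMLP_SEM, NULL);
	if (!err) {
		/* lock->ops now points at psnedf_fmlp_lock_ops */
		err = lock->ops->lock(lock);		/* psnedf_fmlp_lock() */
		if (!err)
			err = lock->ops->unlock(lock);	/* psnedf_fmlp_unlock() */
		lock->ops->deallocate(lock);		/* psnedf_fmlp_free() */
	}
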
11733 diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c
11734 new file mode 100644
11735 index 0000000..5ef8d09
11736 --- /dev/null
11737 +++ b/litmus/sched_task_trace.c
11738 @@ -0,0 +1,241 @@
11739 +/*
11740 + * sched_task_trace.c -- record scheduling events to a byte stream
11741 + */
11742 +
11743 +#define NO_TASK_TRACE_DECLS
11744 +
11745 +#include <linux/module.h>
11746 +#include <linux/sched.h>
11747 +#include <linux/percpu.h>
11748 +
11749 +#include <litmus/ftdev.h>
11750 +#include <litmus/litmus.h>
11751 +
11752 +#include <litmus/sched_trace.h>
11753 +#include <litmus/feather_trace.h>
11754 +#include <litmus/ftdev.h>
11755 +
11756 +
11757 +#define NO_EVENTS (1 << CONFIG_SCHED_TASK_TRACE_SHIFT)
11758 +
11759 +#define now() litmus_clock()
11760 +
11761 +struct local_buffer {
11762 + struct st_event_record record[NO_EVENTS];
11763 + char flag[NO_EVENTS];
11764 + struct ft_buffer ftbuf;
11765 +};
11766 +
11767 +DEFINE_PER_CPU(struct local_buffer, st_event_buffer);
11768 +
11769 +static struct ftdev st_dev;
11770 +
11771 +static int st_dev_can_open(struct ftdev *dev, unsigned int cpu)
11772 +{
11773 + return cpu_online(cpu) ? 0 : -ENODEV;
11774 +}
11775 +
11776 +static int __init init_sched_task_trace(void)
11777 +{
11778 + struct local_buffer* buf;
11779 + int i, ok = 0, err;
11780 + printk("Allocated %u sched_trace_xxx() events per CPU "
11781 + "(buffer size: %d bytes)\n",
11782 + NO_EVENTS, (int) sizeof(struct local_buffer));
11783 +
11784 + err = ftdev_init(&st_dev, THIS_MODULE,
11785 + num_online_cpus(), "sched_trace");
11786 + if (err)
11787 + goto err_out;
11788 +
11789 + for (i = 0; i < st_dev.minor_cnt; i++) {
11790 + buf = &per_cpu(st_event_buffer, i);
11791 + ok += init_ft_buffer(&buf->ftbuf, NO_EVENTS,
11792 + sizeof(struct st_event_record),
11793 + buf->flag,
11794 + buf->record);
11795 + st_dev.minor[i].buf = &buf->ftbuf;
11796 + }
11797 + if (ok == st_dev.minor_cnt) {
11798 + st_dev.can_open = st_dev_can_open;
11799 + err = register_ftdev(&st_dev);
11800 + if (err)
11801 + goto err_dealloc;
11802 + } else {
11803 + err = -EINVAL;
11804 + goto err_dealloc;
11805 + }
11806 +
11807 + return 0;
11808 +
11809 +err_dealloc:
11810 + ftdev_exit(&st_dev);
11811 +err_out:
11812 + printk(KERN_WARNING "Could not register sched_trace module\n");
11813 + return err;
11814 +}
11815 +
11816 +static void __exit exit_sched_task_trace(void)
11817 +{
11818 + ftdev_exit(&st_dev);
11819 +}
11820 +
11821 +module_init(init_sched_task_trace);
11822 +module_exit(exit_sched_task_trace);
11823 +
11824 +
11825 +static inline struct st_event_record* get_record(u8 type, struct task_struct* t)
11826 +{
11827 + struct st_event_record* rec = NULL;
11828 + struct local_buffer* buf;
11829 +
11830 + buf = &get_cpu_var(st_event_buffer);
11831 + if (ft_buffer_start_write(&buf->ftbuf, (void**) &rec)) {
11832 + rec->hdr.type = type;
11833 + rec->hdr.cpu = smp_processor_id();
11834 + rec->hdr.pid = t ? t->pid : 0;
11835 + rec->hdr.job = t ? t->rt_param.job_params.job_no : 0;
11836 + } else {
11837 + put_cpu_var(st_event_buffer);
11838 + }
11839 + /* rec will be NULL if it failed */
11840 + return rec;
11841 +}
11842 +
11843 +static inline void put_record(struct st_event_record* rec)
11844 +{
11845 + struct local_buffer* buf;
11846 + buf = &__get_cpu_var(st_event_buffer);
11847 + ft_buffer_finish_write(&buf->ftbuf, rec);
11848 + put_cpu_var(st_event_buffer);
11849 +}
11850 +
11851 +feather_callback void do_sched_trace_task_name(unsigned long id, unsigned long _task)
11852 +{
11853 + struct task_struct *t = (struct task_struct*) _task;
11854 + struct st_event_record* rec = get_record(ST_NAME, t);
11855 + int i;
11856 + if (rec) {
11857 + for (i = 0; i < min(TASK_COMM_LEN, ST_NAME_LEN); i++)
11858 + rec->data.name.cmd[i] = t->comm[i];
11859 + put_record(rec);
11860 + }
11861 +}
11862 +
11863 +feather_callback void do_sched_trace_task_param(unsigned long id, unsigned long _task)
11864 +{
11865 + struct task_struct *t = (struct task_struct*) _task;
11866 + struct st_event_record* rec = get_record(ST_PARAM, t);
11867 + if (rec) {
11868 + rec->data.param.wcet = get_exec_cost(t);
11869 + rec->data.param.period = get_rt_period(t);
11870 + rec->data.param.phase = get_rt_phase(t);
11871 + rec->data.param.partition = get_partition(t);
11872 + rec->data.param.class = get_class(t);
11873 + put_record(rec);
11874 + }
11875 +}
11876 +
11877 +feather_callback void do_sched_trace_task_release(unsigned long id, unsigned long _task)
11878 +{
11879 + struct task_struct *t = (struct task_struct*) _task;
11880 + struct st_event_record* rec = get_record(ST_RELEASE, t);
11881 + if (rec) {
11882 + rec->data.release.release = get_release(t);
11883 + rec->data.release.deadline = get_deadline(t);
11884 + put_record(rec);
11885 + }
11886 +}
11887 +
11888 +/* skipped: st_assigned_data, we don't use it atm */
11889 +
11890 +feather_callback void do_sched_trace_task_switch_to(unsigned long id,
11891 + unsigned long _task)
11892 +{
11893 + struct task_struct *t = (struct task_struct*) _task;
11894 + struct st_event_record* rec;
11895 + if (is_realtime(t)) {
11896 + rec = get_record(ST_SWITCH_TO, t);
11897 + if (rec) {
11898 + rec->data.switch_to.when = now();
11899 + rec->data.switch_to.exec_time = get_exec_time(t);
11900 + put_record(rec);
11901 + }
11902 + }
11903 +}
11904 +
11905 +feather_callback void do_sched_trace_task_switch_away(unsigned long id,
11906 + unsigned long _task)
11907 +{
11908 + struct task_struct *t = (struct task_struct*) _task;
11909 + struct st_event_record* rec;
11910 + if (is_realtime(t)) {
11911 + rec = get_record(ST_SWITCH_AWAY, t);
11912 + if (rec) {
11913 + rec->data.switch_away.when = now();
11914 + rec->data.switch_away.exec_time = get_exec_time(t);
11915 + put_record(rec);
11916 + }
11917 + }
11918 +}
11919 +
11920 +feather_callback void do_sched_trace_task_completion(unsigned long id,
11921 + unsigned long _task,
11922 + unsigned long forced)
11923 +{
11924 + struct task_struct *t = (struct task_struct*) _task;
11925 + struct st_event_record* rec = get_record(ST_COMPLETION, t);
11926 + if (rec) {
11927 + rec->data.completion.when = now();
11928 + rec->data.completion.forced = forced;
11929 + put_record(rec);
11930 + }
11931 +}
11932 +
11933 +feather_callback void do_sched_trace_task_block(unsigned long id,
11934 + unsigned long _task)
11935 +{
11936 + struct task_struct *t = (struct task_struct*) _task;
11937 + struct st_event_record* rec = get_record(ST_BLOCK, t);
11938 + if (rec) {
11939 + rec->data.block.when = now();
11940 + put_record(rec);
11941 + }
11942 +}
11943 +
11944 +feather_callback void do_sched_trace_task_resume(unsigned long id,
11945 + unsigned long _task)
11946 +{
11947 + struct task_struct *t = (struct task_struct*) _task;
11948 + struct st_event_record* rec = get_record(ST_RESUME, t);
11949 + if (rec) {
11950 + rec->data.resume.when = now();
11951 + put_record(rec);
11952 + }
11953 +}
11954 +
11955 +feather_callback void do_sched_trace_sys_release(unsigned long id,
11956 + unsigned long _start)
11957 +{
11958 + lt_t *start = (lt_t*) _start;
11959 + struct st_event_record* rec = get_record(ST_SYS_RELEASE, NULL);
11960 + if (rec) {
11961 + rec->data.sys_release.when = now();
11962 + rec->data.sys_release.release = *start;
11963 + put_record(rec);
11964 + }
11965 +}
11966 +
11967 +feather_callback void do_sched_trace_action(unsigned long id,
11968 + unsigned long _task,
11969 + unsigned long action)
11970 +{
11971 + struct task_struct *t = (struct task_struct*) _task;
11972 + struct st_event_record* rec = get_record(ST_ACTION, t);
11973 +
11974 + if (rec) {
11975 + rec->data.action.when = now();
11976 + rec->data.action.action = action;
11977 + put_record(rec);
11978 + }
11979 +}
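
All of the feather_callback helpers above follow the same get_record()/put_record() protocol: get_record() pins the per-CPU buffer via get_cpu_var() and returns NULL if the buffer is full (the event is then dropped and the CPU reference has already been released), while put_record() commits the entry and drops the reference. A condensed sketch of the pattern, reusing the existing ST_ACTION record type purely for illustration:

feather_callback void do_sched_trace_example(unsigned long id,
					     unsigned long _task)
{
	struct task_struct *t = (struct task_struct*) _task;
	/* reserve a slot; NULL means the per-CPU buffer was full */
	struct st_event_record *rec = get_record(ST_ACTION, t);

	if (rec) {
		rec->data.action.when   = now();	/* litmus_clock() */
		rec->data.action.action = 0;		/* arbitrary payload */
		put_record(rec);			/* commit + put_cpu_var() */
	}
	/* if rec was NULL, get_record() already dropped the CPU reference */
}
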
11980 diff --git a/litmus/sched_trace.c b/litmus/sched_trace.c
11981 new file mode 100644
11982 index 0000000..f4171fd
11983 --- /dev/null
11984 +++ b/litmus/sched_trace.c
11985 @@ -0,0 +1,252 @@
11986 +/*
11987 + * sched_trace.c -- record scheduling events to a byte stream.
11988 + */
11989 +#include <linux/spinlock.h>
11990 +#include <linux/mutex.h>
11991 +
11992 +#include <linux/fs.h>
11993 +#include <linux/slab.h>
11994 +#include <linux/miscdevice.h>
11995 +#include <asm/uaccess.h>
11996 +#include <linux/module.h>
11997 +#include <linux/sysrq.h>
11998 +
11999 +#include <linux/kfifo.h>
12000 +
12001 +#include <litmus/sched_trace.h>
12002 +#include <litmus/litmus.h>
12003 +
12004 +#define SCHED_TRACE_NAME "litmus/log"
12005 +
12006 +/* Compute size of TRACE() buffer */
12007 +#define LITMUS_TRACE_BUF_SIZE (1 << CONFIG_SCHED_DEBUG_TRACE_SHIFT)
12008 +
12009 +/* Max length of one read from the buffer */
12010 +#define MAX_READ_LEN (64 * 1024)
12011 +
12012 +/* Max length for one write --- by TRACE() --- to the buffer. This is used to
12013 + * allocate a per-cpu buffer for printf() formatting. */
12014 +#define MSG_SIZE 255
12015 +
12016 +
12017 +static DEFINE_MUTEX(reader_mutex);
12018 +static atomic_t reader_cnt = ATOMIC_INIT(0);
12019 +static DEFINE_KFIFO(debug_buffer, char, LITMUS_TRACE_BUF_SIZE);
12020 +
12021 +
12022 +static DEFINE_RAW_SPINLOCK(log_buffer_lock);
12023 +static DEFINE_PER_CPU(char[MSG_SIZE], fmt_buffer);
12024 +
12025 +/*
12026 + * sched_trace_log_message - Write to the trace buffer (log_buffer)
12027 + *
12028 + * This is the only function accessing the log_buffer from inside the
12029 + * kernel for writing.
12030 + * Concurrent access to sched_trace_log_message() must be serialized
12031 + * using log_buffer_lock.
12032 + * The maximum length of a formatted message is MSG_SIZE (255 bytes).
12033 + */
12034 +void sched_trace_log_message(const char* fmt, ...)
12035 +{
12036 + unsigned long flags;
12037 + va_list args;
12038 + size_t len;
12039 + char* buf;
12040 +
12041 + if (!atomic_read(&reader_cnt))
12042 + /* early exit if nobody is listening */
12043 + return;
12044 +
12045 + va_start(args, fmt);
12046 + local_irq_save(flags);
12047 +
12048 + /* format message */
12049 + buf = __get_cpu_var(fmt_buffer);
12050 + len = vscnprintf(buf, MSG_SIZE, fmt, args);
12051 +
12052 + raw_spin_lock(&log_buffer_lock);
12053 + /* Don't copy the trailing null byte, we don't want null bytes in a
12054 + * text file.
12055 + */
12056 + kfifo_in(&debug_buffer, buf, len);
12057 + raw_spin_unlock(&log_buffer_lock);
12058 +
12059 + local_irq_restore(flags);
12060 + va_end(args);
12061 +}
12062 +
12063 +
12064 +/*
12065 + * log_read - Read the trace buffer
12066 + *
12067 + * This function is called as a file operation from userspace.
12068 + * Readers can sleep. Access is serialized through reader_mutex
12069 + */
12070 +static ssize_t log_read(struct file *filp,
12071 + char __user *to, size_t len,
12072 + loff_t *f_pos)
12073 +{
12074 + /* we ignore f_pos, this is strictly sequential */
12075 +
12076 + ssize_t error = -EINVAL;
12077 + char* mem;
12078 +
12079 + if (mutex_lock_interruptible(&reader_mutex)) {
12080 + error = -ERESTARTSYS;
12081 + goto out;
12082 + }
12083 +
12084 + if (len > MAX_READ_LEN)
12085 + len = MAX_READ_LEN;
12086 +
12087 + mem = kmalloc(len, GFP_KERNEL);
12088 + if (!mem) {
12089 + error = -ENOMEM;
12090 + goto out_unlock;
12091 + }
12092 +
12093 + error = kfifo_out(&debug_buffer, mem, len);
12094 + while (!error) {
12095 + set_current_state(TASK_INTERRUPTIBLE);
12096 + schedule_timeout(110);
12097 + if (signal_pending(current))
12098 + error = -ERESTARTSYS;
12099 + else
12100 + error = kfifo_out(&debug_buffer, mem, len);
12101 + }
12102 +
12103 + if (error > 0 && copy_to_user(to, mem, error))
12104 + error = -EFAULT;
12105 +
12106 + kfree(mem);
12107 + out_unlock:
12108 + mutex_unlock(&reader_mutex);
12109 + out:
12110 + return error;
12111 +}
12112 +
12113 +/*
12114 + * Enable redirection of printk() messages to the trace buffer.
12115 + * Defined in kernel/printk.c
12116 + */
12117 +extern int trace_override;
12118 +extern int trace_recurse;
12119 +
12120 +/*
12121 + * log_open - open the global log message ring buffer.
12122 + */
12123 +static int log_open(struct inode *in, struct file *filp)
12124 +{
12125 + int error = -EINVAL;
12126 +
12127 + if (mutex_lock_interruptible(&reader_mutex)) {
12128 + error = -ERESTARTSYS;
12129 + goto out;
12130 + }
12131 +
12132 + atomic_inc(&reader_cnt);
12133 + error = 0;
12134 +
12135 + printk(KERN_DEBUG
12136 + "sched_trace kfifo with buffer starting at: 0x%p\n",
12137 + debug_buffer.buf);
12138 +
12139 + /* override printk() */
12140 + trace_override++;
12141 +
12142 + mutex_unlock(&reader_mutex);
12143 + out:
12144 + return error;
12145 +}
12146 +
12147 +static int log_release(struct inode *in, struct file *filp)
12148 +{
12149 + int error = -EINVAL;
12150 +
12151 + if (mutex_lock_interruptible(&reader_mutex)) {
12152 + error = -ERESTARTSYS;
12153 + goto out;
12154 + }
12155 +
12156 + atomic_dec(&reader_cnt);
12157 +
12158 + /* release printk() overriding */
12159 + trace_override--;
12160 +
12161 + printk(KERN_DEBUG "sched_trace kfifo released\n");
12162 +
12163 + mutex_unlock(&reader_mutex);
12164 + out:
12165 + return error;
12166 +}
12167 +
12168 +/*
12169 + * log_fops - The file operations for accessing the global LITMUS log message
12170 + * buffer.
12171 + *
12172 + * Except for opening the device file it uses the same operations as trace_fops.
12173 + */
12174 +static struct file_operations log_fops = {
12175 + .owner = THIS_MODULE,
12176 + .open = log_open,
12177 + .release = log_release,
12178 + .read = log_read,
12179 +};
12180 +
12181 +static struct miscdevice litmus_log_dev = {
12182 + .name = SCHED_TRACE_NAME,
12183 + .minor = MISC_DYNAMIC_MINOR,
12184 + .fops = &log_fops,
12185 +};
12186 +
12187 +#ifdef CONFIG_MAGIC_SYSRQ
12188 +void dump_trace_buffer(int max)
12189 +{
12190 + char line[80];
12191 + int len;
12192 + int count = 0;
12193 +
12194 + /* potential, but very unlikely, race... */
12195 + trace_recurse = 1;
12196 + while ((max == 0 || count++ < max) &&
12197 +	       (len = kfifo_out(&debug_buffer, line, sizeof(line) - 1)) > 0) {
12198 + line[len] = '\0';
12199 + printk("%s", line);
12200 + }
12201 + trace_recurse = 0;
12202 +}
12203 +
12204 +static void sysrq_dump_trace_buffer(int key)
12205 +{
12206 + dump_trace_buffer(100);
12207 +}
12208 +
12209 +static struct sysrq_key_op sysrq_dump_trace_buffer_op = {
12210 + .handler = sysrq_dump_trace_buffer,
12211 + .help_msg = "dump-trace-buffer(Y)",
12212 + .action_msg = "writing content of TRACE() buffer",
12213 +};
12214 +#endif
12215 +
12216 +static int __init init_sched_trace(void)
12217 +{
12218 + printk("Initializing TRACE() device\n");
12219 +
12220 +#ifdef CONFIG_MAGIC_SYSRQ
12221 + /* offer some debugging help */
12222 + if (!register_sysrq_key('y', &sysrq_dump_trace_buffer_op))
12223 + printk("Registered dump-trace-buffer(Y) magic sysrq.\n");
12224 + else
12225 + printk("Could not register dump-trace-buffer(Y) magic sysrq.\n");
12226 +#endif
12227 +
12228 + return misc_register(&litmus_log_dev);
12229 +}
12230 +
12231 +static void __exit exit_sched_trace(void)
12232 +{
12233 + misc_deregister(&litmus_log_dev);
12234 +}
12235 +
12236 +module_init(init_sched_trace);
12237 +module_exit(exit_sched_trace);
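
On the userspace side the TRACE() stream is consumed by reading the misc device. A hedged sketch of a reader, assuming the node is exposed as /dev/litmus/log (the exact path depends on how udev handles the misc device name):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	ssize_t n;
	int fd = open("/dev/litmus/log", O_RDONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* log_read() polls the kfifo roughly every 110 jiffies and returns
	 * only once data is available or a signal arrives */
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, (size_t) n, stdout);
	close(fd);
	return 0;
}
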
12238 diff --git a/litmus/srp.c b/litmus/srp.c
12239 new file mode 100644
12240 index 0000000..2ed4ec1
12241 --- /dev/null
12242 +++ b/litmus/srp.c
12243 @@ -0,0 +1,295 @@
12244 +/* ************************************************************************** */
12245 +/* STACK RESOURCE POLICY */
12246 +/* ************************************************************************** */
12247 +
12248 +#include <asm/atomic.h>
12249 +#include <linux/sched.h>
12250 +#include <linux/wait.h>
12251 +
12252 +#include <litmus/litmus.h>
12253 +#include <litmus/sched_plugin.h>
12254 +#include <litmus/fdso.h>
12255 +#include <litmus/trace.h>
12256 +
12257 +
12258 +#ifdef CONFIG_LITMUS_LOCKING
12259 +
12260 +#include <litmus/srp.h>
12261 +
12262 +srp_prioritization_t get_srp_prio;
12263 +
12264 +struct srp {
12265 + struct list_head ceiling;
12266 + wait_queue_head_t ceiling_blocked;
12267 +};
12268 +#define system_ceiling(srp) list2prio(srp->ceiling.next)
12269 +#define ceiling2sem(c) container_of(c, struct srp_semaphore, ceiling)
12270 +
12271 +#define UNDEF_SEM -2
12272 +
12273 +atomic_t srp_objects_in_use = ATOMIC_INIT(0);
12274 +
12275 +DEFINE_PER_CPU(struct srp, srp);
12276 +
12277 +/* Initialize SRP semaphores at boot time. */
12278 +static int __init srp_init(void)
12279 +{
12280 + int i;
12281 +
12282 + printk("Initializing SRP per-CPU ceilings...");
12283 + for (i = 0; i < NR_CPUS; i++) {
12284 + init_waitqueue_head(&per_cpu(srp, i).ceiling_blocked);
12285 + INIT_LIST_HEAD(&per_cpu(srp, i).ceiling);
12286 + }
12287 + printk(" done!\n");
12288 +
12289 + return 0;
12290 +}
12291 +module_init(srp_init);
12292 +
12293 +/* SRP task priority comparison function. Smaller numeric values have higher
12294 + * priority, tie-break is PID. Special case: priority == 0 <=> no priority
12295 + */
12296 +static int srp_higher_prio(struct srp_priority* first,
12297 + struct srp_priority* second)
12298 +{
12299 + if (!first->priority)
12300 + return 0;
12301 + else
12302 + return !second->priority ||
12303 + first->priority < second->priority || (
12304 + first->priority == second->priority &&
12305 + first->pid < second->pid);
12306 +}
12307 +
12308 +
12309 +static int srp_exceeds_ceiling(struct task_struct* first,
12310 + struct srp* srp)
12311 +{
12312 + struct srp_priority prio;
12313 +
12314 + if (list_empty(&srp->ceiling))
12315 + return 1;
12316 + else {
12317 + prio.pid = first->pid;
12318 + prio.priority = get_srp_prio(first);
12319 + return srp_higher_prio(&prio, system_ceiling(srp)) ||
12320 + ceiling2sem(system_ceiling(srp))->owner == first;
12321 + }
12322 +}
12323 +
12324 +static void srp_add_prio(struct srp* srp, struct srp_priority* prio)
12325 +{
12326 + struct list_head *pos;
12327 + if (in_list(&prio->list)) {
12328 + printk(KERN_CRIT "WARNING: SRP violation detected, prio is already in "
12329 + "ceiling list! cpu=%d, srp=%p\n", smp_processor_id(), ceiling2sem(prio));
12330 + return;
12331 + }
12332 + list_for_each(pos, &srp->ceiling)
12333 + if (unlikely(srp_higher_prio(prio, list2prio(pos)))) {
12334 + __list_add(&prio->list, pos->prev, pos);
12335 + return;
12336 + }
12337 +
12338 + list_add_tail(&prio->list, &srp->ceiling);
12339 +}
12340 +
12341 +
12342 +static int lock_srp_semaphore(struct litmus_lock* l)
12343 +{
12344 + struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
12345 +
12346 + if (!is_realtime(current))
12347 + return -EPERM;
12348 +
12349 + preempt_disable();
12350 +
12351 + /* Update ceiling. */
12352 + srp_add_prio(&__get_cpu_var(srp), &sem->ceiling);
12353 +
12354 + /* SRP invariant: all resources available */
12355 + BUG_ON(sem->owner != NULL);
12356 +
12357 + sem->owner = current;
12358 + TRACE_CUR("acquired srp 0x%p\n", sem);
12359 +
12360 + preempt_enable();
12361 +
12362 + return 0;
12363 +}
12364 +
12365 +static int unlock_srp_semaphore(struct litmus_lock* l)
12366 +{
12367 + struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
12368 + int err = 0;
12369 +
12370 + preempt_disable();
12371 +
12372 + if (sem->owner != current) {
12373 + err = -EINVAL;
12374 + } else {
12375 + /* Determine new system priority ceiling for this CPU. */
12376 + BUG_ON(!in_list(&sem->ceiling.list));
12377 +
12378 + list_del(&sem->ceiling.list);
12379 + sem->owner = NULL;
12380 +
12381 + /* Wake tasks on this CPU, if they exceed current ceiling. */
12382 + TRACE_CUR("released srp 0x%p\n", sem);
12383 + wake_up_all(&__get_cpu_var(srp).ceiling_blocked);
12384 + }
12385 +
12386 + preempt_enable();
12387 + return err;
12388 +}
12389 +
12390 +static int open_srp_semaphore(struct litmus_lock* l, void* __user arg)
12391 +{
12392 + struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
12393 + int err = 0;
12394 + struct task_struct* t = current;
12395 + struct srp_priority t_prio;
12396 +
12397 + if (!is_realtime(t))
12398 + return -EPERM;
12399 +
12400 + TRACE_CUR("opening SRP semaphore %p, cpu=%d\n", sem, sem->cpu);
12401 +
12402 + preempt_disable();
12403 +
12404 + if (sem->owner != NULL)
12405 + err = -EBUSY;
12406 +
12407 + if (err == 0) {
12408 + if (sem->cpu == UNDEF_SEM)
12409 + sem->cpu = get_partition(t);
12410 + else if (sem->cpu != get_partition(t))
12411 + err = -EPERM;
12412 + }
12413 +
12414 + if (err == 0) {
12415 + t_prio.priority = get_srp_prio(t);
12416 + t_prio.pid = t->pid;
12417 + if (srp_higher_prio(&t_prio, &sem->ceiling)) {
12418 + sem->ceiling.priority = t_prio.priority;
12419 + sem->ceiling.pid = t_prio.pid;
12420 + }
12421 + }
12422 +
12423 + preempt_enable();
12424 +
12425 + return err;
12426 +}
12427 +
12428 +static int close_srp_semaphore(struct litmus_lock* l)
12429 +{
12430 + struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
12431 + int err = 0;
12432 +
12433 + preempt_disable();
12434 +
12435 + if (sem->owner == current)
12436 + unlock_srp_semaphore(l);
12437 +
12438 + preempt_enable();
12439 +
12440 + return err;
12441 +}
12442 +
12443 +static void deallocate_srp_semaphore(struct litmus_lock* l)
12444 +{
12445 + struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
12446 + atomic_dec(&srp_objects_in_use);
12447 + kfree(sem);
12448 +}
12449 +
12450 +static struct litmus_lock_ops srp_lock_ops = {
12451 + .open = open_srp_semaphore,
12452 + .close = close_srp_semaphore,
12453 + .lock = lock_srp_semaphore,
12454 + .unlock = unlock_srp_semaphore,
12455 + .deallocate = deallocate_srp_semaphore,
12456 +};
12457 +
12458 +struct srp_semaphore* allocate_srp_semaphore(void)
12459 +{
12460 + struct srp_semaphore* sem;
12461 +
12462 + sem = kmalloc(sizeof(*sem), GFP_KERNEL);
12463 + if (!sem)
12464 + return NULL;
12465 +
12466 + INIT_LIST_HEAD(&sem->ceiling.list);
12467 + sem->ceiling.priority = 0;
12468 + sem->cpu = UNDEF_SEM;
12469 + sem->owner = NULL;
12470 +
12471 + sem->litmus_lock.ops = &srp_lock_ops;
12472 +
12473 + atomic_inc(&srp_objects_in_use);
12474 + return sem;
12475 +}
12476 +
12477 +static int srp_wake_up(wait_queue_t *wait, unsigned mode, int sync,
12478 + void *key)
12479 +{
12480 + int cpu = smp_processor_id();
12481 + struct task_struct *tsk = wait->private;
12482 + if (cpu != get_partition(tsk))
12483 +		TRACE_TASK(tsk, "srp_wake_up on wrong cpu, partition is %d\n",
12484 + get_partition(tsk));
12485 + else if (srp_exceeds_ceiling(tsk, &__get_cpu_var(srp)))
12486 + return default_wake_function(wait, mode, sync, key);
12487 + return 0;
12488 +}
12489 +
12490 +static void do_ceiling_block(struct task_struct *tsk)
12491 +{
12492 + wait_queue_t wait = {
12493 + .private = tsk,
12494 + .func = srp_wake_up,
12495 + .task_list = {NULL, NULL}
12496 + };
12497 +
12498 + tsk->state = TASK_UNINTERRUPTIBLE;
12499 + add_wait_queue(&__get_cpu_var(srp).ceiling_blocked, &wait);
12500 + tsk->rt_param.srp_non_recurse = 1;
12501 + preempt_enable_no_resched();
12502 + schedule();
12503 + preempt_disable();
12504 + tsk->rt_param.srp_non_recurse = 0;
12505 + remove_wait_queue(&__get_cpu_var(srp).ceiling_blocked, &wait);
12506 +}
12507 +
12508 +/* Wait for current task priority to exceed system-wide priority ceiling.
12509 + * FIXME: the hotpath should be inline.
12510 + */
12511 +void srp_ceiling_block(void)
12512 +{
12513 + struct task_struct *tsk = current;
12514 +
12515 + /* Only applies to real-time tasks, but optimize for RT tasks. */
12516 + if (unlikely(!is_realtime(tsk)))
12517 + return;
12518 +
12519 + /* Avoid recursive ceiling blocking. */
12520 + if (unlikely(tsk->rt_param.srp_non_recurse))
12521 + return;
12522 +
12523 + /* Bail out early if there aren't any SRP resources around. */
12524 + if (likely(!atomic_read(&srp_objects_in_use)))
12525 + return;
12526 +
12527 + preempt_disable();
12528 + if (!srp_exceeds_ceiling(tsk, &__get_cpu_var(srp))) {
12529 + TRACE_CUR("is priority ceiling blocked.\n");
12530 + while (!srp_exceeds_ceiling(tsk, &__get_cpu_var(srp)))
12531 + do_ceiling_block(tsk);
12532 + TRACE_CUR("finally exceeds system ceiling.\n");
12533 + } else
12534 + TRACE_CUR("is not priority ceiling blocked\n");
12535 + preempt_enable();
12536 +}
12537 +
12538 +#endif
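
Because psnedf_get_srp_prio() returns the period, "higher priority" in srp_higher_prio() means "shorter period", with the PID as tie-breaker and a priority of 0 standing for "no priority". A small worked sketch of the comparison (the values are illustrative):

	struct srp_priority a    = { .priority = 10, .pid = 100 };	/* period 10 */
	struct srp_priority b    = { .priority = 20, .pid =  50 };	/* period 20 */
	struct srp_priority none = { .priority =  0, .pid =  99 };	/* no priority */

	srp_higher_prio(&a, &b);	/* 1: shorter period wins */
	srp_higher_prio(&b, &a);	/* 0 */
	srp_higher_prio(&none, &a);	/* 0: priority 0 never exceeds anything */
	srp_higher_prio(&a, &none);	/* 1: any real priority beats "no priority" */
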
12539 diff --git a/litmus/sync.c b/litmus/sync.c
12540 new file mode 100644
12541 index 0000000..bf75fde
12542 --- /dev/null
12543 +++ b/litmus/sync.c
12544 @@ -0,0 +1,104 @@
12545 +/* litmus/sync.c - Support for synchronous and asynchronous task system releases.
12546 + *
12547 + *
12548 + */
12549 +
12550 +#include <asm/atomic.h>
12551 +#include <asm/uaccess.h>
12552 +#include <linux/spinlock.h>
12553 +#include <linux/list.h>
12554 +#include <linux/sched.h>
12555 +#include <linux/completion.h>
12556 +
12557 +#include <litmus/litmus.h>
12558 +#include <litmus/sched_plugin.h>
12559 +#include <litmus/jobs.h>
12560 +
12561 +#include <litmus/sched_trace.h>
12562 +
12563 +static DECLARE_COMPLETION(ts_release);
12564 +
12565 +static long do_wait_for_ts_release(void)
12566 +{
12567 + long ret = 0;
12568 +
12569 + /* If the interruption races with a release, the completion object
12570 + * may have a non-zero counter. To avoid this problem, this should
12571 + * be replaced by wait_for_completion().
12572 + *
12573 + * For debugging purposes, this is interruptible for now.
12574 + */
12575 + ret = wait_for_completion_interruptible(&ts_release);
12576 +
12577 + return ret;
12578 +}
12579 +
12580 +int count_tasks_waiting_for_release(void)
12581 +{
12582 + unsigned long flags;
12583 + int task_count = 0;
12584 + struct list_head *pos;
12585 +
12586 + spin_lock_irqsave(&ts_release.wait.lock, flags);
12587 + list_for_each(pos, &ts_release.wait.task_list) {
12588 + task_count++;
12589 + }
12590 + spin_unlock_irqrestore(&ts_release.wait.lock, flags);
12591 +
12592 + return task_count;
12593 +}
12594 +
12595 +static long do_release_ts(lt_t start)
12596 +{
12597 + int task_count = 0;
12598 + unsigned long flags;
12599 + struct list_head *pos;
12600 + struct task_struct *t;
12601 +
12602 +
12603 + spin_lock_irqsave(&ts_release.wait.lock, flags);
12604 + TRACE("<<<<<< synchronous task system release >>>>>>\n");
12605 +
12606 + sched_trace_sys_release(&start);
12607 + list_for_each(pos, &ts_release.wait.task_list) {
12608 + t = (struct task_struct*) list_entry(pos,
12609 + struct __wait_queue,
12610 + task_list)->private;
12611 + task_count++;
12612 + litmus->release_at(t, start + t->rt_param.task_params.phase);
12613 + sched_trace_task_release(t);
12614 + }
12615 +
12616 + spin_unlock_irqrestore(&ts_release.wait.lock, flags);
12617 +
12618 + complete_n(&ts_release, task_count);
12619 +
12620 + return task_count;
12621 +}
12622 +
12623 +
12624 +asmlinkage long sys_wait_for_ts_release(void)
12625 +{
12626 + long ret = -EPERM;
12627 + struct task_struct *t = current;
12628 +
12629 + if (is_realtime(t))
12630 + ret = do_wait_for_ts_release();
12631 +
12632 + return ret;
12633 +}
12634 +
12635 +
12636 +asmlinkage long sys_release_ts(lt_t __user *__delay)
12637 +{
12638 + long ret;
12639 + lt_t delay;
12640 +
12641 + /* FIXME: check capabilities... */
12642 +
12643 + ret = copy_from_user(&delay, __delay, sizeof(delay));
12644 + if (ret == 0)
12645 + ret = do_release_ts(litmus_clock() + delay);
12646 +
12647 + return ret;
12648 +}
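
From userspace, the two system calls are typically paired: every real-time task blocks in sys_wait_for_ts_release() and a single controller triggers sys_release_ts() once count_tasks_waiting_for_release() matches the expected task count. A hedged sketch, assuming liblitmus-style wrappers named wait_for_ts_release() and release_ts() (both the wrapper names and the nanosecond interpretation of lt_t are assumptions not shown in this patch):

	/* in each real-time task, once it has become a LITMUS^RT task: */
	wait_for_ts_release();		/* blocks on the ts_release completion */

	/* in the controlling process, after all tasks are waiting: */
	lt_t delay = 1000000;		/* assumed: 1 ms, expressed in nanoseconds */
	release_ts(&delay);		/* sys_release_ts(litmus_clock() + delay) */
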
12649 diff --git a/litmus/trace.c b/litmus/trace.c
12650 new file mode 100644
12651 index 0000000..3c35c52
12652 --- /dev/null
12653 +++ b/litmus/trace.c
12654 @@ -0,0 +1,225 @@
12655 +#include <linux/sched.h>
12656 +#include <linux/module.h>
12657 +#include <linux/uaccess.h>
12658 +
12659 +#include <litmus/ftdev.h>
12660 +#include <litmus/litmus.h>
12661 +#include <litmus/trace.h>
12662 +
12663 +/******************************************************************************/
12664 +/* Allocation */
12665 +/******************************************************************************/
12666 +
12667 +static struct ftdev overhead_dev;
12668 +
12669 +#define trace_ts_buf overhead_dev.minor[0].buf
12670 +
12671 +static unsigned int ts_seq_no = 0;
12672 +
12673 +DEFINE_PER_CPU(atomic_t, irq_fired_count);
12674 +
12675 +static inline void clear_irq_fired(void)
12676 +{
12677 + atomic_set(&__raw_get_cpu_var(irq_fired_count), 0);
12678 +}
12679 +
12680 +static inline unsigned int get_and_clear_irq_fired(void)
12681 +{
12682 + /* This is potentially not atomic since we might migrate if
12683 + * preemptions are not disabled. As a tradeoff between
12684 + * accuracy and tracing overheads, this seems acceptable.
12685 + * If it proves to be a problem, then one could add a callback
12686 + * from the migration code to invalidate irq_fired_count.
12687 + */
12688 + return atomic_xchg(&__raw_get_cpu_var(irq_fired_count), 0);
12689 +}
12690 +
12691 +static inline void __save_irq_flags(struct timestamp *ts)
12692 +{
12693 + unsigned int irq_count;
12694 +
12695 + irq_count = get_and_clear_irq_fired();
12696 + /* Store how many interrupts occurred. */
12697 + ts->irq_count = irq_count;
12698 + /* Extra flag because ts->irq_count overflows quickly. */
12699 + ts->irq_flag = irq_count > 0;
12700 +}
12701 +
12702 +static inline void __save_timestamp_cpu(unsigned long event,
12703 + uint8_t type, uint8_t cpu)
12704 +{
12705 + unsigned int seq_no;
12706 + struct timestamp *ts;
12707 + seq_no = fetch_and_inc((int *) &ts_seq_no);
12708 + if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) {
12709 + ts->event = event;
12710 + ts->seq_no = seq_no;
12711 + ts->cpu = cpu;
12712 + ts->task_type = type;
12713 + __save_irq_flags(ts);
12714 + barrier();
12715 + /* prevent re-ordering of ft_timestamp() */
12716 + ts->timestamp = ft_timestamp();
12717 + ft_buffer_finish_write(trace_ts_buf, ts);
12718 + }
12719 +}
12720 +
12721 +static void __add_timestamp_user(struct timestamp *pre_recorded)
12722 +{
12723 + unsigned int seq_no;
12724 + struct timestamp *ts;
12725 + seq_no = fetch_and_inc((int *) &ts_seq_no);
12726 + if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) {
12727 + *ts = *pre_recorded;
12728 + ts->seq_no = seq_no;
12729 + __save_irq_flags(ts);
12730 + ft_buffer_finish_write(trace_ts_buf, ts);
12731 + }
12732 +}
12733 +
12734 +static inline void __save_timestamp(unsigned long event,
12735 + uint8_t type)
12736 +{
12737 + __save_timestamp_cpu(event, type, raw_smp_processor_id());
12738 +}
12739 +
12740 +feather_callback void save_timestamp(unsigned long event)
12741 +{
12742 + __save_timestamp(event, TSK_UNKNOWN);
12743 +}
12744 +
12745 +feather_callback void save_timestamp_def(unsigned long event,
12746 + unsigned long type)
12747 +{
12748 + __save_timestamp(event, (uint8_t) type);
12749 +}
12750 +
12751 +feather_callback void save_timestamp_task(unsigned long event,
12752 + unsigned long t_ptr)
12753 +{
12754 + int rt = is_realtime((struct task_struct *) t_ptr);
12755 + __save_timestamp(event, rt ? TSK_RT : TSK_BE);
12756 +}
12757 +
12758 +feather_callback void save_timestamp_cpu(unsigned long event,
12759 + unsigned long cpu)
12760 +{
12761 + __save_timestamp_cpu(event, TSK_UNKNOWN, cpu);
12762 +}
12763 +
12764 +feather_callback void save_task_latency(unsigned long event,
12765 + unsigned long when_ptr)
12766 +{
12767 + lt_t now = litmus_clock();
12768 + lt_t *when = (lt_t*) when_ptr;
12769 + unsigned int seq_no;
12770 + int cpu = raw_smp_processor_id();
12771 + struct timestamp *ts;
12772 +
12773 + seq_no = fetch_and_inc((int *) &ts_seq_no);
12774 + if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) {
12775 + ts->event = event;
12776 + ts->timestamp = now - *when;
12777 + ts->seq_no = seq_no;
12778 + ts->cpu = cpu;
12779 + ts->task_type = TSK_RT;
12780 + __save_irq_flags(ts);
12781 + ft_buffer_finish_write(trace_ts_buf, ts);
12782 + }
12783 +}
12784 +
12785 +/******************************************************************************/
12786 +/* DEVICE FILE DRIVER */
12787 +/******************************************************************************/
12788 +
12789 +/*
12790 + * Request 2 << 16 timestamp slots (2 MB of 16-byte entries) up front;
12791 + * a single buddy-system allocation is bounded by MAX_ORDER, so we might not get as much
12792 + */
12793 +#define NO_TIMESTAMPS (2 << 16)
12794 +
12795 +static int alloc_timestamp_buffer(struct ftdev* ftdev, unsigned int idx)
12796 +{
12797 + unsigned int count = NO_TIMESTAMPS;
12798 +
12799 + /* An overhead-tracing timestamp should be exactly 16 bytes long. */
12800 + BUILD_BUG_ON(sizeof(struct timestamp) != 16);
12801 +
12802 + while (count && !trace_ts_buf) {
12803 + printk("time stamp buffer: trying to allocate %u time stamps.\n", count);
12804 + ftdev->minor[idx].buf = alloc_ft_buffer(count, sizeof(struct timestamp));
12805 + count /= 2;
12806 + }
12807 + return ftdev->minor[idx].buf ? 0 : -ENOMEM;
12808 +}
12809 +
12810 +static void free_timestamp_buffer(struct ftdev* ftdev, unsigned int idx)
12811 +{
12812 + free_ft_buffer(ftdev->minor[idx].buf);
12813 + ftdev->minor[idx].buf = NULL;
12814 +}
12815 +
12816 +static ssize_t write_timestamp_from_user(struct ft_buffer* buf, size_t len,
12817 + const char __user *from)
12818 +{
12819 + ssize_t consumed = 0;
12820 + struct timestamp ts;
12821 +
12822 + /* don't give us partial timestamps */
12823 + if (len % sizeof(ts))
12824 + return -EINVAL;
12825 +
12826 + while (len >= sizeof(ts)) {
12827 + if (copy_from_user(&ts, from, sizeof(ts))) {
12828 + consumed = -EFAULT;
12829 + goto out;
12830 + }
12831 + len -= sizeof(ts);
12832 + from += sizeof(ts);
12833 + consumed += sizeof(ts);
12834 +
12835 + __add_timestamp_user(&ts);
12836 + }
12837 +
12838 +out:
12839 + return consumed;
12840 +}
12841 +
12842 +static int __init init_ft_overhead_trace(void)
12843 +{
12844 + int err, cpu;
12845 +
12846 + printk("Initializing Feather-Trace overhead tracing device.\n");
12847 + err = ftdev_init(&overhead_dev, THIS_MODULE, 1, "ft_trace");
12848 + if (err)
12849 + goto err_out;
12850 +
12851 + overhead_dev.alloc = alloc_timestamp_buffer;
12852 + overhead_dev.free = free_timestamp_buffer;
12853 + overhead_dev.write = write_timestamp_from_user;
12854 +
12855 + err = register_ftdev(&overhead_dev);
12856 + if (err)
12857 + goto err_dealloc;
12858 +
12859 +	/* initialize the per-CPU IRQ counters; clear_irq_fired() only
12860 +	 * resets the local CPU, so clear each CPU's counter explicitly */
12861 +	for_each_possible_cpu(cpu)
12862 +		atomic_set(&per_cpu(irq_fired_count, cpu), 0);
12863 +
12864 + return 0;
12865 +
12866 +err_dealloc:
12867 + ftdev_exit(&overhead_dev);
12868 +err_out:
12869 + printk(KERN_WARNING "Could not register ft_trace module.\n");
12870 + return err;
12871 +}
12872 +
12873 +static void __exit exit_ft_overhead_trace(void)
12874 +{
12875 + ftdev_exit(&overhead_dev);
12876 +}
12877 +
12878 +module_init(init_ft_overhead_trace);
12879 +module_exit(exit_ft_overhead_trace);
12880
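
A minimal userspace sketch of how the timestamp records exported by the ft_trace device registered above might be drained. This is not part of the patch: the device path /dev/litmus/ft_trace0 is an assumption (ftdev assigns the actual node name), and the sketch does not decode individual fields, whose layout lives in include/litmus/trace.h; only the 16-byte record size is guaranteed by the BUILD_BUG_ON in alloc_timestamp_buffer().

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

#define TS_SIZE 16	/* matches BUILD_BUG_ON(sizeof(struct timestamp) != 16) */

int main(int argc, char **argv)
{
	/* assumed device path; pass the real node as argv[1] if it differs */
	const char *dev = argc > 1 ? argv[1] : "/dev/litmus/ft_trace0";
	unsigned char rec[TS_SIZE];
	unsigned long n = 0;
	int fd = open(dev, O_RDONLY);

	if (fd < 0) {
		perror(dev);
		return 1;
	}
	/* Drain whole 16-byte records; an actual tool would decode the
	 * event, cpu, seq_no, and cycle-counter fields. */
	while (read(fd, rec, TS_SIZE) == (ssize_t) TS_SIZE)
		n++;
	close(fd);
	printf("read %lu timestamp records\n", n);
	return 0;
}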
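
A companion sketch for the write path handled by write_timestamp_from_user(): user space may inject pre-recorded timestamps by writing whole 16-byte records to the same device. Per the code above, a length that is not a multiple of sizeof(struct timestamp) is rejected with -EINVAL, and the kernel overwrites the sequence number and IRQ fields of each injected record. The device path and the zeroed record contents are again assumptions for illustration only.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	unsigned char rec[16] = { 0 };	/* one record; seq_no/IRQ info are filled in by the kernel */
	int fd = open("/dev/litmus/ft_trace0", O_WRONLY);	/* assumed device path */

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* The write length must be a multiple of 16, else the driver returns -EINVAL. */
	if (write(fd, rec, sizeof(rec)) != (ssize_t) sizeof(rec))
		perror("write timestamp");
	close(fd);
	return 0;
}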