Attachment 'gpusync_rtss13_liblitmus.patch'
1 From 8cc1caa8190c0dcb95d690d43f73d3fea867d377 Mon Sep 17 00:00:00 2001
2 From: Glenn Elliott <gelliott@cs.unc.edu>
3 Date: Sun, 19 May 2013 23:35:28 -0400
4 Subject: [PATCH] squash
5
6 ---
7 Makefile | 127 ++-
8 bin/base_mt_task.c | 38 +-
9 bin/base_task.c | 38 +-
10 bin/null_call.c | 4 +-
11 bin/release_ts.c | 37 +-
12 bin/rt_launch.c | 62 +-
13 bin/rtspin.c | 132 ++-
14 bin/uncache.c | 381 ++++++++
15 gpu/aux_threads.c | 313 ++++++
16 gpu/budget.cpp | 379 ++++++++
17 gpu/dgl.c | 282 ++++++
18 gpu/gpuspin.cu | 2705 +++++++++++++++++++++++++++++++++++++++++++++++++++
19 gpu/ikglptest.c | 653 +++++++++++++
20 gpu/locktest.c | 206 ++++
21 gpu/nested.c | 262 +++++
22 gpu/normal_task.c | 90 ++
23 include/common.h | 7 +
24 include/litmus.h | 322 +++++-
25 include/migration.h | 24 +
26 include/tests.h | 7 +-
27 src/kernel_iface.c | 17 +-
28 src/litmus.c | 270 ++++-
29 src/migration.c | 217 +++++
30 src/signal.c | 109 +++
31 src/syscalls.c | 82 +-
32 src/task.c | 24 +-
33 tests/core_api.c | 9 +-
34 tests/fdso.c | 10 +-
35 tests/locks.c | 12 +-
36 tests/nesting.c | 468 +++++++++
37 tests/pcp.c | 224 ++++-
38 tests/sched.c | 15 +-
39 32 files changed, 7264 insertions(+), 262 deletions(-)
40 create mode 100644 bin/uncache.c
41 create mode 100644 gpu/aux_threads.c
42 create mode 100644 gpu/budget.cpp
43 create mode 100644 gpu/dgl.c
44 create mode 100644 gpu/gpuspin.cu
45 create mode 100644 gpu/ikglptest.c
46 create mode 100644 gpu/locktest.c
47 create mode 100644 gpu/nested.c
48 create mode 100644 gpu/normal_task.c
49 create mode 100644 include/migration.h
50 create mode 100644 src/migration.c
51 create mode 100644 src/signal.c
52 create mode 100644 tests/nesting.c
53
54 diff --git a/Makefile b/Makefile
55 index 8195752..e877ca4 100644
56 --- a/Makefile
57 +++ b/Makefile
58 @@ -14,13 +14,29 @@ ARCH ?= ${host-arch}
59 # LITMUS_KERNEL -- where to find the litmus kernel?
60 LITMUS_KERNEL ?= ../litmus-rt
61
62 +# NUMA Support. Comment out to disable. Requires libnuma dev files.
63 +#
64 +# Enabling this option will ensure all memory resides on NUMA nodes
65 +# that overlap clusters/partitions specified by a call to be_migrate*().
66 +NUMA_SUPPORT = dummyval
67
68 # ##############################################################################
69 # Internal configuration.
70
71 # compiler flags
72 -flags-debug = -Wall -Werror -g -Wdeclaration-after-statement
73 +flags-debug = -O2 -Wall -Werror -g -Wdeclaration-after-statement
74 +#flags-debug = -Wall -Werror -g -Wdeclaration-after-statement
75 +flags-debug-cpp = -O2 -Wall -Werror -g
76 +#flags-debug-cpp = -Wall -Werror -g
77 flags-api = -D_XOPEN_SOURCE=600 -D_GNU_SOURCE
78 +flags-misc = -fasynchronous-unwind-tables -fnon-call-exceptions
79 +
80 +flags-cu-debug = -g -G -Xcompiler -Wall -Xcompiler -Werror
81 +flags-cu-optim = -O2 -Xcompiler -march=native
82 +#flags-cu-optim = -Xcompiler -march=native
83 +flags-cu-nvcc = --use_fast_math -gencode arch=compute_20,code=sm_20 -gencode arch=compute_30,code=sm_30
84 +flags-cu-misc = -Xcompiler -fasynchronous-unwind-tables -Xcompiler -fnon-call-exceptions -Xcompiler -malign-double -Xcompiler -pthread
85 +flags-cu-x86_64 = -m64
86
87 # architecture-specific flags
88 flags-i386 = -m32
89 @@ -48,12 +64,27 @@ LIBLITMUS ?= .
90 headers = -I${LIBLITMUS}/include -I${LIBLITMUS}/arch/${include-${ARCH}}/include
91
92 # combine options
93 -CPPFLAGS = ${flags-api} ${flags-${ARCH}} -DARCH=${ARCH} ${headers}
94 -CFLAGS = ${flags-debug}
95 +CPPFLAGS = ${flags-api} ${flags-debug-cpp} ${flags-misc} ${flags-${ARCH}} -DARCH=${ARCH} ${headers}
96 +CUFLAGS = ${flags-api} ${flags-cu-debug} ${flags-cu-optim} ${flags-cu-nvcc} ${flags-cu-misc} -DARCH=${ARCH} ${headers}
97 +CFLAGS = ${flags-debug} ${flags-misc}
98 LDFLAGS = ${flags-${ARCH}}
99
100 +ifdef NUMA_SUPPORT
101 +CFLAGS += -DLITMUS_NUMA_SUPPORT
102 +CPPFLAGS += -DLITMUS_NUMA_SUPPORT
103 +CUFLAGS += -DLITMUS_NUMA_SUPPORT
104 +endif
105 +
106 # how to link against liblitmus
107 liblitmus-flags = -L${LIBLITMUS} -llitmus
108 +ifdef NUMA_SUPPORT
109 +liblitmus-flags += -lnuma
110 +endif
111 +
112 +# how to link cuda
113 +cuda-flags-i386 = -L/usr/local/cuda/lib
114 +cuda-flags-x86_64 = -L/usr/local/cuda/lib64
115 +cuda-flags = ${cuda-flags-${ARCH}} -lcudart -lcuda
116
117 # Force gcc instead of cc, but let the user specify a more specific version if
118 # desired.
119 @@ -61,17 +92,28 @@ ifeq (${CC},cc)
120 CC = gcc
121 endif
122
123 +#ifeq (${CPP},cpp)
124 +CPP = g++
125 +#endif
126 +
127 +CU = nvcc
128 +
129 # incorporate cross-compiler (if any)
130 CC := ${CROSS_COMPILE}${CC}
131 +CPP := ${CROSS_COMPILE}${CPP}
132 LD := ${CROSS_COMPILE}${LD}
133 AR := ${CROSS_COMPILE}${AR}
134 +CU := ${CROSS_COMPILE}${CU}
135
136 # ##############################################################################
137 # Targets
138
139 -all = lib ${rt-apps}
140 +all = lib ${rt-apps} ${rt-cppapps} ${rt-cuapps}
141 rt-apps = cycles base_task rt_launch rtspin release_ts measure_syscall \
142 - base_mt_task runtests
143 + base_mt_task uncache runtests \
144 + nested locktest ikglptest dgl aux_threads normal_task
145 +rt-cppapps = budget
146 +rt-cuapps = gpuspin
147
148 .PHONY: all lib clean dump-config TAGS tags cscope help
149
150 @@ -86,10 +128,14 @@ inc/config.makefile: Makefile
151 @printf "%-15s= %-20s\n" \
152 ARCH ${ARCH} \
153 CFLAGS '${CFLAGS}' \
154 + CPPFLAGS '${CPPFLAGS}' \
155 + CUFLAGS '${CUFLAGS}' \
156 LDFLAGS '${LDFLAGS}' \
157 LDLIBS '${liblitmus-flags}' \
158 CPPFLAGS '${CPPFLAGS}' \
159 CC '${shell which ${CC}}' \
160 + CPP '${shell which ${CPP}}' \
161 + CU '${shell which ${CU}}' \
162 LD '${shell which ${LD}}' \
163 AR '${shell which ${AR}}' \
164 > $@
165 @@ -103,10 +149,12 @@ dump-config:
166 headers "${headers}" \
167 "kernel headers" "${imported-headers}" \
168 CFLAGS "${CFLAGS}" \
169 - LDFLAGS "${LDFLAGS}" \
170 CPPFLAGS "${CPPFLAGS}" \
171 + CUFLAGS "${CUFLAGS}" \
172 + LDFLAGS "${LDFLAGS}" \
173 CC "${CC}" \
174 CPP "${CPP}" \
175 + CU "${CU}" \
176 LD "${LD}" \
177 AR "${AR}" \
178 obj-all "${obj-all}"
179 @@ -115,7 +163,7 @@ help:
180 @cat INSTALL
181
182 clean:
183 - rm -f ${rt-apps}
184 + rm -f ${rt-apps} ${rt-cppapps} ${rt-cuapps}
185 rm -f *.o *.d *.a test_catalog.inc
186 rm -f ${imported-headers}
187 rm -f inc/config.makefile
188 @@ -156,6 +204,8 @@ arch/${include-${ARCH}}/include/asm/%.h: \
189 litmus-headers = \
190 include/litmus/rt_param.h \
191 include/litmus/fpmath.h \
192 + include/litmus/binheap.h \
193 + include/litmus/signal.h \
194 include/litmus/unistd_32.h \
195 include/litmus/unistd_64.h
196
197 @@ -201,7 +251,7 @@ tests/runner.c: test_catalog.inc
198 # Tools that link with liblitmus
199
200 # these source files are found in bin/
201 -vpath %.c bin/
202 +vpath %.c bin/ gpu/
203
204 obj-cycles = cycles.o
205
206 @@ -210,16 +260,49 @@ obj-base_task = base_task.o
207 obj-base_mt_task = base_mt_task.o
208 ldf-base_mt_task = -pthread
209
210 +obj-aux_threads = aux_threads.o
211 +ldf-aux_threads = -pthread
212 +
213 obj-rt_launch = rt_launch.o common.o
214
215 obj-rtspin = rtspin.o common.o
216 lib-rtspin = -lrt
217
218 +obj-nested = nested.o common.o
219 +lib-nested = -lrt -pthread
220 +
221 +obj-locktest = locktest.o common.o
222 +lib-locktest = -lrt -pthread
223 +
224 +obj-ikglptest = ikglptest.o common.o
225 +lib-ikglptest = -lrt -pthread -lm
226 +
227 +obj-normal_task = normal_task.o common.o
228 +lib-normal_task = -lrt -pthread -lm
229 +
230 +obj-dgl = dgl.o common.o
231 +lib-dgl = -lrt -pthread
232 +
233 +obj-uncache = uncache.o
234 +lib-uncache = -lrt
235 +
236 obj-release_ts = release_ts.o
237
238 obj-measure_syscall = null_call.o
239 lib-measure_syscall = -lm
240
241 +
242 +vpath %.cpp gpu/
243 +
244 +objcpp-budget = budget.o common.o
245 +lib-budget = -lrt -lm -pthread
246 +
247 +
248 +vpath %.cu gpu/
249 +
250 +objcu-gpuspin = gpuspin.o common.o
251 +lib-gpuspin = -lblitz -lrt -lm -lpthread -lboost_filesystem -lboost_system
252 +
253 # ##############################################################################
254 # Build everything that depends on liblitmus.
255
256 @@ -227,12 +310,22 @@ lib-measure_syscall = -lm
257 ${rt-apps}: $${obj-$$@} liblitmus.a
258 $(CC) -o $@ $(LDFLAGS) ${ldf-$@} $(filter-out liblitmus.a,$+) $(LOADLIBS) $(LDLIBS) ${liblitmus-flags} ${lib-$@}
259
260 +${rt-cppapps}: $${objcpp-$$@} liblitmus.a
261 + $(CPP) -o $@ $(LDFLAGS) ${ldf-$@} $(filter-out liblitmus.a,$+) $(LOADLIBS) $(LDLIBS) ${liblitmus-flags} ${lib-$@}
262 +
263 +${rt-cuapps}: $${objcu-$$@} liblitmus.a
264 + $(CPP) -o $@ $(LDFLAGS) ${ldf-$@} $(filter-out liblitmus.a,$+) $(LOADLIBS) $(LDLIBS) ${liblitmus-flags} ${cuda-flags} ${lib-$@}
265 +
266 # ##############################################################################
267 # Dependency resolution.
268
269 -vpath %.c bin/ src/ tests/
270 +vpath %.c bin/ src/ gpu/ tests/
271 +vpath %.cpp gpu/
272 +vpath %.cu gpu/
273
274 obj-all = ${sort ${foreach target,${all},${obj-${target}}}}
275 +obj-all += ${sort ${foreach target,${all},${objcpp-${target}}}}
276 +obj-all += ${sort ${foreach target,${all},${objcu-${target}}}}
277
278 # rule to generate dependency files
279 %.d: %.c ${imported-headers}
280 @@ -241,6 +334,22 @@ obj-all = ${sort ${foreach target,${all},${obj-${target}}}}
281 sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \
282 rm -f $@.$$$$
283
284 +%.d: %.cpp ${imported-headers}
285 + @set -e; rm -f $@; \
286 + $(CPP) -MM $(CPPFLAGS) $< > $@.$$$$; \
287 + sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \
288 + rm -f $@.$$$$
289 +
290 +%.d: %.cu ${imported-headers}
291 + @set -e; rm -f $@; \
292 + $(CU) --generate-dependencies $(CUFLAGS) $< > $@.$$$$; \
293 + sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \
294 + rm -f $@.$$$$
295 +
296 +# teach make how to compile .cu files
297 +%.o: %.cu
298 + $(CU) --compile $(CUFLAGS) $(OUTPUT_OPTION) $<
299 +
300 ifeq ($(MAKECMDGOALS),)
301 MAKECMDGOALS += all
302 endif
303 diff --git a/bin/base_mt_task.c b/bin/base_mt_task.c
304 index 8090cc3..1406b20 100644
305 --- a/bin/base_mt_task.c
306 +++ b/bin/base_mt_task.c
307 @@ -1,4 +1,4 @@
308 -/* based_mt_task.c -- A basic multi-threaded real-time task skeleton.
309 +/* based_mt_task.c -- A basic multi-threaded real-time task skeleton.
310 *
311 * This (by itself useless) task demos how to setup a multi-threaded LITMUS^RT
312 * real-time task. Familiarity with the single threaded example (base_task.c)
313 @@ -26,12 +26,10 @@
314 #define RELATIVE_DEADLINE 100
315 #define EXEC_COST 10
316
317 -#define NS_PER_MS 1e6
318 -
319 /* Let's create 10 threads in the example,
320 * for a total utilization of 1.
321 */
322 -#define NUM_THREADS 10
323 +#define NUM_THREADS 10
324
325 /* The information passed to each thread. Could be anything. */
326 struct thread_context {
327 @@ -43,7 +41,7 @@ struct thread_context {
328 */
329 void* rt_thread(void *tcontext);
330
331 -/* Declare the periodically invoked job.
332 +/* Declare the periodically invoked job.
333 * Returns 1 -> task should exit.
334 * 0 -> task should continue.
335 */
336 @@ -62,7 +60,7 @@ int job(void);
337 } while (0)
338
339
340 -/* Basic setup is the same as in the single-threaded example. However,
341 +/* Basic setup is the same as in the single-threaded example. However,
342 * we do some thread initiliazation first before invoking the job.
343 */
344 int main(int argc, char** argv)
345 @@ -71,7 +69,7 @@ int main(int argc, char** argv)
346 struct thread_context ctx[NUM_THREADS];
347 pthread_t task[NUM_THREADS];
348
349 - /* The task is in background mode upon startup. */
350 + /* The task is in background mode upon startup. */
351
352
353 /*****
354 @@ -79,7 +77,7 @@ int main(int argc, char** argv)
355 */
356
357
358 -
359 +
360 /*****
361 * 2) Work environment (e.g., global data structures, file data, etc.) would
362 * be setup here.
363 @@ -94,7 +92,7 @@ int main(int argc, char** argv)
364 init_litmus();
365
366
367 - /*****
368 + /*****
369 * 4) Launch threads.
370 */
371 for (i = 0; i < NUM_THREADS; i++) {
372 @@ -102,15 +100,15 @@ int main(int argc, char** argv)
373 pthread_create(task + i, NULL, rt_thread, (void *) (ctx + i));
374 }
375
376 -
377 +
378 /*****
379 * 5) Wait for RT threads to terminate.
380 */
381 for (i = 0; i < NUM_THREADS; i++)
382 pthread_join(task[i], NULL);
383 -
384
385 - /*****
386 +
387 + /*****
388 * 6) Clean up, maybe print results and stats, and exit.
389 */
390 return 0;
391 @@ -129,10 +127,10 @@ void* rt_thread(void *tcontext)
392 struct rt_task param;
393
394 /* Set up task parameters */
395 - memset(&param, 0, sizeof(param));
396 - param.exec_cost = EXEC_COST * NS_PER_MS;
397 - param.period = PERIOD * NS_PER_MS;
398 - param.relative_deadline = RELATIVE_DEADLINE * NS_PER_MS;
399 + init_rt_task_param(&param);
400 + param.exec_cost = ms2ns(EXEC_COST);
401 + param.period = ms2ns(PERIOD);
402 + param.relative_deadline = ms2ns(RELATIVE_DEADLINE);
403
404 /* What to do in the case of budget overruns? */
405 param.budget_policy = NO_ENFORCEMENT;
406 @@ -166,7 +164,7 @@ void* rt_thread(void *tcontext)
407 */
408 CALL( task_mode(LITMUS_RT_TASK) );
409
410 - /* The task is now executing as a real-time task if the call didn't fail.
411 + /* The task is now executing as a real-time task if the call didn't fail.
412 */
413
414
415 @@ -178,11 +176,11 @@ void* rt_thread(void *tcontext)
416 /* Wait until the next job is released. */
417 sleep_next_period();
418 /* Invoke job. */
419 - do_exit = job();
420 + do_exit = job();
421 } while (!do_exit);
422
423
424 -
425 +
426 /*****
427 * 4) Transition to background mode.
428 */
429 @@ -194,7 +192,7 @@ void* rt_thread(void *tcontext)
430
431
432
433 -int job(void)
434 +int job(void)
435 {
436 /* Do real-time calculation. */
437
438 diff --git a/bin/base_task.c b/bin/base_task.c
439 index df0c5a2..0274c89 100644
440 --- a/bin/base_task.c
441 +++ b/bin/base_task.c
442 @@ -1,6 +1,6 @@
443 -/* based_task.c -- A basic real-time task skeleton.
444 +/* based_task.c -- A basic real-time task skeleton.
445 *
446 - * This (by itself useless) task demos how to setup a
447 + * This (by itself useless) task demos how to setup a
448 * single-threaded LITMUS^RT real-time task.
449 */
450
451 @@ -20,7 +20,7 @@
452 */
453 #include "litmus.h"
454
455 -/* Next, we define period and execution cost to be constant.
456 +/* Next, we define period and execution cost to be constant.
457 * These are only constants for convenience in this example, they can be
458 * determined at run time, e.g., from command line parameters.
459 *
460 @@ -30,8 +30,6 @@
461 #define RELATIVE_DEADLINE 100
462 #define EXEC_COST 10
463
464 -#define NS_PER_MS 1e6
465 -
466 /* Catch errors.
467 */
468 #define CALL( exp ) do { \
469 @@ -44,13 +42,13 @@
470 } while (0)
471
472
473 -/* Declare the periodically invoked job.
474 +/* Declare the periodically invoked job.
475 * Returns 1 -> task should exit.
476 * 0 -> task should continue.
477 */
478 int job(void);
479
480 -/* typically, main() does a couple of things:
481 +/* typically, main() does a couple of things:
482 * 1) parse command line parameters, etc.
483 * 2) Setup work environment.
484 * 3) Setup real-time parameters.
485 @@ -60,7 +58,7 @@ int job(void);
486 * 7) Clean up and exit.
487 *
488 * The following main() function provides the basic skeleton of a single-threaded
489 - * LITMUS^RT real-time task. In a real program, all the return values should be
490 + * LITMUS^RT real-time task. In a real program, all the return values should be
491 * checked for errors.
492 */
493 int main(int argc, char** argv)
494 @@ -69,10 +67,10 @@ int main(int argc, char** argv)
495 struct rt_task param;
496
497 /* Setup task parameters */
498 - memset(&param, 0, sizeof(param));
499 - param.exec_cost = EXEC_COST * NS_PER_MS;
500 - param.period = PERIOD * NS_PER_MS;
501 - param.relative_deadline = RELATIVE_DEADLINE * NS_PER_MS;
502 + init_rt_task_param(&param);
503 + param.exec_cost = ms2ns(EXEC_COST);
504 + param.period = ms2ns(PERIOD);
505 + param.relative_deadline = ms2ns(RELATIVE_DEADLINE);
506
507 /* What to do in the case of budget overruns? */
508 param.budget_policy = NO_ENFORCEMENT;
509 @@ -100,9 +98,9 @@ int main(int argc, char** argv)
510
511
512 /*****
513 - * 3) Setup real-time parameters.
514 - * In this example, we create a sporadic task that does not specify a
515 - * target partition (and thus is intended to run under global scheduling).
516 + * 3) Setup real-time parameters.
517 + * In this example, we create a sporadic task that does not specify a
518 + * target partition (and thus is intended to run under global scheduling).
519 * If this were to execute under a partitioned scheduler, it would be assigned
520 * to the first partition (since partitioning is performed offline).
521 */
522 @@ -124,7 +122,7 @@ int main(int argc, char** argv)
523 */
524 CALL( task_mode(LITMUS_RT_TASK) );
525
526 - /* The task is now executing as a real-time task if the call didn't fail.
527 + /* The task is now executing as a real-time task if the call didn't fail.
528 */
529
530
531 @@ -136,11 +134,11 @@ int main(int argc, char** argv)
532 /* Wait until the next job is released. */
533 sleep_next_period();
534 /* Invoke job. */
535 - do_exit = job();
536 + do_exit = job();
537 } while (!do_exit);
538
539
540 -
541 +
542 /*****
543 * 6) Transition to background mode.
544 */
545 @@ -148,14 +146,14 @@ int main(int argc, char** argv)
546
547
548
549 - /*****
550 + /*****
551 * 7) Clean up, maybe print results and stats, and exit.
552 */
553 return 0;
554 }
555
556
557 -int job(void)
558 +int job(void)
559 {
560 /* Do real-time calculation. */
561
562 diff --git a/bin/null_call.c b/bin/null_call.c
563 index d714e77..bab8e73 100644
564 --- a/bin/null_call.c
565 +++ b/bin/null_call.c
566 @@ -16,7 +16,7 @@ static void time_null_call(void)
567 t2 = get_cycles();
568 if (ret != 0)
569 perror("null_call");
570 - printf("%10" CYCLES_FMT ", "
571 + printf("%10" CYCLES_FMT ", "
572 "%10" CYCLES_FMT ", "
573 "%10" CYCLES_FMT ", "
574 "%10" CYCLES_FMT ", "
575 @@ -38,7 +38,7 @@ int main(int argc, char **argv)
576 {
577 double delay;
578 struct timespec sleep_time;
579 -
580 +
581 if (argc == 2) {
582 delay = atof(argv[1]);
583 sleep_time = sec2timespec(delay);
584 diff --git a/bin/release_ts.c b/bin/release_ts.c
585 index 7752097..6a74710 100644
586 --- a/bin/release_ts.c
587 +++ b/bin/release_ts.c
588 @@ -10,7 +10,6 @@
589 #include "internal.h"
590
591 #define OPTSTR "d:wf:"
592 -#define NS_PER_MS 1000000
593
594 #define LITMUS_STATS_FILE "/proc/litmus/stats"
595
596 @@ -31,54 +30,34 @@ void usage(char *error) {
597 void wait_until_ready(int expected)
598 {
599 int ready = 0, all = 0;
600 - char buf[100];
601 int loops = 0;
602 - ssize_t len;
603 -
604
605 do {
606 if (loops++ > 0)
607 sleep(1);
608 - len = read_file(LITMUS_STATS_FILE, buf, sizeof(buf) - 1);
609 - if (len < 0) {
610 - fprintf(stderr,
611 - "(EE) Error while reading '%s': %m.\n"
612 - "(EE) Ignoring -w option.\n",
613 - LITMUS_STATS_FILE);
614 - break;
615 - } else {
616 - len = sscanf(buf,
617 - "real-time tasks = %d\n"
618 - "ready for release = %d\n",
619 - &all, &ready);
620 - if (len != 2) {
621 - fprintf(stderr,
622 - "(EE) Could not parse '%s'.\n"
623 - "(EE) Ignoring -w option.\n",
624 - LITMUS_STATS_FILE);
625 - break;
626 - }
627 - }
628 - } while (expected > ready || ready < all);
629 + if (!read_litmus_stats(&ready, &all))
630 + perror("read_litmus_stats");
631 + } while (expected > ready || (!expected && ready < all));
632 }
633
634 int main(int argc, char** argv)
635 {
636 int released;
637 - lt_t delay = ms2lt(1000);
638 + lt_t delay = ms2ns(1000);
639 int wait = 0;
640 int expected = 0;
641 int opt;
642 -
643 +
644 while ((opt = getopt(argc, argv, OPTSTR)) != -1) {
645 switch (opt) {
646 case 'd':
647 - delay = ms2lt(atoi(optarg));
648 + delay = ms2ns(atoi(optarg));
649 break;
650 case 'w':
651 wait = 1;
652 break;
653 case 'f':
654 + wait = 1;
655 expected = atoi(optarg);
656 break;
657 case ':':
658 @@ -99,7 +78,7 @@ int main(int argc, char** argv)
659 perror("release task system");
660 exit(1);
661 }
662 -
663 +
664 printf("Released %d real-time tasks.\n", released);
665
666 return 0;
667 diff --git a/bin/rt_launch.c b/bin/rt_launch.c
668 index 3863031..805e20b 100644
669 --- a/bin/rt_launch.c
670 +++ b/bin/rt_launch.c
671 @@ -29,10 +29,11 @@ int launch(void *task_info_p) {
672 }
673
674 void usage(char *error) {
675 - fprintf(stderr, "%s\nUsage: rt_launch [-w][-v][-p cpu][-c hrt | srt | be] wcet period program [arg1 arg2 ...]\n"
676 + fprintf(stderr, "%s\nUsage: rt_launch [-w][-v][-p partition/cluster [-z cluster size]][-q prio][-c hrt | srt | be] wcet period program [arg1 arg2 ...]\n"
677 "\t-w\tSynchronous release\n"
678 "\t-v\tVerbose\n"
679 - "\t-p\tcpu (or initial cpu)\n"
680 + "\t-p\tpartition or cluster\n"
681 + "\t-z\tsize of cluster (default = 1 for partitioned)\n"
682 "\t-c\tClass\n"
683 "\twcet, period in ms\n"
684 "\tprogram to be launched\n",
685 @@ -41,20 +42,24 @@ void usage(char *error) {
686 }
687
688
689 -#define OPTSTR "p:c:vw"
690 +#define OPTSTR "p:z:c:vwq:t"
691
692 -int main(int argc, char** argv)
693 +int main(int argc, char** argv)
694 {
695 int ret;
696 lt_t wcet;
697 lt_t period;
698 int migrate = 0;
699 - int cpu = 0;
700 + int cluster = 0;
701 + int cluster_size = 1;
702 int opt;
703 int verbose = 0;
704 int wait = 0;
705 startup_info_t info;
706 - task_class_t class = RT_CLASS_HARD;
707 + task_class_t cls = RT_CLASS_HARD;
708 + unsigned int priority = LITMUS_LOWEST_PRIORITY;
709 + budget_policy_t budget_pol = QUANTUM_ENFORCEMENT;
710 + struct rt_task param;
711
712 while ((opt = getopt(argc, argv, OPTSTR)) != -1) {
713 switch (opt) {
714 @@ -65,15 +70,26 @@ int main(int argc, char** argv)
715 verbose = 1;
716 break;
717 case 'p':
718 - cpu = atoi(optarg);
719 + cluster = atoi(optarg);
720 migrate = 1;
721 break;
722 + case 'z':
723 + cluster_size = atoi(optarg);
724 + break;
725 + case 'q':
726 + priority = atoi(optarg);
727 + if (!litmus_is_valid_fixed_prio(priority))
728 + usage("Invalid priority.");
729 + break;
730 case 'c':
731 - class = str2class(optarg);
732 - if (class == -1)
733 + cls = str2class(optarg);
734 + if (cls == -1)
735 usage("Unknown task class.");
736 break;
737 -
738 + case 't':
739 + /* use an hrtimer for budget enforcement */
740 + budget_pol = PRECISE_ENFORCEMENT;
741 + break;
742 case ':':
743 usage("Argument missing.");
744 break;
745 @@ -87,9 +103,9 @@ int main(int argc, char** argv)
746 signal(SIGUSR1, SIG_IGN);
747
748 if (argc - optind < 3)
749 - usage("Arguments missing.");
750 - wcet = ms2lt(atoi(argv[optind + 0]));
751 - period = ms2lt(atoi(argv[optind + 1]));
752 + usage("Arguments missing.");
753 + wcet = ms2ns(atoi(argv[optind + 0]));
754 + period = ms2ns(atoi(argv[optind + 1]));
755 if (wcet <= 0)
756 usage("The worst-case execution time must be a "
757 "positive number.");
758 @@ -103,17 +119,27 @@ int main(int argc, char** argv)
759 info.argv = argv + optind + 2;
760 info.wait = wait;
761 if (migrate) {
762 - ret = be_migrate_to(cpu);
763 + ret = be_migrate_to_cluster(cluster, cluster_size);
764 if (ret < 0)
765 - bail_out("could not migrate to target partition");
766 + bail_out("could not migrate to target partition or cluster");
767 }
768 - ret = __create_rt_task(launch, &info, cpu, wcet, period, class);
769
770 -
771 + init_rt_task_param(&param);
772 + param.exec_cost = wcet;
773 + param.period = period;
774 + param.priority = priority;
775 + param.cls = cls;
776 + param.budget_policy = budget_pol;
777 +
778 + if (migrate)
779 + param.cpu = cluster_to_first_cpu(cluster, cluster_size);
780 +
781 + ret = create_rt_task(launch, &info, &param);
782 +
783 if (ret < 0)
784 bail_out("could not create rt child process");
785 else if (verbose)
786 printf("%d\n", ret);
787
788 - return 0;
789 + return 0;
790 }
791 diff --git a/bin/rtspin.c b/bin/rtspin.c
792 index f0a477d..4a1d994 100644
793 --- a/bin/rtspin.c
794 +++ b/bin/rtspin.c
795 @@ -4,6 +4,7 @@
796 #include <stdlib.h>
797 #include <unistd.h>
798 #include <time.h>
799 +#include <string.h>
800 #include <assert.h>
801
802
803 @@ -20,9 +21,12 @@ static void usage(char *error) {
804 " rt_spin [COMMON-OPTS] -f FILE [-o COLUMN] WCET PERIOD\n"
805 " rt_spin -l\n"
806 "\n"
807 - "COMMON-OPTS = [-w] [-p PARTITION] [-c CLASS] [-s SCALE]\n"
808 + "COMMON-OPTS = [-w] [-s SCALE]\n"
809 + " [-p PARTITION/CLUSTER [-z CLUSTER SIZE]] [-c CLASS]\n"
810 + " [-X LOCKING-PROTOCOL] [-L CRITICAL SECTION LENGTH] [-Q RESOURCE-ID]"
811 "\n"
812 - "WCET and PERIOD are milliseconds, DURATION is seconds.\n");
813 + "WCET and PERIOD are milliseconds, DURATION is seconds.\n"
814 + "CRITICAL SECTION LENGTH is in milliseconds.\n");
815 exit(EXIT_FAILURE);
816 }
817
818 @@ -67,7 +71,7 @@ static void get_exec_times(const char *file, const int column,
819 bail_out("rewinding file failed");
820
821 /* allocate space for exec times */
822 - *exec_times = calloc(*num_jobs, sizeof(*exec_times));
823 + *exec_times = (double*)calloc(*num_jobs, sizeof(*exec_times));
824 if (!*exec_times)
825 bail_out("couldn't allocate memory");
826
827 @@ -77,7 +81,7 @@ static void get_exec_times(const char *file, const int column,
828
829 for (cur_col = 1; cur_col < column; ++cur_col) {
830 /* discard input until we get to the column we want */
831 - fscanf(fstream, "%*s,");
832 + int unused __attribute__ ((unused)) = fscanf(fstream, "%*s,");
833 }
834
835 /* get the desired exec. time */
836 @@ -150,19 +154,37 @@ static void debug_delay_loop(void)
837 }
838 }
839
840 -static int job(double exec_time, double program_end)
841 +static int job(double exec_time, double program_end, int lock_od, double cs_length)
842 {
843 + double chunk1, chunk2;
844 +
845 if (wctime() > program_end)
846 return 0;
847 else {
848 - loop_for(exec_time, program_end + 1);
849 + if (lock_od >= 0) {
850 + /* simulate critical section somewhere in the middle */
851 + chunk1 = drand48() * (exec_time - cs_length);
852 + chunk2 = exec_time - cs_length - chunk1;
853 +
854 + /* non-critical section */
855 + loop_for(chunk1, program_end + 1);
856 +
857 + /* critical section */
858 + litmus_lock(lock_od);
859 + loop_for(cs_length, program_end + 1);
860 + litmus_unlock(lock_od);
861 +
862 + /* non-critical section */
863 + loop_for(chunk2, program_end + 2);
864 + } else {
865 + loop_for(exec_time, program_end + 1);
866 + }
867 sleep_next_period();
868 return 1;
869 }
870 }
871
872 -#define OPTSTR "p:c:wlveo:f:s:q:"
873 -
874 +#define OPTSTR "p:z:c:wlveio:f:s:q:X:L:Q:"
875 int main(int argc, char** argv)
876 {
877 int ret;
878 @@ -171,18 +193,28 @@ int main(int argc, char** argv)
879 double wcet_ms, period_ms;
880 unsigned int priority = LITMUS_LOWEST_PRIORITY;
881 int migrate = 0;
882 - int cpu = 0;
883 + int cluster = 0;
884 + int cluster_size = 1;
885 int opt;
886 int wait = 0;
887 int test_loop = 0;
888 int column = 1;
889 const char *file = NULL;
890 int want_enforcement = 0;
891 - double duration = 0, start;
892 + int want_signals = 0;
893 + double duration = 0, start = 0;
894 double *exec_times = NULL;
895 double scale = 1.0;
896 - task_class_t class = RT_CLASS_HARD;
897 - int cur_job, num_jobs;
898 + task_class_t cls = RT_CLASS_HARD;
899 + int cur_job = 0, num_jobs = 0;
900 + struct rt_task param;
901 +
902 + /* locking */
903 + int lock_od = -1;
904 + int resource_id = 0;
905 + const char *lock_namespace = "./rtspin-locks";
906 + int protocol = -1;
907 + double cs_length = 1; /* millisecond */
908
909 progname = argv[0];
910
911 @@ -192,22 +224,28 @@ int main(int argc, char** argv)
912 wait = 1;
913 break;
914 case 'p':
915 - cpu = atoi(optarg);
916 + cluster = atoi(optarg);
917 migrate = 1;
918 break;
919 + case 'z':
920 + cluster_size = atoi(optarg);
921 + break;
922 case 'q':
923 priority = atoi(optarg);
924 if (!litmus_is_valid_fixed_prio(priority))
925 usage("Invalid priority.");
926 break;
927 case 'c':
928 - class = str2class(optarg);
929 - if (class == -1)
930 + cls = str2class(optarg);
931 + if (cls == -1)
932 usage("Unknown task class.");
933 break;
934 case 'e':
935 want_enforcement = 1;
936 break;
937 + case 'i':
938 + want_signals = 1;
939 + break;
940 case 'l':
941 test_loop = 1;
942 break;
943 @@ -220,6 +258,21 @@ int main(int argc, char** argv)
944 case 's':
945 scale = atof(optarg);
946 break;
947 + case 'X':
948 + protocol = lock_protocol_for_name(optarg);
949 + if (protocol < 0)
950 + usage("Unknown locking protocol specified.");
951 + break;
952 + case 'L':
953 + cs_length = atof(optarg);
954 + if (cs_length <= 0)
955 + usage("Invalid critical section length.");
956 + break;
957 + case 'Q':
958 + resource_id = atoi(optarg);
959 + if (resource_id <= 0 && strcmp(optarg, "0"))
960 + usage("Invalid resource ID.");
961 + break;
962 case ':':
963 usage("Argument missing.");
964 break;
965 @@ -235,6 +288,8 @@ int main(int argc, char** argv)
966 return 0;
967 }
968
969 + srand(getpid());
970 +
971 if (file) {
972 get_exec_times(file, column, &num_jobs, &exec_times);
973
974 @@ -257,8 +312,8 @@ int main(int argc, char** argv)
975 wcet_ms = atof(argv[optind + 0]);
976 period_ms = atof(argv[optind + 1]);
977
978 - wcet = wcet_ms * __NS_PER_MS;
979 - period = period_ms * __NS_PER_MS;
980 + wcet = ms2ns(wcet_ms);
981 + period = ms2ns(period_ms);
982 if (wcet <= 0)
983 usage("The worst-case execution time must be a "
984 "positive number.");
985 @@ -275,24 +330,47 @@ int main(int argc, char** argv)
986 duration += period_ms * 0.001 * (num_jobs - 1);
987
988 if (migrate) {
989 - ret = be_migrate_to(cpu);
990 + ret = be_migrate_to_cluster(cluster, cluster_size);
991 if (ret < 0)
992 - bail_out("could not migrate to target partition");
993 + bail_out("could not migrate to target partition or cluster.");
994 }
995
996 - ret = sporadic_task_ns(wcet, period, 0, cpu, priority, class,
997 - want_enforcement ? PRECISE_ENFORCEMENT
998 - : NO_ENFORCEMENT,
999 - migrate);
1000 + init_rt_task_param(&param);
1001 + param.exec_cost = wcet;
1002 + param.period = period;
1003 + param.priority = priority;
1004 + param.cls = cls;
1005 + param.budget_policy = (want_enforcement) ?
1006 + PRECISE_ENFORCEMENT : NO_ENFORCEMENT;
1007 + param.budget_signal_policy = (want_enforcement && want_signals) ?
1008 + PRECISE_SIGNALS : NO_SIGNALS;
1009 +
1010 + if (migrate)
1011 + param.cpu = cluster_to_first_cpu(cluster, cluster_size);
1012 + ret = set_rt_task_param(gettid(), &param);
1013 if (ret < 0)
1014 bail_out("could not setup rt task params");
1015
1016 init_litmus();
1017
1018 + if (want_signals) {
1019 + /* bind default longjmp signal handler to SIG_BUDGET. */
1020 + activate_litmus_signals(SIG_BUDGET_MASK, longjmp_on_litmus_signal);
1021 + }
1022 +
1023 ret = task_mode(LITMUS_RT_TASK);
1024 if (ret != 0)
1025 bail_out("could not become RT task");
1026
1027 + if (protocol >= 0) {
1028 + /* open reference to semaphore */
1029 + lock_od = litmus_open_lock(protocol, resource_id, lock_namespace, &cluster);
1030 + if (lock_od < 0) {
1031 + perror("litmus_open_lock");
1032 + usage("Could not open lock.");
1033 + }
1034 + }
1035 +
1036 if (wait) {
1037 ret = wait_for_ts_release();
1038 if (ret != 0)
1039 @@ -306,11 +384,13 @@ int main(int argc, char** argv)
1040 for (cur_job = 0; cur_job < num_jobs; ++cur_job) {
1041 /* convert job's length to seconds */
1042 job(exec_times[cur_job] * 0.001 * scale,
1043 - start + duration);
1044 + start + duration,
1045 + lock_od, cs_length * 0.001);
1046 }
1047 } else {
1048 - /* conver to seconds and scale */
1049 - while (job(wcet_ms * 0.001 * scale, start + duration));
1050 + /* convert to seconds and scale */
1051 + while (job(wcet_ms * 0.001 * scale, start + duration,
1052 + lock_od, cs_length * 0.001));
1053 }
1054
1055 ret = task_mode(BACKGROUND_TASK);
1056 diff --git a/bin/uncache.c b/bin/uncache.c
1057 new file mode 100644
1058 index 0000000..b6f6913
1059 --- /dev/null
1060 +++ b/bin/uncache.c
1061 @@ -0,0 +1,381 @@
1062 +#include <stdio.h>
1063 +#include <stdlib.h>
1064 +#include <unistd.h>
1065 +#include <time.h>
1066 +#include <sched.h>
1067 +#include <assert.h>
1068 +#include <string.h>
1069 +#include <stdint.h>
1070 +#include <sys/fcntl.h>
1071 +#include <sys/mman.h>
1072 +
1073 +/* Test tool for validating Litmus's uncache device. */
1074 +/* Tool also capable basic cache vs. sysmem statistics. */
1075 +/* Compile with '-O2' for significaintly greater margins */
1076 +/* in performance between cache and sysmem: */
1077 +/* (Intel Xeon X5650) */
1078 +/* -g -> uncache is 30x slower */
1079 +/* -O2 -> uncache is >100x slower */
1080 +
1081 +int PAGE_SIZE;
1082 +#define NR_PAGES 16
1083 +
1084 +#define UNCACHE_DEV "/dev/litmus/uncache"
1085 +
1086 +/* volatile forces a read from memory (or cache) on every reference. Note
1087 + that volatile does not keep data out of the cache! */
1088 +typedef volatile char* pbuf_t;
1089 +
1090 +/* hit the first byte in each page.
1091 + addr must be page aligned. */
1092 +inline int linear_write(pbuf_t addr, int size, char val)
1093 +{
1094 + pbuf_t end = addr + size;
1095 + pbuf_t step;
1096 + int nr_pages = (unsigned long)(end - addr)/PAGE_SIZE;
1097 + int times = nr_pages * PAGE_SIZE;
1098 + int i;
1099 +
1100 + for (i = 0; i < times; ++i)
1101 + for(step = addr; step < end; step += PAGE_SIZE)
1102 + *step = val;
1103 + return 0;
1104 +}
1105 +inline int linear_read(pbuf_t addr, int size, char val)
1106 +{
1107 + pbuf_t end = addr + size;
1108 + pbuf_t step;
1109 + int nr_pages = (unsigned long)(end - addr)/PAGE_SIZE;
1110 + int times = nr_pages * PAGE_SIZE;
1111 + int i;
1112 +
1113 + for (i = 0; i < times; ++i)
1114 + for(step = addr; step < end; step += PAGE_SIZE) {
1115 + if (*step != val)
1116 + return -1;
1117 + }
1118 + return 0;
1119 +}
1120 +
1121 +/* write to *data nr times. */
1122 +inline int hammer_write(pbuf_t data, char val, int nr)
1123 +{
1124 + int i;
1125 + for (i = 0; i < nr; ++i)
1126 + *data = val;
1127 + return 0;
1128 +}
1129 +
1130 +/* read from *data nr times. */
1131 +inline int hammer_read(pbuf_t data, char val, int nr)
1132 +{
1133 + int i;
1134 + for (i = 0; i < nr; ++i) {
1135 + if (*data != val)
1136 + return -1;
1137 + }
1138 + return 0;
1139 +}
1140 +
1141 +inline int test(pbuf_t data, int size, int trials)
1142 +{
1143 + int HAMMER_TIME = 10000; /* can't cache this! */
1144 + char VAL = 0x55;
1145 + int t;
1146 + for(t = 0; t < trials; ++t) {
1147 +
1148 +#if 0
1149 + if (linear_write(data, size, VAL) != 0) {
1150 + printf("failed linear_write()\n");
1151 + return -1;
1152 + }
1153 + if (linear_read(data, size, VAL) != 0) {
1154 + printf("failed linear_read()\n");
1155 + return -1;
1156 + }
1157 +#endif
1158 +
1159 + /* hammer at the first byte in the array */
1160 + if (hammer_write(data, VAL, HAMMER_TIME) != 0) {
1161 + printf("failed hammer_write()\n");
1162 + return -1;
1163 + }
1164 + if (hammer_read(data, VAL, HAMMER_TIME) != 0) {
1165 + printf("failed hammer_read()\n");
1166 + return -1;
1167 + }
1168 + }
1169 + return 0;
1170 +}
1171 +
1172 +inline void timespec_normalize(struct timespec* ts, time_t sec, int64_t nsec)
1173 +{
1174 + while(nsec > 1000000000LL) {
1175 + asm("" : "+rm"(nsec));
1176 + nsec -= 1000000000LL;
1177 + ++sec;
1178 + }
1179 + while(nsec < 0) {
1180 + asm("" : "+rm"(nsec));
1181 + nsec += 1000000000LL;
1182 + --sec;
1183 + }
1184 +
1185 + ts->tv_sec = sec;
1186 + ts->tv_nsec = nsec;
1187 +}
1188 +
1189 +inline struct timespec timespec_sub(struct timespec lhs, struct timespec rhs)
1190 +{
1191 + struct timespec delta;
1192 + timespec_normalize(&delta, lhs.tv_sec - rhs.tv_sec, lhs.tv_nsec - rhs.tv_nsec);
1193 + return delta;
1194 +}
1195 +
1196 +inline struct timespec timespec_add(struct timespec lhs, struct timespec rhs)
1197 +{
1198 + struct timespec delta;
1199 + timespec_normalize(&delta, lhs.tv_sec + rhs.tv_sec, lhs.tv_nsec + rhs.tv_nsec);
1200 + return delta;
1201 +}
1202 +
1203 +inline int64_t timespec_to_us(struct timespec ts)
1204 +{
1205 + int64_t t;
1206 + t = ts.tv_sec * 1000000LL;
1207 + t += ts.tv_nsec / 1000LL;
1208 + return t;
1209 +}
1210 +
1211 +/* hammers away at the first byte in each mmaped page and
1212 + times how long it took. */
1213 +int do_data(int do_uncache, int64_t* time)
1214 +{
1215 + int size;
1216 + int prot = PROT_READ | PROT_WRITE;
1217 + int flags = MAP_PRIVATE;
1218 +
1219 + pbuf_t data;
1220 +
1221 + struct sched_param fifo_params;
1222 +
1223 + struct timespec start, end;
1224 + int64_t elapsed;
1225 + int trials = 1000;
1226 +
1227 + printf("Running data access test.\n");
1228 +
1229 + mlockall(MCL_CURRENT | MCL_FUTURE);
1230 +
1231 + memset(&fifo_params, 0, sizeof(fifo_params));
1232 + fifo_params.sched_priority = sched_get_priority_max(SCHED_FIFO);
1233 +
1234 + size = PAGE_SIZE*NR_PAGES;
1235 +
1236 + printf("Allocating %d %s pages.\n", NR_PAGES, (do_uncache) ?
1237 + "uncacheable" : "cacheable");
1238 + if (do_uncache) {
1239 + int fd = open(UNCACHE_DEV, O_RDWR);
1240 + data = mmap(NULL, size, prot, flags, fd, 0);
1241 + close(fd);
1242 + }
1243 + else {
1244 + /* Accessed data will probably fit in L1, so this will go VERY fast.
1245 + Code should also have little-to-no pipeline stalls. */
1246 + flags |= MAP_ANONYMOUS;
1247 + data = mmap(NULL, size, prot, flags, -1, 0);
1248 + }
1249 + if (data == MAP_FAILED) {
1250 + printf("Failed to alloc data! "
1251 + "Are you running Litmus? "
1252 + "Is Litmus broken?\n");
1253 + return -1;
1254 + }
1255 + else {
1256 + printf("Data allocated at %p.\n", data);
1257 + }
1258 +
1259 + printf("Beginning tests...\n");
1260 + if (sched_setscheduler(getpid(), SCHED_FIFO, &fifo_params)) {
1261 + printf("(Could not become SCHED_FIFO task.) Are you running as root?\n");
1262 + }
1263 +
1264 + /* observations suggest that no warmup phase is needed. */
1265 + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
1266 + if (test(data, size, trials) != 0) {
1267 + printf("Test failed!\n");
1268 + munmap((char*)data, size);
1269 + return -1;
1270 + }
1271 + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
1272 + elapsed = timespec_to_us(timespec_sub(end, start));
1273 + printf("%s Time: %ldus\n", (do_uncache) ?
1274 + "Uncache" : "Cache", elapsed);
1275 +
1276 + munmap((char*)data, size);
1277 +
1278 + if(time)
1279 + *time = elapsed;
1280 +
1281 + return 0;
1282 +}
1283 +
1284 +/* compares runtime of cached vs. uncached */
1285 +int do_data_compare()
1286 +{
1287 + const double thresh = 1.3;
1288 + int ret = 0;
1289 + double ratio;
1290 + int64_t cache_time = 0, uncache_time = 0;
1291 +
1292 + printf("Timing cached pages...\n");
1293 + ret = do_data(0, &cache_time);
1294 + if (ret != 0)
1295 + goto out;
1296 +
1297 + printf("Timing uncached pages...\n");
1298 + ret = do_data(1, &uncache_time);
1299 + if (ret != 0)
1300 + goto out;
1301 +
1302 + ratio = (double)uncache_time/(double)cache_time;
1303 + printf("Uncached/Cached Ratio: %f\n", ratio);
1304 +
1305 + if (ratio < thresh) {
1306 + printf("Ratio is unexpectedly small (< %f)! "
1307 + " Uncache broken? Are you on kvm?\n", thresh);
1308 + ret = -1;
1309 + }
1310 +
1311 +out:
1312 + return ret;
1313 +}
1314 +
1315 +/* tries to max out uncache allocations.
1316 + under normal conditions (non-mlock),
1317 + pages should spill into swap. uncache
1318 + pages are not locked in memory. */
1319 +int do_max_alloc(void)
1320 +{
1321 + int fd;
1322 + int good = 1;
1323 + int count = 0;
1324 + uint64_t mmap_size = PAGE_SIZE; /* start at one page per mmap */
1325 +
1326 + /* half of default limit on ubuntu. (see /proc/sys/vm/max_map_count) */
1327 + int max_mmaps = 32765;
1328 + volatile char** maps = calloc(max_mmaps, sizeof(pbuf_t));
1329 +
1330 + if (!maps) {
1331 + printf("failed to alloc pointers for pages\n");
1332 + return -1;
1333 + }
1334 +
1335 + printf("Testing max amount of uncache data. System may get wonkie (OOM Killer)!\n");
1336 +
1337 + fd = open(UNCACHE_DEV, O_RDWR);
1338 + do {
1339 + int i;
1340 + int nr_pages = mmap_size/PAGE_SIZE;
1341 + printf("Testing mmaps of %d pages.\n", nr_pages);
1342 +
1343 + count = 0;
1344 + for (i = 0; (i < max_mmaps) && good; ++i) {
1345 + pbuf_t data = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_POPULATE, fd, 0);
1346 +
1347 + if (data != MAP_FAILED) {
1348 + maps[i] = data;
1349 + ++count;
1350 + }
1351 + else {
1352 + perror(NULL);
1353 + good = 0;
1354 + }
1355 + }
1356 + for (i = 0; i < count; ++i) {
1357 + if (maps[i])
1358 + munmap((char*)(maps[i]), mmap_size);
1359 + }
1360 + memset(maps, 0, sizeof(maps[0])*max_mmaps);
1361 +
1362 + mmap_size *= 2; /* let's do it again with bigger allocations */
1363 + }while(good);
1364 +
1365 + free(maps);
1366 + close(fd);
1367 +
1368 + printf("Maxed out allocs with %d mmaps of %lu pages in size.\n",
1369 + count, mmap_size/PAGE_SIZE);
1370 +
1371 + return 0;
1372 +}
1373 +
1374 +typedef enum
1375 +{
1376 + UNCACHE,
1377 + CACHE,
1378 + COMPARE,
1379 + MAX_ALLOC
1380 +} test_t;
1381 +
1382 +#define OPTSTR "ucxa"
1383 +int main(int argc, char** argv)
1384 +{
1385 + int ret;
1386 + test_t test = UNCACHE;
1387 + int opt;
1388 + PAGE_SIZE = sysconf(_SC_PAGE_SIZE);
1389 +
1390 + while((opt = getopt(argc, argv, OPTSTR)) != -1) {
1391 + switch(opt) {
1392 + case 'c':
1393 + test = CACHE;
1394 + break;
1395 + case 'u':
1396 + test = UNCACHE;
1397 + break;
1398 + case 'x':
1399 + test = COMPARE;
1400 + break;
1401 + case 'a':
1402 + test = MAX_ALLOC;
1403 + break;
1404 + case ':':
1405 + printf("missing option\n");
1406 + exit(-1);
1407 + case '?':
1408 + default:
1409 + printf("bad argument\n");
1410 + exit(-1);
1411 + }
1412 + }
1413 +
1414 +
1415 + printf("Page Size: %d\n", PAGE_SIZE);
1416 +
1417 + switch(test)
1418 + {
1419 + case CACHE:
1420 + ret = do_data(0, NULL);
1421 + break;
1422 + case UNCACHE:
1423 + ret = do_data(1, NULL);
1424 + break;
1425 + case COMPARE:
1426 + ret = do_data_compare();
1427 + break;
1428 + case MAX_ALLOC:
1429 + ret = do_max_alloc();
1430 + break;
1431 + default:
1432 + printf("invalid test\n");
1433 + ret = -1;
1434 + break;
1435 + }
1436 +
1437 + if (ret != 0) {
1438 + printf("Test failed.\n");
1439 + }
1440 +
1441 + return ret;
1442 +}
1443 diff --git a/gpu/aux_threads.c b/gpu/aux_threads.c
1444 new file mode 100644
1445 index 0000000..1711c40
1446 --- /dev/null
1447 +++ b/gpu/aux_threads.c
1448 @@ -0,0 +1,313 @@
1449 +/* based_mt_task.c -- A basic multi-threaded real-time task skeleton.
1450 + *
1451 + * This (by itself useless) task demos how to setup a multi-threaded LITMUS^RT
1452 + * real-time task. Familiarity with the single threaded example (base_task.c)
1453 + * is assumed.
1454 + *
1455 + * Currently, liblitmus still lacks automated support for real-time
1456 + * tasks, but internaly it is thread-safe, and thus can be used together
1457 + * with pthreads.
1458 + */
1459 +
1460 +#include <stdio.h>
1461 +#include <stdlib.h>
1462 +#include <unistd.h>
1463 +
1464 +#include <fcntl.h>
1465 +#include <sys/stat.h>
1466 +#include <sys/time.h>
1467 +#include <sys/resource.h>
1468 +
1469 +/* Include gettid() */
1470 +#include <sys/types.h>
1471 +
1472 +/* Include threading support. */
1473 +#include <pthread.h>
1474 +
1475 +/* Include the LITMUS^RT API.*/
1476 +#include "litmus.h"
1477 +
1478 +//#define PERIOD 500
1479 +#define PERIOD 10
1480 +//#define EXEC_COST 10
1481 +#define EXEC_COST 1
1482 +
1483 +int NUM_AUX_THREADS = 2;
1484 +
1485 +#define LITMUS_STATS_FILE "/proc/litmus/stats"
1486 +
1487 +/* The information passed to each thread. Could be anything. */
1488 +struct thread_context {
1489 + int id;
1490 + struct timeval total_time;
1491 +};
1492 +
1493 +/* The real-time thread program. Doesn't have to be the same for
1494 + * all threads. Here, we only have one that will invoke job().
1495 + */
1496 +void* rt_thread(void *tcontext);
1497 +void* aux_thread(void *tcontext);
1498 +
1499 +/* Declare the periodically invoked job.
1500 + * Returns 1 -> task should exit.
1501 + * 0 -> task should continue.
1502 + */
1503 +int job(void);
1504 +
1505 +
1506 +/* Catch errors.
1507 + */
1508 +#define CALL( exp ) do { \
1509 + int ret; \
1510 + ret = exp; \
1511 + if (ret != 0) \
1512 + fprintf(stderr, "%s failed: %m\n", #exp);\
1513 + else \
1514 + fprintf(stderr, "%s ok.\n", #exp); \
1515 + } while (0)
1516 +
1517 +int gRun = 1;
1518 +
1519 +pthread_mutex_t gMutex = PTHREAD_MUTEX_INITIALIZER;
1520 +pthread_barrier_t gBar;
1521 +
1522 +#define OPTSTR "t:fcb"
1523 +
1524 +int main(int argc, char** argv)
1525 +{
1526 + int i;
1527 + struct thread_context *ctx;
1528 + pthread_t *task;
1529 +
1530 + int opt;
1531 + int before = 0;
1532 + int aux_flags = 0;
1533 + int do_future = 0;
1534 +
1535 + while ((opt = getopt(argc, argv, OPTSTR)) != -1) {
1536 + switch(opt)
1537 + {
1538 + case 't':
1539 + NUM_AUX_THREADS = atoi(optarg);
1540 + printf("%d aux threads\n", NUM_AUX_THREADS);
1541 + break;
1542 + case 'f':
1543 + aux_flags |= AUX_FUTURE;
1544 + do_future = 1;
1545 + break;
1546 + case 'c':
1547 + aux_flags |= AUX_CURRENT;
1548 + break;
1549 + case 'b':
1550 + before = 1;
1551 + printf("Will become real-time before spawning aux threads.\n");
1552 + break;
1553 + }
1554 + }
1555 +
1556 + if (aux_flags == 0) {
1557 + printf("Must specify -c (AUX_CURRENT) and/or -f (AUX_FUTURE) for aux tasks.\n");
1558 + return -1;
1559 + }
1560 +
1561 + ctx = calloc(NUM_AUX_THREADS, sizeof(struct thread_context));
1562 + task = calloc(NUM_AUX_THREADS, sizeof(pthread_t));
1563 +
1564 + //lt_t delay = ms2lt(1000);
1565 +
1566 + /*****
1567 + * 3) Initialize LITMUS^RT.
1568 + * Task parameters will be specified per thread.
1569 + */
1570 + init_litmus();
1571 +
1572 + {
1573 + pthread_barrierattr_t battr;
1574 + pthread_barrierattr_init(&battr);
1575 + pthread_barrier_init(&gBar, &battr, (NUM_AUX_THREADS)+1);
1576 + }
1577 +
1578 + if(before)
1579 + {
1580 + CALL( init_rt_thread() );
1581 + CALL( sporadic_partitioned(EXEC_COST, PERIOD, 0) );
1582 + CALL( task_mode(LITMUS_RT_TASK) );
1583 + }
1584 +
1585 +
1586 + if(do_future && before)
1587 + {
1588 + CALL( enable_aux_rt_tasks(aux_flags) );
1589 + }
1590 +
1591 +// printf("Red Leader is now real-time!\n");
1592 +
1593 + for (i = 0; i < NUM_AUX_THREADS; i++) {
1594 + ctx[i].id = i;
1595 + pthread_create(task + i, NULL, aux_thread, (void *) (ctx + i));
1596 + }
1597 +
1598 +// pthread_barrier_wait(&gBar);
1599 +
1600 +// sleep(1);
1601 +
1602 + if(!before)
1603 + {
1604 + CALL( init_rt_thread() );
1605 + CALL( sporadic_global(EXEC_COST, PERIOD) );
1606 + CALL( task_mode(LITMUS_RT_TASK) );
1607 + }
1608 +
1609 + // secondary call *should* be harmless
1610 + CALL( enable_aux_rt_tasks(aux_flags) );
1611 +
1612 + {
1613 + int last = time(0);
1614 +// struct timespec sleeptime = {0, 1000}; // 1 microsecond
1615 +// for(i = 0; i < 24000; ++i) {
1616 + for(i = 0; i < 2000; ++i) {
1617 + sleep_next_period();
1618 +// printf("RED LEADER!\n");
1619 +
1620 +// nanosleep(&sleeptime, NULL);
1621 +
1622 + pthread_mutex_lock(&gMutex);
1623 +
1624 + if((i%(10000/PERIOD)) == 0) {
1625 + int now = time(0);
1626 + printf("hearbeat %d: %d\n", i, now - last);
1627 + last = now;
1628 + }
1629 +
1630 + pthread_mutex_unlock(&gMutex);
1631 + }
1632 + }
1633 +
1634 + CALL( disable_aux_rt_tasks(aux_flags) );
1635 + gRun = 0;
1636 +
1637 + CALL( task_mode(BACKGROUND_TASK) );
1638 +
1639 + /*****
1640 + * 5) Wait for RT threads to terminate.
1641 + */
1642 + for (i = 0; i < NUM_AUX_THREADS; i++) {
1643 + if (task[i] != 0) {
1644 + float time;
1645 + pthread_join(task[i], NULL);
1646 + time = ctx[i].total_time.tv_sec + ctx[i].total_time.tv_usec / (float)(1e6);
1647 + printf("child %d: %fs\n", i, time);
1648 + }
1649 + }
1650 +
1651 +
1652 + /*****
1653 + * 6) Clean up, maybe print results and stats, and exit.
1654 + */
1655 + return 0;
1656 +}
1657 +
1658 +
1659 +
1660 +/* A real-time thread is very similar to the main function of a single-threaded
1661 + * real-time app. Notice, that init_rt_thread() is called to initialized per-thread
1662 + * data structures of the LITMUS^RT user space libary.
1663 + */
1664 +void* aux_thread(void *tcontext)
1665 +{
1666 + struct thread_context *ctx = (struct thread_context *) tcontext;
1667 + int count = 0;
1668 +
1669 +// pthread_barrier_wait(&gBar);
1670 +
1671 + while(gRun)
1672 + {
1673 + if(count++ % 100000 == 0) {
1674 + pthread_mutex_lock(&gMutex);
1675 + pthread_mutex_unlock(&gMutex);
1676 + }
1677 + }
1678 +
1679 + {
1680 + struct rusage use;
1681 + long int sec;
1682 +
1683 + getrusage(RUSAGE_THREAD, &use);
1684 +
1685 + ctx->total_time.tv_usec = use.ru_utime.tv_usec + use.ru_stime.tv_usec;
1686 + sec = ctx->total_time.tv_usec / (long int)(1e6);
1687 + ctx->total_time.tv_usec = ctx->total_time.tv_usec % (long int)(1e6);
1688 + ctx->total_time.tv_sec = use.ru_utime.tv_sec + use.ru_stime.tv_sec + sec;
1689 + }
1690 +
1691 + return ctx;
1692 +}
1693 +
1694 +
1695 +/* A real-time thread is very similar to the main function of a single-threaded
1696 + * real-time app. Notice, that init_rt_thread() is called to initialized per-thread
1697 + * data structures of the LITMUS^RT user space libary.
1698 + */
1699 +void* rt_thread(void *tcontext)
1700 +{
1701 + struct thread_context *ctx = (struct thread_context *) tcontext;
1702 +
1703 + /* Make presence visible. */
1704 + printf("RT Thread %d active.\n", ctx->id);
1705 +
1706 + /*****
1707 + * 1) Initialize real-time settings.
1708 + */
1709 + CALL( init_rt_thread() );
1710 + CALL( sporadic_global(EXEC_COST, PERIOD + ctx->id * 10) );
1711 +
1712 +
1713 + /*****
1714 + * 2) Transition to real-time mode.
1715 + */
1716 + CALL( task_mode(LITMUS_RT_TASK) );
1717 +
1718 +
1719 +
1720 + wait_for_ts_release();
1721 +
1722 + /* The task is now executing as a real-time task if the call didn't fail.
1723 + */
1724 +
1725 +
1726 +
1727 + /*****
1728 + * 3) Invoke real-time jobs.
1729 + */
1730 + while(gRun) {
1731 + /* Wait until the next job is released. */
1732 + sleep_next_period();
1733 + printf("%d: task.\n", ctx->id);
1734 + }
1735 +
1736 + /*****
1737 + * 4) Transition to background mode.
1738 + */
1739 + CALL( task_mode(BACKGROUND_TASK) );
1740 +
1741 + {
1742 + struct rusage use;
1743 + long int sec;
1744 +
1745 + getrusage(RUSAGE_THREAD, &use);
1746 + ctx->total_time.tv_usec = use.ru_utime.tv_usec + use.ru_stime.tv_usec;
1747 + sec = ctx->total_time.tv_usec / (long int)(1e6);
1748 + ctx->total_time.tv_usec = ctx->total_time.tv_usec % (long int)(1e6);
1749 + ctx->total_time.tv_sec = use.ru_utime.tv_sec + use.ru_stime.tv_sec + sec;
1750 + }
1751 +
1752 + return ctx;
1753 +}
1754 +
1755 +int job(void)
1756 +{
1757 + /* Do real-time calculation. */
1758 +
1759 + /* Don't exit. */
1760 + return 0;
1761 +}
1762 diff --git a/gpu/budget.cpp b/gpu/budget.cpp
1763 new file mode 100644
1764 index 0000000..e08daf7
1765 --- /dev/null
1766 +++ b/gpu/budget.cpp
1767 @@ -0,0 +1,379 @@
1768 +#include <stdio.h>
1769 +#include <stdlib.h>
1770 +#include <stdint.h>
1771 +#include <math.h>
1772 +#include <unistd.h>
1773 +#include <assert.h>
1774 +#include <errno.h>
1775 +#include <sys/types.h>
1776 +#include <sys/stat.h>
1777 +#include <fcntl.h>
1778 +
1779 +/* Include gettid() */
1780 +#include <sys/types.h>
1781 +
1782 +/* Include threading support. */
1783 +#include <pthread.h>
1784 +
1785 +/* Include the LITMUS^RT API.*/
1786 +#include "litmus.h"
1787 +
1788 +#define NUMS 4096
1789 +static int nums[NUMS];
1790 +
1791 +inline static lt_t cputime_ns(void)
1792 +{
1793 + struct timespec ts;
1794 + lt_t time;
1795 + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts);
1796 +
1797 + // safe, as long as sizeof(ls_t) >= 8
1798 + time = s2ns(ts.tv_sec) + ts.tv_nsec;
1799 +
1800 + return time;
1801 +}
1802 +
1803 +inline static lt_t wtime_ns(void)
1804 +{
1805 + struct timespec ts;
1806 + lt_t time;
1807 + clock_gettime(CLOCK_MONOTONIC, &ts);
1808 +
1809 + // safe, as long as sizeof(ls_t) >= 8
1810 + time = s2ns(ts.tv_sec) + ts.tv_nsec;
1811 +
1812 + return time;
1813 +}
1814 +
1815 +static int loop_once(void)
1816 +{
1817 + int i, j = 0;
1818 + for (i = 0; i < NUMS; ++i)
1819 + j += nums[i]++;
1820 + return j;
1821 +}
1822 +
1823 +int loop_for(lt_t time)
1824 +{
1825 + lt_t end, now;
1826 + lt_t last_loop = 0, loop_start;
1827 + int dummy = 0;
1828 +
1829 + last_loop = 0;
1830 +
1831 + now = cputime_ns();
1832 + end = now + time;
1833 +
1834 + /* '+ last_loop' attempts to avoid overrun */
1835 + while (now + last_loop < end) {
1836 + loop_start = now;
1837 + dummy += loop_once();
1838 + now = cputime_ns();
1839 + last_loop = now - loop_start;
1840 + }
1841 +
1842 + return dummy;
1843 +}
1844 +
1845 +int OVERRUN = 0;
1846 +int SIGNALS = 0;
1847 +int BLOCK_SIGNALS_ON_SLEEP = 0;
1848 +int OVERRUN_RATE = 1; /* default: every job overruns */
1849 +
1850 +int CXS_OVERRUN = 0;
1851 +int NUM_LOCKS = 1;
1852 +int NUM_REPLICAS = 1;
1853 +int NAMESPACE = 0;
1854 +int *LOCKS = NULL;
1855 +int IKGLP_LOCK = 0;
1856 +int USE_DGLS = 0;
1857 +int NEST_IN_IKGLP = 0;
1858 +
1859 +int WAIT = 0;
1860 +
1861 +enum eLockType
1862 +{
1863 + FIFO,
1864 + PRIOQ,
1865 + IKGLP
1866 +};
1867 +
1868 +eLockType LOCK_TYPE = FIFO;
1869 +
1870 +int OVERRUN_BY_SLEEP = 0;
1871 +
1872 +int NUM_JOBS = 0;
1873 +int NUM_COMPLETED_JOBS = 0;
1874 +int NUM_OVERRUNS = 0;
1875 +
1876 +lt_t overrun_extra = 0;
1877 +
1878 +int job(lt_t exec_ns, lt_t budget_ns)
1879 +{
1880 + ++NUM_JOBS;
1881 +
1882 + try{
1883 + lt_t approx_remaining = budget_ns;
1884 + lt_t now = cputime_ns();
1885 + loop_for(lt_t(exec_ns * 0.9)); /* fudge it a bit to account for overheads */
1886 +
1887 + if (OVERRUN) {
1888 + // do we want to overrun this job?
1889 + if ((NUM_JOBS % OVERRUN_RATE) == 0) {
1890 + approx_remaining -= (cputime_ns() - now);
1891 +
1892 + if (SIGNALS && BLOCK_SIGNALS_ON_SLEEP)
1893 + block_litmus_signals(SIG_BUDGET);
1894 +
1895 + if(CXS_OVERRUN) {
1896 + if (NEST_IN_IKGLP)
1897 + litmus_lock(IKGLP_LOCK);
1898 + if (USE_DGLS)
1899 + litmus_dgl_lock(LOCKS, NUM_LOCKS);
1900 + else
1901 + for(int i = 0; i < NUM_LOCKS; ++i)
1902 + litmus_lock(LOCKS[i]);
1903 + }
1904 +
1905 + // intentionally overrun via suspension
1906 + if (OVERRUN_BY_SLEEP)
1907 + lt_sleep(approx_remaining + overrun_extra);
1908 + else
1909 + loop_for((approx_remaining + overrun_extra) * 0.9);
1910 +
1911 + if(CXS_OVERRUN) {
1912 + if (USE_DGLS)
1913 + litmus_dgl_unlock(LOCKS, NUM_LOCKS);
1914 + else
1915 + for(int i = NUM_LOCKS-1; i >= 0; --i)
1916 + litmus_unlock(LOCKS[i]);
1917 + if (NEST_IN_IKGLP)
1918 + litmus_unlock(IKGLP_LOCK);
1919 + }
1920 +
1921 + if (SIGNALS && BLOCK_SIGNALS_ON_SLEEP)
1922 + unblock_litmus_signals(SIG_BUDGET);
1923 + }
1924 + }
1925 + ++NUM_COMPLETED_JOBS;
1926 + }
1927 + catch (const litmus::sigbudget& e) {
1928 + ++NUM_OVERRUNS;
1929 + }
1930 +
1931 + sleep_next_period();
1932 + return 1;
1933 +}
1934 +
1935 +#define OPTSTR "SbosOvzalwqixdn:r:p:"
1936 +
1937 +int main(int argc, char** argv)
1938 +{
1939 + int ret;
1940 +
1941 + srand(getpid());
1942 +
1943 + lt_t e_ns = ms2ns(2);
1944 + lt_t p_ns = ms2ns(50) + rand()%200;
1945 + lt_t budget_ns = p_ns/2;
1946 + lt_t duration = s2ns(60);
1947 + lt_t terminate_time;
1948 + unsigned int first_job, last_job;
1949 + int opt;
1950 + struct rt_task param;
1951 + budget_drain_policy_t drain_policy = DRAIN_SIMPLE;
1952 + int compute_overrun_rate = 0;
1953 + int once = 1;
1954 +
1955 + bool migrate = false;
1956 + int partition = 0;
1957 + int partition_sz = 1;
1958 +
1959 + while ((opt = getopt(argc, argv, OPTSTR)) != -1) {
1960 + switch(opt) {
1961 + case 'p':
1962 + migrate = true;
1963 + partition = atoi(optarg);
1964 + break;
1965 + case 'S':
1966 + SIGNALS = 1;
1967 + break;
1968 + case 'b':
1969 + BLOCK_SIGNALS_ON_SLEEP = 1;
1970 + break;
1971 + case 's':
1972 + OVERRUN_BY_SLEEP = 1;
1973 + break;
1974 + case 'o':
1975 + OVERRUN = 1;
1976 + overrun_extra = budget_ns/2;
1977 + break;
1978 + case 'O':
1979 + OVERRUN = 1;
1980 + overrun_extra = 4*p_ns;
1981 + break;
1982 + case 'a':
1983 + /* select an overrun rate such that a task should be caught
1984 + * up from a backlog caused by an overrun before the next
1985 + * overrun occurs.
1986 + */
1987 + compute_overrun_rate = 1;
1988 + break;
1989 + case 'v':
1990 + drain_policy = DRAIN_SOBLIV;
1991 + break;
1992 + case 'z':
1993 + drain_policy = DRAIN_SIMPLE_IO;
1994 + break;
1995 + case 'l':
1996 + CXS_OVERRUN = 1;
1997 + NAMESPACE = open("semaphores", O_RDONLY | O_CREAT, S_IRUSR | S_IWUSR);
1998 + break;
1999 + case 'q':
2000 + LOCK_TYPE = PRIOQ;
2001 + break;
2002 + case 'i':
2003 + LOCK_TYPE = IKGLP;
2004 + break;
2005 + case 'x':
2006 + NEST_IN_IKGLP = 1;
2007 + break;
2008 + case 'w':
2009 + WAIT = 1;
2010 + break;
2011 + case 'd':
2012 + USE_DGLS = 1;
2013 + break;
2014 + case 'n':
2015 + NUM_LOCKS = atoi(optarg);
2016 + break;
2017 + case 'r':
2018 + NUM_REPLICAS = atoi(optarg);
2019 + break;
2020 + case ':':
2021 + printf("missing argument\n");
2022 + assert(false);
2023 + break;
2024 + default:
2025 + printf("unknown option\n");
2026 + assert(false);
2027 + break;
2028 + }
2029 + }
2030 +
2031 + assert(!BLOCK_SIGNALS_ON_SLEEP || (BLOCK_SIGNALS_ON_SLEEP && SIGNALS));
2032 + assert(!CXS_OVERRUN || (CXS_OVERRUN && WAIT));
2033 + assert(LOCK_TYPE != IKGLP || NUM_LOCKS == 1);
2034 + assert(LOCK_TYPE != IKGLP || (LOCK_TYPE == IKGLP && !NEST_IN_IKGLP));
2035 + assert(NUM_LOCKS > 0);
2036 + if (LOCK_TYPE == IKGLP || NEST_IN_IKGLP)
2037 + assert(NUM_REPLICAS >= 1);
2038 +
2039 + LOCKS = new int[NUM_LOCKS];
2040 +
2041 + if (compute_overrun_rate) {
2042 + int backlog = (int)ceil((overrun_extra + budget_ns)/(double)budget_ns);
2043 + if (!CXS_OVERRUN)
2044 + OVERRUN_RATE = backlog + 2; /* some padding */
2045 + else
2046 + OVERRUN_RATE = 2*backlog + 2; /* overrun less frequently for testing */
2047 + }
2048 +
2049 + init_rt_task_param(&param);
2050 + param.exec_cost = budget_ns;
2051 + param.period = p_ns;
2052 + param.release_policy = PERIODIC;
2053 + param.drain_policy = drain_policy;
2054 + if (!SIGNALS)
2055 + param.budget_policy = PRECISE_ENFORCEMENT;
2056 + else
2057 + param.budget_signal_policy = PRECISE_SIGNALS;
2058 + if (migrate)
2059 + param.cpu = cluster_to_first_cpu(partition, partition_sz);
2060 +
2061 + // set up affinity and init litmus
2062 + if (migrate) {
2063 + ret = be_migrate_to_cluster(partition, partition_sz);
2064 + assert(!ret);
2065 + }
2066 + init_litmus();
2067 +
2068 + ret = set_rt_task_param(gettid(), &param);
2069 + assert(ret == 0);
2070 +
2071 + if (CXS_OVERRUN) {
2072 + int i;
2073 + for(i = 0; i < NUM_LOCKS; ++i) {
2074 + int lock = -1;
2075 + switch(LOCK_TYPE)
2076 + {
2077 + case FIFO:
2078 + lock = open_fifo_sem(NAMESPACE, i);
2079 + break;
2080 + case PRIOQ:
2081 + lock = open_prioq_sem(NAMESPACE, i);
2082 + break;
2083 + case IKGLP:
2084 + lock = open_ikglp_sem(NAMESPACE, i, NUM_REPLICAS);
2085 + break;
2086 + }
2087 + if (lock < 0) {
2088 + perror("open_sem");
2089 + exit(-1);
2090 + }
2091 + LOCKS[i] = lock;
2092 + }
2093 +
2094 + if (NEST_IN_IKGLP) {
2095 + IKGLP_LOCK = open_ikglp_sem(NAMESPACE, i, NUM_REPLICAS);
2096 + if (IKGLP_LOCK < 0) {
2097 + perror("open_sem");
2098 + exit(-1);
2099 + }
2100 + }
2101 + }
2102 +
2103 + if (WAIT) {
2104 + ret = wait_for_ts_release();
2105 + if (ret < 0)
2106 + perror("wait_for_ts_release");
2107 + }
2108 +
2109 + ret = task_mode(LITMUS_RT_TASK);
2110 + assert(ret == 0);
2111 +
2112 + sleep_next_period();
2113 +
2114 + ret = get_job_no(&first_job);
2115 + assert(ret == 0);
2116 +
2117 + terminate_time = duration + wtime_ns();
2118 +
2119 + while (wtime_ns() < terminate_time) {
2120 + try{
2121 + if(once) {
2122 + activate_litmus_signals(SIG_BUDGET, litmus::throw_on_litmus_signal);
2123 + once = 0;
2124 + }
2125 + job(e_ns, budget_ns);
2126 + }
2127 + catch(const litmus::sigbudget &e) {
2128 + /* drop silently */
2129 + }
2130 + }
2131 +
2132 + ret = get_job_no(&last_job);
2133 + assert(ret == 0);
2134 +
2135 + ret = task_mode(BACKGROUND_TASK);
2136 + assert(ret == 0);
2137 +
2138 + printf("# Kernel Jobs: %d\n", last_job - first_job + 1);
2139 + printf("# User Started Jobs: %d\n", NUM_JOBS);
2140 + printf("# User Jobs Completed: %d\n", NUM_COMPLETED_JOBS);
2141 + printf("# Overruns: %d\n", NUM_OVERRUNS);
2142 +
2143 + delete[] LOCKS;
2144 +
2145 + return 0;
2146 +}
2147 diff --git a/gpu/dgl.c b/gpu/dgl.c
2148 new file mode 100644
2149 index 0000000..c40fec6
2150 --- /dev/null
2151 +++ b/gpu/dgl.c
2152 @@ -0,0 +1,282 @@
2153 +#include <stdio.h>
2154 +#include <stdlib.h>
2155 +#include <stdint.h>
2156 +#include <unistd.h>
2157 +#include <assert.h>
2158 +#include <errno.h>
2159 +#include <sys/types.h>
2160 +#include <sys/stat.h>
2161 +#include <fcntl.h>
2162 +
2163 +/* Include gettid() */
2164 +#include <sys/types.h>
2165 +
2166 +/* Include threading support. */
2167 +#include <pthread.h>
2168 +
2169 +/* Include the LITMUS^RT API.*/
2170 +#include "litmus.h"
2171 +
2172 +#define xfprintf( ... ) do { \
2173 +if(!SILENT) { fprintf( __VA_ARGS__ ) ; } \
2174 +} while (0)
2175 +
2176 +
2177 +/* Catch errors.
2178 + */
2179 +#define CALL( exp ) do { \
2180 + int ret; \
2181 + ret = exp; \
2182 + if (ret != 0) \
2183 + xfprintf(stderr, "%s failed: %m\n", #exp);\
2184 + else \
2185 + xfprintf(stderr, "%s ok.\n", #exp); \
2186 + } while (0)
2187 +
2188 +#define TH_CALL( exp ) do { \
2189 + int ret; \
2190 + ret = exp; \
2191 + if (ret != 0) \
2192 + xfprintf(stderr, "[%d] %s failed: %m\n", ctx->id, #exp); \
2193 + else \
2194 + xfprintf(stderr, "[%d] %s ok.\n", ctx->id, #exp); \
2195 + } while (0)
2196 +
2197 +#define TH_SAFE_CALL( exp ) do { \
2198 + int ret; \
2199 + xfprintf(stderr, "[%d] calling %s...\n", ctx->id, #exp); \
2200 + ret = exp; \
2201 + if (ret != 0) \
2202 + xfprintf(stderr, "\t...[%d] %s failed: %m\n", ctx->id, #exp); \
2203 + else \
2204 + xfprintf(stderr, "\t...[%d] %s ok.\n", ctx->id, #exp); \
2205 + } while (0)
2206 +
2207 +
2208 +
2209 +
2210 +
2211 +/* these are only default values */
2212 +int NUM_THREADS=3;
2213 +int NUM_SEMS=1;
2214 +unsigned int NUM_REPLICAS=0;
2215 +int NEST_DEPTH=1;
2216 +
2217 +int SILENT = 0;
2218 +
2219 +int SLEEP_BETWEEN_JOBS = 1;
2220 +int USE_PRIOQ = 0;
2221 +
2222 +#define MAX_SEMS 1000
2223 +#define MAX_NEST_DEPTH 10
2224 +
2225 +
2226 +// 1000 = 1us
2227 +#define EXEC_COST 1000*1
2228 +#define PERIOD 1000*10
2229 +
2230 +/* The information passed to each thread. Could be anything. */
2231 +struct thread_context {
2232 + int id;
2233 + int fd;
2234 + int ikglp;
2235 + int od[MAX_SEMS];
2236 + int count;
2237 + unsigned int rand;
2238 +};
2239 +
2240 +void* rt_thread(void* _ctx);
2241 +int nested_job(struct thread_context* ctx, int *count, int *next);
2242 +int job(struct thread_context*);
2243 +
2244 +#define OPTSTR "t:k:s:d:fqX"
2245 +
2246 +int main(int argc, char** argv)
2247 +{
2248 + int i;
2249 + struct thread_context* ctx;
2250 + pthread_t* task;
2251 + int fd;
2252 +
2253 + int opt;
2254 + while((opt = getopt(argc, argv, OPTSTR)) != -1) {
2255 + switch(opt) {
2256 + case 't':
2257 + NUM_THREADS = atoi(optarg);
2258 + break;
2259 + case 'k':
2260 + NUM_REPLICAS = atoi(optarg);
2261 + assert(NUM_REPLICAS > 0);
2262 + break;
2263 + case 's':
2264 + NUM_SEMS = atoi(optarg);
2265 + assert(NUM_SEMS >= 0 && NUM_SEMS <= MAX_SEMS);
2266 + break;
2267 + case 'd':
2268 + NEST_DEPTH = atoi(optarg);
2269 + assert(NEST_DEPTH >= 1 && NEST_DEPTH <= MAX_NEST_DEPTH);
2270 + break;
2271 + case 'f':
2272 + SLEEP_BETWEEN_JOBS = 0;
2273 + break;
2274 + case 'q':
2275 + USE_PRIOQ = 1;
2276 + break;
2277 + case 'X':
2278 + SILENT = 1;
2279 + break;
2280 + default:
2281 + fprintf(stderr, "Unknown option: %c\n", opt);
2282 + exit(-1);
2283 + break;
2284 + }
2285 + }
2286 +
2287 + ctx = (struct thread_context*) calloc(NUM_THREADS, sizeof(struct thread_context));
2288 + task = (pthread_t*) calloc(NUM_THREADS, sizeof(pthread_t));
2289 +
2290 + srand(0); /* something repeatable for now */
2291 +
2292 + fd = open("semaphores", O_RDONLY | O_CREAT, S_IRUSR | S_IWUSR);
2293 +
2294 + CALL( init_litmus() );
2295 +
2296 + for (i = 0; i < NUM_THREADS; i++) {
2297 + ctx[i].id = i;
2298 + ctx[i].fd = fd;
2299 + ctx[i].rand = rand();
2300 + CALL( pthread_create(task + i, NULL, rt_thread, ctx + i) );
2301 + }
2302 +
2303 +
2304 + for (i = 0; i < NUM_THREADS; i++)
2305 + pthread_join(task[i], NULL);
2306 +
2307 +
2308 + return 0;
2309 +}
2310 +
2311 +void* rt_thread(void* _ctx)
2312 +{
2313 + int i;
2314 + int do_exit = 0;
2315 + struct rt_task param;
2316 +
2317 + struct thread_context *ctx = (struct thread_context*)_ctx;
2318 +
2319 +	init_rt_task_param(&param);
2320 + param.exec_cost = EXEC_COST;
2321 + param.period = PERIOD + 10*ctx->id; /* Vary period a little bit. */
2322 + param.cls = RT_CLASS_SOFT;
2323 +
2324 + TH_CALL( init_rt_thread() );
2325 +	TH_CALL( set_rt_task_param(gettid(), &param) );
2326 +
2327 + if (NUM_REPLICAS) {
2328 + ctx->ikglp = open_ikglp_sem(ctx->fd, 0, NUM_REPLICAS);
2329 + if(ctx->ikglp < 0)
2330 + perror("open_ikglp_sem");
2331 + else
2332 + xfprintf(stdout, "ikglp od = %d\n", ctx->ikglp);
2333 + }
2334 +
2335 +
2336 + for (i = 0; i < NUM_SEMS; i++) {
2337 + if(!USE_PRIOQ) {
2338 + ctx->od[i] = open_fifo_sem(ctx->fd, i+1);
2339 + if(ctx->od[i] < 0)
2340 + perror("open_fifo_sem");
2341 + else
2342 + xfprintf(stdout, "fifo[%d] od = %d\n", i, ctx->od[i]);
2343 + }
2344 + else {
2345 + ctx->od[i] = open_prioq_sem(ctx->fd, i+1);
2346 + if(ctx->od[i] < 0)
2347 + perror("open_prioq_sem");
2348 + else
2349 + xfprintf(stdout, "prioq[%d] od = %d\n", i, ctx->od[i]);
2350 + }
2351 + }
2352 +
2353 + TH_CALL( task_mode(LITMUS_RT_TASK) );
2354 +
2355 +
2356 + xfprintf(stdout, "[%d] Waiting for TS release.\n ", ctx->id);
2357 + wait_for_ts_release();
2358 + ctx->count = 0;
2359 +
2360 + do {
2361 + int replica = -1;
2362 + int first = (int)(NUM_SEMS * (rand_r(&(ctx->rand)) / (RAND_MAX + 1.0)));
2363 + int last = (first + NEST_DEPTH - 1 >= NUM_SEMS) ? NUM_SEMS - 1 : first + NEST_DEPTH - 1;
2364 + int dgl_size = last - first + 1;
2365 + int dgl[dgl_size];
2366 +
2367 + // construct the DGL
2368 + for(i = first; i <= last; ++i) {
2369 + dgl[i-first] = ctx->od[i];
2370 + }
2371 +
2372 +
2373 + if(NUM_REPLICAS) {
2374 + replica = litmus_lock(ctx->ikglp);
2375 + xfprintf(stdout, "[%d] got ikglp replica %d.\n", ctx->id, replica);
2376 + }
2377 +
2378 +
2379 + litmus_dgl_lock(dgl, dgl_size);
2380 + xfprintf(stdout, "[%d] acquired dgl.\n", ctx->id);
2381 +
2382 + do_exit = job(ctx);
2383 +
2384 + fprintf(stdout, "[%d] should yield dgl: %d.\n", ctx->id, litmus_dgl_should_yield_lock(dgl, dgl_size));
2385 +
2386 + xfprintf(stdout, "[%d] unlocking dgl.\n", ctx->id);
2387 + litmus_dgl_unlock(dgl, dgl_size);
2388 +
2389 + if(NUM_REPLICAS) {
2390 + xfprintf(stdout, "[%d]: freeing ikglp replica %d.\n", ctx->id, replica);
2391 + litmus_unlock(ctx->ikglp);
2392 + }
2393 +
2394 + if(SLEEP_BETWEEN_JOBS && !do_exit) {
2395 + sleep_next_period();
2396 + }
2397 + } while(!do_exit);
2398 +
2399 + /*****
2400 + * 4) Transition to background mode.
2401 + */
2402 + TH_CALL( task_mode(BACKGROUND_TASK) );
2403 +
2404 +
2405 + return NULL;
2406 +}
2407 +
2408 +void dirty_kb(int kb)
2409 +{
2410 + int32_t one_kb[256];
2411 + int32_t sum = 0;
2412 + int32_t i;
2413 +
2414 + for (i = 0; i < 256; i++)
2415 + sum += one_kb[i];
2416 + kb--;
2417 + /* prevent tail recursion */
2418 + if (kb)
2419 + dirty_kb(kb);
2420 + for (i = 0; i < 256; i++)
2421 + sum += one_kb[i];
2422 +}
2423 +
2424 +int job(struct thread_context* ctx)
2425 +{
2426 + /* Do real-time calculation. */
2427 + dirty_kb(8);
2428 +
2429 + /* Don't exit. */
2430 + //return ctx->count++ > 100;
2431 + //return ctx->count++ > 12000;
2432 + //return ctx->count++ > 120000;
2433 + return ctx->count++ > 50000; // controls number of jobs per task
2434 +}
2435 diff --git a/gpu/gpuspin.cu b/gpu/gpuspin.cu
2436 new file mode 100644
2437 index 0000000..c42dea9
2438 --- /dev/null
2439 +++ b/gpu/gpuspin.cu
2440 @@ -0,0 +1,2705 @@
2441 +#include <sys/time.h>
2442 +
2443 +#include <stdio.h>
2444 +#include <stdlib.h>
2445 +#include <unistd.h>
2446 +#include <time.h>
2447 +#include <string.h>
2448 +#include <assert.h>
2449 +#include <execinfo.h>
2450 +
2451 +#include <exception>
2452 +
2453 +#include <boost/interprocess/managed_shared_memory.hpp>
2454 +#include <boost/interprocess/sync/interprocess_mutex.hpp>
2455 +#include <boost/filesystem.hpp>
2456 +
2457 +#include <random/normal.h>
2458 +
2459 +#include <cuda.h>
2460 +#include <cuda_runtime.h>
2461 +
2462 +#include "litmus.h"
2463 +#include "common.h"
2464 +
2465 +using namespace std;
2466 +using namespace boost::interprocess;
2467 +using namespace ranlib;
2468 +
2469 +#define ms2s(ms) ((ms)*0.001)
2470 +
2471 +const unsigned int TOKEN_START = 100;
2472 +const unsigned int TOKEN_END = 101;
2473 +
2474 +const unsigned int EE_START = 200;
2475 +const unsigned int EE_END = 201;
2476 +
2477 +const unsigned int CE_SEND_START = 300;
2478 +const unsigned int CE_SEND_END = 301;
2479 +
2480 +const unsigned int CE_RECV_START = 400;
2481 +const unsigned int CE_RECV_END = 401;
2482 +
2483 +bool SILENT = true;
2484 +//bool SILENT = false;
2485 +inline int xprintf(const char *format, ...)
2486 +{
2487 + int ret = 0;
2488 + if (!SILENT) {
2489 + va_list args;
2490 + va_start(args, format);
2491 + ret = vprintf(format, args);
2492 + va_end(args);
2493 + }
2494 + return ret;
2495 +}
2496 +
2497 +const char *lock_namespace = "./.gpuspin-locks";
2498 +const size_t PAGE_SIZE = sysconf(_SC_PAGESIZE);
2499 +
2500 +const int NR_GPUS = 8;
2501 +
2502 +bool WANT_SIGNALS = false;
2503 +inline void gpuspin_block_litmus_signals(unsigned long mask)
2504 +{
2505 + if (WANT_SIGNALS)
2506 + block_litmus_signals(mask);
2507 +}
2508 +
2509 +inline void gpuspin_unblock_litmus_signals(unsigned long mask)
2510 +{
2511 + if (WANT_SIGNALS)
2512 + unblock_litmus_signals(mask);
2513 +}
2514 +
2515 +bool GPU_USING = false;
2516 +bool ENABLE_AFFINITY = false;
2517 +bool RELAX_FIFO_MAX_LEN = false;
2518 +bool ENABLE_CHUNKING = false;
2519 +bool MIGRATE_VIA_SYSMEM = false;
2520 +
2521 +bool YIELD_LOCKS = false;
2522 +
2523 +enum eEngineLockTypes
2524 +{
2525 + FIFO,
2526 + PRIOQ
2527 +};
2528 +
2529 +eEngineLockTypes ENGINE_LOCK_TYPE = FIFO;
2530 +
2531 +int GPU_PARTITION = 0;
2532 +int GPU_PARTITION_SIZE = 0;
2533 +int CPU_PARTITION_SIZE = 0;
2534 +
2535 +int RHO = 2;
2536 +
2537 +int NUM_COPY_ENGINES = 2;
2538 +
2539 +
2540 +__attribute__((unused)) static size_t kbToB(size_t kb) { return kb * 1024; }
2541 +__attribute__((unused)) static size_t mbToB(size_t mb) { return kbToB(mb * 1024); }
2542 +
2543 +/* in bytes */
2544 +size_t SEND_SIZE = 0;
2545 +size_t RECV_SIZE = 0;
2546 +size_t STATE_SIZE = 0;
2547 +size_t CHUNK_SIZE = 0;
2548 +
2549 +int TOKEN_LOCK = -1;
2550 +
2551 +bool USE_ENGINE_LOCKS = false;
2552 +bool USE_DYNAMIC_GROUP_LOCKS = false;
2553 +int EE_LOCKS[NR_GPUS];
2554 +int CE_SEND_LOCKS[NR_GPUS];
2555 +int CE_RECV_LOCKS[NR_GPUS];
2556 +int CE_MIGR_SEND_LOCKS[NR_GPUS];
2557 +int CE_MIGR_RECV_LOCKS[NR_GPUS];
2558 +bool RESERVED_MIGR_COPY_ENGINE = false; // only checked if NUM_COPY_ENGINES == 2
2559 +
2560 +//bool ENABLE_RT_AUX_THREADS = false;
2561 +bool ENABLE_RT_AUX_THREADS = true;
2562 +
2563 +enum eGpuSyncMode
2564 +{
2565 + IKGLP_MODE,
2566 + IKGLP_WC_MODE, /* work-conserving IKGLP. no GPU is left idle, but breaks optimality */
2567 + KFMLP_MODE,
2568 + RGEM_MODE,
2569 +};
2570 +
2571 +eGpuSyncMode GPU_SYNC_MODE = IKGLP_MODE;
2572 +
2573 +enum eCudaSyncMode
2574 +{
2575 + BLOCKING,
2576 + SPIN
2577 +};
2578 +
2579 +eCudaSyncMode CUDA_SYNC_MODE = BLOCKING;
2580 +
2581 +
2582 +int CUR_DEVICE = -1;
2583 +int LAST_DEVICE = -1;
2584 +
2585 +cudaStream_t STREAMS[NR_GPUS];
2586 +cudaEvent_t EVENTS[NR_GPUS];
2587 +int GPU_HZ[NR_GPUS];
2588 +int NUM_SM[NR_GPUS];
2589 +int WARP_SIZE[NR_GPUS];
2590 +int ELEM_PER_THREAD[NR_GPUS];
2591 +
2592 +enum eScheduler
2593 +{
2594 + LITMUS,
2595 + LINUX,
2596 + RT_LINUX
2597 +};
2598 +
2599 +struct Args
2600 +{
2601 + bool wait;
2602 + bool migrate;
2603 + int cluster;
2604 + int cluster_size;
2605 + bool gpu_using;
2606 + int gpu_partition;
2607 + int gpu_partition_size;
2608 + int rho;
2609 + int num_ce;
2610 + bool reserve_migr_ce;
2611 + bool use_engine_locks;
2612 + eEngineLockTypes engine_lock_type;
2613 + bool yield_locks;
2614 + bool use_dgls;
2615 + eGpuSyncMode gpusync_mode;
2616 + bool enable_affinity;
2617 + int relax_fifo_len;
2618 + eCudaSyncMode sync_mode;
2619 + size_t send_size;
2620 + size_t recv_size;
2621 + size_t state_size;
2622 + bool enable_chunking;
2623 + size_t chunk_size;
2624 + bool use_sysmem_migration;
2625 + int num_kernels;
2626 +
2627 + double wcet_ms;
2628 + double gpu_wcet_ms;
2629 + double period_ms;
2630 +
2631 + double budget_ms;
2632 +
2633 + double stddev;
2634 +
2635 + eScheduler scheduler;
2636 +
2637 + unsigned int priority;
2638 +
2639 + task_class_t cls;
2640 +
2641 + bool want_enforcement;
2642 + bool want_signals;
2643 + budget_drain_policy_t drain_policy;
2644 +
2645 + int column;
2646 +
2647 + int num_gpu_tasks;
2648 + int num_tasks;
2649 +
2650 + double scale;
2651 +
2652 + double duration;
2653 +
2654 + bool is_aberrant;
2655 + double aberrant_prob;
2656 + double aberrant_factor;
2657 +};
2658 +
2659 +
2660 +
2661 +#define DEFINE_PER_GPU(type, var) type var[NR_GPUS]
2662 +#define per_gpu(var, idx) (var[(idx)])
2663 +#define this_gpu(var) (var[(CUR_DEVICE)])
2664 +#define cur_stream() (this_gpu(STREAMS))
2665 +#define cur_event() (this_gpu(EVENTS))
2666 +#define cur_gpu() (CUR_DEVICE)
2667 +#define last_gpu() (LAST_DEVICE)
2668 +#define cur_ee() (EE_LOCKS[CUR_DEVICE])
2669 +#define cur_send() (CE_SEND_LOCKS[CUR_DEVICE])
2670 +#define cur_recv() (CE_RECV_LOCKS[CUR_DEVICE])
2671 +#define cur_migr_send() (CE_MIGR_SEND_LOCKS[CUR_DEVICE])
2672 +#define cur_migr_recv() (CE_MIGR_RECV_LOCKS[CUR_DEVICE])
2673 +#define cur_hz() (GPU_HZ[CUR_DEVICE])
2674 +#define cur_sms() (NUM_SM[CUR_DEVICE])
2675 +#define cur_warp_size() (WARP_SIZE[CUR_DEVICE])
2676 +#define cur_elem_per_thread() (ELEM_PER_THREAD[CUR_DEVICE])
2677 +#define num_online_gpus() (NUM_GPUS)
2678 +
2679 +static bool useEngineLocks()
2680 +{
2681 + return(USE_ENGINE_LOCKS);
2682 +}
2683 +
2684 +//#define VANILLA_LINUX
2685 +
2686 +bool TRACE_MIGRATIONS = false;
2687 +#ifndef VANILLA_LINUX
2688 +#define trace_migration(to, from) do { inject_gpu_migration((to), (from)); } while(0)
2689 +#define trace_release(arrival, deadline, jobno) do { inject_release((arrival), (deadline), (jobno)); } while(0)
2690 +#define trace_completion(jobno) do { inject_completion((jobno)); } while(0)
2691 +#define trace_name() do { inject_name(); } while(0)
2692 +#define trace_param() do { inject_param(); } while(0)
2693 +#else
2694 +#define set_rt_task_param(x, y) (0)
2695 +#define trace_migration(to, from)
2696 +#define trace_release(arrival, deadline, jobno)
2697 +#define trace_completion(jobno)
2698 +#define trace_name()
2699 +#define trace_param()
2700 +#endif
2701 +
2702 +struct ce_lock_state
2703 +{
2704 + int locks[2];
2705 + size_t num_locks;
2706 + size_t budget_remaining;
2707 + bool locked;
2708 +
2709 + ce_lock_state(int device_a, enum cudaMemcpyKind kind, size_t size, int device_b = -1, bool migration = false) {
2710 + num_locks = (device_a != -1) + (device_b != -1);
2711 +
2712 + if(device_a != -1) {
2713 + if (!migration)
2714 + locks[0] = (kind == cudaMemcpyHostToDevice || (kind == cudaMemcpyDeviceToDevice && device_b == -1)) ?
2715 + CE_SEND_LOCKS[device_a] : CE_RECV_LOCKS[device_a];
2716 + else
2717 + locks[0] = (kind == cudaMemcpyHostToDevice || (kind == cudaMemcpyDeviceToDevice && device_b == -1)) ?
2718 + CE_MIGR_SEND_LOCKS[device_a] : CE_MIGR_RECV_LOCKS[device_a];
2719 + }
2720 +
2721 + if(device_b != -1) {
2722 + assert(kind == cudaMemcpyDeviceToDevice);
2723 +
2724 + if (!migration)
2725 + locks[1] = CE_RECV_LOCKS[device_b];
2726 + else
2727 + locks[1] = CE_MIGR_RECV_LOCKS[device_b];
2728 +
2729 + if(locks[1] < locks[0]) {
2730 + // enforce total order on locking
2731 + int temp = locks[1];
2732 + locks[1] = locks[0];
2733 + locks[0] = temp;
2734 + }
2735 + }
2736 + else {
2737 + locks[1] = -1;
2738 + }
2739 +
2740 + if(!ENABLE_CHUNKING)
2741 + budget_remaining = size;
2742 + else
2743 + budget_remaining = CHUNK_SIZE;
2744 + }
2745 +
2746 + void crash(void) {
2747 + void *array[50];
2748 + int size, i;
2749 + char **messages;
2750 +
2751 + size = backtrace(array, 50);
2752 + messages = backtrace_symbols(array, size);
2753 +
2754 + fprintf(stderr, "%d: TRIED TO GRAB SAME LOCK TWICE! Lock = %d\n", getpid(), locks[0]);
2755 + for (i = 1; i < size && messages != NULL; ++i)
2756 + {
2757 + fprintf(stderr, "%d: [bt]: (%d) %s\n", getpid(), i, messages[i]);
2758 + }
2759 + free(messages);
2760 +
2761 + assert(false);
2762 + }
2763 +
2764 +
2765 + void lock() {
2766 + if(locks[0] == locks[1]) crash();
2767 +
2768 + if (num_locks == 1) {
2769 + gpuspin_block_litmus_signals(ALL_LITMUS_SIG_MASKS);
2770 + litmus_lock(locks[0]);
2771 + gpuspin_unblock_litmus_signals(ALL_LITMUS_SIG_MASKS);
2772 + }
2773 + else if(USE_DYNAMIC_GROUP_LOCKS) {
2774 + gpuspin_block_litmus_signals(ALL_LITMUS_SIG_MASKS);
2775 + litmus_dgl_lock(locks, num_locks);
2776 + gpuspin_unblock_litmus_signals(ALL_LITMUS_SIG_MASKS);
2777 + }
2778 + else
2779 + {
2780 + gpuspin_block_litmus_signals(ALL_LITMUS_SIG_MASKS);
2781 + for(int l = 0; l < num_locks; ++l)
2782 + {
2783 + litmus_lock(locks[l]);
2784 + }
2785 + gpuspin_unblock_litmus_signals(ALL_LITMUS_SIG_MASKS);
2786 + }
2787 + locked = true;
2788 + }
2789 +
2790 + void unlock() {
2791 + if(locks[0] == locks[1]) crash();
2792 +
2793 + if (num_locks == 1) {
2794 + gpuspin_block_litmus_signals(ALL_LITMUS_SIG_MASKS);
2795 + litmus_unlock(locks[0]);
2796 + gpuspin_unblock_litmus_signals(ALL_LITMUS_SIG_MASKS);
2797 + }
2798 + else if(USE_DYNAMIC_GROUP_LOCKS) {
2799 + gpuspin_block_litmus_signals(ALL_LITMUS_SIG_MASKS);
2800 + litmus_dgl_unlock(locks, num_locks);
2801 + gpuspin_unblock_litmus_signals(ALL_LITMUS_SIG_MASKS);
2802 + }
2803 + else
2804 + {
2805 + gpuspin_block_litmus_signals(ALL_LITMUS_SIG_MASKS);
2806 + // reverse order
2807 + for(int l = num_locks - 1; l >= 0; --l)
2808 + {
2809 + litmus_unlock(locks[l]);
2810 + }
2811 + gpuspin_unblock_litmus_signals(ALL_LITMUS_SIG_MASKS);
2812 + }
2813 + locked = false;
2814 + }
2815 +
2816 + bool should_yield() {
2817 + int yield = 1; // assume we should yield
2818 + if (YIELD_LOCKS) {
2819 + if(locks[0] == locks[1]) crash();
2820 + if (num_locks == 1)
2821 + yield = litmus_should_yield_lock(locks[0]);
2822 + else if(USE_DYNAMIC_GROUP_LOCKS)
2823 + yield = litmus_dgl_should_yield_lock(locks, num_locks);
2824 + else
2825 + for(int l = num_locks - 1; l >= 0; --l) // reverse order
2826 + yield |= litmus_should_yield_lock(locks[l]);
2827 + }
2828 + return (yield);
2829 + }
2830 +
2831 + void refresh() {
2832 + budget_remaining = CHUNK_SIZE;
2833 + }
2834 +
2835 + bool budgetIsAvailable(size_t tosend) {
2836 + return(tosend >= budget_remaining);
2837 + }
2838 +
2839 + void decreaseBudget(size_t spent) {
2840 + budget_remaining -= spent;
2841 + }
2842 +};
2843 +
2844 +// precondition: if do_locking == true, locks in state are held.
2845 +static cudaError_t __chunkMemcpy(void* a_dst, const void* a_src, size_t count,
2846 + enum cudaMemcpyKind kind,
2847 + ce_lock_state* state)
2848 +{
2849 + cudaError_t ret = cudaSuccess;
2850 + int remaining = count;
2851 +
2852 + char* dst = (char*)a_dst;
2853 + const char* src = (const char*)a_src;
2854 +
2855 + // disable chunking, if needed, by setting chunk_size equal to the
2856 + // amount of data to be copied.
2857 + int chunk_size = (ENABLE_CHUNKING) ? CHUNK_SIZE : count;
2858 + int i = 0;
2859 +
2860 + while(remaining != 0)
2861 + {
2862 + int bytesToCopy = std::min(remaining, chunk_size);
2863 +
2864 + if (state && state->locked) {
2865 + // we have to unlock/re-lock the copy engine to refresh our budget unless
2866 + // we still have budget available.
2867 + if (!state->budgetIsAvailable(bytesToCopy)) {
2868 + // optimization - don't unlock if no one else needs the engine
2869 + if (state->should_yield()) {
2870 + gpuspin_block_litmus_signals(ALL_LITMUS_SIG_MASKS);
2871 + cudaEventSynchronize(cur_event());
2872 + ret = cudaGetLastError();
2873 + if (kind == cudaMemcpyDeviceToHost || kind == cudaMemcpyDeviceToDevice)
2874 + inject_action(CE_RECV_END);
2875 + if (kind == cudaMemcpyHostToDevice)
2876 + inject_action(CE_SEND_END);
2877 + gpuspin_unblock_litmus_signals(ALL_LITMUS_SIG_MASKS);
2878 +
2879 + state->unlock();
2880 + if(ret != cudaSuccess)
2881 + break;
2882 + }
2883 + // we can only run out of
2884 + // budget if chunking is enabled.
2885 + // we presume that init budget would
2886 + // be set to cover entire memcpy
2887 + // if chunking were disabled.
2888 + state->refresh();
2889 + }
2890 + }
2891 +
2892 + if(state && !state->locked) {
2893 + state->lock();
2894 + if (kind == cudaMemcpyDeviceToHost || kind == cudaMemcpyDeviceToDevice)
2895 + inject_action(CE_RECV_START);
2896 + if (kind == cudaMemcpyHostToDevice)
2897 + inject_action(CE_SEND_START);
2898 + }
2899 +
2900 + //ret = cudaMemcpy(dst+i*chunk_size, src+i*chunk_size, bytesToCopy, kind);
2901 + gpuspin_block_litmus_signals(ALL_LITMUS_SIG_MASKS);
2902 + cudaMemcpyAsync(dst+i*chunk_size, src+i*chunk_size, bytesToCopy, kind, cur_stream());
2903 + cudaEventRecord(cur_event(), cur_stream());
2904 + gpuspin_unblock_litmus_signals(ALL_LITMUS_SIG_MASKS);
2905 +
2906 + if(state)
2907 + state->decreaseBudget(bytesToCopy);
2908 +
2909 + ++i;
2910 + remaining -= bytesToCopy;
2911 + }
2912 + return ret;
2913 +}
2914 +
2915 +static cudaError_t chunkMemcpy(void* a_dst, const void* a_src, size_t count,
2916 + enum cudaMemcpyKind kind,
2917 + int device_a = -1, // device_a == -1 disables locking
2918 + bool do_locking = true,
2919 + int device_b = -1,
2920 + bool migration = false)
2921 +{
2922 + cudaError_t ret;
2923 + if(!do_locking || device_a == -1) {
2924 + ret = __chunkMemcpy(a_dst, a_src, count, kind, NULL);
2925 + gpuspin_block_litmus_signals(ALL_LITMUS_SIG_MASKS);
2926 + cudaEventSynchronize(cur_event());
2927 + if(ret == cudaSuccess)
2928 + ret = cudaGetLastError();
2929 + gpuspin_unblock_litmus_signals(ALL_LITMUS_SIG_MASKS);
2930 + }
2931 + else {
2932 + ce_lock_state state(device_a, kind, count, device_b, migration);
2933 + state.lock();
2934 +
2935 + if (kind == cudaMemcpyDeviceToHost || kind == cudaMemcpyDeviceToDevice)
2936 + inject_action(CE_RECV_START);
2937 + if (kind == cudaMemcpyHostToDevice)
2938 + inject_action(CE_SEND_START);
2939 +
2940 + ret = __chunkMemcpy(a_dst, a_src, count, kind, &state);
2941 + gpuspin_block_litmus_signals(ALL_LITMUS_SIG_MASKS);
2942 + cudaEventSynchronize(cur_event());
2943 + // cudaStreamSynchronize(cur_stream());
2944 + if(ret == cudaSuccess)
2945 + ret = cudaGetLastError();
2946 +
2947 + if (kind == cudaMemcpyDeviceToHost || kind == cudaMemcpyDeviceToDevice)
2948 + inject_action(CE_RECV_END);
2949 + if (kind == cudaMemcpyHostToDevice)
2950 + inject_action(CE_SEND_END);
2951 + gpuspin_unblock_litmus_signals(ALL_LITMUS_SIG_MASKS);
2952 +
2953 + state.unlock();
2954 + }
2955 + return ret;
2956 +}
2957 +
2958 +int LITMUS_LOCK_FD = 0;
2959 +
2960 +int EXP_OFFSET = 0;
2961 +
2962 +void allocate_locks_litmus(void)
2963 +{
2964 + stringstream ss;
2965 + ss<<lock_namespace<<"-"<<EXP_OFFSET;
2966 +
2967 + // allocate k-FMLP lock
2968 + //LITMUS_LOCK_FD = open(lock_namespace, O_RDONLY | O_CREAT, S_IRUSR | S_IWUSR);
2969 + LITMUS_LOCK_FD = open(ss.str().c_str(), O_RDONLY | O_CREAT, S_IRUSR | S_IWUSR);
2970 + int *fd = &LITMUS_LOCK_FD;
2971 +
2972 + int base_name = GPU_PARTITION * 100 + EXP_OFFSET * 200;
2973 + ++EXP_OFFSET;
2974 +
2975 + if (GPU_SYNC_MODE == IKGLP_MODE) {
2976 + /* Standard (optimal) IKGLP */
2977 + TOKEN_LOCK = open_gpusync_token_lock(*fd,
2978 + base_name, /* name */
2979 + GPU_PARTITION_SIZE,
2980 + GPU_PARTITION*GPU_PARTITION_SIZE,
2981 + RHO,
2982 + IKGLP_M_IN_FIFOS,
2983 + (!RELAX_FIFO_MAX_LEN) ?
2984 + IKGLP_OPTIMAL_FIFO_LEN :
2985 + IKGLP_UNLIMITED_FIFO_LEN,
2986 + ENABLE_AFFINITY);
2987 + }
2988 + else if (GPU_SYNC_MODE == KFMLP_MODE) {
2989 + /* KFMLP. FIFO queues only for tokens. */
2990 + TOKEN_LOCK = open_gpusync_token_lock(*fd,
2991 + base_name, /* name */
2992 + GPU_PARTITION_SIZE,
2993 + GPU_PARTITION*GPU_PARTITION_SIZE,
2994 + RHO,
2995 + IKGLP_UNLIMITED_IN_FIFOS,
2996 + IKGLP_UNLIMITED_FIFO_LEN,
2997 + ENABLE_AFFINITY);
2998 + }
2999 + else if (GPU_SYNC_MODE == RGEM_MODE) {
3000 + /* RGEM-like token allocation. Shared priority queue for all tokens. */
3001 + TOKEN_LOCK = open_gpusync_token_lock(*fd,
3002 + base_name, /* name */
3003 + GPU_PARTITION_SIZE,
3004 + GPU_PARTITION*GPU_PARTITION_SIZE,
3005 + RHO,
3006 + RHO*GPU_PARTITION_SIZE,
3007 + 1,
3008 + ENABLE_AFFINITY);
3009 + }
3010 + else if (GPU_SYNC_MODE == IKGLP_WC_MODE) {
3011 + /* Non-optimal IKGLP that never lets a replica idle if there are pending
3012 + * token requests. */
3013 + int max_simult_run = std::max(CPU_PARTITION_SIZE, RHO*GPU_PARTITION_SIZE);
3014 + int max_fifo_len = (int)ceil((float)max_simult_run / (RHO*GPU_PARTITION_SIZE));
3015 + TOKEN_LOCK = open_gpusync_token_lock(*fd,
3016 + base_name, /* name */
3017 + GPU_PARTITION_SIZE,
3018 + GPU_PARTITION*GPU_PARTITION_SIZE,
3019 + RHO,
3020 + max_simult_run,
3021 + (!RELAX_FIFO_MAX_LEN) ?
3022 + max_fifo_len :
3023 + IKGLP_UNLIMITED_FIFO_LEN,
3024 + ENABLE_AFFINITY);
3025 + }
3026 + else {
3027 + perror("Invalid GPUSync mode specified\n");
3028 + TOKEN_LOCK = -1;
3029 + }
3030 +
3031 + if(TOKEN_LOCK < 0)
3032 + perror("open_token_sem");
3033 +
3034 + if(USE_ENGINE_LOCKS)
3035 + {
3036 + assert(NUM_COPY_ENGINES == 1 || NUM_COPY_ENGINES == 2);
3037 + assert((NUM_COPY_ENGINES == 1 && !RESERVED_MIGR_COPY_ENGINE) || NUM_COPY_ENGINES == 2);
3038 +
3039 + // allocate the engine locks.
3040 + for (int i = 0; i < GPU_PARTITION_SIZE; ++i)
3041 + {
3042 + int idx = GPU_PARTITION*GPU_PARTITION_SIZE + i;
3043 + int ee_name = (i+1)*10 + base_name;
3044 + int ce_0_name = (i+1)*10 + base_name + 1;
3045 + int ce_1_name = (i+1)*10 + base_name + 2;
3046 + int ee_lock = -1, ce_0_lock = -1, ce_1_lock = -1;
3047 +
3048 + open_sem_t openEngineLock = (ENGINE_LOCK_TYPE == FIFO) ?
3049 + open_fifo_sem : open_prioq_sem;
3050 +
3051 + ee_lock = openEngineLock(*fd, ee_name);
3052 + if (ee_lock < 0)
3053 + perror("open_*_sem (engine lock)");
3054 +
3055 + ce_0_lock = openEngineLock(*fd, ce_0_name);
3056 + if (ce_0_lock < 0)
3057 + perror("open_*_sem (engine lock)");
3058 +
3059 + if (NUM_COPY_ENGINES == 2)
3060 + {
3061 + ce_1_lock = openEngineLock(*fd, ce_1_name);
3062 + if (ce_1_lock < 0)
3063 + perror("open_*_sem (engine lock)");
3064 + }
3065 +
3066 + EE_LOCKS[idx] = ee_lock;
3067 +
3068 + if (NUM_COPY_ENGINES == 1)
3069 + {
3070 + // share locks
3071 + CE_SEND_LOCKS[idx] = ce_0_lock;
3072 + CE_RECV_LOCKS[idx] = ce_0_lock;
3073 + CE_MIGR_SEND_LOCKS[idx] = ce_0_lock;
3074 + CE_MIGR_RECV_LOCKS[idx] = ce_0_lock;
3075 + }
3076 + else
3077 + {
3078 + assert(NUM_COPY_ENGINES == 2);
3079 +
3080 + if (RESERVED_MIGR_COPY_ENGINE) {
3081 +				// copy engine dedicated to migration operations
3082 + CE_SEND_LOCKS[idx] = ce_0_lock;
3083 + CE_RECV_LOCKS[idx] = ce_0_lock;
3084 + CE_MIGR_SEND_LOCKS[idx] = ce_1_lock;
3085 + CE_MIGR_RECV_LOCKS[idx] = ce_1_lock;
3086 + }
3087 + else {
3088 + // migration transmissions treated as regular data
3089 + CE_SEND_LOCKS[idx] = ce_0_lock;
3090 + CE_RECV_LOCKS[idx] = ce_1_lock;
3091 + CE_MIGR_SEND_LOCKS[idx] = ce_0_lock;
3092 + CE_MIGR_RECV_LOCKS[idx] = ce_1_lock;
3093 + }
3094 + }
3095 + }
3096 + }
3097 +}
3098 +
3099 +void deallocate_locks_litmus(void)
3100 +{
3101 + for (int i = 0; i < GPU_PARTITION_SIZE; ++i)
3102 + {
3103 + int idx = GPU_PARTITION*GPU_PARTITION_SIZE + i;
3104 +
3105 + od_close(EE_LOCKS[idx]);
3106 + if (NUM_COPY_ENGINES == 1)
3107 + {
3108 + od_close(CE_SEND_LOCKS[idx]);
3109 + }
3110 + else
3111 + {
3112 + if (RESERVED_MIGR_COPY_ENGINE) {
3113 + od_close(CE_SEND_LOCKS[idx]);
3114 + od_close(CE_MIGR_SEND_LOCKS[idx]);
3115 + }
3116 + else {
3117 + od_close(CE_SEND_LOCKS[idx]);
3118 + od_close(CE_RECV_LOCKS[idx]);
3119 + }
3120 + }
3121 + }
3122 +
3123 + od_close(TOKEN_LOCK);
3124 +
3125 + close(LITMUS_LOCK_FD);
3126 +
3127 + memset(&CE_SEND_LOCKS[0], 0, sizeof(CE_SEND_LOCKS));
3128 + memset(&CE_RECV_LOCKS[0], 0, sizeof(CE_RECV_LOCKS));
3129 + memset(&CE_MIGR_SEND_LOCKS[0], 0, sizeof(CE_MIGR_SEND_LOCKS));
3130 + memset(&CE_MIGR_RECV_LOCKS[0], 0, sizeof(CE_MIGR_RECV_LOCKS));
3131 + TOKEN_LOCK = -1;
3132 + LITMUS_LOCK_FD = 0;
3133 +}
3134 +
3135 +
3136 +class gpu_pool
3137 +{
3138 +public:
3139 + gpu_pool(int pSz): poolSize(pSz)
3140 + {
3141 + memset(&pool[0], 0, sizeof(pool[0])*poolSize);
3142 + }
3143 +
3144 + int get(pthread_mutex_t* tex, int preference = -1)
3145 + {
3146 + int which = -1;
3147 + int last = (ENABLE_AFFINITY) ?
3148 + ((preference >= 0) ? preference : 0) :
3149 + (rand()%poolSize);
3150 + int minIdx = last;
3151 +
3152 + pthread_mutex_lock(tex);
3153 +
3154 + int min = pool[last];
3155 + for(int i = (minIdx+1)%poolSize; i != last; i = (i+1)%poolSize)
3156 + {
3157 + if(min > pool[i])
3158 + minIdx = i;
3159 + }
3160 + ++pool[minIdx];
3161 +
3162 + pthread_mutex_unlock(tex);
3163 +
3164 + which = minIdx;
3165 +
3166 + return which;
3167 + }
3168 +
3169 + void put(pthread_mutex_t* tex, int which)
3170 + {
3171 + pthread_mutex_lock(tex);
3172 + --pool[which];
3173 + pthread_mutex_unlock(tex);
3174 + }
3175 +
3176 +private:
3177 + int poolSize;
3178 + int pool[NR_GPUS]; // >= gpu_part_size
3179 +};
3180 +
3181 +
3182 +static managed_shared_memory *linux_lock_segment_ptr = NULL;
3183 +static gpu_pool* GPU_LINUX_SEM_POOL = NULL;
3184 +static pthread_mutex_t* GPU_LINUX_MUTEX_POOL = NULL;
3185 +
3186 +static void allocate_locks_linux(const int num_gpu_users)
3187 +{
3188 + int numGpuPartitions = NR_GPUS/GPU_PARTITION_SIZE;
3189 +
3190 + if(num_gpu_users > 0)
3191 + {
3192 + xprintf("%d: creating linux locks\n", getpid());
3193 + shared_memory_object::remove("linux_lock_memory");
3194 +
3195 + linux_lock_segment_ptr = new managed_shared_memory(create_only, "linux_lock_memory", 30*PAGE_SIZE);
3196 + GPU_LINUX_MUTEX_POOL = linux_lock_segment_ptr->construct<pthread_mutex_t>("pthread_mutex_t linux_m")[numGpuPartitions]();
3197 + for(int i = 0; i < numGpuPartitions; ++i)
3198 + {
3199 + pthread_mutexattr_t attr;
3200 + pthread_mutexattr_init(&attr);
3201 + pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
3202 + pthread_mutex_init(&(GPU_LINUX_MUTEX_POOL[i]), &attr);
3203 + pthread_mutexattr_destroy(&attr);
3204 + }
3205 + GPU_LINUX_SEM_POOL = linux_lock_segment_ptr->construct<gpu_pool>("gpu_pool linux_p")[numGpuPartitions](GPU_PARTITION_SIZE);
3206 + }
3207 + else
3208 + {
3209 + sleep(5);
3210 + do
3211 + {
3212 + try
3213 + {
3214 + if (!linux_lock_segment_ptr)
3215 + linux_lock_segment_ptr = new managed_shared_memory(open_only, "linux_lock_memory");
3216 + }
3217 + catch(...)
3218 + {
3219 + sleep(1);
3220 + }
3221 + }while(linux_lock_segment_ptr == NULL);
3222 +
3223 + GPU_LINUX_MUTEX_POOL = linux_lock_segment_ptr->find<pthread_mutex_t>("pthread_mutex_t linux_m").first;
3224 + GPU_LINUX_SEM_POOL = linux_lock_segment_ptr->find<gpu_pool>("gpu_pool linux_p").first;
3225 + }
3226 +}
3227 +
3228 +static void deallocate_locks_linux(const int num_gpu_users)
3229 +{
3230 + GPU_LINUX_MUTEX_POOL = NULL;
3231 + GPU_LINUX_SEM_POOL = NULL;
3232 +
3233 + delete linux_lock_segment_ptr;
3234 + linux_lock_segment_ptr = NULL;
3235 +
3236 + if(num_gpu_users > 0)
3237 + shared_memory_object::remove("linux_lock_memory");
3238 +}
3239 +
3240 +
3241 +
3242 +
3243 +static void allocate_locks(const int num_gpu_users, bool linux_mode)
3244 +{
3245 + if(!linux_mode)
3246 + allocate_locks_litmus();
3247 + else
3248 + allocate_locks_linux(num_gpu_users);
3249 +}
3250 +
3251 +static void deallocate_locks(const int num_gpu_users, bool linux_mode)
3252 +{
3253 + if(!linux_mode)
3254 + deallocate_locks_litmus();
3255 + else
3256 + deallocate_locks_linux(num_gpu_users);
3257 +}
3258 +
3259 +static void set_cur_gpu(int gpu)
3260 +{
3261 + if (TRACE_MIGRATIONS) {
3262 + trace_migration(gpu, CUR_DEVICE);
3263 + }
3264 + if(gpu != CUR_DEVICE) {
3265 + cudaSetDevice(gpu);
3266 + CUR_DEVICE = gpu;
3267 + }
3268 +}
3269 +
3270 +
3271 +//static pthread_barrier_t *gpu_barrier = NULL;
3272 +static interprocess_mutex *gpu_mgmt_mutexes = NULL;
3273 +static managed_shared_memory *gpu_mutex_segment_ptr = NULL;
3274 +
3275 +void coordinate_gpu_tasks(const int num_gpu_users)
3276 +{
3277 + if(num_gpu_users > 0)
3278 + {
3279 + xprintf("%d creating shared memory\n", getpid());
3280 + shared_memory_object::remove("gpu_mutex_memory");
3281 + gpu_mutex_segment_ptr = new managed_shared_memory(create_only, "gpu_mutex_memory", PAGE_SIZE);
3282 +
3283 +// printf("%d creating a barrier for %d users\n", getpid(), num_gpu_users);
3284 +// gpu_barrier = segment_ptr->construct<pthread_barrier_t>("pthread_barrier_t gpu_barrier")();
3285 +// pthread_barrierattr_t battr;
3286 +// pthread_barrierattr_init(&battr);
3287 +// pthread_barrierattr_setpshared(&battr, PTHREAD_PROCESS_SHARED);
3288 +// pthread_barrier_init(gpu_barrier, &battr, num_gpu_users);
3289 +// pthread_barrierattr_destroy(&battr);
3290 +// printf("%d creating gpu mgmt mutexes for %d devices\n", getpid(), NR_GPUS);
3291 + gpu_mgmt_mutexes = gpu_mutex_segment_ptr->construct<interprocess_mutex>("interprocess_mutex m")[NR_GPUS]();
3292 + }
3293 + else
3294 + {
3295 + sleep(5);
3296 + do
3297 + {
3298 + try
3299 + {
3300 + gpu_mutex_segment_ptr = new managed_shared_memory(open_only, "gpu_mutex_memory");
3301 + }
3302 + catch(...)
3303 + {
3304 + sleep(1);
3305 + }
3306 + }while(gpu_mutex_segment_ptr == NULL);
3307 +
3308 +// gpu_barrier = segment_ptr->find<pthread_barrier_t>("pthread_barrier_t gpu_barrier").first;
3309 + gpu_mgmt_mutexes = gpu_mutex_segment_ptr->find<interprocess_mutex>("interprocess_mutex m").first;
3310 + }
3311 +}
3312 +
3313 +const size_t SEND_ALLOC_SIZE = 12*1024;
3314 +const size_t RECV_ALLOC_SIZE = 12*1024;
3315 +const size_t STATE_ALLOC_SIZE = 16*1024;
3316 +
3317 +typedef float spindata_t;
3318 +
3319 +char *d_send_data[NR_GPUS] = {0};
3320 +char *d_recv_data[NR_GPUS] = {0};
3321 +char *d_state_data[NR_GPUS] = {0};
3322 +spindata_t *d_spin_data[NR_GPUS] = {0};
3323 +//unsigned int *d_iteration_count[NR_GPUS] = {0};
3324 +
3325 +
3326 +bool p2pMigration[NR_GPUS][NR_GPUS] = {0};
3327 +
3328 +char *h_send_data = 0;
3329 +char *h_recv_data = 0;
3330 +char *h_state_data = 0;
3331 +
3332 +static void destroy_events()
3333 +{
3334 + for(int i = 0; i < GPU_PARTITION_SIZE; ++i)
3335 + {
3336 + int which = GPU_PARTITION*GPU_PARTITION_SIZE + i;
3337 + gpu_mgmt_mutexes[which].lock();
3338 + set_cur_gpu(which);
3339 + cudaEventDestroy(EVENTS[which]);
3340 + gpu_mgmt_mutexes[which].unlock();
3341 + }
3342 +}
3343 +
3344 +static void init_events()
3345 +{
3346 + xprintf("creating %s events\n", (CUDA_SYNC_MODE == BLOCKING) ? "blocking" : "spinning");
3347 + for(int i = 0; i < GPU_PARTITION_SIZE; ++i)
3348 + {
3349 + int which = GPU_PARTITION*GPU_PARTITION_SIZE + i;
3350 + gpu_mgmt_mutexes[which].lock();
3351 + set_cur_gpu(which);
3352 + if (CUDA_SYNC_MODE == BLOCKING)
3353 + cudaEventCreateWithFlags(&EVENTS[which], cudaEventBlockingSync | cudaEventDisableTiming);
3354 + else
3355 + cudaEventCreateWithFlags(&EVENTS[which], cudaEventDefault | cudaEventDisableTiming);
3356 + gpu_mgmt_mutexes[which].unlock();
3357 + }
3358 +}
3359 +
/* Set up every GPU in this task's partition: query device properties,
 * enable peer-to-peer access where available, create streams, and allocate
 * the device buffers plus the shared pinned host staging buffers.
 * 'num_gpu_users' is forwarded to coordinate_gpu_tasks() on the first call
 * only. */
static void init_cuda(const int num_gpu_users)
{
	/* round allocation sizes up to a whole number of pages */
	size_t send_alloc_bytes = SEND_ALLOC_SIZE + (SEND_ALLOC_SIZE%PAGE_SIZE != 0)*PAGE_SIZE;
	size_t recv_alloc_bytes = RECV_ALLOC_SIZE + (RECV_ALLOC_SIZE%PAGE_SIZE != 0)*PAGE_SIZE;
	size_t state_alloc_bytes = STATE_ALLOC_SIZE + (STATE_ALLOC_SIZE%PAGE_SIZE != 0)*PAGE_SIZE;

	static bool first_time = true;

	if (first_time) {
		coordinate_gpu_tasks(num_gpu_users);
		first_time = false;
	}

#if 0
	switch (CUDA_SYNC_MODE)
	{
		case BLOCKING:
			cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
			break;
		case SPIN:
			cudaSetDeviceFlags(cudaDeviceScheduleSpin);
			break;
	}
#endif

	for(int i = 0; i < GPU_PARTITION_SIZE; ++i)
	{
		cudaDeviceProp prop;
		int which = GPU_PARTITION*GPU_PARTITION_SIZE + i;

		gpu_mgmt_mutexes[which].lock();
		try
		{
			set_cur_gpu(which);

			xprintf("setting up GPU %d\n", which);

			/* we never printf/malloc from device code; reclaim the space */
			cudaDeviceSetLimit(cudaLimitPrintfFifoSize, 0);
			cudaDeviceSetLimit(cudaLimitMallocHeapSize, 0);

			cudaGetDeviceProperties(&prop, which);
			GPU_HZ[which] = prop.clockRate * 1000; /* khz -> hz */
			NUM_SM[which] = prop.multiProcessorCount;
			WARP_SIZE[which] = prop.warpSize;

			// enough to fill the L2 cache exactly.
			ELEM_PER_THREAD[which] = (prop.l2CacheSize/(NUM_SM[which]*WARP_SIZE[which]*sizeof(spindata_t)));

//			if (!MIGRATE_VIA_SYSMEM && prop.unifiedAddressing)
			if (prop.unifiedAddressing)
			{
				for(int j = 0; j < GPU_PARTITION_SIZE; ++j)
				{
					if (i != j)
					{
						int other = GPU_PARTITION*GPU_PARTITION_SIZE + j;
						int canAccess = 0;
						cudaDeviceCanAccessPeer(&canAccess, which, other);
						if(canAccess)
						{
							cudaDeviceEnablePeerAccess(other, 0);
							p2pMigration[which][other] = true;
						}
					}
				}
			}

			cudaStreamCreate(&STREAMS[which]);

			// gpu working set
			cudaMalloc(&d_spin_data[which], prop.l2CacheSize);
			/* BUGFIX: clear the device buffer itself.  The old code passed
			 * &d_spin_data[which], asking CUDA to memset the host-side
			 * pointer variable (and l2CacheSize bytes beyond it). */
			cudaMemset(d_spin_data[which], 0, prop.l2CacheSize);

			// send data
			cudaMalloc(&d_send_data[which], send_alloc_bytes);

			// recv data
			cudaMalloc(&d_recv_data[which], recv_alloc_bytes);

			// state data
			cudaMalloc(&d_state_data[which], state_alloc_bytes);

			/* The pinned host staging buffers are shared by all GPUs:
			 * allocate them exactly once.  (Previously they were
			 * re-allocated on every loop iteration, leaking pinned
			 * memory for all but the last GPU.) */
			if (!h_send_data)
				cudaHostAlloc(&h_send_data, send_alloc_bytes, cudaHostAllocPortable | cudaHostAllocMapped);
			if (!h_recv_data)
				cudaHostAlloc(&h_recv_data, recv_alloc_bytes, cudaHostAllocPortable | cudaHostAllocMapped);
			if (!h_state_data)
				cudaHostAlloc(&h_state_data, state_alloc_bytes, cudaHostAllocPortable | cudaHostAllocMapped | cudaHostAllocWriteCombined);
		}
		catch(std::exception &e)
		{
			fprintf(stderr, "caught an exception during initializiation!: %s\n", e.what());
		}
		catch(...)
		{
			fprintf(stderr, "caught unknown exception.\n");
		}

		gpu_mgmt_mutexes[which].unlock();
	}

	// roll back to first GPU
	set_cur_gpu(GPU_PARTITION*GPU_PARTITION_SIZE);
}
3460 +
3461 +
3462 +
/* Copy task state straight from GPU 'from' to GPU 'to' over the peer link.
 * Always reports success (chunkMemcpy's result is not checked). */
static bool MigrateToGPU_P2P(int from, int to)
{
	set_cur_gpu(to);
	chunkMemcpy(this_gpu(d_state_data), per_gpu(d_state_data, from),
				STATE_SIZE, cudaMemcpyDeviceToDevice, to,
				useEngineLocks(), from, true);
	return true;
}
3472 +
3473 +
/* Copy task state from the current GPU into the pinned host buffer
 * (speculative pull for a possible later system-memory migration). */
static bool PullState(void)
{
	chunkMemcpy(h_state_data, this_gpu(d_state_data),
				STATE_SIZE, cudaMemcpyDeviceToHost,
				cur_gpu(), useEngineLocks(), -1, true);
	return true;
}
3482 +
/* Copy task state from the pinned host buffer onto the current GPU. */
static bool PushState(void)
{
	chunkMemcpy(this_gpu(d_state_data), h_state_data,
				STATE_SIZE, cudaMemcpyHostToDevice,
				cur_gpu(), useEngineLocks(), -1, true);
	return true;
}
3491 +
/* On-demand migration through system memory: pull state off 'from', then
 * push it onto 'to'.  GPUSync calls for speculative migration via
 * PushState()/PullState() instead, so reaching this path is currently
 * treated as a hard error (assert). */
static bool MigrateToGPU_SysMem(int from, int to)
{
	fprintf(stderr, "Tried to sysmem migrate from %d to %d\n",
					from, to);
	assert(false); // for now

	set_cur_gpu(from);
	chunkMemcpy(h_state_data, this_gpu(d_state_data),
				STATE_SIZE, cudaMemcpyDeviceToHost,
				from, useEngineLocks(), -1, true);

	set_cur_gpu(to);
	chunkMemcpy(this_gpu(d_state_data), h_state_data,
				STATE_SIZE, cudaMemcpyHostToDevice,
				to, useEngineLocks(), -1, true);

	return true;
}
3515 +
/* Move task state from GPU 'from' to GPU 'to'.  Uses the direct peer link
 * when sysmem migration is disabled and P2P access is enabled for this
 * pair; otherwise bounces through system memory.  A same-GPU "migration"
 * just re-selects the device. */
static bool MigrateToGPU(int from, int to)
{
	if (from == to) {
		set_cur_gpu(to);
		return true;
	}

	if (!MIGRATE_VIA_SYSMEM && p2pMigration[to][from])
		return MigrateToGPU_P2P(from, to);

	return MigrateToGPU_SysMem(from, to);
}
3535 +
/* Migrate from whatever GPU is currently selected to 'to'. */
static bool MigrateToGPU_Implicit(int to)
{
	return( MigrateToGPU(cur_gpu(), to) );
}
3540 +
/* Select 'next_gpu', migrating task state when we were previously running
 * on a different GPU.  Under sysmem migration the state was already pulled
 * to the host at the end of the previous GPU section, so only a push is
 * needed here. */
static void MigrateIfNeeded(int next_gpu)
{
	const int prev = cur_gpu();

	if (prev == -1) {
		/* first GPU section of this task: nothing to migrate yet */
		set_cur_gpu(next_gpu);
		return;
	}

	if (prev == next_gpu)
		return;

	if (MIGRATE_VIA_SYSMEM) {
		set_cur_gpu(next_gpu);
		PushState();
	}
	else {
		MigrateToGPU_Implicit(next_gpu);
	}
}
3556 +
/* Release all CUDA resources held by this task.
 * The individual device-side frees are disabled (#if 0) because
 * cudaDeviceReset() below tears down each device context wholesale. */
static void exit_cuda()
{
#if 0
	for(int i = 0; i < GPU_PARTITION_SIZE; ++i)
	{
		int which = GPU_PARTITION*GPU_PARTITION_SIZE + i;
		gpu_mgmt_mutexes[which].lock();
		set_cur_gpu(which);
		cudaFree(d_send_data[which]);
		cudaFree(d_recv_data[which]);
		cudaFree(d_state_data[which]);
		cudaFree(d_spin_data[which]);
		gpu_mgmt_mutexes[which].unlock();
	}
#endif

	/* free the pinned host staging buffers */
	cudaFreeHost(h_send_data);
	cudaFreeHost(h_recv_data);
	cudaFreeHost(h_state_data);

	/* reset every device in the partition, serialized per-GPU */
	for(int i = 0; i < GPU_PARTITION_SIZE; ++i)
	{
		int which = GPU_PARTITION*GPU_PARTITION_SIZE + i;
		gpu_mgmt_mutexes[which].lock();
		set_cur_gpu(which);
		cudaDeviceReset();
		gpu_mgmt_mutexes[which].unlock();
	}

	/* scrub the stale handles so a later init_cuda() starts clean */
	memset(d_send_data, 0, sizeof(d_send_data));
	memset(d_recv_data, 0, sizeof(d_recv_data));
	memset(d_state_data, 0, sizeof(d_state_data));
	memset(d_spin_data, 0, sizeof(d_spin_data));
	h_send_data = NULL;
	h_recv_data = NULL;
	h_state_data = NULL;
}
3594 +
/* Armed while this task may hold LITMUS^RT GPU locks; cleared once the
 * abnormal-exit handler has run so cleanup happens at most once. */
bool safetynet = false;

/* Abnormal-exit handler: best-effort release of every LITMUS^RT lock this
 * task may hold, so dying mid-critical-section does not wedge other GPU
 * users.  litmus rejects unlocks of locks we do not actually hold, so
 * over-unlocking is safe.
 * (Fixed: the parameter used to shadow the function's own name; the inner
 * re-test of USE_ENGINE_LOCKS was redundant with the guard above.) */
static void catch_exit(int signum)
{
	(void)signum; /* handler is the same for every trigger */
	if(GPU_USING && USE_ENGINE_LOCKS && safetynet)
	{
		safetynet = false;
		for(int i = 0; i < GPU_PARTITION_SIZE; ++i)
		{
			int which = GPU_PARTITION*GPU_PARTITION_SIZE + i;
			set_cur_gpu(which);

//			cudaDeviceReset();

			// try to unlock everything. litmus will prevent bogus calls.
			litmus_unlock(EE_LOCKS[which]);
			litmus_unlock(CE_SEND_LOCKS[which]);
			if (NUM_COPY_ENGINES == 2)
			{
				if (RESERVED_MIGR_COPY_ENGINE)
					litmus_unlock(CE_MIGR_SEND_LOCKS[which]);
				else
					litmus_unlock(CE_MIGR_RECV_LOCKS[which]);
			}
		}
		litmus_unlock(TOKEN_LOCK);
	}
}
3626 +
3627 +
/* Device kernel: busy-spin for (at least) 'cycles' GPU clock cycles while
 * ping-ponging +/- M_PI across this thread's 'num_elem'-element slice of
 * 'data', generating real memory traffic for the whole duration. */
__global__ void docudaspin(float* data, /*unsigned int* iterations,*/ unsigned int num_elem, unsigned int cycles)
{
	long long int now = clock64();
	long long unsigned int elapsed = 0;
	long long int last;

//	unsigned int iter = 0;
	unsigned int i = blockDim.x * blockIdx.x + threadIdx.x; /* global thread id */
	unsigned int j = 0;     /* offset within this thread's slice */
	bool toggle = true;     /* alternate add/subtract so values stay bounded */

//	iterations[i] = 0;
	do
	{
		data[i*num_elem+j] += (toggle) ? M_PI : -M_PI;
		j = (j + 1 != num_elem) ? j + 1 : 0; /* wrap around the slice */
		toggle = !toggle;
//		iter++;

		last = now;
		now = clock64();

//		// exact calculation takes more cycles than a second
//		// loop iteration when code is compiled optimized
//		long long int diff = now - last;
//		elapsed += (diff > 0) ?
//			diff :
//			now + ((~((long long int)0)<<1)>>1) - last;

		// don't count iterations with clock roll-over
		elapsed += max(0ll, now - last);
	}while(elapsed < cycles);

//	iterations[i] = iter;

	return;
}
3665 +
3666 +
/* Shared with the budget-signal path so the LITMUS_CATCH block can tell
 * exactly which locks were held when the signal fired. */
int next_gpu = -1;
static bool ee_locked = false;  /* execution-engine lock currently held */
static bool early_exit = false; /* budget signal arrived mid-GPU-section */
static bool have_token = false; /* GPU token currently held */

/* Run 'num_kernels' spin kernels totalling ~'gpu_sec_time' seconds of GPU
 * time under LITMUS^RT arbitration: acquire the token lock (whose return
 * value names the GPU to use), migrate state there if needed, run the
 * kernels under the engine locks, then release everything.  If SIG_BUDGET
 * fires, the catch block drains the device, unlocks whatever is still
 * held, and a C++ exception is thrown so the caller can abandon the rest
 * of the job. */
static void gpu_loop_for(double gpu_sec_time, unsigned int num_kernels, double emergency_exit)
{
//	int next_gpu;
	next_gpu = -1;
	ee_locked = false;
	early_exit = false;
	have_token = false;

	if (gpu_sec_time <= 0.0)
		goto out;
	if (emergency_exit && wctime() > emergency_exit)
		goto out;

	LITMUS_TRY
	{
		/* signals are blocked around every lock transition so the catch
		 * block never observes a half-updated have_token/ee_locked state */
		gpuspin_block_litmus_signals(ALL_LITMUS_SIG_MASKS);
		next_gpu = litmus_lock(TOKEN_LOCK);
		inject_action(TOKEN_START);
		have_token = true;
		__sync_synchronize();
		MigrateIfNeeded(next_gpu);
		gpuspin_unblock_litmus_signals(ALL_LITMUS_SIG_MASKS);

		/* per-kernel cycle budget on the (possibly new) current GPU */
		unsigned int numcycles = ((unsigned int)(cur_hz() * gpu_sec_time))/num_kernels;

		if(SEND_SIZE > 0) {
			chunkMemcpy(this_gpu(d_state_data), h_send_data, SEND_SIZE,
						cudaMemcpyHostToDevice, cur_gpu(), useEngineLocks());
		}

		for(unsigned int i = 0; i < num_kernels; ++i)
		{
			gpuspin_block_litmus_signals(ALL_LITMUS_SIG_MASKS);

			/* the EE lock may be retained across kernels; only acquire
			 * when we don't already hold it */
			if(useEngineLocks() && !ee_locked) {
				litmus_lock(cur_ee());
				inject_action(EE_START);
				ee_locked = true;
				__sync_synchronize();
			}
			/* one block per sm, one warp per block */
			docudaspin <<<cur_sms(), cur_warp_size(), 0, cur_stream()>>> (d_spin_data[cur_gpu()], cur_elem_per_thread(), numcycles);
			if(useEngineLocks() &&
			   (i == num_kernels - 1 ||	/* last kernel */
				!YIELD_LOCKS ||			/* always yeild */
				(YIELD_LOCKS && litmus_should_yield_lock(cur_ee())) /* we should yield */
			   )
			  ) {
				/* drain the kernel before dropping the engine lock */
				cudaEventRecord(cur_event(), cur_stream());
				cudaEventSynchronize(cur_event());
				inject_action(EE_END);
				litmus_unlock(cur_ee());
				ee_locked = false;
				__sync_synchronize();
			}

			gpuspin_unblock_litmus_signals(ALL_LITMUS_SIG_MASKS);
		}

		if(RECV_SIZE > 0) {
			chunkMemcpy(h_recv_data, this_gpu(d_state_data), RECV_SIZE,
						cudaMemcpyDeviceToHost, cur_gpu(), useEngineLocks());
		}

		/* speculative pull: a later sysmem migration then needs only a push */
		if (MIGRATE_VIA_SYSMEM) {
			gpuspin_block_litmus_signals(ALL_LITMUS_SIG_MASKS);
			PullState();
			gpuspin_unblock_litmus_signals(ALL_LITMUS_SIG_MASKS);
		}

		gpuspin_block_litmus_signals(ALL_LITMUS_SIG_MASKS);
		inject_action(TOKEN_END);
		litmus_unlock(TOKEN_LOCK);
		last_gpu() = cur_gpu();
		have_token = false;
		__sync_synchronize();
		gpuspin_unblock_litmus_signals(ALL_LITMUS_SIG_MASKS);
	}
	LITMUS_CATCH(SIG_BUDGET)
	{
		/* budget exhausted mid-section: drain the device, then release
		 * whatever we still hold so other tasks can make progress */
		if (have_token)
		{
			cudaEventRecord(cur_event(), cur_stream());
			cudaEventSynchronize(cur_event());

			if (useEngineLocks()) {
				if (ee_locked) {
					litmus_unlock(cur_ee());
				}

				/* we don't know which CEs might be locked... unlock them all. */
				if (NUM_COPY_ENGINES == 1) {
					litmus_unlock(cur_send());
				}
				else if (RESERVED_MIGR_COPY_ENGINE) {
					litmus_unlock(cur_send());
					litmus_unlock(cur_migr_send());
				}
				else {
					litmus_unlock(cur_send());
					litmus_unlock(cur_recv());
				}
			}

			litmus_unlock(TOKEN_LOCK);
			last_gpu() = cur_gpu();
		}

		early_exit = true;
	}
	END_LITMUS_TRY

	/* propagate the truncated job to the caller as a C++ exception */
	if (early_exit)
		throw std::exception();

out:
	return;
}
3790 +
/* Linux-scheduler counterpart of gpu_loop_for(): GPU allocation is
 * arbitrated by a semaphore pool + mutex instead of LITMUS^RT locks, and
 * there is no budget-signal handling.  Pool indices are partition-local,
 * hence the +/- GPU_OFFSET conversions. */
static void gpu_loop_for_linux(double gpu_sec_time, unsigned int num_kernels, double emergency_exit)
{
	int GPU_OFFSET = GPU_PARTITION * GPU_PARTITION_SIZE;
	gpu_pool *pool = &GPU_LINUX_SEM_POOL[GPU_PARTITION];
	pthread_mutex_t *mutex = &GPU_LINUX_MUTEX_POOL[GPU_PARTITION];

	int next_gpu;

	if (gpu_sec_time <= 0.0)
		goto out;
	if (emergency_exit && wctime() > emergency_exit)
		goto out;

	/* ask the pool for a GPU, hinting our previous one (if any) */
	next_gpu = pool->get(mutex, ((cur_gpu() != -1) ?
								 cur_gpu() - GPU_OFFSET :
								 -1))
			+ GPU_OFFSET;
	{
		MigrateIfNeeded(next_gpu);

		unsigned int numcycles = ((unsigned int)(cur_hz() * gpu_sec_time))/num_kernels;

		if(SEND_SIZE > 0)
			chunkMemcpy(this_gpu(d_state_data), h_send_data, SEND_SIZE,
						cudaMemcpyHostToDevice, cur_gpu(), useEngineLocks());

		for(unsigned int i = 0; i < num_kernels; ++i)
		{
			/* one block per sm, one warp per block */
			docudaspin <<<cur_sms(),cur_warp_size(), 0, cur_stream()>>> (d_spin_data[cur_gpu()], cur_elem_per_thread(), numcycles);
			cudaEventRecord(cur_event(), cur_stream());
			cudaEventSynchronize(cur_event());
		}

		if(RECV_SIZE > 0)
			chunkMemcpy(h_recv_data, this_gpu(d_state_data), RECV_SIZE,
						cudaMemcpyDeviceToHost, cur_gpu(), useEngineLocks());

		/* speculative pull so a future migration only needs a push */
		if (MIGRATE_VIA_SYSMEM)
			PullState();
	}
	pool->put(mutex, cur_gpu() - GPU_OFFSET);

	last_gpu() = cur_gpu();

out:
	return;
}
3839 +
3840 +
3841 +
3842 +
/* Print an error plus usage information, then terminate with failure.
 * (Fixed: the usage text named "rt_spin" -- copied from rtspin.c -- but
 * this program's own diagnostics identify it as gpuspin.) */
static void usage(char *error) {
	fprintf(stderr, "Error: %s\n", error);
	fprintf(stderr,
		"Usage:\n"
		"	gpuspin [COMMON-OPTS] WCET PERIOD DURATION\n"
		"	gpuspin [COMMON-OPTS] -f FILE [-o COLUMN] WCET PERIOD\n"
		"	gpuspin -l\n"
		"\n"
		"COMMON-OPTS = [-w] [-s SCALE]\n"
		"              [-p PARTITION/CLUSTER [-z CLUSTER SIZE]] [-c CLASS]\n"
		"              [-X LOCKING-PROTOCOL] [-L CRITICAL SECTION LENGTH] [-Q RESOURCE-ID]"
		"\n"
		"WCET and PERIOD are milliseconds, DURATION is seconds.\n"
		"CRITICAL SECTION LENGTH is in milliseconds.\n");
	exit(EXIT_FAILURE);
}
3859 +
3860 +///*
3861 +// * returns the character that made processing stop, newline or EOF
3862 +// */
3863 +//static int skip_to_next_line(FILE *fstream)
3864 +//{
3865 +// int ch;
3866 +// for (ch = fgetc(fstream); ch != EOF && ch != '\n'; ch = fgetc(fstream));
3867 +// return ch;
3868 +//}
3869 +//
3870 +//static void skip_comments(FILE *fstream)
3871 +//{
3872 +// int ch;
3873 +// for (ch = fgetc(fstream); ch == '#'; ch = fgetc(fstream))
3874 +// skip_to_next_line(fstream);
3875 +// ungetc(ch, fstream);
3876 +//}
3877 +//
3878 +//static void get_exec_times(const char *file, const int column,
3879 +// int *num_jobs, double **exec_times)
3880 +//{
3881 +// FILE *fstream;
3882 +// int cur_job, cur_col, ch;
3883 +// *num_jobs = 0;
3884 +//
3885 +// fstream = fopen(file, "r");
3886 +// if (!fstream)
3887 +// bail_out("could not open execution time file");
3888 +//
3889 +// /* figure out the number of jobs */
3890 +// do {
3891 +// skip_comments(fstream);
3892 +// ch = skip_to_next_line(fstream);
3893 +// if (ch != EOF)
3894 +// ++(*num_jobs);
3895 +// } while (ch != EOF);
3896 +//
3897 +// if (-1 == fseek(fstream, 0L, SEEK_SET))
3898 +// bail_out("rewinding file failed");
3899 +//
3900 +// /* allocate space for exec times */
3901 +// *exec_times = (double*)calloc(*num_jobs, sizeof(*exec_times));
3902 +// if (!*exec_times)
3903 +// bail_out("couldn't allocate memory");
3904 +//
3905 +// for (cur_job = 0; cur_job < *num_jobs && !feof(fstream); ++cur_job) {
3906 +//
3907 +// skip_comments(fstream);
3908 +//
3909 +// for (cur_col = 1; cur_col < column; ++cur_col) {
3910 +// /* discard input until we get to the column we want */
3911 +// int unused __attribute__ ((unused)) = fscanf(fstream, "%*s,");
3912 +// }
3913 +//
3914 +// /* get the desired exec. time */
3915 +// if (1 != fscanf(fstream, "%lf", (*exec_times)+cur_job)) {
3916 +// fprintf(stderr, "invalid execution time near line %d\n",
3917 +// cur_job);
3918 +// exit(EXIT_FAILURE);
3919 +// }
3920 +//
3921 +// skip_to_next_line(fstream);
3922 +// }
3923 +//
3924 +// assert(cur_job == *num_jobs);
3925 +// fclose(fstream);
3926 +//}
3927 +
#define NUMS 4096
static int num[NUMS];
__attribute__((unused)) static char* progname;

/* Touch the working-set array once: return the sum of the current values
 * while bumping every element (the side effect keeps the optimizer from
 * deleting the loop). */
static int loop_once(void)
{
	int sum = 0;
	int idx = 0;
	while (idx < NUMS) {
		sum += num[idx];
		num[idx] += 1;
		++idx;
	}
	return sum;
}
3939 +
/* Burn roughly 'exec_time' seconds of *CPU* time in loop_once() passes.
 * 'emergency_exit' is an absolute wall-clock deadline that should never be
 * reached if kernel-side execution-time tracking works; hitting it prints
 * a loud warning and bails.  Returns an accumulated junk value so the work
 * cannot be optimized away. */
static int loop_for(double exec_time, double emergency_exit)
{
	int tmp = 0;
	double last_loop, loop_start;
	double start, now;

	if (exec_time <= 0.0)
		goto out;

	start = cputime();
	now = cputime();

	if (emergency_exit && wctime() > emergency_exit)
		goto out;

	/* stop one loop-duration early so we don't overshoot the budget */
	last_loop = 0;
	while (now + last_loop < start + exec_time) {
		loop_start = now;
		tmp += loop_once();
		now = cputime();
		last_loop = now - loop_start;
		if (emergency_exit && wctime() > emergency_exit) {
			/* Oops --- this should only be possible if the execution time tracking
			 * is broken in the LITMUS^RT kernel. */
			fprintf(stderr, "!!! gpuspin/%d emergency exit!\n", getpid());
			fprintf(stderr, "Something is seriously wrong! Do not ignore this.\n");
			break;
		}
	}

out:
	return tmp;
}
3973 +
3974 +
3975 +//static void debug_delay_loop(void)
3976 +//{
3977 +// double start, end, delay;
3978 +//
3979 +// while (1) {
3980 +// for (delay = 0.5; delay > 0.01; delay -= 0.01) {
3981 +// start = wctime();
3982 +// loop_for(delay, 0);
3983 +// end = wctime();
3984 +// printf("%6.4fs: looped for %10.8fs, delta=%11.8fs, error=%7.4f%%\n",
3985 +// delay,
3986 +// end - start,
3987 +// end - start - delay,
3988 +// 100 * (end - start - delay) / delay);
3989 +// }
3990 +// }
3991 +//}
3992 +
3993 +typedef bool (*gpu_job_t)(double exec_time, double gpu_exec_time, unsigned int num_kernels, double program_end);
3994 +typedef bool (*cpu_job_t)(double exec_time, double program_end);
3995 +
/* One job of a GPU-using task under LITMUS^RT: the CPU demand is split
 * randomly around the GPU section.  The inner C++ try absorbs the
 * exception gpu_loop_for() throws after a mid-GPU budget signal; the outer
 * LITMUS_CATCH handles budget signals raised during the plain CPU loops.
 * Returns false once 'program_end' has passed (task should stop). */
static bool gpu_job(double exec_time, double gpu_exec_time, unsigned int num_kernels, double program_end)
{
	double chunk1, chunk2;

	if (wctime() > program_end) {
		return false;
	}
	else {
		/* random pre/post-GPU split of the CPU demand */
		chunk1 = exec_time * drand48();
		chunk2 = exec_time - chunk1;

		LITMUS_TRY
		{
			try
			{
				loop_for(chunk1, program_end + 1);
				gpu_loop_for(gpu_exec_time, num_kernels, program_end + 1);
				loop_for(chunk2, program_end + 1);
			}
			catch(std::exception& e)
			{
				xprintf("%d: ran out of time while using GPU\n", gettid());
			}
		}
		LITMUS_CATCH(SIG_BUDGET)
		{
			xprintf("%d: ran out of time\n", gettid());
		}
		END_LITMUS_TRY

		sleep_next_period();
	}
	return true;
}
4030 +
/* One job of a CPU-only task under LITMUS^RT: spin for 'exec_time'
 * seconds, tolerate a budget signal, then sleep until the next period.
 * Returns false once 'program_end' has passed (task should stop). */
static bool job(double exec_time, double program_end)
{
	if (wctime() > program_end) {
		return false;
	}
	else {
		LITMUS_TRY
		{
			loop_for(exec_time, program_end + 1);
		}
		LITMUS_CATCH(SIG_BUDGET)
		{
			xprintf("%d: ran out of time\n", gettid());
		}
		END_LITMUS_TRY
		sleep_next_period();
	}
	return true;
}
4050 +
/*****************************/
/* only used for linux modes */

static struct timespec periodTime;  /* task period */
static struct timespec releaseTime; /* absolute release time of the current job */
static unsigned int job_no = 0;     /* monotonically increasing job counter */

static lt_t period_ns;              /* period in nanoseconds, for deadline calc */

/* Emit a release record for the current job; the deadline is implicit
 * (release + period). */
static void log_release()
{
	__attribute__ ((unused)) lt_t rel = releaseTime.tv_sec * s2ns(1) + releaseTime.tv_nsec;
	__attribute__ ((unused)) lt_t dead = rel + period_ns;
	trace_release(rel, dead, job_no);
}

/* Emit a completion record and advance to the next job number. */
static void log_completion()
{
	trace_completion(job_no);
	++job_no;
}
4072 +
4073 +static void setup_next_period_linux(struct timespec* spec, struct timespec* period)
4074 +{
4075 + spec->tv_sec += period->tv_sec;
4076 + spec->tv_nsec += period->tv_nsec;
4077 + if (spec->tv_nsec >= s2ns(1)) {
4078 + ++(spec->tv_sec);
4079 + spec->tv_nsec -= s2ns(1);
4080 + }
4081 +}
4082 +
/* Linux-mode analog of sleep_next_period(): record completion, compute the
 * next absolute release, sleep until then, and record the release. */
static void sleep_next_period_linux()
{
	log_completion();
	setup_next_period_linux(&releaseTime, &periodTime);
	clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &releaseTime, NULL);
	log_release();
}
4090 +
/* Lock all current and future pages into RAM to keep paging latency out of
 * the measurements.  Failure (e.g. missing privilege or rlimit) is
 * non-fatal but now reported instead of silently ignored. */
static void init_linux()
{
	if (mlockall(MCL_CURRENT | MCL_FUTURE) != 0)
		perror("gpuspin: mlockall");
}
4095 +
4096 +static int enable_aux_rt_tasks_linux(pid_t tid)
4097 +{
4098 + /* pre: caller must already be real time */
4099 + int ret = 0;
4100 + struct sched_param param;
4101 + stringstream pidstr;
4102 + boost::filesystem::directory_iterator theEnd;
4103 + boost::filesystem::path proc_dir;
4104 +
4105 + int policy = sched_getscheduler(tid);
4106 + if (policy == -1 || policy != SCHED_FIFO) {
4107 + ret = -1;
4108 + goto out;
4109 + }
4110 +
4111 + ret = sched_getparam(tid, ¶m);
4112 + if (ret < 0)
4113 + goto out;
4114 +
4115 +
4116 + pidstr<<getpid();
4117 + proc_dir = boost::filesystem::path("/proc");
4118 + proc_dir /= pidstr.str();
4119 + proc_dir /= "task";
4120 +
4121 + for(boost::filesystem::directory_iterator iter(proc_dir); iter != theEnd; ++iter)
4122 + {
4123 + stringstream taskstr(iter->path().leaf().c_str());
4124 + int child = 0;
4125 + taskstr>>child;
4126 + if (child != tid && child != 0)
4127 + {
4128 + /* mirror tid's params to others */
4129 + ret = sched_setscheduler(child, policy, ¶m);
4130 + if (ret != 0)
4131 + goto out;
4132 + }
4133 + }
4134 +
4135 +out:
4136 + return ret;
4137 +}
4138 +
4139 +static int disable_aux_rt_tasks_linux(pid_t tid)
4140 +{
4141 + int ret = 0;
4142 + struct sched_param param;
4143 + stringstream pidstr;
4144 + boost::filesystem::directory_iterator theEnd;
4145 + boost::filesystem::path proc_dir;
4146 +
4147 + memset(¶m, 0, sizeof(param));
4148 +
4149 + pidstr<<getpid();
4150 + proc_dir = boost::filesystem::path("/proc");
4151 + proc_dir /= pidstr.str();
4152 + proc_dir /= "task";
4153 +
4154 + for(boost::filesystem::directory_iterator iter(proc_dir); iter != theEnd; ++iter)
4155 + {
4156 + stringstream taskstr(iter->path().leaf().c_str());
4157 + int child = 0;
4158 + taskstr>>child;
4159 + if (child != tid && child != 0)
4160 + {
4161 + /* make all other threads sched_normal */
4162 + ret = sched_setscheduler(child, SCHED_OTHER, ¶m);
4163 + if (ret != 0)
4164 + goto out;
4165 + }
4166 + }
4167 +
4168 +out:
4169 + return ret;
4170 +}
4171 +
/* Migrate this process to the given cluster.
 * NOTE(review): the loop enumerates every thread in /proc/<pid>/task but
 * never passes 'task' to be_migrate_to_cluster(), so it appears to
 * re-migrate the calling thread once per sibling thread rather than each
 * thread individually -- presumably children inherit affinity; confirm
 * against liblitmus' be_migrate_to_cluster() semantics. */
static int be_migrate_all_to_cluster(int cluster, int cluster_size)
{
	int ret = 0;
	stringstream pidstr;

	pidstr<<getpid();
	boost::filesystem::path proc_dir("/proc");
	proc_dir /= pidstr.str();
	proc_dir /= "task";
	boost::filesystem::directory_iterator theEnd;
	for(boost::filesystem::directory_iterator iter(proc_dir); iter != theEnd; ++iter)
	{
		stringstream taskstr(iter->path().leaf().c_str());
		int task = 0;
		taskstr>>task;
		if (task != 0) {
			ret = be_migrate_to_cluster(cluster, cluster_size);
			if (ret != 0)
				goto out;
		}
	}

out:
	return ret;
}
4197 +
4198 +static bool gpu_job_linux(double exec_time, double gpu_exec_time, unsigned int num_kernels, double program_end)
4199 +{
4200 + double chunk1, chunk2;
4201 +
4202 + if (wctime() > program_end) {
4203 + return false;
4204 + }
4205 + else {
4206 + chunk1 = exec_time * drand48();
4207 + chunk2 = exec_time - chunk1;
4208 +
4209 + loop_for(chunk1, program_end + 1);
4210 + gpu_loop_for_linux(gpu_exec_time, num_kernels, program_end + 1);
4211 + loop_for(chunk2, program_end + 1);
4212 +
4213 + sleep_next_period_linux();
4214 + }
4215 + return true;
4216 +}
4217 +
4218 +static bool job_linux(double exec_time, double program_end)
4219 +{
4220 + if (wctime() > program_end) {
4221 + return false;
4222 + }
4223 + else {
4224 + loop_for(exec_time, program_end + 1);
4225 + sleep_next_period_linux();
4226 + }
4227 + return true;
4228 +}
4229 +
4230 +/*****************************/
4231 +
4232 +
4233 +
4234 +
4235 +
4236 +enum eRunMode
4237 +{
4238 + NORMAL,
4239 + PROXY,
4240 + DAEMON,
4241 +};
4242 +
/* Reset 'args' to the experiment defaults (zero everything, then set the
 * non-zero defaults explicitly; -1.0 marks "required, not yet given"). */
void set_defaults(struct Args* args)
{
	memset(args, 0, sizeof(*args));
	/* timing parameters: wcet/period must be supplied by the user */
	args->wcet_ms = -1.0;
	args->gpu_wcet_ms = 0.0;
	args->period_ms = -1.0;
	args->budget_ms = -1.0;
	/* GPU arbitration defaults */
	args->gpusync_mode = IKGLP_MODE;
	args->sync_mode = BLOCKING;
	args->gpu_using = false;
	args->enable_affinity = false;
	args->enable_chunking = false;
	args->relax_fifo_len = false;
	args->use_sysmem_migration = false;
	args->rho = 2;
	args->num_ce = 2;
	args->reserve_migr_ce = false;
	args->num_kernels = 1;
	args->engine_lock_type = FIFO;
	args->yield_locks = false;
	/* budget/scheduling defaults */
	args->drain_policy = DRAIN_SIMPLE;
	args->want_enforcement = false;
	args->want_signals = false;
	args->priority = LITMUS_LOWEST_PRIORITY;
	args->cls = RT_CLASS_SOFT;
	args->scheduler = LITMUS;
	args->migrate = false;
	args->cluster = 0;
	args->cluster_size = 1;
	/* execution-time randomization and run control */
	args->stddev = 0.0;
	args->wait = false;
	args->scale = 1.0;
	args->duration = 0.0;
}
4277 +
/* Copy parsed command-line arguments into the program's global
 * configuration and reset the per-run GPU bookkeeping. */
void apply_args(struct Args* args)
{
	// set all the globals
	CPU_PARTITION_SIZE = args->cluster_size;
	GPU_USING = args->gpu_using;
	GPU_PARTITION = args->gpu_partition;
	GPU_PARTITION_SIZE = args->gpu_partition_size;
	RHO = args->rho;
	NUM_COPY_ENGINES = args->num_ce;
	RESERVED_MIGR_COPY_ENGINE = args->reserve_migr_ce;
	USE_ENGINE_LOCKS = args->use_engine_locks;
	ENGINE_LOCK_TYPE = args->engine_lock_type;
	YIELD_LOCKS = args->yield_locks;
	USE_DYNAMIC_GROUP_LOCKS = args->use_dgls;
	GPU_SYNC_MODE = args->gpusync_mode;
	ENABLE_AFFINITY = args->enable_affinity;
	RELAX_FIFO_MAX_LEN = args->relax_fifo_len;
	CUDA_SYNC_MODE = args->sync_mode;
	SEND_SIZE = args->send_size;
	RECV_SIZE = args->recv_size;
	STATE_SIZE = args->state_size;
	ENABLE_CHUNKING = args->enable_chunking;
	CHUNK_SIZE = args->chunk_size;
	MIGRATE_VIA_SYSMEM = args->use_sysmem_migration;

	/* Net effect: migrations are traced except under LITMUS with affinity
	 * enabled.  NOTE(review): presumably affinity-driven migrations are
	 * expected and would only add tracing noise -- confirm. */
	if (args->scheduler == LITMUS && !ENABLE_AFFINITY)
		TRACE_MIGRATIONS = true;
	else if (args->scheduler == LITMUS)
		TRACE_MIGRATIONS = false;
	else if (args->scheduler != LITMUS)
		TRACE_MIGRATIONS = true;

	WANT_SIGNALS = args->want_signals;

	// roll back other globals to an initial state
	CUR_DEVICE = -1;
	LAST_DEVICE = -1;
}
4316 +
4317 +int __do_normal(struct Args* args)
4318 +{
4319 + int ret = 0;
4320 + struct rt_task param;
4321 +
4322 + lt_t wcet;
4323 + lt_t period;
4324 + lt_t budget;
4325 +
4326 + Normal<double> *wcet_dist_ms = NULL;
4327 +
4328 + cpu_job_t cjobfn = NULL;
4329 + gpu_job_t gjobfn = NULL;
4330 +
4331 + double start = 0;
4332 +
4333 + if (MIGRATE_VIA_SYSMEM && GPU_PARTITION_SIZE == 1)
4334 + return -1;
4335 +
4336 + // turn off some features to be safe
4337 + if (args->scheduler != LITMUS)
4338 + {
4339 + RHO = 0;
4340 + USE_ENGINE_LOCKS = false;
4341 + USE_DYNAMIC_GROUP_LOCKS = false;
4342 + RELAX_FIFO_MAX_LEN = false;
4343 + ENABLE_RT_AUX_THREADS = false;
4344 + args->want_enforcement = false;
4345 + args->want_signals = false;
4346 +
4347 + cjobfn = job_linux;
4348 + gjobfn = gpu_job_linux;
4349 + }
4350 + else
4351 + {
4352 + cjobfn = job;
4353 + gjobfn = gpu_job;
4354 + }
4355 +
4356 + wcet = ms2ns(args->wcet_ms);
4357 + period = ms2ns(args->period_ms);
4358 +
4359 + if (wcet <= 0) {
4360 + fprintf(stderr, "The worst-case execution time must be a positive number.\n");
4361 + ret = -1;
4362 + goto out;
4363 + }
4364 + if (period <= 0) {
4365 + fprintf(stderr, "The period must be a positive number.\n");
4366 + ret = -1;
4367 + goto out;
4368 + }
4369 + if (wcet > period) {
4370 + fprintf(stderr, "The worst-case execution time must not exceed the period.\n");
4371 + ret = -1;
4372 + goto out;
4373 + }
4374 + if (args->gpu_using && args->gpu_wcet_ms <= 0) {
4375 + fprintf(stderr, "The worst-case gpu execution time must be a positive number.\n");
4376 + ret = -1;
4377 + goto out;
4378 + }
4379 +
4380 + if (args->budget_ms > 0.0)
4381 + budget = ms2ns(args->budget_ms);
4382 + else
4383 + budget = wcet;
4384 +
4385 + // randomize execution time according to a normal distribution
4386 + // centered around the desired execution time.
4387 + // standard deviation is a percentage of this average
4388 + wcet_dist_ms = new Normal<double>(args->wcet_ms + args->gpu_wcet_ms, (args->wcet_ms + args->gpu_wcet_ms) * args->stddev);
4389 + wcet_dist_ms->seed((unsigned int)time(0));
4390 +
4391 + ret = be_migrate_all_to_cluster(args->cluster, args->cluster_size);
4392 + if (ret < 0) {
4393 + fprintf(stderr, "could not migrate to target partition or cluster.\n");
4394 + goto out;
4395 + }
4396 +
4397 + if (args->scheduler != LITMUS)
4398 + {
4399 + // set some variables needed by linux modes
4400 + if (args->gpu_using)
4401 + TRACE_MIGRATIONS = true;
4402 + periodTime.tv_sec = period / s2ns(1);
4403 + periodTime.tv_nsec = period - periodTime.tv_sec * s2ns(1);
4404 + period_ns = period;
4405 + job_no = 0;
4406 + }
4407 +
4408 +
4409 + ignore_litmus_signals(SIG_BUDGET_MASK);
4410 +
4411 + init_rt_task_param(¶m);
4412 + param.exec_cost = budget;
4413 + param.period = period;
4414 + param.priority = args->priority;
4415 + param.cls = args->cls;
4416 + param.budget_policy = (args->want_enforcement) ?
4417 + PRECISE_ENFORCEMENT : NO_ENFORCEMENT;
4418 + param.budget_signal_policy = (args->want_signals) ?
4419 + PRECISE_SIGNALS : NO_SIGNALS;
4420 + param.drain_policy = args->drain_policy;
4421 + param.drain_policy = args->drain_policy;
4422 + param.release_policy = PERIODIC;
4423 + param.cpu = cluster_to_first_cpu(args->cluster, args->cluster_size);
4424 +
4425 + ret = set_rt_task_param(gettid(), ¶m);
4426 + if (ret < 0) {
4427 + bail_out("could not setup rt task params\n");
4428 + goto out;
4429 + }
4430 +
4431 + if (args->gpu_using)
4432 + allocate_locks(args->num_gpu_tasks, args->scheduler != LITMUS);
4433 +
4434 + if (args->scheduler == LITMUS)
4435 + {
4436 + ret = task_mode(LITMUS_RT_TASK);
4437 + if (ret < 0) {
4438 + fprintf(stderr, "could not become RT task\n");
4439 + goto out;
4440 + }
4441 + }
4442 + else
4443 + {
4444 + if (args->scheduler == RT_LINUX)
4445 + {
4446 + struct sched_param fifoparams;
4447 + memset(&fifoparams, 0, sizeof(fifoparams));
4448 + fifoparams.sched_priority = args->priority;
4449 + ret = sched_setscheduler(getpid(), SCHED_FIFO, &fifoparams);
4450 + if (ret < 0) {
4451 + fprintf(stderr, "could not become sched_fifo task\n");
4452 + goto out;
4453 + }
4454 + }
4455 + trace_name();
4456 + trace_param();
4457 + }
4458 +
4459 + if (args->wait) {
4460 + xprintf("%d: waiting for release.\n", getpid());
4461 + ret = wait_for_ts_release2(&releaseTime);
4462 + if (ret != 0) {
4463 + printf("wait_for_ts_release2()\n");
4464 + goto out;
4465 + }
4466 +
4467 + if (args->scheduler != LITMUS)
4468 + log_release();
4469 + }
4470 + else if (args->scheduler != LITMUS)
4471 + {
4472 + clock_gettime(CLOCK_MONOTONIC, &releaseTime);
4473 + sleep_next_period_linux();
4474 + }
4475 +
4476 + if (args->gpu_using && ENABLE_RT_AUX_THREADS) {
4477 + if (args->scheduler == LITMUS) {
4478 + ret = enable_aux_rt_tasks(AUX_CURRENT | AUX_FUTURE);
4479 + if (ret != 0) {
4480 + fprintf(stderr, "enable_aux_rt_tasks() failed\n");
4481 + goto out;
4482 + }
4483 + }
4484 + else if (args->scheduler == RT_LINUX) {
4485 + ret = enable_aux_rt_tasks_linux(gettid());
4486 + if (ret != 0) {
4487 + fprintf(stderr, "enable_aux_rt_tasks_linux() failed\n");
4488 + goto out;
4489 + }
4490 + }
4491 + }
4492 +
4493 + start = wctime();
4494 +
4495 + if (args->want_signals) {
4496 + ignore_litmus_signals(SIG_BUDGET_MASK); /* flush signals? */
4497 + activate_litmus_signals(SIG_BUDGET_MASK, longjmp_on_litmus_signal);
4498 + }
4499 +
4500 + if (!args->gpu_using) {
4501 + bool keepgoing;
4502 + do
4503 + {
4504 + double job_ms = wcet_dist_ms->random();
4505 + if (args->is_aberrant) {
4506 + double roll = drand48();
4507 + if (roll <= args->aberrant_prob)
4508 + job_ms *= args->aberrant_factor;
4509 + }
4510 +
4511 + if (job_ms < 0.0)
4512 + job_ms = 0.0;
4513 + keepgoing = cjobfn(ms2s(job_ms * args->scale), start + args->duration);
4514 + }while(keepgoing);
4515 + }
4516 + else {
4517 + bool keepgoing;
4518 + do
4519 + {
4520 + double job_ms = wcet_dist_ms->random();
4521 +
4522 + if (args->is_aberrant) {
4523 + double roll = drand48();
4524 + if (roll <= args->aberrant_prob)
4525 + job_ms *= args->aberrant_factor;
4526 + }
4527 +
4528 + if (job_ms < 0.0)
4529 + job_ms = 0.0;
4530 +
4531 + double cpu_job_ms = (job_ms/(args->wcet_ms + args->gpu_wcet_ms))*args->wcet_ms;
4532 + double gpu_job_ms = (job_ms/(args->wcet_ms + args->gpu_wcet_ms))*args->gpu_wcet_ms;
4533 + keepgoing = gjobfn(
4534 + ms2s(cpu_job_ms * args->scale),
4535 + ms2s(gpu_job_ms * args->scale),
4536 + args->num_kernels,
4537 + start + args->duration);
4538 + }while(keepgoing);
4539 + }
4540 +
4541 + ignore_litmus_signals(SIG_BUDGET_MASK);
4542 +
4543 + if (args->gpu_using && ENABLE_RT_AUX_THREADS) {
4544 + if (args->scheduler == LITMUS) {
4545 + ret = disable_aux_rt_tasks(AUX_CURRENT | AUX_FUTURE);
4546 + if (ret != 0) {
4547 + fprintf(stderr, "disable_aux_rt_tasks() failed\n");
4548 + goto out;
4549 + }
4550 + }
4551 + else if(args->scheduler == RT_LINUX) {
4552 + ret = disable_aux_rt_tasks_linux(gettid());
4553 + if (ret != 0) {
4554 + fprintf(stderr, "disable_aux_rt_tasks_linux() failed\n");
4555 + goto out;
4556 + }
4557 + }
4558 + }
4559 +
4560 + if (args->gpu_using)
4561 + deallocate_locks(args->num_gpu_tasks, args->scheduler != LITMUS);
4562 +
4563 + if (args->scheduler == LITMUS)
4564 + {
4565 + ret = task_mode(BACKGROUND_TASK);
4566 + if (ret != 0) {
4567 + fprintf(stderr, "could not become regular task (huh?)\n");
4568 + goto out;
4569 + }
4570 + }
4571 +
4572 + {
4573 + // become a normal task just in case.
4574 + struct sched_param normalparams;
4575 + memset(&normalparams, 0, sizeof(normalparams));
4576 + ret = sched_setscheduler(getpid(), SCHED_OTHER, &normalparams);
4577 + if (ret < 0) {
4578 + fprintf(stderr, "could not become sched_normal task\n");
4579 + goto out;
4580 + }
4581 + }
4582 +
4583 +out:
4584 + if (wcet_dist_ms)
4585 + delete wcet_dist_ms;
4586 +
4587 + return ret;
4588 +}
4589 +
4590 +int do_normal(struct Args* args)
4591 +{
4592 + int ret = 0;
4593 +
4594 + apply_args(args);
4595 +
4596 + if (args->scheduler == LITMUS)
4597 + init_litmus();
4598 + else
4599 + init_linux();
4600 +
4601 + if (args->gpu_using) {
4602 +#if 0
4603 + signal(SIGABRT, catch_exit);
4604 + signal(SIGTERM, catch_exit);
4605 + signal(SIGQUIT, catch_exit);
4606 + signal(SIGSEGV, catch_exit);
4607 +#endif
4608 +
4609 + cudaSetDeviceFlags(cudaDeviceScheduleSpin);
4610 + init_cuda(args->num_gpu_tasks);
4611 + init_events();
4612 + safetynet = true;
4613 + }
4614 +
4615 + ret = __do_normal(args);
4616 +
4617 + if (args->gpu_using) {
4618 + safetynet = false;
4619 + exit_cuda();
4620 + }
4621 +
4622 + return ret;
4623 +}
4624 +
4625 +typedef struct run_entry
4626 +{
4627 + struct Args args;
4628 + int used;
4629 + int ret;
4630 +} run_entry_t;
4631 +
4632 +
4633 +
4634 +static int *num_run_entries = NULL;
4635 +static run_entry_t *run_entries = NULL;
4636 +static pthread_barrier_t *daemon_barrier = NULL;
4637 +static pthread_mutex_t *daemon_mutex = NULL;
4638 +
4639 +static run_entry_t *my_run_entry = NULL;
4640 +static managed_shared_memory *daemon_segment_ptr = NULL;
4641 +
4642 +int init_daemon(struct Args* args, int num_total_users, bool is_daemon)
4643 +{
4644 + if (num_total_users)
4645 + {
4646 + shared_memory_object::remove("gpuspin_daemon_memory");
4647 +
4648 + daemon_segment_ptr = new managed_shared_memory(create_only, "gpuspin_daemon_memory", 30*PAGE_SIZE);
4649 + num_run_entries = daemon_segment_ptr->construct<int>("int num_run_entries")();
4650 + *num_run_entries = num_total_users;
4651 +
4652 + run_entries = daemon_segment_ptr->construct<struct run_entry>("run_entry_t run_entries")[num_total_users]();
4653 + memset(run_entries, 0, sizeof(run_entry_t)*num_total_users);
4654 +
4655 + daemon_mutex = daemon_segment_ptr->construct<pthread_mutex_t>("pthread_mutex_t daemon_mutex")();
4656 + pthread_mutexattr_t attr;
4657 + pthread_mutexattr_init(&attr);
4658 + pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
4659 + pthread_mutex_init(daemon_mutex, &attr);
4660 + pthread_mutexattr_destroy(&attr);
4661 +
4662 + daemon_barrier = daemon_segment_ptr->construct<pthread_barrier_t>("pthread_barrier_t daemon_barrier")();
4663 + pthread_barrierattr_t battr;
4664 + pthread_barrierattr_init(&battr);
4665 + pthread_barrierattr_setpshared(&battr, PTHREAD_PROCESS_SHARED);
4666 + pthread_barrier_init(daemon_barrier, &battr, args->num_tasks*2);
4667 + pthread_barrierattr_destroy(&battr);
4668 + }
4669 + else
4670 + {
4671 + do
4672 + {
4673 + try
4674 + {
4675 + if (!daemon_segment_ptr) daemon_segment_ptr = new managed_shared_memory(open_only, "gpuspin_daemon_memory");
4676 + }
4677 + catch(...)
4678 + {
4679 + sleep(1);
4680 + }
4681 + }while(daemon_segment_ptr == NULL);
4682 +
4683 + num_run_entries = daemon_segment_ptr->find<int>("int num_run_entries").first;
4684 + run_entries = daemon_segment_ptr->find<struct run_entry>("run_entry_t run_entries").first;
4685 + daemon_mutex = daemon_segment_ptr->find<pthread_mutex_t>("pthread_mutex_t daemon_mutex").first;
4686 + daemon_barrier = daemon_segment_ptr->find<pthread_barrier_t>("pthread_barrier_t daemon_barrier").first;
4687 + }
4688 +
4689 + if (is_daemon)
4690 + {
4691 + // find and claim an entry
4692 + pthread_mutex_lock(daemon_mutex);
4693 + for(int i = 0; i < *num_run_entries; ++i)
4694 + {
4695 + if(!run_entries[i].used)
4696 + {
4697 + my_run_entry = &run_entries[i];
4698 + my_run_entry->used = 1;
4699 + break;
4700 + }
4701 + }
4702 + pthread_mutex_unlock(daemon_mutex);
4703 +
4704 + assert(my_run_entry);
4705 + my_run_entry->args = *args;
4706 + my_run_entry->ret = 0;
4707 + }
4708 + else
4709 + {
4710 + // find my entry
4711 + pthread_mutex_lock(daemon_mutex);
4712 + for(int i = 0; i < *num_run_entries; ++i)
4713 + {
4714 + if (run_entries[i].args.wcet_ms == args->wcet_ms &&
4715 + run_entries[i].args.gpu_wcet_ms == args->gpu_wcet_ms &&
4716 + run_entries[i].args.period_ms == args->period_ms)
4717 + {
4718 + my_run_entry = &run_entries[i];
4719 + break;
4720 + }
4721 + }
4722 + pthread_mutex_unlock(daemon_mutex);
4723 + }
4724 +
4725 + if (!my_run_entry) {
4726 + fprintf(stderr, "Could not find task <wcet, gpu_wcet, period>: <%f %f %f>\n", args->wcet_ms, args->gpu_wcet_ms, args->period_ms);
4727 + return -1;
4728 + }
4729 + return 0;
4730 +}
4731 +
4732 +int put_next_run(struct Args* args)
4733 +{
4734 + assert(my_run_entry);
4735 +
4736 + pthread_mutex_lock(daemon_mutex);
4737 + my_run_entry->args = *args;
4738 + pthread_mutex_unlock(daemon_mutex);
4739 +
4740 + pthread_barrier_wait(daemon_barrier);
4741 +
4742 + return 0;
4743 +}
4744 +
4745 +int get_next_run(struct Args* args)
4746 +{
4747 + assert(my_run_entry);
4748 +
4749 + pthread_barrier_wait(daemon_barrier);
4750 +
4751 + pthread_mutex_lock(daemon_mutex);
4752 + *args = my_run_entry->args;
4753 + my_run_entry->ret = 0;
4754 + pthread_mutex_unlock(daemon_mutex);
4755 +
4756 + return 0;
4757 +}
4758 +
4759 +int complete_run(int ret)
4760 +{
4761 + assert(my_run_entry);
4762 +
4763 + pthread_mutex_lock(daemon_mutex);
4764 + my_run_entry->ret = ret;
4765 + pthread_mutex_unlock(daemon_mutex);
4766 +
4767 + pthread_barrier_wait(daemon_barrier);
4768 +
4769 + return 0;
4770 +}
4771 +
4772 +int wait_completion()
4773 +{
4774 + int ret = 0;
4775 +
4776 + assert(my_run_entry);
4777 +
4778 + pthread_barrier_wait(daemon_barrier);
4779 +
4780 + pthread_mutex_lock(daemon_mutex);
4781 + ret = my_run_entry->ret;
4782 + pthread_mutex_unlock(daemon_mutex);
4783 +
4784 + return ret;
4785 +}
4786 +
4787 +
4788 +
4789 +
4790 +int do_proxy(struct Args* args)
4791 +{
4792 + int ret = 0;
4793 + ret = init_daemon(args, 0, false);
4794 + if (ret < 0)
4795 + goto out;
4796 + put_next_run(args);
4797 + ret = wait_completion();
4798 +
4799 +out:
4800 + return ret;
4801 +}
4802 +
4803 +static bool is_daemon = false;
4804 +static bool running = false;
4805 +static void catch_exit2(int signal)
4806 +{
4807 + if (is_daemon && running)
4808 + complete_run(-signal);
4809 + catch_exit(signal);
4810 +}
4811 +
4812 +int do_daemon(struct Args* args)
4813 +{
4814 + is_daemon = true;
4815 +
4816 + int ret = 0;
4817 + struct Args nextargs;
4818 +
4819 + signal(SIGFPE, catch_exit2);
4820 + signal(SIGABRT, catch_exit2);
4821 + signal(SIGTERM, catch_exit2);
4822 + signal(SIGQUIT, catch_exit2);
4823 + signal(SIGSEGV, catch_exit2);
4824 +
4825 + init_daemon(args, args->num_tasks, true);
4826 +
4827 + apply_args(args);
4828 + init_litmus(); /* does everything init_linux() does, plus litmus stuff */
4829 +
4830 + if (args->gpu_using) {
4831 + cudaSetDeviceFlags(cudaDeviceScheduleSpin);
4832 + init_cuda(args->num_gpu_tasks);
4833 + init_events();
4834 + safetynet = true;
4835 + }
4836 +
4837 + do {
4838 + bool sync_change = false;
4839 + bool gpu_part_change = false;
4840 + bool gpu_part_size_change = false;
4841 +
4842 + xprintf("%d: waiting for work\n", getpid());
4843 +
4844 + get_next_run(&nextargs);
4845 +
4846 + if (nextargs.gpu_using) {
4847 + xprintf("%d: gpu using! gpu partition = %d, gwcet = %f, send = %lu\n",
4848 + getpid(),
4849 + nextargs.gpu_partition,
4850 + nextargs.gpu_wcet_ms,
4851 + nextargs.send_size);
4852 + }
4853 +
4854 + running = true;
4855 + sync_change = args->gpu_using && (CUDA_SYNC_MODE != nextargs.sync_mode);
4856 + gpu_part_change = args->gpu_using && (GPU_PARTITION != nextargs.gpu_partition);
4857 + gpu_part_size_change = args->gpu_using && (GPU_PARTITION_SIZE != nextargs.gpu_partition_size);
4858 +
4859 + if (sync_change || gpu_part_change || gpu_part_size_change) {
4860 + destroy_events();
4861 + if (gpu_part_change || gpu_part_size_change)
4862 + exit_cuda();
4863 + }
4864 + apply_args(&nextargs);
4865 + if (sync_change || gpu_part_change || gpu_part_size_change) {
4866 + if (gpu_part_change || gpu_part_size_change) {
4867 + xprintf("%d: changing device configuration\n", getpid());
4868 + init_cuda(nextargs.num_gpu_tasks);
4869 + CUR_DEVICE = -1;
4870 + LAST_DEVICE = -1;
4871 + }
4872 + init_events();
4873 + }
4874 +
4875 + xprintf("%d: starting run\n", getpid());
4876 +
4877 + ret = __do_normal(&nextargs);
4878 + complete_run(ret);
4879 + running = false;
4880 + }while(ret == 0);
4881 +
4882 + if (args->gpu_using) {
4883 + safetynet = false;
4884 + exit_cuda();
4885 + }
4886 +
4887 + if (args->num_gpu_tasks)
4888 + shared_memory_object::remove("gpu_mutex_memory");
4889 +
4890 + if (args->num_tasks)
4891 + shared_memory_object::remove("gpuspin_daemon_memory");
4892 +
4893 + return ret;
4894 +}
4895 +
4896 +#define CPU_OPTIONS "p:z:c:wlveio:f:s:q:X:L:Q:d:"
4897 +#define GPU_OPTIONS "g:y:r:C:E:DG:xS:R:T:Z:aFm:b:MNIk:VW:uU:O:"
4898 +#define PROXY_OPTIONS "B:PA"
4899 +
4900 +// concat the option strings
4901 +#define OPTSTR CPU_OPTIONS GPU_OPTIONS PROXY_OPTIONS
4902 +
4903 +int main(int argc, char** argv)
4904 +{
4905 + struct Args myArgs;
4906 + set_defaults(&myArgs);
4907 +
4908 + eRunMode run_mode = NORMAL;
4909 +
4910 + int opt;
4911 + progname = argv[0];
4912 +
4913 + while ((opt = getopt(argc, argv, OPTSTR)) != -1) {
4914 + switch (opt) {
4915 + case 'B':
4916 + myArgs.num_tasks = atoi(optarg);
4917 + break;
4918 + case 'P':
4919 + run_mode = PROXY;
4920 + break;
4921 + case 'A':
4922 + run_mode = DAEMON;
4923 + break;
4924 + case 'U':
4925 + myArgs.is_aberrant = true;
4926 + myArgs.aberrant_prob = (double)atoi(optarg);
4927 + break;
4928 + case 'O':
4929 + myArgs.is_aberrant = true;
4930 + myArgs.aberrant_factor = atof(optarg);
4931 + break;
4932 +
4933 +
4934 + case 'w':
4935 + myArgs.wait = true;
4936 + break;
4937 + case 'p':
4938 + myArgs.cluster = atoi(optarg);
4939 + myArgs.migrate = true;
4940 + break;
4941 + case 'z':
4942 +// CPU_PARTITION_SIZE = cluster_size;
4943 + myArgs.cluster_size = atoi(optarg);
4944 + break;
4945 + case 'g':
4946 +// GPU_USING = true;
4947 +// GPU_PARTITION = atoi(optarg);
4948 + myArgs.gpu_using = true;
4949 + myArgs.gpu_partition = atoi(optarg);
4950 +// assert(GPU_PARTITION >= 0 && GPU_PARTITION < NR_GPUS);
4951 + break;
4952 + case 'y':
4953 +// GPU_PARTITION_SIZE = atoi(optarg);
4954 + myArgs.gpu_partition_size = atoi(optarg);
4955 +// assert(GPU_PARTITION_SIZE > 0);
4956 + break;
4957 + case 'r':
4958 +// RHO = atoi(optarg);
4959 + myArgs.rho = atoi(optarg);
4960 +// assert(RHO > 0);
4961 + break;
4962 + case 'C':
4963 +// NUM_COPY_ENGINES = atoi(optarg);
4964 + myArgs.num_ce = atoi(optarg);
4965 +// assert(NUM_COPY_ENGINES == 1 || NUM_COPY_ENGINES == 2);
4966 + break;
4967 + case 'V':
4968 +// RESERVED_MIGR_COPY_ENGINE = true;
4969 + myArgs.reserve_migr_ce = true;
4970 + break;
4971 + case 'E':
4972 +// USE_ENGINE_LOCKS = true;
4973 +// ENGINE_LOCK_TYPE = (eEngineLockTypes)atoi(optarg);
4974 + myArgs.use_engine_locks = true;
4975 + myArgs.engine_lock_type = (eEngineLockTypes)atoi(optarg);
4976 +// assert(ENGINE_LOCK_TYPE == FIFO || ENGINE_LOCK_TYPE == PRIOQ);
4977 + break;
4978 + case 'u':
4979 + myArgs.yield_locks = true;
4980 + break;
4981 + case 'D':
4982 +// USE_DYNAMIC_GROUP_LOCKS = true;
4983 + myArgs.use_dgls = true;
4984 + break;
4985 + case 'G':
4986 +// GPU_SYNC_MODE = (eGpuSyncMode)atoi(optarg);
4987 + myArgs.gpusync_mode = (eGpuSyncMode)atoi(optarg);
4988 +// assert(GPU_SYNC_MODE >= IKGLP_MODE && GPU_SYNC_MODE <= RGEM_MODE);
4989 + break;
4990 + case 'a':
4991 +// ENABLE_AFFINITY = true;
4992 + myArgs.enable_affinity = true;
4993 + break;
4994 + case 'F':
4995 +// RELAX_FIFO_MAX_LEN = true;
4996 + myArgs.relax_fifo_len = true;
4997 + break;
4998 + case 'x':
4999 +// CUDA_SYNC_MODE = SPIN;
5000 + myArgs.sync_mode = SPIN;
5001 + break;
5002 + case 'S':
5003 +// SEND_SIZE = kbToB((size_t)atoi(optarg));
5004 + myArgs.send_size = kbToB((size_t)atoi(optarg));
5005 + break;
5006 + case 'R':
5007 +// RECV_SIZE = kbToB((size_t)atoi(optarg));
5008 + myArgs.recv_size = kbToB((size_t)atoi(optarg));
5009 + break;
5010 + case 'T':
5011 +// STATE_SIZE = kbToB((size_t)atoi(optarg));
5012 + myArgs.state_size = kbToB((size_t)atoi(optarg));
5013 + break;
5014 + case 'Z':
5015 +// ENABLE_CHUNKING = true;
5016 +// CHUNK_SIZE = kbToB((size_t)atoi(optarg));
5017 + myArgs.enable_chunking = true;
5018 + myArgs.chunk_size = kbToB((size_t)atoi(optarg));
5019 + break;
5020 + case 'M':
5021 +// MIGRATE_VIA_SYSMEM = true;
5022 + myArgs.use_sysmem_migration = true;
5023 + break;
5024 + case 'm':
5025 +// num_gpu_users = (int)atoi(optarg);
5026 + myArgs.num_gpu_tasks = (int)atoi(optarg);
5027 +// assert(num_gpu_users > 0);
5028 + break;
5029 + case 'k':
5030 +// num_kernels = (unsigned int)atoi(optarg);
5031 + myArgs.num_kernels = (unsigned int)atoi(optarg);
5032 + break;
5033 + case 'b':
5034 +// budget_ms = atoi(optarg);
5035 + myArgs.budget_ms = atoi(optarg);
5036 + break;
5037 + case 'W':
5038 +// stdpct = (double)atof(optarg);
5039 + myArgs.stddev = (double)atof(optarg);
5040 + break;
5041 + case 'N':
5042 +// scheduler = LINUX;
5043 + myArgs.scheduler = LINUX;
5044 + break;
5045 + case 'I':
5046 +// scheduler = RT_LINUX;
5047 + myArgs.scheduler = RT_LINUX;
5048 + break;
5049 + case 'q':
5050 +// priority = atoi(optarg);
5051 + myArgs.priority = atoi(optarg);
5052 + break;
5053 + case 'c':
5054 +// cls = str2class(optarg);
5055 + myArgs.cls = str2class(optarg);
5056 + break;
5057 + case 'e':
5058 +// want_enforcement = true;
5059 + myArgs.want_enforcement = true;
5060 + break;
5061 + case 'i':
5062 +// want_signals = true;
5063 + myArgs.want_signals = true;
5064 + break;
5065 + case 'd':
5066 +// drain = (budget_drain_policy_t)atoi(optarg);
5067 + myArgs.drain_policy = (budget_drain_policy_t)atoi(optarg);
5068 +// assert(drain >= DRAIN_SIMPLE && drain <= DRAIN_SOBLIV);
5069 +// assert(drain != DRAIN_SAWARE); // unsupported
5070 + break;
5071 +// case 'l':
5072 +// test_loop = 1;
5073 +// break;
5074 +// case 'o':
5075 +//// column = atoi(optarg);
5076 +// myArgs.column = atoi(optarg);
5077 +// break;
5078 +// case 'f':
5079 +// file = optarg;
5080 +// break;
5081 + case 's':
5082 +// scale = (double)atof(optarg);
5083 + myArgs.scale = (double)atof(optarg);
5084 + break;
5085 +// case 'X':
5086 +// protocol = lock_protocol_for_name(optarg);
5087 +// if (protocol < 0)
5088 +// usage("Unknown locking protocol specified.");
5089 +// break;
5090 +// case 'L':
5091 +// cs_length = atof(optarg);
5092 +// if (cs_length <= 0)
5093 +// usage("Invalid critical section length.");
5094 +// break;
5095 +// case 'Q':
5096 +// resource_id = atoi(optarg);
5097 +// if (resource_id <= 0 && strcmp(optarg, "0"))
5098 +// usage("Invalid resource ID.");
5099 +// break;
5100 + case ':':
5101 + usage("Argument missing.");
5102 + break;
5103 + case '?':
5104 + default:
5105 + usage("Bad argument.");
5106 + break;
5107 + }
5108 + }
5109 +
5110 +
5111 + srand(time(0));
5112 +
5113 + if (argc - optind == 3) {
5114 + myArgs.wcet_ms = atof(argv[optind + 0]);
5115 + myArgs.period_ms = atof(argv[optind + 1]);
5116 + myArgs.duration = atof(argv[optind + 2]);
5117 + }
5118 + else if (argc - optind == 4) {
5119 + myArgs.wcet_ms = atof(argv[optind + 0]);
5120 + myArgs.gpu_wcet_ms = atof(argv[optind + 1]);
5121 + myArgs.period_ms = atof(argv[optind + 2]);
5122 + myArgs.duration = atof(argv[optind + 3]);
5123 + }
5124 +
5125 + double rate = (1000.0/myArgs.period_ms)*myArgs.aberrant_prob;
5126 + myArgs.aberrant_prob = 1.0 / rate;
5127 +
5128 + if (myArgs.num_tasks == 0 || myArgs.num_gpu_tasks == 0) {
5129 + // safety w.r.t. shared mem.
5130 + sleep(2);
5131 + }
5132 +
5133 + /* make sure children don't take sigmasks */
5134 + ignore_litmus_signals(ALL_LITMUS_SIG_MASKS);
5135 +
5136 + if (run_mode == NORMAL) {
5137 + return do_normal(&myArgs);
5138 + }
5139 + else if (run_mode == PROXY) {
5140 + return do_proxy(&myArgs);
5141 + }
5142 + else if (run_mode == DAEMON) {
5143 + return do_daemon(&myArgs);
5144 + }
5145 +}
5146 diff --git a/gpu/ikglptest.c b/gpu/ikglptest.c
5147 new file mode 100644
5148 index 0000000..e5fa6fc
5149 --- /dev/null
5150 +++ b/gpu/ikglptest.c
5151 @@ -0,0 +1,653 @@
5152 +#include <stdio.h>
5153 +#include <stdlib.h>
5154 +#include <string.h>
5155 +#include <stdint.h>
5156 +#include <unistd.h>
5157 +#include <assert.h>
5158 +#include <errno.h>
5159 +#include <sys/types.h>
5160 +#include <sys/stat.h>
5161 +#include <fcntl.h>
5162 +#include <time.h>
5163 +#include <math.h>
5164 +
5165 +/* Include gettid() */
5166 +#include <sys/types.h>
5167 +
5168 +/* Include threading support. */
5169 +#include <pthread.h>
5170 +
5171 +/* Include the LITMUS^RT API.*/
5172 +#include "litmus.h"
5173 +
5174 +/* Catch errors.
5175 + */
5176 +#if 1
5177 +#define CALL( exp ) do { \
5178 + int ret; \
5179 + ret = exp; \
5180 + if (ret != 0) \
5181 + fprintf(stderr, "%s failed: %m\n", #exp);\
5182 + else \
5183 + fprintf(stderr, "%s ok.\n", #exp); \
5184 + } while (0)
5185 +
5186 +#define TH_CALL( exp ) do { \
5187 + int ret; \
5188 + ret = exp; \
5189 + if (ret != 0) \
5190 + fprintf(stderr, "[%d] %s failed: %m\n", ctx->id, #exp); \
5191 + else \
5192 + fprintf(stderr, "[%d] %s ok.\n", ctx->id, #exp); \
5193 + } while (0)
5194 +
5195 +#define TH_SAFE_CALL( exp ) do { \
5196 + int ret; \
5197 + fprintf(stderr, "[%d] calling %s...\n", ctx->id, #exp); \
5198 + ret = exp; \
5199 + if (ret != 0) \
5200 + fprintf(stderr, "\t...[%d] %s failed: %m\n", ctx->id, #exp); \
5201 + else \
5202 + fprintf(stderr, "\t...[%d] %s ok.\n", ctx->id, #exp); \
5203 + } while (0)
5204 +#else
5205 +#define CALL( exp )
5206 +#define TH_CALL( exp )
5207 +#define TH_SAFE_CALL( exp )
5208 +#endif
5209 +
5210 +/* these are only default values */
5211 +int NUM_THREADS=3;
5212 +int NUM_AUX_THREADS=0;
5213 +int NUM_SEMS=1;
5214 +int NUM_GPUS=1;
5215 +int GPU_OFFSET=0;
5216 +int NUM_SIMULT_USERS = 1;
5217 +int ENABLE_AFFINITY = 0;
5218 +int NEST_DEPTH=1;
5219 +int USE_KFMLP = 0;
5220 +int RELAX_FIFO_MAX_LEN = 0;
5221 +int USE_DYNAMIC_GROUP_LOCKS = 0;
5222 +
5223 +int SLEEP_BETWEEN_JOBS = 1;
5224 +int USE_PRIOQ = 0;
5225 +
5226 +int gAuxRun = 1;
5227 +pthread_mutex_t gMutex = PTHREAD_MUTEX_INITIALIZER;
5228 +
5229 +#define MAX_SEMS 1000
5230 +
5231 +// 1000 = 1us
5232 +#define EXEC_COST 1000*1
5233 +#define PERIOD 2*1000*100
5234 +
5235 +/* The information passed to each thread. Could be anything. */
5236 +struct thread_context {
5237 + int id;
5238 + int fd;
5239 + int kexclu;
5240 + int od[MAX_SEMS];
5241 + int count;
5242 + unsigned int rand;
5243 + int mig_count[5];
5244 +};
5245 +
5246 +void* rt_thread(void* _ctx);
5247 +void* aux_thread(void* _ctx);
5248 +int nested_job(struct thread_context* ctx, int *count, int *next, int runfactor);
5249 +int job(struct thread_context* ctx, int runfactor);
5250 +
5251 +
5252 +struct avg_info
5253 +{
5254 + float avg;
5255 + float stdev;
5256 +};
5257 +
5258 +struct avg_info feedback(int _a, int _b)
5259 +{
5260 + fp_t a = _frac(_a, 10000);
5261 + fp_t b = _frac(_b, 10000);
5262 + int i;
5263 +
5264 + fp_t actual_fp;
5265 +
5266 + fp_t _est, _err;
5267 +
5268 + int base = 1000000;
5269 + //int range = 40;
5270 +
5271 + fp_t est = _integer_to_fp(base);
5272 + fp_t err = _fp(base/2);
5273 +
5274 +#define NUM_SAMPLES 10000
5275 +
5276 + float samples[NUM_SAMPLES] = {0.0};
5277 + float accu_abs, accu;
5278 + float avg;
5279 + float devsum;
5280 + float stdev;
5281 + struct avg_info ret;
5282 +
5283 + for(i = 0; i < NUM_SAMPLES; ++i) {
5284 + int num = ((rand()%40)*(rand()%2 ? -1 : 1)/100.0)*base + base;
5285 + float rel_err;
5286 +
5287 + actual_fp = _integer_to_fp(num);
5288 +
5289 +// printf("Before: est = %d\terr = %d\n", (int)_fp_to_integer(est), (int)_fp_to_integer(err));
5290 +
5291 + _err = _sub(actual_fp, est);
5292 + _est = _add(_mul(a, _err), _mul(b, err));
5293 +
5294 + rel_err = _fp_to_integer(_mul(_div(_err, est), _integer_to_fp(10000)))/10000.0;
5295 + rel_err *= 100.0;
5296 + //printf("%6.2f\n", rel_err);
5297 + samples[i] = rel_err;
5298 +
5299 + est = _est;
5300 + err = _add(err, _err);
5301 +
5302 + if((int)_fp_to_integer(est) <= 0) {
5303 + est = actual_fp;
5304 + err = _div(actual_fp, _integer_to_fp(2));
5305 + }
5306 +
5307 + //printf("After: est = %d\terr = %d\n", (int)_fp_to_integer(est), (int)_fp_to_integer(err));
5308 + }
5309 +
5310 + accu_abs = 0.0;
5311 + accu = 0.0;
5312 + for(i = 0; i < NUM_SAMPLES; ++i) {
5313 + accu += samples[i];
5314 + accu_abs += abs(samples[i]);
5315 + }
5316 +
5317 + avg = accu_abs/NUM_SAMPLES;
5318 + devsum = 0;
5319 + for(i = 0; i < NUM_SAMPLES; ++i) {
5320 + float dev = samples[i] - avg;
5321 + dev *= dev;
5322 + devsum += dev;
5323 + }
5324 +
5325 + stdev = sqrtf(devsum/(NUM_SAMPLES-1));
5326 +
5327 + ret.avg = avg;
5328 + ret.stdev = stdev;
5329 +
5330 + //printf("AVG: %6.2f\tw/ neg: %6.2f\n", accu_abs/NUM_SAMPLES, accu/NUM_SAMPLES);
5331 +
5332 + //return (accu_abs/NUM_SAMPLES);
5333 + return(ret);
5334 +}
5335 +
5336 +
5337 +
5338 +#define OPTSTR "t:k:o:z:s:d:lfaryA:q"
5339 +
5340 +int main(int argc, char** argv)
5341 +{
5342 + int i;
5343 + struct thread_context* ctx = NULL;
5344 + struct thread_context* aux_ctx = NULL;
5345 + pthread_t* task = NULL;
5346 + pthread_t* aux_task = NULL;
5347 + struct rt_task param;
5348 + int fd;
5349 +
5350 + int opt;
5351 + while((opt = getopt(argc, argv, OPTSTR)) != -1) {
5352 + switch(opt) {
5353 + case 't':
5354 + NUM_THREADS = atoi(optarg);
5355 + break;
5356 + case 'A':
5357 + NUM_AUX_THREADS = atoi(optarg);
5358 + break;
5359 + case 'k':
5360 + NUM_GPUS = atoi(optarg);
5361 + assert(NUM_GPUS > 0);
5362 + break;
5363 + case 'z':
5364 + NUM_SIMULT_USERS = atoi(optarg);
5365 + assert(NUM_SIMULT_USERS > 0);
5366 + break;
5367 + case 'o':
5368 + GPU_OFFSET = atoi(optarg);
5369 + assert(GPU_OFFSET >= 0);
5370 + break;
5371 + case 's':
5372 + NUM_SEMS = atoi(optarg);
5373 + assert(NUM_SEMS >= 0 && NUM_SEMS < MAX_SEMS);
5374 + break;
5375 + case 'd':
5376 + NEST_DEPTH = atoi(optarg);
5377 + assert(NEST_DEPTH >= 0);
5378 + break;
5379 + case 'f':
5380 + SLEEP_BETWEEN_JOBS = 0;
5381 + break;
5382 + case 'a':
5383 + ENABLE_AFFINITY = 1;
5384 + break;
5385 + case 'l':
5386 + USE_KFMLP = 1;
5387 + break;
5388 + case 'y':
5389 + USE_DYNAMIC_GROUP_LOCKS = 1;
5390 + break;
5391 + case 'r':
5392 + RELAX_FIFO_MAX_LEN = 1;
5393 + break;
5394 + case 'q':
5395 + USE_PRIOQ = 1;
5396 + break;
5397 + default:
5398 + fprintf(stderr, "Unknown option: %c\n", opt);
5399 + exit(-1);
5400 + break;
5401 + }
5402 + }
5403 +
5404 +#if 0
5405 + int best_a = 0, best_b = 0;
5406 + int first = 1;
5407 + int TRIALS = 15;
5408 +
5409 + int a, b, t;
5410 +
5411 + struct avg_info best = {0.0,0.0}, second_best;
5412 +
5413 + int second_best_a, second_best_b;
5414 +
5415 + srand(time(0));
5416 +
5417 + int step = 50;
5418 +
5419 + for(b = 2000; b < 5000; b += step) {
5420 + for(a = 1500; a < b; a += (step/4)) {
5421 + float std_accum = 0;
5422 + float avg_accum = 0;
5423 + for(t = 0; t < TRIALS; ++t) {
5424 + struct avg_info temp;
5425 + temp = feedback(a, b);
5426 + std_accum += temp.stdev;
5427 + avg_accum += temp.avg;
5428 + }
5429 +
5430 + float avg_std = std_accum / TRIALS;
5431 +
5432 + if(first || avg_std < best.stdev) {
5433 + second_best_a = best_a;
5434 + second_best_b = best_b;
5435 + second_best = best;
5436 +
5437 + best.stdev = avg_std;
5438 + best.avg = avg_accum / TRIALS;
5439 + best_a = a;
5440 + best_b = b;
5441 +
5442 + first = 0;
5443 + }
5444 + }
5445 + }
5446 +
5447 + printf("Best:\ta = %d\tb = %d\t(b-a) = %d\tavg = %6.2f\tstdev = %6.2f\n", best_a, best_b, best_b - best_a, best.avg, best.stdev);
5448 + printf("2nd:\ta = %d\tb = %d\t(b-a) = %d\tavg = %6.2f\tstdev = %6.2f\n", second_best_a, second_best_b, second_best_b - second_best_a, second_best.avg, second_best.stdev);
5449 +
5450 +
5451 + a = 14008;
5452 + b = 16024;
5453 + float std_accum = 0;
5454 + float avg_accum = 0;
5455 + for(t = 0; t < TRIALS; ++t) {
5456 + struct avg_info temp;
5457 + temp = feedback(a, b);
5458 + std_accum += temp.stdev;
5459 + avg_accum += temp.avg;
5460 + }
5461 +
5462 + printf("Aaron:\tavg = %6.2f\tstd = %6.2f\n", avg_accum/TRIALS, std_accum/TRIALS);
5463 +
5464 +
5465 +
5466 +
5467 + return 0;
5468 +#endif
5469 +
5470 +
5471 +
5472 +
5473 + ctx = (struct thread_context*) calloc(NUM_THREADS, sizeof(struct thread_context));
5474 + task = (pthread_t*) calloc(NUM_THREADS, sizeof(pthread_t));
5475 +
5476 + if (NUM_AUX_THREADS) {
5477 + aux_ctx = (struct thread_context*) calloc(NUM_AUX_THREADS, sizeof(struct thread_context));
5478 + aux_task = (pthread_t*) calloc(NUM_AUX_THREADS, sizeof(pthread_t));
5479 + }
5480 +
5481 + srand(0); /* something repeatable for now */
5482 +
5483 + fd = open("semaphores", O_RDONLY | O_CREAT, S_IRUSR | S_IWUSR);
5484 +
5485 + CALL( init_litmus() );
5486 +
5487 + for (i = 0; i < NUM_AUX_THREADS; i++) {
5488 + aux_ctx[i].id = i;
5489 + CALL( pthread_create(aux_task + i, NULL, aux_thread, ctx + i) );
5490 + }
5491 +
5492 + for (i = 0; i < NUM_THREADS; i++) {
5493 + ctx[i].id = i;
5494 + ctx[i].fd = fd;
5495 + ctx[i].rand = rand();
5496 + memset(&ctx[i].mig_count, 0, sizeof(ctx[i].mig_count));
5497 + CALL( pthread_create(task + i, NULL, rt_thread, ctx + i) );
5498 + }
5499 +
5500 + if (NUM_AUX_THREADS) {
5501 +		init_rt_task_param(&param);
5502 + param.exec_cost = EXEC_COST;
5503 + param.period = PERIOD + 10*NUM_THREADS+1;
5504 + param.cls = RT_CLASS_SOFT;
5505 +
5506 + TH_CALL( init_rt_thread() );
5507 +		TH_CALL( set_rt_task_param(gettid(), &param) );
5508 + TH_CALL( task_mode(LITMUS_RT_TASK) );
5509 +
5510 + printf("[MASTER] Waiting for TS release.\n ");
5511 + wait_for_ts_release();
5512 +
5513 + CALL( enable_aux_rt_tasks(AUX_CURRENT) );
5514 +
5515 + for(i = 0; i < 25000; ++i) {
5516 + sleep_next_period();
5517 + pthread_mutex_lock(&gMutex);
5518 + pthread_mutex_unlock(&gMutex);
5519 + }
5520 +
5521 + CALL( disable_aux_rt_tasks(AUX_CURRENT) );
5522 + __sync_synchronize();
5523 + gAuxRun = 0;
5524 + __sync_synchronize();
5525 +
5526 + for (i = 0; i < NUM_AUX_THREADS; i++)
5527 + pthread_join(aux_task[i], NULL);
5528 +
5529 + TH_CALL( task_mode(BACKGROUND_TASK) );
5530 + }
5531 +
5532 + for (i = 0; i < NUM_THREADS; i++)
5533 + pthread_join(task[i], NULL);
5534 +
5535 + return 0;
5536 +}
5537 +
5538 +int affinity_cost[] = {1, 4, 8, 16};
5539 +
5540 +int affinity_distance(struct thread_context* ctx, int a, int b)
5541 +{
5542 + int i;
5543 + int dist;
5544 +
5545 + if(a >= 0 && b >= 0) {
5546 + for(i = 0; i <= 3; ++i) {
5547 + if(a>>i == b>>i) {
5548 + dist = i;
5549 + goto out;
5550 + }
5551 + }
5552 + dist = 0; // hopefully never reached.
5553 + }
5554 + else {
5555 + dist = 0;
5556 + }
5557 +
5558 +out:
5559 + //printf("[%d]: distance: %d -> %d = %d\n", ctx->id, a, b, dist);
5560 +
5561 + ++(ctx->mig_count[dist]);
5562 +
5563 + return dist;
5564 +
5565 +// int groups[] = {2, 4, 8};
5566 +// int i;
5567 +//
5568 +// if(a < 0 || b < 0)
5569 +// return (sizeof(groups)/sizeof(groups[0])); // worst affinity
5570 +//
5571 +// // no migration
5572 +// if(a == b)
5573 +// return 0;
5574 +//
5575 +// for(i = 0; i < sizeof(groups)/sizeof(groups[0]); ++i) {
5576 +// if(a/groups[i] == b/groups[i])
5577 +// return (i+1);
5578 +// }
5579 +// assert(0);
5580 +// return -1;
5581 +}
5582 +
5583 +
5584 +void* aux_thread(void* _ctx)
5585 +{
5586 + struct thread_context *ctx = (struct thread_context*)_ctx;
5587 +
5588 + while (gAuxRun) {
5589 + pthread_mutex_lock(&gMutex);
5590 + pthread_mutex_unlock(&gMutex);
5591 + }
5592 +
5593 + return ctx;
5594 +}
5595 +
5596 +void* rt_thread(void* _ctx)
5597 +{
5598 + int i;
5599 + int do_exit = 0;
5600 + int last_replica = -1;
5601 + struct rt_task param;
5602 +
5603 + struct thread_context *ctx = (struct thread_context*)_ctx;
5604 +
5605 +	init_rt_task_param(&param);
5606 + param.exec_cost = EXEC_COST;
5607 + param.period = PERIOD + 10*ctx->id; /* Vary period a little bit. */
5608 + param.cls = RT_CLASS_SOFT;
5609 +
5610 + TH_CALL( init_rt_thread() );
5611 +	TH_CALL( set_rt_task_param(gettid(), &param) );
5612 +
5613 + if(USE_KFMLP) {
5614 + ctx->kexclu = open_kfmlp_gpu_sem(ctx->fd,
5615 + 0, /* name */
5616 + NUM_GPUS,
5617 + GPU_OFFSET,
5618 + NUM_SIMULT_USERS,
5619 + ENABLE_AFFINITY
5620 + );
5621 + }
5622 + else {
5623 +// ctx->kexclu = open_ikglp_sem(ctx->fd, 0, &NUM_GPUS);
5624 + ctx->kexclu = open_gpusync_token_lock(ctx->fd,
5625 + 0, /* name */
5626 + NUM_GPUS,
5627 + GPU_OFFSET,
5628 + NUM_SIMULT_USERS,
5629 + IKGLP_M_IN_FIFOS,
5630 + (!RELAX_FIFO_MAX_LEN) ?
5631 + IKGLP_OPTIMAL_FIFO_LEN :
5632 + IKGLP_UNLIMITED_FIFO_LEN,
5633 + ENABLE_AFFINITY
5634 + );
5635 + }
5636 + if(ctx->kexclu < 0)
5637 + perror("open_kexclu_sem");
5638 + else
5639 + printf("kexclu od = %d\n", ctx->kexclu);
5640 +
5641 + for (i = 0; i < NUM_SEMS; ++i) {
5642 + if(!USE_PRIOQ) {
5643 + ctx->od[i] = open_fifo_sem(ctx->fd, i + ctx->kexclu + 2);
5644 + if(ctx->od[i] < 0)
5645 + perror("open_fifo_sem");
5646 + else
5647 + printf("fifo[%d] od = %d\n", i, ctx->od[i]);
5648 + }
5649 + else {
5650 + ctx->od[i] = open_prioq_sem(ctx->fd, i + ctx->kexclu + 2);
5651 + if(ctx->od[i] < 0)
5652 + perror("open_prioq_sem");
5653 + else
5654 + printf("prioq[%d] od = %d\n", i, ctx->od[i]);
5655 + }
5656 + }
5657 +
5658 + TH_CALL( task_mode(LITMUS_RT_TASK) );
5659 +
5660 + printf("[%d] Waiting for TS release.\n ", ctx->id);
5661 + wait_for_ts_release();
5662 + ctx->count = 0;
5663 +
5664 + do {
5665 + int first = (int)(NUM_SEMS * (rand_r(&(ctx->rand)) / (RAND_MAX + 1.0)));
5666 + int last = (first + NEST_DEPTH - 1 >= NUM_SEMS) ? NUM_SEMS - 1 : first + NEST_DEPTH - 1;
5667 + int dgl_size = last - first + 1;
5668 + int replica = -1;
5669 + int distance;
5670 +
5671 + int dgl[dgl_size];
5672 +
5673 + // construct the DGL
5674 + for(i = first; i <= last; ++i) {
5675 + dgl[i-first] = ctx->od[i];
5676 + }
5677 +
5678 + replica = litmus_lock(ctx->kexclu);
5679 +
5680 + //printf("[%d] got kexclu replica %d.\n", ctx->id, replica);
5681 + //fflush(stdout);
5682 +
5683 + distance = affinity_distance(ctx, replica, last_replica);
5684 +
5685 + if(USE_DYNAMIC_GROUP_LOCKS) {
5686 + litmus_dgl_lock(dgl, dgl_size);
5687 + }
5688 + else {
5689 + for(i = 0; i < dgl_size; ++i) {
5690 + litmus_lock(dgl[i]);
5691 + }
5692 + }
5693 +
5694 + //do_exit = nested_job(ctx, &count, &first, affinity_cost[distance]);
5695 + do_exit = job(ctx, affinity_cost[distance]);
5696 +
5697 + if(USE_DYNAMIC_GROUP_LOCKS) {
5698 + litmus_dgl_unlock(dgl, dgl_size);
5699 + }
5700 + else {
5701 + for(i = dgl_size - 1; i >= 0; --i) {
5702 + litmus_unlock(dgl[i]);
5703 + }
5704 + }
5705 +
5706 + //printf("[%d]: freeing kexclu replica %d.\n", ctx->id, replica);
5707 + //fflush(stdout);
5708 +
5709 + litmus_unlock(ctx->kexclu);
5710 +
5711 + last_replica = replica;
5712 +
5713 + if(SLEEP_BETWEEN_JOBS && !do_exit) {
5714 + sleep_next_period();
5715 + }
5716 + } while(!do_exit);
5717 +
5718 +// if (ctx->id == 0 && NUM_AUX_THREADS) {
5719 +// gAuxRun = 0;
5720 +// __sync_synchronize();
5721 +// CALL( disable_aux_rt_tasks() );
5722 +// }
5723 +
5724 + /*****
5725 + * 4) Transition to background mode.
5726 + */
5727 + TH_CALL( task_mode(BACKGROUND_TASK) );
5728 +
5729 + for(i = 0; i < sizeof(ctx->mig_count)/sizeof(ctx->mig_count[0]); ++i)
5730 + {
5731 + printf("[%d]: mig_count[%d] = %d\n", ctx->id, i, ctx->mig_count[i]);
5732 + }
5733 +
5734 + return NULL;
5735 +}
5736 +
5737 +//int nested_job(struct thread_context* ctx, int *count, int *next, int runfactor)
5738 +//{
5739 +// int ret;
5740 +//
5741 +// if(*count == 0 || *next == NUM_SEMS)
5742 +// {
5743 +// ret = job(ctx, runfactor);
5744 +// }
5745 +// else
5746 +// {
5747 +// int which_sem = *next;
5748 +// int rsm_od = ctx->od[which_sem];
5749 +//
5750 +// ++(*next);
5751 +// --(*count);
5752 +//
5753 +// //printf("[%d]: trying to get semaphore %d.\n", ctx->id, which_sem);
5754 +// //fflush(stdout);
5755 +// litmus_lock(rsm_od);
5756 +//
5757 +// //printf("[%d] got semaphore %d.\n", ctx->id, which_sem);
5758 +// //fflush(stdout);
5759 +// ret = nested_job(ctx, count, next, runfactor);
5760 +//
5761 +// //printf("[%d]: freeing semaphore %d.\n", ctx->id, which_sem);
5762 +// //fflush(stdout);
5763 +// litmus_unlock(rsm_od);
5764 +// }
5765 +//
5766 +//return(ret);
5767 +//}
5768 +
5769 +
5770 +void dirty_kb(int kb)
5771 +{
5772 + int32_t one_kb[256];
5773 + int32_t sum = 0;
5774 + int32_t i;
5775 +
5776 + if(!kb)
5777 + return;
5778 +
5779 + for (i = 0; i < 256; i++)
5780 + sum += one_kb[i];
5781 + kb--;
5782 + /* prevent tail recursion */
5783 + if (kb)
5784 + dirty_kb(kb);
5785 + for (i = 0; i < 256; i++)
5786 + sum += one_kb[i];
5787 +}
5788 +
5789 +int job(struct thread_context* ctx, int runfactor)
5790 +{
5791 + //struct timespec tosleep = {0, 100000}; // 0.1 ms
5792 +
5793 + //printf("[%d]: runfactor = %d\n", ctx->id, runfactor);
5794 +
5795 + //dirty_kb(8 * runfactor);
5796 + dirty_kb(1 * runfactor);
5797 + //nanosleep(&tosleep, NULL);
5798 +
5799 + /* Don't exit. */
5800 + //return ctx->count++ > 100;
5801 + //return ctx->count++ > 12000;
5802 + //return ctx->count++ > 120000;
5803 + return ctx->count++ > 25000; // controls number of jobs per task
5804 +}
5805 diff --git a/gpu/locktest.c b/gpu/locktest.c
5806 new file mode 100644
5807 index 0000000..6a1219a
5808 --- /dev/null
5809 +++ b/gpu/locktest.c
5810 @@ -0,0 +1,206 @@
5811 +#include <stdio.h>
5812 +#include <stdlib.h>
5813 +#include <stdint.h>
5814 +#include <unistd.h>
5815 +#include <assert.h>
5816 +#include <errno.h>
5817 +#include <sys/types.h>
5818 +#include <sys/stat.h>
5819 +#include <fcntl.h>
5820 +
5821 +/* Include gettid() */
5822 +#include <sys/types.h>
5823 +
5824 +/* Include threading support. */
5825 +#include <pthread.h>
5826 +
5827 +/* Include the LITMUS^RT API.*/
5828 +#include "litmus.h"
5829 +
5830 +/* Catch errors.
5831 + */
5832 +#define CALL( exp ) do { \
5833 + int ret; \
5834 + ret = exp; \
5835 + if (ret != 0) \
5836 + fprintf(stderr, "%s failed: %m\n", #exp);\
5837 + else \
5838 + fprintf(stderr, "%s ok.\n", #exp); \
5839 + } while (0)
5840 +
5841 +#define TH_CALL( exp ) do { \
5842 + int ret; \
5843 + ret = exp; \
5844 + if (ret != 0) \
5845 + fprintf(stderr, "[%d] %s failed: %m\n", ctx->id, #exp); \
5846 + else \
5847 + fprintf(stderr, "[%d] %s ok.\n", ctx->id, #exp); \
5848 + } while (0)
5849 +
5850 +#define TH_SAFE_CALL( exp ) do { \
5851 + int ret; \
5852 + fprintf(stderr, "[%d] calling %s...\n", ctx->id, #exp); \
5853 + ret = exp; \
5854 + if (ret != 0) \
5855 + fprintf(stderr, "\t...[%d] %s failed: %m\n", ctx->id, #exp); \
5856 + else \
5857 + fprintf(stderr, "\t...[%d] %s ok.\n", ctx->id, #exp); \
5858 + } while (0)
5859 +
5860 +
5861 +/* these are only default values */
5862 +int NUM_THREADS=3;
5863 +int NUM_SEMS=10;
5864 +
5865 +#define MAX_SEMS 1000
5866 +
5867 +#define EXEC_COST 10
5868 +#define PERIOD 100
5869 +
5870 +/* The information passed to each thread. Could be anything. */
5871 +struct thread_context {
5872 + int id;
5873 + int fd;
5874 + int od[MAX_SEMS];
5875 + int count;
5876 + unsigned int rand;
5877 +};
5878 +
5879 +void* rt_thread(void* _ctx);
5880 +int nested_job(struct thread_context* ctx, int *count, int *next);
5881 +int job(struct thread_context*);
5882 +
5883 +#define OPTSTR "t:s:"
5884 +
5885 +int main(int argc, char** argv)
5886 +{
5887 + int i;
5888 + struct thread_context* ctx;
5889 + pthread_t* task;
5890 + int fd;
5891 +
5892 + int opt;
5893 + while((opt = getopt(argc, argv, OPTSTR)) != -1) {
5894 + switch(opt) {
5895 + case 't':
5896 + NUM_THREADS = atoi(optarg);
5897 + break;
5898 + case 's':
5899 + NUM_SEMS = atoi(optarg);
5900 + assert(NUM_SEMS <= MAX_SEMS);
5901 + break;
5902 + default:
5903 + fprintf(stderr, "Unknown option: %c\n", opt);
5904 + exit(-1);
5905 + break;
5906 + }
5907 + }
5908 +
5909 + ctx = (struct thread_context*) calloc(NUM_THREADS, sizeof(struct thread_context));
5910 + task = (pthread_t*) calloc(NUM_THREADS, sizeof(pthread_t));
5911 +
5912 + srand(0); /* something repeatable for now */
5913 +
5914 + fd = open("semaphores", O_RDONLY | O_CREAT, S_IRUSR | S_IWUSR);
5915 +
5916 + CALL( init_litmus() );
5917 +
5918 + for (i = 0; i < NUM_THREADS; i++) {
5919 + ctx[i].id = i;
5920 + ctx[i].fd = fd;
5921 + ctx[i].rand = rand();
5922 + CALL( pthread_create(task + i, NULL, rt_thread, ctx + i) );
5923 + }
5924 +
5925 +
5926 + for (i = 0; i < NUM_THREADS; i++)
5927 + pthread_join(task[i], NULL);
5928 +
5929 +
5930 + return 0;
5931 +}
5932 +
5933 +void* rt_thread(void* _ctx)
5934 +{
5935 + int i;
5936 + int do_exit = 0;
5937 +
5938 + struct thread_context *ctx = (struct thread_context*)_ctx;
5939 +
5940 + TH_CALL( init_rt_thread() );
5941 +
5942 + /* Vary period a little bit. */
5943 + TH_CALL( sporadic_global(EXEC_COST, PERIOD + 10*ctx->id) );
5944 +
5945 + for (i = 0; i < NUM_SEMS; i++) {
5946 + ctx->od[i] = open_fmlp_sem(ctx->fd, i);
5947 + if(ctx->od[i] < 0)
5948 + perror("open_fmlp_sem");
5949 + }
5950 +
5951 + TH_CALL( task_mode(LITMUS_RT_TASK) );
5952 +
5953 +
5954 + printf("[%d] Waiting for TS release.\n ", ctx->id);
5955 + wait_for_ts_release();
5956 + ctx->count = 0;
5957 +
5958 + do {
5959 + int which_sem = (int)(NUM_SEMS * (rand_r(&(ctx->rand)) / (RAND_MAX + 1.0)));
5960 +
5961 + printf("[%d]: trying to get semaphore %d.\n", ctx->id, which_sem);
5962 + fflush(stdout);
5963 +
5964 + TH_SAFE_CALL ( litmus_lock(which_sem) );
5965 +
5966 + printf("[%d] got semaphore %d.\n", ctx->id, which_sem);
5967 + fflush(stdout);
5968 +
5969 + do_exit = job(ctx);
5970 +
5971 + printf("[%d]: freeing semaphore %d.\n", ctx->id, which_sem);
5972 + fflush(stdout);
5973 +
5974 + TH_SAFE_CALL ( litmus_unlock(which_sem) );
5975 +
5976 + if(!do_exit) {
5977 + sleep_next_period();
5978 + }
5979 + } while(!do_exit);
5980 +
5981 + /*****
5982 + * 4) Transition to background mode.
5983 + */
5984 + TH_CALL( task_mode(BACKGROUND_TASK) );
5985 +
5986 +
5987 + return NULL;
5988 +}
5989 +
5990 +void dirty_kb(int kb)
5991 +{
5992 + int32_t one_kb[256];
5993 + int32_t sum = 0;
5994 + int32_t i;
5995 +
5996 + for (i = 0; i < 256; i++)
5997 + sum += one_kb[i];
5998 + kb--;
5999 + /* prevent tail recursion */
6000 + if (kb)
6001 + dirty_kb(kb);
6002 + for (i = 0; i < 256; i++)
6003 + sum += one_kb[i];
6004 +}
6005 +
6006 +int job(struct thread_context* ctx)
6007 +{
6008 + /* Do real-time calculation. */
6009 + dirty_kb(8);
6010 +
6011 + /* Don't exit. */
6012 + //return ctx->count++ > 100;
6013 + //return ctx->count++ > 12000;
6014 + //return ctx->count++ > 120000;
6015 + return ctx->count++ > 30000; // controls number of jobs per task
6016 +}
6017 diff --git a/gpu/nested.c b/gpu/nested.c
6018 new file mode 100644
6019 index 0000000..334de10
6020 --- /dev/null
6021 +++ b/gpu/nested.c
6022 @@ -0,0 +1,262 @@
6023 +#include <stdio.h>
6024 +#include <stdlib.h>
6025 +#include <stdint.h>
6026 +#include <unistd.h>
6027 +#include <assert.h>
6028 +#include <errno.h>
6029 +#include <sys/types.h>
6030 +#include <sys/stat.h>
6031 +#include <fcntl.h>
6032 +
6033 +/* Include gettid() */
6034 +#include <sys/types.h>
6035 +
6036 +/* Include threading support. */
6037 +#include <pthread.h>
6038 +
6039 +/* Include the LITMUS^RT API.*/
6040 +#include "litmus.h"
6041 +
6042 +/* Catch errors.
6043 + */
6044 +#define CALL( exp ) do { \
6045 + int ret; \
6046 + ret = exp; \
6047 + if (ret != 0) \
6048 + fprintf(stderr, "%s failed: %m\n", #exp);\
6049 + else \
6050 + fprintf(stderr, "%s ok.\n", #exp); \
6051 + } while (0)
6052 +
6053 +#define TH_CALL( exp ) do { \
6054 + int ret; \
6055 + ret = exp; \
6056 + if (ret != 0) \
6057 + fprintf(stderr, "[%d] %s failed: %m\n", ctx->id, #exp); \
6058 + else \
6059 + fprintf(stderr, "[%d] %s ok.\n", ctx->id, #exp); \
6060 + } while (0)
6061 +
6062 +#define TH_SAFE_CALL( exp ) do { \
6063 + int ret; \
6064 + fprintf(stderr, "[%d] calling %s...\n", ctx->id, #exp); \
6065 + ret = exp; \
6066 + if (ret != 0) \
6067 + fprintf(stderr, "\t...[%d] %s failed: %m\n", ctx->id, #exp); \
6068 + else \
6069 + fprintf(stderr, "\t...[%d] %s ok.\n", ctx->id, #exp); \
6070 + } while (0)
6071 +
6072 +
6073 +#define NUM_CPUS 4
6074 +
6075 +//#define NUM_THREADS 3
6076 +int NUM_THREADS=3;
6077 +
6078 +/* NEST_DEPTH may not be greater than NUM_SEMS. */
6079 +//#define NUM_SEMS 10
6080 +int NUM_SEMS=10;
6081 +
6082 +int SLEEP_BETWEEN_JOBS = 1;
6083 +
6084 +int USE_PRIOQ = 0;
6085 +
6086 +#define MAX_SEMS 1000
6087 +
6088 +//#define NEST_DEPTH 5
6089 +int NEST_DEPTH=5;
6090 +
6091 +#define EXEC_COST 1000*1
6092 +#define PERIOD 1000*10
6093 +
6094 +/* The information passed to each thread. Could be anything. */
6095 +struct thread_context {
6096 + int id;
6097 + int fd;
6098 + int od[MAX_SEMS];
6099 + int count;
6100 + unsigned int rand;
6101 +};
6102 +
6103 +void* rt_thread(void* _ctx);
6104 +int nested_job(struct thread_context* ctx, int *count, int *next);
6105 +int job(struct thread_context*);
6106 +
6107 +#define OPTSTR "t:s:d:fq"
6108 +
6109 +int main(int argc, char** argv)
6110 +{
6111 + int i;
6112 + struct thread_context* ctx; //[NUM_THREADS];
6113 + pthread_t* task; //[NUM_THREADS];
6114 + int fd;
6115 +
6116 + int opt;
6117 + while((opt = getopt(argc, argv, OPTSTR)) != -1) {
6118 + switch(opt) {
6119 + case 't':
6120 + NUM_THREADS = atoi(optarg);
6121 + break;
6122 + case 's':
6123 + NUM_SEMS = atoi(optarg);
6124 + assert(NUM_SEMS <= MAX_SEMS);
6125 + break;
6126 + case 'd':
6127 + NEST_DEPTH = atoi(optarg);
6128 + break;
6129 + case 'f':
6130 + SLEEP_BETWEEN_JOBS = 0;
6131 + break;
6132 + case 'q':
6133 + USE_PRIOQ = 1;
6134 + break;
6135 + default:
6136 + fprintf(stderr, "Unknown option: %c\n", opt);
6137 + exit(-1);
6138 + break;
6139 + }
6140 + }
6141 +
6142 + ctx = (struct thread_context*) calloc(NUM_THREADS, sizeof(struct thread_context));
6143 + task = (pthread_t*) calloc(NUM_THREADS, sizeof(pthread_t));
6144 +
6145 + srand(0); /* something repeatable for now */
6146 +
6147 + fd = open("semaphores", O_RDONLY | O_CREAT, S_IRUSR | S_IWUSR);
6148 +
6149 + CALL( init_litmus() );
6150 +
6151 + for (i = 0; i < NUM_THREADS; i++) {
6152 + ctx[i].id = i;
6153 + ctx[i].fd = fd;
6154 + ctx[i].rand = rand();
6155 + CALL( pthread_create(task + i, NULL, rt_thread, ctx + i) );
6156 + }
6157 +
6158 +
6159 + for (i = 0; i < NUM_THREADS; i++)
6160 + pthread_join(task[i], NULL);
6161 +
6162 +
6163 + return 0;
6164 +}
6165 +
6166 +void* rt_thread(void* _ctx)
6167 +{
6168 + int i;
6169 + int do_exit = 0;
6170 + struct rt_task param;
6171 +
6172 + struct thread_context *ctx = (struct thread_context*)_ctx;
6173 +
6174 + init_rt_task_param(¶m);
6175 + param.exec_cost = EXEC_COST;
6176 + param.period = PERIOD + 10*ctx->id;
6177 + param.cls = RT_CLASS_SOFT;
6178 +
6179 + /* Make presence visible. */
6180 + //printf("RT Thread %d active.\n", ctx->id);
6181 +
6182 + TH_CALL( init_rt_thread() );
6183 + TH_CALL( set_rt_task_param(gettid(), ¶m) );
6184 +
6185 + for (i = 0; i < NUM_SEMS; i++) {
6186 + if (!USE_PRIOQ) {
6187 + ctx->od[i] = open_fifo_sem(ctx->fd, i);
6188 + if(ctx->od[i] < 0)
6189 + perror("open_fifo_sem");
6190 + }
6191 + else {
6192 + ctx->od[i] = open_prioq_sem(ctx->fd, i);
6193 + if(ctx->od[i] < 0)
6194 + perror("open_prioq_sem");
6195 + }
6196 + //printf("[%d] ctx->od[%d]: %d\n", ctx->id, i, ctx->od[i]);
6197 + }
6198 +
6199 + TH_CALL( task_mode(LITMUS_RT_TASK) );
6200 +
6201 +
6202 + printf("[%d] Waiting for TS release.\n ", ctx->id);
6203 + wait_for_ts_release();
6204 + ctx->count = 0;
6205 +
6206 + do {
6207 + int first = (int)(NUM_SEMS * (rand_r(&(ctx->rand)) / (RAND_MAX + 1.0)));
6208 + int count = NEST_DEPTH;
6209 + do_exit = nested_job(ctx, &count, &first);
6210 +
6211 + if(SLEEP_BETWEEN_JOBS && !do_exit) {
6212 + sleep_next_period();
6213 + }
6214 + } while(!do_exit);
6215 +
6216 + /*****
6217 + * 4) Transition to background mode.
6218 + */
6219 + TH_CALL( task_mode(BACKGROUND_TASK) );
6220 +
6221 +
6222 + return NULL;
6223 +}
6224 +
6225 +
6226 +int nested_job(struct thread_context* ctx, int *count, int *next)
6227 +{
6228 + int ret;
6229 +
6230 + if(*count == 0 || *next == NUM_SEMS) /* base case */
6231 + {
6232 + ret = job(ctx);
6233 + }
6234 + else
6235 + {
6236 + int which_sem = ctx->od[*next];
6237 +
6238 + ++(*next);
6239 + --(*count);
6240 +
6241 + printf("[%d]: trying to get semaphore %d.\n", ctx->id, which_sem);
6242 + fflush(stdout);
6243 + TH_SAFE_CALL ( litmus_lock(which_sem) );
6244 + printf("[%d] got semaphore %d.\n", ctx->id, which_sem);
6245 + fflush(stdout);
6246 + ret = nested_job(ctx, count, next);
6247 + TH_SAFE_CALL ( litmus_unlock(which_sem) );
6248 + fflush(stdout);
6249 + }
6250 +
6251 + return(ret);
6252 +}
6253 +
6254 +
6255 +
6256 +void dirty_kb(int kb)
6257 +{
6258 + int32_t one_kb[256];
6259 + int32_t sum = 0;
6260 + int32_t i;
6261 +
6262 + for (i = 0; i < 256; i++)
6263 + sum += one_kb[i];
6264 + kb--;
6265 + /* prevent tail recursion */
6266 + if (kb)
6267 + dirty_kb(kb);
6268 + for (i = 0; i < 256; i++)
6269 + sum += one_kb[i];
6270 +}
6271 +
6272 +
6273 +
6274 +int job(struct thread_context* ctx)
6275 +{
6276 + /* Do real-time calculation. */
6277 + dirty_kb(8);
6278 +
6279 + /* Don't exit. */
6280 + //return ctx->count++ > 100;
6281 + //return ctx->count++ > 12000;
6282 + //return ctx->count++ > 120000;
6283 + return ctx->count++ > 30000;
6284 +}
6285 diff --git a/gpu/normal_task.c b/gpu/normal_task.c
6286 new file mode 100644
6287 index 0000000..ccc265c
6288 --- /dev/null
6289 +++ b/gpu/normal_task.c
6290 @@ -0,0 +1,90 @@
6291 +#include <stdio.h>
6292 +#include <stdlib.h>
6293 +#include <string.h>
6294 +#include <stdint.h>
6295 +#include <unistd.h>
6296 +#include <assert.h>
6297 +#include <errno.h>
6298 +#include <sys/types.h>
6299 +#include <sys/stat.h>
6300 +#include <fcntl.h>
6301 +#include <time.h>
6302 +#include <math.h>
6303 +
6304 +/* Include gettid() */
6305 +#include <sys/types.h>
6306 +
6307 +/* Include threading support. */
6308 +#include <pthread.h>
6309 +
6310 +/* Include the LITMUS^RT API.*/
6311 +#include "litmus.h"
6312 +
6313 +/* Catch errors.
6314 + */
6315 +#if 1
6316 +#define CALL( exp ) do { \
6317 + int ret; \
6318 + ret = exp; \
6319 + if (ret != 0) \
6320 + fprintf(stderr, "%s failed: %m\n", #exp);\
6321 + else \
6322 + fprintf(stderr, "%s ok.\n", #exp); \
6323 + } while (0)
6324 +
6325 +#define TH_CALL( exp ) do { \
6326 + int ret; \
6327 + ret = exp; \
6328 + if (ret != 0) \
6329 + fprintf(stderr, "[%d] %s failed: %m\n", ctx->id, #exp); \
6330 + else \
6331 + fprintf(stderr, "[%d] %s ok.\n", ctx->id, #exp); \
6332 + } while (0)
6333 +
6334 +#define TH_SAFE_CALL( exp ) do { \
6335 + int ret; \
6336 + fprintf(stderr, "[%d] calling %s...\n", ctx->id, #exp); \
6337 + ret = exp; \
6338 + if (ret != 0) \
6339 + fprintf(stderr, "\t...[%d] %s failed: %m\n", ctx->id, #exp); \
6340 + else \
6341 + fprintf(stderr, "\t...[%d] %s ok.\n", ctx->id, #exp); \
6342 + } while (0)
6343 +#else
6344 +#define CALL( exp )
6345 +#define TH_CALL( exp )
6346 +#define TH_SAFE_CALL( exp )
6347 +#endif
6348 +
6349 +/* these are only default values */
6350 +// 1000 = 1us
6351 +#define EXEC_COST 1000*1
6352 +#define PERIOD 2*1000*100
6353 +
6354 +
6355 +int main(int argc, char** argv)
6356 +{
6357 + struct rt_task param;
6358 +
6359 + init_rt_task_param(¶m);
6360 + param.exec_cost = EXEC_COST;
6361 + param.period = PERIOD;
6362 + param.cls = RT_CLASS_SOFT;
6363 +
6364 + CALL( init_litmus() );
6365 +
6366 + CALL( init_rt_thread() );
6367 + CALL( set_rt_task_param(gettid(), ¶m) );
6368 + //CALL( task_mode(LITMUS_RT_TASK) );
6369 +
6370 + fprintf(stdout, "Waiting for TS release.\n ");
6371 + wait_for_ts_release();
6372 +
6373 + fprintf(stdout, "Released!\n");
6374 +
6375 + //sleep_next_period();
6376 + //CALL( task_mode(BACKGROUND_TASK) );
6377 +
6378 + return 0;
6379 +}
6380 +
6381 diff --git a/include/common.h b/include/common.h
6382 index d1234ba..faf2c07 100644
6383 --- a/include/common.h
6384 +++ b/include/common.h
6385 @@ -1,7 +1,14 @@
6386 #ifndef COMMON_H
6387 #define COMMON_H
6388
6389 +#ifdef __cplusplus
6390 +extern "C" {
6391 +#endif
6392
6393 void bail_out(const char* msg);
6394
6395 +#ifdef __cplusplus
6396 +}
6397 +#endif
6398 +
6399 #endif
6400 diff --git a/include/litmus.h b/include/litmus.h
6401 index 677f9a9..e785f92 100644
6402 --- a/include/litmus.h
6403 +++ b/include/litmus.h
6404 @@ -7,57 +7,45 @@ extern "C" {
6405
6406 #include <sys/types.h>
6407 #include <stdint.h>
6408 +#include <setjmp.h>
6409
6410 /* Include kernel header.
6411 * This is required for the rt_param
6412 * and control_page structures.
6413 */
6414 #include "litmus/rt_param.h"
6415 +#include "litmus/signal.h"
6416
6417 #include "asm/cycles.h" /* for null_call() */
6418
6419 -typedef int pid_t; /* PID of a task */
6420 -
6421 -/* obtain the PID of a thread */
6422 -pid_t gettid(void);
6423 -
6424 -/* migrate to partition */
6425 -int be_migrate_to(int target_cpu);
6426 +#include "migration.h"
6427
6428 +void init_rt_task_param(struct rt_task* param);
6429 int set_rt_task_param(pid_t pid, struct rt_task* param);
6430 int get_rt_task_param(pid_t pid, struct rt_task* param);
6431
6432 -/* setup helper */
6433 -
6434 -/* Times are given in ms. The 'priority' parameter
6435 - * is only relevant under fixed-priority scheduling (and
6436 - * ignored by other plugins). The task_class_t parameter
6437 - * is ignored by most plugins.
6438 - */
6439 -int sporadic_task(
6440 - lt_t e, lt_t p, lt_t phase,
6441 - int partition, unsigned int priority,
6442 - task_class_t cls,
6443 - budget_policy_t budget_policy, int set_cpu_set);
6444 -
6445 -/* Times are given in ns. The 'priority' parameter
6446 - * is only relevant under fixed-priority scheduling (and
6447 - * ignored by other plugins). The task_class_t parameter
6448 - * is ignored by most plugins.
6449 +/* Release-master-aware functions for getting the first
6450 + * CPU in a particular cluster or partition. Use these
6451 + * to set rt_task::cpu for cluster/partitioned scheduling.
6452 */
6453 -int sporadic_task_ns(
6454 - lt_t e, lt_t p, lt_t phase,
6455 - int cpu, unsigned int priority,
6456 - task_class_t cls,
6457 - budget_policy_t budget_policy, int set_cpu_set);
6458 -
6459 -/* Convenience macros. Budget enforcement off by default in these macros. */
6460 -#define sporadic_global(e, p) \
6461 - sporadic_task(e, p, 0, 0, LITMUS_LOWEST_PRIORITY, \
6462 - RT_CLASS_SOFT, NO_ENFORCEMENT, 0)
6463 -#define sporadic_partitioned(e, p, cpu) \
6464 - sporadic_task(e, p, 0, cpu, LITMUS_LOWEST_PRIORITY, \
6465 - RT_CLASS_SOFT, NO_ENFORCEMENT, 1)
6466 +int partition_to_cpu(int partition);
6467 +int cluster_to_first_cpu(int cluster, int cluster_size);
6468 +
6469 +/* Convenience functions for setting up real-time tasks.
6470 + * Default behaviors set by init_rt_task_params() used.
6471 + * Also sets affinity masks for clustered/partitions
6472 + * functions. Time units in nanoseconds. */
6473 +int sporadic_global(lt_t e_ns, lt_t p_ns);
6474 +int sporadic_partitioned(lt_t e_ns, lt_t p_ns, int partition);
6475 +int sporadic_clustered(lt_t e_ns, lt_t p_ns, int cluster, int cluster_size);
6476 +
6477 +/* simple time unit conversion macros */
6478 +#define s2ns(s) ((s)*1000000000LL)
6479 +#define s2us(s) ((s)*1000000LL)
6480 +#define s2ms(s) ((s)*1000LL)
6481 +#define ms2ns(ms) ((ms)*1000000LL)
6482 +#define ms2us(ms) ((ms)*1000LL)
6483 +#define us2ns(us) ((us)*1000LL)
6484
6485 /* file descriptor attached shared objects support */
6486 typedef enum {
6487 @@ -66,7 +54,18 @@ typedef enum {
6488 MPCP_SEM = 2,
6489 MPCP_VS_SEM = 3,
6490 DPCP_SEM = 4,
6491 - PCP_SEM = 5,
6492 + PCP_SEM = 5,
6493 +
6494 + FIFO_MUTEX = 6,
6495 + IKGLP_SEM = 7,
6496 + KFMLP_SEM = 8,
6497 +
6498 + IKGLP_SIMPLE_GPU_AFF_OBS = 9,
6499 + IKGLP_GPU_AFF_OBS = 10,
6500 + KFMLP_SIMPLE_GPU_AFF_OBS = 11,
6501 + KFMLP_GPU_AFF_OBS = 12,
6502 +
6503 + PRIOQ_MUTEX = 13,
6504 } obj_type_t;
6505
6506 int lock_protocol_for_name(const char* name);
6507 @@ -80,9 +79,32 @@ static inline int od_open(int fd, obj_type_t type, int obj_id)
6508 return od_openx(fd, type, obj_id, 0);
6509 }
6510
6511 +int litmus_open_lock(
6512 + obj_type_t protocol, /* which locking protocol to use, e.g., FMLP_SEM */
6513 + int lock_id, /* numerical id of the lock, user-specified */
6514 + const char* ns, /* path to a shared file */
6515 + void *config_param); /* any extra info needed by the protocol (such
6516 + * as CPU under SRP and PCP), may be NULL */
6517 +
6518 /* real-time locking protocol support */
6519 int litmus_lock(int od);
6520 int litmus_unlock(int od);
6521 +int litmus_should_yield_lock(int od);
6522 +
6523 +/* Dynamic group lock support. ods arrays MUST BE PARTIALLY ORDERED!!!!!!
6524 + * Use the same ordering for lock and unlock.
6525 + *
6526 + * Ex:
6527 + * litmus_dgl_lock({A, B, C, D}, 4);
6528 + * litmus_dgl_unlock({A, B, C, D}, 4);
6529 + */
6530 +int litmus_dgl_lock(int* ods, int dgl_size);
6531 +int litmus_dgl_unlock(int* ods, int dgl_size);
6532 +int litmus_dgl_should_yield_lock(int* ods, int dgl_size);
6533 +
6534 +/* nvidia graphics cards */
6535 +int register_nv_device(int nv_device_id);
6536 +int unregister_nv_device(int nv_device_id);
6537
6538 /* job control*/
6539 int get_job_no(unsigned int* job_no);
6540 @@ -97,10 +119,8 @@ void exit_litmus(void);
6541 /* A real-time program. */
6542 typedef int (*rt_fn_t)(void*);
6543
6544 -/* These two functions configure the RT task to use enforced exe budgets */
6545 -int create_rt_task(rt_fn_t rt_prog, void *arg, int cpu, int wcet, int period);
6546 -int __create_rt_task(rt_fn_t rt_prog, void *arg, int cpu, int wcet,
6547 - int period, task_class_t cls);
6548 +/* exec another program as a real-time task. */
6549 +int create_rt_task(rt_fn_t rt_prog, void *arg, struct rt_task* param);
6550
6551 /* per-task modes */
6552 enum rt_task_mode_t {
6553 @@ -118,16 +138,14 @@ void exit_np(void);
6554 int requested_to_preempt(void);
6555
6556 /* task system support */
6557 -int wait_for_ts_release(void);
6558 +int wait_for_ts_release();
6559 +int wait_for_ts_release2(struct timespec *release);
6560 int release_ts(lt_t *delay);
6561 int get_nr_ts_release_waiters(void);
6562 +int read_litmus_stats(int *ready, int *total);
6563
6564 -#define __NS_PER_MS 1000000
6565 -
6566 -static inline lt_t ms2lt(unsigned long milliseconds)
6567 -{
6568 - return __NS_PER_MS * milliseconds;
6569 -}
6570 +int enable_aux_rt_tasks(int flags);
6571 +int disable_aux_rt_tasks(int flags);
6572
6573 /* sleep for some number of nanoseconds */
6574 int lt_sleep(lt_t timeout);
6575 @@ -140,11 +158,20 @@ double wctime(void);
6576
6577 /* semaphore allocation */
6578
6579 +typedef int (*open_sem_t)(int fd, int name);
6580 +
6581 static inline int open_fmlp_sem(int fd, int name)
6582 {
6583 return od_open(fd, FMLP_SEM, name);
6584 }
6585
6586 +static inline int open_kfmlp_sem(int fd, int name, unsigned int nr_replicas)
6587 +{
6588 + if (!nr_replicas)
6589 + return -1;
6590 + return od_openx(fd, KFMLP_SEM, name, &nr_replicas);
6591 +}
6592 +
6593 static inline int open_srp_sem(int fd, int name)
6594 {
6595 return od_open(fd, SRP_SEM, name);
6596 @@ -165,6 +192,64 @@ static inline int open_dpcp_sem(int fd, int name, int cpu)
6597 return od_openx(fd, DPCP_SEM, name, &cpu);
6598 }
6599
6600 +static inline int open_fifo_sem(int fd, int name)
6601 +{
6602 + return od_open(fd, FIFO_MUTEX, name);
6603 +}
6604 +
6605 +static inline int open_prioq_sem(int fd, int name)
6606 +{
6607 + return od_open(fd, PRIOQ_MUTEX, name);
6608 +}
6609 +
6610 +int open_ikglp_sem(int fd, int name, unsigned int nr_replicas);
6611 +
6612 +/* KFMLP-based Token Lock for GPUs
6613 + * Legacy; mostly untested.
6614 + */
6615 +int open_kfmlp_gpu_sem(int fd, int name,
6616 + unsigned int num_gpus, unsigned int gpu_offset, unsigned int rho,
6617 + int affinity_aware /* bool */);
6618 +
6619 +/* -- Example Configurations --
6620 + *
6621 + * Optimal IKGLP Configuration:
6622 + * max_in_fifos = IKGLP_M_IN_FIFOS
6623 + * max_fifo_len = IKGLP_OPTIMAL_FIFO_LEN
6624 + *
6625 + * IKGLP with Relaxed FIFO Length Constraints:
6626 + * max_in_fifos = IKGLP_M_IN_FIFOS
6627 + * max_fifo_len = IKGLP_UNLIMITED_FIFO_LEN
6628 + * NOTE: max_in_fifos still limits total number of requests in FIFOs.
6629 + *
6630 + * KFMLP Configuration (FIFO queues only):
6631 + * max_in_fifos = IKGLP_UNLIMITED_IN_FIFOS
6632 + * max_fifo_len = IKGLP_UNLIMITED_FIFO_LEN
6633 + * NOTE: Uses a non-optimal IKGLP configuration, not an actual KFMLP_SEM.
6634 + *
6635 + * RGEM-like Configuration (priority queues only):
6636 + * max_in_fifos = 1..(rho*num_gpus)
6637 + * max_fifo_len = 1
6638 + *
6639 + * For exclusive GPU allocation, use rho = 1
6640 + * For trivial token lock, use rho = # of tasks in task set
6641 + *
6642 + * A simple load-balancing heuristic will still be used if
6643 + * enable_affinity_heuristics = 0.
6644 + *
6645 + * Other constraints:
6646 + * - max_in_fifos <= max_fifo_len * rho
6647 + * (unless max_in_fifos = IKGLP_UNLIMITED_IN_FIFOS and
6648 + * max_fifo_len = IKGLP_UNLIMITED_FIFO_LEN
6649 + * - rho > 0
6650 + * - num_gpus > 0
6651 + */
6652 +// takes names 'name' and 'name+1'
6653 +int open_gpusync_token_lock(int fd, int name,
6654 + unsigned int num_gpus, unsigned int gpu_offset,
6655 + unsigned int rho, unsigned int max_in_fifos,
6656 + unsigned int max_fifo_len,
6657 + int enable_affinity_heuristics /* bool */);
6658
6659 /* syscall overhead measuring */
6660 int null_call(cycles_t *timestamp);
6661 @@ -176,7 +261,146 @@ int null_call(cycles_t *timestamp);
6662 */
6663 struct control_page* get_ctrl_page(void);
6664
6665 +
6666 +/* sched_trace injection */
6667 +int inject_name(void);
6668 +int inject_param(void); /* sporadic_task_ns*() must have already been called */
6669 +int inject_release(lt_t release, lt_t deadline, unsigned int job_no);
6670 +int inject_completion(unsigned int job_no);
6671 +int inject_gpu_migration(unsigned int to, unsigned int from);
6672 +int __inject_action(unsigned int action);
6673 +
6674 +#if 1
6675 +#define inject_action(COUNT) \
6676 +do { \
6677 +__inject_action(COUNT); \
6678 +}while(0);
6679 +#else
6680 +#define inject_action(COUNT) \
6681 +do { \
6682 +}while(0);
6683 +#endif
6684 +
6685 +/* Litmus signal handling */
6686 +
6687 +typedef struct litmus_sigjmp
6688 +{
6689 + sigjmp_buf env;
6690 + struct litmus_sigjmp *prev;
6691 +} litmus_sigjmp_t;
6692 +
6693 +void push_sigjmp(litmus_sigjmp_t* buf);
6694 +litmus_sigjmp_t* pop_sigjmp(void);
6695 +
6696 +typedef void (*litmus_sig_handler_t)(int);
6697 +typedef void (*litmus_sig_actions_t)(int, siginfo_t *, void *);
6698 +
6699 +/* ignore specified signals. all signals raised while ignored are dropped */
6700 +void ignore_litmus_signals(unsigned long litmus_sig_mask);
6701 +
6702 +/* register a handler for the given set of litmus signals */
6703 +void activate_litmus_signals(unsigned long litmus_sig_mask,
6704 + litmus_sig_handler_t handler);
6705 +
6706 +/* register an action signal handler for a given set of signals */
6707 +void activate_litmus_signal_actions(unsigned long litmus_sig_mask,
6708 + litmus_sig_actions_t handler);
6709 +
6710 +/* Block a given set of litmus signals. Any signals raised while blocked
6711 + * are queued and delivered after unblocking. Call ignore_litmus_signals()
6712 + * before unblocking if you wish to discard these. Blocking may be
6713 + * useful to protect COTS code in Litmus that may not be able to deal
6714 + * with exception-raising signals.
6715 + */
6716 +void block_litmus_signals(unsigned long litmus_sig_mask);
6717 +
6718 +/* Unblock a given set of litmus signals. */
6719 +void unblock_litmus_signals(unsigned long litmus_sig_mask);
6720 +
6721 +#define SIG_BUDGET_MASK 0x00000001
6722 +/* more ... */
6723 +
6724 +#define ALL_LITMUS_SIG_MASKS (SIG_BUDGET_MASK)
6725 +
6726 +/* Try/Catch structures useful for implementing abortable jobs.
6727 + * Should only be used in legitimate cases. ;)
6728 + */
6729 +#define LITMUS_TRY \
6730 +do { \
6731 + int sigsetjmp_ret_##__FUNCTION__##__LINE__; \
6732 + litmus_sigjmp_t lit_env_##__FUNCTION__##__LINE__; \
6733 + push_sigjmp(&lit_env_##__FUNCTION__##__LINE__); \
6734 + sigsetjmp_ret_##__FUNCTION__##__LINE__ = \
6735 + sigsetjmp(lit_env_##__FUNCTION__##__LINE__.env, 1); \
6736 + if (sigsetjmp_ret_##__FUNCTION__##__LINE__ == 0) {
6737 +
6738 +#define LITMUS_CATCH(x) \
6739 + } else if (sigsetjmp_ret_##__FUNCTION__##__LINE__ == (x)) {
6740 +
6741 +#define END_LITMUS_TRY \
6742 + } /* end if-else-if chain */ \
6743 +} while(0); /* end do from 'LITMUS_TRY' */
6744 +
6745 +/* Calls siglongjmp(signum). Use with TRY/CATCH.
6746 + * Example:
6747 + * activate_litmus_signals(SIG_BUDGET_MASK, longjmp_on_litmus_signal);
6748 + */
6749 +void longjmp_on_litmus_signal(int signum);
6750 +
6751 #ifdef __cplusplus
6752 }
6753 #endif
6754 +
6755 +
6756 +
6757 +
6758 +#ifdef __cplusplus
6759 +/* Expose litmus exceptions if C++.
6760 + *
6761 + * KLUDGE: We define everything in the header since liblitmus is a C-only
6762 + * library, but this header could be included in C++ code.
6763 + */
6764 +
6765 +#include <exception>
6766 +
6767 +namespace litmus
6768 +{
6769 + class litmus_exception: public std::exception
6770 + {
6771 + public:
6772 + litmus_exception() throw() {}
6773 + virtual ~litmus_exception() throw() {}
6774 + virtual const char* what() const throw() { return "litmus_exception";}
6775 + };
6776 +
6777 + class sigbudget: public litmus_exception
6778 + {
6779 + public:
6780 + sigbudget() throw() {}
6781 + virtual ~sigbudget() throw() {}
6782 + virtual const char* what() const throw() { return "sigbudget"; }
6783 + };
6784 +
6785 + /* Must compile your program with "non-call-exception". */
6786 + static __attribute__((used))
6787 + void throw_on_litmus_signal(int signum)
6788 + {
6789 + /* We have to unblock the received signal to get more in the future
6790 + * because we are not calling siglongjmp(), which normally restores
6791 + * the mask for us.
6792 + */
6793 + if (SIG_BUDGET == signum) {
6794 + unblock_litmus_signals(SIG_BUDGET_MASK);
6795 + throw sigbudget();
6796 + }
6797 + /* else if (...) */
6798 + else {
6799 + /* silently ignore */
6800 + }
6801 + }
6802 +
6803 +}; /* end namespace 'litmus' */
6804 +
6805 +#endif /* end __cplusplus */
6806 +
6807 #endif
6808 diff --git a/include/migration.h b/include/migration.h
6809 new file mode 100644
6810 index 0000000..2413e7c
6811 --- /dev/null
6812 +++ b/include/migration.h
6813 @@ -0,0 +1,24 @@
6814 +
6815 +typedef int pid_t;
6816 +
6817 +/* obtain the PID of a thread */
6818 +pid_t gettid();
6819 +
6820 +/* Assign a task to a cpu/partition/cluster.
6821 + * PRECOND: tid is not yet in real-time mode (it's a best effort task).
6822 + * Set tid == 0 to migrate the caller */
6823 +int be_migrate_thread_to_cpu(pid_t tid, int target_cpu);
6824 +int be_migrate_thread_to_partition(pid_t tid, int partition);
6825 +/* If using release master, set cluster_sz to size of largest cluster. tid
6826 + * will not be scheduled on release master. */
6827 +int be_migrate_thread_to_cluster(pid_t tid, int cluster, int cluster_sz);
6828 +
6829 +/* set ignore_rm == 1 to include release master in tid's cpu affinity */
6830 +int __be_migrate_thread_to_cluster(pid_t tid, int cluster, int cluster_sz, int ignore_rm);
6831 +
6832 +int be_migrate_to_cpu(int target_cpu);
6833 +int be_migrate_to_partition(int partition);
6834 +int be_migrate_to_cluster(int cluster, int cluster_sz);
6835 +
6836 +int num_online_cpus();
6837 +int release_master();
6838 diff --git a/include/tests.h b/include/tests.h
6839 index ed2b409..4ca21f8 100644
6840 --- a/include/tests.h
6841 +++ b/include/tests.h
6842 @@ -7,8 +7,11 @@
6843
6844 #define fail(fmt, args...) \
6845 do { \
6846 - fprintf(stderr, "\n!! TEST FAILURE " fmt "\n at %s:%d (%s)\n", \
6847 - ## args, __FILE__, __LINE__, __FUNCTION__); \
6848 + fprintf(stderr, "\n!! TEST FAILURE " fmt \
6849 + "\n at %s:%d (%s)" \
6850 + "\n in task PID=%d\n", \
6851 + ## args, __FILE__, __LINE__, __FUNCTION__, \
6852 + getpid()); \
6853 fflush(stderr); \
6854 exit(200); \
6855 } while (0)
6856 diff --git a/src/kernel_iface.c b/src/kernel_iface.c
6857 index 4cc1af5..73d398f 100644
6858 --- a/src/kernel_iface.c
6859 +++ b/src/kernel_iface.c
6860 @@ -56,9 +56,8 @@ ssize_t read_file(const char* fname, void* buf, size_t maxlen)
6861 return got;
6862 }
6863
6864 -int get_nr_ts_release_waiters(void)
6865 +int read_litmus_stats(int *ready, int *all)
6866 {
6867 - int ready = 0, all = 0;
6868 char buf[100];
6869 ssize_t len;
6870
6871 @@ -67,15 +66,21 @@ int get_nr_ts_release_waiters(void)
6872 len = sscanf(buf,
6873 "real-time tasks = %d\n"
6874 "ready for release = %d\n",
6875 - &all, &ready);
6876 - if (len == 2)
6877 + all, ready);
6878 + return len == 2;
6879 +}
6880 +
6881 +int get_nr_ts_release_waiters(void)
6882 +{
6883 + int ready, all;
6884 + if (read_litmus_stats(&ready, &all))
6885 return ready;
6886 else
6887 - return len;
6888 + return -1;
6889 }
6890
6891 /* thread-local pointer to control page */
6892 -static __thread struct control_page *ctrl_page;
6893 +static __thread struct control_page *ctrl_page = NULL;
6894
6895 int init_kernel_iface(void)
6896 {
6897 diff --git a/src/litmus.c b/src/litmus.c
6898 index b32254b..70f7fb6 100644
6899 --- a/src/litmus.c
6900 +++ b/src/litmus.c
6901 @@ -3,7 +3,10 @@
6902 #include <stdio.h>
6903 #include <string.h>
6904 #include <signal.h>
6905 +#include <fcntl.h>
6906 #include <sys/mman.h>
6907 +#include <sys/types.h>
6908 +
6909
6910 #include <sched.h> /* for cpu sets */
6911
6912 @@ -23,6 +26,17 @@ static struct {
6913 {MPCP_VS_SEM, "MPCP-VS"},
6914 LP(DPCP),
6915 LP(PCP),
6916 +
6917 + {FIFO_MUTEX, "FIFO"},
6918 + LP(IKGLP),
6919 + LP(KFMLP),
6920 +
6921 + {IKGLP_SIMPLE_GPU_AFF_OBS, "IKGLP-GPU-SIMPLE"},
6922 + {IKGLP_GPU_AFF_OBS, "IKGLP-GPU"},
6923 + {KFMLP_SIMPLE_GPU_AFF_OBS, "KFMLP-GPU-SIMPLE"},
6924 + {KFMLP_GPU_AFF_OBS, "KFMLP-GPU"},
6925 +
6926 + {PRIOQ_MUTEX, "PRIOQ"},
6927 };
6928
6929 #define NUM_PROTOS (sizeof(protocol)/sizeof(protocol[0]))
6930 @@ -49,6 +63,23 @@ const char* name_for_lock_protocol(int id)
6931 return "<UNKNOWN>";
6932 }
6933
6934 +int litmus_open_lock(
6935 + obj_type_t protocol,
6936 + int lock_id,
6937 + const char* namespace,
6938 + void *config_param)
6939 +{
6940 + int fd, od;
6941 +
6942 + fd = open(namespace, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
6943 + if (fd < 0)
6944 + return -1;
6945 + od = od_openx(fd, protocol, lock_id, config_param);
6946 + close(fd);
6947 + return od;
6948 +}
6949 +
6950 +
6951
6952 void show_rt_param(struct rt_task* tp)
6953 {
6954 @@ -57,6 +88,36 @@ void show_rt_param(struct rt_task* tp)
6955 tp->exec_cost, tp->period, tp->cpu);
6956 }
6957
6958 +void init_rt_task_param(struct rt_task* tp)
6959 +{
6960 + /* Defaults:
6961 + * - implicit deadline (t->relative_deadline == 0)
6962 + * - phase = 0
6963 + * - class = RT_CLASS_SOFT
6964 + * - budget policy = NO_ENFORCEMENT
6965 + * - fixed priority = LITMUS_LOWEST_PRIORITY
6966 + * - release policy = SPORADIC
6967 + * - cpu assignment = 0
6968 + *
6969 + * User must still set the following fields to non-zero values:
6970 + * - tp->exec_cost
6971 + * - tp->period
6972 + *
6973 + * User must set tp->cpu to the appropriate value for non-global
6974 + * schedulers. For clusters, set tp->cpu to the first CPU in the
6975 + * assigned cluster.
6976 + */
6977 +
6978 + memset(tp, 0, sizeof(*tp));
6979 +
6980 + tp->cls = RT_CLASS_SOFT;
6981 + tp->priority = LITMUS_LOWEST_PRIORITY;
6982 + tp->budget_policy = NO_ENFORCEMENT;
6983 + tp->drain_policy = DRAIN_SIMPLE;
6984 + tp->budget_signal_policy = NO_SIGNALS;
6985 + tp->release_policy = SPORADIC;
6986 +}
6987 +
6988 task_class_t str2class(const char* str)
6989 {
6990 if (!strcmp(str, "hrt"))
6991 @@ -66,56 +127,55 @@ task_class_t str2class(const char* str)
6992 else if (!strcmp(str, "be"))
6993 return RT_CLASS_BEST_EFFORT;
6994 else
6995 - return -1;
6996 + return (task_class_t)(-1);
6997 }
6998
6999 #define NS_PER_MS 1000000
7000
7001 -/* only for best-effort execution: migrate to target_cpu */
7002 -int be_migrate_to(int target_cpu)
7003 +int sporadic_global(lt_t e_ns, lt_t p_ns)
7004 {
7005 - cpu_set_t cpu_set;
7006 + struct rt_task param;
7007
7008 - CPU_ZERO(&cpu_set);
7009 - CPU_SET(target_cpu, &cpu_set);
7010 - return sched_setaffinity(0, sizeof(cpu_set_t), &cpu_set);
7011 + init_rt_task_param(¶m);
7012 + param.exec_cost = e_ns;
7013 + param.period = p_ns;
7014 +
7015 + return set_rt_task_param(gettid(), ¶m);
7016 }
7017
7018 -int sporadic_task(lt_t e, lt_t p, lt_t phase,
7019 - int cpu, unsigned int priority,
7020 - task_class_t cls,
7021 - budget_policy_t budget_policy, int set_cpu_set)
7022 +int sporadic_partitioned(lt_t e_ns, lt_t p_ns, int partition)
7023 {
7024 - return sporadic_task_ns(e * NS_PER_MS, p * NS_PER_MS, phase * NS_PER_MS,
7025 - cpu, priority, cls, budget_policy, set_cpu_set);
7026 + int ret;
7027 + struct rt_task param;
7028 +
7029 + ret = be_migrate_to_partition(partition);
7030 + check("be_migrate_to_partition()");
7031 + if (ret != 0)
7032 + return ret;
7033 +
7034 + init_rt_task_param(¶m);
7035 + param.exec_cost = e_ns;
7036 + param.period = p_ns;
7037 + param.cpu = partition_to_cpu(partition);
7038 +
7039 + return set_rt_task_param(gettid(), ¶m);
7040 }
7041
7042 -int sporadic_task_ns(lt_t e, lt_t p, lt_t phase,
7043 - int cpu, unsigned int priority,
7044 - task_class_t cls,
7045 - budget_policy_t budget_policy, int set_cpu_set)
7046 +int sporadic_clustered(lt_t e_ns, lt_t p_ns, int cluster, int cluster_size)
7047 {
7048 - struct rt_task param;
7049 int ret;
7050 + struct rt_task param;
7051 +
7052 + ret = be_migrate_to_cluster(cluster, cluster_size);
7053 + check("be_migrate_to_cluster()");
7054 + if (ret != 0)
7055 + return ret;
7056 +
7057 + init_rt_task_param(¶m);
7058 + param.exec_cost = e_ns;
7059 + param.period = p_ns;
7060 + param.cpu = cluster_to_first_cpu(cluster, cluster_size);
7061
7062 - /* Zero out first --- this is helpful when we add plugin-specific
7063 - * parameters during development.
7064 - */
7065 - memset(¶m, 0, sizeof(param));
7066 -
7067 - param.exec_cost = e;
7068 - param.period = p;
7069 - param.relative_deadline = p; /* implicit deadline */
7070 - param.cpu = cpu;
7071 - param.cls = cls;
7072 - param.phase = phase;
7073 - param.budget_policy = budget_policy;
7074 - param.priority = priority;
7075 -
7076 - if (set_cpu_set) {
7077 - ret = be_migrate_to(cpu);
7078 - check("migrate to cpu");
7079 - }
7080 return set_rt_task_param(gettid(), ¶m);
7081 }
7082
7083 @@ -144,3 +204,141 @@ void exit_litmus(void)
7084 {
7085 /* nothing to do in current version */
7086 }
7087 +
7088 +int open_kfmlp_gpu_sem(int fd, int name,
7089 + unsigned int num_gpus, unsigned int gpu_offset, unsigned int rho,
7090 + int affinity_aware)
7091 +{
7092 + int lock_od;
7093 + int affinity_od;
7094 + unsigned int num_replicas;
7095 + struct gpu_affinity_observer_args aff_args;
7096 + int aff_type;
7097 +
7098 + // number of GPU tokens
7099 + num_replicas = num_gpus * rho;
7100 +
7101 + // create the GPU token lock
7102 + lock_od = open_kfmlp_sem(fd, name, num_replicas);
7103 + if(lock_od < 0) {
7104 + perror("open_kfmlp_sem");
7105 + return -1;
7106 + }
7107 +
7108 + // create the affinity method to use.
7109 + // "no affinity" -> KFMLP_SIMPLE_GPU_AFF_OBS
7110 + aff_args.obs.lock_od = lock_od;
7111 + aff_args.replica_to_gpu_offset = gpu_offset;
7112 + aff_args.rho = rho;
7113 +
7114 + aff_type = (affinity_aware) ? KFMLP_GPU_AFF_OBS : KFMLP_SIMPLE_GPU_AFF_OBS;
7115 + affinity_od = od_openx(fd, aff_type, name+1, &aff_args);
7116 + if(affinity_od < 0) {
7117 + perror("open_kfmlp_aff");
7118 + return -1;
7119 + }
7120 +
7121 + return lock_od;
7122 +}
7123 +
7124 +
7125 +//int open_ikglp_gpu_sem(int fd, int name, int num_gpus, int gpu_offset, int rho, int affinity_aware, int relax_max_fifo_len)
7126 +//{
7127 +// int lock_od;
7128 +// int affinity_od;
7129 +// int num_replicas;
7130 +// struct gpu_affinity_observer_args aff_args;
7131 +// int aff_type;
7132 +//
7133 +// // number of GPU tokens
7134 +// num_replicas = num_gpus * num_simult_users;
7135 +//
7136 +// // create the GPU token lock
7137 +// lock_od = open_ikglp_sem(fd, name, (void*)&num_replicas);
7138 +// if(lock_od < 0) {
7139 +// perror("open_ikglp_sem");
7140 +// return -1;
7141 +// }
7142 +//
7143 +// // create the affinity method to use.
7144 +// // "no affinity" -> KFMLP_SIMPLE_GPU_AFF_OBS
7145 +// aff_args.obs.lock_od = lock_od;
7146 +// aff_args.replica_to_gpu_offset = gpu_offset;
7147 +// aff_args.nr_simult_users = num_simult_users;
7148 +// aff_args.relaxed_rules = (relax_max_fifo_len) ? 1 : 0;
7149 +//
7150 +// aff_type = (affinity_aware) ? IKGLP_GPU_AFF_OBS : IKGLP_SIMPLE_GPU_AFF_OBS;
7151 +// affinity_od = od_openx(fd, aff_type, name+1, &aff_args);
7152 +// if(affinity_od < 0) {
7153 +// perror("open_ikglp_aff");
7154 +// return -1;
7155 +// }
7156 +//
7157 +// return lock_od;
7158 +//}
7159 +
7160 +
7161 +
7162 +
7163 +int open_ikglp_sem(int fd, int name, unsigned int nr_replicas)
7164 +{
7165 + struct ikglp_args args = {
7166 + .nr_replicas = nr_replicas,
7167 + .max_in_fifos = IKGLP_M_IN_FIFOS,
7168 + .max_fifo_len = IKGLP_OPTIMAL_FIFO_LEN};
7169 +
7170 + return od_openx(fd, IKGLP_SEM, name, &args);
7171 +}
7172 +
7173 +
7174 +
7175 +int open_gpusync_token_lock(int fd, int name,
7176 + unsigned int num_gpus, unsigned int gpu_offset,
7177 + unsigned int rho, unsigned int max_in_fifos,
7178 + unsigned int max_fifo_len,
7179 + int enable_affinity_heuristics)
7180 +{
7181 + int lock_od;
7182 + int affinity_od;
7183 +
7184 + struct ikglp_args args = {
7185 + .nr_replicas = num_gpus*rho,
7186 + .max_in_fifos = max_in_fifos,
7187 + .max_fifo_len = max_fifo_len,
7188 + };
7189 + struct gpu_affinity_observer_args aff_args;
7190 + int aff_type;
7191 +
7192 + if (!num_gpus || !rho) {
7193 + perror("open_gpusync_sem");
7194 + return -1;
7195 + }
7196 +
7197 + if ((max_in_fifos != IKGLP_UNLIMITED_IN_FIFOS) &&
7198 + (max_fifo_len != IKGLP_UNLIMITED_FIFO_LEN) &&
7199 + (max_in_fifos > args.nr_replicas * max_fifo_len)) {
7200 + perror("open_gpusync_sem");
7201 + return(-1);
7202 + }
7203 +
7204 + lock_od = od_openx(fd, IKGLP_SEM, name, &args);
7205 + if(lock_od < 0) {
7206 + perror("open_gpusync_sem");
7207 + return -1;
7208 + }
7209 +
7210 + // create the affinity method to use.
7211 + aff_args.obs.lock_od = lock_od;
7212 + aff_args.replica_to_gpu_offset = gpu_offset;
7213 + aff_args.rho = rho;
7214 + aff_args.relaxed_rules = (max_fifo_len == IKGLP_UNLIMITED_FIFO_LEN) ? 1 : 0;
7215 +
7216 + aff_type = (enable_affinity_heuristics) ? IKGLP_GPU_AFF_OBS : IKGLP_SIMPLE_GPU_AFF_OBS;
7217 + affinity_od = od_openx(fd, aff_type, name+1, &aff_args);
7218 + if(affinity_od < 0) {
7219 + perror("open_gpusync_affinity");
7220 + return -1;
7221 + }
7222 +
7223 + return lock_od;
7224 +}
7225 diff --git a/src/migration.c b/src/migration.c
7226 new file mode 100644
7227 index 0000000..084b68c
7228 --- /dev/null
7229 +++ b/src/migration.c
7230 @@ -0,0 +1,217 @@
7231 +#include <stdio.h>
7232 +#include <stdlib.h>
7233 +#include <string.h>
7234 +#include <sched.h> /* for cpu sets */
7235 +#include <unistd.h>
7236 +
7237 +#ifdef LITMUS_NUMA_SUPPORT
7238 +#include <numa.h>
7239 +#endif
7240 +
7241 +#include "migration.h"
7242 +
7243 +
7244 +extern ssize_t read_file(const char* fname, void* buf, size_t maxlen);
7245 +
7246 +int release_master()
7247 +{
7248 + static const char NO_CPU[] = "NO_CPU";
7249 + char buf[5] = {0}; /* up to 9999 CPUs */
7250 + int master = -1;
7251 +
7252 + int ret = read_file("/proc/litmus/release_master", &buf, sizeof(buf)-1);
7253 +
7254 + if ((ret > 0) && (strncmp(buf, NO_CPU, sizeof(NO_CPU)-1) != 0))
7255 + master = atoi(buf);
7256 +
7257 + return master;
7258 +}
7259 +
7260 +int num_online_cpus()
7261 +{
7262 + return sysconf(_SC_NPROCESSORS_ONLN);
7263 +}
7264 +
7265 +int partition_to_cpu(int partition)
7266 +{
7267 + int cpu = partition;
7268 + int master = release_master();
7269 + if (master != -1 && master <= cpu) {
7270 + ++cpu; /* skip over the release master */
7271 + }
7272 + return cpu;
7273 +}
7274 +
7275 +int cluster_to_first_cpu(int cluster, int cluster_sz)
7276 +{
7277 + int first_cpu;
7278 + int master;
7279 +
7280 + if (cluster_sz == 1)
7281 + return partition_to_cpu(cluster);
7282 +
7283 + master = release_master();
7284 + first_cpu = cluster * cluster_sz;
7285 +
7286 + if (master == first_cpu)
7287 + ++first_cpu;
7288 +
7289 + return first_cpu;
7290 +}
7291 +
7292 +#ifdef LITMUS_NUMA_SUPPORT
7293 +/* Restrict the task to the numa nodes in the cpu mask. */
7294 +/* Call this before setting up CPU affinity masks since that mask may be
7295 + * a subset of the numa nodes. */
7296 +static int setup_numa(pid_t tid, int sz, const cpu_set_t *cpus)
7297 +{
7298 + int nr_nodes;
7299 + int nr_cpus = num_online_cpus();
7300 + struct bitmask* new_nodes;
7301 + struct bitmask* old_nodes;
7302 + int i;
7303 + int ret = 0;
7304 +
7305 + if (numa_available() != 0)
7306 + goto out;
7307 +
7308 + nr_nodes = numa_max_node()+1;
7309 + new_nodes = numa_bitmask_alloc(nr_nodes);
7310 + old_nodes = numa_bitmask_alloc(nr_nodes);
7311 + /* map the cpu mask to a numa mask */
7312 + for (i = 0; i < nr_cpus; ++i) {
7313 + if(CPU_ISSET_S(i, sz, cpus)) {
7314 + numa_bitmask_setbit(new_nodes, numa_node_of_cpu(i));
7315 + }
7316 + }
7317 + /* compute the complement numa mask */
7318 + for (i = 0; i < nr_nodes; ++i) {
7319 + if (!numa_bitmask_isbitset(new_nodes, i)) {
7320 + numa_bitmask_setbit(old_nodes, i);
7321 + }
7322 + }
7323 +
7324 + numa_set_strict(1);
7325 + numa_bind(new_nodes); /* sets CPU and memory policy */
7326 + ret = numa_migrate_pages(tid, old_nodes, new_nodes); /* move over prio alloc'ed pages */
7327 + numa_bitmask_free(new_nodes);
7328 + numa_bitmask_free(old_nodes);
7329 +
7330 +out:
7331 + return ret;
7332 +}
7333 +#else
7334 +#define setup_numa(x, y, z) 0
7335 +#endif
7336 +
7337 +int be_migrate_thread_to_cpu(pid_t tid, int target_cpu)
7338 +{
7339 + cpu_set_t *cpu_set;
7340 + size_t sz;
7341 + int num_cpus;
7342 + int ret;
7343 +
7344 + /* TODO: Error check to make sure that tid is not a real-time task. */
7345 +
7346 + if (target_cpu < 0)
7347 + return -1;
7348 +
7349 + num_cpus = num_online_cpus();
7350 + if (num_cpus == -1)
7351 + return -1;
7352 +
7353 + if (target_cpu >= num_cpus)
7354 + return -1;
7355 +
7356 + cpu_set = CPU_ALLOC(num_cpus);
7357 + sz = CPU_ALLOC_SIZE(num_cpus);
7358 +
7359 + CPU_ZERO_S(sz, cpu_set);
7360 + CPU_SET_S(target_cpu, sz, cpu_set);
7361 +
7362 + /* apply to caller */
7363 + if (tid == 0)
7364 + tid = gettid();
7365 +
7366 + ret = (setup_numa(tid, sz, cpu_set) >= 0) ? 0 : -1;
7367 + if (!ret)
7368 + ret = sched_setaffinity(tid, sz, cpu_set);
7369 +
7370 + CPU_FREE(cpu_set);
7371 +
7372 + return ret;
7373 +}
7374 +
7375 +int be_migrate_thread_to_cluster(pid_t tid, int cluster, int cluster_sz)
7376 +{
7377 + return __be_migrate_thread_to_cluster(tid, cluster, cluster_sz, 0);
7378 +}
7379 +
7380 +int __be_migrate_thread_to_cluster(pid_t tid, int cluster, int cluster_sz,
7381 + int ignore_rm)
7382 +{
7383 + int first_cpu = cluster * cluster_sz; /* first CPU in cluster */
7384 + int last_cpu = first_cpu + cluster_sz - 1;
7385 + int master;
7386 + int num_cpus;
7387 + cpu_set_t *cpu_set;
7388 + size_t sz;
7389 + int i;
7390 + int ret;
7391 +
7392 + /* TODO: Error check to make sure that tid is not a real-time task. */
7393 +
7394 + if (cluster_sz == 1) {
7395 + /* we're partitioned */
7396 + return be_migrate_thread_to_partition(tid, cluster);
7397 + }
7398 +
7399 + master = (ignore_rm) ? -1 : release_master();
7400 + num_cpus = num_online_cpus();
7401 +
7402 + if (num_cpus == -1 || last_cpu >= num_cpus || first_cpu < 0)
7403 + return -1;
7404 +
7405 + cpu_set = CPU_ALLOC(num_cpus);
7406 + sz = CPU_ALLOC_SIZE(num_cpus);
7407 + CPU_ZERO_S(sz, cpu_set);
7408 +
7409 + for (i = first_cpu; i <= last_cpu; ++i) {
7410 + if (i != master) {
7411 + CPU_SET_S(i, sz, cpu_set);
7412 + }
7413 + }
7414 +
7415 + /* apply to caller */
7416 + if (tid == 0)
7417 + tid = gettid();
7418 +
7419 + ret = (setup_numa(tid, sz, cpu_set) >= 0) ? 0 : -1;
7420 + if (!ret)
7421 + ret = sched_setaffinity(tid, sz, cpu_set);
7422 +
7423 + CPU_FREE(cpu_set);
7424 +
7425 + return ret;
7426 +}
7427 +
7428 +int be_migrate_thread_to_partition(pid_t tid, int partition)
7429 +{
7430 + return be_migrate_thread_to_cpu(tid, partition_to_cpu(partition));
7431 +}
7432 +
7433 +
7434 +int be_migrate_to_cpu(int target_cpu)
7435 +{
7436 + return be_migrate_thread_to_cpu(0, target_cpu);
7437 +}
7438 +
7439 +int be_migrate_to_cluster(int cluster, int cluster_sz)
7440 +{
7441 + return be_migrate_thread_to_cluster(0, cluster, cluster_sz);
7442 +}
7443 +
7444 +int be_migrate_to_partition(int partition)
7445 +{
7446 + return be_migrate_thread_to_partition(0, partition);
7447 +}
7448 diff --git a/src/signal.c b/src/signal.c
7449 new file mode 100644
7450 index 0000000..1bd0f62
7451 --- /dev/null
7452 +++ b/src/signal.c
7453 @@ -0,0 +1,109 @@
7454 +#include <stdio.h>
7455 +#include <string.h>
7456 +
7457 +#include "litmus.h"
7458 +#include "internal.h"
7459 +
7460 +/* setjmp calls are stored on a singly linked list,
7461 + * one stack per thread.
7462 + */
7463 +static __thread litmus_sigjmp_t *g_sigjmp_tail = 0;
7464 +
7465 +void push_sigjmp(litmus_sigjmp_t *buf)
7466 +{
7467 + buf->prev = g_sigjmp_tail;
7468 + g_sigjmp_tail = buf;
7469 +}
7470 +
7471 +litmus_sigjmp_t* pop_sigjmp(void)
7472 +{
7473 + litmus_sigjmp_t* ret;
7474 + ret = g_sigjmp_tail;
7475 + g_sigjmp_tail = (ret) ? ret->prev : NULL;
7476 + return ret;
7477 +}
7478 +
7479 +static void reg_litmus_signals(unsigned long litmus_sig_mask,
7480 + struct sigaction *pAction)
7481 +{
7482 + int ret;
7483 +
7484 + if (litmus_sig_mask | SIG_BUDGET_MASK) {
7485 + ret = sigaction(SIG_BUDGET, pAction, NULL);
7486 + check("SIG_BUDGET");
7487 + }
7488 + /* more signals ... */
7489 +}
7490 +
7491 +void ignore_litmus_signals(unsigned long litmus_sig_mask)
7492 +{
7493 + activate_litmus_signals(litmus_sig_mask, SIG_IGN);
7494 +}
7495 +
7496 +void activate_litmus_signals(unsigned long litmus_sig_mask,
7497 + litmus_sig_handler_t handle)
7498 +{
7499 + struct sigaction action;
7500 + memset(&action, 0, sizeof(action));
7501 + action.sa_handler = handle;
7502 +
7503 + reg_litmus_signals(litmus_sig_mask, &action);
7504 +}
7505 +
7506 +void activate_litmus_signal_actions(unsigned long litmus_sig_mask,
7507 + litmus_sig_actions_t handle)
7508 +{
7509 + struct sigaction action;
7510 + memset(&action, 0, sizeof(action));
7511 + action.sa_sigaction = handle;
7512 + action.sa_flags = SA_SIGINFO;
7513 +
7514 + reg_litmus_signals(litmus_sig_mask, &action);
7515 +}
7516 +
7517 +void block_litmus_signals(unsigned long litmus_sig_mask)
7518 +{
7519 + int ret;
7520 + sigset_t sigs;
7521 + sigemptyset(&sigs);
7522 +
7523 + if (litmus_sig_mask | SIG_BUDGET_MASK) {
7524 + sigaddset(&sigs, SIG_BUDGET);
7525 + }
7526 + /* more signals ... */
7527 +
7528 + ret = sigprocmask(SIG_BLOCK, &sigs, NULL);
7529 + check("SIG_BLOCK litmus signals");
7530 +}
7531 +
7532 +void unblock_litmus_signals(unsigned long litmus_sig_mask)
7533 +{
7534 + int ret;
7535 + sigset_t sigs;
7536 + sigemptyset(&sigs);
7537 +
7538 + if (litmus_sig_mask | SIG_BUDGET_MASK) {
7539 + sigaddset(&sigs, SIG_BUDGET);
7540 + }
7541 + /* more ... */
7542 +
7543 + ret = sigprocmask(SIG_UNBLOCK, &sigs, NULL);
7544 + check("SIG_UNBLOCK litmus signals");
7545 +}
7546 +
7547 +
7548 +void longjmp_on_litmus_signal(int signum)
7549 +{
7550 + /* We get signal! Main screen turn on! */
7551 + litmus_sigjmp_t *lit_env;
7552 + lit_env = pop_sigjmp();
7553 + if (lit_env) {
7554 + /* What you say?! */
7555 + //printf("%d: we get signal = %d!\n", gettid(), signum);
7556 + siglongjmp(lit_env->env, signum); /* restores signal mask */
7557 + }
7558 + else {
7559 + /* silently ignore the signal */
7560 + //printf("%d: silently ignoring signal.\n", gettid());
7561 + }
7562 +}
7563 diff --git a/src/syscalls.c b/src/syscalls.c
7564 index c68f15b..ff02b7d 100644
7565 --- a/src/syscalls.c
7566 +++ b/src/syscalls.c
7567 @@ -19,6 +19,12 @@ pid_t gettid(void)
7568
7569 int set_rt_task_param(pid_t pid, struct rt_task *param)
7570 {
7571 + if (param->budget_signal_policy != NO_SIGNALS) {
7572 + /* drop all signals until they're explicitly activated by
7573 + * user code. */
7574 + ignore_litmus_signals(SIG_BUDGET);
7575 + }
7576 +
7577 return syscall(__NR_set_rt_task_param, pid, param);
7578 }
7579
7580 @@ -52,6 +58,26 @@ int litmus_unlock(int od)
7581 return syscall(__NR_litmus_unlock, od);
7582 }
7583
7584 +int litmus_should_yield_lock(int od)
7585 +{
7586 + return syscall(__NR_litmus_should_yield_lock, od);
7587 +}
7588 +
7589 +int litmus_dgl_lock(int *ods, int dgl_size)
7590 +{
7591 + return syscall(__NR_litmus_dgl_lock, ods, dgl_size);
7592 +}
7593 +
7594 +int litmus_dgl_unlock(int *ods, int dgl_size)
7595 +{
7596 + return syscall(__NR_litmus_dgl_unlock, ods, dgl_size);
7597 +}
7598 +
7599 +int litmus_dgl_should_yield_lock(int *ods, int dgl_size)
7600 +{
7601 + return syscall(__NR_litmus_dgl_should_yield_lock, ods, dgl_size);
7602 +}
7603 +
7604 int get_job_no(unsigned int *job_no)
7605 {
7606 return syscall(__NR_query_job_no, job_no);
7607 @@ -72,9 +98,19 @@ int sched_getscheduler(pid_t pid)
7608 return syscall(__NR_sched_getscheduler, pid);
7609 }
7610
7611 +static int __wait_for_ts_release(struct timespec *release)
7612 +{
7613 + return syscall(__NR_wait_for_ts_release, release);
7614 +}
7615 +
7616 int wait_for_ts_release(void)
7617 {
7618 - return syscall(__NR_wait_for_ts_release);
7619 + return __wait_for_ts_release(NULL);
7620 +}
7621 +
7622 +int wait_for_ts_release2(struct timespec *release)
7623 +{
7624 + return __wait_for_ts_release(release);
7625 }
7626
7627 int release_ts(lt_t *delay)
7628 @@ -86,3 +122,47 @@ int null_call(cycles_t *timestamp)
7629 {
7630 return syscall(__NR_null_call, timestamp);
7631 }
7632 +
7633 +int enable_aux_rt_tasks(int flags)
7634 +{
7635 + return syscall(__NR_set_aux_tasks, flags | AUX_ENABLE);
7636 +}
7637 +
7638 +int disable_aux_rt_tasks(int flags)
7639 +{
7640 + return syscall(__NR_set_aux_tasks, flags & ~AUX_ENABLE);
7641 +}
7642 +
7643 +int inject_name(void)
7644 +{
7645 + return syscall(__NR_sched_trace_event, ST_INJECT_NAME, NULL);
7646 +}
7647 +
7648 +int inject_param(void)
7649 +{
7650 + return syscall(__NR_sched_trace_event, ST_INJECT_PARAM, NULL);
7651 +}
7652 +
7653 +int inject_release(lt_t release, lt_t deadline, unsigned int job_no)
7654 +{
7655 + struct st_inject_args args = {.release = release, .deadline = deadline, .job_no = job_no};
7656 + return syscall(__NR_sched_trace_event, ST_INJECT_RELEASE, &args);
7657 +}
7658 +
7659 +int inject_completion(unsigned int job_no)
7660 +{
7661 + struct st_inject_args args = {.release = 0, .deadline = 0, .job_no = job_no};
7662 + return syscall(__NR_sched_trace_event, ST_INJECT_COMPLETION, &args);
7663 +}
7664 +
7665 +int inject_gpu_migration(unsigned int to, unsigned int from)
7666 +{
7667 + struct st_inject_args args = {.to = to, .from = from};
7668 + return syscall(__NR_sched_trace_event, ST_INJECT_MIGRATION, &args);
7669 +}
7670 +
7671 +int __inject_action(unsigned int action)
7672 +{
7673 + struct st_inject_args args = {.action = action};
7674 + return syscall(__NR_sched_trace_event, ST_INJECT_ACTION, &args);
7675 +}
7676 diff --git a/src/task.c b/src/task.c
7677 index 4d237bd..c3a9109 100644
7678 --- a/src/task.c
7679 +++ b/src/task.c
7680 @@ -40,24 +40,16 @@ int __launch_rt_task(rt_fn_t rt_prog, void *rt_arg, rt_setup_fn_t setup,
7681 return rt_task;
7682 }
7683
7684 -int __create_rt_task(rt_fn_t rt_prog, void *arg, int cpu, int wcet, int period,
7685 - task_class_t class)
7686 +int create_rt_task(rt_fn_t rt_prog, void *arg, struct rt_task* param)
7687 {
7688 - struct rt_task params;
7689 - params.cpu = cpu;
7690 - params.period = period;
7691 - params.exec_cost = wcet;
7692 - params.cls = class;
7693 - params.phase = 0;
7694 - /* enforce budget for tasks that might not use sleep_next_period() */
7695 - params.budget_policy = QUANTUM_ENFORCEMENT;
7696 -
7697 - return __launch_rt_task(rt_prog, arg,
7698 - (rt_setup_fn_t) set_rt_task_param, ¶ms);
7699 -}
7700 + if (param->budget_policy == NO_ENFORCEMENT) {
7701 + /* This is only safe if the task to be launched does not peg the CPU.
7702 + That is, it must block frequently for I/O or call sleep_next_period()
7703 + at the end of each job. Otherwise, the task may peg the CPU. */
7704 + //printf("Warning: running budget enforcement used.\n");
7705 + }
7706
7707 -int create_rt_task(rt_fn_t rt_prog, void *arg, int cpu, int wcet, int period) {
7708 - return __create_rt_task(rt_prog, arg, cpu, wcet, period, RT_CLASS_HARD);
7709 + return __launch_rt_task(rt_prog, arg, (rt_setup_fn_t) set_rt_task_param, param);
7710 }
7711
7712
7713 diff --git a/tests/core_api.c b/tests/core_api.c
7714 index c0b291e..fc4deb9 100644
7715 --- a/tests/core_api.c
7716 +++ b/tests/core_api.c
7717 @@ -18,6 +18,7 @@ TESTCASE(set_rt_task_param_invalid_params, ALL,
7718 "reject invalid rt_task values")
7719 {
7720 struct rt_task params;
7721 + init_rt_task_param(¶ms);
7722 params.cpu = 0;
7723 params.period = 100;
7724 params.relative_deadline = params.period;
7725 @@ -53,6 +54,7 @@ TESTCASE(reject_bad_priorities, P_FP,
7726 "reject invalid priorities")
7727 {
7728 struct rt_task params;
7729 + init_rt_task_param(¶ms);
7730 params.cpu = 0;
7731 params.exec_cost = 10;
7732 params.period = 100;
7733 @@ -61,7 +63,7 @@ TESTCASE(reject_bad_priorities, P_FP,
7734 params.cls = RT_CLASS_HARD;
7735 params.budget_policy = NO_ENFORCEMENT;
7736
7737 - SYSCALL( be_migrate_to(params.cpu) );
7738 + SYSCALL( be_migrate_to_cpu(params.cpu) );
7739
7740 /* too high */
7741 params.priority = 0;
7742 @@ -79,6 +81,7 @@ TESTCASE(accept_valid_priorities, P_FP,
7743 "accept lowest and highest valid priorities")
7744 {
7745 struct rt_task params;
7746 + init_rt_task_param(¶ms);
7747 params.cpu = 0;
7748 params.exec_cost = 10;
7749 params.period = 100;
7750 @@ -87,7 +90,7 @@ TESTCASE(accept_valid_priorities, P_FP,
7751 params.cls = RT_CLASS_HARD;
7752 params.budget_policy = NO_ENFORCEMENT;
7753
7754 - SYSCALL( be_migrate_to(params.cpu) );
7755 + SYSCALL( be_migrate_to_cpu(params.cpu) );
7756
7757 /* acceptable */
7758 params.priority = LITMUS_LOWEST_PRIORITY;
7759 @@ -120,7 +123,7 @@ TESTCASE(rt_fork_non_rt, LITMUS,
7760 unsigned int pid, job_no;
7761 int status;
7762
7763 - SYSCALL( sporadic_partitioned(10, 100, 0) );
7764 + SYSCALL( sporadic_partitioned(ms2ns(10), ms2ns(100), 0) );
7765 SYSCALL( task_mode(LITMUS_RT_TASK) );
7766
7767 pid = fork();
7768 diff --git a/tests/fdso.c b/tests/fdso.c
7769 index 8a2a0d0..b216cb5 100644
7770 --- a/tests/fdso.c
7771 +++ b/tests/fdso.c
7772 @@ -16,7 +16,7 @@ TESTCASE(fmlp_not_active, C_EDF | PFAIR | LINUX,
7773 {
7774 int fd;
7775
7776 - SYSCALL( fd = open(".fmlp_locks", O_RDONLY | O_CREAT) );
7777 + SYSCALL( fd = open(".fmlp_locks", O_RDONLY | O_CREAT, S_IRUSR) );
7778
7779 ASSERT(fd != -1);
7780
7781 @@ -48,8 +48,8 @@ TESTCASE(invalid_od, ALL,
7782 TESTCASE(invalid_obj_type, ALL,
7783 "reject invalid object types")
7784 {
7785 - SYSCALL_FAILS( EINVAL, od_open(0, -1, 0) );
7786 - SYSCALL_FAILS( EINVAL, od_open(0, 10, 0) );
7787 + SYSCALL_FAILS( EINVAL, od_open(0, (obj_type_t)-1, 0) );
7788 + SYSCALL_FAILS( EINVAL, od_open(0, (obj_type_t)10, 0) );
7789 }
7790
7791 TESTCASE(not_inherit_od, GSN_EDF | PSN_EDF,
7792 @@ -57,7 +57,7 @@ TESTCASE(not_inherit_od, GSN_EDF | PSN_EDF,
7793 {
7794 int fd, od, pid, status;
7795
7796 - SYSCALL( fd = open(".fmlp_locks", O_RDONLY | O_CREAT) );
7797 + SYSCALL( fd = open(".fmlp_locks", O_RDONLY | O_CREAT, S_IRUSR) );
7798
7799 SYSCALL( od = open_fmlp_sem(fd, 0) );
7800
7801 @@ -66,7 +66,7 @@ TESTCASE(not_inherit_od, GSN_EDF | PSN_EDF,
7802 ASSERT( pid != -1 );
7803
7804 /* must be an RT task to lock at all */
7805 - SYSCALL( sporadic_partitioned(10, 100, 0) );
7806 + SYSCALL( sporadic_partitioned(ms2ns(10), ms2ns(100), 0) );
7807 SYSCALL( task_mode(LITMUS_RT_TASK) );
7808
7809 if (pid == 0) {
7810 diff --git a/tests/locks.c b/tests/locks.c
7811 index d7ebfe2..c3eba4e 100644
7812 --- a/tests/locks.c
7813 +++ b/tests/locks.c
7814 @@ -11,7 +11,7 @@ TESTCASE(not_lock_fmlp_be, GSN_EDF | PSN_EDF | P_FP,
7815 {
7816 int fd, od;
7817
7818 - SYSCALL( fd = open(".fmlp_locks", O_RDONLY | O_CREAT) );
7819 + SYSCALL( fd = open(".fmlp_locks", O_RDONLY | O_CREAT, S_IRUSR) );
7820
7821 SYSCALL( od = open_fmlp_sem(fd, 0) );
7822
7823 @@ -34,7 +34,7 @@ TESTCASE(not_lock_srp_be, PSN_EDF | P_FP,
7824 {
7825 int fd, od;
7826
7827 - SYSCALL( fd = open(".srp_locks", O_RDONLY | O_CREAT) );
7828 + SYSCALL( fd = open(".srp_locks", O_RDONLY | O_CREAT, S_IRUSR) );
7829
7830 /* BE tasks may not open SRP semaphores */
7831
7832 @@ -51,9 +51,9 @@ TESTCASE(lock_srp, PSN_EDF | P_FP,
7833 {
7834 int fd, od;
7835
7836 - SYSCALL( fd = open(".srp_locks", O_RDONLY | O_CREAT) );
7837 + SYSCALL( fd = open(".srp_locks", O_RDONLY | O_CREAT, S_IRUSR) );
7838
7839 - SYSCALL( sporadic_partitioned(10, 100, 0) );
7840 + SYSCALL( sporadic_partitioned(ms2ns(10), ms2ns(100), 0) );
7841 SYSCALL( task_mode(LITMUS_RT_TASK) );
7842
7843 SYSCALL( od = open_srp_sem(fd, 0) );
7844 @@ -83,9 +83,9 @@ TESTCASE(lock_fmlp, PSN_EDF | GSN_EDF | P_FP,
7845 {
7846 int fd, od;
7847
7848 - SYSCALL( fd = open(".fmlp_locks", O_RDONLY | O_CREAT) );
7849 + SYSCALL( fd = open(".fmlp_locks", O_RDONLY | O_CREAT, S_IRUSR) );
7850
7851 - SYSCALL( sporadic_partitioned(10, 100, 0) );
7852 + SYSCALL( sporadic_partitioned(ms2ns(10), ms2ns(100), 0) );
7853 SYSCALL( task_mode(LITMUS_RT_TASK) );
7854
7855 SYSCALL( od = open_fmlp_sem(fd, 0) );
7856 diff --git a/tests/nesting.c b/tests/nesting.c
7857 new file mode 100644
7858 index 0000000..b294334
7859 --- /dev/null
7860 +++ b/tests/nesting.c
7861 @@ -0,0 +1,468 @@
7862 +#include <fcntl.h>
7863 +#include <unistd.h>
7864 +#include <stdio.h>
7865 +
7866 +#include "tests.h"
7867 +#include "litmus.h"
7868 +
7869 +TESTCASE(lock_fmlp_nesting, PSN_EDF | GSN_EDF | P_FP,
7870 + "FMLP no nesting allowed")
7871 +{
7872 + int fd, od, od2;
7873 +
7874 + SYSCALL( fd = open(".fmlp_locks", O_RDONLY | O_CREAT, S_IRUSR) );
7875 +
7876 + SYSCALL( sporadic_partitioned(10, 100, 0) );
7877 + SYSCALL( task_mode(LITMUS_RT_TASK) );
7878 +
7879 + SYSCALL( od = open_fmlp_sem(fd, 0) );
7880 + SYSCALL( od2 = open_fmlp_sem(fd, 1) );
7881 +
7882 + SYSCALL( litmus_lock(od) );
7883 + SYSCALL( litmus_unlock(od) );
7884 +
7885 + SYSCALL( litmus_lock(od2) );
7886 + SYSCALL( litmus_unlock(od2) );
7887 +
7888 + SYSCALL( litmus_lock(od) );
7889 + SYSCALL_FAILS(EBUSY, litmus_lock(od2));
7890 + SYSCALL( litmus_unlock(od) );
7891 +
7892 + SYSCALL( litmus_lock(od2) );
7893 + SYSCALL_FAILS(EBUSY, litmus_lock(od));
7894 + SYSCALL( litmus_unlock(od2) );
7895 +
7896 + SYSCALL( od_close(od) );
7897 + SYSCALL( od_close(od2) );
7898 +
7899 + SYSCALL( close(fd) );
7900 +
7901 + SYSCALL( remove(".fmlp_locks") );
7902 +}
7903 +
7904 +TESTCASE(lock_fmlp_srp_nesting, PSN_EDF | P_FP,
7905 + "FMLP no nesting with SRP resources allowed")
7906 +{
7907 + int fd, od, od2;
7908 +
7909 + SYSCALL( fd = open(".fmlp_locks", O_RDONLY | O_CREAT, S_IRUSR) );
7910 +
7911 + SYSCALL( sporadic_partitioned(10, 100, 0) );
7912 + SYSCALL( task_mode(LITMUS_RT_TASK) );
7913 +
7914 + SYSCALL( od = open_fmlp_sem(fd, 0) );
7915 + SYSCALL( od2 = open_srp_sem(fd, 1) );
7916 +
7917 + SYSCALL( litmus_lock(od) );
7918 + SYSCALL( litmus_unlock(od) );
7919 +
7920 + SYSCALL( litmus_lock(od2) );
7921 + SYSCALL( litmus_unlock(od2) );
7922 +
7923 + SYSCALL( litmus_lock(od) );
7924 + SYSCALL_FAILS(EBUSY, litmus_lock(od2));
7925 + SYSCALL( litmus_unlock(od) );
7926 +
7927 + SYSCALL( litmus_lock(od2) );
7928 + SYSCALL_FAILS(EBUSY, litmus_lock(od));
7929 + SYSCALL( litmus_unlock(od2) );
7930 +
7931 + SYSCALL( od_close(od) );
7932 + SYSCALL( od_close(od2) );
7933 +
7934 + SYSCALL( close(fd) );
7935 +
7936 + SYSCALL( remove(".fmlp_locks") );
7937 +}
7938 +
7939 +TESTCASE(lock_srp_nesting, PSN_EDF | P_FP,
7940 + "SRP nesting allowed")
7941 +{
7942 + int fd, od, od2;
7943 +
7944 + SYSCALL( fd = open(".fmlp_locks", O_RDONLY | O_CREAT, S_IRUSR) );
7945 +
7946 + SYSCALL( sporadic_partitioned(10, 100, 0) );
7947 + SYSCALL( task_mode(LITMUS_RT_TASK) );
7948 +
7949 + SYSCALL( od = open_srp_sem(fd, 0) );
7950 + SYSCALL( od2 = open_srp_sem(fd, 1) );
7951 +
7952 + SYSCALL( litmus_lock(od) );
7953 + SYSCALL( litmus_unlock(od) );
7954 +
7955 + SYSCALL( litmus_lock(od2) );
7956 + SYSCALL( litmus_unlock(od2) );
7957 +
7958 + SYSCALL( litmus_lock(od) );
7959 + SYSCALL( litmus_lock(od2) );
7960 + SYSCALL( litmus_unlock(od2) );
7961 + SYSCALL( litmus_unlock(od) );
7962 +
7963 + SYSCALL( litmus_lock(od2) );
7964 + SYSCALL( litmus_lock(od) );
7965 + SYSCALL( litmus_unlock(od) );
7966 + SYSCALL( litmus_unlock(od2) );
7967 +
7968 + SYSCALL( od_close(od) );
7969 + SYSCALL( od_close(od2) );
7970 +
7971 + SYSCALL( close(fd) );
7972 +
7973 + SYSCALL( remove(".fmlp_locks") );
7974 +}
7975 +
7976 +TESTCASE(lock_pcp_nesting, P_FP,
7977 + "PCP nesting allowed")
7978 +{
7979 + int od, od2;
7980 + const char* namespace = ".pcp_locks";
7981 +
7982 + SYSCALL( sporadic_partitioned(10, 100, 0) );
7983 + SYSCALL( task_mode(LITMUS_RT_TASK) );
7984 +
7985 + SYSCALL( od = litmus_open_lock(PCP_SEM, 0, namespace, NULL) );
7986 + SYSCALL( od2 = litmus_open_lock(PCP_SEM, 1, namespace, NULL) );
7987 +
7988 + SYSCALL( litmus_lock(od) );
7989 + SYSCALL( litmus_unlock(od) );
7990 +
7991 + SYSCALL( litmus_lock(od2) );
7992 + SYSCALL( litmus_unlock(od2) );
7993 +
7994 + SYSCALL( litmus_lock(od) );
7995 + SYSCALL( litmus_lock(od2) );
7996 + SYSCALL( litmus_unlock(od2) );
7997 + SYSCALL( litmus_unlock(od) );
7998 +
7999 + SYSCALL( litmus_lock(od2) );
8000 + SYSCALL( litmus_lock(od) );
8001 + SYSCALL( litmus_unlock(od) );
8002 + SYSCALL( litmus_unlock(od2) );
8003 +
8004 + SYSCALL( od_close(od) );
8005 + SYSCALL( od_close(od2) );
8006 +
8007 + SYSCALL( remove(namespace) );
8008 +}
8009 +
8010 +TESTCASE(lock_mpcp_pcp_no_nesting, P_FP,
8011 + "PCP and MPCP nesting not allowed")
8012 +{
8013 + int od, od2;
8014 + const char* namespace = ".pcp_locks";
8015 +
8016 + SYSCALL( sporadic_partitioned(10, 100, 0) );
8017 + SYSCALL( task_mode(LITMUS_RT_TASK) );
8018 +
8019 + SYSCALL( od = litmus_open_lock(PCP_SEM, 0, namespace, NULL) );
8020 + SYSCALL( od2 = litmus_open_lock(MPCP_SEM, 1, namespace, NULL) );
8021 +
8022 + SYSCALL( litmus_lock(od) );
8023 + SYSCALL( litmus_unlock(od) );
8024 +
8025 + SYSCALL( litmus_lock(od2) );
8026 + SYSCALL( litmus_unlock(od2) );
8027 +
8028 + SYSCALL( litmus_lock(od) );
8029 + SYSCALL_FAILS(EBUSY, litmus_lock(od2));
8030 + SYSCALL( litmus_unlock(od) );
8031 +
8032 + SYSCALL( litmus_lock(od2) );
8033 + SYSCALL_FAILS(EBUSY, litmus_lock(od));
8034 + SYSCALL( litmus_unlock(od2) );
8035 +
8036 + SYSCALL( od_close(od) );
8037 + SYSCALL( od_close(od2) );
8038 +
8039 + SYSCALL( remove(namespace) );
8040 +}
8041 +
8042 +TESTCASE(lock_fmlp_pcp_no_nesting, P_FP,
8043 + "PCP and FMLP nesting not allowed")
8044 +{
8045 + int od, od2;
8046 + const char* namespace = ".pcp_locks";
8047 +
8048 + SYSCALL( sporadic_partitioned(10, 100, 0) );
8049 + SYSCALL( task_mode(LITMUS_RT_TASK) );
8050 +
8051 + SYSCALL( od = litmus_open_lock(PCP_SEM, 0, namespace, NULL) );
8052 + SYSCALL( od2 = litmus_open_lock(FMLP_SEM, 1, namespace, NULL) );
8053 +
8054 + SYSCALL( litmus_lock(od) );
8055 + SYSCALL( litmus_unlock(od) );
8056 +
8057 + SYSCALL( litmus_lock(od2) );
8058 + SYSCALL( litmus_unlock(od2) );
8059 +
8060 + SYSCALL( litmus_lock(od) );
8061 + SYSCALL_FAILS(EBUSY, litmus_lock(od2));
8062 + SYSCALL( litmus_unlock(od) );
8063 +
8064 + SYSCALL( litmus_lock(od2) );
8065 + SYSCALL_FAILS(EBUSY, litmus_lock(od));
8066 + SYSCALL( litmus_unlock(od2) );
8067 +
8068 + SYSCALL( od_close(od) );
8069 + SYSCALL( od_close(od2) );
8070 +
8071 + SYSCALL( remove(namespace) );
8072 +}
8073 +
8074 +TESTCASE(lock_dpcp_pcp_no_nesting, P_FP,
8075 + "PCP and DPCP nesting not allowed")
8076 +{
8077 + int od, od2;
8078 + int cpu = 0;
8079 + const char* namespace = ".pcp_locks";
8080 +
8081 + SYSCALL( sporadic_partitioned(10, 100, 0) );
8082 + SYSCALL( task_mode(LITMUS_RT_TASK) );
8083 +
8084 + SYSCALL( od = litmus_open_lock(PCP_SEM, 0, namespace, NULL) );
8085 + SYSCALL( od2 = litmus_open_lock(DPCP_SEM, 1, namespace, &cpu) );
8086 +
8087 + SYSCALL( litmus_lock(od) );
8088 + SYSCALL( litmus_unlock(od) );
8089 +
8090 + SYSCALL( litmus_lock(od2) );
8091 + SYSCALL( litmus_unlock(od2) );
8092 +
8093 + SYSCALL( litmus_lock(od) );
8094 + SYSCALL_FAILS(EBUSY, litmus_lock(od2));
8095 + SYSCALL( litmus_unlock(od) );
8096 +
8097 + SYSCALL( litmus_lock(od2) );
8098 + SYSCALL_FAILS(EBUSY, litmus_lock(od));
8099 + SYSCALL( litmus_unlock(od2) );
8100 +
8101 + SYSCALL( od_close(od) );
8102 + SYSCALL( od_close(od2) );
8103 +
8104 + SYSCALL( remove(namespace) );
8105 +}
8106 +
8107 +TESTCASE(lock_mpcp_srp_no_nesting, P_FP,
8108 + "SRP and MPCP nesting not allowed")
8109 +{
8110 + int od, od2;
8111 + const char* namespace = ".pcp_locks";
8112 +
8113 + SYSCALL( sporadic_partitioned(10, 100, 0) );
8114 + SYSCALL( task_mode(LITMUS_RT_TASK) );
8115 +
8116 + SYSCALL( od = litmus_open_lock(SRP_SEM, 0, namespace, NULL) );
8117 + SYSCALL( od2 = litmus_open_lock(MPCP_SEM, 1, namespace, NULL) );
8118 +
8119 + SYSCALL( litmus_lock(od) );
8120 + SYSCALL( litmus_unlock(od) );
8121 +
8122 + SYSCALL( litmus_lock(od2) );
8123 + SYSCALL( litmus_unlock(od2) );
8124 +
8125 + SYSCALL( litmus_lock(od) );
8126 + SYSCALL_FAILS(EBUSY, litmus_lock(od2));
8127 + SYSCALL( litmus_unlock(od) );
8128 +
8129 + SYSCALL( litmus_lock(od2) );
8130 + SYSCALL_FAILS(EBUSY, litmus_lock(od));
8131 + SYSCALL( litmus_unlock(od2) );
8132 +
8133 + SYSCALL( od_close(od) );
8134 + SYSCALL( od_close(od2) );
8135 +
8136 + SYSCALL( remove(namespace) );
8137 +}
8138 +
8139 +TESTCASE(lock_dpcp_srp_no_nesting, P_FP,
8140 + "SRP and DPCP nesting not allowed")
8141 +{
8142 + int od, od2;
8143 + int cpu = 0;
8144 + const char* namespace = ".pcp_locks";
8145 +
8146 + SYSCALL( sporadic_partitioned(10, 100, 0) );
8147 + SYSCALL( task_mode(LITMUS_RT_TASK) );
8148 +
8149 + SYSCALL( od = litmus_open_lock(SRP_SEM, 0, namespace, NULL) );
8150 + SYSCALL( od2 = litmus_open_lock(DPCP_SEM, 1, namespace, &cpu) );
8151 +
8152 + SYSCALL( litmus_lock(od) );
8153 + SYSCALL( litmus_unlock(od) );
8154 +
8155 + SYSCALL( litmus_lock(od2) );
8156 + SYSCALL( litmus_unlock(od2) );
8157 +
8158 + SYSCALL( litmus_lock(od) );
8159 + SYSCALL_FAILS(EBUSY, litmus_lock(od2));
8160 + SYSCALL( litmus_unlock(od) );
8161 +
8162 + SYSCALL( litmus_lock(od2) );
8163 + SYSCALL_FAILS(EBUSY, litmus_lock(od));
8164 + SYSCALL( litmus_unlock(od2) );
8165 +
8166 + SYSCALL( od_close(od) );
8167 + SYSCALL( od_close(od2) );
8168 +
8169 + SYSCALL( remove(namespace) );
8170 +}
8171 +
8172 +TESTCASE(lock_fmlp_mpcp_no_nesting, P_FP,
8173 + "MPCP and FMLP nesting not allowed")
8174 +{
8175 + int od, od2;
8176 + const char* namespace = ".pcp_locks";
8177 +
8178 + SYSCALL( sporadic_partitioned(10, 100, 0) );
8179 + SYSCALL( task_mode(LITMUS_RT_TASK) );
8180 +
8181 + SYSCALL( od = litmus_open_lock(MPCP_SEM, 0, namespace, NULL) );
8182 + SYSCALL( od2 = litmus_open_lock(FMLP_SEM, 1, namespace, NULL) );
8183 +
8184 + SYSCALL( litmus_lock(od) );
8185 + SYSCALL( litmus_unlock(od) );
8186 +
8187 + SYSCALL( litmus_lock(od2) );
8188 + SYSCALL( litmus_unlock(od2) );
8189 +
8190 + SYSCALL( litmus_lock(od) );
8191 + SYSCALL_FAILS(EBUSY, litmus_lock(od2));
8192 + SYSCALL( litmus_unlock(od) );
8193 +
8194 + SYSCALL( litmus_lock(od2) );
8195 + SYSCALL_FAILS(EBUSY, litmus_lock(od));
8196 + SYSCALL( litmus_unlock(od2) );
8197 +
8198 + SYSCALL( od_close(od) );
8199 + SYSCALL( od_close(od2) );
8200 +
8201 + SYSCALL( remove(namespace) );
8202 +}
8203 +
8204 +TESTCASE(lock_fmlp_dpcp_no_nesting, P_FP,
8205 + "DPCP and FMLP nesting not allowed")
8206 +{
8207 + int od, od2;
8208 + const char* namespace = ".pcp_locks";
8209 + int cpu = 0;
8210 +
8211 + SYSCALL( sporadic_partitioned(10, 100, 0) );
8212 + SYSCALL( task_mode(LITMUS_RT_TASK) );
8213 +
8214 + SYSCALL( od = litmus_open_lock(DPCP_SEM, 0, namespace, &cpu) );
8215 + SYSCALL( od2 = litmus_open_lock(FMLP_SEM, 1, namespace, NULL) );
8216 +
8217 + SYSCALL( litmus_lock(od) );
8218 + SYSCALL( litmus_unlock(od) );
8219 +
8220 + SYSCALL( litmus_lock(od2) );
8221 + SYSCALL( litmus_unlock(od2) );
8222 +
8223 + SYSCALL( litmus_lock(od) );
8224 + SYSCALL_FAILS(EBUSY, litmus_lock(od2));
8225 + SYSCALL( litmus_unlock(od) );
8226 +
8227 + SYSCALL( litmus_lock(od2) );
8228 + SYSCALL_FAILS(EBUSY, litmus_lock(od));
8229 + SYSCALL( litmus_unlock(od2) );
8230 +
8231 + SYSCALL( od_close(od) );
8232 + SYSCALL( od_close(od2) );
8233 +
8234 + SYSCALL( remove(namespace) );
8235 +}
8236 +
8237 +TESTCASE(mpcp_nesting, P_FP,
8238 + "MPCP no nesting allowed")
8239 +{
8240 + int od, od2;
8241 +
8242 + SYSCALL( sporadic_partitioned(10, 100, 0) );
8243 + SYSCALL( task_mode(LITMUS_RT_TASK) );
8244 +
8245 + SYSCALL( od = litmus_open_lock(MPCP_SEM, 0, ".mpcp_locks", NULL) );
8246 + SYSCALL( od2 = litmus_open_lock(MPCP_SEM, 1, ".mpcp_locks", NULL) );
8247 +
8248 + SYSCALL( litmus_lock(od) );
8249 + SYSCALL( litmus_unlock(od) );
8250 +
8251 + SYSCALL( litmus_lock(od2) );
8252 + SYSCALL( litmus_unlock(od2) );
8253 +
8254 + SYSCALL( litmus_lock(od) );
8255 + SYSCALL_FAILS(EBUSY, litmus_lock(od2));
8256 + SYSCALL( litmus_unlock(od) );
8257 +
8258 + SYSCALL( litmus_lock(od2) );
8259 + SYSCALL_FAILS(EBUSY, litmus_lock(od));
8260 + SYSCALL( litmus_unlock(od2) );
8261 +
8262 + SYSCALL( od_close(od) );
8263 + SYSCALL( od_close(od2) );
8264 +
8265 + SYSCALL( remove(".mpcp_locks") );
8266 +}
8267 +
8268 +TESTCASE(mpcpvs_nesting, P_FP,
8269 + "MPCP-VS no nesting allowed")
8270 +{
8271 + int od, od2;
8272 +
8273 + SYSCALL( sporadic_partitioned(10, 100, 0) );
8274 + SYSCALL( task_mode(LITMUS_RT_TASK) );
8275 +
8276 + SYSCALL( od = litmus_open_lock(MPCP_VS_SEM, 0, ".mpcp_locks", NULL) );
8277 + SYSCALL( od2 = litmus_open_lock(MPCP_VS_SEM, 1, ".mpcp_locks", NULL) );
8278 +
8279 + SYSCALL( litmus_lock(od) );
8280 + SYSCALL( litmus_unlock(od) );
8281 +
8282 + SYSCALL( litmus_lock(od2) );
8283 + SYSCALL( litmus_unlock(od2) );
8284 +
8285 + SYSCALL( litmus_lock(od) );
8286 + SYSCALL_FAILS(EBUSY, litmus_lock(od2));
8287 + SYSCALL( litmus_unlock(od) );
8288 +
8289 + SYSCALL( litmus_lock(od2) );
8290 + SYSCALL_FAILS(EBUSY, litmus_lock(od));
8291 + SYSCALL( litmus_unlock(od2) );
8292 +
8293 + SYSCALL( od_close(od) );
8294 + SYSCALL( od_close(od2) );
8295 +
8296 + SYSCALL( remove(".mpcp_locks") );
8297 +}
8298 +
8299 +TESTCASE(dpcp_nesting, P_FP,
8300 + "DPCP no nesting allowed")
8301 +{
8302 + int od, od2;
8303 + int cpu = 0;
8304 +
8305 + SYSCALL( sporadic_partitioned(10, 100, 0) );
8306 + SYSCALL( task_mode(LITMUS_RT_TASK) );
8307 +
8308 + SYSCALL( od = litmus_open_lock(DPCP_SEM, 0, ".dpcp_locks", &cpu) );
8309 + SYSCALL( od2 = litmus_open_lock(DPCP_SEM, 1, ".dpcp_locks", &cpu) );
8310 +
8311 + SYSCALL( litmus_lock(od) );
8312 + SYSCALL( litmus_unlock(od) );
8313 +
8314 + SYSCALL( litmus_lock(od2) );
8315 + SYSCALL( litmus_unlock(od2) );
8316 +
8317 + SYSCALL( litmus_lock(od) );
8318 + SYSCALL_FAILS(EBUSY, litmus_lock(od2));
8319 + SYSCALL( litmus_unlock(od) );
8320 +
8321 + SYSCALL( litmus_lock(od2) );
8322 + SYSCALL_FAILS(EBUSY, litmus_lock(od));
8323 + SYSCALL( litmus_unlock(od2) );
8324 +
8325 + SYSCALL( od_close(od) );
8326 + SYSCALL( od_close(od2) );
8327 +
8328 + SYSCALL( remove(".dpcp_locks") );
8329 +}
8330 diff --git a/tests/pcp.c b/tests/pcp.c
8331 index 88d1be3..19009a3 100644
8332 --- a/tests/pcp.c
8333 +++ b/tests/pcp.c
8334 @@ -1,6 +1,8 @@
8335 #include <fcntl.h>
8336 #include <unistd.h>
8337 #include <stdio.h>
8338 +#include <sys/wait.h> /* for waitpid() */
8339 +
8340
8341 #include "tests.h"
8342 #include "litmus.h"
8343 @@ -11,9 +13,9 @@ TESTCASE(lock_pcp, P_FP,
8344 {
8345 int fd, od, cpu = 0;
8346
8347 - SYSCALL( fd = open(".pcp_locks", O_RDONLY | O_CREAT) );
8348 + SYSCALL( fd = open(".pcp_locks", O_RDONLY | O_CREAT, S_IRUSR) );
8349
8350 - SYSCALL( sporadic_partitioned(10, 100, cpu) );
8351 + SYSCALL( sporadic_partitioned(ms2ns(10), ms2ns(100), cpu) );
8352 SYSCALL( task_mode(LITMUS_RT_TASK) );
8353
8354 SYSCALL( od = open_pcp_sem(fd, 0, cpu) );
8355 @@ -37,14 +39,222 @@ TESTCASE(lock_pcp, P_FP,
8356 SYSCALL( remove(".pcp_locks") );
8357 }
8358
8359 +TESTCASE(pcp_inheritance, P_FP,
8360 + "PCP priority inheritance")
8361 +{
8362 + int fd, od, cpu = 0;
8363 +
8364 + int child_hi, child_lo, child_middle, status, waiters;
8365 + lt_t delay = ms2ns(100);
8366 + double start, stop;
8367 +
8368 + struct rt_task params;
8369 + init_rt_task_param(¶ms);
8370 + params.cpu = 0;
8371 + params.exec_cost = ms2ns(10000);
8372 + params.period = ms2ns(100000);
8373 + params.relative_deadline = params.period;
8374 + params.phase = 0;
8375 + params.cls = RT_CLASS_HARD;
8376 + params.budget_policy = NO_ENFORCEMENT;
8377 +
8378 + SYSCALL( fd = open(".pcp_locks", O_RDONLY | O_CREAT, S_IRUSR) );
8379 +
8380 +
8381 + child_lo = FORK_TASK(
8382 + params.priority = LITMUS_LOWEST_PRIORITY;
8383 + params.phase = 0;
8384 + SYSCALL( set_rt_task_param(gettid(), ¶ms) );
8385 + SYSCALL( be_migrate_to_cpu(params.cpu) );
8386 + SYSCALL( task_mode(LITMUS_RT_TASK) );
8387 +
8388 + SYSCALL( od = open_pcp_sem(fd, 0, cpu) );
8389 +
8390 + SYSCALL( wait_for_ts_release() );
8391 +
8392 + SYSCALL( litmus_lock(od) );
8393 + start = cputime();
8394 + while (cputime() - start < 0.25)
8395 + ;
8396 + SYSCALL( litmus_unlock(od) );
8397 +
8398 + SYSCALL(sleep_next_period() );
8399 + );
8400 +
8401 + child_middle = FORK_TASK(
8402 + params.priority = LITMUS_HIGHEST_PRIORITY + 1;
8403 + params.phase = ms2ns(100);
8404 +
8405 + SYSCALL( set_rt_task_param(gettid(), ¶ms) );
8406 + SYSCALL( be_migrate_to_cpu(params.cpu) );
8407 + SYSCALL( task_mode(LITMUS_RT_TASK) );
8408 +
8409 +
8410 + SYSCALL( wait_for_ts_release() );
8411 +
8412 + start = cputime();
8413 + while (cputime() - start < 5)
8414 + ;
8415 + SYSCALL( sleep_next_period() );
8416 + );
8417 +
8418 + child_hi = FORK_TASK(
8419 + params.priority = LITMUS_HIGHEST_PRIORITY;
8420 + params.phase = ms2ns(50);
8421 +
8422 + SYSCALL( set_rt_task_param(gettid(), ¶ms) );
8423 + SYSCALL( be_migrate_to_cpu(params.cpu) );
8424 + SYSCALL( task_mode(LITMUS_RT_TASK) );
8425 +
8426 + SYSCALL( od = open_pcp_sem(fd, 0, cpu) );
8427 +
8428 + SYSCALL( wait_for_ts_release() );
8429 +
8430 + start = wctime();
8431 + /* block on semaphore */
8432 + SYSCALL( litmus_lock(od) );
8433 + SYSCALL( litmus_unlock(od) );
8434 + stop = wctime();
8435 +
8436 + /* Assert we had some blocking. */
8437 + ASSERT( stop - start > 0.1);
8438 +
8439 + /* Assert we woke up 'soonish' after the sleep. */
8440 + ASSERT( stop - start < 1 );
8441 +
8442 + SYSCALL( kill(child_middle, SIGUSR2) );
8443 + SYSCALL( kill(child_lo, SIGUSR2) );
8444 + );
8445 +
8446 + do {
8447 + waiters = get_nr_ts_release_waiters();
8448 + ASSERT( waiters >= 0 );
8449 + } while (waiters != 3);
8450 +
8451 + SYSCALL( be_migrate_to_cpu(1) );
8452 +
8453 + waiters = release_ts(&delay);
8454 +
8455 + SYSCALL( waitpid(child_hi, &status, 0) );
8456 + ASSERT( status == 0 );
8457 +
8458 + SYSCALL( waitpid(child_lo, &status, 0) );
8459 + ASSERT( status == SIGUSR2);
8460 +
8461 + SYSCALL( waitpid(child_middle, &status, 0) );
8462 + ASSERT( status == SIGUSR2);
8463 +}
8464 +
8465 +TESTCASE(srp_ceiling_blocking, P_FP | PSN_EDF,
8466 + "SRP ceiling blocking")
8467 +{
8468 + int fd, od;
8469 +
8470 + int child_hi, child_lo, child_middle, status, waiters;
8471 + lt_t delay = ms2ns(100);
8472 + double start, stop;
8473 +
8474 + struct rt_task params;
8475 + init_rt_task_param(¶ms);
8476 + params.cpu = 0;
8477 + params.exec_cost = ms2ns(10000);
8478 + params.period = ms2ns(100000);
8479 + params.relative_deadline = params.period;
8480 + params.phase = 0;
8481 + params.cls = RT_CLASS_HARD;
8482 + params.budget_policy = NO_ENFORCEMENT;
8483 +
8484 + SYSCALL( fd = open(".srp_locks", O_RDONLY | O_CREAT, S_IRUSR) );
8485 +
8486 +
8487 + child_lo = FORK_TASK(
8488 + params.priority = LITMUS_LOWEST_PRIORITY;
8489 + params.phase = 0;
8490 + SYSCALL( set_rt_task_param(gettid(), ¶ms) );
8491 + SYSCALL( be_migrate_to_cpu(params.cpu) );
8492 + SYSCALL( task_mode(LITMUS_RT_TASK) );
8493 +
8494 + SYSCALL( od = open_srp_sem(fd, 0) );
8495 +
8496 + SYSCALL( wait_for_ts_release() );
8497 +
8498 + SYSCALL( litmus_lock(od) );
8499 + start = cputime();
8500 + while (cputime() - start < 0.25)
8501 + ;
8502 + SYSCALL( litmus_unlock(od) );
8503 + );
8504 +
8505 + child_middle = FORK_TASK(
8506 + params.priority = LITMUS_HIGHEST_PRIORITY + 1;
8507 + params.phase = ms2ns(100);
8508 + params.relative_deadline -= ms2ns(110);
8509 +
8510 + SYSCALL( set_rt_task_param(gettid(), ¶ms) );
8511 + SYSCALL( be_migrate_to_cpu(params.cpu) );
8512 + SYSCALL( task_mode(LITMUS_RT_TASK) );
8513 +
8514 +
8515 + SYSCALL( wait_for_ts_release() );
8516 +
8517 + start = cputime();
8518 + while (cputime() - start < 5)
8519 + ;
8520 + );
8521 +
8522 + child_hi = FORK_TASK(
8523 + params.priority = LITMUS_HIGHEST_PRIORITY;
8524 + params.phase = ms2ns(50);
8525 + params.relative_deadline -= ms2ns(200);
8526 +
8527 + SYSCALL( set_rt_task_param(gettid(), ¶ms) );
8528 + SYSCALL( be_migrate_to_cpu(params.cpu) );
8529 + SYSCALL( task_mode(LITMUS_RT_TASK) );
8530 +
8531 + SYSCALL( od = open_srp_sem(fd, 0) );
8532 +
8533 + SYSCALL( wait_for_ts_release() );
8534 +
8535 + start = wctime();
8536 + /* block on semaphore */
8537 + SYSCALL( litmus_lock(od) );
8538 + SYSCALL( litmus_unlock(od) );
8539 + stop = wctime();
8540 +
8541 + /* Assert we had "no" blocking (modulo qemu overheads). */
8542 + ASSERT( stop - start < 0.01);
8543 +
8544 + SYSCALL( kill(child_middle, SIGUSR2) );
8545 + SYSCALL( kill(child_lo, SIGUSR2) );
8546 + );
8547 +
8548 + do {
8549 + waiters = get_nr_ts_release_waiters();
8550 + ASSERT( waiters >= 0 );
8551 + } while (waiters != 3);
8552 +
8553 + SYSCALL( be_migrate_to_cpu(1) );
8554 +
8555 + waiters = release_ts(&delay);
8556 +
8557 + SYSCALL( waitpid(child_hi, &status, 0) );
8558 + ASSERT( status == 0 );
8559 +
8560 + SYSCALL( waitpid(child_lo, &status, 0) );
8561 + ASSERT( status == SIGUSR2);
8562 +
8563 + SYSCALL( waitpid(child_middle, &status, 0) );
8564 + ASSERT( status == SIGUSR2);
8565 +}
8566 +
8567 TESTCASE(lock_dpcp, P_FP,
8568 "DPCP acquisition and release")
8569 {
8570 int fd, od, cpu = 1;
8571
8572 - SYSCALL( fd = open(".pcp_locks", O_RDONLY | O_CREAT) );
8573 + SYSCALL( fd = open(".pcp_locks", O_RDONLY | O_CREAT, S_IRUSR) );
8574
8575 - SYSCALL( sporadic_partitioned(10, 100, 0) );
8576 + SYSCALL( sporadic_partitioned(ms2ns(10), ms2ns(100), 0) );
8577 SYSCALL( task_mode(LITMUS_RT_TASK) );
8578
8579 SYSCALL( od = open_dpcp_sem(fd, 0, cpu) );
8580 @@ -73,7 +283,7 @@ TESTCASE(not_lock_pcp_be, P_FP,
8581 {
8582 int fd, od;
8583
8584 - SYSCALL( fd = open(".pcp_locks", O_RDONLY | O_CREAT) );
8585 + SYSCALL( fd = open(".pcp_locks", O_RDONLY | O_CREAT, S_IRUSR) );
8586
8587 /* BE tasks are not even allowed to open a PCP semaphore */
8588 SYSCALL_FAILS(EPERM, od = open_pcp_sem(fd, 0, 1) );
8589 @@ -95,9 +305,9 @@ TESTCASE(lock_mpcp, P_FP,
8590 {
8591 int fd, od;
8592
8593 - SYSCALL( fd = open(".pcp_locks", O_RDONLY | O_CREAT) );
8594 + SYSCALL( fd = open(".pcp_locks", O_RDONLY | O_CREAT, S_IRUSR) );
8595
8596 - SYSCALL( sporadic_partitioned(10, 100, 0) );
8597 + SYSCALL( sporadic_partitioned(ms2ns(10), ms2ns(100), 0) );
8598 SYSCALL( task_mode(LITMUS_RT_TASK) );
8599
8600 SYSCALL( od = open_mpcp_sem(fd, 0) );
8601 diff --git a/tests/sched.c b/tests/sched.c
8602 index ab47a91..6726b46 100644
8603 --- a/tests/sched.c
8604 +++ b/tests/sched.c
8605 @@ -9,13 +9,14 @@ TESTCASE(preempt_on_resume, P_FP | PSN_EDF,
8606 "preempt lower-priority task when a higher-priority task resumes")
8607 {
8608 int child_hi, child_lo, status, waiters;
8609 - lt_t delay = ms2lt(100);
8610 + lt_t delay = ms2ns(100);
8611 double start, stop;
8612
8613 struct rt_task params;
8614 + init_rt_task_param(¶ms);
8615 params.cpu = 0;
8616 - params.exec_cost = ms2lt(10000);
8617 - params.period = ms2lt(100000);
8618 + params.exec_cost = ms2ns(10000);
8619 + params.period = ms2ns(100000);
8620 params.relative_deadline = params.period;
8621 params.phase = 0;
8622 params.cls = RT_CLASS_HARD;
8623 @@ -24,7 +25,7 @@ TESTCASE(preempt_on_resume, P_FP | PSN_EDF,
8624 child_lo = FORK_TASK(
8625 params.priority = LITMUS_LOWEST_PRIORITY;
8626 SYSCALL( set_rt_task_param(gettid(), ¶ms) );
8627 - SYSCALL( be_migrate_to(params.cpu) );
8628 + SYSCALL( be_migrate_to_cpu(params.cpu) );
8629 SYSCALL( task_mode(LITMUS_RT_TASK) );
8630
8631 SYSCALL( wait_for_ts_release() );
8632 @@ -40,7 +41,7 @@ TESTCASE(preempt_on_resume, P_FP | PSN_EDF,
8633 params.priority = LITMUS_HIGHEST_PRIORITY;
8634 params.relative_deadline -= 1000000;
8635 SYSCALL( set_rt_task_param(gettid(), ¶ms) );
8636 - SYSCALL( be_migrate_to(params.cpu) );
8637 + SYSCALL( be_migrate_to_cpu(params.cpu) );
8638 SYSCALL( task_mode(LITMUS_RT_TASK) );
8639
8640 SYSCALL( wait_for_ts_release() );
8641 @@ -51,14 +52,14 @@ TESTCASE(preempt_on_resume, P_FP | PSN_EDF,
8642 ;
8643
8644 start = wctime();
8645 - SYSCALL( lt_sleep(ms2lt(100)) );
8646 + SYSCALL( lt_sleep(ms2ns(100)) );
8647 stop = wctime();
8648
8649 SYSCALL( kill(child_lo, SIGUSR2) );
8650
8651 if (stop - start >= 0.2)
8652 fprintf(stderr, "\nHi-prio delay = %fsec\n",
8653 - stop - start - (ms2lt(100) / 1E9));
8654 + stop - start - (ms2ns(100) / (float)s2ns(1)));
8655
8656 /* Assert we woke up 'soonish' after the sleep. */
8657 ASSERT( stop - start < 0.2 );
8658 --
8659 1.7.10.4
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily. You are not allowed to attach a file to this page.