/* ########################################################################## */
/* (C) UPMC, 2010-2011                                                        */
/*     Authors:                                                               */
/*       Jean-Pierre Lozi <jean-pierre.lozi@lip6.fr>                          */
/*       Gaël Thomas <gael.thomas@lip6.fr>                                    */
/*       Florian David <florian.david@lip6.fr>                                */
/*       Julia Lawall <julia.lawall@lip6.fr>                                  */
/*       Gilles Muller <gilles.muller@lip6.fr>                                */
/* -------------------------------------------------------------------------- */
/* ########################################################################## */
//#define DEBUG
#include <stdint.h>
#include <sys/time.h>
#include <string.h>
#include <errno.h>
#include <ucontext.h>
#include <sys/mman.h>
#include <stdarg.h>
#include <numa.h>
#include "liblock-fatal.h"
#include "liblock.h"

#ifdef USE_PAPI
#include <papi.h>
#endif

/*
 *      constants
 */
/* Longest time the manager sleeps between two passes of manager_loop: 0 s + 50,000,000 ns. */
static const struct timespec manager_timeout = { 0, 50000000 };

/* Scheduling priorities used with SCHED_FIFO for the backup, servicing and manager threads. */
#define PRIO_BACKUP    1
#define PRIO_SERVICING 2
#define PRIO_MANAGER   3

/* Values of server->state. The numeric order matters: the code tests `state >= SERVER_STARTING`. */
#define SERVER_DOWN     0
#define SERVER_STOPPING 1
#define SERVER_STARTING 2
#define SERVER_UP       3

/* STACK_SIZE: stack mapped for each mini thread; MINI_STACK_SIZE: region mapped in native_thread->stack. */
#define STACK_SIZE            r_align(1024*1024, PAGE_SIZE)
#define MINI_STACK_SIZE       r_align(64*1024, PAGE_SIZE)

/*
 *  structures
 */
/*
 * One request slot. Clients index server->requests with their own self.id,
 * fill the slot and spin until `pending` goes back to null; the servicing
 * loop stores the result of the call in `val` before clearing `pending`.
 * `pad` is sized with pad_to_cache_line() from the size of the three fields.
 */
struct request {
	struct liblock_lock* volatile lock;            /* lock associated with the request */
	void* volatile                val;             /* argument of the pending request, then its result */
	void*              (*volatile  pending)(void*); /* pending request or null if no pending request */
	char                          pad[pad_to_cache_line(sizeof(void*(*)(void*)) + 
																											sizeof(void*) +
																											sizeof(struct liblock_lock*))];
};

/*
 * RCL flavour of a liblock lock: the generic header, the server that executes
 * its critical sections, and a `locked` flag that server-side threads take
 * with local_val_compare_and_swap(). The two pads separate the read-mostly
 * part from the frequently written flag.
 */
struct liblock_lock {
	declare_liblock_lock_header();
	struct server*  server;      /* server which owns the lock */
	char            pad0[pad_to_cache_line(sizeof(struct liblock_lock_header) + sizeof(struct server*))];
	int volatile    locked;	     /* state of the lock */
	char            pad1[pad_to_cache_line(sizeof(int))];
};

/*
 * Descriptor of one pthread created by ensure_at_least_one_free_thread().
 * Descriptors are chained through `next` from server->native_threads.
 */
struct native_thread {
	struct native_thread* volatile next;            /* next native thread */
	struct mini_thread* volatile   mini_thread;     /* currently associated mini thread */
	struct server*                 server;          /* server of the thread */
	pthread_t                      tid;             /* thread id */
	int volatile                   is_servicing;    /* currently servicing */
	int volatile                   has_run;         /* has recently run */
	ucontext_t                     initial_context; /* initial context of the thread (resumed when servicing_loop ends) */
	void*                          stack;           /* pointer to the region mapped with MINI_STACK_SIZE */
};

/*
 * Link of an intrusive singly linked list of mini threads.
 * `next_and_mark` holds the address of the next link; its lowest bit is the
 * "logically removed" mark tested by ll_is_marked().
 */
struct mini_thread_ll {
	uintptr_t volatile  next_and_mark; /* next link, with the removal mark in bit 0 */
	struct mini_thread* content;       /* mini thread that embeds this link */
};

/*
 * A user-level execution context (ucontext) that runs servicing_loop on its
 * own stack. It embeds three list links so that it can sit in several lists
 * at the same time.
 */
struct mini_thread {
	ucontext_t                   context;       /* context of the mini thread */
	struct server*               server;        /* server of the mini thread, used for broadcast */
	struct request* volatile     cur_request;   /* current executed request, used in wait */
	int volatile                 is_timed;      /* true if timed */
	struct timespec              deadline;      /* deadline, only used when the mini-thread is in a timed wait */
	liblock_cond_t* volatile     wait_on;       /* queue of the mini_thread */
	int volatile                 wait_res;      /* result of the wait (timeout or not) */
	struct mini_thread_ll        ll_ready;      /* link used for the ready, prepared and condition lists */
	struct mini_thread_ll        ll_timed;      /* link used for server->mini_thread_timed */
	struct mini_thread_ll        ll_all;        /* link used for server->mini_thread_all */
	void*                        stack;         /* base of the STACK_SIZE mapping */
};

/*
 * State of one lock server (the `servers` array holds one per core).
 * Fields are grouped by access pattern and the groups are separated by
 * padding computed with pad_to_cache_line().
 */
struct server {
	/* always shared accesses */
	struct request*                 requests;               /* the request array */
	struct core*                    core;                   /* core where the server run */
	char                            pad0[pad_to_cache_line(2*sizeof(void*))];

	/* shared accesses when blocking */
	struct mini_thread_ll* volatile mini_thread_all;        /* list of all active mini threads               */
	struct mini_thread_ll* volatile mini_thread_timed;      /* sorted list of mini threads that have timeout */
	struct mini_thread_ll* volatile mini_thread_ready;      /* list of active mini threads                   */
	struct mini_thread_ll* volatile mini_thread_prepared;   /* list of sleeping mini threads                 */
	int volatile                    nb_ready_and_servicing; /* number of threads and of mini threads that are pending */ 

	char                            pad1[pad_to_cache_line(4*sizeof(void*) + sizeof(int))];

	/* intensively used */
	struct timespec volatile        next_deadline;          /* next timed wait deadline */
	int volatile                    alive;                  /* true if native threads are able to make progress */

	int volatile                    nb_servicing_threads;   /* number of servicing threads (protected by lock) */
	int volatile                    nb_free_threads;        /* number of servicing threads that are not executing critical sections */
	int volatile                    nb_attached_locks;      /* number of locks attached to this server */
	int volatile                    state;                  /* state of the server (running, starting...) */
	struct native_thread* volatile  native_threads;         /* list of all the native threads, for shutdown */
	int volatile                    nb_native_threads;      /* total number of native threads (protected by lock) */
	pthread_mutex_t                 lock_server;            /* lock of the server, used for state transition and to wakeup unservicing threads */
	pthread_cond_t                  cond_state;             /* condition to wait on state transition */
	pthread_cond_t                  cond_servicing;         /* condition to wait on transition from the unservicing to the servicing state */ 
	void                            (*volatile callback)(); /* callback called when the server is ready to handle request */
};

static struct server*                 servers = 0; /* array of servers (one per core) */
static struct liblock_lock            fake_lock;   /* fake lock always taken, used in wait to avoid a second call to the request */
static __thread struct native_thread* volatile me;          /* (thread-local) pointer to the native thread; tested against null to detect server threads */

/*
 *    functions pre-declarations
 */
/* entry point of every mini thread; declared early because allocate_mini_thread() passes it to makecontext() */
static void servicing_loop();

/* 
 * debug 
 */
/*
 * Debug helper: prints one line made of a prefix describing the server
 * (core id / calling thread id, state, the free / servicing /
 * ready-and-servicing / native counters) followed by the printf-style
 * message `msg` and a newline.
 *
 * Aborts through fatal() when nb_free_threads is negative, which indicates
 * a broken counter.
 */
void rclprintf(struct server* server, const char* msg, ...) {
	va_list va;

	printf("[%d/%d - ", server->core->core_id, self.id);
	switch(server->state) {
		case SERVER_DOWN:     printf("    down"); break;
		case SERVER_STOPPING: printf("stopping"); break;
		case SERVER_STARTING: printf("starting"); break;
		case SERVER_UP:       printf("      up"); break;
	}
	printf(" - %d/%d/%d/%d] - ", server->nb_free_threads, server->nb_servicing_threads, server->nb_ready_and_servicing, server->nb_native_threads);
	va_start(va, msg);
	vprintf(msg, va);
	va_end(va); /* every va_start must be paired with va_end in the same function */
	printf("\n");
	if(server->nb_free_threads < 0)
		fatal("should not happen");
}

/*
 *      time spec shortcuts
 */
/* true when *ts1 is strictly earlier than *ts2: seconds decide, nanoseconds break ties */
#define ts_lt(ts1, ts2)													\
	(((ts1)->tv_sec != (ts2)->tv_sec)									\
	 ? ((ts1)->tv_sec < (ts2)->tv_sec)									\
	 : ((ts1)->tv_nsec < (ts2)->tv_nsec))

/* true when *ts1 is earlier than or equal to *ts2: seconds decide, nanoseconds break ties */
#define ts_le(ts1, ts2)													\
	(((ts1)->tv_sec != (ts2)->tv_sec)									\
	 ? ((ts1)->tv_sec < (ts2)->tv_sec)									\
	 : ((ts1)->tv_nsec <= (ts2)->tv_nsec))

/*
 * *res = *ts1 + *ts2, with tv_nsec normalized to [0, 1e9) provided both
 * inputs are normalized. The carry test is `>=` so that a sum of exactly
 * one second of nanoseconds is carried too, and the constants are integers
 * to keep the arithmetic out of floating point.
 */
#define ts_add(res, ts1, ts2)											\
	({																	\
		(res)->tv_sec = (ts1)->tv_sec + (ts2)->tv_sec;					\
		(res)->tv_nsec = (ts1)->tv_nsec + (ts2)->tv_nsec;				\
		if((res)->tv_nsec >= 1000000000L) {								\
			(res)->tv_nsec -= 1000000000L;								\
			(res)->tv_sec++;											\
		}																\
	})

/*
 * Fills *ts from gettimeofday(2) (microseconds are converted to nanoseconds
 * with integer arithmetic) and evaluates to gettimeofday's return value.
 * The locals carry a ts_gtod_ prefix so they cannot shadow a caller's
 * variable that appears in the `ts` or `tz` argument.
 */
#define ts_gettimeofday(ts, tz)																					\
	({ struct timeval ts_gtod_tv; int ts_gtod_rc = gettimeofday(&ts_gtod_tv, tz); (ts)->tv_sec = ts_gtod_tv.tv_sec; (ts)->tv_nsec = ts_gtod_tv.tv_usec * 1000L; ts_gtod_rc; })

/* prints *ts as seconds.nanoseconds; the fraction is zero-padded to 9 digits so that 5 ms prints as .005000000 */
#define ts_print(ts) printf("%ld.%09ld", (long)(ts)->tv_sec, (long)(ts)->tv_nsec)

/*
 * lock server
 */
/*
 * lock_server / unlock_server take and release server->lock_server.
 * Switch the condition below to 0 to get the tracing variants, which log
 * through rclprintf around each mutex operation.
 */
#if 1
#define lock_server(server)   pthread_mutex_lock(&(server)->lock_server)
#define unlock_server(server) pthread_mutex_unlock(&(server)->lock_server)
#else
#define lock_server(server)   ({ rclprintf(server, "locking"); pthread_mutex_lock(&(server)->lock_server); rclprintf(server, "locked"); })
#define unlock_server(server) ({ rclprintf(server, "unlocking"); pthread_mutex_unlock(&(server)->lock_server); })
#endif

/*
 * scheduling
 */
/*
 * Sets the scheduling priority of `thread_id` to `prio` and aborts through
 * fatal() on failure.
 *
 * pthread_setschedprio() reports failure through its return value (an error
 * number) and does not set errno, so the message is built from that value.
 */
static __attribute__((always_inline)) inline void setprio(pthread_t thread_id, unsigned int prio) {
	int err = pthread_setschedprio(thread_id, prio);

	if(err)
		fatal("unable to set priority: %s", strerror(err));
}

/*
 * atomic operations on a local core
 */
/* CAS but without any global synchronization */
/*
 * Compare-and-swap built on x86 `cmpxchg` WITHOUT the `lock` prefix: if *ptr
 * equals old_val it is replaced by new_val. The expression evaluates to the
 * value *ptr held before the instruction, so a result equal to old_val means
 * the swap happened. Intended for data only touched by threads of one core.
 * NOTE(review): *ptr is declared as an output-only "=m" operand although
 * cmpxchg also reads it; "+m" looks more accurate -- confirm before changing.
 */
#define local_val_compare_and_swap(type, ptr, old_val, new_val) ({			\
			type tmp = old_val;																								\
			asm volatile( "\tcmpxchg %3,%1;" /* no lock! */										\
										: "=a" (tmp), "=m" (*(ptr))													\
										: "0" (tmp), "r" (new_val)													\
										: "cc"); /* no "memory" clobber, to avoid reloading other addresses */ \
																																				\
			tmp;																															\
		})

/* atomic add but without any global synchronization */
/*
 * Adds `val` to *ptr with x86 `xadd` WITHOUT the `lock` prefix and returns
 * the value *ptr held before the addition.
 * NOTE(review): *ptr is declared as an output-only "=m" operand although
 * xadd also reads it; "+m" looks more accurate -- confirm before changing.
 */
static inline __attribute__((always_inline)) int local_fetch_and_add(int volatile* ptr, int val) {
	asm volatile( "\txadd %2, %1;" /* no lock! */
								: "=r" (val), "=m" (*(ptr))
								: "0" (val)
								: "cc"); /* no "memory" clobber, to avoid reloading other addresses */
	return val;
}

/* atomic add but without any global synchronization */
/* same as local_fetch_and_add, but returns the value after the addition */
static inline __attribute__((always_inline)) int local_add_and_fetch(int volatile* ptr, int val) {
	const int before = local_fetch_and_add(ptr, val);

	return before + val;
}

/*
 * mini-threads
 */
/* commut from in to out */
/*
 * Switches the calling native thread from mini thread `in` to mini thread
 * `out`: records `out` as the current mini thread, saves the running context
 * in in->context and resumes out->context. The call returns when some thread
 * later switches back to `in`.
 */
static inline __attribute__((always_inline)) void swap_mini_thread(struct mini_thread* in, struct mini_thread* out) {
	//rclprintf(in->server, "switching from %p to %p", in, out);
	me->mini_thread = out; /* must be set before the switch: `out` reads it as soon as it resumes */
	swapcontext(&in->context, &out->context);
}

/* returns 1 when the removal mark (bit 0) is set in a next_and_mark word, 0 otherwise */
static inline int ll_is_marked(uintptr_t next_and_mark) {
	const uintptr_t mark_bit = 1;

	return (next_and_mark & mark_bit) != 0;
}

/* strips the removal mark (bit 0) from a next_and_mark word and returns the link address it holds */
static inline struct mini_thread_ll* ll_ptr(uintptr_t next_and_mark) {
	const uintptr_t mark_bit = 1;

	return (struct mini_thread_ll*)(next_and_mark & ~mark_bit);
}

/* returns the address of `node` as a next_and_mark word with the removal mark (bit 0) set */
static inline uintptr_t ll_mark(struct mini_thread_ll* node) {
	uintptr_t word = (uintptr_t)node;

	word |= 1;
	return word;
}

/*
 * Debug helper: prints `msg`, the address of the list root and then the
 * `content` pointer of every link, followed by a newline. The walk stops
 * with an "abort" line as soon as a link word carrying the removal mark is
 * met.
 */
void ll_print(const char* msg, struct mini_thread_ll* volatile* root) {
	uintptr_t volatile* link = (uintptr_t*)root;
	uintptr_t word;

	printf("%s: %p => ", msg, root);

	for(word = *link; word; word = *link) {
		struct mini_thread_ll* entry;

		if(ll_is_marked(word)) {
			printf("abort: has removed nodes\n");
			return;
		}

		entry = (struct mini_thread_ll*)word;
		printf(" %p", entry->content);
		link = &entry->next_and_mark;
	}

	printf("\n");
}

/*
 * Removes `node` from the list rooted at `root`.
 *
 * Step 1 marks node->next_and_mark (logical removal) with a CAS; if the CAS
 * fails -- the node was already marked or its successor changed meanwhile --
 * the function gives up and returns 0.
 * Step 2 walks the list from the root and unlinks every marked link it meets
 * (not only `node`), calling on_remove(content) for each link it unlinked
 * when on_remove is non-null.
 *
 * Returns 1 when this call unlinked `node` itself, 0 when the end of the
 * list was reached without doing so.
 */
int ll_remove(struct mini_thread_ll* volatile* root, struct mini_thread_ll* node, void on_remove(struct mini_thread*)) {
	uintptr_t volatile* pred;
	struct mini_thread_ll* cur;

	cur = ll_ptr(node->next_and_mark);

	/* logical removal: set the mark bit on the node's own next word */
	if(__sync_val_compare_and_swap(&node->next_and_mark, cur, ll_mark(cur)) != (uintptr_t)cur)
		return 0;

	/* find and compress */
 restart:
	pred = (uintptr_t*)root;

	while(1) {
		cur = ll_ptr(*pred);

		if(!cur)
			return 0;

		if(ll_is_marked(cur->next_and_mark)) {
			/* invariant: if the node is already removed, pred does not contain cur */
			if(__sync_val_compare_and_swap(pred, cur, ll_ptr(cur->next_and_mark)) != (uintptr_t)cur)
				goto restart; /* somebody changed pred under us: walk again from the root */
			if(on_remove)
				on_remove(cur->content);
			if(cur == node)
				return 1;
		}
			
		pred = &cur->next_and_mark;
	}
}

/*
 * Inserts `node` in the list rooted at `root`, which is kept sorted by
 * content->deadline: the node is linked in front of the first link whose
 * deadline is strictly later than its own, or at the tail when there is
 * none. Marked links met on the way are unlinked. Any failed CAS restarts
 * the walk from the root.
 * NOTE(review): after a marked `cur` has been unlinked, the walk still
 * compares against it and may advance through its next word; this looks
 * intentional (a later CAS on a marked word fails and restarts) -- confirm.
 */
void ll_timed_insert(struct mini_thread_ll* volatile* root, struct mini_thread_ll* node) {
	uintptr_t volatile* pred;
	struct mini_thread_ll* cur;

	/* find and compress */
 restart:
	pred = (uintptr_t*)root;

	while(1) {
		cur = ll_ptr(*pred);

		if(!cur) {
			/* end of list: append */
			node->next_and_mark = 0;
			if(__sync_val_compare_and_swap(pred, cur, node) != (uintptr_t)cur)
				goto restart;
			else
				return;
		}

		/* compress */
		if(ll_is_marked(cur->next_and_mark)) {
			/* invariant: if the node is already removed, pred does not contain cur */
			if(__sync_val_compare_and_swap(pred, cur, ll_ptr(cur->next_and_mark)) != (uintptr_t)cur)
				goto restart;
		}

		if(ts_lt(&node->content->deadline, &cur->content->deadline)) {
			/* first later deadline found: link node in front of cur */
			node->next_and_mark = (uintptr_t)cur;
			if(__sync_val_compare_and_swap(pred, cur, node) != (uintptr_t)cur)
				goto restart;
			else
				return;
		}
			
		pred = &cur->next_and_mark;
	}
}

/*
 * Pushes `node` at the head of the list rooted at `root`. The node's next
 * word is overwritten with the observed head (which also clears any mark
 * left on it) and the root is swung with a CAS, retried until it succeeds.
 */
void ll_enqueue(struct mini_thread_ll* volatile* root, struct mini_thread_ll* node) {
	uintptr_t head;

	for(;;) {
		head = (uintptr_t)*root;
		node->next_and_mark = head;

		if(__sync_val_compare_and_swap((uintptr_t*)root, head, node) == head)
			return;
	}
}

/*
 * Pops the head of the list rooted at `root` in two steps: first the head's
 * next word is marked, then the root is swung to the successor. Both steps
 * are CAS operations and the loop simply re-reads the head after a failure.
 *
 * Returns the unlinked link (its next word still carries the mark), or 0
 * when the list is empty.
 */
struct mini_thread_ll* ll_dequeue(struct mini_thread_ll* volatile* root) {
	struct mini_thread_ll* cur;
	uintptr_t mark;

	while((cur = *root)) {
		mark = cur->next_and_mark;

		if(ll_is_marked(mark)) {
			/* second, try to remove it safely */
			if(__sync_val_compare_and_swap((uintptr_t*)root, cur, ll_ptr(mark)) == (uintptr_t)cur)
				return cur;
		} else
			/* first, mark the node for deletion */
			__sync_val_compare_and_swap(&cur->next_and_mark, mark, ll_mark((struct mini_thread_ll*)mark));
	}

	return 0;
}

/*
 * Creates a new mini thread for `server`: allocates the descriptor, maps a
 * STACK_SIZE stack whose first page is made inaccessible, prepares a context
 * that starts in servicing_loop and registers the mini thread in
 * server->mini_thread_all. Returns the new descriptor.
 */
static struct mini_thread* allocate_mini_thread(struct server* server) {
	struct mini_thread* fresh = liblock_allocate(sizeof(struct mini_thread));

	/* stack, with its first page turned into a no-access page */
	fresh->stack = anon_mmap(STACK_SIZE);
	mprotect(fresh->stack, PAGE_SIZE, PROT_NONE);

	/* back pointers: every embedded link leads to its mini thread */
	fresh->server           = server;
	fresh->ll_ready.content = fresh;
	fresh->ll_timed.content = fresh;
	fresh->ll_all.content   = fresh;

	/* execution context: runs servicing_loop on the new stack, no successor context */
	getcontext(&fresh->context);
	fresh->context.uc_link          = 0;
	fresh->context.uc_stack.ss_sp   = fresh->stack;
	fresh->context.uc_stack.ss_size = STACK_SIZE;
	makecontext(&fresh->context, (void(*)())servicing_loop, 0);

	ll_enqueue(&server->mini_thread_all, &fresh->ll_all);

	return fresh;
}

/*
 * Pops one mini thread from server->mini_thread_ready and accounts for it in
 * nb_ready_and_servicing. Returns 0 when the ready list is empty.
 */
struct mini_thread* get_ready_mini_thread(struct server* server) {
	struct mini_thread_ll* link = ll_dequeue(&server->mini_thread_ready);

	if(!link)
		return 0;

	__sync_fetch_and_sub(&server->nb_ready_and_servicing, 1);
	return link->content;
}

/*
 * Returns a mini thread to run next, trying in order: a ready one, a
 * prepared (parked) one, and finally a freshly allocated one.
 */
struct mini_thread* get_or_allocate_mini_thread(struct server* server) {
	struct mini_thread* ready = get_ready_mini_thread(server);
	struct mini_thread_ll* parked;

	if(ready)
		return ready;

	parked = ll_dequeue(&server->mini_thread_prepared);

	return parked ? parked->content : allocate_mini_thread(server);
}

/*
 * Makes `mini_thread` runnable again: drops its entry from the server's
 * timed list, pushes it on the ready list and counts it in
 * nb_ready_and_servicing. Used as the on_remove callback of ll_remove() by
 * the manager when a deadline has expired.
 */
static void insert_in_ready_and_remove_from_timed(struct mini_thread* mini_thread) {
	struct server* const owner = mini_thread->server;

	ll_remove(&owner->mini_thread_timed, &mini_thread->ll_timed, 0);
	ll_enqueue(&owner->mini_thread_ready, &mini_thread->ll_ready);
	__sync_fetch_and_add(&owner->nb_ready_and_servicing, 1);
}

/*
 *   liblock API
 */
/* execute operation (client side) */
/*
 * Executes pending(val) under `lock` and returns its result.
 *
 * - Called from a server thread (`me` is set) running on the core of the
 *   lock's server: the lock flag is taken locally and the function is called
 *   directly.
 * - Otherwise: the request is posted in the caller's slot of the server's
 *   request array and the caller spins until the server clears `pending`;
 *   the result is then read from the slot's `val`.
 */
static void* do_liblock_execute_operation(rcl)(struct liblock_lock* lock, void* (*pending)(void*), void* val) {
	struct server* server = lock->server;

	//rclprintf(server, "*** sending operation %p::%p for client %d - %p", pending, val, self.id, (void*)pthread_self());
	if(me && self.running_core == server->core) {
		void* res;

		while(local_val_compare_and_swap(int, &lock->locked, 0, 1)) { /* one of my thread own the lock */
			me->has_run = 1;
			pthread_yield();                          /* give a chance to one of our thread to release the lock */
		}

		res = pending(val);
		lock->locked = 0;                           /* I release the lock */

		return res;
	}

	struct request* req = &server->requests[self.id];

	/* `pending` is written last: the servicing loop treats a non-null `pending` as "slot is complete" */
	req->lock = lock;
	req->val = val;
	req->pending = pending;

	while(req->pending)
		PAUSE();

	return req->val;
}

/*
 * Body of every mini thread. While the server is up, each iteration:
 *   1. flags the server as alive and the native thread as having run;
 *   2. scans the request slots between id_manager.first and
 *      id_manager.first_free and executes every pending request whose lock
 *      flag can be taken;
 *   3. when more than one ready/servicing entity exists, either hands the
 *      native thread over to a ready mini thread, or puts the native thread
 *      to sleep on cond_servicing, or briefly advertises itself as free and
 *      occasionally yields the processor.
 * When the server leaves SERVER_UP, the thread counters are decremented
 * under the server lock and control jumps back to the native thread's
 * initial context (saved in servicing_thread).
 */
static void servicing_loop() {
	struct server* server = me->server;
	struct request* request, *last;
	void* (*pending)(void*);
	int time = 0; /* idle-iteration counter driving the periodic pthread_yield below */

	//rclprintf(server, "::: start servicing loop %p", me->mini_thread);

	while(server->state == SERVER_UP) {
		server->alive = 1;
		me->has_run   = 1;

 		last = &server->requests[id_manager.first_free];

 		for(request=&server->requests[id_manager.first]; request<last; request++) {
 			pending = request->pending;

 			if(pending) {
				/* the CAS returns the previous flag value: 0 means we now hold the lock */
				if(!local_val_compare_and_swap(int, &request->lock->locked, 0, 1)) {
					me->mini_thread->cur_request = request;
					//rclprintf(server, "executing request %p::%p", pending, request->val);
					request->val = pending(request->val);
					//rclprintf(server, "executing request %p::%p done", pending, request->val);
					request->pending = 0; /* releases the waiting client */
					request->lock->locked = 0;
					time += 100;
				}
 			}
 		}

		//{ static int n=0; if(!(++n % 200000)) rclprintf(server, "servicing loop is running"); }
		
		if(server->nb_ready_and_servicing > 1) {
			struct mini_thread *next;
			//{ static int n=0; if(!(n++ % 200000)) rclprintf(server, "still alive 2 %p", next); }

			/* the first test is an optimization to avoid the CAS */
			if(server->nb_ready_and_servicing > server->nb_servicing_threads && (next = get_ready_mini_thread(server))) {
				struct mini_thread *cur = me->mini_thread;
				/* more than one ready mini threads, activate the next one and put the running one in the prepared list */
				//rclprintf(server, "servicing-loop::elect mini-thread: %p (and %p goes to prepared)", next, cur);
				ll_enqueue(&server->mini_thread_prepared, &cur->ll_ready);
				//ll_print("prepared", &server->mini_thread_prepared);
				swap_mini_thread(cur, next);
				//rclprintf(server, "servicing-loop::mini-thread: %p is up", me->mini_thread);
				time = 0;
			} else if(server->nb_free_threads > 0) {
				/* more than one free thread, put this thread in the sleeping state (not servicing anymore) */
				lock_server(server);  /* the lock is only used to go to the wait state! */

				if(server->state >= SERVER_STARTING) {
					me->is_servicing = 0;
					server->nb_servicing_threads--;

					__sync_fetch_and_sub(&server->nb_ready_and_servicing, 1);
				
					//rclprintf(server, "servicing-loop::unactivating");
					pthread_cond_wait(&server->cond_servicing, &server->lock_server);
					/* the waker incremented nb_free_threads on our behalf before signalling: consume it */
					local_fetch_and_add(&server->nb_free_threads, -1);
					//rclprintf(server, "servicing-loop::activating");

					me->is_servicing = 1;

				}

				unlock_server(server);
				time = 0;
			} else {
				/* ok, I have strictly more than one servicing thread and no free threads, this one is free, activate the next */
				//static int z=0; if(!(++z % 200000)) rclprintf(server, "servicing-loop::yield processor");
				local_fetch_and_add(&server->nb_free_threads, 1);
				if(time++ > 1000) {
					pthread_yield(); /* all the threads are busy */
					time = 0;
				}
				local_fetch_and_add(&server->nb_free_threads, -1);
			}
		}
	}

	//rclprintf(server, "::: releasing resources of %p", pthread_self());

	lock_server(server);

	server->nb_native_threads--;
	server->nb_servicing_threads--;

	__sync_fetch_and_sub(&server->nb_ready_and_servicing, 1);
	
	unlock_server(server);

	//rclprintf(server, "::: quitting servicing-loop %p", pthread_self());

	/* leave the mini thread for good: resume the native thread right after its getcontext() */
	setcontext(&me->initial_context);
}

/*
 * Start routine of a native servicing thread; `arg` is its native_thread
 * descriptor. It binds a mini thread to the native thread, runs the server's
 * start callback at most once (the thread that wins the CAS clears it),
 * then jumps into the mini thread. Always returns 0.
 *
 * With COUNT_EVENTS defined, a PAPI event set is started before and stopped
 * after servicing, and the count is stored in server_event_count.
 */
static void* servicing_thread(void* arg) {
	struct native_thread* native_thread = arg;
	struct server*        server = native_thread->server;
	void (*callback)() = server->callback;

	//rclprintf(server, "::: starting servicing thread %d with stack at %p", self.id, __builtin_frame_address(0));

	me = native_thread;
	me->mini_thread = get_or_allocate_mini_thread(server);

#ifdef COUNT_EVENTS
	int event_set = PAPI_NULL;
	long long values[1];
	
	if (event_id != 0) {
		if (PAPI_create_eventset(&event_set) != PAPI_OK)
			fatal("PAPI_create_eventset");
		if (PAPI_add_event(event_set, event_id) != PAPI_OK)
			fatal("PAPI_add_events");
		
		/* This seemingly helps increasing PAPI's accuracy. */
		if (PAPI_start(event_set) != PAPI_OK)
			fatal("PAPI_start");
		if (PAPI_stop(event_set, values) != PAPI_OK)
			fatal("PAPI_stop");
		
		if (PAPI_start(event_set) != PAPI_OK)
			fatal("PAPI_start");
	}
#endif

	/* only the thread whose CAS replaces the callback by null gets to call it */
	if(callback && __sync_val_compare_and_swap(&server->callback, callback, 0) == callback) {
		callback();
	}

	/* this thread was counted as free by its creator; it is about to start servicing */
	local_fetch_and_add(&server->nb_free_threads, -1);

	/*
	 * Execution passes here twice: once now, and once more when
	 * servicing_loop ends with setcontext(&me->initial_context). The second
	 * time the server is no longer up, so the test below fails and the
	 * thread falls through to its exit path.
	 */
	getcontext(&me->initial_context);

	if(server->state == SERVER_UP)
		setcontext(&me->mini_thread->context);

#ifdef COUNT_EVENTS
	if (event_id != 0) {
		if (PAPI_stop(event_set, values) != PAPI_OK)
			fatal("PAPI_stop");
		
		server_event_count = values[0];
	}
	//printf("Server DCM: %lld\n", server_event_count);
#endif

	return 0;
}

/*
 * Guarantees that `server` has at least one free servicing thread.
 *
 * When nb_free_threads is below 1 and the server is starting or up, either
 * wakes a sleeping native thread (one exists when fewer threads are
 * servicing than were created) or creates a new native thread bound to the
 * server's core with SCHED_FIFO / PRIO_SERVICING. In both cases the counters
 * are updated under the server lock before the thread actually runs.
 *
 * The thread attribute object is destroyed once the thread has been created.
 */
static void ensure_at_least_one_free_thread(struct server* server) {
	//rclprintf(server, "ensure at least");
	if(server->nb_free_threads < 1) {
		/* ouch, no more free thread, creates or activates a new one */
		lock_server(server);

		//rclprintf(server, "no more free threads");

		if(server->state >= SERVER_STARTING) {
			//rclprintf(server, "state %d %d", server->nb_servicing_threads, server->nb_native_threads);
			if(server->nb_servicing_threads < server->nb_native_threads) {
				//rclprintf(server, "activating existing thread");

				/* account for the woken thread on its behalf; it consumes the "free" unit when it wakes up */
				local_fetch_and_add(&server->nb_free_threads, 1);
				__sync_fetch_and_add(&server->nb_ready_and_servicing, 1);
				server->nb_servicing_threads++;

				pthread_cond_signal(&server->cond_servicing);
				unlock_server(server);
			} else {
				struct native_thread* native_thread = liblock_allocate(sizeof(struct native_thread));
				struct sched_param    param;
				pthread_attr_t        attr;

				native_thread->stack = anon_mmap(MINI_STACK_SIZE);
				mprotect(native_thread->stack, PAGE_SIZE, PROT_NONE);
				
				//rclprintf(server, "CREATE a new servicing thread %p with stack at %p", native_thread, native_thread->stack);

				param.sched_priority = PRIO_SERVICING;
				pthread_attr_init(&attr);

				pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
				pthread_attr_setschedpolicy(&attr, SCHED_FIFO);
				pthread_attr_setschedparam(&attr, &param);
				//				pthread_attr_setstack(&attr, native_thread->stack, MINI_STACK_SIZE);

				local_fetch_and_add(&server->nb_free_threads, 1);

				server->nb_native_threads++;
				server->nb_servicing_threads++;

				__sync_fetch_and_add(&server->nb_ready_and_servicing, 1);

				native_thread->server = server;
				native_thread->is_servicing = 1;
				native_thread->has_run = 0;

				/* publish the descriptor in the server's list while the lock is still held */
				native_thread->next = server->native_threads;
				server->native_threads = native_thread;

				unlock_server(server);

				//rclprintf(server, "launching the new servicing thread %p", native_thread);
				liblock_thread_create_and_bind(server->core, "rcl", &native_thread->tid, &attr, servicing_thread, native_thread);
				//rclprintf(server, "launching of the servicing thread %p done", native_thread);

				/* the attribute object is not needed once the thread exists */
				pthread_attr_destroy(&attr);
			}
		} else
			unlock_server(server);
	}
	//rclprintf(server, "ensure done");
}

/*
 * Start routine of the backup thread (created with PRIO_BACKUP by
 * manager_thread); `arg` is the server. For as long as the server is up it
 * keeps calling ensure_at_least_one_free_thread(). Always returns 0.
 */
static void* backup_thread(void* arg) {
	struct server* const srv = arg;

	for(; srv->state == SERVER_UP; )
		ensure_at_least_one_free_thread(srv);

	return 0;
}

/*
 * Main loop of the manager thread. Must be entered with the server lock
 * held; the lock is released only around ensure_at_least_one_free_thread()
 * and while sleeping in pthread_cond_timedwait().
 *
 * Each pass:
 *   1. Liveness: if no servicing thread set server->alive since the previous
 *      pass, make sure a free thread exists and elect one servicing thread
 *      to run (see below); otherwise just clear the flag for the next pass.
 *   2. Timeouts: re-inject every mini thread at the head of the timed list
 *      whose deadline has passed, and compute the next wake-up time as the
 *      earlier of now + manager_timeout and the first pending deadline.
 *   3. Sleep on cond_state until that time or until signalled.
 */
static void manager_loop(struct server* server) {
	struct timespec now, deadline;
	int done;
	
	while(server->state == SERVER_UP) {
		//rclprintf(server, "manager is working (%d)", server->alive);
		if(!server->alive) {
			done = 0;

			//rclprintf(server, "no more alive servicing threads");
			unlock_server(server);
			ensure_at_least_one_free_thread(server);
			lock_server(server);

			/*
			 * Election: the first servicing thread that has not run recently
			 * is elected; every other servicing thread gets its priority
			 * lowered then restored (presumably to requeue it behind the
			 * elected one under SCHED_FIFO -- TODO confirm). If every thread
			 * has run, all has_run flags are cleared and the scan restarts.
			 */
			while(!done) {
				struct native_thread* cur;
				for(cur=server->native_threads; cur; cur=cur->next) {
					if(cur->is_servicing == 1) {
						if(done || cur->has_run) {
							setprio(cur->tid, PRIO_BACKUP);
							setprio(cur->tid, PRIO_SERVICING);
						} else {
							//rclprintf(server, "manager elect %p", cur);
							done = 1;
							cur->has_run = 1; /* set it here because could be in I/O */
						}
					}
				}
						
				if(!done)
					for(cur=server->native_threads; cur; cur=cur->next)
						cur->has_run = 0;
			}
		} else
			server->alive = 0;

		ts_gettimeofday(&now, 0);
		ts_add(&deadline, &now, &manager_timeout);

		done = 0;

		//printf("+++++++++++++++++++++++++++++++++++++\n");
		//printf("++++      manager: current time: "); ts_print(&now); printf("\n");
		//printf("++++      manager: initial deadline: "); ts_print(&deadline); printf("\n");
		while(!done) {
			struct mini_thread_ll* node = server->mini_thread_timed;
 			if(node) {
				struct mini_thread* cur = node->content;
				//printf("++++      manager: find waiter: %p\n", cur);
				if(ts_le(&cur->deadline, &now)) {
					//printf("++++      manager: reinject expired deadline\n");
					/* take the waiter out of its condition's list; the callback moves it to the ready list */
					ll_remove((struct mini_thread_ll**)&cur->wait_on->data, &cur->ll_ready, insert_in_ready_and_remove_from_timed);
				} else {
					struct timespec ddd = cur->deadline;
					if(ts_lt(&ddd, &deadline)) {
						//printf("++++      manager: change deadline to: "); ts_print(&deadline); printf("\n");
						deadline = ddd;
					}
					done = 1;
				}
			} else
				done = 1;
		}
		//printf("++++      manager: next deadline: "); ts_print(&deadline); printf("\n");

		server->next_deadline = deadline;

		//rclprintf(server, "manager::sleeping");
		pthread_cond_timedwait(&server->cond_state, &server->lock_server, &deadline);
		//rclprintf(server, "manager::sleeping done");
	}
}

/*
 * Start routine of the manager thread; `arg` is the server. Always returns 0.
 *
 * Start-up: marks the server up, creates the backup thread (SCHED_FIFO,
 * PRIO_BACKUP), makes sure one servicing thread exists, wakes whoever waits
 * for the state change and runs manager_loop() with the server lock held.
 *
 * Shutdown (once manager_loop returns): counts every sleeping native thread
 * as servicing again and wakes them all so that they can leave, joins the
 * native threads and the backup thread, releases the native-thread and
 * mini-thread resources, then marks the server down and notifies waiters.
 *
 * The thread attribute object is destroyed once the backup thread has been
 * created.
 */
static void* manager_thread(void* arg) {
	pthread_t backup_tid;
	struct server* server = arg;
	struct native_thread* native_thread;
	struct sched_param param;
	pthread_attr_t attr;

	server->state = SERVER_UP;
	server->alive = 1;

	param.sched_priority = PRIO_BACKUP;
	pthread_attr_init(&attr);
	pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
	pthread_attr_setschedpolicy(&attr, SCHED_FIFO);
	pthread_attr_setschedparam(&attr, &param);

	liblock_thread_create_and_bind(server->core, "rcl", &backup_tid, &attr, backup_thread, server);

	/* the attribute object is not needed once the thread exists */
	pthread_attr_destroy(&attr);

	ensure_at_least_one_free_thread(server);

	lock_server(server);

	pthread_cond_broadcast(&server->cond_state);

	manager_loop(server);
	
	/* every sleeping native thread is about to be woken: account for all of them as servicing and free */
	local_fetch_and_add(&server->nb_free_threads, server->nb_native_threads - server->nb_servicing_threads);
	__sync_fetch_and_add(&server->nb_ready_and_servicing, server->nb_native_threads - server->nb_servicing_threads);
	local_fetch_and_add(&server->nb_servicing_threads, server->nb_native_threads - server->nb_servicing_threads);

	//rclprintf(server, "broadcast final down message");
	pthread_cond_broadcast(&server->cond_servicing);

	unlock_server(server);

	//rclprintf(server, "waiting servicing threads");

	for(native_thread=server->native_threads; native_thread; native_thread=native_thread->next)
		pthread_join(native_thread->tid, 0);

	//rclprintf(server, "waiting backup");

	if(pthread_join(backup_tid, 0) != 0)
		fatal("pthread_join");

	//rclprintf(server, "retake the lock");

	lock_server(server);

	//rclprintf(server, "finalizing native threads");

	/* cleanup resources */
	while(server->native_threads) {
		struct native_thread* next = server->native_threads->next;
		munmap(server->native_threads->stack, MINI_STACK_SIZE);
		free(server->native_threads);
		server->native_threads = next;
	}

	//rclprintf(server, "finalizing mini threads");

	{
		struct mini_thread_ll* ll_cur;
		while((ll_cur = ll_dequeue(&server->mini_thread_all))) {
			struct mini_thread* cur = ll_cur->content;
			munmap(cur->stack, STACK_SIZE);
			free(cur);
		}
	}

	//rclprintf(server, "quitting");

	server->state = SERVER_DOWN;

	pthread_cond_broadcast(&server->cond_state);

	unlock_server(server);

	return 0;
}

/*
 * Starts `server` if it is down and returns once it is up.
 *
 * The function waits on server->cond_state with server->lock_server, so it
 * must be called with that lock held. It first waits for any transition in
 * progress to settle (state DOWN or UP); if the server is already up there
 * is nothing to do. Otherwise it records `callback` (run once by the first
 * servicing thread), moves the server to SERVER_STARTING, creates the
 * manager thread (SCHED_FIFO, PRIO_MANAGER) and waits for SERVER_UP.
 *
 * The thread attribute object is only initialised when a thread is really
 * created, and is destroyed right after the creation.
 */
static void launch_server(struct server* server, void (*callback)()) {
	struct sched_param param;
	pthread_attr_t attr;
	pthread_t tid;

	//rclprintf(server, "launch server???\n");
	while(server->state != SERVER_DOWN && server->state != SERVER_UP) {
		//rclprintf(server, "launch_server::waiting the server");
		pthread_cond_wait(&server->cond_state, &server->lock_server);
		//rclprintf(server, "launch_server::waiting the server done");
	}

	if(server->state == SERVER_UP)
		return;

	server->callback = callback;

	//rclprintf(server, "launching server");

	server->state = SERVER_STARTING;

	param.sched_priority = PRIO_MANAGER;
	pthread_attr_init(&attr);
	pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
	pthread_attr_setschedpolicy(&attr, SCHED_FIFO);
	pthread_attr_setschedparam(&attr, &param);

	liblock_thread_create_and_bind(server->core, "rcl", &tid, &attr, manager_thread, server);

	/* the attribute object is not needed once the thread exists */
	pthread_attr_destroy(&attr);

	while(server->state != SERVER_UP) {
		//rclprintf(server, "launch_server::waiting the server");
		pthread_cond_wait(&server->cond_state, &server->lock_server);
		//rclprintf(server, "launch_server::waiting the server done");
	}
}

/*
 * Not implemented for RCL: aborts through fatal().
 * NOTE(review): there is no return statement; this relies on fatal() never
 * returning -- confirm against liblock-fatal.h.
 */
static int do_liblock_cond_signal(rcl)(liblock_cond_t* cond) { 
	fatal("implement me"); 
}

/*
 * Wakes every mini thread waiting on `cond`: each waiter is popped from the
 * condition's list, dropped from its server's timed list when it was in a
 * timed wait, pushed on the server's ready list and counted in
 * nb_ready_and_servicing. Always returns 0.
 *
 * The ll_timed link lives in server->mini_thread_timed (that is where
 * do_liblock_cond_timedwait inserts it), so that is the list it is removed
 * from.
 */
static int do_liblock_cond_broadcast(rcl)(liblock_cond_t* cond) { 
	struct mini_thread_ll* node;
	struct mini_thread*    mini_thread;
	struct server*         server;

	while((node = ll_dequeue((struct mini_thread_ll**)&cond->data))) {
		mini_thread = node->content;
		server = mini_thread->server;

		//rclprintf(server, "broadcast::dequeuing: %p", mini_thread);

		if(mini_thread->is_timed)
			ll_remove(&server->mini_thread_timed, &mini_thread->ll_timed, 0);

		ll_enqueue(&server->mini_thread_ready, node);
		__sync_fetch_and_add(&server->nb_ready_and_servicing, 1);
	}

	return 0;
}

/*
 * Waits on `cond` from inside a critical section executed by the server.
 * `ts` is the absolute deadline, or null for an untimed wait.
 *
 * The current mini thread is queued on the condition (and on the server's
 * timed list when `ts` is given), the lock flag is released and the native
 * thread switches to another mini thread. Once this mini thread is resumed
 * it takes the lock flag again before returning.
 *
 * Returns cur->wait_res, which this function sets to 0 before waiting.
 * NOTE(review): nothing in the visible source stores a timeout code in
 * wait_res -- confirm whether timeouts are meant to be reported.
 */
static int do_liblock_cond_timedwait(rcl)(liblock_cond_t* cond, struct liblock_lock* lock, const struct timespec* ts) { 
	struct mini_thread*  cur = me->mini_thread;
	struct server* server = me->server;
	//rclprintf(server, "timed wait");
	struct mini_thread*  next = get_or_allocate_mini_thread(server);

	/* prepare meta informations */
	cur->is_timed = ts ? 1 : 0;
	cur->wait_on = cond;
	cur->wait_res = 0;

	/* first, don't re-execute the request: fake_lock is described as always taken, so the servicing loop skips this slot */ 
	cur->cur_request->lock = &fake_lock;

	/* then, enqueue my request in cond  */
	//rclprintf(cur->server, "cond:enqueuing: %p", cur);
	ll_enqueue((struct mini_thread_ll**)&cond->data, &cur->ll_ready);
	//ll_print("cond->data", (struct mini_thread_ll**)&cond->data);

	/* release the lock */
	lock->locked = 0;

	if(ts) {
		//rclprintf(cur->server, "cond:timed: %p", cur);
		cur->deadline = *ts;

		ll_timed_insert(&server->mini_thread_timed, &cur->ll_timed);

		/* the manager sleeps until next_deadline: wake it when this deadline comes first */
		if(ts_lt(ts, &server->next_deadline))
			pthread_cond_broadcast(&server->cond_state);
	}

	//rclprintf(cur->server, "swapping: me is %p", me);

	/* and finally, jump to the next mini thread */
	swap_mini_thread(cur, next);
	//rclprintf(server, "%p mini-thread is running (%d)", cur, lock->locked);

	//rclprintf(cur->server, "relected: me is %p", me);
 	while(local_val_compare_and_swap(int, &lock->locked, 0, 1)) { /* one of my thread own the lock */
		//rclprintf(cur->server, "cond: retake the lock - check free threads");
 		ensure_at_least_one_free_thread(server);                    /* verify that we have enough active thread */
		//rclprintf(cur->server, "me has run: %p", me);
 		me->has_run = 1;
		//rclprintf(cur->server, "me has run done: %p", me);
 		pthread_yield();                                            /* give a chance to one of our thread to release the lock */
 	}

	//rclprintf(cur->server, "relected: me continue %p", me);

	/* the request is attached to the real lock again */
	cur->cur_request->lock = lock;

	return cur->wait_res;
}

/*
 * Untimed wait on `cond`: delegates to the timed variant with a null
 * deadline and returns its result unchanged.
 */
static int do_liblock_cond_wait(rcl)(liblock_cond_t* cond, struct liblock_lock* lock) { 
	/* a null timespec means "no deadline" for the timed variant */
	return do_liblock_cond_timedwait(rcl)(cond, lock, NULL);
}

/*
 * Initialize a condition variable for the rcl implementation.
 * Clears `cond->data`, which the wait path uses as the head of the list of
 * waiting mini-threads. Always returns 0.
 */
static int do_liblock_cond_init(rcl)(liblock_cond_t* cond) { 
	cond->data = 0; /* empty waiter list */
	return 0;
}

/*
 * Destroy a condition variable. Nothing is done for the rcl implementation:
 * `cond` is left untouched and 0 is always returned.
 */
static int do_liblock_cond_destroy(rcl)(liblock_cond_t* cond) { 
	(void)cond; /* intentionally unused */

	return 0;
}

static void do_liblock_unlock_in_cs(rcl)(struct liblock_lock* lock) {
	fatal("implement me"); 
}

static void do_liblock_relock_in_cs(rcl)(struct liblock_lock* lock) {
	fatal("implement me"); 
}

static void do_liblock_on_thread_exit(rcl)(struct thread_descriptor* desc) {
}

static void do_liblock_on_thread_start(rcl)(struct thread_descriptor* desc) {
	// int i;

	// //printf("on thread start: %d %d\n", self.id, self.running_core->core_id);
	// /* retreive my requests, just a cache prefetch */
	// for(i=0; i<topology->nb_cores; i++)
	// 	servers[i].requests[self.id].pending = 0;
}

static struct liblock_lock* do_liblock_create_lock(rcl)(struct liblock* lib, struct core* core, pthread_mutexattr_t* attr) {
	liblock_reserve_core_for(core, "rcl");

	struct liblock_lock* lock = liblock_allocate(sizeof(struct liblock_lock));

	lock->lib = lib;
	lock->server = &servers[core->core_id];
	lock->locked = 0;

	lock_server(lock->server);

	lock->server->nb_attached_locks++;

	//rclprintf(lock->server, "create attached lock (%s)", liblock_start_server_threads_by_hand ? "by hand" : "direct");

	switch(lock->server->nb_attached_locks) {
		case 1:  
			if(!liblock_start_server_threads_by_hand) 
				launch_server(lock->server, 0); 
			break;
		default: 
			pthread_cond_broadcast(&lock->server->cond_state); 
			break;
	}

	unlock_server(lock->server);

	return lock;
}

static void do_liblock_destroy_lock(rcl)(struct liblock_lock* lock) {
	//rclprintf(lock->server, "destroying lock %p", lock);

	lock_server(lock->server);

	if(!liblock_servers_always_up && !--lock->server->nb_attached_locks && lock->server->state == SERVER_UP) {
		lock->server->state = SERVER_STOPPING;
		pthread_cond_broadcast(&lock->server->cond_state);

		while(lock->server->state != SERVER_DOWN) {
			//rclprintf(lock->server, "launch_server::waiting the server");
			pthread_cond_wait(&lock->server->cond_state, &lock->server->lock_server);
			//rclprintf(lock->server, "launch_server::waiting the server done");
		}
	}

	//rclprintf(lock->server, "lock %p destroyed", lock);

	unlock_server(lock->server);
}

static void do_liblock_run(rcl)(void (*callback)()) {
	int i;

	if(__sync_val_compare_and_swap(&liblock_start_server_threads_by_hand, 1, 0) != 1)
		fatal("servers are not managed by hand");

	for(i=0; i<topology->nb_cores; i++) {
		lock_server(&servers[i]);
		if(topology->cores[i].server_type && !strcmp(topology->cores[i].server_type, "rcl"))
			launch_server(&servers[i], callback);
		unlock_server(&servers[i]);
	}
}

static void do_liblock_declare_server(rcl)(struct core* core) {
	if(!liblock_start_server_threads_by_hand) {
		lock_server(&servers[core->core_id]);
		launch_server(&servers[core->core_id], 0);
		unlock_server(&servers[core->core_id]);
	}
}

/*
 * One-time initialization of the rcl implementation.
 *
 * Allocates one server descriptor per core of the topology, marks every
 * server as down with empty counters and mini-thread lists, allocates each
 * server's request array (id_manager.last entries) on the NUMA node of the
 * corresponding core, and initializes the per-server mutex and condition
 * variables. A failed request-array allocation is reported through fatal()
 * instead of leaving a null array behind.
 */
static void do_liblock_init_library(rcl)() {
	int i;

	servers = liblock_allocate(sizeof(struct server) * topology->nb_cores);

	/* fake_lock is the placeholder installed in a request while its
	 * mini-thread waits on a condition; it starts out locked */
	fake_lock.locked = 1;

	for(i=0; i<topology->nb_cores; i++) {
		servers[i].state = SERVER_DOWN;
		servers[i].native_threads = 0;
		servers[i].nb_attached_locks = 0;
		servers[i].nb_native_threads = 0;
		servers[i].nb_servicing_threads = 0;
		servers[i].nb_free_threads = 0;
		servers[i].nb_ready_and_servicing = 0;
		servers[i].requests = numa_alloc_onnode(sizeof(struct request)*id_manager.last, topology->cores[i].node->node_id);
		if(!servers[i].requests)
			fatal("unable to allocate the request array of a server");
		servers[i].mini_thread_all = 0;
		servers[i].mini_thread_timed = 0;
		servers[i].mini_thread_ready = 0;
		servers[i].mini_thread_prepared = 0;

		/* NOTE(review): indexed by core_id rather than i, unlike the other
		 * fields; equivalent only if cores[i].core_id == i -- confirm */
		servers[topology->cores[i].core_id].core = &topology->cores[i];

		pthread_mutex_init(&servers[i].lock_server, 0);
		pthread_cond_init(&servers[i].cond_state, 0);
		pthread_cond_init(&servers[i].cond_servicing, 0);
	}
}

/*
 * Tear down the rcl implementation.
 * Not implemented: the call only reports "implement me" via fatal().
 */
static void do_liblock_kill_library(rcl)() {
	fatal("implement me");
}

/*
 * Expand the liblock_declare macro for the "rcl" implementation.
 * NOTE(review): presumably this registers the do_liblock_*(rcl) functions
 * defined above with the liblock core -- confirm against liblock.h.
 */
liblock_declare(rcl);
