/* ########################################################################## */
/* (C) UPMC, 2010-2011                                                        */
/*     Authors:                                                               */
/*       Jean-Pierre Lozi <jean-pierre.lozi@lip6.fr>                          */
/*       Gaël Thomas <gael.thomas@lip6.fr>                                    */
/*       Florian David <florian.david@lip6.fr>                                */
/*       Julia Lawall <julia.lawall@lip6.fr>                                  */
/*       Gilles Muller <gilles.muller@lip6.fr>                                */
/* -------------------------------------------------------------------------- */
/* ########################################################################## */
#include <sys/mman.h>
#include <sys/time.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sched.h>
#include <pthread.h>
#include <assert.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>
#include <unistd.h>
#include <numa.h>
#include "liblock.h"
#include "liblock-fatal.h"

#ifdef USE_PAPI
#include <papi.h>
#endif

/* Uncomment to make print_multircl_server_state() actually print. */
//#define DO_PRINT

/* Upper bound on the number of server threads of one server:
   ensure_at_least_one_free_thread() calls fatal() once it is reached. */
#define MAX_THREADS 128

/* Scheduling policy installed on the server-side threads. */
#define SCHED_ALGO SCHED_FIFO

/* Scheduling priorities, used as sched_priority values.
   NOTE(review): PRIO_SPECIAL is only referenced from commented-out code in this file. */
#define PRIO_BACKUP    1
#define PRIO_SERVICING 2
#define PRIO_MANAGER   3
#define PRIO_SPECIAL   4

/* Values taken by server->state. */
#define SERVER_DOWN     0
#define SERVER_STOPPING 1
#define SERVER_STARTING 2
#define SERVER_UP       3

/* Size in bytes of the request area allocated for each server: room for
   256 * 1024 request slots, rounded with r_align() to PAGE_SIZE. */
#define initial_mapped_size r_align(sizeof(struct request) * 256 * 1024, PAGE_SIZE)

/*
 * One request slot. Clients index the request area of a server with their
 * own self.id, fill the slot and spin until 'pending' is reset to 0 by a
 * server thread. 'pad' is sized with pad_to_cache_line() from the three
 * fields above it.
 */
struct request {
	void*              (*volatile pending)(void*);  /* operation to run; 0 when the slot is idle/served */
	void* volatile                val;              /* argument on the way in, result on the way out */
	struct liblock_lock* volatile lock;             /* lock that protects the operation */
	char                          pad[pad_to_cache_line(sizeof(void*(*)(void*)) + 
																											sizeof(void*) +
																											sizeof(struct liblock_lock*))];
};

/*
 * Descriptor of one servicing thread; descriptors are chained through
 * 'next' from server->server_threads.
 */
struct server_thread {
	struct server_thread* volatile next;          /* next descriptor in the server's list */
	struct server*                 server;        /* server this thread works for */
	pthread_t                      tid;           /* filled by liblock_thread_create_and_bind() */
	int volatile                   has_run;       /* set by the thread when it makes progress; read and reset by the manager */
	int volatile                   is_servicing;  /* 1 while servicing, 0 while parked on pcond_servicing */
};

/*
 * Per-core server state. The array 'servers' holds one entry per core of
 * the topology.
 */
struct server {
	struct request*                      request_area;         /* the mapped area */
	struct core*                         core;                 /* core the server threads are created on */
	unsigned int volatile                nb_attached_locks;    /* synchronized with plock */
	unsigned int volatile                state;                /* synchronized with plock */
	pthread_mutex_t                      plock;                /* protects the counters and the thread list */
	pthread_cond_t                       pcond_state;          /* broadcast on state changes; the manager also sleeps on it */
	pthread_cond_t                       pcond_servicing;      /* parked servicing threads wait here */
	struct server_thread* volatile       server_threads;       /* threads in the servicing queue */
	pthread_t                            backup_tid;           /* tid of the backup thread */
	int volatile                         nb_threads;           /* total number of threads */
	int volatile                         nb_servicing;         /* number of thread in the servicing state */
	int volatile                         nb_free;              /* number of free threads */
	int volatile                         alive;                /* information on liveness of the current server thread */
	void                               (*callback)();          /* one-shot callback run by the first servicing thread that claims it */
};

/*
 * A multircl lock. pad0 and pad1 are sized with pad_to_cache_line() from the
 * fields that precede them.
 * NOTE(review): declare_liblock_lock_header() is defined outside this file;
 * this file only relies on it providing the 'lib' member.
 */
struct liblock_lock {
	declare_liblock_lock_header();
	struct server*  server;       /* server that executes the critical sections of this lock */
	char            pad0[pad_to_cache_line(sizeof(struct liblock_lock_header) + sizeof(struct server*))];
	int volatile    locked;       /* 0 when free, 1 while a server thread runs a critical section on it */
	int             mutex_type;   /* value returned by liblock_getmutex_type() at creation */
	pthread_mutex_t posix_lock;   /* initialised at creation; only used by commented-out code in this file */
	char            pad1[pad_to_cache_line(sizeof(int) + sizeof(int) + sizeof(pthread_mutex_t))];
};

/* Period added to the current time for the manager's timed wait: 0 s + 1e7 ns (10 ms). */
static const struct timespec manager_timeout = { 0, 1e7 };

/* NOTE(review): not referenced by the visible code of this file. */
int multircl_always_up = 0;

/* Thread attributes (explicit SCHED_ALGO scheduling) shared by every thread created here;
   its priority is rewritten before each creation. */
static pthread_attr_t                    pthread_ready_fifo;
/* One server per core, allocated in the library initialisation. */
 struct server*                    servers;
/* Descriptor of the current thread when it is a servicing thread, 0 otherwise. */
static __thread struct server_thread*    me = 0;
/* Request currently executed by this servicing thread. */
static __thread struct request* volatile cur_request = 0;
/* Lock saved by unlock_in_cs until relock_in_cs restores it. */
static __thread struct liblock_lock*     cur_lock = 0;
/* Placeholder lock installed in a request while its real lock is released inside a critical section. */
static struct liblock_lock               fake_lock;

static void* server_thread_thread(void* _arg);

/*
 * Debug trace of a server's counters. Compiles to an empty function unless
 * DO_PRINT is defined.
 *
 *   server  - server whose state is printed
 *   pattern - short decoration string, printed twice on each side of msg
 *   msg     - message, centred in a 30-column field
 */
void print_multircl_server_state(struct server* server, const char* pattern, const char* msg) {
#if defined(DO_PRINT)
	/* Clamp the padding: strlen() is unsigned, so "30 - strlen(msg)" would
	   wrap around for a message longer than the field. */
	int len   = (int)strlen(msg);
	int space = len < 30 ? 30 - len : 0;
	int k     = space / 2, l = space - k;

	printf("[%2d/%2d] %s%s%*s%s%*s%s%s [%2d attached, %2d threads, %2d servicing, %2d free]: '%s'/'%s'\n",
				 server->core->core_id,
				 self.id,
				 pattern, pattern,
				 l, "", msg, k, "",
				 pattern, pattern,
				 server->nb_attached_locks,
				 server->nb_threads,
				 server->nb_servicing,
				 server->nb_free,
				 server->state == SERVER_UP ? "up" : "other",
				 server->alive ? "alive" : "dead");
#endif
}

/*
 * Set the scheduling priority of thread_id to prio; any failure is fatal.
 * pthread_setschedprio() reports failure through its return value and does
 * not set errno, so the message is built from the returned error number.
 */
static __attribute__((always_inline)) inline void setprio(pthread_t thread_id, unsigned int prio) {
	int err = pthread_setschedprio(thread_id, prio);

	if(err)
		fatal("unable to set priority: %s", strerror(err));
}

/*
 * x86 compare-and-swap WITHOUT the lock prefix: if *ptr == old_val then
 * *ptr = new_val. Returns the value that was in *ptr before the instruction
 * (i.e. old_val on success).
 *
 * The memory operand is both read and written, hence the "+m" constraint;
 * "memory" is deliberately left out of the clobber list to avoid reloading
 * unrelated addresses.
 */
static inline __attribute__((always_inline)) int local_compare_and_swap(int volatile* ptr, int old_val, int new_val) {
	__asm__ __volatile__( "\tcmpxchg %2,%1;" /* no lock! */
								: "+a" (old_val), "+m" (*(ptr))
								: "r" (new_val)
								: "cc");

	return old_val;
}

/*
 * x86 exchange-and-add WITHOUT the lock prefix: *ptr += val. Returns the
 * value *ptr held before the addition.
 *
 * The memory operand is both read and written, hence the "+m" constraint;
 * "memory" is deliberately left out of the clobber list to avoid reloading
 * unrelated addresses.
 */
static inline __attribute__((always_inline)) int local_fetch_and_add(int volatile* ptr, int val) {
	__asm__ __volatile__( "\txadd %0, %1;" /* no lock! */
								: "+r" (val), "+m" (*(ptr))
								:
								: "cc");
	return val;
}

/*
 * Adds val to *ptr with local_fetch_and_add() and returns the value *ptr
 * holds after the addition.
 */
static inline __attribute__((always_inline)) int local_add_and_fetch(int volatile* ptr, int val) {
	int before = local_fetch_and_add(ptr, val);

	return before + val;
}

/*
 * Make sure the server has a thread available to serve requests.
 *
 * Nothing is done while nb_free is positive. Otherwise, under plock:
 *  - if some threads are parked (nb_servicing < nb_threads), nb_free is
 *    incremented and one parked thread is signalled;
 *  - else, if the server is up, a new servicing thread is created (fatal
 *    once MAX_THREADS is reached).
 */
static inline __attribute__((always_inline)) void ensure_at_least_one_free_thread(struct server* server) {
	if(server->nb_free <= 0) {
		pthread_mutex_lock(&server->plock);
		if(server->nb_servicing >= server->nb_threads) {
			if(server->nb_threads >= MAX_THREADS)
				fatal("too many servicing threads");
			if(server->state == SERVER_UP) {
				struct server_thread* server_thread = liblock_allocate(sizeof(struct server_thread));

				print_multircl_server_state(server, "=", "create thread");
				server_thread->server       = server;
				server_thread->is_servicing = 1;
				/* The manager reads has_run as soon as the descriptor is in the
				   list; nothing visible here guarantees liblock_allocate() zeroes
				   memory, so give it a defined value. */
				server_thread->has_run      = 0;
				local_fetch_and_add(&server->nb_threads, 1);
				local_fetch_and_add(&server->nb_servicing, 1);          /* begin in the servicing state */
				local_fetch_and_add(&server->nb_free, 1);

				/* publish the descriptor at the head of the list */
				server_thread->next = server->server_threads;
				server->server_threads = server_thread;

				pthread_mutex_unlock(&server->plock);

				liblock_thread_create_and_bind(server->core, "multircl",
																			 &server_thread->tid, &pthread_ready_fifo, server_thread_thread, server_thread);
			} else
				pthread_mutex_unlock(&server->plock);
		} else {
			local_fetch_and_add(&server->nb_free, 1);
			pthread_cond_signal(&server->pcond_servicing);                 /* wake up one thread */
			pthread_mutex_unlock(&server->plock);
		}
	}
}

/*
 * Execute pending(val) under 'lock' and return its result.
 *
 * A servicing thread running on the lock's own server core takes the lock
 * itself and runs the operation inline. Every other caller posts the
 * operation in its request slot (indexed by self.id) and spins until a
 * server thread clears 'pending', then returns the value left in the slot.
 */
static void* do_liblock_execute_operation(multircl)(struct liblock_lock* lock, void* (*pending)(void*), void* val) {
	struct server*  target = lock->server;
	struct request* slot;
	void*           result;

	if(!me || self.running_core != target->core) {
		/* remote path: publish the request, 'pending' last */
		slot = &target->request_area[self.id];

		slot->lock    = lock;
		slot->val     = val;
		slot->pending = pending;

		while(slot->pending)
			PAUSE();

		return slot->val;
	}

	/* local path: another thread of this server may own the lock */
	while(local_compare_and_swap(&lock->locked, 0, 1)) {
		me->has_run = 1;
		pthread_yield();      /* let the owner run so that it can release the lock */
	}

	result = pending(val);
	lock->locked = 0;       /* release the lock */

	return result;
}

/*
 * Body of a servicing thread.
 *
 * _arg is the thread's struct server_thread. The thread consumes one unit of
 * nb_free, runs the server's one-shot callback if it wins the compare-and-swap
 * on it, then loops while the server is up:
 *  - scan the request slots between id_manager.first and
 *    id_manager.first_free and execute every pending operation whose lock it
 *    manages to take;
 *  - when more than one thread is servicing, either park on pcond_servicing
 *    (a free thread exists) or yield (all threads are busy).
 * On exit it removes itself from nb_servicing and nb_threads. Always returns 0.
 */
static void* server_thread_thread(void* _arg) {
	struct server_thread* server_thread = _arg;
	struct server* server               = server_thread->server;
	struct request* request, *last;
	void (*cb)() = server->callback;

#ifdef COUNT_EVENTS
	int event_set = PAPI_NULL;
	long long values[1];
	
	if (event_id != 0) {
		if (PAPI_create_eventset(&event_set) != PAPI_OK)
			fatal("PAPI_create_eventset");
		if (PAPI_add_event(event_set, event_id) != PAPI_OK)
			fatal("PAPI_add_events");
		
		/* This seemingly helps increasing PAPI's accuracy. */
		if (PAPI_start(event_set) != PAPI_OK)
			fatal("PAPI_start");
		if (PAPI_stop(event_set, values) != PAPI_OK)
			fatal("PAPI_stop");
		
		if (PAPI_start(event_set) != PAPI_OK)
			fatal("PAPI_start");
	}
#endif

	print_multircl_server_state(server, "=", "starting thread");
	me = server_thread;
	
	local_fetch_and_add(&server->nb_free, -1);

	/* only the thread that swaps the callback out gets to run it */
	if(cb && __sync_val_compare_and_swap(&server->callback, cb, 0)) {
		cb();
	}

	while(server->state == SERVER_UP) {
		/* liveness information read by the manager */
		server->alive          = 1;
		server_thread->has_run = 1;

		last = &server->request_area[id_manager.first_free];

		for(request=&server->request_area[id_manager.first]; request<last; request++) {
			void* (*pending)(void*) = request->pending;

			if(pending) {
				if(!local_compare_and_swap(&request->lock->locked, 0, 1)) {
					cur_request = request;
					request->val = pending(request->val);
					/* Release the lock BEFORE clearing 'pending': once 'pending'
					   is 0 the client may reuse the slot and overwrite
					   request->lock, and the wrong lock would be released. */
					request->lock->locked = 0;
					request->pending = 0;
				}
			}
		}

		if(server->nb_servicing > 1) {
			if(server->nb_free > 0) {
				/* another thread is free: park until signalled */
				pthread_mutex_lock(&server->plock);
				local_fetch_and_add(&server->nb_servicing, -1);
				server_thread->is_servicing = 0;
				print_multircl_server_state(server, "-", "unactivating");
				pthread_cond_wait(&server->pcond_servicing, &server->plock);
				local_fetch_and_add(&server->nb_free, -1);
				print_multircl_server_state(server, "+", "activating");
				server_thread->is_servicing = 1;
				local_fetch_and_add(&server->nb_servicing, 1);
				pthread_mutex_unlock(&server->plock);
			} else {
				local_fetch_and_add(&server->nb_free, 1);
				pthread_yield(); /* all the threads are busy */
				local_fetch_and_add(&server->nb_free, -1);
			}
		}
	}

	print_multircl_server_state(server, "=", "quitting thread");
	pthread_mutex_lock(&server->plock);
	local_fetch_and_add(&server->nb_servicing, -1);
	local_fetch_and_add(&server->nb_threads, -1);
	pthread_mutex_unlock(&server->plock);
	print_multircl_server_state(server, "=", "quit thread");
	
#ifdef COUNT_EVENTS
	if (event_id != 0) {
		if (PAPI_stop(event_set, values) != PAPI_OK)
			fatal("PAPI_stop");
		
		server_event_count = values[0];
	}
//	printf("Server DCM: %lld\n", server_dcms);
#endif

	return 0;
}

/*
 * Body of the backup thread. _arg is the struct server it works for.
 * For as long as the server is up, it keeps calling
 * ensure_at_least_one_free_thread(). Always returns 0.
 */
static void* backup_thread(void* _arg) {
	struct server* owner = _arg;

	print_multircl_server_state(owner, "=", "starting backup");

	for(;;) {
		if(owner->state != SERVER_UP)
			break;
		ensure_at_least_one_free_thread(owner);
	}

	print_multircl_server_state(owner, "=", "quitting backup");

	return 0;
}

/*
 * Body of the manager of one server. _arg is the struct server.
 *
 * Start-up: marks the server up, creates the backup thread (PRIO_BACKUP),
 * switches the shared attributes to PRIO_SERVICING, makes sure a servicing
 * thread exists and broadcasts pcond_state.
 *
 * Main loop (plock held except while waiting): the 'alive' flag is reset at
 * each round; if no servicing thread set it again since the previous round,
 * the manager ensures a free thread exists and shuffles priorities among the
 * servicing threads until one that has not run yet is elected. It then
 * sleeps on pcond_state, with a manager_timeout deadline when more than one
 * lock is attached, without deadline otherwise.
 *
 * Shutdown: wakes every parked thread, joins the backup and servicing
 * threads, frees the descriptors, restores PRIO_MANAGER in the shared
 * attributes, marks the server down and broadcasts pcond_state.
 * Always returns 0.
 */
static void* manager_thread(void* _arg) {
	struct server*        server = _arg;
	struct server_thread* cur;
	struct timeval        tv;
	struct timespec       ts;
	int                   done;
	struct sched_param    param;

	print_multircl_server_state(server, "*", "starting manager");

	server->state = SERVER_UP;

	param.sched_priority = PRIO_BACKUP;
	pthread_attr_setschedparam(&pthread_ready_fifo, &param);

	liblock_thread_create_and_bind(server->core, "multircl",
																 &server->backup_tid, &pthread_ready_fifo, backup_thread, server);

	param.sched_priority = PRIO_SERVICING;
	pthread_attr_setschedparam(&pthread_ready_fifo, &param);

	ensure_at_least_one_free_thread(server);

	pthread_mutex_lock(&server->plock);
	
	pthread_cond_broadcast(&server->pcond_state);

	server->alive = 1;

	while(server->state == SERVER_UP) {
		if(server->alive)
			server->alive = 0;
		else {
			/* all my threads are dead */
			print_multircl_server_state(server, ":", "rescuing");
				
			pthread_mutex_unlock(&server->plock);
			ensure_at_least_one_free_thread(server);
			pthread_mutex_lock(&server->plock);

			done = 0;
			while(!done) {
				for(cur=server->server_threads; cur; cur=cur->next) {
					if(cur->is_servicing == 1) {
						if(done || cur->has_run) {
							/* lower then restore the priority of this thread */
							setprio(cur->tid, PRIO_BACKUP);
							setprio(cur->tid, PRIO_SERVICING);
						} else {
							//printf("++ elect %p\n", (void*)cur->tid);
							done = 1;
							cur->has_run = 1; /* set it here because it may be blocked in I/O */
						}
					}
				}
						
				/* nobody could be elected: start a new round */
				if(!done)
					for(cur=server->server_threads; cur; cur=cur->next)
						cur->has_run = 0;
			}
		}

		if(server->nb_attached_locks > 1) {
			gettimeofday(&tv, 0);
			ts.tv_sec  = tv.tv_sec + manager_timeout.tv_sec;
			ts.tv_nsec = tv.tv_usec * 1000L + manager_timeout.tv_nsec;
			/* tv_nsec must stay below one second: exactly 1000000000 is
			   already out of range for pthread_cond_timedwait() */
			if(ts.tv_nsec >= 1000000000L) {
				ts.tv_nsec -= 1000000000L;
				ts.tv_sec++;
			}
			//print_multircl_server_state(server, "=", "sleep");
			pthread_cond_timedwait(&server->pcond_state, &server->plock, &ts);
			//print_multircl_server_state(server, "=", "wakeup");
		} else
			pthread_cond_wait(&server->pcond_state, &server->plock);
	}

	print_multircl_server_state(server, ":", "quitting");

	/* count every parked thread as free and wake them all so they can quit */
	local_fetch_and_add(&server->nb_free, server->nb_threads - server->nb_servicing);
	pthread_cond_broadcast(&server->pcond_servicing);

	pthread_mutex_unlock(&server->plock);

	pthread_join(server->backup_tid, 0);

	for(cur=server->server_threads; cur; cur=cur->next)
		pthread_join(cur->tid, 0);

	pthread_mutex_lock(&server->plock);

	while(server->server_threads) {
		cur=server->server_threads;
		server->server_threads = cur->next;
		free(cur);
	}

	param.sched_priority = PRIO_MANAGER;
	pthread_attr_setschedparam(&pthread_ready_fifo, &param);

	print_multircl_server_state(server, ":", "down");
	server->state = SERVER_DOWN;

	pthread_cond_broadcast(&server->pcond_state);

	pthread_mutex_unlock(&server->plock);

	return 0;
}

static void launch_server(struct server* server, void (*callback)(void)) {
	pthread_t tid;

	pthread_mutex_lock(&server->plock);
	if(server->state == SERVER_DOWN) {
		server->state    = SERVER_STARTING;
		//printf("************************$    callback is %p\n", callback);
		server->callback = callback;
	
		if(server->core == self.running_core) {
			struct sched_param param;
			param.sched_priority = PRIO_MANAGER;
			if(pthread_setschedparam(pthread_self(), SCHED_ALGO, &param))
				fatal("pthread_setschedparam");
			pthread_mutex_unlock(&server->plock);
			manager_thread(server);
			pthread_mutex_lock(&server->plock);
		} else {
			liblock_bind_thread(pthread_self(), 0, 0); /* bind the current thread elsewere */
			liblock_thread_create_and_bind(server->core, "multircl", 
																		 &tid, &pthread_ready_fifo, manager_thread, server);
			while(server->state != SERVER_UP)
				pthread_cond_wait(&server->pcond_state, &server->plock);
		}
	} else
		while(server->state != SERVER_UP)
			pthread_cond_wait(&server->pcond_state, &server->plock);
	pthread_mutex_unlock(&server->plock);
}

/*
 * Allocate and initialise a lock served by the server of 'core'.
 *
 *   lib  - library descriptor stored in the lock
 *   core - core whose server executes the critical sections
 *   attr - mutex attributes, only used to derive mutex_type
 *
 * The core is reserved for "multircl" and the lock is counted in
 * nb_attached_locks. Returns the new lock.
 */
static struct liblock_lock* do_liblock_create_lock(multircl)(struct liblock* lib, struct core* core, pthread_mutexattr_t* attr) {
	struct liblock_lock* created = liblock_allocate(cache_align(sizeof(struct liblock_lock)));
	struct server*       owner   = &servers[core->core_id];

	created->lib        = lib;
	created->server     = owner;
	created->mutex_type = liblock_getmutex_type(attr);
	created->locked     = 0;
	pthread_mutex_init(&created->posix_lock, 0);

	liblock_reserve_core_for(core, "multircl");

	pthread_mutex_lock(&owner->plock);

	owner->nb_attached_locks++;

	/* with a single lock the manager sleeps without timeout: reaching two
	   locks must wake it so that it switches to timed waits */
	if(owner->nb_attached_locks == 2)
		pthread_cond_broadcast(&owner->pcond_state);

	pthread_mutex_unlock(&owner->plock);

	return created;
}

/*
 * Detach 'lock' from its server and free it.
 *
 * When the server is not down, the lock is removed from nb_attached_locks.
 * If it was the last one and liblock_servers_always_up is not set, the
 * server is asked to stop and the caller waits until its state leaves
 * SERVER_STOPPING.
 */
static void do_liblock_destroy_lock(multircl)(struct liblock_lock* lock) {
	struct server* server = lock->server;

	pthread_mutex_lock(&server->plock);

	print_multircl_server_state(server, "*", "destroying lock");
	if(server->state != SERVER_DOWN) {
		if(!--server->nb_attached_locks && !liblock_servers_always_up) {
			server->state = SERVER_STOPPING;
			pthread_cond_broadcast(&server->pcond_state);
			/* Re-check the state after each wakeup: pcond_state is also
			   broadcast for other reasons, and condition waits may return
			   spuriously. */
			while(server->state == SERVER_STOPPING)
				pthread_cond_wait(&server->pcond_state, &server->plock);
		}
	}

	pthread_mutex_unlock(&server->plock);
	free(lock);
}

/*
 * Start by hand every server whose core is typed "multircl", handing
 * 'callback' to each of them.
 *
 * Fatal unless liblock_start_server_threads_by_hand is 1; the flag is
 * atomically reset to 0 by the check.
 */
static void do_liblock_run(multircl)(void (*callback)()) {
	int core_idx;

	if(__sync_val_compare_and_swap(&liblock_start_server_threads_by_hand, 1, 0) != 1)
		fatal("servers are not managed by hand");

	for(core_idx = 0; core_idx < topology->nb_cores; core_idx++) {
		const char* type = topology->cores[core_idx].server_type;

		if(!type)
			continue;
		if(strcmp(type, "multircl"))
			continue;

		launch_server(&servers[core_idx], callback);
	}
}

/*
 * Library tear-down hook. Not implemented: nothing allocated by the
 * initialisation is released here.
 */
static void do_liblock_kill_library(multircl)() {
	// TODO
}

/*
 * Library initialisation hook ('unused' is ignored).
 *
 * Prepares the shared thread attributes (explicit SCHED_ALGO scheduling at
 * PRIO_MANAGER), allocates one server per core of the topology and puts
 * every server in the SERVER_DOWN state with its own request area, allocated
 * on the NUMA node of the core. Failure to allocate a request area is fatal.
 */
static void do_liblock_init_library(multircl)(struct core* unused) {
	struct sched_param param;
	int i;

	param.sched_priority = PRIO_MANAGER;
	pthread_attr_init(&pthread_ready_fifo);
	pthread_attr_setinheritsched(&pthread_ready_fifo, PTHREAD_EXPLICIT_SCHED);
	pthread_attr_setschedpolicy(&pthread_ready_fifo, SCHED_ALGO);
	pthread_attr_setschedparam(&pthread_ready_fifo, &param);

	servers = liblock_allocate(sizeof(struct server) * topology->nb_cores);

	/* the placeholder lock starts in the locked state */
	fake_lock.locked = 1;

	for(i=0; i<topology->nb_cores; i++) {
		/* 'core' is indexed by core_id while the other fields are indexed by i;
		   presumably cores[i].core_id == i -- TODO confirm. Do not clear the
		   whole entry here: its 'core' may have been set by another iteration. */
		servers[topology->cores[i].core_id].core = &topology->cores[i];

		servers[i].nb_attached_locks  = 0;
		servers[i].state              = SERVER_DOWN;
		servers[i].request_area       = numa_alloc_onnode(initial_mapped_size, topology->cores[i].node->node_id);
		if(!servers[i].request_area)
			fatal("unable to allocate the request area of server %d", i);

		/* fields read before anything else writes them */
		servers[i].server_threads     = 0;
		servers[i].callback           = 0;
		servers[i].alive              = 0;

		servers[i].nb_threads         = 0;
		servers[i].nb_servicing       = 0;
		servers[i].nb_free            = 0;

		pthread_mutex_init(&servers[i].plock, 0);
		pthread_cond_init(&servers[i].pcond_state, 0);
		pthread_cond_init(&servers[i].pcond_servicing, 0);
	}
}

static void do_liblock_on_thread_start(multircl)(struct thread_descriptor* desc) {
	int i;

	//printf("on thread start: %d %d\n", self.id, self.running_core->core_id);
	/* retreive my requests, just a cache prefetch */
	for(i=0; i<topology->nb_cores; i++)
		servers[i].request_area[self.id].pending = 0;
}

static void do_liblock_on_thread_exit(multircl)(struct thread_descriptor* desc) {
	int i;

	for(i=0; i<topology->nb_cores; i++)
		servers[i].request_area[self.id].pending = 0;
}

static void do_liblock_unlock_in_cs(multircl)(struct liblock_lock* lock) {
	if(cur_lock)
		fatal("unable to unlock more than one lock in cs");
	cur_lock = cur_request->lock;
	cur_request->lock = &fake_lock;
	lock->locked = 0;
}

/*
 * Take 'lock' again from inside a critical section, after a call to
 * unlock_in_cs.
 *
 * Spins on the lock, making sure the server keeps a free thread and
 * yielding between attempts, then restores the lock saved in cur_lock into
 * the current request. cur_lock is cleared afterwards so that a later
 * unlock_in_cs on this thread is not rejected as a nested unlock.
 */
static void do_liblock_relock_in_cs(multircl)(struct liblock_lock* lock) {
	while(local_compare_and_swap(&lock->locked, 0, 1)) { /* one of my thread own the lock */
		ensure_at_least_one_free_thread(lock->server);     /* verify that we have enough active thread */
		me->has_run = 1;
		pthread_yield();                                   /* give a chance to one of our thread to release the lock */
	}
	cur_request->lock = cur_lock;
	cur_lock = 0;                                        /* nothing is saved any more */
}

/*
 * Condition variables are not implemented by this back end: calling this
 * hook is fatal. The return statement only matters if fatal() returns.
 */
static int do_liblock_cond_init(multircl)(liblock_cond_t* cond) { 
	fatal("implement me"); 
	return ENOSYS;
}

/*
 * Condition variables are not implemented by this back end: calling this
 * hook is fatal. The return statement only matters if fatal() returns.
 */
static int do_liblock_cond_signal(multircl)(liblock_cond_t* cond) { 
	fatal("implement me"); 
	return ENOSYS;
}

/*
 * Condition variables are not implemented by this back end: calling this
 * hook is fatal. The return statement only matters if fatal() returns.
 */
static int do_liblock_cond_broadcast(multircl)(liblock_cond_t* cond) { 
	fatal("implement me"); 
	return ENOSYS;
}

/*
 * Condition variables are not implemented by this back end: calling this
 * hook is fatal. The return statement only matters if fatal() returns.
 */
static int do_liblock_cond_wait(multircl)(liblock_cond_t* cond, struct liblock_lock* lock) { 
	fatal("implement me"); 
	return ENOSYS;
}

/*
 * Condition variables are not implemented by this back end: calling this
 * hook is fatal. The return statement only matters if fatal() returns.
 */
static int do_liblock_cond_timedwait(multircl)(liblock_cond_t* cond, struct liblock_lock* lock, const struct timespec* ts) { 
	fatal("implement me"); 
	return ENOSYS;
}

/*
 * Condition variables are not implemented by this back end: calling this
 * hook is fatal. The return statement only matters if fatal() returns.
 */
static int do_liblock_cond_destroy(multircl)(liblock_cond_t* cond) { 
	fatal("implement me"); 
	return ENOSYS;
}

// static int do_liblock_cond_timedwait(multircl)(pthread_cond_t* cond, struct liblock_lock* lock, const struct timespec* ts) { 
// 	struct liblock_lock* old = cur_request->lock;
// 	struct server* server = lock->server;

// 	setprio(pthread_self(), PRIO_SPECIAL);

// 	me->is_servicing = 2;
//  	local_fetch_and_add(&server->nb_threads, -1);
//  	local_fetch_and_add(&server->nb_servicing, -1);

// 	ensure_at_least_one_free_thread(lock->server);

// 	cur_request->lock = &fake_lock;

// 	pthread_mutex_lock(&lock->posix_lock);
// 	lock->locked = 0;

// 	if(ts)
// 		pthread_cond_timedwait(cond, &lock->posix_lock, ts);
// 	else
// 		pthread_cond_wait(cond, &lock->posix_lock);

// 	pthread_mutex_unlock(&lock->posix_lock);

// 	me->is_servicing = 1;
//  	local_fetch_and_add(&server->nb_threads, 1);
//  	local_fetch_and_add(&server->nb_servicing, 1);

// 	setprio(pthread_self(), PRIO_SERVICING);

// 	while(local_compare_and_swap(&lock->locked, 0, 1)) { /* one of my thread own the lock */
// 		ensure_at_least_one_free_thread(lock->server);     /* verify that we have enough active thread */
// 		me->has_run = 1;
// 		pthread_yield();                                   /* give a chance to one of our thread to release the lock */
// 	}
// 	cur_request->lock = old;
// 	return 0;
// }

// static int do_liblock_cond_wait(multircl)(pthread_cond_t* cond, struct liblock_lock* lock) {
// 	return do_liblock_cond_timedwait(multircl)(cond, lock, 0);
// }

/*
 * Server declaration hook: launches the server of 'core' right away, with
 * no callback, unless servers are started by hand.
 */
static void do_liblock_declare_server(multircl)(struct core* core) {
	if(liblock_start_server_threads_by_hand)
		return;

	launch_server(&servers[core->core_id], 0);
}

/* NOTE(review): liblock_declare() is defined outside this file; presumably it
   registers the do_liblock_*(multircl) hooks above as the "multircl" back end -- confirm. */
liblock_declare(multircl);
