/* ########################################################################## */
/* (C) UPMC, 2010-2011                                                        */
/*     Authors:                                                               */
/*       Jean-Pierre Lozi <jean-pierre.lozi@lip6.fr>                          */
/*       Gaël Thomas <gael.thomas@lip6.fr>                                    */
/*       Florian David <florian.david@lip6.fr>                                */
/*       Julia Lawall <julia.lawall@lip6.fr>                                  */
/*       Gilles Muller <gilles.muller@lip6.fr>                                */
/* -------------------------------------------------------------------------- */
/* ########################################################################## */
#include <sys/mman.h>
#include <sys/time.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sched.h>
#include <pthread.h>
#include <assert.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>
#include <unistd.h>
#include "liblock.h"
#include "liblock-fatal.h"

/* Number of entries allocated for each server's monitored_locks / monitored_locks_n arrays. */
#define MAX_LOCKS      4096

/* Scheduling priorities installed in the thread attributes before creating
 * the backup thread, the servicing threads and the manager thread. */
#define PRIO_BACKUP    40
#define PRIO_SERVICING 50
#define PRIO_MANAGER   60

/* Values stored in server->state. */
#define SERVER_DOWN     0
#define SERVER_STOPPING 1
#define SERVER_STARTING 2
#define SERVER_UP       3

/* Size in bytes of each server's request area: 256K request slots, rounded by r_align to PAGE_SIZE. */
#define initial_mapped_size r_align(sizeof(struct request) * 256 * 1024, PAGE_SIZE)

/* Relative timeout (0 s + 1e8 ns) added to the current time for the manager's timed wait. */
static const struct timespec manager_timeout = { 0, 1e8 };

/*
 * One request slot of a server's request area. A client uses the slot at index
 * self.id: it fills 'lock' and 'val', writes 'pending' last, then spins until
 * the servicing thread resets 'pending' to 0 and reads the result from 'val'.
 */
struct request {
	void*                 (*volatile pending)(void*);   /* function to run; non-null while the request is outstanding */
	void*                   val;                        /* argument on entry, result once 'pending' is cleared */
	struct liblock_lock*    lock;                       /* lock under which 'pending' must run */
	/* padding sized by pad_to_cache_line (defined elsewhere) from the three fields above */
	char                    pad[pad_to_cache_line(sizeof(void*(*)(void*)) + 
																								sizeof(void*) + 
																								sizeof(struct liblock_lock*))];
};

/*
 * Descriptor of one servicing thread; descriptors are chained from
 * server->server_threads.
 */
struct server_thread {
	struct server_thread* volatile next;           /* next descriptor in the server's list */
	struct server*                 server;         /* server this thread works for */
	pthread_t                      tid;            /* filled in by liblock_thread_create_and_bind */
	int volatile                   has_run;        /* set by the thread on each loop iteration; read and reset by the manager */
	int volatile                   is_servicing;   /* 1 while scanning requests, 0 while parked on pcond_servicing */
};



/*
 * Per-core server state: the request area polled by the servicing threads,
 * the table of monitored locks, and the bookkeeping used by the manager and
 * backup threads.
 */
struct server {
	struct request*                      request_area;         /* the mapped area */
	struct core*                         core;                 /* core this server is attached to */
	struct liblock_lock* volatile*       monitored_locks;      /* locks that switched to server mode (MAX_LOCKS entries) */
	unsigned int volatile*               monitored_locks_n;    /* per-entry count of servicing threads currently pointing at it */
	unsigned int volatile                nb_monitored_locks;   /* number of used entries in monitored_locks */
	unsigned int volatile                nb_attached_locks;    /* synchronized with plock */
	unsigned int volatile                state;                /* synchronized with plock */
	pthread_mutex_t                      plock;
	pthread_cond_t                       pcond_state;          /* signalled on state / attached-lock changes */
	pthread_cond_t                       pcond_servicing;      /* parked servicing threads wait here */
	struct server_thread* volatile       server_threads;       /* threads in the servicing queue */
	pthread_t                            backup_tid;           /* tid of the backup thread */
	int volatile                         nb_threads;           /* total number of threads */
	int volatile                         nb_servicing;         /* number of thread in the servicing state */
	int volatile                         nb_free;              /* number of free threads */
	int volatile                         alive;                /* information on liveness of the current server thread */
	void                               (*callback)();          /* run once by the first servicing thread that claims it */
};

/*
 * The adapt lock. It starts as a spin lock (spin_lock is 0 when free, 1 when
 * held) and collects contention statistics; once it is judged contended,
 * spin_lock is set to 2 and operations are forwarded to the server threads.
 * The pad0/pad1/pad2 arrays are sized with pad_to_cache_line (defined elsewhere).
 */
struct liblock_lock {
	declare_liblock_lock_header();
	struct server*  server;
	unsigned int volatile    nb_taken;             /* common part */
	unsigned int volatile    nb_changed;           /* number of acquisitions by a different thread than the previous one */
	unsigned int volatile    nb_contended;         /* number of such owner changes that had to spin first */
	void* volatile           last_owner;           /* just to know the last owner */
	char                     pad0[pad_to_cache_line(sizeof(struct liblock_lock_header) + sizeof(struct server*)
																									+ 3*sizeof(unsigned int) + sizeof(void*))];
	int volatile             locked;               /* server informations */
	pthread_mutex_t          posix_lock;           /* mutex paired with condition variables in cond_wait / cond_timedwait */
	unsigned int             nb_useless;           /* only initialised to 0 in this file */
	char                     pad1[pad_to_cache_line(sizeof(int) + sizeof(unsigned int) + sizeof(pthread_mutex_t))];
	unsigned int volatile    spin_lock;            /* the bad spinlock, should be in a separate line when in spinlock mode */
	unsigned int volatile    lock_idx;             /* index of this lock in server->monitored_locks */
	char                     pad2[pad_to_cache_line(2*sizeof(unsigned int))];
};

/* When non-zero, destroying the last attached lock does not stop the server. */
int adapt_always_up = 0;

/* Thread attributes (SCHED_FIFO, explicit scheduling) shared by every thread created in this file;
 * the priority stored in it is changed before each kind of thread is created. */
static pthread_attr_t                 pthread_ready_fifo;
/* Array of topology->nb_cores servers. */
static struct server*                 servers;
/* Descriptor of the calling thread when it is a servicing thread, 0 otherwise.
 * The address of this thread-local variable is also used as a thread identity for last_owner. */
static __thread struct server_thread* me = 0;

/* Entry point of the servicing threads. */
static void* server_thread_thread(void* _arg);

/*
 * Prints a one-line trace of the server state on stdout.
 *
 *   server  - server whose counters are printed
 *   pattern - decoration string, printed twice on each side of the message
 *   msg     - message, centred in a 30-character field
 *
 * The padding is computed on a clamped value: a message longer than 30
 * characters is printed without padding instead of letting the unsigned
 * subtraction wrap around and produce bogus field widths.
 */
void print_adapt_server_state(struct server* server, const char* pattern, const char* msg) {
#if 1
	size_t len  = strlen(msg);
	int    room = len < 30 ? (int)(30 - len) : 0;   /* total number of padding characters */
	int    k    = room / 2;                          /* right padding */
	int    l    = room - k;                          /* left padding */

	printf("[%2d/%2d] %s%s%*s%s%*s%s%s [%2u attached, %2d threads, %2d servicing, %2d free]: '%s'/'%s'\n",
				 server->core->core_id, 
				 self.id, 
				 pattern, pattern,
				 l, "", msg, k, "",
				 pattern, pattern,
				 server->nb_attached_locks,               /* unsigned int, hence %u */
				 server->nb_threads,
				 server->nb_servicing,
				 server->nb_free,
				 server->state == SERVER_UP ? "up" : "other",
				 server->alive ? "alive" : "dead");
#endif
}

/*
 * Sets the scheduling priority of 'thread_id' to 'prio' and calls fatal() on failure.
 *
 * pthread_setschedprio() reports failure through its return value and does
 * not set errno, so the message is built from the returned error number.
 */
static __attribute__((always_inline)) inline void setprio(pthread_t thread_id, unsigned int prio) {
	int err = pthread_setschedprio(thread_id, prio);

	if(err)
		fatal("unable to set priority: %s", strerror(err));
}

/*
 * Compare-and-swap WITHOUT the lock prefix: it is a single instruction, but it
 * is not atomic with respect to other processors. If *ptr equals old_val,
 * new_val is stored in *ptr.
 *
 * Returns the value *ptr held before the instruction, so the swap happened
 * iff the result equals old_val.
 *
 * *ptr is both read and written by cmpxchg, hence the "+m" constraint. The
 * "memory" clobber is deliberately left out to avoid reloading unrelated
 * addresses.
 */
static inline __attribute__((always_inline)) int local_compare_and_swap(int volatile* ptr, int old_val, int new_val) {
	__asm__ __volatile__( "\tcmpxchg %2,%1;" /* no lock! */
								: "=a" (old_val), "+m" (*(ptr))
								: "r" (new_val), "0" (old_val)
								: "cc");

	return old_val;
}

/*
 * Fetch-and-add WITHOUT the lock prefix: it is a single instruction, but it is
 * not atomic with respect to other processors. Adds val to *ptr.
 *
 * Returns the value *ptr held before the addition.
 *
 * Both operands are read and written by xadd, hence the "+" constraints. The
 * "memory" clobber is deliberately left out to avoid reloading unrelated
 * addresses.
 */
static inline __attribute__((always_inline)) int local_fetch_and_add(int volatile* ptr, int val) {
	__asm__ __volatile__( "\txadd %0, %1;" /* no lock! */
								: "+r" (val), "+m" (*(ptr))
								:
								: "cc");
	return val;
}

/*
 * Adds val to *ptr with local_fetch_and_add and returns the value *ptr holds
 * after the addition.
 */
static inline __attribute__((always_inline)) int local_add_and_fetch(int volatile* ptr, int val) {
	int before = local_fetch_and_add(ptr, val);

	return before + val;
}

/*
 * Makes sure the server has a free servicing thread.
 *
 * Nothing is done when nb_free is already positive. Otherwise, under plock:
 *   - if not every thread is in the servicing state, nb_free is incremented
 *     and one waiter on pcond_servicing is signalled;
 *   - else, if the server is up, a new servicing thread is registered and
 *     created (outside the mutex);
 *   - else nothing happens.
 */
static inline __attribute__((always_inline)) void ensure_at_least_one_free_thread(struct server* server) {
	struct server_thread* fresh;

	if(server->nb_free > 0)
		return;

	pthread_mutex_lock(&server->plock);

	if(server->nb_servicing != server->nb_threads) {
		local_fetch_and_add(&server->nb_free, 1);
		pthread_cond_signal(&server->pcond_servicing);                 /* wake up one thread */
		pthread_mutex_unlock(&server->plock);
		return;
	}

	if(server->state != SERVER_UP) {
		pthread_mutex_unlock(&server->plock);
		return;
	}

	fresh = liblock_allocate(sizeof(struct server_thread));

	print_adapt_server_state(server, "=", "create thread");
	fresh->server       = server;
	fresh->is_servicing = 1;
	server->nb_threads++;
	server->nb_servicing++;                    /* the new thread begins in the servicing state */
	local_fetch_and_add(&server->nb_free, 1);

	/* push the descriptor at the head of the server's thread list */
	fresh->next            = server->server_threads;
	server->server_threads = fresh;

	pthread_mutex_unlock(&server->plock);

	liblock_thread_create_and_bind(server->core, "adapt", 
																 &fresh->tid, &pthread_ready_fifo, server_thread_thread, fresh);
}

void* multircl_execute_operation(struct liblock_lock* lock, void* (*pending)(void*), void* val) {
	struct server* server = lock->server;

	if(me && self.running_core == server->core) {
		struct server* server = lock->server;
		void*          res;
		while(local_compare_and_swap(&lock->locked, 0, 1)) { /* one of my thread own the lock */
			ensure_at_least_one_free_thread(server);        /* verify that we have enough active thread */
			me->has_run = 1;
			pthread_yield();                          /* give a chance to one of our thread to release the lock */
		}

		res = pending(val);
		lock->locked = 0;                           /* I release the lock */

		return res;
	}
	
	struct request* req = &lock->server->request_area[self.id];

	req->lock = lock;
	req->val = val;
	req->pending = pending;

	while(req->pending)
		PAUSE();

	return req->val;
}

void* do_liblock_execute_operation(adapt)(struct liblock_lock* lock, void* (*pending)(void*), void* val) {
	void* res;
	int was_taken = 0, prev;

	while((prev = __sync_val_compare_and_swap(&lock->spin_lock, 0, 1))) {
		if(prev == 2)
			return multircl_execute_operation(lock, pending, val);
		was_taken = 1;
		PAUSE();
	}

	res = pending(val);

	lock->nb_taken++;

	if(lock->last_owner != &me) {
		lock->last_owner = &me;

		lock->nb_changed++;
		if(was_taken)
			lock->nb_contended++;

		if(!(lock->nb_taken % 10000))
			printf("state: %d %d %d %d from %d\n", lock->nb_contended, lock->nb_changed, lock->nb_taken, was_taken, self.running_core->core_id);

		if(lock->nb_taken > 100 && (10*lock->nb_contended > 9*lock->nb_changed)) {
			printf("transit: %d %d %d %d %d\n", lock->nb_contended, lock->nb_changed, lock->nb_taken, lock->spin_lock, was_taken);
			lock->lib->_execute_operation = multircl_execute_operation;
			lock->spin_lock = 2;
			lock->nb_taken = 0;
			lock->nb_changed = 0;
			lock->nb_contended = 0;

			pthread_mutex_lock(&lock->server->plock);
			lock->lock_idx = lock->server->nb_monitored_locks;
			lock->server->monitored_locks[lock->lock_idx] = lock;
			lock->server->monitored_locks_n[lock->lock_idx] = 0;
			lock->server->nb_monitored_locks++;
			pthread_mutex_unlock(&lock->server->plock);

			return res;
		}
	}
 
	lock->spin_lock = 0;
	
	return res;
}

static void* server_thread_thread(void* _arg) {
	struct server_thread* server_thread = _arg;
	struct server* server               = server_thread->server;
	struct request* request, *last;
	void (*cb)() = server->callback;
	int served;
	struct liblock_lock* monitored_lock = 0;
	static const unsigned int phase_threshold = 20;
	unsigned int monitored_lock_idx = -1, nb_monitored_locks;
	unsigned int phase = 0;

	print_adapt_server_state(server, "=", "starting thread");
	me = server_thread;

	if(cb && __sync_val_compare_and_swap(&server->callback, cb, 0))
		cb();

	local_fetch_and_add(&server->nb_free, -1);

	while(server->state == SERVER_UP) {
		server->alive          = 1;
		server_thread->has_run = 1;

		last = &server->request_area[id_manager.first_free];

		served = 0;
		for(request=&server->request_area[id_manager.first]; request<last; request++) {
			if(request->pending && !local_compare_and_swap(&request->lock->locked, 0, 1)) {
				request->val = request->pending(request->val);
				request->pending = 0;
				request->lock->locked = 0;
			}
		}		

		if(phase++ > phase_threshold) {
			if(monitored_lock)
				__sync_fetch_and_sub(&server->monitored_locks_n[monitored_lock_idx], 1);
			nb_monitored_locks = server->nb_monitored_locks;
			if(nb_monitored_locks) {
				monitored_lock_idx = (monitored_lock_idx + 1) % nb_monitored_locks;
				__sync_fetch_and_add(&server->monitored_locks_n[monitored_lock_idx], 1);
				monitored_lock = server->monitored_locks[monitored_lock_idx];
			} else
				monitored_lock = 0;
		}

		if(server->nb_servicing > 1) {
			if(server->nb_free > 0) {
				pthread_mutex_lock(&server->plock);
				server->nb_servicing--;
				server_thread->is_servicing = 0;
				print_adapt_server_state(server, "-", "unactivating");
				pthread_cond_wait(&server->pcond_servicing, &server->plock);
				local_fetch_and_add(&server->nb_free, -1);
				print_adapt_server_state(server, "+", "activating");
				server_thread->is_servicing = 1;
				server->nb_servicing++;
				pthread_mutex_unlock(&server->plock);
			} else {
				local_fetch_and_add(&server->nb_free, 1);
				pthread_yield(); /* all the threads are busy */
				local_fetch_and_add(&server->nb_free, -1);
			}
		}
	}

	if(monitored_lock)
		__sync_fetch_and_sub(&server->monitored_locks_n[monitored_lock_idx], 1);

	print_adapt_server_state(server, "=", "quit thread - take lock");
	pthread_mutex_lock(&server->plock);
	server->nb_servicing--;
	server->nb_threads--;
	pthread_mutex_unlock(&server->plock);
	print_adapt_server_state(server, "=", "quit thread - release lock");
	return 0;
}

/*
 * Body of the backup thread. _arg is the server.
 *
 * While the server is up, keeps calling ensure_at_least_one_free_thread,
 * tracing each call. Always returns 0.
 */
static void* backup_thread(void* _arg) {
	struct server* const srv = _arg;

	print_adapt_server_state(srv, "=", "starting backup");

	for(;;) {
		if(srv->state != SERVER_UP)
			break;

		print_adapt_server_state(srv, "=", "ensure");
		ensure_at_least_one_free_thread(srv);
		print_adapt_server_state(srv, "=", "ensure done");
	}

	print_adapt_server_state(srv, "=", "quitting backup");

	return 0;
}

/*
 * Body of the manager thread. _arg is the server. Always returns 0.
 *
 * Start-up: marks the server up, creates the backup thread with PRIO_BACKUP,
 * switches the shared attributes to PRIO_SERVICING, makes sure one servicing
 * thread exists and wakes the threads waiting on pcond_state.
 *
 * Main loop (under plock, while the server is up): if no servicing thread
 * set 'alive' since the previous round, one servicing thread that has not run
 * yet is elected and the priority of the others is lowered then restored with
 * setprio. The manager then sleeps on pcond_state, with a timeout only when
 * more than one lock is attached.
 *
 * Shutdown: wakes the parked servicing threads, joins the backup thread and
 * every servicing thread, frees the descriptors, restores PRIO_MANAGER in the
 * shared attributes, sets the state to SERVER_DOWN and broadcasts pcond_state.
 */
static void* manager_thread(void* _arg) {
	struct server*        server = _arg;
	struct server_thread* cur;
	struct timeval        tv;
	struct timespec       ts;
	int                   done;
	struct sched_param    param;

	print_adapt_server_state(server, "*", "starting manager");

	server->state = SERVER_UP;

	param.sched_priority = PRIO_BACKUP;
	pthread_attr_setschedparam(&pthread_ready_fifo, &param);

	liblock_thread_create_and_bind(server->core, "adapt", 
																 &server->backup_tid, &pthread_ready_fifo, backup_thread, server);

	param.sched_priority = PRIO_SERVICING;
	pthread_attr_setschedparam(&pthread_ready_fifo, &param);

	ensure_at_least_one_free_thread(server);

	pthread_mutex_lock(&server->plock);
	
	pthread_cond_broadcast(&server->pcond_state);

	server->alive = 1;

	while(server->state == SERVER_UP) {
		if(server->alive)
			server->alive = 0;
		else {
			/* all my threads are dead */
			print_adapt_server_state(server, ":", "rescuing");
				
			pthread_mutex_unlock(&server->plock);
			ensure_at_least_one_free_thread(server);
			pthread_mutex_lock(&server->plock);

			/* NOTE(review): this loop only ends once a thread with is_servicing set and has_run clear is found */
			done = 0;
			while(!done) {
				for(cur=server->server_threads; cur; cur=cur->next) {
					if(cur->is_servicing) {
						if(done || cur->has_run) {
							setprio(cur->tid, PRIO_BACKUP);
							setprio(cur->tid, PRIO_SERVICING);
						} else {
							//printf("++ elect %p\n", (void*)cur->tid);
							done = 1;
							cur->has_run = 1; /* set it here because maybe in I/O */
						}
					}
				}
						
				if(!done)
					for(cur=server->server_threads; cur; cur=cur->next)
						cur->has_run = 0;
			}
		}

		if(server->nb_attached_locks > 1) {
			/* absolute deadline = now + manager_timeout, in integer arithmetic */
			gettimeofday(&tv, 0);
			ts.tv_sec  = tv.tv_sec           + manager_timeout.tv_sec;
			ts.tv_nsec = tv.tv_usec * 1000L  + manager_timeout.tv_nsec;
			/* tv_nsec must stay below one second, otherwise the timed wait fails with EINVAL */
			if(ts.tv_nsec >= 1000000000L) {
				ts.tv_nsec -= 1000000000L;
				ts.tv_sec++;
			}
			pthread_cond_timedwait(&server->pcond_state, &server->plock, &ts);
		} else
			pthread_cond_wait(&server->pcond_state, &server->plock);
	}

	print_adapt_server_state(server, ":", "manager shutdown");

	/* count every parked thread as free and wake them all so they can see the new state */
	local_fetch_and_add(&server->nb_free, server->nb_threads - server->nb_servicing);
	pthread_cond_broadcast(&server->pcond_servicing);

	pthread_mutex_unlock(&server->plock);

	//param.sched_priority = 0;
	//if(pthread_setschedparam(pthread_self(), SCHED_OTHER, &param)) fatal("pthread_setschedparam");

	print_adapt_server_state(server, ":", "joining backup");

	pthread_join(server->backup_tid, 0);

	print_adapt_server_state(server, ":", "joining threads");

	for(cur=server->server_threads; cur; cur=cur->next) {
		pthread_join(cur->tid, 0);
	}

	print_adapt_server_state(server, ":", "taking lock");

	pthread_mutex_lock(&server->plock);

	print_adapt_server_state(server, ":", "freeing threads");

	/* NOTE(review): descriptors come from liblock_allocate - confirm free() is the matching release */
	while(server->server_threads) {
		cur=server->server_threads;
		server->server_threads = cur->next;
		free(cur);
	}

	print_adapt_server_state(server, ":", "changing prio");

	param.sched_priority = PRIO_MANAGER;
	pthread_attr_setschedparam(&pthread_ready_fifo, &param);

	print_adapt_server_state(server, ":", "down");
	server->state = SERVER_DOWN;

	pthread_cond_broadcast(&server->pcond_state);

	pthread_mutex_unlock(&server->plock);

	return 0;
}

static void launch_server(struct server* server, void (*callback)(void)) {
	pthread_t tid;

	pthread_mutex_lock(&server->plock);
	if(server->state == SERVER_DOWN) {
		server->state    = SERVER_STARTING;
		//printf("************************$    callback is %p\n", callback);
		server->callback = callback;
	
		if(server->core == self.running_core) {
			struct sched_param param;
			param.sched_priority = PRIO_MANAGER;
			if(pthread_setschedparam(pthread_self(), SCHED_FIFO, &param))
				fatal("pthread_setschedparam");
			pthread_mutex_unlock(&server->plock);
			manager_thread(server);
			pthread_mutex_lock(&server->plock);
		} else {
			liblock_bind_thread(pthread_self(), 0, 0); /* bind the current thread elsewere */
			liblock_thread_create_and_bind(server->core, "adapt", 
																		 &tid, &pthread_ready_fifo, manager_thread, server);
			while(server->state != SERVER_UP)
				pthread_cond_wait(&server->pcond_state, &server->plock);
		}
	}
	pthread_mutex_unlock(&server->plock);
}

/*
 * Creates an adapt lock attached to the server of 'core'.
 *
 *   lib  - library descriptor stored in the lock
 *   core - core whose server will serve the lock once it switches to server mode
 *   arg  - unused
 *
 * Returns the new lock, in spin mode. Every field read by the other functions
 * of this file is initialised here instead of relying on liblock_allocate
 * returning zeroed memory.
 */
static struct liblock_lock* do_liblock_create_lock(adapt)(struct liblock* lib, struct core* core, void* arg) {
	struct liblock_lock* lock   = liblock_allocate(cache_align(sizeof(struct liblock_lock)));
	struct server*       server = &servers[core->core_id];

	lock->lib          = lib;
	lock->server       = server;
	lock->locked       = 0;
	pthread_mutex_init(&lock->posix_lock, 0);
	lock->nb_taken     = 0;
	lock->nb_changed   = 0;                 /* used by the contention heuristic */
	lock->nb_contended = 0;
	lock->last_owner   = 0;                 /* compared against the caller's identity on each operation */
	lock->spin_lock    = 0;
	lock->lock_idx     = 0;                 /* meaningful only once the lock is monitored */
	lock->nb_useless   = 0;

	liblock_reserve_core_for(core, "adapt");

	pthread_mutex_lock(&server->plock);

	//printf("attached locks: %d %d\n", server->nb_attached_locks, server->state);
	server->nb_attached_locks++;

	if(server->nb_attached_locks == 2) {/* the manager is sleeping without any timeout */
		pthread_cond_broadcast(&server->pcond_state);
	}

	pthread_mutex_unlock(&server->plock);

	return lock;
}

/*
 * Destroys 'lock'.
 *
 * A lock that switched to server mode (spin_lock == 2) is first removed from
 * its server's monitored_locks table: the last entry is moved into its slot
 * (and told its new index), then the caller waits until no servicing thread
 * references that slot any more. A lock that never switched was never
 * registered, so the table is left untouched.
 *
 * Unless the server is already down, the attached-lock count is then
 * decremented. When it reaches zero and adapt_always_up is not set, the server
 * is asked to stop and the caller waits until it has left the
 * SERVER_STOPPING state.
 */
static void do_liblock_destroy_lock(adapt)(struct liblock_lock* lock) {
	struct server* server = lock->server;
	printf("destroying lock!\n");

	if(lock->spin_lock == 2) {
		unsigned int idx, last;

		pthread_mutex_lock(&server->plock);
		idx  = lock->lock_idx;
		last = server->nb_monitored_locks - 1;
		server->monitored_locks[idx] = server->monitored_locks[last];
		server->monitored_locks[idx]->lock_idx = idx;   /* keep the moved lock's index in sync */
		server->nb_monitored_locks = last;
		pthread_mutex_unlock(&server->plock);

		while(server->monitored_locks_n[idx])
			PAUSE();
	}

	pthread_mutex_lock(&server->plock);

	print_adapt_server_state(server, "*", "destroying lock");
	if(server->state != SERVER_DOWN) {

		if(!--server->nb_attached_locks && !adapt_always_up) {
			server->state = SERVER_STOPPING;
			pthread_cond_broadcast(&server->pcond_state);
			/* pcond_state is shared with other events and may wake spuriously: re-check the state */
			while(server->state == SERVER_STOPPING)
				pthread_cond_wait(&server->pcond_state, &server->plock);
		}
	}

	pthread_mutex_unlock(&server->plock);
	/* NOTE(review): the lock comes from liblock_allocate - confirm free() is the matching release */
	free(lock);
}

/*
 * Launches, with 'callback', the server of every core whose server_type is
 * "adapt". Atomically switches liblock_start_server_threads_by_hand from 1 to
 * 0 first and calls fatal() when it did not hold 1.
 */
static void do_liblock_run(adapt)(void (*callback)()) {
	int core_idx;
	int by_hand = __sync_val_compare_and_swap(&liblock_start_server_threads_by_hand, 1, 0);

	if(by_hand != 1)
		fatal("servers are not managed by hand");

	for(core_idx = 0; core_idx < topology->nb_cores; core_idx++) {
		if(!topology->cores[core_idx].server_type)
			continue;
		if(strcmp(topology->cores[core_idx].server_type, "adapt") != 0)
			continue;

		launch_server(&servers[core_idx], callback);
	}
}

/* Library tear-down hook: not implemented yet, does nothing. */
static void do_liblock_kill_library(adapt)() {
	// TODO
}

/*
 * Initialises the adapt library: sets up the shared thread attributes
 * (explicit SCHED_FIFO scheduling at PRIO_MANAGER) and allocates one server
 * per core, each in the SERVER_DOWN state with its request area and
 * monitored-lock tables mapped. The parameter is unused.
 *
 * Every field and synchronisation object used later is initialised
 * explicitly, including pcond_servicing, the thread list, the liveness flag
 * and the callback.
 */
static void do_liblock_init_library(adapt)(struct core* unused) {
	struct sched_param param;
	int i;

	param.sched_priority = PRIO_MANAGER;
	pthread_attr_init(&pthread_ready_fifo);
	pthread_attr_setinheritsched(&pthread_ready_fifo, PTHREAD_EXPLICIT_SCHED);
	pthread_attr_setschedpolicy(&pthread_ready_fifo, SCHED_FIFO);
	pthread_attr_setschedparam(&pthread_ready_fifo, &param);

	servers = liblock_allocate(sizeof(struct server) * topology->nb_cores);

	for(i=0; i<topology->nb_cores; i++) {
		/* NOTE(review): 'core' is indexed by core_id while the other fields are indexed by i - presumably both agree; confirm */
		servers[topology->cores[i].core_id].core = &topology->cores[i];

		servers[i].nb_attached_locks  = 0;
		servers[i].nb_monitored_locks = 0;
		servers[i].state              = SERVER_DOWN;
		servers[i].request_area       = anon_mmap(initial_mapped_size);
		servers[i].monitored_locks    = anon_mmap(MAX_LOCKS*sizeof(struct liblock_lock*));
		servers[i].monitored_locks_n  = anon_mmap(MAX_LOCKS*sizeof(unsigned int));

		servers[i].server_threads     = 0;   /* empty thread list */
		servers[i].nb_threads         = 0;
		servers[i].nb_servicing       = 0;
		servers[i].nb_free            = 0;
		servers[i].alive              = 0;
		servers[i].callback           = 0;

		pthread_mutex_init(&servers[i].plock, 0);
		pthread_cond_init(&servers[i].pcond_state, 0);
		pthread_cond_init(&servers[i].pcond_servicing, 0);
	}
}

/*
 * Thread-start hook: clears the 'pending' field of the calling thread's
 * request slot (index self.id) on every server. 'desc' is unused.
 */
static void do_liblock_on_thread_start(adapt)(struct thread_descriptor* desc) {
	int core_idx = 0;

	/* original note: this touches my request slots, it is only a cache matter */
	while(core_idx < topology->nb_cores) {
		servers[core_idx].request_area[self.id].pending = 0;
		core_idx++;
	}
}

/* Thread-exit hook: nothing to do for this lock implementation. 'desc' is unused. */
static void do_liblock_on_thread_exit(adapt)(struct thread_descriptor* desc) {
}

/*
 * Releases 'lock' from inside a critical section.
 * Not implemented: fatal() is called first; the reset of lock->locked only
 * runs if fatal() returns (its definition is not in this file).
 */
static void do_liblock_unlock_in_cs(adapt)(struct liblock_lock* lock) {
	fatal("implement me");
	lock->locked = 0;
}

/*
 * Re-acquires 'lock' from inside a critical section.
 * Not implemented: fatal() is called first; the acquisition loop below only
 * runs if fatal() returns (its definition is not in this file).
 *
 * 'me' is only set on servicing threads, so it is tested before being
 * dereferenced.
 */
static void do_liblock_relock_in_cs(adapt)(struct liblock_lock* lock) {
	fatal("implement me");
	while(__sync_val_compare_and_swap(&lock->locked, 0, 1)) { /* one of my thread own the lock */
		ensure_at_least_one_free_thread(lock->server);     /* verify that we have enough active thread */
		if(me)
			me->has_run = 1;
		sched_yield();                                     /* give a chance to one of our thread to release the lock */
	}
}

/*
 * Waits on 'cond' from inside a critical section protected by 'lock'.
 *
 * Makes sure a free servicing thread exists, releases lock->locked while
 * holding lock->posix_lock, waits on the condition variable, then re-acquires
 * the lock with liblock_adapt_relock_in_cs.
 *
 * Returns the result of pthread_cond_wait (0 on success, an error number
 * otherwise).
 */
static int do_liblock_cond_wait(adapt)(pthread_cond_t* cond, struct liblock_lock* lock) { 
	int res;

	ensure_at_least_one_free_thread(lock->server);
	pthread_mutex_lock(&lock->posix_lock);
	lock->locked = 0;
	res = pthread_cond_wait(cond, &lock->posix_lock);
	pthread_mutex_unlock(&lock->posix_lock);
	liblock_adapt_relock_in_cs(lock);
	return res;
}

/*
 * Waits on 'cond' until the absolute time 'ts', from inside a critical section
 * protected by 'lock'.
 *
 * Makes sure a free servicing thread exists, releases lock->locked while
 * holding lock->posix_lock, waits on the condition variable, then re-acquires
 * the lock with liblock_adapt_relock_in_cs.
 *
 * Returns the result of pthread_cond_timedwait, so the caller can tell a
 * time-out (ETIMEDOUT) from a wake-up (0).
 */
static int do_liblock_cond_timedwait(adapt)(pthread_cond_t* cond, struct liblock_lock* lock, const struct timespec* ts) { 
	int res;

	ensure_at_least_one_free_thread(lock->server);
	pthread_mutex_lock(&lock->posix_lock);
	lock->locked = 0;
	res = pthread_cond_timedwait(cond, &lock->posix_lock, ts);
	pthread_mutex_unlock(&lock->posix_lock);
	liblock_adapt_relock_in_cs(lock);
	return res;
}

/*
 * Declares 'core' as a server core: its server is launched immediately, with
 * no callback, unless server threads are started by hand.
 */
static void do_liblock_declare_server(adapt)(struct core* core) {
	if(liblock_start_server_threads_by_hand)
		return;

	launch_server(&servers[core->core_id], 0);
}

/* Declares the "adapt" implementation through the liblock_declare macro (defined outside this file);
 * presumably it wires up the do_liblock_*(adapt) functions above - confirm against liblock.h. */
liblock_declare(adapt);
