/* ########################################################################## */
/* (C) UPMC, 2010-2011                                                        */
/*     Authors:                                                               */
/*       Jean-Pierre Lozi <jean-pierre.lozi@lip6.fr>                          */
/*       Gaël Thomas <gael.thomas@lip6.fr>                                    */
/*       Florian David <florian.david@lip6.fr>                                */
/*       Julia Lawall <julia.lawall@lip6.fr>                                  */
/*       Gilles Muller <gilles.muller@lip6.fr>                                */
/* -------------------------------------------------------------------------- */
/* ########################################################################## */

/*
 * =============================================================================
 * Code based on Tudor David's libslock library.
 * =============================================================================
 */

#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <errno.h>
#include <sys/mman.h>

#include "liblock.h"
#include "liblock-fatal.h"

#include "extra-libslock-utils.h"
#include "extra-libslock-platform-defs.h"
#include "extra-libslock-atomic-ops.h"

/* Cap applied to the per-lock back-off limit each time it is doubled after a
 * failed acquisition; the unit is whatever cdelay() expects. */
#define MAX_DELAY 10000000
/* Value of the lock byte when the lock is free; the result of TAS_U8() is
 * compared against it to detect a successful acquisition. */
#define UNLOCKED 0
/* Value denoting a held lock. It is not referenced by name in the code visible
 * in this file: the spin loop compares against the literal 1 instead. */
#define LOCKED 1

/*
 * Per-lock state of the "extra_bo" implementation: a one-byte spin flag plus
 * a POSIX mutex that is only used to implement condition-variable waits.
 */
struct liblock_impl {
    /* Index of this lock in each thread's `limits` array; assigned from the
     * global `cur_lock_number` counter when the lock is initialised. */
    unsigned int                       lock_id;
    /* Mutex handed to pthread_cond_wait()/pthread_cond_timedwait(); it does
     * not protect the critical section itself. */
    pthread_mutex_t                    posix_lock;
    /* Filler of one full CACHE_LINE_SIZE between the fields above and the
     * spin flag. */
    char                               pad1[CACHE_LINE_SIZE];
    /* Spin flag: written to 0 on release, set through TAS_U8() on acquire. */
    volatile uint8_t                   lock;
    /* Trailing padding. NOTE(review): the size passed to pad_to_cache_line()
     * does not include pad1 or any compiler-inserted alignment padding --
     * confirm the resulting struct size is the intended one. */
    char __pad[pad_to_cache_line(sizeof(unsigned int) +
                                 sizeof(pthread_mutex_t) +
                                 sizeof(uint8_t))];
};

// Per-thread state for my_random(), obtained from seed_rand() when the thread
// starts. The same seeds are used for every lock the thread touches.
static __thread unsigned long *ttas_seeds;
// Per-thread array of back-off limits, indexed by a lock's lock_id and
// allocated with MAX_LOCKS entries when the thread starts.
static __thread uint32_t      *limits;

// Source of lock_id values: atomically incremented once per initialised lock.
static volatile unsigned int cur_lock_number = 0;


static void lock_extra_bo(struct liblock_impl *impl)
{
#if defined(OPTERON_OPTIMIZE)
    volatile uint8_t *l = &(impl->lock);
    uint32_t delay;

    while (1)
    {
        PREFETCHW(l);

        while ((*l) == 1)
        {
            PREFETCHW(l);
        }

        if (TAS_U8(&(impl->lock)) == UNLOCKED)
        {
            return;
        }
        else
        {
            delay = my_random(&(ttas_seeds[0]),
                              &(ttas_seeds[1]),
                              &(ttas_seeds[2])) % limits[impl->lock_id];
            limits[impl->lock_id] = MAX_DELAY > 2 * limits[impl->lock_id] ?
                                    2 * limits[impl->lock_id] : MAX_DELAY;
            cdelay(delay);
        }
    }
#else /* !OPTERON_OPTIMIZE */
    uint32_t delay;
    volatile uint8_t *l = &(impl->lock);

    while (1)
    {
        while ((*l)==1) {}

        if (TAS_U8(l) == UNLOCKED)
        {
            return;
        }
        else
        {
            delay = my_random(&(ttas_seeds[0]),
                              &(ttas_seeds[1]),
                              &(ttas_seeds[2])) % limits[impl->lock_id];
            limits[impl->lock_id] = MAX_DELAY > 2 * limits[impl->lock_id] ?
                                    2 * limits[impl->lock_id] : MAX_DELAY;
            cdelay(delay);
        }
    }
#endif /* OPTERON_OPTIMIZE */
}

/*
 * Release the spin lock in `impl` by storing 0 into the lock byte.
 *
 * NOTE(review): only COMPILER_BARRIER precedes the store, so the ordering of
 * the critical section's writes before the release presumably depends on the
 * target CPU's store ordering -- confirm for every supported architecture.
 */
static void unlock_extra_bo(struct liblock_impl *impl)
{
    COMPILER_BARRIER;
    impl->lock = 0;
}

/*
 * Allocate and initialise the state of one "extra_bo" lock.
 *
 * The new lock receives the next value of the global `cur_lock_number`
 * counter as its lock_id, a default-initialised POSIX mutex (used only for
 * condition-variable waits) and a lock byte set to 0.
 *
 * Parameters `lock`, `core` and `attr` are not used by this implementation.
 *
 * Returns the pointer obtained from liblock_allocate().
 *
 * The lock_id indexes the per-thread `limits` array, which is allocated with
 * MAX_LOCKS entries, so an id outside that range would make every contended
 * acquisition read and write out of bounds. Such an id is reported through
 * fatal() instead of being handed out.
 */
static struct liblock_impl *do_liblock_init_lock(extra_bo)
                               (liblock_lock_t *lock,
                                struct hw_thread *core,
                                pthread_mutexattr_t *attr)
{
    struct liblock_impl *impl;
    unsigned int id = __sync_fetch_and_add(&cur_lock_number, 1);

    /* NOTE(review): relies on fatal() not returning -- confirm. */
    if (id >= MAX_LOCKS)
        fatal("too many extra_bo locks (MAX_LOCKS exceeded)");

    impl = liblock_allocate(sizeof(struct liblock_impl));

    impl->lock_id = id;
    pthread_mutex_init(&impl->posix_lock, 0);

    impl->lock = 0;

    /* Make the initialised fields visible before the lock is returned. */
    MEM_BARRIER;

    return impl;
}

/*
 * Tear down a lock: destroy its POSIX mutex, then release the implementation
 * structure with free().
 *
 * The result of pthread_mutex_destroy() is not inspected; the function always
 * returns 0.
 */
static int do_liblock_destroy_lock(extra_bo)(liblock_lock_t *lock)
{
    struct liblock_impl *impl = lock->impl;

    pthread_mutex_destroy(&impl->posix_lock);
    free(impl);

    return 0;
}

/*
 * Run `pending(val)` as a critical section protected by `lock`.
 *
 * The spin lock is acquired before the callback is invoked and released right
 * after it returns; the callback's return value is passed back to the caller.
 */
static void* do_liblock_execute_operation(extra_bo)(liblock_lock_t *lock,
                                                    void* (*pending)(void*),
                                                    void *val)
{
    struct liblock_impl *const held = lock->impl;
    void *outcome;

    lock_extra_bo(held);
    outcome = pending(val);
    unlock_extra_bo(held);

    return outcome;
}

/* Library-initialisation hook: this implementation does nothing here. */
static void do_liblock_init_library(extra_bo)()
{}

/* Library-shutdown hook: this implementation does nothing here. */
static void do_liblock_kill_library(extra_bo)()
{}

/*
 * Run hook: atomically switch liblock_start_server_threads_by_hand from 1 to
 * 0, then invoke `callback` if one was supplied.
 *
 * If the flag was not 1 beforehand, fatal() is called with the message
 * "servers are not managed by hand".
 */
static void do_liblock_run(extra_bo)(void (*callback)())
{
    if (__sync_val_compare_and_swap(&liblock_start_server_threads_by_hand,
                                    1, 0) != 1)
    {
        fatal("servers are not managed by hand");
    }

    if (!callback)
    {
        return;
    }

    callback();
}

/*
 * Initialise the POSIX condition variable embedded in `cond`.
 *
 * The attributes stored in `cond->attr` are applied only when
 * `cond->has_attr` is set; otherwise the defaults are used.
 *
 * Returns the result of pthread_cond_init().
 */
static int do_liblock_cond_init(extra_bo)(liblock_cond_t* cond)
{
    if (cond->has_attr)
    {
        return pthread_cond_init(&cond->impl.posix_cond, &cond->attr);
    }

    return pthread_cond_init(&cond->impl.posix_cond, 0);
}

/*
 * Wait on `cond` while temporarily giving up the spin lock of `lock`.
 *
 * Sequence: take the lock's POSIX mutex, release the spin lock, block in
 * pthread_cond_timedwait() (or pthread_cond_wait() when `ts` is null),
 * drop the POSIX mutex, and finally re-acquire the spin lock before
 * returning.
 *
 * Returns the result of the pthread wait call.
 *
 * NOTE(review): the signal and broadcast functions of this file do not take
 * posix_lock, so a wakeup issued after the spin lock is released but before
 * this thread is blocked in the wait call may be missed -- confirm callers
 * tolerate this.
 */
static int cond_timedwait(liblock_cond_t* cond,
                          liblock_lock_t* lock,
                          const struct timespec* ts)
{
    struct liblock_impl *owner = lock->impl;
    pthread_mutex_t     *guard = &owner->posix_lock;
    int                  status;

    pthread_mutex_lock(guard);
    unlock_extra_bo(owner);

    status = ts ? pthread_cond_timedwait(&cond->impl.posix_cond, guard, ts)
                : pthread_cond_wait(&cond->impl.posix_cond, guard);

    pthread_mutex_unlock(guard);
    lock_extra_bo(owner);

    return status;
}

/*
 * Timed condition wait: forwards to cond_timedwait() with the caller's
 * timeout `ts` and returns its result.
 */
static int do_liblock_cond_timedwait(extra_bo)(liblock_cond_t* cond,
                                          liblock_lock_t* lock,
                                          const struct timespec* ts)
{
    return cond_timedwait(cond, lock, ts);
}

/*
 * Untimed condition wait: forwards to cond_timedwait() with a null timeout,
 * which makes it use pthread_cond_wait(), and returns its result.
 */
static int do_liblock_cond_wait(extra_bo)(liblock_cond_t* cond,
                                     liblock_lock_t* lock)
{
    return cond_timedwait(cond, lock, 0);
}

/*
 * Signal the POSIX condition variable embedded in `cond` and return the
 * result of pthread_cond_signal(). No mutex is taken here.
 */
static int do_liblock_cond_signal(extra_bo)(liblock_cond_t* cond)
{
    return pthread_cond_signal(&cond->impl.posix_cond);
}

/*
 * Broadcast on the POSIX condition variable embedded in `cond` and return the
 * result of pthread_cond_broadcast(). No mutex is taken here.
 */
static int do_liblock_cond_broadcast(extra_bo)(liblock_cond_t* cond)
{
    return pthread_cond_broadcast(&cond->impl.posix_cond);
}

/*
 * Destroy the POSIX condition variable embedded in `cond` and return the
 * result of pthread_cond_destroy().
 */
static int do_liblock_cond_destroy(extra_bo)(liblock_cond_t* cond)
{
    return pthread_cond_destroy(&cond->impl.posix_cond);
}

/*
 * Thread-start hook: set up the calling thread's back-off state.
 *
 * Stores the seeds returned by seed_rand() in `ttas_seeds` and allocates the
 * thread's `limits` array with MAX_LOCKS entries, each starting at 1.
 * `desc` is not used.
 */
static void do_liblock_on_thread_start(extra_bo)(struct thread_descriptor* desc)
{
    uint32_t *slots;
    int       slot;

    ttas_seeds = seed_rand();

    slots = liblock_allocate(sizeof(uint32_t) * MAX_LOCKS);
    for (slot = 0; slot < MAX_LOCKS; ++slot)
    {
        /* Smallest possible limit: the first back-off delay is always 0. */
        slots[slot] = 1;
    }
    limits = slots;

    MEM_BARRIER;
}

/*
 * Thread-exit hook: intentionally empty.
 *
 * The per-thread `ttas_seeds` and `limits` buffers are deliberately not
 * released here, to avoid potential overhead in benchmarks.
 */
static void do_liblock_on_thread_exit(extra_bo)(struct thread_descriptor* desc)
{
}

/* Release the spin lock of `lock`; forwards to unlock_extra_bo(). */
static void do_liblock_unlock_in_cs(extra_bo)(liblock_lock_t* lock)
{
    unlock_extra_bo(lock->impl);
}

/* Acquire the spin lock of `lock`; forwards to lock_extra_bo(). */
static void do_liblock_relock_in_cs(extra_bo)(liblock_lock_t* lock)
{
    lock_extra_bo(lock->impl);
}

/* Server-declaration hook: this implementation does nothing with `core`. */
static void do_liblock_declare_server(extra_bo)(struct hw_thread* core)
{}

/* Cleanup hook: this implementation does nothing here. */
static void do_liblock_cleanup(extra_bo)(void)
{}

/* Presumably registers the do_liblock_*(extra_bo) hooks defined above with
 * the liblock core under the name "extra_bo" -- see the definition of
 * liblock_declare to confirm. */
liblock_declare(extra_bo);

