/* ########################################################################## */
/* (C) UPMC, 2010-2011                                                        */
/*     Authors:                                                               */
/*       Jean-Pierre Lozi <jean-pierre.lozi@lip6.fr>                          */
/*       Gaël Thomas <gael.thomas@lip6.fr>                                    */
/*       Florian David <florian.david@lip6.fr>                                */
/*       Julia Lawall <julia.lawall@lip6.fr>                                  */
/*       Gilles Muller <gilles.muller@lip6.fr>                                */
/* -------------------------------------------------------------------------- */
/* ########################################################################## */

/*
 * =============================================================================
 * Code based on Tudor David's libslock library.
 * =============================================================================
 */

#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <errno.h>
#include <sys/mman.h>

#include "liblock.h"
#include "liblock-fatal.h"

#include "extra-libslock-utils.h"
#include "extra-libslock-platform-defs.h"
#include "extra-libslock-atomic-ops.h"


/*
 * Back-off tuning constants for lock_extra_ticket(), chosen per platform.
 *
 *   TICKET_BASE_WAIT  multiplied by the distance to the head of the queue to
 *                     obtain the nop_rep() argument while waiting far back.
 *   TICKET_MAX_WAIT   not referenced anywhere in this file.
 *   TICKET_WAIT_NEXT  nop_rep() argument used when the waiter is next in line.
 *
 * NOTE(review): nothing is defined here when neither AMD48B nor NIAGARA2 is
 * set, although the x86_64 path of lock_extra_ticket() uses TICKET_BASE_WAIT
 * and TICKET_WAIT_NEXT -- presumably the build flags or another header always
 * supply them; confirm.
 */
#ifdef AMD48B
#define TICKET_BASE_WAIT 512
#define TICKET_MAX_WAIT  4095
#define TICKET_WAIT_NEXT 64
#elif defined(NIAGARA2)
#define TICKET_BASE_WAIT 16
#define TICKET_MAX_WAIT  63
#define TICKET_WAIT_NEXT 4
#endif


/*
 * Per-lock state of the extra_ticket implementation.
 *
 * The lock itself is the (head, tail) pair of ticket counters; posix_lock is
 * only used as the mutex handed to pthread_cond_wait()/pthread_cond_timedwait()
 * in cond_timedwait().
 */
struct liblock_impl {
    /* Mutex paired with the POSIX condition variable in cond_timedwait(). */
    pthread_mutex_t                    posix_lock;
    /* CACHE_LINE_SIZE bytes of spacing -- presumably to keep posix_lock and
     * head on different cache lines. */
    char                               p1[CACHE_LINE_SIZE];
    /* Ticket currently being served: waiters spin until it equals their
     * ticket, and unlock_extra_ticket() increments it. */
    volatile uint32_t                  head;
    /* CACHE_LINE_SIZE bytes of spacing between head and tail. */
    char                               p2[CACHE_LINE_SIZE];
    /* Ticket dispenser: lock_extra_ticket() draws a ticket from it with
     * IAF_U32(). */
    volatile uint32_t                  tail;
    /* Trailing padding. NOTE(review): the size passed to pad_to_cache_line()
     * counts the mutex and the two counters but not p1/p2 -- confirm this is
     * the intended total size. */
    char __pad[pad_to_cache_line(sizeof(pthread_mutex_t) +
                                 2 * sizeof(uint32_t))];
};


/*
 * Absolute difference of two unsigned 32-bit values.
 *
 * Always subtracts the smaller operand from the larger one, so the result
 * never wraps around.
 */
static inline uint32_t
sub_abs(const uint32_t a, const uint32_t b)
{
    return (a > b) ? (a - b) : (b - a);
}

/*
 * Acquire the ticket lock stored in `impl`.
 *
 * A ticket is drawn by applying IAF_U32() to impl->tail (presumably an atomic
 * increment-and-fetch from extra-libslock-atomic-ops.h -- confirm); the caller
 * then spins until impl->head equals that ticket.
 *
 * On OPTERON_OPTIMIZE and x86_64 builds the spin backs off proportionally to
 * the distance between the ticket and impl->head:
 *   - distance > 1   : nop_rep(distance * wait)
 *   - otherwise      : nop_rep(TICKET_WAIT_NEXT)
 *   - distance > 20  : additionally sched_yield()
 * All other builds simply PAUSE between two reads of impl->head.
 *
 * Does not return until the lock is held.
 */
static void lock_extra_ticket(struct liblock_impl *impl)
{
    const uint32_t my_ticket = IAF_U32(&(impl->tail));

#if defined(OPTERON_OPTIMIZE) || defined(__x86_64__)
    /*
     * `wait` is reset to TICKET_BASE_WAIT whenever the observed distance
     * changes; nothing in this function ever increases it.
     */
    uint32_t wait = TICKET_BASE_WAIT;
    uint32_t distance_prev = 1;

    while (1)
    {
#if defined(OPTERON_OPTIMIZE)
        /*
         * Prefetch the lock's cache line. This used to read PREFETCHW(lock),
         * but no `lock` exists in this function, so OPTERON_OPTIMIZE builds
         * could not compile; `impl` matches unlock_extra_ticket().
         */
        PREFETCHW(impl);
#endif  /* OPTERON_OPTIMIZE */

        uint32_t cur = impl->head;
        if (cur == my_ticket)
        {
            break;
        }

        /*
         * Number of tickets still ahead of ours. sub_abs() is not modular,
         * so the value is over-estimated if the counters wrap between head
         * and our ticket; this only lengthens the back-off.
         */
        uint32_t distance = sub_abs(cur, my_ticket);

        if (distance > 1)
        {
            if (distance != distance_prev)
            {
                distance_prev = distance;
                wait = TICKET_BASE_WAIT;
            }

            nop_rep(distance * wait);
        }
        else
        {
            /* Next in line: poll again after a short, fixed delay. */
            nop_rep(TICKET_WAIT_NEXT);
        }

        if (distance > 20)
        {
            /* Far back in the queue: give the CPU to another thread. */
            sched_yield();
        }
    }
#else  /* !OPTERON_OPTIMIZE && !__x86_64__ */
    while (impl->head != my_ticket)
    {
        PAUSE;
    }
#endif  /* OPTERON_OPTIMIZE || __x86_64__ */
}

/*
 * Release the ticket lock stored in `impl` by advancing impl->head by one,
 * which lets the waiter holding the next ticket leave its spin loop.
 *
 * COMPILER_BARRIER is issued before the increment; on OPTERON_OPTIMIZE builds
 * the lock structure is prefetched with PREFETCHW first.
 */
static void unlock_extra_ticket(struct liblock_impl *impl)
{
#if defined(OPTERON_OPTIMIZE)
    PREFETCHW(impl);
#endif  /* OPTERON_OPTIMIZE */

    COMPILER_BARRIER;

    impl->head += 1;
}

/*
 * Create the implementation-specific state of a new extra_ticket lock.
 *
 * Allocates a struct liblock_impl with liblock_allocate(), initialises its
 * POSIX mutex with default attributes, and sets the ticket counters to
 * head = 1, tail = 0. MEM_BARRIER is issued before the state is returned.
 *
 * The `lock`, `core` and `attr` parameters are not used by this
 * implementation.
 *
 * Returns the freshly initialised state.
 */
static struct liblock_impl *do_liblock_init_lock(extra_ticket)
                               (liblock_lock_t *lock,
                                struct hw_thread *core,
                                pthread_mutexattr_t *attr)
{
    struct liblock_impl *impl;

    impl = liblock_allocate(sizeof(*impl));

    pthread_mutex_init(&impl->posix_lock, NULL);

    /* head starts one above tail, so the first ticket drawn is served
     * immediately. */
    impl->head = 1;
    impl->tail = 0;

    MEM_BARRIER;

    return impl;
}

/*
 * Tear down an extra_ticket lock: destroy its POSIX mutex and free the
 * implementation state referenced by lock->impl.
 *
 * Always returns 0; the result of pthread_mutex_destroy() is not reported.
 */
static int do_liblock_destroy_lock(extra_ticket)(liblock_lock_t *lock)
{
    struct liblock_impl *impl = lock->impl;

    pthread_mutex_destroy(&impl->posix_lock);
    free(impl);

    return 0;
}

/*
 * Run `pending(val)` as a critical section protected by `lock`.
 *
 * The ticket lock is taken before the call and released after it, on the
 * calling thread. Returns whatever `pending` returned.
 */
static void* do_liblock_execute_operation(extra_ticket)(liblock_lock_t *lock,
                                                    void* (*pending)(void*),
                                                    void *val)
{
    struct liblock_impl *ticket_lock = lock->impl;

    lock_extra_ticket(ticket_lock);
    void *result = pending(val);
    unlock_extra_ticket(ticket_lock);

    return result;
}

/* Library start-up hook: intentionally empty for extra_ticket. */
static void do_liblock_init_library(extra_ticket)()
{
    /* nothing to do */
}

/* Library shutdown hook: intentionally empty for extra_ticket. */
static void do_liblock_kill_library(extra_ticket)()
{
    /* nothing to do */
}

/*
 * Run hook of the implementation.
 *
 * Atomically switches liblock_start_server_threads_by_hand from 1 to 0 and
 * calls fatal() if the flag did not hold 1. Afterwards invokes `callback`
 * when one was supplied.
 */
static void do_liblock_run(extra_ticket)(void (*callback)())
{
    if (__sync_val_compare_and_swap(&liblock_start_server_threads_by_hand,
                                    1, 0) != 1)
    {
        fatal("servers are not managed by hand");
    }

    if (callback)
    {
        callback();
    }
}

/*
 * Initialise the POSIX condition variable embedded in `cond`.
 *
 * Uses cond->attr when cond->has_attr is set, default attributes otherwise.
 * Returns the result of pthread_cond_init().
 */
static int do_liblock_cond_init(extra_ticket)(liblock_cond_t* cond)
{
    if (cond->has_attr)
    {
        return pthread_cond_init(&cond->impl.posix_cond, &cond->attr);
    }

    return pthread_cond_init(&cond->impl.posix_cond, 0);
}

/*
 * Wait on `cond` while temporarily giving up the ticket lock of `lock`.
 *
 * Sequence (the order matters):
 *   1. take the internal POSIX mutex,
 *   2. release the ticket lock,
 *   3. wait on the POSIX condition variable -- with deadline `ts` when it is
 *      non-null, without a deadline otherwise,
 *   4. drop the POSIX mutex,
 *   5. re-acquire the ticket lock.
 *
 * Returns the result of pthread_cond_timedwait()/pthread_cond_wait().
 *
 * NOTE(review): the signal and broadcast entry points of this file do not
 * take the POSIX mutex, so a wake-up issued between steps 2 and 3 may not be
 * seen by this waiter -- confirm that callers tolerate this.
 */
static int cond_timedwait(liblock_cond_t* cond,
                          liblock_lock_t* lock,
                          const struct timespec* ts)
{
    struct liblock_impl *impl = lock->impl;
    pthread_mutex_t *mutex = &impl->posix_lock;
    int res;

    pthread_mutex_lock(mutex);
    unlock_extra_ticket(impl);

    res = ts ? pthread_cond_timedwait(&cond->impl.posix_cond, mutex, ts)
             : pthread_cond_wait(&cond->impl.posix_cond, mutex);

    pthread_mutex_unlock(mutex);
    lock_extra_ticket(impl);

    return res;
}

/*
 * Timed wait entry point: forwards to cond_timedwait() with the deadline
 * `ts` and returns its result.
 */
static int do_liblock_cond_timedwait(extra_ticket)(liblock_cond_t* cond,
                                          liblock_lock_t* lock,
                                          const struct timespec* ts)
{
    int rc = cond_timedwait(cond, lock, ts);

    return rc;
}

/*
 * Untimed wait entry point: forwards to cond_timedwait() with a null
 * deadline and returns its result.
 */
static int do_liblock_cond_wait(extra_ticket)(liblock_cond_t* cond,
                                     liblock_lock_t* lock)
{
    int rc = cond_timedwait(cond, lock, 0);

    return rc;
}

/*
 * Signal the POSIX condition variable embedded in `cond`.
 * Returns the result of pthread_cond_signal().
 */
static int do_liblock_cond_signal(extra_ticket)(liblock_cond_t* cond)
{
    int rc = pthread_cond_signal(&cond->impl.posix_cond);

    return rc;
}

/*
 * Broadcast on the POSIX condition variable embedded in `cond`.
 * Returns the result of pthread_cond_broadcast().
 */
static int do_liblock_cond_broadcast(extra_ticket)(liblock_cond_t* cond)
{
    int rc = pthread_cond_broadcast(&cond->impl.posix_cond);

    return rc;
}

/*
 * Destroy the POSIX condition variable embedded in `cond`.
 * Returns the result of pthread_cond_destroy().
 */
static int do_liblock_cond_destroy(extra_ticket)(liblock_cond_t* cond)
{
    int rc = pthread_cond_destroy(&cond->impl.posix_cond);

    return rc;
}

/* Thread start hook: intentionally empty for extra_ticket; `desc` is unused. */
static void do_liblock_on_thread_start(extra_ticket)(struct thread_descriptor* desc)
{
    /* nothing to do */
}

/* Thread exit hook: intentionally empty for extra_ticket; `desc` is unused. */
static void do_liblock_on_thread_exit(extra_ticket)(struct thread_descriptor* desc)
{
    /* nothing to do */
}

/*
 * Release the ticket lock of `lock` (forwards to unlock_extra_ticket()).
 */
static void do_liblock_unlock_in_cs(extra_ticket)(liblock_lock_t* lock)
{
    struct liblock_impl *impl = lock->impl;

    unlock_extra_ticket(impl);
}

/*
 * Acquire the ticket lock of `lock` (forwards to lock_extra_ticket()).
 */
static void do_liblock_relock_in_cs(extra_ticket)(liblock_lock_t* lock)
{
    struct liblock_impl *impl = lock->impl;

    lock_extra_ticket(impl);
}

/* Server declaration hook: intentionally empty for extra_ticket; `core` is unused. */
static void do_liblock_declare_server(extra_ticket)(struct hw_thread* core)
{
    /* nothing to do */
}

/* Cleanup hook: intentionally empty for extra_ticket. */
static void do_liblock_cleanup(extra_ticket)(void)
{
    /* nothing to do */
}

/*
 * liblock_declare is defined outside this file; presumably it registers the
 * do_liblock_*(extra_ticket) entry points above with the liblock core under
 * the name "extra_ticket" -- confirm in liblock.h.
 */
liblock_declare(extra_ticket);

