#include <stdint.h>
#include <unistd.h>
#include <stdbool.h>
#include <math.h>
#include <stdio.h>


void gausselimination(size_t n, size_t k, int8_t *A, int8_t *b);

void inner_logbp(
        size_t m, size_t n,
        size_t bits_count, size_t nodes_count, const uint32_t bits_values[], const uint32_t nodes_values[],
        int8_t Lc[],
        float Lq[], float Lr[],
        unsigned int n_iter,
        float L_posteriori_out[]);

//decode(384, 6, 8, ...)
int decode(size_t n, size_t nodes_count, size_t bits_count, uint32_t bits[], int8_t y[], int8_t out[], unsigned int maxiter) {
    const size_t m = n * nodes_count / bits_count;
    float Lq[m*n];
    float Lr[m*n];
    float L_posteriori[n];

    /* Calculate column bit positions from row bit positions */
    int32_t bits_transposed[nodes_count * n];
    for (size_t i=0; i<nodes_count * n; i++)
        bits_transposed[i] = -1;

    for (size_t i=0; i<m; i++) {
        for (size_t j=0; j<bits_count; j++) {
            int32_t *base = bits_transposed + bits[i*bits_count + j] * nodes_count;
            for (; *base != -1; base++)
                ;
            *base = i;
        }
    }

    /*
    printf("Row positions: [");
    for (size_t i=0; i<m*bits_count; i++) {
        if (i)
            printf(", ");
        if (i%32 == 0)
            printf("\n    ");
        printf("%4d", bits[i]);
    }
    printf("\n]\n");

    printf("Column positions: [");
    for (size_t i=0; i<n*nodes_count; i++) {
        if (i)
            printf(", ");
        if (i%32 == 0)
            printf("\n    ");
        printf("%4d", bits_transposed[i]);
    }
    printf("\n]\n");
    */

    /* Run iterative optimization algorithm */
    for (unsigned int n_iter=0; n_iter<maxiter; n_iter++) {
        inner_logbp(m, n, bits_count, nodes_count, bits, (uint32_t*)bits_transposed, y, Lq, Lr, n_iter, L_posteriori);

        /*
        float *arrs[3] = {Lq, Lr, L_posteriori};
        const char *names[3] = {"Lq", "Lr", "L_posteriori"};
        size_t lens[3] = {m*n, m*n, n};
        const size_t head_tail = 10;
        for (int j=0; j<3; j++) {
            printf("%s=[", names[j]);
            bool ellipsis = false;
            const int w = 16;
            for (size_t i=0; i<lens[j]; i++) {
                if (lens[j] > 1000 && i/w > head_tail && i/w < m*n/w-head_tail) {
                    if (!ellipsis) {
                        ellipsis = true;
                        printf("\n    ...");
                    }
                    continue;
                }
                if (i)
                    printf(", ");
                if (i%w == 0)
                    printf("\n    ");
                float outf = arrs[j][i];
                char *s = outf < 0 ? "\033[91m" : (outf > 0 ? "\033[92m" : "\033[94m");
                printf("%s% 012.6g\033[38;5;240m", s, outf);
            }
            printf("\n]\n");
        }
        */

        for (size_t i=0; i<n; i++)
            out[i] = L_posteriori[i] <= 0.0f;

        for (size_t i=0; i<m; i++) {
            bool sum = 0;
            for (size_t j=0; j<bits_count; j++)
                sum ^= out[bits[i*bits_count + j]];
            if (sum)
                continue;
        }

        fflush(stdout);
        return n_iter;
    }

    fflush(stdout);
    return -1;
}

/* Perform inner ext LogBP solver */
void inner_logbp(
        size_t m, size_t n,
        size_t bits_count, size_t nodes_count, uint32_t const bits_values[], const uint32_t nodes_values[],
        int8_t Lc[],
        float Lq[], float Lr[],
        unsigned int n_iter,
        float L_posteriori_out[]) {

    /*
    printf("Input data: [");
    for (size_t i=0; i<n; i++) {
        if (i)
            printf(", ");
        if (i%32 == 0)
            printf("\n    ");
        printf("%4d", Lc[i]);
    }
    printf("\n]\n");
    */

    /* step 1 : Horizontal */
    unsigned int bits_counter = 0;
    for (size_t i=0; i<m; i++) {
        //printf("=== i=%zu\n", i);
        for (size_t p=bits_counter; p<bits_counter+bits_count; p++) {
            size_t j = bits_values[p];
            //printf("\033[38;5;240mj=%04zd ", j);

            float x = 1;
            if (n_iter == 0) {
                for (size_t q=bits_counter; q<bits_counter+bits_count; q++) {
                    if (bits_values[q] != j) {
                        //int lcv = Lc[bits_values[q]];
                        //char *s = lcv < 0 ? "\033[91m" : (lcv > 0 ? "\033[92m" : "\033[94m");
                        //printf("nij=%04u Lc=%s%3d\033[38;5;240m ", bits_values[q], s, lcv);
                        x *= tanhf(0.5f * Lc[bits_values[q]]);
                    }
                }

            } else {
                for (size_t q=bits_counter; q<bits_counter+bits_count; q++) {
                    if (bits_values[q] != j)
                        x *= tanhf(0.5f * Lq[i*n + bits_values[q]]);
                }
            }

            //printf("\n==== i=%03zd p=%01zd x=%08f\n", i, p-bits_counter, x);

            float num = 1 + x;
            float denom = 1 - x;
            if (num == 0)
                Lr[i*n + j] = -1.0f;
            else if (denom == 0)
                Lr[i*n + j] = 1.0f;
            else
                Lr[i*n + j] = logf(num/denom);
        }

        bits_counter += bits_count;
    }

    /* step 2 : Vertical */
    unsigned int nodes_counter = 0;
    for (size_t j=0; j<n; j++) {
        for (size_t p=bits_counter; p<nodes_counter+nodes_count; p++) {
            size_t i = nodes_values[p];

            Lq[i*n + j] = Lc[j];

            for (size_t q=bits_counter; q<nodes_counter+nodes_count; q++) {
                if (nodes_values[q] != i)
                    Lq[i*n + j] += Lr[nodes_values[q]*n + j];
            }
        }

        nodes_counter += nodes_count;
    }

    /* LLR a posteriori */
    nodes_counter = 0;
    for (size_t j=0; j<n; j++) {
        float sum = 0;
        for (size_t k=bits_counter; k<nodes_counter+nodes_count; k++)
            sum += Lr[nodes_values[k]*n + j];
        nodes_counter += nodes_count;

        L_posteriori_out[j] = Lc[j] + sum;
    }
}

/* Compute the original (k) bit message from a (n) bit codeword x.
 *
 * tG: (n, k)-matrix
 * x: (n)-vector
 * out: (k)-vector
 */
void get_message(size_t n, size_t k, int8_t *tG, int8_t *x, int8_t *out) {

    gausselimination(n, k, tG, x);

    out[k - 1] = x[k - 1];
    for (ssize_t i=k-2; i>=0; i--) {
        out[i] = x[i];

        uint8_t sum = 0;
        for (size_t j=i+1; j<k; j++)
            sum ^= tG[i*k + j] * out[j];

        out[i] = !!(out[i] - sum);
    }
}

/* Solve linear system in Z/2Z via Gauss Gauss elimination.
 *
 * A: (n, k)-matrix
 * b: (n)-vector
 */
void gausselimination(size_t n, size_t k, int8_t *A, int8_t *b) {
    ssize_t d = k<n ? k : n;
    for (ssize_t j=0; j<d; j++) {

        ssize_t pivot = -1;
        for (size_t i=j; i<n; i++) {
            if (A[i*k + j]) {
                pivot = i;
                break;
            }
        }
        if (pivot == -1)
            continue;

        if (pivot != j) {
            for (size_t i=0; i<k; i++) {
                int8_t tmp = A[j*k + i];
                A[j*k + i] = A[pivot*k + i];
                A[pivot*k + i] = tmp;
            }

            int8_t tmp = b[j];
            b[j] = b[pivot];
            b[pivot] = tmp;
        }

        for (size_t i=j+1; i<n; i++) {
            if (A[i*k + j]) {
                for (size_t p=0; p<k; p++)
                    A[i*k + p] = !!(A[i*k + p] - A[j*k + p]);
                b[i] = !!(b[i] - b[j]);
            }
        }
    }
}