/* Megumin LED display firmware
 * Copyright (C) 2018 Sebastian Götte <code@jaseg.net>
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "global.h"
#include "8b10b.h"
#include "crc32.h"
#include "protocol.h"
#include "xorshift.h"

/* Decoder state of the 8b10b receive path. It is reset once in main() and then
 * fed one bit at a time from ADC1_IRQHandler(); both call sites cast the
 * volatile qualifier away when passing it to the decoder functions. */
static volatile struct state_8b10b_dec st_8b10b_dec;

/* Modulation constants */
#define THRESHOLD_ADC_COUNTS 28500 /* ADC counts; samples above this value decode as a 1 bit */
#define MIN_RECTIFIER_MARGIN 5000 /* ADC counts; minimum |sample - threshold| for the rectifier to be driven */
#define SAMPLES_PER_BAUD 16 /* ADC samples per received bit */
#define OVERSAMPLING_RATIO 16 /* ADC oversampling ratio; used for the TIM14 period calculation in main() */
#define SAMPLING_PHASE (SAMPLES_PER_BAUD / 2) /* bits are sampled this many ADC samples after an edge */
#define LED_DEAD_TIME 4 /* in ADC samples */
#ifndef CONFIG_MODULE_ADDRESS
/* Note: the directive is "#warning"; "#warn" is rejected by the preprocessor,
 * which would turn a missing address into a build failure instead of the
 * documented fallback to address 0. */
#warning "CONFIG_MODULE_ADDRESS is not defined, defaulting to 0."
#define CONFIG_MODULE_ADDRESS 0
#endif /* CONFIG_MODULE_ADDRESS */

/* While this macro is defined (it is only tested with #ifndef, its value is
 * irrelevant) ADC1_IRQHandler() never switches on the rectifier or LED driver
 * outputs. Remove the definition to enable the drivers. */
#define DEBUG_DISABLE_DRIVERS 1


/* Receive buffer for exactly one packet, accessible either as a
 * struct data_packet or as raw bytes. ADC1_IRQHandler() fills it byte by byte;
 * main() descrambles and CRC-checks it in place once packet_received is set. */
volatile union {
    struct data_packet packet;
    uint8_t bytes[sizeof(struct data_packet)];
} rx_buf;

/* Index of the next byte to store in rx_buf.bytes; -1 while no packet is being received. */
volatile ssize_t rx_pos;
/* Set by ADC1_IRQHandler() when rx_buf is full; cleared by main() after processing the packet. */
volatile bool packet_received;
/* Set by ADC1_IRQHandler() on a K28_1 symbol; makes main() re-seed packet_rng_state. */
volatile bool rng_reset;
/* xorshift32 state of the key stream main() uses to descramble received packets. */
uint32_t packet_rng_state = 0;

/* NOTE(review): foobar is not referenced anywhere in this file - confirm whether it is still needed. */
struct data_packet foobar;
/* Brightness of this module from the last good packet; used as index into brightness_lut.
 * NOTE(review): written in main() and read in ADC1_IRQHandler() without volatile - confirm this is intended. */
int global_brightness;
/* Channel bits from the last good packet: ADC1_IRQHandler() uses the low nibble
 * while the received bit is 0 and the high nibble while it is 1. */
int channel_mask;

/* Receive-path error statistics. Zeroed in main(); the code in this file only ever increments them. */
struct error_counters {
    int crc_errors; /* packets whose computed CRC32 did not match the received one */
    int receive_overflows; /* data symbol arrived although rx_buf was already full */
    int processing_overflows; /* data symbol arrived while main() had not yet released rx_buf */
    int decoding_errors; /* the 8b10b decoder returned -DECODING_ERROR */
} errors;

/* generated by ./gamma.py */
/* Maps the brightness value (index 0..15) to the TIM14 compare value that
 * ADC1_IRQHandler() writes to TIM14->CCR1. The largest entry equals the TIM14
 * auto-reload value of 24000 configured in main(). */
uint16_t brightness_lut[16] = {
      54,   247,   604,  1137,  1857,  2774,  3894,  5223,
    6768,  8534, 10525, 12745, 15199, 17891, 20823, 24000
};


/* Firmware entry point: bring up clocks, timers and the ADC, then process
 * received packets forever.
 *
 * Bit reception itself happens in ADC1_IRQHandler(). This loop only handles
 * complete packets flagged through packet_received: it descrambles rx_buf with
 * the xorshift32 key stream, verifies the CRC32 and, on success, latches this
 * module's brightness nibble and channel mask. PB7 is set when the packet
 * followed an RNG reset and cleared otherwise; PA6 is set after a good CRC
 * and cleared after a bad one.
 *
 * Never returns.
 */
int main(void) {
    /* System clock cycles between two ADC triggers generated by TIM3. */
    enum { ADC_TRIGGER_PERIOD = 125 };

    /* Configure clocks for 64 MHz system clock.
     *
     * HSI @ 16 MHz --[PLL x16 /4]--> PLL "R" clock @ 64 MHz
     */
    /* Enable peripherals */
    RCC->APBENR1 |= RCC_APBENR1_PWREN;
    /* Increase flash wait states to 2 required for operation above 48 MHz */
    FLASH->ACR = FLASH_ACR_ICEN | FLASH_ACR_PRFTEN | (FLASH->ACR & ~FLASH_ACR_LATENCY_Msk) | (2<<FLASH_ACR_LATENCY_Pos);
    while ((FLASH->ACR & FLASH_ACR_LATENCY_Msk) != (2<<FLASH_ACR_LATENCY_Pos)) {
        /* wait for flash controller to acknowledge change. */
    }
    /* Configure PLL with multiplier 16, divisor 4 for "R" output (PLLR field value 3), and enable "R" (sysclk)
     * output */
    RCC->PLLCFGR = (16<<RCC_PLLCFGR_PLLN_Pos) | (2<<RCC_PLLCFGR_PLLSRC_Pos) | (3<<RCC_PLLCFGR_PLLR_Pos) | RCC_PLLCFGR_PLLREN;
    RCC->CR |= RCC_CR_PLLON;
    while (!(RCC->CR & RCC_CR_PLLRDY)) {
        /* wait for PLL to stabilize. */
    }
    /* Switch SYSCLK to PLL source. */
    RCC->CFGR |= (2<<RCC_CFGR_SW_Pos);
    while ((RCC->CFGR & RCC_CFGR_SWS_Msk) != (2<<RCC_CFGR_SWS_Pos)) {
        /* wait for RCC to switch over. */
    }

    RCC->AHBENR |= RCC_AHBENR_DMA1EN;
    RCC->APBENR1 |= RCC_APBENR1_TIM3EN | RCC_APBENR1_DBGEN;
    RCC->APBENR2 |= RCC_APBENR2_TIM1EN | RCC_APBENR2_ADCEN | RCC_APBENR2_TIM14EN;
    RCC->IOPENR |= RCC_IOPENR_GPIOAEN | RCC_IOPENR_GPIOBEN | RCC_IOPENR_GPIOCEN;

    /* TIM14 times the LED on-period: ADC1_IRQHandler() loads CCR1 and starts the
     * timer in one-pulse mode, and the compare interrupt switches the LEDs off.
     * No prescaler is used, so the period below is in timer clock cycles. */
    TIM14->CR1 = TIM_CR1_ARPE | TIM_CR1_OPM;
    TIM14->PSC = 0;
    static_assert(ADC_TRIGGER_PERIOD * (SAMPLES_PER_BAUD - LED_DEAD_TIME) * OVERSAMPLING_RATIO <= 0xffff,
            "TIM14 period must fit into 16 bits");
    TIM14->ARR = ADC_TRIGGER_PERIOD * (SAMPLES_PER_BAUD - LED_DEAD_TIME) * OVERSAMPLING_RATIO;
    TIM14->CCER = TIM_CCER_CC1E;
    TIM14->DIER = TIM_DIER_CC1IE;
    NVIC_EnableIRQ(TIM14_IRQn);
    NVIC_SetPriority(TIM14_IRQn, 1<<6);

    xfr_8b10b_reset((struct state_8b10b_dec *)&st_8b10b_dec);
    rx_pos = -1;
    packet_received = false;
    rng_reset = false;
    memset(&errors, 0, sizeof(errors));

    TIM3->CR1 = TIM_CR1_ARPE;
    TIM3->CR2 = (2<<TIM_CR2_MMS_Pos); /* Update event on TRGO */
    TIM3->PSC = 0;
    /* We sample 32 times per 1 kHz AC cycle, and use 16 times oversampling.
     *
     * The counter runs from 0 to ARR inclusive, so the update period is ARR + 1
     * cycles: ARR = 124 yields 64 MHz / 125 = 512.0 kHz. */
    TIM3->ARR = ADC_TRIGGER_PERIOD - 1;
    TIM3->CR1 |= TIM_CR1_CEN;

    ADC1->ISR = ADC_ISR_CCRDY | ADC_ISR_ADRDY; /* Clear CCRDY */
    ADC1->CR = ADC_CR_ADVREGEN;
    delay_us(20);
    /* Keep the voltage regulator enabled while starting calibration; writing
     * ADCAL alone would clear ADVREGEN again. */
    ADC1->CR = ADC_CR_ADVREGEN | ADC_CR_ADCAL;
    while (ADC1->CR & ADC_CR_ADCAL) {
        /* wait. */
    }
    ADC1->CFGR1 = (1<<ADC_CFGR1_EXTEN_Pos) | (3<<ADC_CFGR1_EXTSEL_Pos); /* TIM3 TRGO */
    ADC1->CFGR2 = (1<<ADC_CFGR2_CKMODE_Pos) | (3<<ADC_CFGR2_OVSR_Pos) | (0<<ADC_CFGR2_OVSS_Pos) | ADC_CFGR2_TOVS | ADC_CFGR2_OVSE;
    ADC1->CHSELR = (1<<4); /* Enable input 4 -> PA4 (Vdiff)*/
    while (!(ADC1->ISR & ADC_ISR_CCRDY)) {
        /* wait. */
    }
    ADC1->ISR = ADC_ISR_CCRDY; /* Clear CCRDY */
    ADC->CCR = ADC_CCR_TSEN | ADC_CCR_VREFEN;
    ADC1->CR = ADC_CR_ADVREGEN | ADC_CR_ADEN;
    while (!(ADC1->ISR & ADC_ISR_ADRDY)) {
        /* wait. */
    }
    ADC1->IER = ADC_IER_EOCIE;
    NVIC_EnableIRQ(ADC1_IRQn);
    NVIC_SetPriority(ADC1_IRQn, 0);
    ADC1->CR |= ADC_CR_ADSTART;

    GPIOA->MODER = OUT(0) | IN(1) | OUT(2) | OUT(3) | ANALOG(4) | OUT(5) | OUT(6) | IN(7) | ANALOG(9) | ANALOG(10) | OUT(11) | ANALOG(12)| AF(13) | AF(14);
    GPIOB->MODER = ANALOG(0) | OUT(3) | ANALOG(1) | ANALOG(2) | ANALOG(4) | ANALOG(5) | ANALOG(6) | ANALOG(8) | OUT(7) | ANALOG(9);
    GPIOC->MODER = OUT(15) | ANALOG(14) | ANALOG(9);

    DBG->APBFZ1 |= DBG_APB_FZ1_DBG_TIM3_STOP;
    DBG->APBFZ2 |= DBG_APB_FZ2_DBG_TIM1_STOP;
    while (42) {
        if (packet_received) {
            if (rng_reset) {
                /* First packet after a K28_1 symbol: restart the key stream. */
                packet_rng_state = xorshift32(1);
                rng_reset = false;
                GPIOB->BSRR = (1<<7);
            } else {
                GPIOB->BRR = (1<<7);
            }

            /* Descramble in place; only the low byte of each key word is used. */
            for(size_t i=0; i<sizeof(rx_buf.packet); i++) {
                packet_rng_state = xorshift32(packet_rng_state);
                rx_buf.bytes[i] ^= packet_rng_state;
            }

            /* The CRC covers everything in front of the crc field. */
            uint32_t crc_state = crc32_reset();
            for(size_t i=0; i<offsetof(struct data_packet, crc); i++) {
                crc_state = crc32_update(crc_state, rx_buf.bytes[i]);
            }
            crc_state = crc32_finalize(crc_state);

            if (crc_state == rx_buf.packet.crc) {
                GPIOA->BSRR = (1<<6);
                /* good packet received */
                /* Two modules share one brightness byte: even addresses use the
                 * low nibble, odd addresses the high nibble. */
                int val = rx_buf.packet.brightness[CONFIG_MODULE_ADDRESS/2];
                if (CONFIG_MODULE_ADDRESS & 1) {
                    val >>= 4;
                }
                /* Mask to 4 bits: the value indexes the 16-entry brightness_lut,
                 * and for even addresses the neighbour's nibble is still in val. */
                global_brightness = val & 0x0f;
                channel_mask = rx_buf.packet.channels[CONFIG_MODULE_ADDRESS];

            } else {
                GPIOA->BRR = (1<<6);
                errors.crc_errors++;
            }

            /* Hand the buffer back to the interrupt handler. */
            packet_received = false;
        }
    }
}

/* Debug capture ring buffers, written by ADC1_IRQHandler(). */

/* Return values of the 8b10b decoder (data bytes and K codes), one entry per decoded symbol. */
int16_t sym_dump[512];
size_t sym_dump_pos = 0;

/* One byte per ADC sample: bits 0-5 hold (sample >> 10), bit 6 marks a detected
 * edge, bit 7 marks the sample at which the bit value was taken. */
uint8_t adc_dump[32];
size_t adc_dump_pos = 0;

/* One entry (0 or 1) per sampled bit, as fed into the 8b10b decoder. */
uint8_t bit_dump[4096];
size_t bit_dump_pos = 0;

/* Set by ADC1_IRQHandler() each time bit_dump wraps around, i.e. once a full
 * buffer of bits has been captured. */
bool armed = false;
/* Debug hook: ADC1_IRQHandler() calls this when the decoder returns 0xfb while
 * armed is set; it only disarms the trigger again. Presumably intended as a
 * breakpoint location for dumping the capture buffers from gdb - confirm. */
void gdb_dump(void) {
    armed = false;
}

/* ADC end-of-conversion interrupt: software receiver and output sequencer.
 *
 * For every ADC sample this handler
 *  1. slices the sample into a bit using THRESHOLD_ADC_COUNTS,
 *  2. re-synchronizes the bit phase counter on every edge,
 *  3. at SAMPLING_PHASE feeds the bit into the 8b10b decoder and assembles
 *     decoded data symbols into rx_buf, and
 *  4. drives the rectifier and LED outputs depending on phase and bit value
 *     (the "set" writes are compiled out while DEBUG_DISABLE_DRIVERS is defined).
 *
 * Packet framing: K28_1 starts a packet and requests an RNG reset, K27_7
 * restarts the buffer if reception is active, any other K code aborts
 * reception. Data symbols are stored only while rx_pos >= 0.
 */
void ADC1_IRQHandler(void) {
    static int phase = 0;
    static int last_bit = 0;

    /* Read sample and apply threshold */
    int sample = ADC1->DR; /* resets the EOC interrupt flag */
    int bit = sample > THRESHOLD_ADC_COUNTS;
    int bit_margin = sample - THRESHOLD_ADC_COUNTS;
    if (bit_margin < 0) {
        bit_margin = -bit_margin;
    }

    adc_dump[adc_dump_pos] = (sample>>10) & 0x3f;

    /* Find edges and compute current phase */
    if (bit != last_bit) { /* rising or falling edge */
        phase = 0;
        adc_dump[adc_dump_pos] |= 0x40;

    } else {
        phase ++;
        if (phase == SAMPLES_PER_BAUD) {
            phase = 0;
        }
    }

    /* Trigger 8b10b sample */
    if (phase == SAMPLING_PHASE) {
        adc_dump[adc_dump_pos] |= 0x80;

        bit_dump[bit_dump_pos] = bit;
        bit_dump_pos++;
        if (bit_dump_pos == COUNT_OF(bit_dump)) {
            bit_dump_pos = 0;
            armed = true;
        }

        int rc = xfr_8b10b_feed_bit((struct state_8b10b_dec *)&st_8b10b_dec, bit);
        if (rc == 0xfb && armed) {
            gdb_dump();
        }

        if (rc > -K_CODES_LAST) {
            /* A complete symbol: non-negative values are data bytes, negative
             * values are negated K codes. */
            sym_dump[sym_dump_pos++] = rc;
            if (sym_dump_pos == COUNT_OF(sym_dump)) {
                sym_dump_pos = 0;
            }

            if (rc < 0) {
                if (rc == -K28_1) {
                    rng_reset = true;
                    rx_pos = 0;

                } else if (rc == -K27_7) {
                    if (rx_pos >= 0) {
                        rx_pos = 0;
                    }
                } else {
                    rx_pos = -1;
                }
            } else {
                if (packet_received) {
                    /* main() has not released the previous packet yet */
                    rx_pos = -1;
                    errors.processing_overflows++;

                } else if (rx_pos < 0) {
                    /* Not synchronized to a packet start: drop the byte.
                     * Storing it would write to rx_buf.bytes[-1] and silently
                     * restart reception without a start symbol. */

                } else if ((size_t)rx_pos >= sizeof(rx_buf.packet)) {
                    /* receive buffer overflow: drop the byte and wait for the
                     * next packet start */
                    rx_pos = -1;
                    errors.receive_overflows++;

                } else {
                    rx_buf.bytes[rx_pos] = rc;
                    rx_pos++;
                    if ((size_t)rx_pos == sizeof(rx_buf.packet)) {
                        packet_received = true;
                    }
                }
            }
        } else if (rc == -DECODING_ERROR) {
            errors.decoding_errors++;
        }
    }

    adc_dump_pos++;
    if (adc_dump_pos == COUNT_OF(adc_dump)) {
        adc_dump_pos = 0;
    }

    /* Trigger synchronous rectifier */
    if (phase == SAMPLES_PER_BAUD - LED_DEAD_TIME || bit != last_bit || bit_margin < MIN_RECTIFIER_MARGIN) { /* reset */
        /* Switch both rectifier outputs off near the end of the bit, on any
         * edge, and whenever the sample is too close to the threshold. */
        GPIOA->BRR = (1<<11); /* RECT1 */
        GPIOC->BRR = (1<<15); /* RECT2 */

    } else if (phase == LED_DEAD_TIME) { /* set */
        if (bit) {
#ifndef DEBUG_DISABLE_DRIVERS
            GPIOC->BSRR = (1<<15); /* RECT2 */
#endif
        } else {
#ifndef DEBUG_DISABLE_DRIVERS
            GPIOA->BSRR = (1<<11); /* RECT1 */
#endif
        }

        /* The high nibble of channel_mask applies while the bit is 1, the low
         * nibble while it is 0. */
        int nibble = (bit ? (channel_mask >> 4) : channel_mask) & 0x0f;
        int b0 = (nibble>>0) & 1;
        int b1 = (nibble>>1) & 1;
        int b2 = (nibble>>2) & 1;
        int b3 = (nibble>>3) & 1;

#ifndef DEBUG_DISABLE_DRIVERS
        GPIOA->BSRR = (b0<<2) | (b3<<3) | (b2<<5);
        GPIOB->BSRR = (b1<<3);
#else
        /* Drivers are disabled: the channel bits are intentionally unused. */
        (void)b0;
        (void)b1;
        (void)b2;
        (void)b3;
#endif
        /* Start the one-pulse LED timer; TIM14_IRQHandler() switches the LEDs
         * off at the compare match. The index is limited to 4 bits so it can
         * never run past the 16-entry table. */
        TIM14->CCR1 = brightness_lut[global_brightness & 0x0f];
        TIM14->CR1 |= TIM_CR1_CEN;
    }

    last_bit = bit;
}

/* TIM14 interrupt: end of the LED on-period started by ADC1_IRQHandler().
 * Clears the timer status register and drives all LED channel pins low. */
void TIM14_IRQHandler(void) {
    /* LED channel pins, matching the ones ADC1_IRQHandler() sets. */
    const uint32_t led_pins_port_a = (1<<2) | (1<<3) | (1<<5);
    const uint32_t led_pins_port_b = (1<<3);

    TIM14->SR = 0;

    GPIOA->BRR = led_pins_port_a;
    GPIOB->BRR = led_pins_port_b;
}

/* Busy-wait for approximately duration_us microseconds.
 *
 * The delay is a plain instruction loop (32 nop iterations per requested
 * microsecond), so it is not calibrated and scales with the core clock.
 * A zero or negative duration returns immediately.
 */
void delay_us(int duration_us) {
    /* Compare before decrementing so that negative arguments neither spin for
     * billions of iterations nor run into signed overflow. */
    for (; duration_us > 0; duration_us--) {
        for (int i=0; i<32; i++) {
            /* __asm__ is accepted in strict ISO C mode as well, plain asm is not */
            __asm__ volatile ("nop");
        }
    }
}

/* Minimal memset replacement: stores the low byte of c into each of the first
 * n bytes at s and returns s. */
void *memset(void *s, int c, size_t n) {
    uint8_t *dst = (uint8_t *)s;
    const uint8_t fill = (uint8_t)c;

    for (size_t i = 0; i < n; i++) {
        dst[i] = fill;
    }
    return s;
}

/* NMI exception: stop at a breakpoint instruction. */
void NMI_Handler(void) {
    __asm__ __volatile__ ("bkpt");
}

/* Hard fault exception: stop at a breakpoint instruction. The handler is
 * declared naked, so the body consists of this single instruction only. */
__attribute__((naked)) void HardFault_Handler(void) {
    __asm__ __volatile__ ("bkpt");
}

/* SVC exception: not used by this firmware's own code; stop at a breakpoint
 * instruction if it is ever taken. */
void SVC_Handler(void) {
    __asm__ __volatile__ ("bkpt");
}


/* PendSV exception: stop at a breakpoint instruction. */
void PendSV_Handler(void) {
    __asm__ __volatile__ ("bkpt");
}

/* Weak no-op definition of __libc_init_array; a strong definition from another
 * object replaces it at link time. Presumably called by the startup code -
 * confirm against the startup file. */
__attribute__((weak)) void __libc_init_array(void) {
}