aboutsummaryrefslogtreecommitdiff
path: root/fw/main.c
blob: 50c9cfa2851b480e359dc8538af9dbf53be5a999 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
#include <stm32f0xx.h>
#include <stm32f0xx_ll_utils.h>
#include <stm32f0xx_ll_spi.h>
#pragma GCC diagnostic pop

#include <system_stm32f0xx.h>

#include <stdint.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>

#include "transpose.h"
#include "mac.h"

/* 
 * Part number: STM32F030F4C6
 */

/* Register layout of the debug watchpoint (DWT) block that this file accesses at address 0xE0001000. Each member
 * corresponds to one 32-bit register; the per-field comments give the byte offset from the block base and the access
 * type. The RESERVEDn members only pad the layout so that the following registers land on their documented offsets. */
typedef struct
{
  volatile uint32_t CTRL;                   /*!< Offset: 0x000 (R/W)  Control Register */
  volatile uint32_t CYCCNT;                 /*!< Offset: 0x004 (R/W)  Cycle Count Register */
  volatile uint32_t CPICNT;                 /*!< Offset: 0x008 (R/W)  CPI Count Register */
  volatile uint32_t EXCCNT;                 /*!< Offset: 0x00C (R/W)  Exception Overhead Count Register */
  volatile uint32_t SLEEPCNT;               /*!< Offset: 0x010 (R/W)  Sleep Count Register */
  volatile uint32_t LSUCNT;                 /*!< Offset: 0x014 (R/W)  LSU Count Register */
  volatile uint32_t FOLDCNT;                /*!< Offset: 0x018 (R/W)  Folded-instruction Count Register */
  volatile uint32_t PCSR;                   /*!< Offset: 0x01C (R/ )  Program Counter Sample Register */
  volatile uint32_t COMP0;                  /*!< Offset: 0x020 (R/W)  Comparator Register 0 */
  volatile uint32_t MASK0;                  /*!< Offset: 0x024 (R/W)  Mask Register 0 */
  volatile uint32_t FUNCTION0;              /*!< Offset: 0x028 (R/W)  Function Register 0 */
           uint32_t RESERVED0[1];
  volatile uint32_t COMP1;                  /*!< Offset: 0x030 (R/W)  Comparator Register 1 */
  volatile uint32_t MASK1;                  /*!< Offset: 0x034 (R/W)  Mask Register 1 */
  volatile uint32_t FUNCTION1;              /*!< Offset: 0x038 (R/W)  Function Register 1 */
           uint32_t RESERVED1[1];
} DWT_Type;

/* Base address of the DWT register block. */
#define DWT ((DWT_Type *)0xE0001000)
/* Run-time pointer to the DWT registers; the dwt0_* helpers access the hardware through this variable. */
DWT_Type *dwt = DWT;

/* Set up DWT comparator 0 for the given address.
 *
 * addr: address to load into the COMP0 register. The MASK0 register is cleared to zero afterwards.
 *
 * This only programs the comparator; the comparator's function is selected separately through FUNCTION0. */
void dwt0_configure(volatile void *addr) {
    const uint32_t watched_address = (uint32_t)addr;

    dwt->COMP0 = watched_address;
    dwt->MASK0 = 0;
}

/* Values that dwt0_enable() writes into the DWT FUNCTION0 register. The names suggest read, write and read/write
 * watchpoints respectively -- NOTE(review): confirm the encodings against the ARM architecture documentation. */
enum DWT_Function {
    DWT_R = 5,
    DWT_W = 6,
    DWT_RW = 7
};

/* Select what DWT comparator 0 does by writing the given function code into the FUNCTION0 register.
 *
 * function: one of the enum DWT_Function values. */
void dwt0_enable(enum DWT_Function function) {
    const uint32_t function_code = function;

    dwt->FUNCTION0 = function_code;
}

/* Wait for about 0.2us by executing ten NOP instructions back to back.
 *
 * The duration depends on the core clock; the 0.2us figure presumably assumes the 50MHz clock that main() sets up
 * (ten cycles at 50MHz) -- NOTE(review): confirm if the clock configuration changes. The column/row comments below
 * only count the NOPs. */
static void tick(void) {
                    /* 1 */         /* 2 */         /* 3 */         /* 4 */         /* 5 */
    /*  5 */ __asm__("nop"); __asm__("nop"); __asm__("nop"); __asm__("nop"); __asm__("nop");
    /* 10 */ __asm__("nop"); __asm__("nop"); __asm__("nop"); __asm__("nop"); __asm__("nop");
}

/* Blocking SPI transmit: writes `data` to the SPI1 data register and then spins until the peripheral no longer reports
 * busy (SPI_SR_BSY cleared). */
void spi_send(int data) {
    SPI1->DR = data;

    for (;;) {
        if (!(SPI1->SR & SPI_SR_BSY)) {
            break;
        }
    }
}

/* Emit one software pulse on PA10 (labelled "Auxiliary strobe" in main()'s GPIO setup): set the pin, wait one tick(),
 * reset the pin. */
void strobe_aux(void) {
    GPIOA->BSRR = GPIO_BSRR_BS_10; /* set PA10 */
    tick();                        /* hold the pulse for the tick() delay */
    GPIOA->BSRR = GPIO_BSRR_BR_10; /* reset PA10 */
}

/* Emit one software pulse on PA9 (labelled "LED strobe" in main()'s GPIO setup): set the pin, wait one tick(), reset
 * the pin.
 *
 * NOTE(review): main() configures PA9 as an alternate-function pin (TIM1_CH2), so writes to BSRR presumably have no
 * effect on the pin in that configuration -- confirm whether this helper is still used. */
void strobe_leds(void) {
    GPIOA->BSRR = GPIO_BSRR_BS_9; /* set PA9 */
    tick();                       /* hold the pulse for the tick() delay */
    GPIOA->BSRR = GPIO_BSRR_BR_9; /* reset PA9 */
}

/* Version numbers reported to the host in the status reply (see send_status_reply()). */
#define FIRMWARE_VERSION 2
#define HARDWARE_VERSION 4

/* 16-bit values read from fixed memory addresses and used by the ADC conversion code in DMA1_Channel1_IRQHandler().
 * Presumably the factory calibration values for the temperature sensor and the internal voltage reference --
 * NOTE(review): confirm the addresses against the datasheet of the exact part. */
#define TS_CAL1 (*(uint16_t *)0x1FFFF7B8)
#define VREFINT_CAL (*(uint16_t *)0x1FFFF7BA)

/* Latest converted ADC results. Written by DMA1_Channel1_IRQHandler(), reported by send_status_reply(). */
volatile  int16_t adc_vcc_mv = 0;
volatile  int16_t adc_temp_celsius = 0;

/* Raw ADC samples. adc_config() points DMA1 channel 1 at this buffer in circular mode. */
volatile uint16_t adc_buf[2];

/* Uptime counters maintained by SysTick_Handler(): sys_time counts SysTick interrupts (main() configures a 1ms
 * interval), sys_time_seconds counts seconds. */
volatile unsigned int sys_time = 0;
volatile unsigned int sys_time_seconds = 0;

/* Double-buffered frame data. read_fb is what the timer ISRs display, write_fb is what main() fills via
 * transpose_data(). TIM1_CC_IRQHandler() swaps the two pointers when fb_op is FB_UPDATE. */
volatile struct framebuf fb[2] = {0};
volatile struct framebuf *read_fb=fb+0, *write_fb=fb+1;
/* Status LED bits. main() updates bits 0..2 (comm, error, id); TIM1_CC_IRQHandler() shifts the value left by one and
 * merges it into the auxiliary shift register word. */
volatile int led_state = 0;
/* Frame buffer hand-over state:
 *   FB_WRITE  - rx_buf may receive a new frame (initial state since the variable is zero-initialized),
 *   FB_FORMAT - a complete frame is in rx_buf, main() should transpose it into write_fb,
 *   FB_UPDATE - write_fb is ready, TIM1_CC_IRQHandler() should swap the buffers. */
volatile enum { FB_WRITE, FB_FORMAT, FB_UPDATE } fb_op;
/* UART receive buffer. All members overlay the same storage; byte_data is the raw byte view that the UART ISR writes
 * through, mac_data is the view used for 4-byte address packets. The zero-length `end` members carry no data. */
volatile union {
    struct __attribute__((packed)) { struct framebuf fb; uint8_t end[0]; } set_fb_rq;
    struct __attribute__((packed)) { uint8_t nbits;      uint8_t end[0]; } set_nbits_rq;
    uint8_t byte_data[0];
    uint32_t mac_data;
} rx_buf;

/* Bit masks for the status LEDs and for the LED current selection bits in the auxiliary shift register word.
 * SR_ILED_HIGH/SR_ILED_LOW are selected in TIM1_CC_IRQHandler() depending on the frame buffer's brightness field. */
#define LED_COMM     0x0001
#define LED_ERROR    0x0002
#define LED_ID       0x0004
#define SR_ILED_HIGH 0x0080
#define SR_ILED_LOW  0x0040

/* Begin a SysTick-based time measurement.
 *
 * Returns the current value of the SysTick counter register; pass it to stk_end() to obtain the elapsed count. */
unsigned int stk_start(void) {
    const unsigned int counter_snapshot = SysTick->VAL;

    return counter_snapshot;
}

/* Finish a SysTick-based time measurement.
 *
 * start: counter value previously returned by stk_start().
 *
 * Returns the difference between `start` and the current SysTick counter value, truncated to 24 bits. */
unsigned int stk_end(unsigned int start) {
    const unsigned int counter_now = SysTick->VAL;
    const unsigned int elapsed = start - counter_now;

    return elapsed & 0xffffff;
}

/* Return a microsecond timestamp built from the millisecond counter sys_time and the current SysTick counter value.
 *
 * The SysTick counter value is scaled to microseconds by dividing it by the number of core clock cycles per
 * microsecond, and the result is subtracted from 1000 before being added to the millisecond part. */
unsigned int stk_microseconds(void) {
    const unsigned int millisecond_part = sys_time * 1000;
    const unsigned int cycles_per_us = SystemCoreClock / 1000000;
    const unsigned int counter_us = SysTick->VAL / cycles_per_us;

    return millisecond_part + (1000 - counter_us);
}

/* Configure and enable the SPI1 controller that feeds the LED drivers and the auxiliary shift registers.
 *
 * Resulting configuration, as written below: I2S mode off, 16-bit data frames, master mode with software slave
 * management, bidirectional mode with output enabled (i.e. transmit-only on a single data line), CPOL and CPHA set,
 * baud rate prescaler field set to 1. The timer ISRs later modify the prescaler field on the fly. */
void cfg_spi1(void) {
    /* Configure SPI controller */
    SPI1->I2SCFGR = 0;
    /* Select 16-bit frames: clear the data size field once, then set the new value. */
    SPI1->CR2 &= ~SPI_CR2_DS_Msk;
    SPI1->CR2 |= LL_SPI_DATAWIDTH_16BIT;

    /* Baud rate PCLK/4 -> 12.5MHz */
    SPI1->CR1 =
          SPI_CR1_BIDIMODE
        | SPI_CR1_BIDIOE
        | SPI_CR1_SSM
        | SPI_CR1_SSI
        | SPI_CR1_SPE
        | (1<<SPI_CR1_BR_Pos)
        | SPI_CR1_MSTR
        | SPI_CR1_CPOL
        | SPI_CR1_CPHA;
    /* FIXME maybe try w/o BIDI */
}

/* This is a lookup table mapping segments to present a standard segment order on the UART interface. This is converted
 * into an internal representation once on startup in main(). The data type must be at least uint16. */
uint32_t segment_map[8] = {5, 7, 6, 4, 1, 3, 0, 2};

/* Duration of the last frame in microseconds, reported as a frame rate by send_status_reply(). NOTE(review): the only
 * code that would update this is currently commented out in TIM1_CC_IRQHandler(), so it stays at zero. */
static volatile int frame_duration_us;
/* Number of brightness bits in use. Read by cfg_timers_led() and reported by send_status_reply(). */
volatile int nbits = MAX_BITS;

/* Position inside the current frame: the bit plane being displayed (advanced by TIM3_IRQHandler(), reset by
 * TIM1_CC_IRQHandler()) and the segment being displayed (advanced by TIM1_CC_IRQHandler()). */
static unsigned int active_bit = 0;
static int active_segment = 0;

/* Bit timing base value. This is the lowest bit interval used in TIM1/TIM3 timer counts. */
#define PERIOD_BASE 4

/* This value is a constant offset added to every bit period to allow for the timer IRQ handler to execute. This is set
 * empirically using a debugger and a logic analyzer.
 *
 * This value is in TIM1/TIM3 timer counts. */
#define TIMER_CYCLES_FOR_SPI_TRANSMISSIONS 9

/* This value sets the point when the LED strobe is asserted after the begin of the current bit cycle and IRQ
 * processing. This must be less than TIMER_CYCLES_FOR_SPI_TRANSMISSIONS but must be large enough to allow for the SPI
 * transmission to reliably finish.
 *
 * This value is in TIM1/TIM3 timer counts. */
#define TIMER_CYCLES_BEFORE_LED_STROBE 8

/* This value sets how long the TIM1 CC IRQ used for AUX register setting etc. is triggered before the end of the
 * longest cycle. This value should not be larger than PERIOD_BASE<<MIN_BITS to make sure the TIM1 CC IRQ does only
 * trigger in the longest cycle no matter what nbits is set to.
 *
 * This value is in TIM1/TIM3 timer counts. */
#define AUX_SPI_PRETRIGGER 64 /* trigger with about 24us margin to the end of cycle/next TIM3 IRQ */

/* This value sets how long a batch of ADC conversions used for temperature measurement is started before the end of the
 * longest cycle. Here too the above caveats apply.
 *
 * This value is in TIM1/TIM3 timer counts. */
#define ADC_PRETRIGGER 150 /* trigger with about 12us margin to TIM1 CC IRQ */

/* Defines for brevity */
#define A TIMER_CYCLES_FOR_SPI_TRANSMISSIONS
#define B PERIOD_BASE

/* This is a constant offset containing some empirically determined correction values */
#define C (0)

/* This lookup table maps bit positions to timer period values. This is a lookup table to allow for the compensation for
 * non-linear effects of ringing at lower bit durations.
 *
 * The initializer lists eleven entries, so MAX_BITS is presumably 10 -- NOTE(review): confirm against the header that
 * defines it.
 */
static uint16_t timer_period_lookup[MAX_BITS+1] = {
    /* LSB here */
    A - C + (B<< 0),
    A - C + (B<< 1),
    A - C + (B<< 2),
    A - C + (B<< 3),
    A - C + (B<< 4),
    A - C + (B<< 5),
    A - C + (B<< 6),
    A - C + (B<< 7),
    A - C + (B<< 8),
    A - C + (B<< 9),
    /* The last entry repeats the bit 0 period instead of continuing the doubling sequence. TIM3_IRQHandler() loads
     * timer_period_lookup[active_bit] *after* incrementing active_bit, so this is the value that gets scheduled while
     * the last bit is being set up. NOTE(review): presumably intentional wrap-around to the first bit of the next
     * segment -- confirm, in particular for nbits < MAX_BITS. */
    A - C + (B<< 0),
    /* MSB here */
};

/* Don't pollute the global namespace */
#undef A
#undef B
#undef C

/* Configure and start timers 3 and 1, which together generate the LED multiplexing timing, and enable both of their
 * interrupts in the NVIC at priority 0. Reads the global nbits to place the TIM1 compare events. */
void cfg_timers_led() {
    /* Ok, so this part is unfortunately a bit involved.
     *
     * Because the GPIO alternate function assignments worked out that way, the LED driving logic uses timers 1 and 3.
     * Timer 1 is synchronized to timer 3. When timer 3 overflows, timer 1 is reset. Both use the same prescaler so both
     * are synchronous possibly modulo some propagation delay in the synchronization hardware.
     *
     * Timer 3:
     *  * The IRQ handler is set to trigger on overflow and
     *    * triggers the SPI transmissions to the LED drivers and
     *    * updates the timing logic with the delays for the next cycle
     *  * Compare unit 1 generates the !OE signal for the led drivers
     * Timer 1:
     *  * Compare unit 1 triggers the interrupt handler only in the longest bit cycle. The IRQ handler
     *    * transmits the data to the auxiliary shift registers and
     *    * swaps the frame buffers if pending
     *  * Compare unit 2 generates the led drivers' STROBE signal
     *  * Compare unit 4 is set up ADC_PRETRIGGER counts before the end of the longest bit cycle; adc_config() describes
     *    it as the ADC trigger
     *
     * The AUX_STROBE signal for the two auxiliary shift registers that deal with segment selection, current setting and
     * status leds is generated in software in both ISRs. TIM1's ISR resets this strobe once per segment, in the
     * longest bit cycle, and TIM3's ISR indiscriminately sets it again in every bit cycle.
     *
     * The reason both timers' IRQ handlers are used is that this way no big if/else statement is necessary to
     * distinguish between both cases. Timer 1's IRQ handler is set via CC1 to trigger a few cycles earlier than the end
     * of the longest bit cycle. This means that if both timers perform bit cycles of length 1, 2, 4, 8, 16 and 32
     * TIM1_CC1 will be set to trigger at count e.g. 28. This means it is only triggered once in the last timer cycle.
     */

    TIM3->CR2   = (2<<TIM_CR2_MMS_Pos); /* master mode: update */
    TIM3->CCMR1 = (6<<TIM_CCMR1_OC1M_Pos) | TIM_CCMR1_OC1PE; /* PWM Mode 1, enable CCR preload */
    TIM3->CCER  = TIM_CCER_CC1E;
    TIM3->CCR1  = TIMER_CYCLES_FOR_SPI_TRANSMISSIONS;
    TIM3->DIER  = TIM_DIER_UIE;
    /* NOTE(review): the original comment claims 0.20us/tick. With SystemCoreClock = 50MHz this expression evaluates to
     * 19, i.e. a divider of 20, which would presumably be 0.4us/tick -- confirm which one is intended. */
    TIM3->PSC   = SystemCoreClock/5000000 * 2 - 1; /* 0.20us/tick */
    TIM3->ARR   = 0xffff;
    TIM3->EGR  |= TIM_EGR_UG;
    TIM3->CR1   = TIM_CR1_ARPE;
    TIM3->CR1  |= TIM_CR1_CEN;

    /* Slave TIM1 to TIM3. */
    TIM1->PSC   = TIM3->PSC;
    TIM1->SMCR  = (2<<TIM_SMCR_TS_Pos) | (4<<TIM_SMCR_SMS_Pos); /* Internal Trigger 2 (ITR2) -> TIM3; slave mode: reset */

    /* Setup CC1 and CC2. CC2 generates the LED drivers' STROBE, CC1 triggers the IRQ handler */
    TIM1->BDTR  = TIM_BDTR_MOE;
    TIM1->CCMR1 = (6<<TIM_CCMR1_OC2M_Pos) | TIM_CCMR1_OC2PE; /* PWM Mode 1, enable CCR preload for the LED STROBE */
    TIM1->CCMR2 = (6<<TIM_CCMR2_OC4M_Pos); /* PWM Mode 1 */
    TIM1->CCER  = TIM_CCER_CC1E | TIM_CCER_CC2E | TIM_CCER_CC4E;
    TIM1->CCR2  = TIMER_CYCLES_BEFORE_LED_STROBE;
    /* Trigger at the end of the longest bit cycle. This means this does not trigger in shorter bit cycles. */
    TIM1->CCR1  = timer_period_lookup[nbits-1] - AUX_SPI_PRETRIGGER;
    TIM1->CCR4  = timer_period_lookup[nbits-1] - ADC_PRETRIGGER;
    TIM1->DIER  = TIM_DIER_CC1IE;

    TIM1->ARR   = 0xffff; /* This is as large as possible since TIM1 is reset by TIM3. */
    /* Preload all values */
    TIM1->EGR  |= TIM_EGR_UG;
    TIM1->CR1   = TIM_CR1_ARPE;
    /* And... go! */
    TIM1->CR1  |= TIM_CR1_CEN;

    /* Sends aux data and swaps frame buffers if necessary */
    NVIC_EnableIRQ(TIM1_CC_IRQn);
    NVIC_SetPriority(TIM1_CC_IRQn, 0);
    /* Sends LED data and sets up the next bit cycle's timings */
    NVIC_EnableIRQ(TIM3_IRQn);
    NVIC_SetPriority(TIM3_IRQn, 0);
}

/* TIM1 capture/compare interrupt. cfg_timers_led() arranges for this to fire once per segment, shortly before the end
 * of the longest bit cycle. It advances to the next segment, swaps the frame buffers at the end of a frame if main()
 * has prepared a new one, and queues the auxiliary shift register word (segment select, LED current, status LEDs)
 * into the SPI peripheral. PA0 is raised for the duration of the handler as a debug signal. */
void TIM1_CC_IRQHandler() {
    /* This handler takes about 1.5us */
    GPIOA->BSRR = GPIO_BSRR_BS_0; // Debug

    /* Set SPI baudrate to 12.5MBd for slow-ish 74HC(T)595. This is reset again in TIM3's IRQ handler.
     * NOTE(review): this ORs 2 into the prescaler field, which TIM3's handler cleared to 0 before. A field value of 2
     * presumably selects PCLK/8 rather than PCLK/4, which would not be 12.5MBd at 50MHz -- confirm. */
    SPI1->CR1 |= (2<<SPI_CR1_BR_Pos);

    /* Advance bit counts and perform pending frame buffer swap */
    active_bit = 0;
    active_segment++;
    if (active_segment == NSEGMENTS) {
        active_segment = 0;

        /* FIXME remove this?
        int time = stk_microseconds();
        frame_duration_us = time - last_frame_time;
        last_frame_time = time;
        */
        /* Frame buffer swap */
        if (fb_op == FB_UPDATE) {
            volatile struct framebuf *tmp = read_fb;
            read_fb = write_fb;
            write_fb = tmp;
            fb_op = FB_WRITE; /* hand the (new) write buffer back to the receive path */
        }
    }

    /* Reset aux strobe */
    GPIOA->BSRR = GPIO_BSRR_BR_10;
    /* Send AUX register data. segment_map[] holds pre-computed register values at this point (see main()), so it can
     * be ORed in directly. */
    uint32_t aux_reg = (read_fb->brightness ? SR_ILED_HIGH : SR_ILED_LOW) | (led_state<<1);
    SPI1->DR = aux_reg | segment_map[active_segment];

    /* Clear interrupt flag.
     * NOTE(review): this is a read-modify-write of the status register; a flag that becomes set between the read and
     * the write would presumably be cleared as well. Only CC1 is used as an interrupt source here, so this looks
     * harmless, but a plain write of the inverted mask may be preferable -- confirm with the reference manual. */
    TIM1->SR &= ~TIM_SR_CC1IF_Msk;

    GPIOA->BSRR = GPIO_BSRR_BR_0; // Debug
}

/* TIM3 update interrupt, fired at the start of every bit cycle. It sends the 32-bit LED driver word for the current
 * bit plane and segment as two 16-bit SPI transfers, advances active_bit and programs the timer period for the next
 * cycle. PA0 is raised for the duration of the handler as a debug signal. */
void TIM3_IRQHandler() {
    /* This handler takes about 2.1us */
    GPIOA->BSRR = GPIO_BSRR_BS_0; // Debug

    /* Reset SPI baudrate to 25MBd for fast MBI5026. Every couple of cycles, TIM1's ISR will set this to a slower value
     * for the slower AUX registers.*/
    SPI1->CR1 &= ~SPI_CR1_BR_Msk;
    /* Assert aux strobe reset by TIM1's IRQ handler */
    GPIOA->BSRR = GPIO_BSRR_BS_10;

    /* Queue LED driver data into SPI peripheral. The upper half-word goes out first. */
    uint32_t spi_word = read_fb->data[active_bit*FRAME_SIZE_WORDS + active_segment];
    SPI1->DR = spi_word>>16;
    spi_word &= 0xFFFF;
    /* Note that this only waits until the internal FIFO is ready, not until all data has been sent. */
    while (!(SPI1->SR & SPI_SR_TXE));
    SPI1->DR = spi_word;

    /* Advance bit. This will overflow, but that is OK since before the next invocation of this ISR, the other ISR will
     * reset it. */
    active_bit++;
    /* Schedule next bit cycle. TIM3 runs with ARR preload enabled (see cfg_timers_led()), so the value written here is
     * presumably taken over at the next update event rather than immediately. */
    TIM3->ARR = timer_period_lookup[active_bit];

    /* Clear interrupt flag (read-modify-write of the status register, same caveat as in TIM1_CC_IRQHandler()). */
    TIM3->SR &= ~TIM_SR_UIF_Msk;

    GPIOA->BSRR = GPIO_BSRR_BR_0; // Debug
}

/* Configure and enable USART1 for the RS485 bus: 8 data bits, no parity, transmitter and receiver enabled, hardware
 * driver-enable (DE) output, receive interrupt enabled at NVIC priority 1. */
void uart_config(void) {
    USART1->CR1 = /* 8-bit -> M1, M0 clear */
        /* RTOIE clear */
          (8 << USART_CR1_DEAT_Pos) /* 8 sample cycles/1 bit DE assertion time */
        | (8 << USART_CR1_DEDT_Pos) /* 8 sample cycles/1 bit DE deassertion time */
        /* OVER8 clear. Use default 16x oversampling */
        /* CMIF clear */
        | USART_CR1_MME
        /* WAKE clear */
        /* PCE, PS clear */
        | USART_CR1_RXNEIE /* Enable receive interrupt */
        /* other interrupts clear */
        | USART_CR1_TE
        | USART_CR1_RE;
    //USART1->CR2 = USART_CR2_RTOEN; /* Timeout enable */
    USART1->CR3 = USART_CR3_DEM; /* RS485 DE enable (output on RTS) */
    /* Set the baud rate divider. NOTE(review): the original comment said "25MHz baud rate @50MHz system clock"; with
     * 16x oversampling a divider of 25 at a 50MHz clock presumably yields 50MHz/25 = 2MBd -- confirm. */
    int usartdiv = 25;
    USART1->BRR = usartdiv;

    /* And... go! */
    USART1->CR1 |= USART_CR1_UE;

    /* Enable receive interrupt */
    NVIC_EnableIRQ(USART1_IRQn);
    NVIC_SetPriority(USART1_IRQn, 1);
}

/* Minimum time in milliseconds a status LED stays lit after the event that triggered it. */
#define LED_STRETCHING_MS 50
/* Remaining on-time of each status LED in milliseconds. Reloaded by the trigger_*_led() functions, counted down to
 * zero by the main loop. An LED is lit while its counter is non-zero. */
static volatile int error_led_timeout = 0;
static volatile int comm_led_timeout = 0;
static volatile int id_led_timeout = 0;

/* Light the error LED: reloads its timeout counter with the full LED_STRETCHING_MS interval. */
void trigger_error_led() {
    const int reload_ms = LED_STRETCHING_MS;

    error_led_timeout = reload_ms;
}

/* Light the communication LED: reloads its timeout counter with the full LED_STRETCHING_MS interval. */
void trigger_comm_led() {
    const int reload_ms = LED_STRETCHING_MS;

    comm_led_timeout = reload_ms;
}

/* Light the id LED: reloads its timeout counter with the full LED_STRETCHING_MS interval. */
void trigger_id_led() {
    const int reload_ms = LED_STRETCHING_MS;

    id_led_timeout = reload_ms;
}

/* Error counters for debugging. All three are reported to the host by send_status_reply().
 *   uart_overruns  - incremented by USART1_IRQHandler() when the UART reports a receive overrun,
 *   frame_overruns - incremented by packet_received() when a complete frame arrives before the previous one has been
 *                    handed over to the display,
 *   invalid_frames - incremented by packet_received() for packets that are unexpected or of unknown length. */
static unsigned int uart_overruns = 0;
static unsigned int frame_overruns = 0;
static unsigned int invalid_frames = 0;

/* Blocking single-byte UART transmit: spins until USART1 reports transmission complete (USART_ISR_TC set), then
 * writes `c` to the transmit data register. */
void tx_char(uint8_t c) {
    for (;;) {
        if (USART1->ISR & USART_ISR_TC) {
            break;
        }
    }

    USART1->TDR = c;
}

/* Transmit `len` bytes from `buf` as one COBS-encoded frame, followed by the 0x00 frame delimiter.
 *
 * buf: payload bytes (not modified).
 * len: number of payload bytes; may be zero, in which case the empty frame "01 00" is sent.
 *
 * The payload is split at its zero bytes. Every group is sent as a code byte (group length plus one) followed by the
 * group's non-zero bytes. A payload that ends in a zero byte gets a final, empty group so that the receiver can
 * reconstruct the trailing zero, e.g. the payload "00" is sent as "01 01 00".
 *
 * Limitation: runs of 255 or more non-zero bytes are not supported since the code byte is a single byte and no 0xFF
 * block splitting is implemented. This matches the receiver in USART1_IRQHandler(), which treats every code byte as
 * implying a zero byte.
 *
 * This function blocks until all bytes have been handed to the UART (see tx_char()). */
void send_frame_formatted(uint8_t *buf, int len) {
    int group_start = 0;

    for (;;) {
        /* Find the end of the current group. The bounds check comes first so that buf[len] is never read. */
        int group_end = group_start;
        while (group_end < len && buf[group_end] != 0) {
            group_end++;
        }

        /* Code byte, then the group's non-zero bytes */
        tx_char(group_end - group_start + 1);
        while (group_start < group_end) {
            tx_char(buf[group_start++]);
        }

        /* The group was terminated by the end of the payload and not by a zero byte: done. */
        if (group_end >= len) {
            break;
        }

        /* Skip the zero byte. If it was the last payload byte, the next iteration emits the empty final group. */
        group_start = group_end + 1;
    }

    tx_char('\0');
}

/* Transmit buffer. desc_reply is the packed layout of the status reply filled in by send_status_reply(); byte_data is
 * the raw byte view of the same storage that is handed to send_frame_formatted(). */
union {
    struct __attribute__((packed)) {
        uint8_t  firmware_version,
                 hardware_version,
                 digit_rows,
                 digit_cols;
        uint32_t uptime_s,
                 framerate_millifps,
                 uart_overruns,
                 frame_overruns,
                 invalid_frames;
         int16_t vcc_mv,
                 temp_celsius;
         uint8_t nbits;
    } desc_reply;
    uint8_t byte_data[0];
} tx_buf;

/* Fill tx_buf.desc_reply with the current device status (versions, display geometry, uptime, supply voltage,
 * temperature, bit depth, frame rate and error counters) and transmit it as one COBS frame. Blocks until the reply
 * has been handed to the UART. */
void send_status_reply(void) {
    tx_buf.desc_reply.firmware_version = FIRMWARE_VERSION;
    tx_buf.desc_reply.hardware_version = HARDWARE_VERSION;
    tx_buf.desc_reply.digit_rows = NROWS;
    tx_buf.desc_reply.digit_cols = NCOLS;
    tx_buf.desc_reply.uptime_s = sys_time_seconds;
    tx_buf.desc_reply.vcc_mv = adc_vcc_mv;
    tx_buf.desc_reply.temp_celsius = adc_temp_celsius;
    tx_buf.desc_reply.nbits = nbits;
    /* Frames per 1000 seconds, computed from the frame duration in microseconds. Zero if no duration is known. */
    tx_buf.desc_reply.framerate_millifps = frame_duration_us > 0 ? 1000000000 / frame_duration_us : 0;
    tx_buf.desc_reply.uart_overruns = uart_overruns;
    tx_buf.desc_reply.frame_overruns = frame_overruns;
    tx_buf.desc_reply.invalid_frames = invalid_frames;
    send_frame_formatted(tx_buf.byte_data, sizeof(tx_buf.desc_reply));
}

/* This is the higher-level protocol handler for the serial protocol. It gets passed the number of data bytes in this
 * frame (which may be zero) and returns a pointer to the buffer where the next frame should be stored.
 *
 * Packets are told apart by their length only:
 *   0 bytes                 - discovery packet,
 *   1 byte                  - command packet (only command 0x01, status request, is handled),
 *   4 bytes                 - address packet containing a device MAC,
 *   half a framebuffer      - one of the two halves of a frame buffer transfer,
 *   anything else           - invalid.
 *
 * This function is called from USART1_IRQHandler(), i.e. in interrupt context. The status and discovery replies are
 * sent synchronously from here.
 */
volatile uint8_t *packet_received(int len) {
    /* Protocol state, kept across calls. Starts out ignoring everything until this device is addressed. */
    static enum {
        PROT_ADDRESSED = 0,
        PROT_EXPECT_FRAME_SECOND_HALF = 1,
        PROT_IGNORE = 2,
    } protocol_state = PROT_IGNORE; 
    /* Use mac frames as delimiters to synchronize this protocol layer */
    trigger_comm_led();
    if (len == 0) { /* Discovery packet */
        if (sys_time < 100 && sys_time_seconds == 0) { /* Only respond during the first 100ms after boot */
            send_frame_formatted((uint8_t*)&device_mac, sizeof(device_mac));
        }

    } else if (len == 1) { /* Command packet */
        if (protocol_state == PROT_ADDRESSED) {
            switch (rx_buf.byte_data[0]) {
            case 0x01: /* Status request. PA4 is raised while the reply is being sent as a debug signal. */
                GPIOA->BSRR = GPIO_BSRR_BS_4; // Debug
                //for (int i=0; i<100; i++)
                //    tick();
                send_status_reply();
                GPIOA->BSRR = GPIO_BSRR_BR_4; // Debug
                break;
            }
        } else {
            invalid_frames++;
            trigger_error_led();
        }
        protocol_state = PROT_IGNORE;

    } else if (len == 4) { /* Address packet */
        if (rx_buf.mac_data == device_mac) { /* we are addressed */
            protocol_state = PROT_ADDRESSED; /* start listening for frame buffer data */
        } else { /* we are not addressed */
            protocol_state = PROT_IGNORE; /* ignore packet */
        }

    } else if (len == sizeof(rx_buf.set_fb_rq)/2) {
        if (protocol_state == PROT_ADDRESSED) { /* First of two half-framebuffer data frames */
            protocol_state = PROT_EXPECT_FRAME_SECOND_HALF;
            /* Return second half of receive buffer */
            return rx_buf.byte_data + (sizeof(rx_buf.set_fb_rq)/2);

        } else if (protocol_state == PROT_EXPECT_FRAME_SECOND_HALF) { /* Second of two half-framebuffer data frames */
            /* Kick off buffer transfer. This triggers the main loop to copy data out of the receive buffer and paste it
             * properly formatted into the frame buffer. */
            if (fb_op == FB_WRITE) {
                fb_op = FB_FORMAT;
                trigger_id_led();
            } else {
                /* FIXME An overrun happened. What should we do? */
                frame_overruns++;
                trigger_error_led();
            }

            /* Go to "hang mode" until next zero-length packet. */
            protocol_state = PROT_IGNORE;
        }
        /* In PROT_IGNORE state, half-framebuffer packets are silently dropped. */

    } else {
        /* FIXME An invalid packet has been received. What should we do? */
        invalid_frames++;
        trigger_error_led();
        protocol_state = PROT_IGNORE; /* go into "hang mode" until next zero-length packet */
    }

    /* By default, return rx_buf.byte_data . This means if an invalid protocol state is reached ("hang mode"), the next
     * frame is still written to rx_buf. This is not a problem since whatever garbage is written at that point will be
     * overwritten before the next buffer transfer. */
    return rx_buf.byte_data;
}

void USART1_IRQHandler(void) {
    /* Since a large amount of data will be shoved down this UART interface we need a more reliable and more efficient
     * way of framing than just waiting between transmissions.
     *
     * This code uses "Consistent Overhead Byte Stuffing" (COBS). For details, see its Wikipedia page[0] or the proper
     * scientific paper[1] published on it. Roughly, it works like this:
     *
     * * A frame is at most 254 bytes in length.
     * * The null byte 0x00 acts as a frame delimiter. There is no null bytes inside frames.
     * * Every frame starts with an "overhead" byte indicating the number of non-null payload bytes until the next null
     *   byte in the payload, **plus one**. This means this byte can never be zero.
     * * Every null byte in the payload is replaced by *its* distance to *its* next null byte as above.
     *
     * This means, at any point the receiver can efficiently be synchronized on the next frame boundary by simply
     * waiting for a null byte. After that, only a simple state machine is necessary to strip the overhead byte and a
     * counter to then count skip intervals.
     *
     * Here is Wikipedia's table of example values:
     *
     *    Unencoded data          Encoded with COBS
     *    00                      01 01 00
     *    00 00                   01 01 01 00
     *    11 22 00 33             03 11 22 02 33 00
     *    11 22 33 44             05 11 22 33 44 00
     *    11 00 00 00             02 11 01 01 01 00
     *    01 02 ...FE             FF 01 02 ...FE 00
     *
     * [0] https://en.wikipedia.org/wiki/Consistent_Overhead_Byte_Stuffing
     * [1] Cheshire, Stuart; Baker, Mary (1999). "Consistent Overhead Byte Stuffing"
     *     IEEE/ACM Transactions on Networking. doi:10.1109/90.769765
     *     http://www.stuartcheshire.org/papers/COBSforToN.pdf
     */

    /* This pointer stores where we write data. The higher-level protocol logic decides on a frame-by-frame-basis where
     * the next frame's data will be stored. */
    static volatile uint8_t *writep = rx_buf.byte_data;
    /* Index inside the current frame payload */
    static int rxpos = 0;
    /* COBS state machine. This implementation might be a little too complicated, but it works well enough and I find it
     * reasonably easy to understand. */
    static enum {
        COBS_WAIT_SYNC = 0,  /* Synchronize with frame */
        COBS_WAIT_START = 1, /* Await overhead byte */
        COBS_RUNNING = 2     /* Process payload */
    } cobs_state = 0;
    /* COBS skip counter. During payload processing this contains the remaining non-null payload bytes */
    static int cobs_count = 0;

    if (USART1->ISR & USART_ISR_ORE) { /* Overrun handling */
        uart_overruns++;
        trigger_error_led();
        /* Reset and re-synchronize. Retry next frame. */
        rxpos = 0;
        cobs_state = COBS_WAIT_SYNC;
        /* Clear interrupt flag */
        USART1->ICR = USART_ICR_ORECF;

    } else { /* Data received */
        uint8_t data = USART1->RDR; /* This automatically acknowledges the IRQ */

        if (data == 0x00) { /* End-of-packet */
            /* Process higher protocol layers on this packet. */
            writep = packet_received(rxpos);

            /* Reset for next packet. */
            cobs_state = COBS_WAIT_START;
            rxpos = 0;

        } else { /* non-null byte */
            if (cobs_state == COBS_WAIT_SYNC) { /* Wait for null byte */
                /* ignore data */

            } else if (cobs_state == COBS_WAIT_START) { /* Overhead byte */
                cobs_count = data;
                cobs_state = COBS_RUNNING;

            } else { /* Payload byte */
                if (--cobs_count == 0) { /* Skip byte */
                    cobs_count = data;
                    data = 0;
                }

                /* Write processed payload byte to current receive buffer */
                writep[rxpos++] = data;
            }
        }
    }
}

/* log2 of the number of ADC sample pairs that are averaged into one result (2^8 = 256 samples). */
#define ADC_OVERSAMPLING 8
/* NOTE(review): not referenced by any code visible in this file -- confirm whether it is still needed. */
uint32_t vsense;
/* DMA1 channel 1 interrupt. adc_config() enables it for the transfer-complete event of the ADC DMA channel. Each
 * invocation adds the two raw samples in adc_buf to an accumulator; after 2^ADC_OVERSAMPLING invocations the averages
 * are converted into the supply voltage (adc_vcc_mv) and the temperature (adc_temp_celsius) and the accumulators are
 * reset. */
void DMA1_Channel1_IRQHandler(void) {
    /* This interrupt takes either 1.2us or 13us. It can be pre-empted by the more timing-critical UART and LED timer
     * interrupts. */
    static int count = 0; /* oversampling accumulator sample count */
    static uint32_t adc_aggregate[2] = {0, 0}; /* oversampling accumulator */

    /* Clear the interrupt flag */
    DMA1->IFCR |= DMA_IFCR_CGIF1;

    /* NOTE(review): the conversion below treats index 0 as the internal reference voltage reading and index 1 as the
     * temperature sensor reading. adc_config() selects channels 16 and 17 -- confirm that the scan order and the
     * channel assignment of the part actually deliver the samples in this order. */
    adc_aggregate[0] += adc_buf[0];
    adc_aggregate[1] += adc_buf[1];

    if (++count == (1<<ADC_OVERSAMPLING)) {
        /* This has been copied from the code examples to section 12.9 ADC>"Temperature sensor and internal reference
         * voltage" in the reference manual with the extension that we actually measure the supply voltage instead of
         * hardcoding it. This is not strictly necessary since we're running off a bored little LDO but it's free and
         * the current supply voltage is a nice health value.
         */
        /* Supply voltage in millivolts from the averaged reference reading and its calibration value. */
        adc_vcc_mv = (3300 * VREFINT_CAL)/(adc_aggregate[0]>>ADC_OVERSAMPLING);
        /* Scale the averaged temperature reading to a 3300mV supply, take the difference to the calibration value and
         * convert it using a fixed slope. The result is offset by 30, the temperature the calibration value presumably
         * refers to. */
        int32_t temperature = (((uint32_t)TS_CAL1) - ((adc_aggregate[1]>>ADC_OVERSAMPLING) * adc_vcc_mv / 3300)) * 1000;
        temperature = (temperature/5336) + 30;
        adc_temp_celsius = temperature;

        /* Start over with the next batch of samples */
        count = 0;
        adc_aggregate[0] = 0;
        adc_aggregate[1] = 0;
    }
}

/* Configure the ADC for timer-triggered conversions of two internal channels and set up DMA1 channel 1 to copy the
 * results into adc_buf in circular mode. The DMA transfer-complete interrupt is enabled at NVIC priority 3; the
 * samples are processed in DMA1_Channel1_IRQHandler(). */
void adc_config(void) {
    /* The ADC is used for temperature measurement. To compute the temperature from an ADC reading of the internal
     * temperature sensor, the supply voltage must also be measured. Thus we are using two channels.
     *
     * The ADC is triggered by compare channel 4 of timer 1. The trigger is set to falling edge to trigger on compare
     * match, not overflow.
     */
    ADC1->CFGR1 = ADC_CFGR1_DMAEN | ADC_CFGR1_DMACFG | (2<<ADC_CFGR1_EXTEN_Pos) | (1<<ADC_CFGR1_EXTSEL_Pos);
    /* Clock from PCLK/4 instead of the internal exclusive high-speed RC oscillator. */
    ADC1->CFGR2 = (2<<ADC_CFGR2_CKMODE_Pos);
    /* Use the slowest available sample rate */
    ADC1->SMPR  = (7<<ADC_SMPR_SMP_Pos);
    /* Internal VCC and temperature sensor channels */
    ADC1->CHSELR = ADC_CHSELR_CHSEL16 | ADC_CHSELR_CHSEL17;
    /* Enable internal voltage reference and temperature sensor */
    ADC->CCR = ADC_CCR_TSEN | ADC_CCR_VREFEN;
    /* Perform ADC calibration and wait for the hardware to clear the calibration bit again */
    ADC1->CR |= ADC_CR_ADCAL;
    while (ADC1->CR & ADC_CR_ADCAL)
        ;
    /* Enable ADC */
    ADC1->CR |= ADC_CR_ADEN;
    /* Start conversions. With the external trigger enabled above, conversions presumably wait for the timer trigger. */
    ADC1->CR |= ADC_CR_ADSTART;

    /* Configure DMA 1 Channel 1 to get rid of all the data */
    DMA1_Channel1->CPAR = (unsigned int)&ADC1->DR;
    DMA1_Channel1->CMAR = (unsigned int)&adc_buf;
    DMA1_Channel1->CNDTR = sizeof(adc_buf)/sizeof(adc_buf[0]); /* one transfer per adc_buf element */
    DMA1_Channel1->CCR = (0<<DMA_CCR_PL_Pos);
    DMA1_Channel1->CCR |=
          DMA_CCR_CIRC /* circular mode so we can leave it running indefinitely */
        | (1<<DMA_CCR_MSIZE_Pos) /* 16 bit */
        | (1<<DMA_CCR_PSIZE_Pos) /* 16 bit */
        | DMA_CCR_MINC
        | DMA_CCR_TCIE; /* Enable transfer complete interrupt. */
    DMA1_Channel1->CCR |= DMA_CCR_EN; /* Enable channel */

    /* triggered on transfer completion. We use this to process the ADC data */
    NVIC_EnableIRQ(DMA1_Channel1_IRQn);
    NVIC_SetPriority(DMA1_Channel1_IRQn, 3);
}

/* Firmware entry point. Brings up the clock tree, peripheral clocks and GPIOs, initializes SPI, the LED timers,
 * SysTick, the UART and the ADC and then runs the main loop forever. The main loop only does two things: it times out
 * the status LEDs and it converts completely received frames into the display format. Everything else happens in
 * interrupt handlers. */
int main(void) {
    /* Start the external oscillator and run the PLL from it; then switch the system clock to the PLL. */
    RCC->CR |= RCC_CR_HSEON;
    while (!(RCC->CR&RCC_CR_HSERDY));
    RCC->CFGR &= ~RCC_CFGR_PLLMUL_Msk & ~RCC_CFGR_SW_Msk & ~RCC_CFGR_PPRE_Msk & ~RCC_CFGR_HPRE_Msk;
    RCC->CFGR |= (2<<RCC_CFGR_PLLMUL_Pos) | RCC_CFGR_PLLSRC_HSE_PREDIV; /* PLL x4 -> 50.0MHz */
    RCC->CFGR2 &= ~RCC_CFGR2_PREDIV_Msk;
    RCC->CFGR2 |= RCC_CFGR2_PREDIV_DIV2; /* prediv :2 -> 12.5MHz */
    RCC->CR |= RCC_CR_PLLON;
    while (!(RCC->CR&RCC_CR_PLLRDY));
    RCC->CFGR |= (2<<RCC_CFGR_SW_Pos);
    SystemCoreClockUpdate();

    /* Turn on lots of neat things */
    RCC->AHBENR  |= RCC_AHBENR_GPIOAEN | RCC_AHBENR_DMAEN | RCC_AHBENR_CRCEN | RCC_AHBENR_FLITFEN;
    RCC->APB2ENR |= RCC_APB2ENR_SPI1EN | RCC_APB2ENR_USART1EN | RCC_APB2ENR_SYSCFGEN | RCC_APB2ENR_ADCEN | RCC_APB2ENR_DBGMCUEN | RCC_APB2ENR_TIM1EN;
    RCC->APB1ENR |= RCC_APB1ENR_TIM3EN;

    /* Pin modes: 1 = general purpose output, 2 = alternate function */
    GPIOA->MODER |=
          (1<<GPIO_MODER_MODER0_Pos)  /* PA0  - Debug */
        | (2<<GPIO_MODER_MODER1_Pos)  /* PA1  - RS485 DE */
        | (2<<GPIO_MODER_MODER2_Pos)  /* PA2  - RS485 TX */
        | (2<<GPIO_MODER_MODER3_Pos)  /* PA3  - RS485 RX */
        | (1<<GPIO_MODER_MODER4_Pos)  /* PA4  - Debug */
        | (2<<GPIO_MODER_MODER5_Pos)  /* PA5  - SCLK */
        | (2<<GPIO_MODER_MODER6_Pos)  /* PA6  - LED !OE */
        | (2<<GPIO_MODER_MODER7_Pos)  /* PA7  - MOSI */
        | (2<<GPIO_MODER_MODER9_Pos)  /* PA9  - LED strobe */
        | (1<<GPIO_MODER_MODER10_Pos);/* PA10 - Auxiliary strobe */

    /* Set shift register IO GPIO output speed */
    GPIOA->OSPEEDR |=
          (2<<GPIO_OSPEEDR_OSPEEDR0_Pos)   /* Debug */
        | (2<<GPIO_OSPEEDR_OSPEEDR1_Pos)   /* RS485 DE */
        | (2<<GPIO_OSPEEDR_OSPEEDR2_Pos)   /* TX */
        | (2<<GPIO_OSPEEDR_OSPEEDR3_Pos)   /* RX */
        | (2<<GPIO_OSPEEDR_OSPEEDR4_Pos)   /* Debug */
        | (2<<GPIO_OSPEEDR_OSPEEDR5_Pos)   /* SCLK */
        | (2<<GPIO_OSPEEDR_OSPEEDR6_Pos)   /* LED !OE   */
        | (2<<GPIO_OSPEEDR_OSPEEDR7_Pos)   /* MOSI */
        | (2<<GPIO_OSPEEDR_OSPEEDR9_Pos)   /* LED strobe */
        | (2<<GPIO_OSPEEDR_OSPEEDR10_Pos); /* Auxiliary strobe */

    /* Alternate function selection for the pins set to alternate function mode above */
    GPIOA->AFR[0] |=
          (1<<GPIO_AFRL_AFRL1_Pos)   /* USART1_RTS (DE) */
        | (1<<GPIO_AFRL_AFRL2_Pos)   /* USART1_TX */
        | (1<<GPIO_AFRL_AFRL3_Pos)   /* USART1_RX */
        | (0<<GPIO_AFRL_AFRL5_Pos)   /* SPI1_SCK */
        | (1<<GPIO_AFRL_AFRL6_Pos)   /* TIM3_CH1 */
        | (0<<GPIO_AFRL_AFRL7_Pos);  /* SPI1_MOSI */
    GPIOA->AFR[1] |=
          (2<<GPIO_AFRH_AFRH1_Pos);  /* TIM1_CH2 */

    GPIOA->PUPDR |=
          (2<<GPIO_PUPDR_PUPDR1_Pos)  /* RS485 DE: Pulldown */
        | (1<<GPIO_PUPDR_PUPDR2_Pos)  /* TX */
        | (1<<GPIO_PUPDR_PUPDR3_Pos); /* RX */

    cfg_spi1();

    /* Pre-compute aux register values for timer ISR. Each segment index is replaced in place by a word in which, of
     * bits 8..15, only the bit belonging to that segment is cleared. */
    for (int i=0; i<NSEGMENTS; i++) {
        segment_map[i] = 0xff00 ^ (0x100<<segment_map[i]);
    }

    /* Clear frame buffer */
    read_fb->brightness = 1;
    for (int i=0; i<sizeof(read_fb->data)/sizeof(uint32_t); i++) {
        read_fb->data[i] = 0xffffffff; /* FIXME DEBUG 0x00000000; */
    }

    cfg_timers_led();
    SysTick_Config(SystemCoreClock/1000); /* 1ms interval */
    uart_config();
    adc_config();

    int last_time = 0;
    while (42) {
        /* Crude LED logic. The comm, id and error LEDs each have a timeout counter that is reset to the
         * LED_STRETCHING_MS constant on an event (either a frame received correctly or some uart, framing or protocol
         * error). These timeout counters count down in milliseconds and the LEDs are set while they are non-zero. This
         * means a train of several very brief events will make the LED lit permanently.
         */
        int time_now = sys_time; /* Latch sys_time here to avoid race conditions */
        if (last_time != time_now) {
            int diff = (time_now - last_time);

            error_led_timeout -= diff;
            if (error_led_timeout < 0)
                error_led_timeout = 0;

            comm_led_timeout -= diff;
            if (comm_led_timeout < 0)
                comm_led_timeout = 0;

            id_led_timeout -= diff;
            if (id_led_timeout < 0)
                id_led_timeout = 0;

            /* Replace the three LED bits in led_state: bit 2 = id, bit 1 = error, bit 0 = comm */
            led_state = (led_state & ~7) | (!!id_led_timeout)<<2 | (!!error_led_timeout)<<1 | (!!comm_led_timeout)<<0;
            last_time = time_now;
        }

        /* Process pending buffer transfer: convert the received frame into the write buffer, then ask TIM1's ISR to
         * swap the buffers at the next frame boundary. */
        if (fb_op == FB_FORMAT) {
            transpose_data(rx_buf.byte_data, write_fb);
            fb_op = FB_UPDATE;
        }
    }
}

/* Non-maskable interrupt: intentionally ignored, returns immediately. */
void NMI_Handler(void) {
}

/* Hard fault: stop at a breakpoint instruction. Declared naked, so no function prologue/epilogue is generated and the
 * body consists of the single bkpt instruction. */
void HardFault_Handler(void) __attribute__((naked));
void HardFault_Handler() {
    asm volatile ("bkpt");
}

/* Supervisor call: unused, returns immediately. */
void SVC_Handler(void) {
}


/* PendSV: unused, returns immediately. */
void PendSV_Handler(void) {
}

/* SysTick interrupt, configured in main() to fire every millisecond. Maintains the two uptime counters: sys_time is
 * incremented on every invocation, sys_time_seconds on every 1000th invocation. */
void SysTick_Handler(void) {
    static int n = 0; /* milliseconds elapsed in the current second */

    sys_time++;
    /* Pre-increment so that the seconds counter advances after exactly 1000 ticks, not 1001. */
    if (++n == 1000) {
        n = 0;
        sys_time_seconds++;
    }
}