Error correcting codes (by alaric)
#include <stdio.h>
#include <assert.h>

#define INBITS 8
#define OUTBITS 12
#define INSYMBOLS (1<<INBITS)
#define OUTSYMBOLS (1<<OUTBITS)

#define FLAG_CORRECT 0
#define FLAG_ERROR 1

typedef unsigned int WORD;

int used[OUTSYMBOLS];     // which 12-bit codewords (or neighbours thereof) are claimed
WORD decode[OUTSYMBOLS];  // received codeword -> (input symbol << 1) | flag
WORD encode[INSYMBOLS];   // input symbol -> codeword

// is outsym unused, and all symbols a single bit-error away from it?
int is_clear(WORD outsym) {
  int i;
  if (used[outsym]) return 0;
  for (i=0;i<OUTBITS;i++) {
    if (used[outsym ^ (1 << i)]) return 0;
  }
  return 1;
}

// claim outsym for insym, along with every single-bit corruption of it
void mark_used(WORD insym, WORD outsym) {
  int i;
  assert (!used[outsym]);
  used[outsym] = 1;
  decode[outsym] = (insym << 1) | FLAG_CORRECT;
  encode[insym] = outsym;
  printf ("%u -> %u\n", insym, outsym);
  for (i=0;i<OUTBITS;i++) {
    assert (!used[outsym ^ (1 << i)]);
    used[outsym ^ (1 << i)] = 1;
    decode[outsym ^ (1 << i)] = (insym << 1) | FLAG_ERROR;
    printf (" %u -> %u\n", insym, outsym ^ (1 << i));
  }
}

int main(void) {
  WORD in;
  WORD out;
  WORD max_out;
  int i;
  unsigned int outsymbols_used;

  for(out=0;out<OUTSYMBOLS;out++) {
    used[out] = 0;
    decode[out] = 0;
  }

  // greedily assign each input symbol the lowest codeword whose
  // neighbourhood is entirely free
  max_out = 0;
  for(in=0;in<INSYMBOLS;in++) {
    while (!is_clear(max_out)) {
      max_out++;
      if (max_out>=OUTSYMBOLS) { // bail before is_clear() reads past the end of used[]
        printf("Run out. Giving up!\n");
        return 1;
      }
    }
    mark_used(in, max_out);
  }

  outsymbols_used = 0;
  for(out=0;out<OUTSYMBOLS;out++)
    if (used[out]) outsymbols_used++;
  printf ("Done! Used %u symbols (0..%u) out of %u\n", outsymbols_used, max_out, OUTSYMBOLS);

  // Check correctness
  for(in=0;in<INSYMBOLS;in++) {
    if (decode[encode[in]] != ((in<<1) | FLAG_CORRECT)) {
      printf ("Hmmm, %u encodes to %u, but that decodes to %u/%s\n",
        in, encode[in],
        decode[encode[in]] >> 1, ((decode[encode[in]] & 1) == FLAG_CORRECT)?"correct":"error");
    }
    for(i=0;i<OUTBITS;i++) {
      if (decode[encode[in] ^ (1<<i)] != ((in<<1) | FLAG_ERROR)) {
        printf ("Hmmm, %u encodes to %u, but %u(%u ^ 1<<%d) decodes to %u/%s\n",
          in, encode[in],
          encode[in] ^ (1<<i), encode[in], i,
          decode[encode[in] ^ (1<<i)] >> 1, ((decode[encode[in] ^ (1<<i)] & 1) == FLAG_CORRECT)?"correct":"error");
      }
    }
  }

  return 0;
}
Running the above produces:
0 -> 0
0 -> 1
0 -> 2
0 -> 4
0 -> 8
0 -> 16
0 -> 32
0 -> 64
0 -> 128
0 -> 256
0 -> 512
0 -> 1024
0 -> 2048
1 -> 7
1 -> 6
1 -> 5
1 -> 3
1 -> 15
1 -> 23
1 -> 39
[...]
254 -> 3888
254 -> 4080
254 -> 3696
254 -> 3440
254 -> 2928
254 -> 1904
255 -> 3959
255 -> 3958
255 -> 3957
255 -> 3955
255 -> 3967
255 -> 3943
255 -> 3927
255 -> 3895
255 -> 4087
255 -> 3703
255 -> 3447
255 -> 2935
255 -> 1911
Done! Used 3328 symbols (0..3959) out of 4096
Whaddyaknow? We've derived a single-bit-error-correcting code by brute force. This means its implementation involves a table lookup rather than linear feedback shift registers, so it's more expensive in hardware... but still a simple circuit (a small ROM!) and trivial in software.
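For what it's worth, the "trivial in software" bit really is just a pair of array lookups. A minimal sketch, assuming the encode[] and decode[] tables built by the program above are kept around (dumped into a header or a ROM image, say); send_symbol and receive_symbol are made-up names for illustration:

/* Sender: 8-bit symbol in, 12-bit codeword out. */
WORD send_symbol(WORD insym) {
  return encode[insym];
}

/* Receiver: possibly-corrupted 12-bit codeword in, 8-bit symbol out;
   *corrected is set if a single bit flip was absorbed along the way. */
WORD receive_symbol(WORD outsym, int *corrected) {
  WORD d = decode[outsym];
  *corrected = ((d & 1) == FLAG_ERROR);
  return d >> 1;
}

One caveat: a codeword mangled by two or more bit flips can land in a slot the allocator never touched, where decode[] still holds zero and will happily "decode" to symbol 0, so a real receiver would also want to know which slots are genuinely allocated (the used[] array, say) to spot undecodable words.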
Trying to do it in eleven bits fails, with a helpful cry of "Run out. Giving up!". It has to: each of the 256 codewords needs itself plus its 11 single-bit neighbours kept free, which is 256 × 12 = 3072 slots, and eleven bits only give 2048 of them. (With twelve bits it's 256 × 13 = 3328 slots out of 4096, which is exactly the count printed above.)
We can extend the brute-force algorithm to cover two-bit errors, too, by just expanding the inner loops in is_clear and mark_used to cover all combinations of two different bits (being careful not to trip over picking the same bit twice...). And extending the self-test code at the end to check for collisions, naturally.
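Something along these lines for is_clear (a sketch; is_clear2 is just an illustrative name, and mark_used grows the same nested pair of loops):

// is outsym unused, and all symbols within one OR two bit-errors of it?
int is_clear2(WORD outsym) {
  int i, j;
  if (used[outsym]) return 0;
  for (i=0;i<OUTBITS;i++) {
    if (used[outsym ^ (1 << i)]) return 0;
    for (j=i+1;j<OUTBITS;j++) { // j starts above i, so we never pick the same bit twice
      if (used[outsym ^ (1 << i) ^ (1 << j)]) return 0;
    }
  }
  return 1;
}

(By the same counting argument as before, each codeword now claims 1 + 12 + 66 = 79 slots, so 256 inputs can't possibly fit into 12 output bits any more; OUTBITS has to grow before this stands a chance.)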
This also leaves some 12-bit messages unused, which can be used as special values for block terminators and stuff, as long as you also allocate single-bit-different versions of them all so they can be error corrected!
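For example (a sketch; allocate_terminator and TERMINATOR_SYM are invented names), something like this could run after the main allocation loop, grabbing the first leftover codeword whose whole neighbourhood is still clear and labelling it with a "symbol" value no real input byte can take. It deliberately doesn't go through mark_used, which would write past the end of encode[]:

#define TERMINATOR_SYM INSYMBOLS  /* a "symbol" outside 0..255 */
WORD terminator_code;             /* the 12-bit codeword chosen for it */

int allocate_terminator(void) {
  WORD out;
  int i;
  for (out=0;out<OUTSYMBOLS;out++) {
    if (is_clear(out)) {
      terminator_code = out;
      used[out] = 1;
      decode[out] = (TERMINATOR_SYM << 1) | FLAG_CORRECT;
      for (i=0;i<OUTBITS;i++) {
        used[out ^ (1 << i)] = 1;
        decode[out ^ (1 << i)] = (TERMINATOR_SYM << 1) | FLAG_ERROR;
      }
      return 1; /* found one: the receiver spots it as decode[...] >> 1 == TERMINATOR_SYM */
    }
  }
  return 0;     /* no codeword with a fully clear neighbourhood left */
}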
However, such lookup tables get unwieldy as you move into larger block sizes (trying to mess around with 16->20 bit codes makes my machine run out of virtual memory and throw a bus error). So if you want to use large block sizes to get more compact codes, you need to go back to things that can be implemented algorithmically rather than as a lookup table. But then again, larger blocks carry a higher risk of containing multiple errors, so you need to increase the error tolerance by using a wider message anyway; it might be worth using a smaller block after all.
Oh, and in practice, errors tend to come in small bursts - but you can use a single-bit-correcting code to protect against them by taking a number of blocks as one and transmitting the first bit of each, then the second bit of each, and so on. Any single burst of errors in the encoded form, as long as it's shorter than the number of blocks, will corrupt at most one bit of each block after the transposition.
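That transposition might look something like this (a sketch; NBLOCKS is an illustrative parameter, not anything from above). Bit i of block b is transmitted at position i*NBLOCKS + b, so a burst shorter than NBLOCKS bits lands on each block at most once:

#define NBLOCKS 16  /* how many 12-bit codewords travel together */

/* Spread the blocks out bit-by-bit for transmission... */
void interleave(const WORD block[NBLOCKS], int bits[OUTBITS * NBLOCKS]) {
  int i, b;
  for (i=0;i<OUTBITS;i++)
    for (b=0;b<NBLOCKS;b++)
      bits[i*NBLOCKS + b] = (block[b] >> i) & 1;
}

/* ...and gather them back up again at the far end. */
void deinterleave(const int bits[OUTBITS * NBLOCKS], WORD block[NBLOCKS]) {
  int i, b;
  for (b=0;b<NBLOCKS;b++) block[b] = 0;
  for (i=0;i<OUTBITS;i++)
    for (b=0;b<NBLOCKS;b++)
      block[b] |= (WORD)bits[i*NBLOCKS + b] << i;
}

Each recovered block then goes through decode[] exactly as before, so the burst costs at most one (correctable) bit flip per block.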
By @ndy Macolleague, Wed 30th Apr 2008 @ 2:42 pm
You're writing recreationally in C again? 🙂
Maybe this can be simplified into a deduction problem and specified in Prolog? I guess that would take longer to write and you'd end up with a less impressive piece of code. 😉