2980173ee7
The main reason to switch from AES-256-GCM to ChaCha-Poly1305 is to remove a dependency on OpenSSL, whose behaviour of the AES-256-GCM decryption function changes between versions. The source code for ChaCha-Pol1305 is small and in the public domain, and can therefore be easily included in tinc itself. Moreover, it is very fast even without using any optimized assembler, easily outperforming AES-256-GCM on platforms that don't have special AES instructions in hardware.
215 lines
5.1 KiB
C
215 lines
5.1 KiB
C
/*
|
|
chacha-merged.c version 20080118
|
|
D. J. Bernstein
|
|
Public domain.
|
|
*/
|
|
|
|
#include "../system.h"
|
|
|
|
#include "chacha.h"
|
|
|
|
typedef struct chacha_ctx chacha_ctx;
|
|
|
|
#define U8C(v) (v##U)
|
|
#define U32C(v) (v##U)
|
|
|
|
#define U8V(v) ((uint8_t)(v) & U8C(0xFF))
|
|
#define U32V(v) ((uint32_t)(v) & U32C(0xFFFFFFFF))
|
|
|
|
#define ROTL32(v, n) \
|
|
(U32V((v) << (n)) | ((v) >> (32 - (n))))
|
|
|
|
#define U8TO32_LITTLE(p) \
|
|
(((uint32_t)((p)[0]) ) | \
|
|
((uint32_t)((p)[1]) << 8) | \
|
|
((uint32_t)((p)[2]) << 16) | \
|
|
((uint32_t)((p)[3]) << 24))
|
|
|
|
#define U32TO8_LITTLE(p, v) \
|
|
do { \
|
|
(p)[0] = U8V((v) ); \
|
|
(p)[1] = U8V((v) >> 8); \
|
|
(p)[2] = U8V((v) >> 16); \
|
|
(p)[3] = U8V((v) >> 24); \
|
|
} while (0)
|
|
|
|
#define ROTATE(v,c) (ROTL32(v,c))
|
|
#define XOR(v,w) ((v) ^ (w))
|
|
#define PLUS(v,w) (U32V((v) + (w)))
|
|
#define PLUSONE(v) (PLUS((v),1))
|
|
|
|
#define QUARTERROUND(a,b,c,d) \
|
|
a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
|
|
c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
|
|
a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
|
|
c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);
|
|
|
|
static const char sigma[16] = "expand 32-byte k";
|
|
static const char tau[16] = "expand 16-byte k";
|
|
|
|
void chacha_keysetup(chacha_ctx *x, const uint8_t *k, uint32_t kbits)
|
|
{
|
|
const char *constants;
|
|
|
|
x->input[4] = U8TO32_LITTLE(k + 0);
|
|
x->input[5] = U8TO32_LITTLE(k + 4);
|
|
x->input[6] = U8TO32_LITTLE(k + 8);
|
|
x->input[7] = U8TO32_LITTLE(k + 12);
|
|
if (kbits == 256) { /* recommended */
|
|
k += 16;
|
|
constants = sigma;
|
|
} else { /* kbits == 128 */
|
|
constants = tau;
|
|
}
|
|
x->input[8] = U8TO32_LITTLE(k + 0);
|
|
x->input[9] = U8TO32_LITTLE(k + 4);
|
|
x->input[10] = U8TO32_LITTLE(k + 8);
|
|
x->input[11] = U8TO32_LITTLE(k + 12);
|
|
x->input[0] = U8TO32_LITTLE(constants + 0);
|
|
x->input[1] = U8TO32_LITTLE(constants + 4);
|
|
x->input[2] = U8TO32_LITTLE(constants + 8);
|
|
x->input[3] = U8TO32_LITTLE(constants + 12);
|
|
}
|
|
|
|
void chacha_ivsetup(chacha_ctx *x, const uint8_t *iv, const uint8_t *counter)
|
|
{
|
|
x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0);
|
|
x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4);
|
|
x->input[14] = U8TO32_LITTLE(iv + 0);
|
|
x->input[15] = U8TO32_LITTLE(iv + 4);
|
|
}
|
|
|
|
void
|
|
chacha_encrypt_bytes(chacha_ctx *x, const uint8_t *m, uint8_t *c, uint32_t bytes)
|
|
{
|
|
uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
|
|
uint32_t j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
|
|
uint8_t *ctarget = NULL;
|
|
uint8_t tmp[64];
|
|
uint32_t i;
|
|
|
|
if (!bytes)
|
|
return;
|
|
|
|
j0 = x->input[0];
|
|
j1 = x->input[1];
|
|
j2 = x->input[2];
|
|
j3 = x->input[3];
|
|
j4 = x->input[4];
|
|
j5 = x->input[5];
|
|
j6 = x->input[6];
|
|
j7 = x->input[7];
|
|
j8 = x->input[8];
|
|
j9 = x->input[9];
|
|
j10 = x->input[10];
|
|
j11 = x->input[11];
|
|
j12 = x->input[12];
|
|
j13 = x->input[13];
|
|
j14 = x->input[14];
|
|
j15 = x->input[15];
|
|
|
|
for (;;) {
|
|
if (bytes < 64) {
|
|
for (i = 0; i < bytes; ++i)
|
|
tmp[i] = m[i];
|
|
m = tmp;
|
|
ctarget = c;
|
|
c = tmp;
|
|
}
|
|
x0 = j0;
|
|
x1 = j1;
|
|
x2 = j2;
|
|
x3 = j3;
|
|
x4 = j4;
|
|
x5 = j5;
|
|
x6 = j6;
|
|
x7 = j7;
|
|
x8 = j8;
|
|
x9 = j9;
|
|
x10 = j10;
|
|
x11 = j11;
|
|
x12 = j12;
|
|
x13 = j13;
|
|
x14 = j14;
|
|
x15 = j15;
|
|
for (i = 20; i > 0; i -= 2) {
|
|
QUARTERROUND(x0, x4, x8, x12)
|
|
QUARTERROUND(x1, x5, x9, x13)
|
|
QUARTERROUND(x2, x6, x10, x14)
|
|
QUARTERROUND(x3, x7, x11, x15)
|
|
QUARTERROUND(x0, x5, x10, x15)
|
|
QUARTERROUND(x1, x6, x11, x12)
|
|
QUARTERROUND(x2, x7, x8, x13)
|
|
QUARTERROUND(x3, x4, x9, x14)
|
|
}
|
|
x0 = PLUS(x0, j0);
|
|
x1 = PLUS(x1, j1);
|
|
x2 = PLUS(x2, j2);
|
|
x3 = PLUS(x3, j3);
|
|
x4 = PLUS(x4, j4);
|
|
x5 = PLUS(x5, j5);
|
|
x6 = PLUS(x6, j6);
|
|
x7 = PLUS(x7, j7);
|
|
x8 = PLUS(x8, j8);
|
|
x9 = PLUS(x9, j9);
|
|
x10 = PLUS(x10, j10);
|
|
x11 = PLUS(x11, j11);
|
|
x12 = PLUS(x12, j12);
|
|
x13 = PLUS(x13, j13);
|
|
x14 = PLUS(x14, j14);
|
|
x15 = PLUS(x15, j15);
|
|
|
|
x0 = XOR(x0, U8TO32_LITTLE(m + 0));
|
|
x1 = XOR(x1, U8TO32_LITTLE(m + 4));
|
|
x2 = XOR(x2, U8TO32_LITTLE(m + 8));
|
|
x3 = XOR(x3, U8TO32_LITTLE(m + 12));
|
|
x4 = XOR(x4, U8TO32_LITTLE(m + 16));
|
|
x5 = XOR(x5, U8TO32_LITTLE(m + 20));
|
|
x6 = XOR(x6, U8TO32_LITTLE(m + 24));
|
|
x7 = XOR(x7, U8TO32_LITTLE(m + 28));
|
|
x8 = XOR(x8, U8TO32_LITTLE(m + 32));
|
|
x9 = XOR(x9, U8TO32_LITTLE(m + 36));
|
|
x10 = XOR(x10, U8TO32_LITTLE(m + 40));
|
|
x11 = XOR(x11, U8TO32_LITTLE(m + 44));
|
|
x12 = XOR(x12, U8TO32_LITTLE(m + 48));
|
|
x13 = XOR(x13, U8TO32_LITTLE(m + 52));
|
|
x14 = XOR(x14, U8TO32_LITTLE(m + 56));
|
|
x15 = XOR(x15, U8TO32_LITTLE(m + 60));
|
|
|
|
j12 = PLUSONE(j12);
|
|
if (!j12) {
|
|
j13 = PLUSONE(j13);
|
|
/* stopping at 2^70 bytes per nonce is user's responsibility */
|
|
}
|
|
|
|
U32TO8_LITTLE(c + 0, x0);
|
|
U32TO8_LITTLE(c + 4, x1);
|
|
U32TO8_LITTLE(c + 8, x2);
|
|
U32TO8_LITTLE(c + 12, x3);
|
|
U32TO8_LITTLE(c + 16, x4);
|
|
U32TO8_LITTLE(c + 20, x5);
|
|
U32TO8_LITTLE(c + 24, x6);
|
|
U32TO8_LITTLE(c + 28, x7);
|
|
U32TO8_LITTLE(c + 32, x8);
|
|
U32TO8_LITTLE(c + 36, x9);
|
|
U32TO8_LITTLE(c + 40, x10);
|
|
U32TO8_LITTLE(c + 44, x11);
|
|
U32TO8_LITTLE(c + 48, x12);
|
|
U32TO8_LITTLE(c + 52, x13);
|
|
U32TO8_LITTLE(c + 56, x14);
|
|
U32TO8_LITTLE(c + 60, x15);
|
|
|
|
if (bytes <= 64) {
|
|
if (bytes < 64) {
|
|
for (i = 0; i < bytes; ++i)
|
|
ctarget[i] = c[i];
|
|
}
|
|
x->input[12] = j12;
|
|
x->input[13] = j13;
|
|
return;
|
|
}
|
|
bytes -= 64;
|
|
c += 64;
|
|
m += 64;
|
|
}
|
|
}
|