Avoid undefined behavior.

Left-shifting a negative value is undefined behavior in C. This happens a lot in
the Ed25519 code. Cast to unsigned first, then cast the result back to
signed where necessary.
This commit is contained in:
Guus Sliepen 2015-10-26 13:46:30 +01:00
parent 7306823843
commit 7a8515112a
4 changed files with 302 additions and 281 deletions

View file

@ -8,9 +8,9 @@
/*
 * Assemble an unsigned value from the three bytes at in[0..2], least
 * significant byte first (in[0] supplies bits 0-7, in[2] bits 16-23).
 *
 * The shifts go through shlu64() so they are performed on a 64-bit unsigned
 * operand.
 */
static uint64_t load_3(const unsigned char *in) {
	uint64_t result;

	result = in[0];
	result |= shlu64(in[1], 8);
	result |= shlu64(in[2], 16);

	return result;
}
@ -18,10 +18,10 @@ static uint64_t load_3(const unsigned char *in) {
/*
 * Assemble an unsigned value from the four bytes at in[0..3], least
 * significant byte first (in[0] supplies bits 0-7, in[3] bits 24-31).
 *
 * The shifts go through shlu64() so they are performed on a 64-bit unsigned
 * operand.
 */
static uint64_t load_4(const unsigned char *in) {
	uint64_t result;

	result = in[0];
	result |= shlu64(in[1], 8);
	result |= shlu64(in[2], 16);
	result |= shlu64(in[3], 24);

	return result;
}
@ -316,47 +316,47 @@ void fe_frombytes(fe h, const unsigned char *s) {
int64_t carry8;
int64_t carry9;
carry9 = (h9 + (int64_t) (1 << 24)) >> 25;
carry9 = (h9 + (1L << 24)) >> 25;
h0 += carry9 * 19;
h9 -= carry9 << 25;
carry1 = (h1 + (int64_t) (1 << 24)) >> 25;
h9 -= shl64(carry9, 25);
carry1 = (h1 + (1L << 24)) >> 25;
h2 += carry1;
h1 -= carry1 << 25;
carry3 = (h3 + (int64_t) (1 << 24)) >> 25;
h1 -= shl64(carry1, 25);
carry3 = (h3 + (1L << 24)) >> 25;
h4 += carry3;
h3 -= carry3 << 25;
carry5 = (h5 + (int64_t) (1 << 24)) >> 25;
h3 -= shl64(carry3, 25);
carry5 = (h5 + (1L << 24)) >> 25;
h6 += carry5;
h5 -= carry5 << 25;
carry7 = (h7 + (int64_t) (1 << 24)) >> 25;
h5 -= shl64(carry5, 25);
carry7 = (h7 + (1L << 24)) >> 25;
h8 += carry7;
h7 -= carry7 << 25;
carry0 = (h0 + (int64_t) (1 << 25)) >> 26;
h7 -= shl64(carry7, 25);
carry0 = (h0 + (1L << 25)) >> 26;
h1 += carry0;
h0 -= carry0 << 26;
carry2 = (h2 + (int64_t) (1 << 25)) >> 26;
h0 -= shl64(carry0, 26);
carry2 = (h2 + (1L << 25)) >> 26;
h3 += carry2;
h2 -= carry2 << 26;
carry4 = (h4 + (int64_t) (1 << 25)) >> 26;
h2 -= shl64(carry2, 26);
carry4 = (h4 + (1L << 25)) >> 26;
h5 += carry4;
h4 -= carry4 << 26;
carry6 = (h6 + (int64_t) (1 << 25)) >> 26;
h4 -= shl64(carry4, 26);
carry6 = (h6 + (1L << 25)) >> 26;
h7 += carry6;
h6 -= carry6 << 26;
carry8 = (h8 + (int64_t) (1 << 25)) >> 26;
h6 -= shl64(carry6, 26);
carry8 = (h8 + (1L << 25)) >> 26;
h9 += carry8;
h8 -= carry8 << 26;
h8 -= shl64(carry8, 26);
h[0] = (int32_t) h0;
h[1] = (int32_t) h1;
h[2] = (int32_t) h2;
h[3] = (int32_t) h3;
h[4] = (int32_t) h4;
h[5] = (int32_t) h5;
h[6] = (int32_t) h6;
h[7] = (int32_t) h7;
h[8] = (int32_t) h8;
h[9] = (int32_t) h9;
h[0] = h0;
h[1] = h1;
h[2] = h2;
h[3] = h3;
h[4] = h4;
h[5] = h5;
h[6] = h6;
h[7] = h7;
h[8] = h8;
h[9] = h9;
}
@ -709,48 +709,48 @@ void fe_mul(fe h, const fe f, const fe g) {
int64_t carry8;
int64_t carry9;
carry0 = (h0 + (int64_t) (1 << 25)) >> 26;
carry0 = (h0 + (1L << 25)) >> 26;
h1 += carry0;
h0 -= carry0 << 26;
carry4 = (h4 + (int64_t) (1 << 25)) >> 26;
h0 -= shl64(carry0, 26);
carry4 = (h4 + (1L << 25)) >> 26;
h5 += carry4;
h4 -= carry4 << 26;
h4 -= shl64(carry4, 26);
carry1 = (h1 + (int64_t) (1 << 24)) >> 25;
carry1 = (h1 + (1L << 24)) >> 25;
h2 += carry1;
h1 -= carry1 << 25;
carry5 = (h5 + (int64_t) (1 << 24)) >> 25;
h1 -= shl64(carry1, 25);
carry5 = (h5 + (1L << 24)) >> 25;
h6 += carry5;
h5 -= carry5 << 25;
h5 -= shl64(carry5, 25);
carry2 = (h2 + (int64_t) (1 << 25)) >> 26;
carry2 = (h2 + (1L << 25)) >> 26;
h3 += carry2;
h2 -= carry2 << 26;
carry6 = (h6 + (int64_t) (1 << 25)) >> 26;
h2 -= shl64(carry2, 26);
carry6 = (h6 + (1L << 25)) >> 26;
h7 += carry6;
h6 -= carry6 << 26;
h6 -= shl64(carry6, 26);
carry3 = (h3 + (int64_t) (1 << 24)) >> 25;
carry3 = (h3 + (1L << 24)) >> 25;
h4 += carry3;
h3 -= carry3 << 25;
carry7 = (h7 + (int64_t) (1 << 24)) >> 25;
h3 -= shl64(carry3, 25);
carry7 = (h7 + (1L << 24)) >> 25;
h8 += carry7;
h7 -= carry7 << 25;
h7 -= shl64(carry7, 25);
carry4 = (h4 + (int64_t) (1 << 25)) >> 26;
carry4 = (h4 + (1L << 25)) >> 26;
h5 += carry4;
h4 -= carry4 << 26;
carry8 = (h8 + (int64_t) (1 << 25)) >> 26;
h4 -= shl64(carry4, 26);
carry8 = (h8 + (1L << 25)) >> 26;
h9 += carry8;
h8 -= carry8 << 26;
h8 -= shl64(carry8, 26);
carry9 = (h9 + (int64_t) (1 << 24)) >> 25;
carry9 = (h9 + (1L << 24)) >> 25;
h0 += carry9 * 19;
h9 -= carry9 << 25;
h9 -= shl64(carry9, 25);
carry0 = (h0 + (int64_t) (1 << 25)) >> 26;
carry0 = (h0 + (1L << 25)) >> 26;
h1 += carry0;
h0 -= carry0 << 26;
h0 -= shl64(carry0, 26);
h[0] = (int32_t) h0;
h[1] = (int32_t) h1;
@ -808,17 +808,17 @@ void fe_mul121666(fe h, fe f) {
int64_t carry8;
int64_t carry9;
carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= shl64(carry9, 25);
carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= shl64(carry1, 25);
carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= shl64(carry3, 25);
carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= shl64(carry5, 25);
carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= shl64(carry7, 25);
carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= shl64(carry0, 26);
carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= shl64(carry2, 26);
carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= shl64(carry4, 26);
carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= shl64(carry6, 26);
carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= shl64(carry8, 26);
h[0] = h0;
h[1] = h1;
@ -1078,42 +1078,42 @@ void fe_sq(fe h, const fe f) {
int64_t carry7;
int64_t carry8;
int64_t carry9;
carry0 = (h0 + (int64_t) (1 << 25)) >> 26;
carry0 = (h0 + (1L << 25)) >> 26;
h1 += carry0;
h0 -= carry0 << 26;
carry4 = (h4 + (int64_t) (1 << 25)) >> 26;
h0 -= shl64(carry0, 26);
carry4 = (h4 + (1L << 25)) >> 26;
h5 += carry4;
h4 -= carry4 << 26;
carry1 = (h1 + (int64_t) (1 << 24)) >> 25;
h4 -= shl64(carry4, 26);
carry1 = (h1 + (1L << 24)) >> 25;
h2 += carry1;
h1 -= carry1 << 25;
carry5 = (h5 + (int64_t) (1 << 24)) >> 25;
h1 -= shl64(carry1, 25);
carry5 = (h5 + (1L << 24)) >> 25;
h6 += carry5;
h5 -= carry5 << 25;
carry2 = (h2 + (int64_t) (1 << 25)) >> 26;
h5 -= shl64(carry5, 25);
carry2 = (h2 + (1L << 25)) >> 26;
h3 += carry2;
h2 -= carry2 << 26;
carry6 = (h6 + (int64_t) (1 << 25)) >> 26;
h2 -= shl64(carry2, 26);
carry6 = (h6 + (1L << 25)) >> 26;
h7 += carry6;
h6 -= carry6 << 26;
carry3 = (h3 + (int64_t) (1 << 24)) >> 25;
h6 -= shl64(carry6, 26);
carry3 = (h3 + (1L << 24)) >> 25;
h4 += carry3;
h3 -= carry3 << 25;
carry7 = (h7 + (int64_t) (1 << 24)) >> 25;
h3 -= shl64(carry3, 25);
carry7 = (h7 + (1L << 24)) >> 25;
h8 += carry7;
h7 -= carry7 << 25;
carry4 = (h4 + (int64_t) (1 << 25)) >> 26;
h7 -= shl64(carry7, 25);
carry4 = (h4 + (1L << 25)) >> 26;
h5 += carry4;
h4 -= carry4 << 26;
carry8 = (h8 + (int64_t) (1 << 25)) >> 26;
h4 -= shl64(carry4, 26);
carry8 = (h8 + (1L << 25)) >> 26;
h9 += carry8;
h8 -= carry8 << 26;
carry9 = (h9 + (int64_t) (1 << 24)) >> 25;
h8 -= shl64(carry8, 26);
carry9 = (h9 + (1L << 24)) >> 25;
h0 += carry9 * 19;
h9 -= carry9 << 25;
carry0 = (h0 + (int64_t) (1 << 25)) >> 26;
h9 -= shl64(carry9, 25);
carry0 = (h0 + (1L << 25)) >> 26;
h1 += carry0;
h0 -= carry0 << 26;
h0 -= shl64(carry0, 26);
h[0] = (int32_t) h0;
h[1] = (int32_t) h1;
h[2] = (int32_t) h2;
@ -1251,42 +1251,42 @@ void fe_sq2(fe h, const fe f) {
h7 += h7;
h8 += h8;
h9 += h9;
carry0 = (h0 + (int64_t) (1 << 25)) >> 26;
carry0 = (h0 + (1L << 25)) >> 26;
h1 += carry0;
h0 -= carry0 << 26;
carry4 = (h4 + (int64_t) (1 << 25)) >> 26;
h0 -= shl64(carry0, 26);
carry4 = (h4 + (1L << 25)) >> 26;
h5 += carry4;
h4 -= carry4 << 26;
carry1 = (h1 + (int64_t) (1 << 24)) >> 25;
h4 -= shl64(carry4, 26);
carry1 = (h1 + (1L << 24)) >> 25;
h2 += carry1;
h1 -= carry1 << 25;
carry5 = (h5 + (int64_t) (1 << 24)) >> 25;
h1 -= shl64(carry1, 25);
carry5 = (h5 + (1L << 24)) >> 25;
h6 += carry5;
h5 -= carry5 << 25;
carry2 = (h2 + (int64_t) (1 << 25)) >> 26;
h5 -= shl64(carry5, 25);
carry2 = (h2 + (1L << 25)) >> 26;
h3 += carry2;
h2 -= carry2 << 26;
carry6 = (h6 + (int64_t) (1 << 25)) >> 26;
h2 -= shl64(carry2, 26);
carry6 = (h6 + (1L << 25)) >> 26;
h7 += carry6;
h6 -= carry6 << 26;
carry3 = (h3 + (int64_t) (1 << 24)) >> 25;
h6 -= shl64(carry6, 26);
carry3 = (h3 + (1L << 24)) >> 25;
h4 += carry3;
h3 -= carry3 << 25;
carry7 = (h7 + (int64_t) (1 << 24)) >> 25;
h3 -= shl64(carry3, 25);
carry7 = (h7 + (1L << 24)) >> 25;
h8 += carry7;
h7 -= carry7 << 25;
carry4 = (h4 + (int64_t) (1 << 25)) >> 26;
h7 -= shl64(carry7, 25);
carry4 = (h4 + (1L << 25)) >> 26;
h5 += carry4;
h4 -= carry4 << 26;
carry8 = (h8 + (int64_t) (1 << 25)) >> 26;
h4 -= shl64(carry4, 26);
carry8 = (h8 + (1L << 25)) >> 26;
h9 += carry8;
h8 -= carry8 << 26;
carry9 = (h9 + (int64_t) (1 << 24)) >> 25;
h8 -= shl64(carry8, 26);
carry9 = (h9 + (1L << 24)) >> 25;
h0 += carry9 * 19;
h9 -= carry9 << 25;
carry0 = (h0 + (int64_t) (1 << 25)) >> 26;
h9 -= shl64(carry9, 25);
carry0 = (h0 + (1L << 25)) >> 26;
h1 += carry0;
h0 -= carry0 << 26;
h0 -= shl64(carry0, 26);
h[0] = (int32_t) h0;
h[1] = (int32_t) h1;
h[2] = (int32_t) h2;
@ -1421,33 +1421,33 @@ void fe_tobytes(unsigned char *s, const fe h) {
/* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */
carry0 = h0 >> 26;
h1 += carry0;
h0 -= carry0 << 26;
h0 -= shl32(carry0, 26);
carry1 = h1 >> 25;
h2 += carry1;
h1 -= carry1 << 25;
h1 -= shl32(carry1, 25);
carry2 = h2 >> 26;
h3 += carry2;
h2 -= carry2 << 26;
h2 -= shl32(carry2, 26);
carry3 = h3 >> 25;
h4 += carry3;
h3 -= carry3 << 25;
h3 -= shl32(carry3, 25);
carry4 = h4 >> 26;
h5 += carry4;
h4 -= carry4 << 26;
h4 -= shl32(carry4, 26);
carry5 = h5 >> 25;
h6 += carry5;
h5 -= carry5 << 25;
h5 -= shl32(carry5, 25);
carry6 = h6 >> 26;
h7 += carry6;
h6 -= carry6 << 26;
h6 -= shl32(carry6, 26);
carry7 = h7 >> 25;
h8 += carry7;
h7 -= carry7 << 25;
h7 -= shl32(carry7, 25);
carry8 = h8 >> 26;
h9 += carry8;
h8 -= carry8 << 26;
h8 -= shl32(carry8, 26);
carry9 = h9 >> 25;
h9 -= carry9 << 25;
h9 -= shl32(carry9, 25);
/* h10 = carry9 */
/*
@ -1459,32 +1459,32 @@ void fe_tobytes(unsigned char *s, const fe h) {
s[0] = (unsigned char) (h0 >> 0);
s[1] = (unsigned char) (h0 >> 8);
s[2] = (unsigned char) (h0 >> 16);
s[3] = (unsigned char) ((h0 >> 24) | (h1 << 2));
s[3] = (unsigned char) ((h0 >> 24) | shl32(h1, 2));
s[4] = (unsigned char) (h1 >> 6);
s[5] = (unsigned char) (h1 >> 14);
s[6] = (unsigned char) ((h1 >> 22) | (h2 << 3));
s[6] = (unsigned char) ((h1 >> 22) | shl32(h2, 3));
s[7] = (unsigned char) (h2 >> 5);
s[8] = (unsigned char) (h2 >> 13);
s[9] = (unsigned char) ((h2 >> 21) | (h3 << 5));
s[9] = (unsigned char) ((h2 >> 21) | shl32(h3, 5));
s[10] = (unsigned char) (h3 >> 3);
s[11] = (unsigned char) (h3 >> 11);
s[12] = (unsigned char) ((h3 >> 19) | (h4 << 6));
s[12] = (unsigned char) ((h3 >> 19) | shl32(h4, 6));
s[13] = (unsigned char) (h4 >> 2);
s[14] = (unsigned char) (h4 >> 10);
s[15] = (unsigned char) (h4 >> 18);
s[16] = (unsigned char) (h5 >> 0);
s[17] = (unsigned char) (h5 >> 8);
s[18] = (unsigned char) (h5 >> 16);
s[19] = (unsigned char) ((h5 >> 24) | (h6 << 1));
s[19] = (unsigned char) ((h5 >> 24) | shl32(h6, 1));
s[20] = (unsigned char) (h6 >> 7);
s[21] = (unsigned char) (h6 >> 15);
s[22] = (unsigned char) ((h6 >> 23) | (h7 << 3));
s[22] = (unsigned char) ((h6 >> 23) | shl32(h7, 3));
s[23] = (unsigned char) (h7 >> 5);
s[24] = (unsigned char) (h7 >> 13);
s[25] = (unsigned char) ((h7 >> 21) | (h8 << 4));
s[25] = (unsigned char) ((h7 >> 21) | shl32(h8, 4));
s[26] = (unsigned char) (h8 >> 4);
s[27] = (unsigned char) (h8 >> 12);
s[28] = (unsigned char) ((h8 >> 20) | (h9 << 6));
s[28] = (unsigned char) ((h8 >> 20) | shl32(h9, 6));
s[29] = (unsigned char) (h9 >> 2);
s[30] = (unsigned char) (h9 >> 10);
s[31] = (unsigned char) (h9 >> 18);

View file

@ -4,6 +4,9 @@
Not a compatible replacement for <stdint.h>, do not blindly use it as such.
*/
#ifndef __TINC_FIXEDINT_H__
#define __TINC_FIXEDINT_H__
#if ((defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) || (defined(__WATCOMC__) && (defined(_STDINT_H_INCLUDED) || __WATCOMC__ >= 1250)) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_) || defined(__UINT_FAST64_TYPE__)) )) && !defined(FIXEDINT_H_INCLUDED)
#include <stdint.h>
#define FIXEDINT_H_INCLUDED
@ -68,3 +71,21 @@
#define INT64_C(v) v ##I64
#endif
#endif
/*
 * Shift an 8-bit value left by b bits and truncate the result to 8 bits.
 *
 * The operand is widened to unsigned int explicitly. Without the cast,
 * integer promotion would turn it into a signed int and the shift would be
 * carried out in signed arithmetic, where overflowing the result is
 * undefined. b must be smaller than the width of unsigned int.
 */
static inline unsigned char shlu8(unsigned char a, uint32_t b) {
	return (unsigned char)((unsigned int) a << b);
}
/*
 * Left shift for 32-bit signed quantities.
 *
 * The value arrives as uint32_t, so the shift itself is done on an unsigned
 * operand; the shifted bit pattern is then converted to int32_t for the
 * caller. b must be less than 32.
 */
static inline int32_t shl32(uint32_t a, uint32_t b) {
	const uint32_t shifted = a << b;

	return (int32_t) shifted;
}
/*
 * Left shift for 64-bit signed quantities.
 *
 * The value arrives as uint64_t, so the shift itself is done on an unsigned
 * operand; the shifted bit pattern is then converted to int64_t for the
 * caller. b must be less than 64.
 */
static inline int64_t shl64(uint64_t a, uint32_t b) {
	const uint64_t shifted = a << b;

	return (int64_t) shifted;
}
/*
 * Left shift for 64-bit unsigned quantities.
 *
 * Narrower arguments are converted to uint64_t by the call itself, so the
 * shift is always performed at 64-bit width. b must be less than 64.
 */
static inline uint64_t shlu64(uint64_t a, uint32_t b) {
	uint64_t shifted = a;

	shifted <<= b;
	return shifted;
}
#endif

View file

@ -356,7 +356,7 @@ static void cmov(ge_precomp *t, ge_precomp *u, unsigned char b) {
static void select(ge_precomp *t, int pos, signed char b) {
ge_precomp minust;
unsigned char bnegative = negative(b);
unsigned char babs = b - (((-bnegative) & b) << 1);
unsigned char babs = b - shlu8(((-bnegative) & b), 1);
fe_1(t->yplusx);
fe_1(t->yminusx);
fe_0(t->xy2d);
@ -404,7 +404,7 @@ void ge_scalarmult_base(ge_p3 *h, const unsigned char *a) {
e[i] += carry;
carry = e[i] + 8;
carry >>= 4;
e[i] -= carry << 4;
e[i] -= shl32(carry, 4);
}
e[63] += carry;

View file

@ -4,9 +4,9 @@
/*
 * Assemble an unsigned value from the three bytes at in[0..2], least
 * significant byte first (in[0] supplies bits 0-7, in[2] bits 16-23).
 *
 * The shifts go through shlu64() so they are performed on a 64-bit unsigned
 * operand.
 */
static uint64_t load_3(const unsigned char *in) {
	uint64_t result;

	result = in[0];
	result |= shlu64(in[1], 8);
	result |= shlu64(in[2], 16);

	return result;
}
@ -14,10 +14,10 @@ static uint64_t load_3(const unsigned char *in) {
/*
 * Assemble an unsigned value from the four bytes at in[0..3], least
 * significant byte first (in[0] supplies bits 0-7, in[3] bits 24-31).
 *
 * The shifts go through shlu64() so they are performed on a 64-bit unsigned
 * operand.
 */
static uint64_t load_4(const unsigned char *in) {
	uint64_t result;

	result = in[0];
	result |= shlu64(in[1], 8);
	result |= shlu64(in[2], 16);
	result |= shlu64(in[3], 24);

	return result;
}
@ -119,37 +119,37 @@ void sc_reduce(unsigned char *s) {
s18 = 0;
carry6 = (s6 + (1 << 20)) >> 21;
s7 += carry6;
s6 -= carry6 << 21;
s6 -= shl64(carry6, 21);
carry8 = (s8 + (1 << 20)) >> 21;
s9 += carry8;
s8 -= carry8 << 21;
s8 -= shl64(carry8, 21);
carry10 = (s10 + (1 << 20)) >> 21;
s11 += carry10;
s10 -= carry10 << 21;
s10 -= shl64(carry10, 21);
carry12 = (s12 + (1 << 20)) >> 21;
s13 += carry12;
s12 -= carry12 << 21;
s12 -= shl64(carry12, 21);
carry14 = (s14 + (1 << 20)) >> 21;
s15 += carry14;
s14 -= carry14 << 21;
s14 -= shl64(carry14, 21);
carry16 = (s16 + (1 << 20)) >> 21;
s17 += carry16;
s16 -= carry16 << 21;
s16 -= shl64(carry16, 21);
carry7 = (s7 + (1 << 20)) >> 21;
s8 += carry7;
s7 -= carry7 << 21;
s7 -= shl64(carry7, 21);
carry9 = (s9 + (1 << 20)) >> 21;
s10 += carry9;
s9 -= carry9 << 21;
s9 -= shl64(carry9, 21);
carry11 = (s11 + (1 << 20)) >> 21;
s12 += carry11;
s11 -= carry11 << 21;
s11 -= shl64(carry11, 21);
carry13 = (s13 + (1 << 20)) >> 21;
s14 += carry13;
s13 -= carry13 << 21;
s13 -= shl64(carry13, 21);
carry15 = (s15 + (1 << 20)) >> 21;
s16 += carry15;
s15 -= carry15 << 21;
s15 -= shl64(carry15, 21);
s5 += s17 * 666643;
s6 += s17 * 470296;
s7 += s17 * 654183;
@ -194,40 +194,40 @@ void sc_reduce(unsigned char *s) {
s12 = 0;
carry0 = (s0 + (1 << 20)) >> 21;
s1 += carry0;
s0 -= carry0 << 21;
s0 -= shl64(carry0, 21);
carry2 = (s2 + (1 << 20)) >> 21;
s3 += carry2;
s2 -= carry2 << 21;
s2 -= shl64(carry2, 21);
carry4 = (s4 + (1 << 20)) >> 21;
s5 += carry4;
s4 -= carry4 << 21;
s4 -= shl64(carry4, 21);
carry6 = (s6 + (1 << 20)) >> 21;
s7 += carry6;
s6 -= carry6 << 21;
s6 -= shl64(carry6, 21);
carry8 = (s8 + (1 << 20)) >> 21;
s9 += carry8;
s8 -= carry8 << 21;
s8 -= shl64(carry8, 21);
carry10 = (s10 + (1 << 20)) >> 21;
s11 += carry10;
s10 -= carry10 << 21;
s10 -= shl64(carry10, 21);
carry1 = (s1 + (1 << 20)) >> 21;
s2 += carry1;
s1 -= carry1 << 21;
s1 -= shl64(carry1, 21);
carry3 = (s3 + (1 << 20)) >> 21;
s4 += carry3;
s3 -= carry3 << 21;
s3 -= shl64(carry3, 21);
carry5 = (s5 + (1 << 20)) >> 21;
s6 += carry5;
s5 -= carry5 << 21;
s5 -= shl64(carry5, 21);
carry7 = (s7 + (1 << 20)) >> 21;
s8 += carry7;
s7 -= carry7 << 21;
s7 -= shl64(carry7, 21);
carry9 = (s9 + (1 << 20)) >> 21;
s10 += carry9;
s9 -= carry9 << 21;
s9 -= shl64(carry9, 21);
carry11 = (s11 + (1 << 20)) >> 21;
s12 += carry11;
s11 -= carry11 << 21;
s11 -= shl64(carry11, 21);
s0 += s12 * 666643;
s1 += s12 * 470296;
s2 += s12 * 654183;
@ -237,40 +237,40 @@ void sc_reduce(unsigned char *s) {
s12 = 0;
carry0 = s0 >> 21;
s1 += carry0;
s0 -= carry0 << 21;
s0 -= shl64(carry0, 21);
carry1 = s1 >> 21;
s2 += carry1;
s1 -= carry1 << 21;
s1 -= shl64(carry1, 21);
carry2 = s2 >> 21;
s3 += carry2;
s2 -= carry2 << 21;
s2 -= shl64(carry2, 21);
carry3 = s3 >> 21;
s4 += carry3;
s3 -= carry3 << 21;
s3 -= shl64(carry3, 21);
carry4 = s4 >> 21;
s5 += carry4;
s4 -= carry4 << 21;
s4 -= shl64(carry4, 21);
carry5 = s5 >> 21;
s6 += carry5;
s5 -= carry5 << 21;
s5 -= shl64(carry5, 21);
carry6 = s6 >> 21;
s7 += carry6;
s6 -= carry6 << 21;
s6 -= shl64(carry6, 21);
carry7 = s7 >> 21;
s8 += carry7;
s7 -= carry7 << 21;
s7 -= shl64(carry7, 21);
carry8 = s8 >> 21;
s9 += carry8;
s8 -= carry8 << 21;
s8 -= shl64(carry8, 21);
carry9 = s9 >> 21;
s10 += carry9;
s9 -= carry9 << 21;
s9 -= shl64(carry9, 21);
carry10 = s10 >> 21;
s11 += carry10;
s10 -= carry10 << 21;
s10 -= shl64(carry10, 21);
carry11 = s11 >> 21;
s12 += carry11;
s11 -= carry11 << 21;
s11 -= shl64(carry11, 21);
s0 += s12 * 666643;
s1 += s12 * 470296;
s2 += s12 * 654183;
@ -280,67 +280,67 @@ void sc_reduce(unsigned char *s) {
s12 = 0;
carry0 = s0 >> 21;
s1 += carry0;
s0 -= carry0 << 21;
s0 -= shl64(carry0, 21);
carry1 = s1 >> 21;
s2 += carry1;
s1 -= carry1 << 21;
s1 -= shl64(carry1, 21);
carry2 = s2 >> 21;
s3 += carry2;
s2 -= carry2 << 21;
s2 -= shl64(carry2, 21);
carry3 = s3 >> 21;
s4 += carry3;
s3 -= carry3 << 21;
s3 -= shl64(carry3, 21);
carry4 = s4 >> 21;
s5 += carry4;
s4 -= carry4 << 21;
s4 -= shl64(carry4, 21);
carry5 = s5 >> 21;
s6 += carry5;
s5 -= carry5 << 21;
s5 -= shl64(carry5, 21);
carry6 = s6 >> 21;
s7 += carry6;
s6 -= carry6 << 21;
s6 -= shl64(carry6, 21);
carry7 = s7 >> 21;
s8 += carry7;
s7 -= carry7 << 21;
s7 -= shl64(carry7, 21);
carry8 = s8 >> 21;
s9 += carry8;
s8 -= carry8 << 21;
s8 -= shl64(carry8, 21);
carry9 = s9 >> 21;
s10 += carry9;
s9 -= carry9 << 21;
s9 -= shl64(carry9, 21);
carry10 = s10 >> 21;
s11 += carry10;
s10 -= carry10 << 21;
s10 -= shl64(carry10, 21);
s[0] = (unsigned char) (s0 >> 0);
s[1] = (unsigned char) (s0 >> 8);
s[2] = (unsigned char) ((s0 >> 16) | (s1 << 5));
s[2] = (unsigned char) ((s0 >> 16) | shl64(s1, 5));
s[3] = (unsigned char) (s1 >> 3);
s[4] = (unsigned char) (s1 >> 11);
s[5] = (unsigned char) ((s1 >> 19) | (s2 << 2));
s[5] = (unsigned char) ((s1 >> 19) | shl64(s2, 2));
s[6] = (unsigned char) (s2 >> 6);
s[7] = (unsigned char) ((s2 >> 14) | (s3 << 7));
s[7] = (unsigned char) ((s2 >> 14) | shl64(s3, 7));
s[8] = (unsigned char) (s3 >> 1);
s[9] = (unsigned char) (s3 >> 9);
s[10] = (unsigned char) ((s3 >> 17) | (s4 << 4));
s[10] = (unsigned char) ((s3 >> 17) | shl64(s4, 4));
s[11] = (unsigned char) (s4 >> 4);
s[12] = (unsigned char) (s4 >> 12);
s[13] = (unsigned char) ((s4 >> 20) | (s5 << 1));
s[13] = (unsigned char) ((s4 >> 20) | shl64(s5, 1));
s[14] = (unsigned char) (s5 >> 7);
s[15] = (unsigned char) ((s5 >> 15) | (s6 << 6));
s[15] = (unsigned char) ((s5 >> 15) | shl64(s6, 6));
s[16] = (unsigned char) (s6 >> 2);
s[17] = (unsigned char) (s6 >> 10);
s[18] = (unsigned char) ((s6 >> 18) | (s7 << 3));
s[18] = (unsigned char) ((s6 >> 18) | shl64(s7, 3));
s[19] = (unsigned char) (s7 >> 5);
s[20] = (unsigned char) (s7 >> 13);
s[21] = (unsigned char) (s8 >> 0);
s[22] = (unsigned char) (s8 >> 8);
s[23] = (unsigned char) ((s8 >> 16) | (s9 << 5));
s[23] = (unsigned char) ((s8 >> 16) | shl64(s9, 5));
s[24] = (unsigned char) (s9 >> 3);
s[25] = (unsigned char) (s9 >> 11);
s[26] = (unsigned char) ((s9 >> 19) | (s10 << 2));
s[26] = (unsigned char) ((s9 >> 19) | shl64(s10, 2));
s[27] = (unsigned char) (s10 >> 6);
s[28] = (unsigned char) ((s10 >> 14) | (s11 << 7));
s[28] = (unsigned char) ((s10 >> 14) | shl64(s11, 7));
s[29] = (unsigned char) (s11 >> 1);
s[30] = (unsigned char) (s11 >> 9);
s[31] = (unsigned char) (s11 >> 17);
@ -470,73 +470,73 @@ void sc_muladd(unsigned char *s, const unsigned char *a, const unsigned char *b,
s23 = 0;
carry0 = (s0 + (1 << 20)) >> 21;
s1 += carry0;
s0 -= carry0 << 21;
s0 -= shl64(carry0, 21);
carry2 = (s2 + (1 << 20)) >> 21;
s3 += carry2;
s2 -= carry2 << 21;
s2 -= shl64(carry2, 21);
carry4 = (s4 + (1 << 20)) >> 21;
s5 += carry4;
s4 -= carry4 << 21;
s4 -= shl64(carry4, 21);
carry6 = (s6 + (1 << 20)) >> 21;
s7 += carry6;
s6 -= carry6 << 21;
s6 -= shl64(carry6, 21);
carry8 = (s8 + (1 << 20)) >> 21;
s9 += carry8;
s8 -= carry8 << 21;
s8 -= shl64(carry8, 21);
carry10 = (s10 + (1 << 20)) >> 21;
s11 += carry10;
s10 -= carry10 << 21;
s10 -= shl64(carry10, 21);
carry12 = (s12 + (1 << 20)) >> 21;
s13 += carry12;
s12 -= carry12 << 21;
s12 -= shl64(carry12, 21);
carry14 = (s14 + (1 << 20)) >> 21;
s15 += carry14;
s14 -= carry14 << 21;
s14 -= shl64(carry14, 21);
carry16 = (s16 + (1 << 20)) >> 21;
s17 += carry16;
s16 -= carry16 << 21;
s16 -= shl64(carry16, 21);
carry18 = (s18 + (1 << 20)) >> 21;
s19 += carry18;
s18 -= carry18 << 21;
s18 -= shl64(carry18, 21);
carry20 = (s20 + (1 << 20)) >> 21;
s21 += carry20;
s20 -= carry20 << 21;
s20 -= shl64(carry20, 21);
carry22 = (s22 + (1 << 20)) >> 21;
s23 += carry22;
s22 -= carry22 << 21;
s22 -= shl64(carry22, 21);
carry1 = (s1 + (1 << 20)) >> 21;
s2 += carry1;
s1 -= carry1 << 21;
s1 -= shl64(carry1, 21);
carry3 = (s3 + (1 << 20)) >> 21;
s4 += carry3;
s3 -= carry3 << 21;
s3 -= shl64(carry3, 21);
carry5 = (s5 + (1 << 20)) >> 21;
s6 += carry5;
s5 -= carry5 << 21;
s5 -= shl64(carry5, 21);
carry7 = (s7 + (1 << 20)) >> 21;
s8 += carry7;
s7 -= carry7 << 21;
s7 -= shl64(carry7, 21);
carry9 = (s9 + (1 << 20)) >> 21;
s10 += carry9;
s9 -= carry9 << 21;
s9 -= shl64(carry9, 21);
carry11 = (s11 + (1 << 20)) >> 21;
s12 += carry11;
s11 -= carry11 << 21;
s11 -= shl64(carry11, 21);
carry13 = (s13 + (1 << 20)) >> 21;
s14 += carry13;
s13 -= carry13 << 21;
s13 -= shl64(carry13, 21);
carry15 = (s15 + (1 << 20)) >> 21;
s16 += carry15;
s15 -= carry15 << 21;
s15 -= shl64(carry15, 21);
carry17 = (s17 + (1 << 20)) >> 21;
s18 += carry17;
s17 -= carry17 << 21;
s17 -= shl64(carry17, 21);
carry19 = (s19 + (1 << 20)) >> 21;
s20 += carry19;
s19 -= carry19 << 21;
s19 -= shl64(carry19, 21);
carry21 = (s21 + (1 << 20)) >> 21;
s22 += carry21;
s21 -= carry21 << 21;
s21 -= shl64(carry21, 21);
s11 += s23 * 666643;
s12 += s23 * 470296;
s13 += s23 * 654183;
@ -581,37 +581,37 @@ void sc_muladd(unsigned char *s, const unsigned char *a, const unsigned char *b,
s18 = 0;
carry6 = (s6 + (1 << 20)) >> 21;
s7 += carry6;
s6 -= carry6 << 21;
s6 -= shl64(carry6, 21);
carry8 = (s8 + (1 << 20)) >> 21;
s9 += carry8;
s8 -= carry8 << 21;
s8 -= shl64(carry8, 21);
carry10 = (s10 + (1 << 20)) >> 21;
s11 += carry10;
s10 -= carry10 << 21;
s10 -= shl64(carry10, 21);
carry12 = (s12 + (1 << 20)) >> 21;
s13 += carry12;
s12 -= carry12 << 21;
s12 -= shl64(carry12, 21);
carry14 = (s14 + (1 << 20)) >> 21;
s15 += carry14;
s14 -= carry14 << 21;
s14 -= shl64(carry14, 21);
carry16 = (s16 + (1 << 20)) >> 21;
s17 += carry16;
s16 -= carry16 << 21;
s16 -= shl64(carry16, 21);
carry7 = (s7 + (1 << 20)) >> 21;
s8 += carry7;
s7 -= carry7 << 21;
s7 -= shl64(carry7, 21);
carry9 = (s9 + (1 << 20)) >> 21;
s10 += carry9;
s9 -= carry9 << 21;
s9 -= shl64(carry9, 21);
carry11 = (s11 + (1 << 20)) >> 21;
s12 += carry11;
s11 -= carry11 << 21;
s11 -= shl64(carry11, 21);
carry13 = (s13 + (1 << 20)) >> 21;
s14 += carry13;
s13 -= carry13 << 21;
s13 -= shl64(carry13, 21);
carry15 = (s15 + (1 << 20)) >> 21;
s16 += carry15;
s15 -= carry15 << 21;
s15 -= shl64(carry15, 21);
s5 += s17 * 666643;
s6 += s17 * 470296;
s7 += s17 * 654183;
@ -656,40 +656,40 @@ void sc_muladd(unsigned char *s, const unsigned char *a, const unsigned char *b,
s12 = 0;
carry0 = (s0 + (1 << 20)) >> 21;
s1 += carry0;
s0 -= carry0 << 21;
s0 -= shl64(carry0, 21);
carry2 = (s2 + (1 << 20)) >> 21;
s3 += carry2;
s2 -= carry2 << 21;
s2 -= shl64(carry2, 21);
carry4 = (s4 + (1 << 20)) >> 21;
s5 += carry4;
s4 -= carry4 << 21;
s4 -= shl64(carry4, 21);
carry6 = (s6 + (1 << 20)) >> 21;
s7 += carry6;
s6 -= carry6 << 21;
s6 -= shl64(carry6, 21);
carry8 = (s8 + (1 << 20)) >> 21;
s9 += carry8;
s8 -= carry8 << 21;
s8 -= shl64(carry8, 21);
carry10 = (s10 + (1 << 20)) >> 21;
s11 += carry10;
s10 -= carry10 << 21;
s10 -= shl64(carry10, 21);
carry1 = (s1 + (1 << 20)) >> 21;
s2 += carry1;
s1 -= carry1 << 21;
s1 -= shl64(carry1, 21);
carry3 = (s3 + (1 << 20)) >> 21;
s4 += carry3;
s3 -= carry3 << 21;
s3 -= shl64(carry3, 21);
carry5 = (s5 + (1 << 20)) >> 21;
s6 += carry5;
s5 -= carry5 << 21;
s5 -= shl64(carry5, 21);
carry7 = (s7 + (1 << 20)) >> 21;
s8 += carry7;
s7 -= carry7 << 21;
s7 -= shl64(carry7, 21);
carry9 = (s9 + (1 << 20)) >> 21;
s10 += carry9;
s9 -= carry9 << 21;
s9 -= shl64(carry9, 21);
carry11 = (s11 + (1 << 20)) >> 21;
s12 += carry11;
s11 -= carry11 << 21;
s11 -= shl64(carry11, 21);
s0 += s12 * 666643;
s1 += s12 * 470296;
s2 += s12 * 654183;
@ -699,40 +699,40 @@ void sc_muladd(unsigned char *s, const unsigned char *a, const unsigned char *b,
s12 = 0;
carry0 = s0 >> 21;
s1 += carry0;
s0 -= carry0 << 21;
s0 -= shl64(carry0, 21);
carry1 = s1 >> 21;
s2 += carry1;
s1 -= carry1 << 21;
s1 -= shl64(carry1, 21);
carry2 = s2 >> 21;
s3 += carry2;
s2 -= carry2 << 21;
s2 -= shl64(carry2, 21);
carry3 = s3 >> 21;
s4 += carry3;
s3 -= carry3 << 21;
s3 -= shl64(carry3, 21);
carry4 = s4 >> 21;
s5 += carry4;
s4 -= carry4 << 21;
s4 -= shl64(carry4, 21);
carry5 = s5 >> 21;
s6 += carry5;
s5 -= carry5 << 21;
s5 -= shl64(carry5, 21);
carry6 = s6 >> 21;
s7 += carry6;
s6 -= carry6 << 21;
s6 -= shl64(carry6, 21);
carry7 = s7 >> 21;
s8 += carry7;
s7 -= carry7 << 21;
s7 -= shl64(carry7, 21);
carry8 = s8 >> 21;
s9 += carry8;
s8 -= carry8 << 21;
s8 -= shl64(carry8, 21);
carry9 = s9 >> 21;
s10 += carry9;
s9 -= carry9 << 21;
s9 -= shl64(carry9, 21);
carry10 = s10 >> 21;
s11 += carry10;
s10 -= carry10 << 21;
s10 -= shl64(carry10, 21);
carry11 = s11 >> 21;
s12 += carry11;
s11 -= carry11 << 21;
s11 -= shl64(carry11, 21);
s0 += s12 * 666643;
s1 += s12 * 470296;
s2 += s12 * 654183;
@ -742,67 +742,67 @@ void sc_muladd(unsigned char *s, const unsigned char *a, const unsigned char *b,
s12 = 0;
carry0 = s0 >> 21;
s1 += carry0;
s0 -= carry0 << 21;
s0 -= shl64(carry0, 21);
carry1 = s1 >> 21;
s2 += carry1;
s1 -= carry1 << 21;
s1 -= shl64(carry1, 21);
carry2 = s2 >> 21;
s3 += carry2;
s2 -= carry2 << 21;
s2 -= shl64(carry2, 21);
carry3 = s3 >> 21;
s4 += carry3;
s3 -= carry3 << 21;
s3 -= shl64(carry3, 21);
carry4 = s4 >> 21;
s5 += carry4;
s4 -= carry4 << 21;
s4 -= shl64(carry4, 21);
carry5 = s5 >> 21;
s6 += carry5;
s5 -= carry5 << 21;
s5 -= shl64(carry5, 21);
carry6 = s6 >> 21;
s7 += carry6;
s6 -= carry6 << 21;
s6 -= shl64(carry6, 21);
carry7 = s7 >> 21;
s8 += carry7;
s7 -= carry7 << 21;
s7 -= shl64(carry7, 21);
carry8 = s8 >> 21;
s9 += carry8;
s8 -= carry8 << 21;
s8 -= shl64(carry8, 21);
carry9 = s9 >> 21;
s10 += carry9;
s9 -= carry9 << 21;
s9 -= shl64(carry9, 21);
carry10 = s10 >> 21;
s11 += carry10;
s10 -= carry10 << 21;
s10 -= shl64(carry10, 21);
s[0] = (unsigned char) (s0 >> 0);
s[1] = (unsigned char) (s0 >> 8);
s[2] = (unsigned char) ((s0 >> 16) | (s1 << 5));
s[2] = (unsigned char) ((s0 >> 16) | shl64(s1, 5));
s[3] = (unsigned char) (s1 >> 3);
s[4] = (unsigned char) (s1 >> 11);
s[5] = (unsigned char) ((s1 >> 19) | (s2 << 2));
s[5] = (unsigned char) ((s1 >> 19) | shl64(s2, 2));
s[6] = (unsigned char) (s2 >> 6);
s[7] = (unsigned char) ((s2 >> 14) | (s3 << 7));
s[7] = (unsigned char) ((s2 >> 14) | shl64(s3, 7));
s[8] = (unsigned char) (s3 >> 1);
s[9] = (unsigned char) (s3 >> 9);
s[10] = (unsigned char) ((s3 >> 17) | (s4 << 4));
s[10] = (unsigned char) ((s3 >> 17) | shl64(s4, 4));
s[11] = (unsigned char) (s4 >> 4);
s[12] = (unsigned char) (s4 >> 12);
s[13] = (unsigned char) ((s4 >> 20) | (s5 << 1));
s[13] = (unsigned char) ((s4 >> 20) | shl64(s5, 1));
s[14] = (unsigned char) (s5 >> 7);
s[15] = (unsigned char) ((s5 >> 15) | (s6 << 6));
s[15] = (unsigned char) ((s5 >> 15) | shl64(s6, 6));
s[16] = (unsigned char) (s6 >> 2);
s[17] = (unsigned char) (s6 >> 10);
s[18] = (unsigned char) ((s6 >> 18) | (s7 << 3));
s[18] = (unsigned char) ((s6 >> 18) | shl64(s7, 3));
s[19] = (unsigned char) (s7 >> 5);
s[20] = (unsigned char) (s7 >> 13);
s[21] = (unsigned char) (s8 >> 0);
s[22] = (unsigned char) (s8 >> 8);
s[23] = (unsigned char) ((s8 >> 16) | (s9 << 5));
s[23] = (unsigned char) ((s8 >> 16) | shl64(s9, 5));
s[24] = (unsigned char) (s9 >> 3);
s[25] = (unsigned char) (s9 >> 11);
s[26] = (unsigned char) ((s9 >> 19) | (s10 << 2));
s[26] = (unsigned char) ((s9 >> 19) | shl64(s10, 2));
s[27] = (unsigned char) (s10 >> 6);
s[28] = (unsigned char) ((s10 >> 14) | (s11 << 7));
s[28] = (unsigned char) ((s10 >> 14) | shl64(s11, 7));
s[29] = (unsigned char) (s11 >> 1);
s[30] = (unsigned char) (s11 >> 9);
s[31] = (unsigned char) (s11 >> 17);