/* ----------------------------------------------------------------- Software simulation of a 1024-bit hardware hash. A work-in-progress with the goal of entering the NIST cryptographic hash competition. Bits are arranged in an 8x8 grid of 16-bit blocks. A round consists of 64 16-bit permutations followed by rotating each of the 16 bit positions by different amounts through the 64 blocks. The rotates are chosen so 2 bits are shifted up by 0..7 and left by 0..7. The nonlinear mix of 16-bit blocks is actually an 8-bit permutation applied to bits 0..7 and again to 8..15, with the 16 bits shuffled at the end. The mix is 5 NAND-gates deep. Certain bits can be inverted and still be implemented 5 NAND-gates deep. Symmetry is broken by inverting different subsets of these bits in each permutation. Symmetry breaking is done by XORing 64-bit constants to the invertible bits. All 128 8-bit permutations could be done at once with SSE instructions, but I'm only using 64-bit registers here. This isn't optimized much. It takes 7 rounds forward or 7 rounds backwards for all bits to be affected. Is this better or worse than my previous Feistel-network based 16-bit permutations? At the moment, I'd say worse. But I haven't measured which function is best, or tuned to shift or XOR constants yet. (by Bob Jenkins, July 24 2008, public domain) ----------------------------------------------------------------- */ #include #include typedef unsigned long u4; typedef unsigned long long u8; #define LEN 16 #define ROWS 8 #define COLS 8 #define ROUNDS 200 /* do one 8-bit permutation */ void eight( u8 *x, u8 *y) { u8 a0 = x[0]; u8 a1 = x[1]; u8 a2 = x[2]; u8 a3 = x[3]; u8 a4 = x[4]; u8 a5 = x[5]; u8 a6 = x[6]; u8 a7 = x[7]; u8 a8 = a3 ^ a6; u8 a9 = ~a7; u8 a10 = a5 & a9; u8 a11 = a4 | a5; u8 a12 = ~a11; u8 a13 = a10 | a12; u8 a14 = a8 & a13; u8 a15 = a5 ^ a7; u8 a16 = a8 | a15; u8 a17 = ~a16; y[0] = a14 | a17; u8 a19 = a0 ^ a4; u8 a20 = ~a15; u8 a21 = a19 & a20; u8 a22 = ~a6; u8 a23 = a4 & a22; u8 a24 = a0 | a4; u8 a25 = ~a24; u8 a26 = a23 | a25; u8 a27 = a26 & a15; y[1] = a21 | a27; u8 a29 = a0 ^ a6; u8 a30 = a1 ^ a2; y[2] = a29 ^ a30; u8 a32 = a0 ^ a3; u8 a33 = a4 & a9; u8 a34 = a3 & a7; u8 a35 = ~a34; u8 a36 = a5 & a35; u8 a37 = a33 | a36; u8 a38 = a32 & a37; u8 a39 = a4 & a5; u8 a40 = a39 | a9; u8 a41 = a40 & a11; u8 a42 = a32 | a41; u8 a43 = ~a42; y[3] = a38 | a43; u8 a45 = a1 & a4; u8 a46 = ~a4; u8 a47 = a2 & a46; u8 a48 = a45 | a47; u8 a49 = a48 & a20; u8 a50 = ~a5; u8 a51 = a4 & a50; u8 a52 = a2 | a5; u8 a53 = a52 & a7; u8 a54 = ~a53; u8 a55 = a51 | a54; u8 a56 = ~a1; u8 a57 = a5 & a56; u8 a58 = ~a45; u8 a59 = a7 & a58; u8 a60 = a57 | a59; u8 a61 = a55 & a60; y[4] = a49 | a61; y[5] = a32 ^ a30; u8 a64 = a1 ^ a6; u8 a65 = a2 ^ a3; y[6] = a64 ^ a65; u8 a67 = a7 & a46; u8 a68 = a39 | a67; u8 a69 = a29 & a68; u8 a70 = a5 | a46; u8 a71 = a4 | a7; u8 a72 = a70 & a71; u8 a73 = a29 | a72; u8 a74 = ~a73; y[7] = a69 | a74; } static int shift[LEN] = {56,43,22,41,15,52,28,18,38,1,48,35,13,2,31,61}; static u4 map[] = {2,3,4,7,9,12,14,15,0,5,1,8,6,13,10,11}; void perm(u8 x[LEN]) { u8 y[LEN]; int i; /* the two 8-bit permutations */ eight(x, y); eight(&x[LEN/2], &y[LEN/2]); /* break symmetry among the 64 16-bit permutations */ y[2] ^= 0x18f8aa72369b75c2LL; y[5] ^= 0x337b824aab77201fLL; y[6] ^= 0x60bd51315e37b49cLL; y[10] ^= 0x82ed31eb138e02efLL; y[13] ^= 0x5fe101ed66fc3130LL; y[14] ^= 0x1019906dca58dffbLL; /* shuffle the output bits */ for (i=0; i> (64-shift[i])); } } static int unperm[256]; void uneight(u8 *y, u8 *x) { int i, j; for (i=0; i<8; ++i) { x[i] = 0; } for (i=0; i<64; ++i) { int z = 0; for (j=0; j<8; ++j) { if (y[j] & (((u8)1)<> shift[i]) ^ (x[i] << (64-shift[i])); } /* unshuffle the output bits */ for (i=0; i>(32-k))) u4 ranval( ranctx *x ) { u4 e = x->a - rot(x->b, 27); x->a = x->b ^ rot(x->c, 17); x->b = x->c + x->d; x->c = x->d + e; x->d = e + x->a; return e; } void show(u8 x[LEN]) { int i,j,k; for (i=0; i=4; ) { printf(" %s", (x[k] & (((u8)1)<<(COLS*i+j))) ? "1" : "0"); } } printf("\n"); for (j=0; j=12; ) { printf(" %s", (x[k] & (((u8)1)<<(COLS*i+j))) ? "1" : "0"); } } printf("\n"); } printf("\n"); } void showx(float x, float *worst, int verbose) { x = x/ROUNDS; if (x < *worst) *worst = x; if (verbose) printf("%3d", (int)(100*x + 0.5)); if (1.0-x < *worst) *worst = 1.0-x; } float showc(float c[ROWS][COLS][LEN], int verbose) { int i,j,k; float worst = 1.0; for (i=0; i=4; ) { showx(c[i][j][k], &worst, verbose); } if (verbose) printf(" "); } if (verbose) printf("\n"); for (j=0; j=12; ) { showx(c[i][j][k], &worst, verbose); } if (verbose) printf(" "); } if (verbose) printf("\n\n"); } /* printf(" worst = %f\n", worst); */ return worst; } void test(ranctx *rctx) { u8 x[LEN], y[LEN]; u4 m,n,o,p,count,i,j,k; float val, worst = 1.0; float zcount = 0.0; /* see how thoroughly bit "tab[m][n] & (1<