From af3fc89a287b5f6722088d99d502b1b108ee7c56 Mon Sep 17 00:00:00 2001 From: Illia Volochii Date: Sat, 20 Feb 2021 19:58:19 +0200 Subject: [PATCH] bpo-43279: Update code taken from Keccak Code Package There have been some updates to the Keccak package since it was first integrated into CPython. The history can be found at https://github.com/XKCP/XKCP. XKCP's contributors did some refactoring. In particular, they replaced `UINT64` and `UINT8` with `uint64_t` and `uint8_t`. They also added support for 64-bit big-endian platforms. These changes are reflected in sha3module.c. I replaced the files in the kcp folder with ones generated with the `generic64lc/libXKCP.a.pack` and `generic32lc/libXKCP.a.pack` targets, and removed PlSnP-Fallback.inc because it is not used. --- .../2021-02-20-20-01-01.bpo-43279.5JwOiY.rst | 1 + Modules/_sha3/README.txt | 12 +- Modules/_sha3/cleanup.py | 4 + Modules/_sha3/kcp/KeccakHash.c | 35 +- Modules/_sha3/kcp/KeccakHash.h | 49 +- Modules/_sha3/kcp/KeccakP-1600-64.macros | 1716 ++--------------- Modules/_sha3/kcp/KeccakP-1600-SnP-opt32.h | 20 +- Modules/_sha3/kcp/KeccakP-1600-SnP-opt64.h | 25 +- Modules/_sha3/kcp/KeccakP-1600-inplace32BI.c | 1419 +++++++------- Modules/_sha3/kcp/KeccakP-1600-opt64-config.h | 4 + Modules/_sha3/kcp/KeccakP-1600-opt64.c | 267 ++- .../_sha3/kcp/KeccakP-1600-unrolling.macros | 134 +- Modules/_sha3/kcp/KeccakSponge.c | 41 +- Modules/_sha3/kcp/KeccakSponge.h | 166 +- Modules/_sha3/kcp/KeccakSponge.inc | 34 +- Modules/_sha3/kcp/PlSnP-Fallback.inc | 257 --- Modules/_sha3/kcp/SnP-Relaned.h | 21 +- Modules/_sha3/kcp/align.h | 14 +- Modules/_sha3/sha3module.c | 37 +- 19 files changed, 1340 insertions(+), 2916 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2021-02-20-20-01-01.bpo-43279.5JwOiY.rst delete mode 100644 Modules/_sha3/kcp/PlSnP-Fallback.inc diff --git a/Misc/NEWS.d/next/Library/2021-02-20-20-01-01.bpo-43279.5JwOiY.rst b/Misc/NEWS.d/next/Library/2021-02-20-20-01-01.bpo-43279.5JwOiY.rst new file mode 100644 index 
00000000000000..b81cb72e3a8b12 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2021-02-20-20-01-01.bpo-43279.5JwOiY.rst @@ -0,0 +1 @@ +Update code taken from Keccak Code Package. Patch by Illia Volochii. diff --git a/Modules/_sha3/README.txt b/Modules/_sha3/README.txt index e34b1d12f702fa..8e93b002de52f3 100644 --- a/Modules/_sha3/README.txt +++ b/Modules/_sha3/README.txt @@ -1,11 +1,9 @@ Keccak Code Package =================== -The files in kcp are taken from the Keccak Code Package. They have been -slightly to be C89 compatible. The architecture specific header file -KeccakP-1600-SnP.h ha been renamed to KeccakP-1600-SnP-opt32.h or -KeccakP-1600-SnP-opt64.h. - -The 64bit files were generated with generic64lc/libkeccak.a.pack target, the -32bit files with generic32lc/libkeccak.a.pack. +The files in kcp are taken from the eXtended Keccak Code Package. +The architecture specific header file KeccakP-1600-SnP.h has been renamed to +KeccakP-1600-SnP-opt32.h or KeccakP-1600-SnP-opt64.h. +The 64bit files were generated with generic64lc/libXKCP.a.pack target, the +32bit files with generic32lc/libXKCP.a.pack. 
diff --git a/Modules/_sha3/cleanup.py b/Modules/_sha3/cleanup.py index 4f53681b49e67b..17f9372a910cd2 100755 --- a/Modules/_sha3/cleanup.py +++ b/Modules/_sha3/cleanup.py @@ -37,6 +37,10 @@ def cleanup(f): if "brg_endian.h" in line: buf.append("/* %s */\n" % line.strip()) continue + # remove #include "config.h" + if '#include "config.h"' in line: + buf.append("/* %s */\n" % line.strip()) + continue # transform C++ comments into ANSI C comments line = CPP1.sub(r"/*\1 */\n", line) line = CPP2.sub(r" /*\1 */\n", line) diff --git a/Modules/_sha3/kcp/KeccakHash.c b/Modules/_sha3/kcp/KeccakHash.c index e09fb43cacea1d..c660f94076ae9d 100644 --- a/Modules/_sha3/kcp/KeccakHash.c +++ b/Modules/_sha3/kcp/KeccakHash.c @@ -1,12 +1,13 @@ /* -Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, -Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby -denoted as "the implementer". +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP -For more information, feedback or questions, please refer to our websites: -http://keccak.noekeon.org/ -http://keyak.noekeon.org/ -http://ketje.noekeon.org/ +Keccak, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche. + +Implementation by the designers, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ To the extent possible under law, the implementer has waived all copyright and related or neighboring rights to the source code in this file. 
@@ -23,30 +24,28 @@ HashReturn Keccak_HashInitialize(Keccak_HashInstance *instance, unsigned int rat HashReturn result; if (delimitedSuffix == 0) - return FAIL; + return KECCAK_FAIL; result = (HashReturn)KeccakWidth1600_SpongeInitialize(&instance->sponge, rate, capacity); - if (result != SUCCESS) + if (result != KECCAK_SUCCESS) return result; instance->fixedOutputLength = hashbitlen; instance->delimitedSuffix = delimitedSuffix; - return SUCCESS; + return KECCAK_SUCCESS; } /* ---------------------------------------------------------------- */ -HashReturn Keccak_HashUpdate(Keccak_HashInstance *instance, const BitSequence *data, DataLength databitlen) +HashReturn Keccak_HashUpdate(Keccak_HashInstance *instance, const BitSequence *data, BitLength databitlen) { if ((databitlen % 8) == 0) return (HashReturn)KeccakWidth1600_SpongeAbsorb(&instance->sponge, data, databitlen/8); else { HashReturn ret = (HashReturn)KeccakWidth1600_SpongeAbsorb(&instance->sponge, data, databitlen/8); - if (ret == SUCCESS) { + if (ret == KECCAK_SUCCESS) { /* The last partial byte is assumed to be aligned on the least significant bits */ - unsigned char lastByte = data[databitlen/8]; /* Concatenate the last few bits provided here with those of the suffix */ - - unsigned short delimitedLastBytes = (unsigned short)((unsigned short)lastByte | ((unsigned short)instance->delimitedSuffix << (databitlen % 8))); + unsigned short delimitedLastBytes = (unsigned short)((unsigned short)(lastByte & ((1 << (databitlen % 8)) - 1)) | ((unsigned short)instance->delimitedSuffix << (databitlen % 8))); if ((delimitedLastBytes & 0xFF00) == 0x0000) { instance->delimitedSuffix = delimitedLastBytes & 0xFF; } @@ -66,7 +65,7 @@ HashReturn Keccak_HashUpdate(Keccak_HashInstance *instance, const BitSequence *d HashReturn Keccak_HashFinal(Keccak_HashInstance *instance, BitSequence *hashval) { HashReturn ret = (HashReturn)KeccakWidth1600_SpongeAbsorbLastFewBits(&instance->sponge, instance->delimitedSuffix); - if (ret == 
SUCCESS) + if (ret == KECCAK_SUCCESS) return (HashReturn)KeccakWidth1600_SpongeSqueeze(&instance->sponge, hashval, instance->fixedOutputLength/8); else return ret; @@ -74,9 +73,9 @@ HashReturn Keccak_HashFinal(Keccak_HashInstance *instance, BitSequence *hashval) /* ---------------------------------------------------------------- */ -HashReturn Keccak_HashSqueeze(Keccak_HashInstance *instance, BitSequence *data, DataLength databitlen) +HashReturn Keccak_HashSqueeze(Keccak_HashInstance *instance, BitSequence *data, BitLength databitlen) { if ((databitlen % 8) != 0) - return FAIL; + return KECCAK_FAIL; return (HashReturn)KeccakWidth1600_SpongeSqueeze(&instance->sponge, data, databitlen/8); } diff --git a/Modules/_sha3/kcp/KeccakHash.h b/Modules/_sha3/kcp/KeccakHash.h index bbd3dc64a2285b..3c6222bb0907db 100644 --- a/Modules/_sha3/kcp/KeccakHash.h +++ b/Modules/_sha3/kcp/KeccakHash.h @@ -1,12 +1,13 @@ /* -Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, -Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby -denoted as "the implementer". +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP -For more information, feedback or questions, please refer to our websites: -http://keccak.noekeon.org/ -http://keyak.noekeon.org/ -http://ketje.noekeon.org/ +Keccak, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche. + +Implementation by the designers, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ To the extent possible under law, the implementer has waived all copyright and related or neighboring rights to the source code in this file. @@ -16,14 +17,21 @@ and related or neighboring rights to the source code in this file. 
#ifndef _KeccakHashInterface_h_ #define _KeccakHashInterface_h_ -#ifndef KeccakP1600_excluded +/* #include "config.h" */ +#ifdef XKCP_has_KeccakP1600 -#include "KeccakSponge.h" +#include <stdint.h> #include <string.h> +#include "KeccakSponge.h" + +#ifndef _Keccak_BitTypes_ +#define _Keccak_BitTypes_ +typedef uint8_t BitSequence; + +typedef size_t BitLength; +#endif -typedef unsigned char BitSequence; -typedef size_t DataLength; -typedef enum { SUCCESS = 0, FAIL = 1, BAD_HASHLEN = 2 } HashReturn; +typedef enum { KECCAK_SUCCESS = 0, KECCAK_FAIL = 1, KECCAK_BAD_HASHLEN = 2 } HashReturn; typedef struct { KeccakWidth1600_SpongeInstance sponge; @@ -44,7 +52,7 @@ typedef struct { * formatted like the @a delimitedData parameter of * the Keccak_SpongeAbsorbLastFewBits() function. * @pre One must have r+c=1600 and the rate a multiple of 8 bits in this implementation. - * @return SUCCESS if successful, FAIL otherwise. + * @return KECCAK_SUCCESS if successful, KECCAK_FAIL otherwise. */ HashReturn Keccak_HashInitialize(Keccak_HashInstance *hashInstance, unsigned int rate, unsigned int capacity, unsigned int hashbitlen, unsigned char delimitedSuffix); @@ -78,11 +86,13 @@ HashReturn Keccak_HashInitialize(Keccak_HashInstance *hashInstance, unsigned int * @param data Pointer to the input data. * When @a databitLen is not a multiple of 8, the last bits of data must be * in the least significant bits of the last byte (little-endian convention). + * In this case, the (8 - @a databitLen mod 8) most significant bits + * of the last byte are ignored. * @param databitLen The number of input bits provided in the input data. * @pre In the previous call to Keccak_HashUpdate(), databitlen was a multiple of 8. - * @return SUCCESS if successful, FAIL otherwise. + * @return KECCAK_SUCCESS if successful, KECCAK_FAIL otherwise. 
*/ -HashReturn Keccak_HashUpdate(Keccak_HashInstance *hashInstance, const BitSequence *data, DataLength databitlen); +HashReturn Keccak_HashUpdate(Keccak_HashInstance *hashInstance, const BitSequence *data, BitLength databitlen); /** * Function to call after all input blocks have been input and to get @@ -92,9 +102,8 @@ HashReturn Keccak_HashUpdate(Keccak_HashInstance *hashInstance, const BitSequenc * output bits is equal to @a hashbitlen. * If @a hashbitlen was 0 in the call to Keccak_HashInitialize(), the output bits * must be extracted using the Keccak_HashSqueeze() function. - * @param state Pointer to the state of the sponge function initialized by Init(). * @param hashval Pointer to the buffer where to store the output data. - * @return SUCCESS if successful, FAIL otherwise. + * @return KECCAK_SUCCESS if successful, KECCAK_FAIL otherwise. */ HashReturn Keccak_HashFinal(Keccak_HashInstance *hashInstance, BitSequence *hashval); @@ -105,10 +114,12 @@ HashReturn Keccak_HashFinal(Keccak_HashInstance *hashInstance, BitSequence *hash * @param databitlen The number of output bits desired (must be a multiple of 8). * @pre Keccak_HashFinal() must have been already called. * @pre @a databitlen is a multiple of 8. - * @return SUCCESS if successful, FAIL otherwise. + * @return KECCAK_SUCCESS if successful, KECCAK_FAIL otherwise. 
*/ -HashReturn Keccak_HashSqueeze(Keccak_HashInstance *hashInstance, BitSequence *data, DataLength databitlen); +HashReturn Keccak_HashSqueeze(Keccak_HashInstance *hashInstance, BitSequence *data, BitLength databitlen); +#else +#error This requires an implementation of Keccak-p[1600] #endif #endif diff --git a/Modules/_sha3/kcp/KeccakP-1600-64.macros b/Modules/_sha3/kcp/KeccakP-1600-64.macros index 1f11fe3e79fbba..aabb307ba2f4dc 100644 --- a/Modules/_sha3/kcp/KeccakP-1600-64.macros +++ b/Modules/_sha3/kcp/KeccakP-1600-64.macros @@ -1,12 +1,13 @@ /* -Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, -Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby -denoted as "the implementer". +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP -For more information, feedback or questions, please refer to our websites: -http://keccak.noekeon.org/ -http://keyak.noekeon.org/ -http://ketje.noekeon.org/ +The Keccak-p permutations, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche. + +Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ To the extent possible under law, the implementer has waived all copyright and related or neighboring rights to the source code in this file. 
@@ -14,23 +15,23 @@ http://creativecommons.org/publicdomain/zero/1.0/ */ #define declareABCDE \ - UINT64 Aba, Abe, Abi, Abo, Abu; \ - UINT64 Aga, Age, Agi, Ago, Agu; \ - UINT64 Aka, Ake, Aki, Ako, Aku; \ - UINT64 Ama, Ame, Ami, Amo, Amu; \ - UINT64 Asa, Ase, Asi, Aso, Asu; \ - UINT64 Bba, Bbe, Bbi, Bbo, Bbu; \ - UINT64 Bga, Bge, Bgi, Bgo, Bgu; \ - UINT64 Bka, Bke, Bki, Bko, Bku; \ - UINT64 Bma, Bme, Bmi, Bmo, Bmu; \ - UINT64 Bsa, Bse, Bsi, Bso, Bsu; \ - UINT64 Ca, Ce, Ci, Co, Cu; \ - UINT64 Da, De, Di, Do, Du; \ - UINT64 Eba, Ebe, Ebi, Ebo, Ebu; \ - UINT64 Ega, Ege, Egi, Ego, Egu; \ - UINT64 Eka, Eke, Eki, Eko, Eku; \ - UINT64 Ema, Eme, Emi, Emo, Emu; \ - UINT64 Esa, Ese, Esi, Eso, Esu; \ + uint64_t Aba, Abe, Abi, Abo, Abu; \ + uint64_t Aga, Age, Agi, Ago, Agu; \ + uint64_t Aka, Ake, Aki, Ako, Aku; \ + uint64_t Ama, Ame, Ami, Amo, Amu; \ + uint64_t Asa, Ase, Asi, Aso, Asu; \ + uint64_t Bba, Bbe, Bbi, Bbo, Bbu; \ + uint64_t Bga, Bge, Bgi, Bgo, Bgu; \ + uint64_t Bka, Bke, Bki, Bko, Bku; \ + uint64_t Bma, Bme, Bmi, Bmo, Bmu; \ + uint64_t Bsa, Bse, Bsi, Bso, Bsu; \ + uint64_t Ca, Ce, Ci, Co, Cu; \ + uint64_t Da, De, Di, Do, Du; \ + uint64_t Eba, Ebe, Ebi, Ebo, Ebu; \ + uint64_t Ega, Ege, Egi, Ego, Egu; \ + uint64_t Eka, Eke, Eki, Eko, Eku; \ + uint64_t Ema, Eme, Emi, Emo, Emu; \ + uint64_t Esa, Ese, Esi, Eso, Esu; \ #define prepareTheta \ Ca = Aba^Aga^Aka^Ama^Asa; \ @@ -41,9 +42,7 @@ http://creativecommons.org/publicdomain/zero/1.0/ #ifdef UseBebigokimisa /* --- Code for round, with prepare-theta (lane complementing pattern 'bebigokimisa') */ - /* --- 64-bit lanes mapped to 64-bit words */ - #define thetaRhoPiChiIotaPrepareTheta(i, A, E) \ Da = Cu^ROL64(Ce, 1); \ De = Ca^ROL64(Ci, 1); \ @@ -159,9 +158,7 @@ http://creativecommons.org/publicdomain/zero/1.0/ \ /* --- Code for round (lane complementing pattern 'bebigokimisa') */ - /* --- 64-bit lanes mapped to 64-bit words */ - #define thetaRhoPiChiIota(i, A, E) \ Da = Cu^ROL64(Ce, 1); \ De = Ca^ROL64(Ci, 1); \ @@ -252,11 
+249,8 @@ http://creativecommons.org/publicdomain/zero/1.0/ \ #else /* UseBebigokimisa */ - /* --- Code for round, with prepare-theta */ - /* --- 64-bit lanes mapped to 64-bit words */ - #define thetaRhoPiChiIotaPrepareTheta(i, A, E) \ Da = Cu^ROL64(Ce, 1); \ De = Ca^ROL64(Ci, 1); \ @@ -372,9 +366,7 @@ http://creativecommons.org/publicdomain/zero/1.0/ \ /* --- Code for round */ - /* --- 64-bit lanes mapped to 64-bit words */ - #define thetaRhoPiChiIota(i, A, E) \ Da = Cu^ROL64(Ce, 1); \ De = Ca^ROL64(Ci, 1); \ @@ -466,7 +458,6 @@ http://creativecommons.org/publicdomain/zero/1.0/ #endif /* UseBebigokimisa */ - #define copyFromState(X, state) \ X##ba = state[ 0]; \ X##be = state[ 1]; \ @@ -548,257 +539,43 @@ http://creativecommons.org/publicdomain/zero/1.0/ X##so = Y##so; \ X##su = Y##su; \ -#define copyFromStateAndAdd(X, state, input, laneCount) \ - if (laneCount < 16) { \ - if (laneCount < 8) { \ - if (laneCount < 4) { \ - if (laneCount < 2) { \ - if (laneCount < 1) { \ - X##ba = state[ 0]; \ - } \ - else { \ - X##ba = state[ 0]^input[ 0]; \ - } \ - X##be = state[ 1]; \ - X##bi = state[ 2]; \ - } \ - else { \ - X##ba = state[ 0]^input[ 0]; \ - X##be = state[ 1]^input[ 1]; \ - if (laneCount < 3) { \ - X##bi = state[ 2]; \ - } \ - else { \ - X##bi = state[ 2]^input[ 2]; \ - } \ - } \ - X##bo = state[ 3]; \ - X##bu = state[ 4]; \ - X##ga = state[ 5]; \ - X##ge = state[ 6]; \ - } \ - else { \ - X##ba = state[ 0]^input[ 0]; \ - X##be = state[ 1]^input[ 1]; \ - X##bi = state[ 2]^input[ 2]; \ - X##bo = state[ 3]^input[ 3]; \ - if (laneCount < 6) { \ - if (laneCount < 5) { \ - X##bu = state[ 4]; \ - } \ - else { \ - X##bu = state[ 4]^input[ 4]; \ - } \ - X##ga = state[ 5]; \ - X##ge = state[ 6]; \ - } \ - else { \ - X##bu = state[ 4]^input[ 4]; \ - X##ga = state[ 5]^input[ 5]; \ - if (laneCount < 7) { \ - X##ge = state[ 6]; \ - } \ - else { \ - X##ge = state[ 6]^input[ 6]; \ - } \ - } \ - } \ - X##gi = state[ 7]; \ - X##go = state[ 8]; \ - X##gu = state[ 9]; \ - X##ka = 
state[10]; \ - X##ke = state[11]; \ - X##ki = state[12]; \ - X##ko = state[13]; \ - X##ku = state[14]; \ - } \ - else { \ - X##ba = state[ 0]^input[ 0]; \ - X##be = state[ 1]^input[ 1]; \ - X##bi = state[ 2]^input[ 2]; \ - X##bo = state[ 3]^input[ 3]; \ - X##bu = state[ 4]^input[ 4]; \ - X##ga = state[ 5]^input[ 5]; \ - X##ge = state[ 6]^input[ 6]; \ - X##gi = state[ 7]^input[ 7]; \ - if (laneCount < 12) { \ - if (laneCount < 10) { \ - if (laneCount < 9) { \ - X##go = state[ 8]; \ - } \ - else { \ - X##go = state[ 8]^input[ 8]; \ - } \ - X##gu = state[ 9]; \ - X##ka = state[10]; \ - } \ - else { \ - X##go = state[ 8]^input[ 8]; \ - X##gu = state[ 9]^input[ 9]; \ - if (laneCount < 11) { \ - X##ka = state[10]; \ - } \ - else { \ - X##ka = state[10]^input[10]; \ - } \ - } \ - X##ke = state[11]; \ - X##ki = state[12]; \ - X##ko = state[13]; \ - X##ku = state[14]; \ - } \ - else { \ - X##go = state[ 8]^input[ 8]; \ - X##gu = state[ 9]^input[ 9]; \ - X##ka = state[10]^input[10]; \ - X##ke = state[11]^input[11]; \ - if (laneCount < 14) { \ - if (laneCount < 13) { \ - X##ki = state[12]; \ - } \ - else { \ - X##ki = state[12]^input[12]; \ - } \ - X##ko = state[13]; \ - X##ku = state[14]; \ - } \ - else { \ - X##ki = state[12]^input[12]; \ - X##ko = state[13]^input[13]; \ - if (laneCount < 15) { \ - X##ku = state[14]; \ - } \ - else { \ - X##ku = state[14]^input[14]; \ - } \ - } \ - } \ - } \ - X##ma = state[15]; \ - X##me = state[16]; \ - X##mi = state[17]; \ - X##mo = state[18]; \ - X##mu = state[19]; \ - X##sa = state[20]; \ - X##se = state[21]; \ - X##si = state[22]; \ - X##so = state[23]; \ - X##su = state[24]; \ - } \ - else { \ - X##ba = state[ 0]^input[ 0]; \ - X##be = state[ 1]^input[ 1]; \ - X##bi = state[ 2]^input[ 2]; \ - X##bo = state[ 3]^input[ 3]; \ - X##bu = state[ 4]^input[ 4]; \ - X##ga = state[ 5]^input[ 5]; \ - X##ge = state[ 6]^input[ 6]; \ - X##gi = state[ 7]^input[ 7]; \ - X##go = state[ 8]^input[ 8]; \ - X##gu = state[ 9]^input[ 9]; \ - X##ka = 
state[10]^input[10]; \ - X##ke = state[11]^input[11]; \ - X##ki = state[12]^input[12]; \ - X##ko = state[13]^input[13]; \ - X##ku = state[14]^input[14]; \ - X##ma = state[15]^input[15]; \ - if (laneCount < 24) { \ - if (laneCount < 20) { \ - if (laneCount < 18) { \ - if (laneCount < 17) { \ - X##me = state[16]; \ - } \ - else { \ - X##me = state[16]^input[16]; \ - } \ - X##mi = state[17]; \ - X##mo = state[18]; \ - } \ - else { \ - X##me = state[16]^input[16]; \ - X##mi = state[17]^input[17]; \ - if (laneCount < 19) { \ - X##mo = state[18]; \ - } \ - else { \ - X##mo = state[18]^input[18]; \ - } \ - } \ - X##mu = state[19]; \ - X##sa = state[20]; \ - X##se = state[21]; \ - X##si = state[22]; \ - } \ - else { \ - X##me = state[16]^input[16]; \ - X##mi = state[17]^input[17]; \ - X##mo = state[18]^input[18]; \ - X##mu = state[19]^input[19]; \ - if (laneCount < 22) { \ - if (laneCount < 21) { \ - X##sa = state[20]; \ - } \ - else { \ - X##sa = state[20]^input[20]; \ - } \ - X##se = state[21]; \ - X##si = state[22]; \ - } \ - else { \ - X##sa = state[20]^input[20]; \ - X##se = state[21]^input[21]; \ - if (laneCount < 23) { \ - X##si = state[22]; \ - } \ - else { \ - X##si = state[22]^input[22]; \ - } \ - } \ - } \ - X##so = state[23]; \ - X##su = state[24]; \ - } \ - else { \ - X##me = state[16]^input[16]; \ - X##mi = state[17]^input[17]; \ - X##mo = state[18]^input[18]; \ - X##mu = state[19]^input[19]; \ - X##sa = state[20]^input[20]; \ - X##se = state[21]^input[21]; \ - X##si = state[22]^input[22]; \ - X##so = state[23]^input[23]; \ - if (laneCount < 25) { \ - X##su = state[24]; \ - } \ - else { \ - X##su = state[24]^input[24]; \ - } \ - } \ - } +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) +#define HTOLE64(x) (x) +#else +#define HTOLE64(x) (\ + ((x & 0xff00000000000000ull) >> 56) | \ + ((x & 0x00ff000000000000ull) >> 40) | \ + ((x & 0x0000ff0000000000ull) >> 24) | \ + ((x & 0x000000ff00000000ull) >> 8) | \ + ((x & 0x00000000ff000000ull) << 8) | \ + ((x & 
0x0000000000ff0000ull) << 24) | \ + ((x & 0x000000000000ff00ull) << 40) | \ + ((x & 0x00000000000000ffull) << 56)) +#endif #define addInput(X, input, laneCount) \ if (laneCount == 21) { \ - X##ba ^= input[ 0]; \ - X##be ^= input[ 1]; \ - X##bi ^= input[ 2]; \ - X##bo ^= input[ 3]; \ - X##bu ^= input[ 4]; \ - X##ga ^= input[ 5]; \ - X##ge ^= input[ 6]; \ - X##gi ^= input[ 7]; \ - X##go ^= input[ 8]; \ - X##gu ^= input[ 9]; \ - X##ka ^= input[10]; \ - X##ke ^= input[11]; \ - X##ki ^= input[12]; \ - X##ko ^= input[13]; \ - X##ku ^= input[14]; \ - X##ma ^= input[15]; \ - X##me ^= input[16]; \ - X##mi ^= input[17]; \ - X##mo ^= input[18]; \ - X##mu ^= input[19]; \ - X##sa ^= input[20]; \ + X##ba ^= HTOLE64(input[ 0]); \ + X##be ^= HTOLE64(input[ 1]); \ + X##bi ^= HTOLE64(input[ 2]); \ + X##bo ^= HTOLE64(input[ 3]); \ + X##bu ^= HTOLE64(input[ 4]); \ + X##ga ^= HTOLE64(input[ 5]); \ + X##ge ^= HTOLE64(input[ 6]); \ + X##gi ^= HTOLE64(input[ 7]); \ + X##go ^= HTOLE64(input[ 8]); \ + X##gu ^= HTOLE64(input[ 9]); \ + X##ka ^= HTOLE64(input[10]); \ + X##ke ^= HTOLE64(input[11]); \ + X##ki ^= HTOLE64(input[12]); \ + X##ko ^= HTOLE64(input[13]); \ + X##ku ^= HTOLE64(input[14]); \ + X##ma ^= HTOLE64(input[15]); \ + X##me ^= HTOLE64(input[16]); \ + X##mi ^= HTOLE64(input[17]); \ + X##mo ^= HTOLE64(input[18]); \ + X##mu ^= HTOLE64(input[19]); \ + X##sa ^= HTOLE64(input[20]); \ } \ else if (laneCount < 16) { \ if (laneCount < 8) { \ @@ -807,1402 +584,165 @@ http://creativecommons.org/publicdomain/zero/1.0/ if (laneCount < 1) { \ } \ else { \ - X##ba ^= input[ 0]; \ - } \ - } \ - else { \ - X##ba ^= input[ 0]; \ - X##be ^= input[ 1]; \ - if (laneCount < 3) { \ - } \ - else { \ - X##bi ^= input[ 2]; \ - } \ - } \ - } \ - else { \ - X##ba ^= input[ 0]; \ - X##be ^= input[ 1]; \ - X##bi ^= input[ 2]; \ - X##bo ^= input[ 3]; \ - if (laneCount < 6) { \ - if (laneCount < 5) { \ - } \ - else { \ - X##bu ^= input[ 4]; \ - } \ - } \ - else { \ - X##bu ^= input[ 4]; \ - X##ga ^= input[ 5]; \ 
- if (laneCount < 7) { \ - } \ - else { \ - X##ge ^= input[ 6]; \ - } \ - } \ - } \ - } \ - else { \ - X##ba ^= input[ 0]; \ - X##be ^= input[ 1]; \ - X##bi ^= input[ 2]; \ - X##bo ^= input[ 3]; \ - X##bu ^= input[ 4]; \ - X##ga ^= input[ 5]; \ - X##ge ^= input[ 6]; \ - X##gi ^= input[ 7]; \ - if (laneCount < 12) { \ - if (laneCount < 10) { \ - if (laneCount < 9) { \ - } \ - else { \ - X##go ^= input[ 8]; \ - } \ - } \ - else { \ - X##go ^= input[ 8]; \ - X##gu ^= input[ 9]; \ - if (laneCount < 11) { \ - } \ - else { \ - X##ka ^= input[10]; \ - } \ - } \ - } \ - else { \ - X##go ^= input[ 8]; \ - X##gu ^= input[ 9]; \ - X##ka ^= input[10]; \ - X##ke ^= input[11]; \ - if (laneCount < 14) { \ - if (laneCount < 13) { \ - } \ - else { \ - X##ki ^= input[12]; \ - } \ - } \ - else { \ - X##ki ^= input[12]; \ - X##ko ^= input[13]; \ - if (laneCount < 15) { \ - } \ - else { \ - X##ku ^= input[14]; \ - } \ - } \ - } \ - } \ - } \ - else { \ - X##ba ^= input[ 0]; \ - X##be ^= input[ 1]; \ - X##bi ^= input[ 2]; \ - X##bo ^= input[ 3]; \ - X##bu ^= input[ 4]; \ - X##ga ^= input[ 5]; \ - X##ge ^= input[ 6]; \ - X##gi ^= input[ 7]; \ - X##go ^= input[ 8]; \ - X##gu ^= input[ 9]; \ - X##ka ^= input[10]; \ - X##ke ^= input[11]; \ - X##ki ^= input[12]; \ - X##ko ^= input[13]; \ - X##ku ^= input[14]; \ - X##ma ^= input[15]; \ - if (laneCount < 24) { \ - if (laneCount < 20) { \ - if (laneCount < 18) { \ - if (laneCount < 17) { \ - } \ - else { \ - X##me ^= input[16]; \ - } \ - } \ - else { \ - X##me ^= input[16]; \ - X##mi ^= input[17]; \ - if (laneCount < 19) { \ - } \ - else { \ - X##mo ^= input[18]; \ - } \ - } \ - } \ - else { \ - X##me ^= input[16]; \ - X##mi ^= input[17]; \ - X##mo ^= input[18]; \ - X##mu ^= input[19]; \ - if (laneCount < 22) { \ - if (laneCount < 21) { \ - } \ - else { \ - X##sa ^= input[20]; \ - } \ - } \ - else { \ - X##sa ^= input[20]; \ - X##se ^= input[21]; \ - if (laneCount < 23) { \ - } \ - else { \ - X##si ^= input[22]; \ - } \ - } \ - } \ - } \ - else 
{ \ - X##me ^= input[16]; \ - X##mi ^= input[17]; \ - X##mo ^= input[18]; \ - X##mu ^= input[19]; \ - X##sa ^= input[20]; \ - X##se ^= input[21]; \ - X##si ^= input[22]; \ - X##so ^= input[23]; \ - if (laneCount < 25) { \ - } \ - else { \ - X##su ^= input[24]; \ - } \ - } \ - } - -#ifdef UseBebigokimisa - -#define copyToStateAndOutput(X, state, output, laneCount) \ - if (laneCount < 16) { \ - if (laneCount < 8) { \ - if (laneCount < 4) { \ - if (laneCount < 2) { \ - state[ 0] = X##ba; \ - if (laneCount >= 1) { \ - output[ 0] = X##ba; \ - } \ - state[ 1] = X##be; \ - state[ 2] = X##bi; \ - } \ - else { \ - state[ 0] = X##ba; \ - output[ 0] = X##ba; \ - state[ 1] = X##be; \ - output[ 1] = ~X##be; \ - state[ 2] = X##bi; \ - if (laneCount >= 3) { \ - output[ 2] = ~X##bi; \ - } \ - } \ - state[ 3] = X##bo; \ - state[ 4] = X##bu; \ - state[ 5] = X##ga; \ - state[ 6] = X##ge; \ - } \ - else { \ - state[ 0] = X##ba; \ - output[ 0] = X##ba; \ - state[ 1] = X##be; \ - output[ 1] = ~X##be; \ - state[ 2] = X##bi; \ - output[ 2] = ~X##bi; \ - state[ 3] = X##bo; \ - output[ 3] = X##bo; \ - if (laneCount < 6) { \ - state[ 4] = X##bu; \ - if (laneCount >= 5) { \ - output[ 4] = X##bu; \ - } \ - state[ 5] = X##ga; \ - state[ 6] = X##ge; \ - } \ - else { \ - state[ 4] = X##bu; \ - output[ 4] = X##bu; \ - state[ 5] = X##ga; \ - output[ 5] = X##ga; \ - state[ 6] = X##ge; \ - if (laneCount >= 7) { \ - output[ 6] = X##ge; \ - } \ - } \ - } \ - state[ 7] = X##gi; \ - state[ 8] = X##go; \ - state[ 9] = X##gu; \ - state[10] = X##ka; \ - state[11] = X##ke; \ - state[12] = X##ki; \ - state[13] = X##ko; \ - state[14] = X##ku; \ - } \ - else { \ - state[ 0] = X##ba; \ - output[ 0] = X##ba; \ - state[ 1] = X##be; \ - output[ 1] = ~X##be; \ - state[ 2] = X##bi; \ - output[ 2] = ~X##bi; \ - state[ 3] = X##bo; \ - output[ 3] = X##bo; \ - state[ 4] = X##bu; \ - output[ 4] = X##bu; \ - state[ 5] = X##ga; \ - output[ 5] = X##ga; \ - state[ 6] = X##ge; \ - output[ 6] = X##ge; \ - state[ 7] = X##gi; \ - 
output[ 7] = X##gi; \ - if (laneCount < 12) { \ - if (laneCount < 10) { \ - state[ 8] = X##go; \ - if (laneCount >= 9) { \ - output[ 8] = ~X##go; \ - } \ - state[ 9] = X##gu; \ - state[10] = X##ka; \ - } \ - else { \ - state[ 8] = X##go; \ - output[ 8] = ~X##go; \ - state[ 9] = X##gu; \ - output[ 9] = X##gu; \ - state[10] = X##ka; \ - if (laneCount >= 11) { \ - output[10] = X##ka; \ - } \ - } \ - state[11] = X##ke; \ - state[12] = X##ki; \ - state[13] = X##ko; \ - state[14] = X##ku; \ - } \ - else { \ - state[ 8] = X##go; \ - output[ 8] = ~X##go; \ - state[ 9] = X##gu; \ - output[ 9] = X##gu; \ - state[10] = X##ka; \ - output[10] = X##ka; \ - state[11] = X##ke; \ - output[11] = X##ke; \ - if (laneCount < 14) { \ - state[12] = X##ki; \ - if (laneCount >= 13) { \ - output[12] = ~X##ki; \ - } \ - state[13] = X##ko; \ - state[14] = X##ku; \ - } \ - else { \ - state[12] = X##ki; \ - output[12] = ~X##ki; \ - state[13] = X##ko; \ - output[13] = X##ko; \ - state[14] = X##ku; \ - if (laneCount >= 15) { \ - output[14] = X##ku; \ - } \ - } \ - } \ - } \ - state[15] = X##ma; \ - state[16] = X##me; \ - state[17] = X##mi; \ - state[18] = X##mo; \ - state[19] = X##mu; \ - state[20] = X##sa; \ - state[21] = X##se; \ - state[22] = X##si; \ - state[23] = X##so; \ - state[24] = X##su; \ - } \ - else { \ - state[ 0] = X##ba; \ - output[ 0] = X##ba; \ - state[ 1] = X##be; \ - output[ 1] = ~X##be; \ - state[ 2] = X##bi; \ - output[ 2] = ~X##bi; \ - state[ 3] = X##bo; \ - output[ 3] = X##bo; \ - state[ 4] = X##bu; \ - output[ 4] = X##bu; \ - state[ 5] = X##ga; \ - output[ 5] = X##ga; \ - state[ 6] = X##ge; \ - output[ 6] = X##ge; \ - state[ 7] = X##gi; \ - output[ 7] = X##gi; \ - state[ 8] = X##go; \ - output[ 8] = ~X##go; \ - state[ 9] = X##gu; \ - output[ 9] = X##gu; \ - state[10] = X##ka; \ - output[10] = X##ka; \ - state[11] = X##ke; \ - output[11] = X##ke; \ - state[12] = X##ki; \ - output[12] = ~X##ki; \ - state[13] = X##ko; \ - output[13] = X##ko; \ - state[14] = X##ku; \ - 
output[14] = X##ku; \ - state[15] = X##ma; \ - output[15] = X##ma; \ - if (laneCount < 24) { \ - if (laneCount < 20) { \ - if (laneCount < 18) { \ - state[16] = X##me; \ - if (laneCount >= 17) { \ - output[16] = X##me; \ - } \ - state[17] = X##mi; \ - state[18] = X##mo; \ - } \ - else { \ - state[16] = X##me; \ - output[16] = X##me; \ - state[17] = X##mi; \ - output[17] = ~X##mi; \ - state[18] = X##mo; \ - if (laneCount >= 19) { \ - output[18] = X##mo; \ - } \ - } \ - state[19] = X##mu; \ - state[20] = X##sa; \ - state[21] = X##se; \ - state[22] = X##si; \ - } \ - else { \ - state[16] = X##me; \ - output[16] = X##me; \ - state[17] = X##mi; \ - output[17] = ~X##mi; \ - state[18] = X##mo; \ - output[18] = X##mo; \ - state[19] = X##mu; \ - output[19] = X##mu; \ - if (laneCount < 22) { \ - state[20] = X##sa; \ - if (laneCount >= 21) { \ - output[20] = ~X##sa; \ - } \ - state[21] = X##se; \ - state[22] = X##si; \ - } \ - else { \ - state[20] = X##sa; \ - output[20] = ~X##sa; \ - state[21] = X##se; \ - output[21] = X##se; \ - state[22] = X##si; \ - if (laneCount >= 23) { \ - output[22] = X##si; \ - } \ - } \ - } \ - state[23] = X##so; \ - state[24] = X##su; \ - } \ - else { \ - state[16] = X##me; \ - output[16] = X##me; \ - state[17] = X##mi; \ - output[17] = ~X##mi; \ - state[18] = X##mo; \ - output[18] = X##mo; \ - state[19] = X##mu; \ - output[19] = X##mu; \ - state[20] = X##sa; \ - output[20] = ~X##sa; \ - state[21] = X##se; \ - output[21] = X##se; \ - state[22] = X##si; \ - output[22] = X##si; \ - state[23] = X##so; \ - output[23] = X##so; \ - state[24] = X##su; \ - if (laneCount >= 25) { \ - output[24] = X##su; \ - } \ - } \ - } - -#define output(X, output, laneCount) \ - if (laneCount < 16) { \ - if (laneCount < 8) { \ - if (laneCount < 4) { \ - if (laneCount < 2) { \ - if (laneCount >= 1) { \ - output[ 0] = X##ba; \ - } \ - } \ - else { \ - output[ 0] = X##ba; \ - output[ 1] = ~X##be; \ - if (laneCount >= 3) { \ - output[ 2] = ~X##bi; \ - } \ - } \ - } \ - else { 
\ - output[ 0] = X##ba; \ - output[ 1] = ~X##be; \ - output[ 2] = ~X##bi; \ - output[ 3] = X##bo; \ - if (laneCount < 6) { \ - if (laneCount >= 5) { \ - output[ 4] = X##bu; \ - } \ - } \ - else { \ - output[ 4] = X##bu; \ - output[ 5] = X##ga; \ - if (laneCount >= 7) { \ - output[ 6] = X##ge; \ - } \ - } \ - } \ - } \ - else { \ - output[ 0] = X##ba; \ - output[ 1] = ~X##be; \ - output[ 2] = ~X##bi; \ - output[ 3] = X##bo; \ - output[ 4] = X##bu; \ - output[ 5] = X##ga; \ - output[ 6] = X##ge; \ - output[ 7] = X##gi; \ - if (laneCount < 12) { \ - if (laneCount < 10) { \ - if (laneCount >= 9) { \ - output[ 8] = ~X##go; \ - } \ - } \ - else { \ - output[ 8] = ~X##go; \ - output[ 9] = X##gu; \ - if (laneCount >= 11) { \ - output[10] = X##ka; \ - } \ - } \ - } \ - else { \ - output[ 8] = ~X##go; \ - output[ 9] = X##gu; \ - output[10] = X##ka; \ - output[11] = X##ke; \ - if (laneCount < 14) { \ - if (laneCount >= 13) { \ - output[12] = ~X##ki; \ - } \ - } \ - else { \ - output[12] = ~X##ki; \ - output[13] = X##ko; \ - if (laneCount >= 15) { \ - output[14] = X##ku; \ - } \ - } \ - } \ - } \ - } \ - else { \ - output[ 0] = X##ba; \ - output[ 1] = ~X##be; \ - output[ 2] = ~X##bi; \ - output[ 3] = X##bo; \ - output[ 4] = X##bu; \ - output[ 5] = X##ga; \ - output[ 6] = X##ge; \ - output[ 7] = X##gi; \ - output[ 8] = ~X##go; \ - output[ 9] = X##gu; \ - output[10] = X##ka; \ - output[11] = X##ke; \ - output[12] = ~X##ki; \ - output[13] = X##ko; \ - output[14] = X##ku; \ - output[15] = X##ma; \ - if (laneCount < 24) { \ - if (laneCount < 20) { \ - if (laneCount < 18) { \ - if (laneCount >= 17) { \ - output[16] = X##me; \ - } \ - } \ - else { \ - output[16] = X##me; \ - output[17] = ~X##mi; \ - if (laneCount >= 19) { \ - output[18] = X##mo; \ - } \ - } \ - } \ - else { \ - output[16] = X##me; \ - output[17] = ~X##mi; \ - output[18] = X##mo; \ - output[19] = X##mu; \ - if (laneCount < 22) { \ - if (laneCount >= 21) { \ - output[20] = ~X##sa; \ - } \ - } \ - else { \ - output[20] 
= ~X##sa; \ - output[21] = X##se; \ - if (laneCount >= 23) { \ - output[22] = X##si; \ - } \ - } \ - } \ - } \ - else { \ - output[16] = X##me; \ - output[17] = ~X##mi; \ - output[18] = X##mo; \ - output[19] = X##mu; \ - output[20] = ~X##sa; \ - output[21] = X##se; \ - output[22] = X##si; \ - output[23] = X##so; \ - if (laneCount >= 25) { \ - output[24] = X##su; \ - } \ - } \ - } - -#define wrapOne(X, input, output, index, name) \ - X##name ^= input[index]; \ - output[index] = X##name; - -#define wrapOneInvert(X, input, output, index, name) \ - X##name ^= input[index]; \ - output[index] = ~X##name; - -#define unwrapOne(X, input, output, index, name) \ - output[index] = input[index] ^ X##name; \ - X##name ^= output[index]; - -#define unwrapOneInvert(X, input, output, index, name) \ - output[index] = ~(input[index] ^ X##name); \ - X##name ^= output[index]; \ - -#else /* UseBebigokimisa */ - - -#define copyToStateAndOutput(X, state, output, laneCount) \ - if (laneCount < 16) { \ - if (laneCount < 8) { \ - if (laneCount < 4) { \ - if (laneCount < 2) { \ - state[ 0] = X##ba; \ - if (laneCount >= 1) { \ - output[ 0] = X##ba; \ - } \ - state[ 1] = X##be; \ - state[ 2] = X##bi; \ - } \ - else { \ - state[ 0] = X##ba; \ - output[ 0] = X##ba; \ - state[ 1] = X##be; \ - output[ 1] = X##be; \ - state[ 2] = X##bi; \ - if (laneCount >= 3) { \ - output[ 2] = X##bi; \ - } \ - } \ - state[ 3] = X##bo; \ - state[ 4] = X##bu; \ - state[ 5] = X##ga; \ - state[ 6] = X##ge; \ - } \ - else { \ - state[ 0] = X##ba; \ - output[ 0] = X##ba; \ - state[ 1] = X##be; \ - output[ 1] = X##be; \ - state[ 2] = X##bi; \ - output[ 2] = X##bi; \ - state[ 3] = X##bo; \ - output[ 3] = X##bo; \ - if (laneCount < 6) { \ - state[ 4] = X##bu; \ - if (laneCount >= 5) { \ - output[ 4] = X##bu; \ - } \ - state[ 5] = X##ga; \ - state[ 6] = X##ge; \ - } \ - else { \ - state[ 4] = X##bu; \ - output[ 4] = X##bu; \ - state[ 5] = X##ga; \ - output[ 5] = X##ga; \ - state[ 6] = X##ge; \ - if (laneCount >= 7) { \ - 
output[ 6] = X##ge; \ - } \ - } \ - } \ - state[ 7] = X##gi; \ - state[ 8] = X##go; \ - state[ 9] = X##gu; \ - state[10] = X##ka; \ - state[11] = X##ke; \ - state[12] = X##ki; \ - state[13] = X##ko; \ - state[14] = X##ku; \ - } \ - else { \ - state[ 0] = X##ba; \ - output[ 0] = X##ba; \ - state[ 1] = X##be; \ - output[ 1] = X##be; \ - state[ 2] = X##bi; \ - output[ 2] = X##bi; \ - state[ 3] = X##bo; \ - output[ 3] = X##bo; \ - state[ 4] = X##bu; \ - output[ 4] = X##bu; \ - state[ 5] = X##ga; \ - output[ 5] = X##ga; \ - state[ 6] = X##ge; \ - output[ 6] = X##ge; \ - state[ 7] = X##gi; \ - output[ 7] = X##gi; \ - if (laneCount < 12) { \ - if (laneCount < 10) { \ - state[ 8] = X##go; \ - if (laneCount >= 9) { \ - output[ 8] = X##go; \ - } \ - state[ 9] = X##gu; \ - state[10] = X##ka; \ - } \ - else { \ - state[ 8] = X##go; \ - output[ 8] = X##go; \ - state[ 9] = X##gu; \ - output[ 9] = X##gu; \ - state[10] = X##ka; \ - if (laneCount >= 11) { \ - output[10] = X##ka; \ - } \ - } \ - state[11] = X##ke; \ - state[12] = X##ki; \ - state[13] = X##ko; \ - state[14] = X##ku; \ - } \ - else { \ - state[ 8] = X##go; \ - output[ 8] = X##go; \ - state[ 9] = X##gu; \ - output[ 9] = X##gu; \ - state[10] = X##ka; \ - output[10] = X##ka; \ - state[11] = X##ke; \ - output[11] = X##ke; \ - if (laneCount < 14) { \ - state[12] = X##ki; \ - if (laneCount >= 13) { \ - output[12]= X##ki; \ - } \ - state[13] = X##ko; \ - state[14] = X##ku; \ - } \ - else { \ - state[12] = X##ki; \ - output[12]= X##ki; \ - state[13] = X##ko; \ - output[13] = X##ko; \ - state[14] = X##ku; \ - if (laneCount >= 15) { \ - output[14] = X##ku; \ - } \ - } \ - } \ - } \ - state[15] = X##ma; \ - state[16] = X##me; \ - state[17] = X##mi; \ - state[18] = X##mo; \ - state[19] = X##mu; \ - state[20] = X##sa; \ - state[21] = X##se; \ - state[22] = X##si; \ - state[23] = X##so; \ - state[24] = X##su; \ - } \ - else { \ - state[ 0] = X##ba; \ - output[ 0] = X##ba; \ - state[ 1] = X##be; \ - output[ 1] = X##be; \ - state[ 2] 
= X##bi; \ - output[ 2] = X##bi; \ - state[ 3] = X##bo; \ - output[ 3] = X##bo; \ - state[ 4] = X##bu; \ - output[ 4] = X##bu; \ - state[ 5] = X##ga; \ - output[ 5] = X##ga; \ - state[ 6] = X##ge; \ - output[ 6] = X##ge; \ - state[ 7] = X##gi; \ - output[ 7] = X##gi; \ - state[ 8] = X##go; \ - output[ 8] = X##go; \ - state[ 9] = X##gu; \ - output[ 9] = X##gu; \ - state[10] = X##ka; \ - output[10] = X##ka; \ - state[11] = X##ke; \ - output[11] = X##ke; \ - state[12] = X##ki; \ - output[12]= X##ki; \ - state[13] = X##ko; \ - output[13] = X##ko; \ - state[14] = X##ku; \ - output[14] = X##ku; \ - state[15] = X##ma; \ - output[15] = X##ma; \ - if (laneCount < 24) { \ - if (laneCount < 20) { \ - if (laneCount < 18) { \ - state[16] = X##me; \ - if (laneCount >= 17) { \ - output[16] = X##me; \ - } \ - state[17] = X##mi; \ - state[18] = X##mo; \ - } \ - else { \ - state[16] = X##me; \ - output[16] = X##me; \ - state[17] = X##mi; \ - output[17] = X##mi; \ - state[18] = X##mo; \ - if (laneCount >= 19) { \ - output[18] = X##mo; \ - } \ - } \ - state[19] = X##mu; \ - state[20] = X##sa; \ - state[21] = X##se; \ - state[22] = X##si; \ - } \ - else { \ - state[16] = X##me; \ - output[16] = X##me; \ - state[17] = X##mi; \ - output[17] = X##mi; \ - state[18] = X##mo; \ - output[18] = X##mo; \ - state[19] = X##mu; \ - output[19] = X##mu; \ - if (laneCount < 22) { \ - state[20] = X##sa; \ - if (laneCount >= 21) { \ - output[20] = X##sa; \ - } \ - state[21] = X##se; \ - state[22] = X##si; \ - } \ - else { \ - state[20] = X##sa; \ - output[20] = X##sa; \ - state[21] = X##se; \ - output[21] = X##se; \ - state[22] = X##si; \ - if (laneCount >= 23) { \ - output[22] = X##si; \ - } \ - } \ - } \ - state[23] = X##so; \ - state[24] = X##su; \ - } \ - else { \ - state[16] = X##me; \ - output[16] = X##me; \ - state[17] = X##mi; \ - output[17] = X##mi; \ - state[18] = X##mo; \ - output[18] = X##mo; \ - state[19] = X##mu; \ - output[19] = X##mu; \ - state[20] = X##sa; \ - output[20] = X##sa; \ - 
state[21] = X##se; \ - output[21] = X##se; \ - state[22] = X##si; \ - output[22] = X##si; \ - state[23] = X##so; \ - output[23] = X##so; \ - state[24] = X##su; \ - if (laneCount >= 25) { \ - output[24] = X##su; \ - } \ - } \ - } - -#define output(X, output, laneCount) \ - if (laneCount < 16) { \ - if (laneCount < 8) { \ - if (laneCount < 4) { \ - if (laneCount < 2) { \ - if (laneCount >= 1) { \ - output[ 0] = X##ba; \ - } \ - } \ - else { \ - output[ 0] = X##ba; \ - output[ 1] = X##be; \ - if (laneCount >= 3) { \ - output[ 2] = X##bi; \ - } \ - } \ - } \ - else { \ - output[ 0] = X##ba; \ - output[ 1] = X##be; \ - output[ 2] = X##bi; \ - output[ 3] = X##bo; \ - if (laneCount < 6) { \ - if (laneCount >= 5) { \ - output[ 4] = X##bu; \ - } \ - } \ - else { \ - output[ 4] = X##bu; \ - output[ 5] = X##ga; \ - if (laneCount >= 7) { \ - output[ 6] = X##ge; \ - } \ - } \ - } \ - } \ - else { \ - output[ 0] = X##ba; \ - output[ 1] = X##be; \ - output[ 2] = X##bi; \ - output[ 3] = X##bo; \ - output[ 4] = X##bu; \ - output[ 5] = X##ga; \ - output[ 6] = X##ge; \ - output[ 7] = X##gi; \ - if (laneCount < 12) { \ - if (laneCount < 10) { \ - if (laneCount >= 9) { \ - output[ 8] = X##go; \ - } \ - } \ - else { \ - output[ 8] = X##go; \ - output[ 9] = X##gu; \ - if (laneCount >= 11) { \ - output[10] = X##ka; \ - } \ - } \ - } \ - else { \ - output[ 8] = X##go; \ - output[ 9] = X##gu; \ - output[10] = X##ka; \ - output[11] = X##ke; \ - if (laneCount < 14) { \ - if (laneCount >= 13) { \ - output[12] = X##ki; \ - } \ - } \ - else { \ - output[12] = X##ki; \ - output[13] = X##ko; \ - if (laneCount >= 15) { \ - output[14] = X##ku; \ - } \ - } \ - } \ - } \ - } \ - else { \ - output[ 0] = X##ba; \ - output[ 1] = X##be; \ - output[ 2] = X##bi; \ - output[ 3] = X##bo; \ - output[ 4] = X##bu; \ - output[ 5] = X##ga; \ - output[ 6] = X##ge; \ - output[ 7] = X##gi; \ - output[ 8] = X##go; \ - output[ 9] = X##gu; \ - output[10] = X##ka; \ - output[11] = X##ke; \ - output[12] = X##ki; \ - 
output[13] = X##ko; \ - output[14] = X##ku; \ - output[15] = X##ma; \ - if (laneCount < 24) { \ - if (laneCount < 20) { \ - if (laneCount < 18) { \ - if (laneCount >= 17) { \ - output[16] = X##me; \ - } \ - } \ - else { \ - output[16] = X##me; \ - output[17] = X##mi; \ - if (laneCount >= 19) { \ - output[18] = X##mo; \ - } \ - } \ - } \ - else { \ - output[16] = X##me; \ - output[17] = X##mi; \ - output[18] = X##mo; \ - output[19] = X##mu; \ - if (laneCount < 22) { \ - if (laneCount >= 21) { \ - output[20] = X##sa; \ - } \ - } \ - else { \ - output[20] = X##sa; \ - output[21] = X##se; \ - if (laneCount >= 23) { \ - output[22] = X##si; \ - } \ - } \ - } \ - } \ - else { \ - output[16] = X##me; \ - output[17] = X##mi; \ - output[18] = X##mo; \ - output[19] = X##mu; \ - output[20] = X##sa; \ - output[21] = X##se; \ - output[22] = X##si; \ - output[23] = X##so; \ - if (laneCount >= 25) { \ - output[24] = X##su; \ - } \ - } \ - } - -#define wrapOne(X, input, output, index, name) \ - X##name ^= input[index]; \ - output[index] = X##name; - -#define wrapOneInvert(X, input, output, index, name) \ - X##name ^= input[index]; \ - output[index] = X##name; - -#define unwrapOne(X, input, output, index, name) \ - output[index] = input[index] ^ X##name; \ - X##name ^= output[index]; - -#define unwrapOneInvert(X, input, output, index, name) \ - output[index] = input[index] ^ X##name; \ - X##name ^= output[index]; - -#endif - -#define wrap(X, input, output, laneCount, trailingBits) \ - if (laneCount < 16) { \ - if (laneCount < 8) { \ - if (laneCount < 4) { \ - if (laneCount < 2) { \ - if (laneCount < 1) { \ - X##ba ^= trailingBits; \ - } \ - else { \ - wrapOne(X, input, output, 0, ba) \ - X##be ^= trailingBits; \ - } \ - } \ - else { \ - wrapOne(X, input, output, 0, ba) \ - wrapOneInvert(X, input, output, 1, be) \ - if (laneCount < 3) { \ - X##bi ^= trailingBits; \ - } \ - else { \ - wrapOneInvert(X, input, output, 2, bi) \ - X##bo ^= trailingBits; \ - } \ - } \ - } \ - else { \ - 
wrapOne(X, input, output, 0, ba) \ - wrapOneInvert(X, input, output, 1, be) \ - wrapOneInvert(X, input, output, 2, bi) \ - wrapOne(X, input, output, 3, bo) \ - if (laneCount < 6) { \ - if (laneCount < 5) { \ - X##bu ^= trailingBits; \ - } \ - else { \ - wrapOne(X, input, output, 4, bu) \ - X##ga ^= trailingBits; \ - } \ - } \ - else { \ - wrapOne(X, input, output, 4, bu) \ - wrapOne(X, input, output, 5, ga) \ - if (laneCount < 7) { \ - X##ge ^= trailingBits; \ - } \ - else { \ - wrapOne(X, input, output, 6, ge) \ - X##gi ^= trailingBits; \ - } \ - } \ - } \ - } \ - else { \ - wrapOne(X, input, output, 0, ba) \ - wrapOneInvert(X, input, output, 1, be) \ - wrapOneInvert(X, input, output, 2, bi) \ - wrapOne(X, input, output, 3, bo) \ - wrapOne(X, input, output, 4, bu) \ - wrapOne(X, input, output, 5, ga) \ - wrapOne(X, input, output, 6, ge) \ - wrapOne(X, input, output, 7, gi) \ - if (laneCount < 12) { \ - if (laneCount < 10) { \ - if (laneCount < 9) { \ - X##go ^= trailingBits; \ - } \ - else { \ - wrapOneInvert(X, input, output, 8, go) \ - X##gu ^= trailingBits; \ - } \ - } \ - else { \ - wrapOneInvert(X, input, output, 8, go) \ - wrapOne(X, input, output, 9, gu) \ - if (laneCount < 11) { \ - X##ka ^= trailingBits; \ - } \ - else { \ - wrapOne(X, input, output, 10, ka) \ - X##ke ^= trailingBits; \ - } \ - } \ - } \ - else { \ - wrapOneInvert(X, input, output, 8, go) \ - wrapOne(X, input, output, 9, gu) \ - wrapOne(X, input, output, 10, ka) \ - wrapOne(X, input, output, 11, ke) \ - if (laneCount < 14) { \ - if (laneCount < 13) { \ - X##ki ^= trailingBits; \ - } \ - else { \ - wrapOneInvert(X, input, output, 12, ki) \ - X##ko ^= trailingBits; \ - } \ - } \ - else { \ - wrapOneInvert(X, input, output, 12, ki) \ - wrapOne(X, input, output, 13, ko) \ - if (laneCount < 15) { \ - X##ku ^= trailingBits; \ - } \ - else { \ - wrapOne(X, input, output, 14, ku) \ - X##ma ^= trailingBits; \ - } \ - } \ - } \ - } \ - } \ - else { \ - wrapOne(X, input, output, 0, ba) \ - 
wrapOneInvert(X, input, output, 1, be) \ - wrapOneInvert(X, input, output, 2, bi) \ - wrapOne(X, input, output, 3, bo) \ - wrapOne(X, input, output, 4, bu) \ - wrapOne(X, input, output, 5, ga) \ - wrapOne(X, input, output, 6, ge) \ - wrapOne(X, input, output, 7, gi) \ - wrapOneInvert(X, input, output, 8, go) \ - wrapOne(X, input, output, 9, gu) \ - wrapOne(X, input, output, 10, ka) \ - wrapOne(X, input, output, 11, ke) \ - wrapOneInvert(X, input, output, 12, ki) \ - wrapOne(X, input, output, 13, ko) \ - wrapOne(X, input, output, 14, ku) \ - wrapOne(X, input, output, 15, ma) \ - if (laneCount < 24) { \ - if (laneCount < 20) { \ - if (laneCount < 18) { \ - if (laneCount < 17) { \ - X##me ^= trailingBits; \ - } \ - else { \ - wrapOne(X, input, output, 16, me) \ - X##mi ^= trailingBits; \ - } \ - } \ - else { \ - wrapOne(X, input, output, 16, me) \ - wrapOneInvert(X, input, output, 17, mi) \ - if (laneCount < 19) { \ - X##mo ^= trailingBits; \ - } \ - else { \ - wrapOne(X, input, output, 18, mo) \ - X##mu ^= trailingBits; \ - } \ - } \ - } \ - else { \ - wrapOne(X, input, output, 16, me) \ - wrapOneInvert(X, input, output, 17, mi) \ - wrapOne(X, input, output, 18, mo) \ - wrapOne(X, input, output, 19, mu) \ - if (laneCount < 22) { \ - if (laneCount < 21) { \ - X##sa ^= trailingBits; \ - } \ - else { \ - wrapOneInvert(X, input, output, 20, sa) \ - X##se ^= trailingBits; \ - } \ - } \ - else { \ - wrapOneInvert(X, input, output, 20, sa) \ - wrapOne(X, input, output, 21, se) \ - if (laneCount < 23) { \ - X##si ^= trailingBits; \ - } \ - else { \ - wrapOne(X, input, output, 22, si) \ - X##so ^= trailingBits; \ - } \ - } \ - } \ - } \ - else { \ - wrapOne(X, input, output, 16, me) \ - wrapOneInvert(X, input, output, 17, mi) \ - wrapOne(X, input, output, 18, mo) \ - wrapOne(X, input, output, 19, mu) \ - wrapOneInvert(X, input, output, 20, sa) \ - wrapOne(X, input, output, 21, se) \ - wrapOne(X, input, output, 22, si) \ - wrapOne(X, input, output, 23, so) \ - if (laneCount < 
25) { \ - X##su ^= trailingBits; \ - } \ - else { \ - wrapOne(X, input, output, 24, su) \ - } \ - } \ - } - -#define unwrap(X, input, output, laneCount, trailingBits) \ - if (laneCount < 16) { \ - if (laneCount < 8) { \ - if (laneCount < 4) { \ - if (laneCount < 2) { \ - if (laneCount < 1) { \ - X##ba ^= trailingBits; \ - } \ - else { \ - unwrapOne(X, input, output, 0, ba) \ - X##be ^= trailingBits; \ + X##ba ^= HTOLE64(input[ 0]); \ } \ } \ else { \ - unwrapOne(X, input, output, 0, ba) \ - unwrapOneInvert(X, input, output, 1, be) \ + X##ba ^= HTOLE64(input[ 0]); \ + X##be ^= HTOLE64(input[ 1]); \ if (laneCount < 3) { \ - X##bi ^= trailingBits; \ } \ else { \ - unwrapOneInvert(X, input, output, 2, bi) \ - X##bo ^= trailingBits; \ + X##bi ^= HTOLE64(input[ 2]); \ } \ } \ } \ else { \ - unwrapOne(X, input, output, 0, ba) \ - unwrapOneInvert(X, input, output, 1, be) \ - unwrapOneInvert(X, input, output, 2, bi) \ - unwrapOne(X, input, output, 3, bo) \ + X##ba ^= HTOLE64(input[ 0]); \ + X##be ^= HTOLE64(input[ 1]); \ + X##bi ^= HTOLE64(input[ 2]); \ + X##bo ^= HTOLE64(input[ 3]); \ if (laneCount < 6) { \ if (laneCount < 5) { \ - X##bu ^= trailingBits; \ } \ else { \ - unwrapOne(X, input, output, 4, bu) \ - X##ga ^= trailingBits; \ + X##bu ^= HTOLE64(input[ 4]); \ } \ } \ else { \ - unwrapOne(X, input, output, 4, bu) \ - unwrapOne(X, input, output, 5, ga) \ + X##bu ^= HTOLE64(input[ 4]); \ + X##ga ^= HTOLE64(input[ 5]); \ if (laneCount < 7) { \ - X##ge ^= trailingBits; \ } \ else { \ - unwrapOne(X, input, output, 6, ge) \ - X##gi ^= trailingBits; \ + X##ge ^= HTOLE64(input[ 6]); \ } \ } \ } \ } \ else { \ - unwrapOne(X, input, output, 0, ba) \ - unwrapOneInvert(X, input, output, 1, be) \ - unwrapOneInvert(X, input, output, 2, bi) \ - unwrapOne(X, input, output, 3, bo) \ - unwrapOne(X, input, output, 4, bu) \ - unwrapOne(X, input, output, 5, ga) \ - unwrapOne(X, input, output, 6, ge) \ - unwrapOne(X, input, output, 7, gi) \ + X##ba ^= HTOLE64(input[ 0]); \ + X##be ^= 
HTOLE64(input[ 1]); \ + X##bi ^= HTOLE64(input[ 2]); \ + X##bo ^= HTOLE64(input[ 3]); \ + X##bu ^= HTOLE64(input[ 4]); \ + X##ga ^= HTOLE64(input[ 5]); \ + X##ge ^= HTOLE64(input[ 6]); \ + X##gi ^= HTOLE64(input[ 7]); \ if (laneCount < 12) { \ if (laneCount < 10) { \ if (laneCount < 9) { \ - X##go ^= trailingBits; \ } \ else { \ - unwrapOneInvert(X, input, output, 8, go) \ - X##gu ^= trailingBits; \ + X##go ^= HTOLE64(input[ 8]); \ } \ } \ else { \ - unwrapOneInvert(X, input, output, 8, go) \ - unwrapOne(X, input, output, 9, gu) \ + X##go ^= HTOLE64(input[ 8]); \ + X##gu ^= HTOLE64(input[ 9]); \ if (laneCount < 11) { \ - X##ka ^= trailingBits; \ } \ else { \ - unwrapOne(X, input, output, 10, ka) \ - X##ke ^= trailingBits; \ + X##ka ^= HTOLE64(input[10]); \ } \ } \ } \ else { \ - unwrapOneInvert(X, input, output, 8, go) \ - unwrapOne(X, input, output, 9, gu) \ - unwrapOne(X, input, output, 10, ka) \ - unwrapOne(X, input, output, 11, ke) \ + X##go ^= HTOLE64(input[ 8]); \ + X##gu ^= HTOLE64(input[ 9]); \ + X##ka ^= HTOLE64(input[10]); \ + X##ke ^= HTOLE64(input[11]); \ if (laneCount < 14) { \ if (laneCount < 13) { \ - X##ki ^= trailingBits; \ } \ else { \ - unwrapOneInvert(X, input, output, 12, ki) \ - X##ko ^= trailingBits; \ + X##ki ^= HTOLE64(input[12]); \ } \ } \ else { \ - unwrapOneInvert(X, input, output, 12, ki) \ - unwrapOne(X, input, output, 13, ko) \ + X##ki ^= HTOLE64(input[12]); \ + X##ko ^= HTOLE64(input[13]); \ if (laneCount < 15) { \ - X##ku ^= trailingBits; \ } \ else { \ - unwrapOne(X, input, output, 14, ku) \ - X##ma ^= trailingBits; \ + X##ku ^= HTOLE64(input[14]); \ } \ } \ } \ } \ } \ else { \ - unwrapOne(X, input, output, 0, ba) \ - unwrapOneInvert(X, input, output, 1, be) \ - unwrapOneInvert(X, input, output, 2, bi) \ - unwrapOne(X, input, output, 3, bo) \ - unwrapOne(X, input, output, 4, bu) \ - unwrapOne(X, input, output, 5, ga) \ - unwrapOne(X, input, output, 6, ge) \ - unwrapOne(X, input, output, 7, gi) \ - unwrapOneInvert(X, input, output, 
8, go) \ - unwrapOne(X, input, output, 9, gu) \ - unwrapOne(X, input, output, 10, ka) \ - unwrapOne(X, input, output, 11, ke) \ - unwrapOneInvert(X, input, output, 12, ki) \ - unwrapOne(X, input, output, 13, ko) \ - unwrapOne(X, input, output, 14, ku) \ - unwrapOne(X, input, output, 15, ma) \ + X##ba ^= HTOLE64(input[ 0]); \ + X##be ^= HTOLE64(input[ 1]); \ + X##bi ^= HTOLE64(input[ 2]); \ + X##bo ^= HTOLE64(input[ 3]); \ + X##bu ^= HTOLE64(input[ 4]); \ + X##ga ^= HTOLE64(input[ 5]); \ + X##ge ^= HTOLE64(input[ 6]); \ + X##gi ^= HTOLE64(input[ 7]); \ + X##go ^= HTOLE64(input[ 8]); \ + X##gu ^= HTOLE64(input[ 9]); \ + X##ka ^= HTOLE64(input[10]); \ + X##ke ^= HTOLE64(input[11]); \ + X##ki ^= HTOLE64(input[12]); \ + X##ko ^= HTOLE64(input[13]); \ + X##ku ^= HTOLE64(input[14]); \ + X##ma ^= HTOLE64(input[15]); \ if (laneCount < 24) { \ if (laneCount < 20) { \ if (laneCount < 18) { \ if (laneCount < 17) { \ - X##me ^= trailingBits; \ } \ else { \ - unwrapOne(X, input, output, 16, me) \ - X##mi ^= trailingBits; \ + X##me ^= HTOLE64(input[16]); \ } \ } \ else { \ - unwrapOne(X, input, output, 16, me) \ - unwrapOneInvert(X, input, output, 17, mi) \ + X##me ^= HTOLE64(input[16]); \ + X##mi ^= HTOLE64(input[17]); \ if (laneCount < 19) { \ - X##mo ^= trailingBits; \ } \ else { \ - unwrapOne(X, input, output, 18, mo) \ - X##mu ^= trailingBits; \ + X##mo ^= HTOLE64(input[18]); \ } \ } \ } \ else { \ - unwrapOne(X, input, output, 16, me) \ - unwrapOneInvert(X, input, output, 17, mi) \ - unwrapOne(X, input, output, 18, mo) \ - unwrapOne(X, input, output, 19, mu) \ + X##me ^= HTOLE64(input[16]); \ + X##mi ^= HTOLE64(input[17]); \ + X##mo ^= HTOLE64(input[18]); \ + X##mu ^= HTOLE64(input[19]); \ if (laneCount < 22) { \ if (laneCount < 21) { \ - X##sa ^= trailingBits; \ } \ else { \ - unwrapOneInvert(X, input, output, 20, sa) \ - X##se ^= trailingBits; \ + X##sa ^= HTOLE64(input[20]); \ } \ } \ else { \ - unwrapOneInvert(X, input, output, 20, sa) \ - unwrapOne(X, input, output, 
21, se) \ + X##sa ^= HTOLE64(input[20]); \ + X##se ^= HTOLE64(input[21]); \ if (laneCount < 23) { \ - X##si ^= trailingBits; \ } \ else { \ - unwrapOne(X, input, output, 22, si) \ - X##so ^= trailingBits; \ + X##si ^= HTOLE64(input[22]); \ } \ } \ } \ } \ else { \ - unwrapOne(X, input, output, 16, me) \ - unwrapOneInvert(X, input, output, 17, mi) \ - unwrapOne(X, input, output, 18, mo) \ - unwrapOne(X, input, output, 19, mu) \ - unwrapOneInvert(X, input, output, 20, sa) \ - unwrapOne(X, input, output, 21, se) \ - unwrapOne(X, input, output, 22, si) \ - unwrapOne(X, input, output, 23, so) \ + X##me ^= HTOLE64(input[16]); \ + X##mi ^= HTOLE64(input[17]); \ + X##mo ^= HTOLE64(input[18]); \ + X##mu ^= HTOLE64(input[19]); \ + X##sa ^= HTOLE64(input[20]); \ + X##se ^= HTOLE64(input[21]); \ + X##si ^= HTOLE64(input[22]); \ + X##so ^= HTOLE64(input[23]); \ if (laneCount < 25) { \ - X##su ^= trailingBits; \ } \ else { \ - unwrapOne(X, input, output, 24, su) \ + X##su ^= HTOLE64(input[24]); \ } \ } \ } diff --git a/Modules/_sha3/kcp/KeccakP-1600-SnP-opt32.h b/Modules/_sha3/kcp/KeccakP-1600-SnP-opt32.h index 6cf765e6ce11e1..f5ac6b50d3425a 100644 --- a/Modules/_sha3/kcp/KeccakP-1600-SnP-opt32.h +++ b/Modules/_sha3/kcp/KeccakP-1600-SnP-opt32.h @@ -1,16 +1,21 @@ /* -Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, -Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby -denoted as "the implementer". +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP -For more information, feedback or questions, please refer to our websites: -http://keccak.noekeon.org/ -http://keyak.noekeon.org/ -http://ketje.noekeon.org/ +The Keccak-p permutations, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". 
+ +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ To the extent possible under law, the implementer has waived all copyright and related or neighboring rights to the source code in this file. http://creativecommons.org/publicdomain/zero/1.0/ + +--- + +Please refer to SnP-documentation.h for more details. */ #ifndef _KeccakP_1600_SnP_h_ @@ -29,6 +34,7 @@ void KeccakP1600_AddByte(void *state, unsigned char data, unsigned int offset); void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length); void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length); void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount); +void KeccakP1600_Permute_Nrounds(void *state, unsigned int nrounds); void KeccakP1600_Permute_12rounds(void *state); void KeccakP1600_Permute_24rounds(void *state); void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length); diff --git a/Modules/_sha3/kcp/KeccakP-1600-SnP-opt64.h b/Modules/_sha3/kcp/KeccakP-1600-SnP-opt64.h index 889a31a79444c5..f904949c9b8d01 100644 --- a/Modules/_sha3/kcp/KeccakP-1600-SnP-opt64.h +++ b/Modules/_sha3/kcp/KeccakP-1600-SnP-opt64.h @@ -1,24 +1,26 @@ /* -Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, -Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby -denoted as "the implementer". +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP -For more information, feedback or questions, please refer to our websites: -http://keccak.noekeon.org/ -http://keyak.noekeon.org/ -http://ketje.noekeon.org/ +The Keccak-p permutations, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche. + +Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer". 
+ +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ To the extent possible under law, the implementer has waived all copyright and related or neighboring rights to the source code in this file. http://creativecommons.org/publicdomain/zero/1.0/ + +--- + +Please refer to SnP-documentation.h for more details. */ #ifndef _KeccakP_1600_SnP_h_ #define _KeccakP_1600_SnP_h_ -/** For the documentation, see SnP-documentation.h. - */ - /* #include "brg_endian.h" */ #include "KeccakP-1600-opt64-config.h" @@ -26,6 +28,7 @@ and related or neighboring rights to the source code in this file. #define KeccakP1600_stateSizeInBytes 200 #define KeccakP1600_stateAlignment 8 #define KeccakF1600_FastLoop_supported +#define KeccakP1600_12rounds_FastLoop_supported #include @@ -40,10 +43,12 @@ void KeccakP1600_AddByte(void *state, unsigned char data, unsigned int offset); void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length); void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length); void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount); +void KeccakP1600_Permute_Nrounds(void *state, unsigned int nrounds); void KeccakP1600_Permute_12rounds(void *state); void KeccakP1600_Permute_24rounds(void *state); void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length); void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length); size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen); +size_t KeccakP1600_12rounds_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen); #endif diff --git a/Modules/_sha3/kcp/KeccakP-1600-inplace32BI.c 
b/Modules/_sha3/kcp/KeccakP-1600-inplace32BI.c index a2f9ffea93259d..ccac7a2d6ba2e2 100644 --- a/Modules/_sha3/kcp/KeccakP-1600-inplace32BI.c +++ b/Modules/_sha3/kcp/KeccakP-1600-inplace32BI.c @@ -1,34 +1,36 @@ /* -Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, -Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby -denoted as "the implementer". +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP -For more information, feedback or questions, please refer to our websites: -http://keccak.noekeon.org/ -http://keyak.noekeon.org/ -http://ketje.noekeon.org/ +The Keccak-p permutations, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche. + +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ To the extent possible under law, the implementer has waived all copyright and related or neighboring rights to the source code in this file. http://creativecommons.org/publicdomain/zero/1.0/ + +--- + +This file implements Keccak-p[1600] in a SnP-compatible way. +Please refer to SnP-documentation.h for more details. + +This implementation comes with KeccakP-1600-SnP.h in the same folder. +Please refer to LowLevel.build for the exact list of other files it must be combined with. */ -#include +#include +#include /* #include "brg_endian.h" */ #include "KeccakP-1600-SnP.h" #include "SnP-Relaned.h" -typedef unsigned char UINT8; -typedef unsigned int UINT32; -/* WARNING: on 8-bit and 16-bit platforms, this should be replaced by: */ - -/*typedef unsigned long UINT32; */ - - -#define ROL32(a, offset) ((((UINT32)a) << (offset)) ^ (((UINT32)a) >> (32-(offset)))) +#define ROL32(a, offset) ((((uint32_t)a) << (offset)) ^ (((uint32_t)a) >> (32-(offset)))) /* Credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ - #define prepareToBitInterleaving(low, high, temp, temp0, temp1) \ temp0 = (low); \ temp = (temp0 ^ (temp0 >> 1)) & 0x22222222UL; temp0 = temp0 ^ temp ^ (temp << 1); \ @@ -57,7 +59,6 @@ typedef unsigned int UINT32; odd = (temp0 >> 16) | (temp1 & 0xFFFF0000); /* Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ - #define prepareFromBitInterleaving(even, odd, temp, temp0, temp1) \ temp0 = (even); \ temp1 = (odd); \ @@ -85,26 +86,26 @@ typedef unsigned int UINT32; void KeccakP1600_SetBytesInLaneToZero(void *state, unsigned int lanePosition, unsigned int offset, unsigned int length) { - UINT8 laneAsBytes[8]; - UINT32 low, high; - UINT32 temp, temp0, temp1; - UINT32 *stateAsHalfLanes = (UINT32*)state; + uint8_t laneAsBytes[8]; + uint32_t low, high; + uint32_t temp, temp0, temp1; + uint32_t *stateAsHalfLanes = (uint32_t*)state; memset(laneAsBytes, 0xFF, offset); memset(laneAsBytes+offset, 0x00, length); memset(laneAsBytes+offset+length, 0xFF, 8-offset-length); #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) - low = *((UINT32*)(laneAsBytes+0)); - high = *((UINT32*)(laneAsBytes+4)); + low = *((uint32_t*)(laneAsBytes+0)); + high = *((uint32_t*)(laneAsBytes+4)); #else low = laneAsBytes[0] - | ((UINT32)(laneAsBytes[1]) << 8) - | ((UINT32)(laneAsBytes[2]) << 16) - | ((UINT32)(laneAsBytes[3]) << 24); + | ((uint32_t)(laneAsBytes[1]) << 8) + | ((uint32_t)(laneAsBytes[2]) << 16) + | ((uint32_t)(laneAsBytes[3]) << 24); high = laneAsBytes[4] - | ((UINT32)(laneAsBytes[5]) << 8) - | ((UINT32)(laneAsBytes[6]) << 16) - | ((UINT32)(laneAsBytes[7]) << 24); + | ((uint32_t)(laneAsBytes[5]) << 8) + | ((uint32_t)(laneAsBytes[6]) << 16) + | ((uint32_t)(laneAsBytes[7]) << 24); #endif toBitInterleavingAndAND(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1); } @@ -122,17 +123,17 @@ void KeccakP1600_AddByte(void *state, unsigned char byte, unsigned int offset) { unsigned int 
lanePosition = offset/8; unsigned int offsetInLane = offset%8; - UINT32 low, high; - UINT32 temp, temp0, temp1; - UINT32 *stateAsHalfLanes = (UINT32*)state; + uint32_t low, high; + uint32_t temp, temp0, temp1; + uint32_t *stateAsHalfLanes = (uint32_t*)state; if (offsetInLane < 4) { - low = (UINT32)byte << (offsetInLane*8); + low = (uint32_t)byte << (offsetInLane*8); high = 0; } else { low = 0; - high = (UINT32)byte << ((offsetInLane-4)*8); + high = (uint32_t)byte << ((offsetInLane-4)*8); } toBitInterleavingAndXOR(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1); } @@ -141,25 +142,25 @@ void KeccakP1600_AddByte(void *state, unsigned char byte, unsigned int offset) void KeccakP1600_AddBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length) { - UINT8 laneAsBytes[8]; - UINT32 low, high; - UINT32 temp, temp0, temp1; - UINT32 *stateAsHalfLanes = (UINT32*)state; + uint8_t laneAsBytes[8]; + uint32_t low, high; + uint32_t temp, temp0, temp1; + uint32_t *stateAsHalfLanes = (uint32_t*)state; memset(laneAsBytes, 0, 8); memcpy(laneAsBytes+offset, data, length); #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) - low = *((UINT32*)(laneAsBytes+0)); - high = *((UINT32*)(laneAsBytes+4)); + low = *((uint32_t*)(laneAsBytes+0)); + high = *((uint32_t*)(laneAsBytes+4)); #else low = laneAsBytes[0] - | ((UINT32)(laneAsBytes[1]) << 8) - | ((UINT32)(laneAsBytes[2]) << 16) - | ((UINT32)(laneAsBytes[3]) << 24); + | ((uint32_t)(laneAsBytes[1]) << 8) + | ((uint32_t)(laneAsBytes[2]) << 16) + | ((uint32_t)(laneAsBytes[3]) << 24); high = laneAsBytes[4] - | ((UINT32)(laneAsBytes[5]) << 8) - | ((UINT32)(laneAsBytes[6]) << 16) - | ((UINT32)(laneAsBytes[7]) << 24); + | ((uint32_t)(laneAsBytes[5]) << 8) + | ((uint32_t)(laneAsBytes[6]) << 16) + | ((uint32_t)(laneAsBytes[7]) << 24); #endif toBitInterleavingAndXOR(low, high, stateAsHalfLanes[lanePosition*2+0], 
stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1); } @@ -169,14 +170,14 @@ void KeccakP1600_AddBytesInLane(void *state, unsigned int lanePosition, const un void KeccakP1600_AddLanes(void *state, const unsigned char *data, unsigned int laneCount) { #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) - const UINT32 * pI = (const UINT32 *)data; - UINT32 * pS = (UINT32*)state; - UINT32 t, x0, x1; + const uint32_t * pI = (const uint32_t *)data; + uint32_t * pS = (uint32_t*)state; + uint32_t t, x0, x1; int i; for (i = laneCount-1; i >= 0; --i) { #ifdef NO_MISALIGNED_ACCESSES - UINT32 low; - UINT32 high; + uint32_t low; + uint32_t high; memcpy(&low, pI++, 4); memcpy(&high, pI++, 4); toBitInterleavingAndXOR(low, high, *(pS++), *(pS++), t, x0, x1); @@ -187,19 +188,18 @@ void KeccakP1600_AddLanes(void *state, const unsigned char *data, unsigned int l #else unsigned int lanePosition; for(lanePosition=0; lanePosition= 0; --i) { #ifdef NO_MISALIGNED_ACCESSES - UINT32 low; - UINT32 high; + uint32_t low; + uint32_t high; memcpy(&low, pI++, 4); memcpy(&high, pI++, 4); toBitInterleavingAndSet(low, high, *(pS++), *(pS++), t, x0, x1); @@ -243,19 +243,18 @@ void KeccakP1600_OverwriteLanes(void *state, const unsigned char *data, unsigned #else unsigned int lanePosition; for(lanePosition=0; lanePosition> 8) & 0xFF; @@ -313,14 +312,14 @@ void KeccakP1600_ExtractBytesInLane(const void *state, unsigned int lanePosition void KeccakP1600_ExtractLanes(const void *state, unsigned char *data, unsigned int laneCount) { #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) - UINT32 * pI = (UINT32 *)data; - const UINT32 * pS = ( const UINT32 *)state; - UINT32 t, x0, x1; + uint32_t * pI = (uint32_t *)data; + const uint32_t * pS = ( const uint32_t *)state; + uint32_t t, x0, x1; int i; for (i = laneCount-1; i >= 0; --i) { #ifdef NO_MISALIGNED_ACCESSES - UINT32 low; - UINT32 high; + uint32_t low; + uint32_t high; fromBitInterleaving(*(pS++), *(pS++), low, high, t, x0, x1); memcpy(pI++, &low, 4); memcpy(pI++, 
&high, 4); @@ -331,10 +330,10 @@ void KeccakP1600_ExtractLanes(const void *state, unsigned char *data, unsigned i #else unsigned int lanePosition; for(lanePosition=0; lanePosition> 8) & 0xFF; laneAsBytes[2] = (low >> 16) & 0xFF; @@ -359,15 +358,15 @@ void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned i void KeccakP1600_ExtractAndAddBytesInLane(const void *state, unsigned int lanePosition, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length) { - UINT32 *stateAsHalfLanes = (UINT32*)state; - UINT32 low, high, temp, temp0, temp1; - UINT8 laneAsBytes[8]; + uint32_t *stateAsHalfLanes = (uint32_t*)state; + uint32_t low, high, temp, temp0, temp1; + uint8_t laneAsBytes[8]; unsigned int i; fromBitInterleaving(stateAsHalfLanes[lanePosition*2], stateAsHalfLanes[lanePosition*2+1], low, high, temp, temp0, temp1); #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) - *((UINT32*)(laneAsBytes+0)) = low; - *((UINT32*)(laneAsBytes+4)) = high; + *((uint32_t*)(laneAsBytes+0)) = low; + *((uint32_t*)(laneAsBytes+4)) = high; #else laneAsBytes[0] = low & 0xFF; laneAsBytes[1] = (low >> 8) & 0xFF; @@ -387,15 +386,15 @@ void KeccakP1600_ExtractAndAddBytesInLane(const void *state, unsigned int lanePo void KeccakP1600_ExtractAndAddLanes(const void *state, const unsigned char *input, unsigned char *output, unsigned int laneCount) { #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) - const UINT32 * pI = (const UINT32 *)input; - UINT32 * pO = (UINT32 *)output; - const UINT32 * pS = (const UINT32 *)state; - UINT32 t, x0, x1; + const uint32_t * pI = (const uint32_t *)input; + uint32_t * pO = (uint32_t *)output; + const uint32_t * pS = (const uint32_t *)state; + uint32_t t, x0, x1; int i; for (i = laneCount-1; i >= 0; --i) { #ifdef NO_MISALIGNED_ACCESSES - UINT32 low; - UINT32 high; + uint32_t low; + uint32_t high; fromBitInterleaving(*(pS++), *(pS++), low, high, t, x0, x1); *(pO++) = *(pI++) ^ low; *(pO++) = *(pI++) ^ high; @@ -406,10 +405,10 
@@ void KeccakP1600_ExtractAndAddLanes(const void *state, const unsigned char *inpu #else unsigned int lanePosition; for(lanePosition=0; lanePosition> 8) & 0xFF; laneAsBytes[2] = (low >> 16) & 0xFF; @@ -418,8 +417,8 @@ void KeccakP1600_ExtractAndAddLanes(const void *state, const unsigned char *inpu laneAsBytes[5] = (high >> 8) & 0xFF; laneAsBytes[6] = (high >> 16) & 0xFF; laneAsBytes[7] = (high >> 24) & 0xFF; - ((UINT32*)(output+lanePosition*8))[0] = ((UINT32*)(input+lanePosition*8))[0] ^ (*(const UINT32*)(laneAsBytes+0)); - ((UINT32*)(output+lanePosition*8))[1] = ((UINT32*)(input+lanePosition*8))[0] ^ (*(const UINT32*)(laneAsBytes+4)); + ((uint32_t*)(output+lanePosition*8))[0] = ((uint32_t*)(input+lanePosition*8))[0] ^ (*(const uint32_t*)(laneAsBytes+0)); + ((uint32_t*)(output+lanePosition*8))[1] = ((uint32_t*)(input+lanePosition*8))[0] ^ (*(const uint32_t*)(laneAsBytes+4)); } #endif } @@ -432,7 +431,7 @@ void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *inpu /* ---------------------------------------------------------------- */ -static const UINT32 KeccakF1600RoundConstants_int2[2*24+1] = +static const uint32_t KeccakF1600RoundConstants_int2[2*24+1] = { 0x00000001UL, 0x00000000UL, 0x00000000UL, 0x00000089UL, @@ -461,690 +460,692 @@ static const UINT32 KeccakF1600RoundConstants_int2[2*24+1] = 0x000000FFUL }; -#define KeccakAtoD_round0() \ +#define KeccakRound0() \ Cx = Abu0^Agu0^Aku0^Amu0^Asu0; \ Du1 = Abe1^Age1^Ake1^Ame1^Ase1; \ Da0 = Cx^ROL32(Du1, 1); \ Cz = Abu1^Agu1^Aku1^Amu1^Asu1; \ Du0 = Abe0^Age0^Ake0^Ame0^Ase0; \ Da1 = Cz^Du0; \ -\ Cw = Abi0^Agi0^Aki0^Ami0^Asi0; \ Do0 = Cw^ROL32(Cz, 1); \ Cy = Abi1^Agi1^Aki1^Ami1^Asi1; \ Do1 = Cy^Cx; \ -\ Cx = Aba0^Aga0^Aka0^Ama0^Asa0; \ De0 = Cx^ROL32(Cy, 1); \ Cz = Aba1^Aga1^Aka1^Ama1^Asa1; \ De1 = Cz^Cw; \ -\ Cy = Abo1^Ago1^Ako1^Amo1^Aso1; \ Di0 = Du0^ROL32(Cy, 1); \ Cw = Abo0^Ago0^Ako0^Amo0^Aso0; \ Di1 = Du1^Cw; \ -\ Du0 = Cw^ROL32(Cz, 1); \ Du1 = Cy^Cx; \ - -#define KeccakAtoD_round1() \ +\ + 
Ba = (Aba0^Da0); \ + Be = ROL32((Age0^De0), 22); \ + Bi = ROL32((Aki1^Di1), 22); \ + Bo = ROL32((Amo1^Do1), 11); \ + Bu = ROL32((Asu0^Du0), 7); \ + Aba0 = Ba ^((~Be)& Bi ); \ + Aba0 ^= *(pRoundConstants++); \ + Age0 = Be ^((~Bi)& Bo ); \ + Aki1 = Bi ^((~Bo)& Bu ); \ + Amo1 = Bo ^((~Bu)& Ba ); \ + Asu0 = Bu ^((~Ba)& Be ); \ + Ba = (Aba1^Da1); \ + Be = ROL32((Age1^De1), 22); \ + Bi = ROL32((Aki0^Di0), 21); \ + Bo = ROL32((Amo0^Do0), 10); \ + Bu = ROL32((Asu1^Du1), 7); \ + Aba1 = Ba ^((~Be)& Bi ); \ + Aba1 ^= *(pRoundConstants++); \ + Age1 = Be ^((~Bi)& Bo ); \ + Aki0 = Bi ^((~Bo)& Bu ); \ + Amo0 = Bo ^((~Bu)& Ba ); \ + Asu1 = Bu ^((~Ba)& Be ); \ + Bi = ROL32((Aka1^Da1), 2); \ + Bo = ROL32((Ame1^De1), 23); \ + Bu = ROL32((Asi1^Di1), 31); \ + Ba = ROL32((Abo0^Do0), 14); \ + Be = ROL32((Agu0^Du0), 10); \ + Aka1 = Ba ^((~Be)& Bi ); \ + Ame1 = Be ^((~Bi)& Bo ); \ + Asi1 = Bi ^((~Bo)& Bu ); \ + Abo0 = Bo ^((~Bu)& Ba ); \ + Agu0 = Bu ^((~Ba)& Be ); \ + Bi = ROL32((Aka0^Da0), 1); \ + Bo = ROL32((Ame0^De0), 22); \ + Bu = ROL32((Asi0^Di0), 30); \ + Ba = ROL32((Abo1^Do1), 14); \ + Be = ROL32((Agu1^Du1), 10); \ + Aka0 = Ba ^((~Be)& Bi ); \ + Ame0 = Be ^((~Bi)& Bo ); \ + Asi0 = Bi ^((~Bo)& Bu ); \ + Abo1 = Bo ^((~Bu)& Ba ); \ + Agu1 = Bu ^((~Ba)& Be ); \ + Bu = ROL32((Asa0^Da0), 9); \ + Ba = ROL32((Abe1^De1), 1); \ + Be = ROL32((Agi0^Di0), 3); \ + Bi = ROL32((Ako1^Do1), 13); \ + Bo = ROL32((Amu0^Du0), 4); \ + Asa0 = Ba ^((~Be)& Bi ); \ + Abe1 = Be ^((~Bi)& Bo ); \ + Agi0 = Bi ^((~Bo)& Bu ); \ + Ako1 = Bo ^((~Bu)& Ba ); \ + Amu0 = Bu ^((~Ba)& Be ); \ + Bu = ROL32((Asa1^Da1), 9); \ + Ba = (Abe0^De0); \ + Be = ROL32((Agi1^Di1), 3); \ + Bi = ROL32((Ako0^Do0), 12); \ + Bo = ROL32((Amu1^Du1), 4); \ + Asa1 = Ba ^((~Be)& Bi ); \ + Abe0 = Be ^((~Bi)& Bo ); \ + Agi1 = Bi ^((~Bo)& Bu ); \ + Ako0 = Bo ^((~Bu)& Ba ); \ + Amu1 = Bu ^((~Ba)& Be ); \ + Be = ROL32((Aga0^Da0), 18); \ + Bi = ROL32((Ake0^De0), 5); \ + Bo = ROL32((Ami1^Di1), 8); \ + Bu = ROL32((Aso0^Do0), 28); \ + Ba = 
ROL32((Abu1^Du1), 14); \ + Aga0 = Ba ^((~Be)& Bi ); \ + Ake0 = Be ^((~Bi)& Bo ); \ + Ami1 = Bi ^((~Bo)& Bu ); \ + Aso0 = Bo ^((~Bu)& Ba ); \ + Abu1 = Bu ^((~Ba)& Be ); \ + Be = ROL32((Aga1^Da1), 18); \ + Bi = ROL32((Ake1^De1), 5); \ + Bo = ROL32((Ami0^Di0), 7); \ + Bu = ROL32((Aso1^Do1), 28); \ + Ba = ROL32((Abu0^Du0), 13); \ + Aga1 = Ba ^((~Be)& Bi ); \ + Ake1 = Be ^((~Bi)& Bo ); \ + Ami0 = Bi ^((~Bo)& Bu ); \ + Aso1 = Bo ^((~Bu)& Ba ); \ + Abu0 = Bu ^((~Ba)& Be ); \ + Bo = ROL32((Ama1^Da1), 21); \ + Bu = ROL32((Ase0^De0), 1); \ + Ba = ROL32((Abi0^Di0), 31); \ + Be = ROL32((Ago1^Do1), 28); \ + Bi = ROL32((Aku1^Du1), 20); \ + Ama1 = Ba ^((~Be)& Bi ); \ + Ase0 = Be ^((~Bi)& Bo ); \ + Abi0 = Bi ^((~Bo)& Bu ); \ + Ago1 = Bo ^((~Bu)& Ba ); \ + Aku1 = Bu ^((~Ba)& Be ); \ + Bo = ROL32((Ama0^Da0), 20); \ + Bu = ROL32((Ase1^De1), 1); \ + Ba = ROL32((Abi1^Di1), 31); \ + Be = ROL32((Ago0^Do0), 27); \ + Bi = ROL32((Aku0^Du0), 19); \ + Ama0 = Ba ^((~Be)& Bi ); \ + Ase1 = Be ^((~Bi)& Bo ); \ + Abi1 = Bi ^((~Bo)& Bu ); \ + Ago0 = Bo ^((~Bu)& Ba ); \ + Aku0 = Bu ^((~Ba)& Be ) + +#define KeccakRound1() \ Cx = Asu0^Agu0^Amu0^Abu1^Aku1; \ Du1 = Age1^Ame0^Abe0^Ake1^Ase1; \ Da0 = Cx^ROL32(Du1, 1); \ Cz = Asu1^Agu1^Amu1^Abu0^Aku0; \ Du0 = Age0^Ame1^Abe1^Ake0^Ase0; \ Da1 = Cz^Du0; \ -\ Cw = Aki1^Asi1^Agi0^Ami1^Abi0; \ Do0 = Cw^ROL32(Cz, 1); \ Cy = Aki0^Asi0^Agi1^Ami0^Abi1; \ Do1 = Cy^Cx; \ -\ Cx = Aba0^Aka1^Asa0^Aga0^Ama1; \ De0 = Cx^ROL32(Cy, 1); \ Cz = Aba1^Aka0^Asa1^Aga1^Ama0; \ De1 = Cz^Cw; \ -\ Cy = Amo0^Abo1^Ako0^Aso1^Ago0; \ Di0 = Du0^ROL32(Cy, 1); \ Cw = Amo1^Abo0^Ako1^Aso0^Ago1; \ Di1 = Du1^Cw; \ -\ Du0 = Cw^ROL32(Cz, 1); \ Du1 = Cy^Cx; \ - -#define KeccakAtoD_round2() \ +\ + Ba = (Aba0^Da0); \ + Be = ROL32((Ame1^De0), 22); \ + Bi = ROL32((Agi1^Di1), 22); \ + Bo = ROL32((Aso1^Do1), 11); \ + Bu = ROL32((Aku1^Du0), 7); \ + Aba0 = Ba ^((~Be)& Bi ); \ + Aba0 ^= *(pRoundConstants++); \ + Ame1 = Be ^((~Bi)& Bo ); \ + Agi1 = Bi ^((~Bo)& Bu ); \ + Aso1 = Bo ^((~Bu)& Ba ); \ + Aku1 = Bu 
^((~Ba)& Be ); \ + Ba = (Aba1^Da1); \ + Be = ROL32((Ame0^De1), 22); \ + Bi = ROL32((Agi0^Di0), 21); \ + Bo = ROL32((Aso0^Do0), 10); \ + Bu = ROL32((Aku0^Du1), 7); \ + Aba1 = Ba ^((~Be)& Bi ); \ + Aba1 ^= *(pRoundConstants++); \ + Ame0 = Be ^((~Bi)& Bo ); \ + Agi0 = Bi ^((~Bo)& Bu ); \ + Aso0 = Bo ^((~Bu)& Ba ); \ + Aku0 = Bu ^((~Ba)& Be ); \ + Bi = ROL32((Asa1^Da1), 2); \ + Bo = ROL32((Ake1^De1), 23); \ + Bu = ROL32((Abi1^Di1), 31); \ + Ba = ROL32((Amo1^Do0), 14); \ + Be = ROL32((Agu0^Du0), 10); \ + Asa1 = Ba ^((~Be)& Bi ); \ + Ake1 = Be ^((~Bi)& Bo ); \ + Abi1 = Bi ^((~Bo)& Bu ); \ + Amo1 = Bo ^((~Bu)& Ba ); \ + Agu0 = Bu ^((~Ba)& Be ); \ + Bi = ROL32((Asa0^Da0), 1); \ + Bo = ROL32((Ake0^De0), 22); \ + Bu = ROL32((Abi0^Di0), 30); \ + Ba = ROL32((Amo0^Do1), 14); \ + Be = ROL32((Agu1^Du1), 10); \ + Asa0 = Ba ^((~Be)& Bi ); \ + Ake0 = Be ^((~Bi)& Bo ); \ + Abi0 = Bi ^((~Bo)& Bu ); \ + Amo0 = Bo ^((~Bu)& Ba ); \ + Agu1 = Bu ^((~Ba)& Be ); \ + Bu = ROL32((Ama1^Da0), 9); \ + Ba = ROL32((Age1^De1), 1); \ + Be = ROL32((Asi1^Di0), 3); \ + Bi = ROL32((Ako0^Do1), 13); \ + Bo = ROL32((Abu1^Du0), 4); \ + Ama1 = Ba ^((~Be)& Bi ); \ + Age1 = Be ^((~Bi)& Bo ); \ + Asi1 = Bi ^((~Bo)& Bu ); \ + Ako0 = Bo ^((~Bu)& Ba ); \ + Abu1 = Bu ^((~Ba)& Be ); \ + Bu = ROL32((Ama0^Da1), 9); \ + Ba = (Age0^De0); \ + Be = ROL32((Asi0^Di1), 3); \ + Bi = ROL32((Ako1^Do0), 12); \ + Bo = ROL32((Abu0^Du1), 4); \ + Ama0 = Ba ^((~Be)& Bi ); \ + Age0 = Be ^((~Bi)& Bo ); \ + Asi0 = Bi ^((~Bo)& Bu ); \ + Ako1 = Bo ^((~Bu)& Ba ); \ + Abu0 = Bu ^((~Ba)& Be ); \ + Be = ROL32((Aka1^Da0), 18); \ + Bi = ROL32((Abe1^De0), 5); \ + Bo = ROL32((Ami0^Di1), 8); \ + Bu = ROL32((Ago1^Do0), 28); \ + Ba = ROL32((Asu1^Du1), 14); \ + Aka1 = Ba ^((~Be)& Bi ); \ + Abe1 = Be ^((~Bi)& Bo ); \ + Ami0 = Bi ^((~Bo)& Bu ); \ + Ago1 = Bo ^((~Bu)& Ba ); \ + Asu1 = Bu ^((~Ba)& Be ); \ + Be = ROL32((Aka0^Da1), 18); \ + Bi = ROL32((Abe0^De1), 5); \ + Bo = ROL32((Ami1^Di0), 7); \ + Bu = ROL32((Ago0^Do1), 28); \ + Ba = ROL32((Asu0^Du0), 
13); \ + Aka0 = Ba ^((~Be)& Bi ); \ + Abe0 = Be ^((~Bi)& Bo ); \ + Ami1 = Bi ^((~Bo)& Bu ); \ + Ago0 = Bo ^((~Bu)& Ba ); \ + Asu0 = Bu ^((~Ba)& Be ); \ + Bo = ROL32((Aga1^Da1), 21); \ + Bu = ROL32((Ase0^De0), 1); \ + Ba = ROL32((Aki1^Di0), 31); \ + Be = ROL32((Abo1^Do1), 28); \ + Bi = ROL32((Amu1^Du1), 20); \ + Aga1 = Ba ^((~Be)& Bi ); \ + Ase0 = Be ^((~Bi)& Bo ); \ + Aki1 = Bi ^((~Bo)& Bu ); \ + Abo1 = Bo ^((~Bu)& Ba ); \ + Amu1 = Bu ^((~Ba)& Be ); \ + Bo = ROL32((Aga0^Da0), 20); \ + Bu = ROL32((Ase1^De1), 1); \ + Ba = ROL32((Aki0^Di1), 31); \ + Be = ROL32((Abo0^Do0), 27); \ + Bi = ROL32((Amu0^Du0), 19); \ + Aga0 = Ba ^((~Be)& Bi ); \ + Ase1 = Be ^((~Bi)& Bo ); \ + Aki0 = Bi ^((~Bo)& Bu ); \ + Abo0 = Bo ^((~Bu)& Ba ); \ + Amu0 = Bu ^((~Ba)& Be ); + +#define KeccakRound2() \ Cx = Aku1^Agu0^Abu1^Asu1^Amu1; \ Du1 = Ame0^Ake0^Age0^Abe0^Ase1; \ Da0 = Cx^ROL32(Du1, 1); \ Cz = Aku0^Agu1^Abu0^Asu0^Amu0; \ Du0 = Ame1^Ake1^Age1^Abe1^Ase0; \ Da1 = Cz^Du0; \ -\ Cw = Agi1^Abi1^Asi1^Ami0^Aki1; \ Do0 = Cw^ROL32(Cz, 1); \ Cy = Agi0^Abi0^Asi0^Ami1^Aki0; \ Do1 = Cy^Cx; \ -\ Cx = Aba0^Asa1^Ama1^Aka1^Aga1; \ De0 = Cx^ROL32(Cy, 1); \ Cz = Aba1^Asa0^Ama0^Aka0^Aga0; \ De1 = Cz^Cw; \ -\ Cy = Aso0^Amo0^Ako1^Ago0^Abo0; \ Di0 = Du0^ROL32(Cy, 1); \ Cw = Aso1^Amo1^Ako0^Ago1^Abo1; \ Di1 = Du1^Cw; \ -\ Du0 = Cw^ROL32(Cz, 1); \ Du1 = Cy^Cx; \ - -#define KeccakAtoD_round3() \ +\ + Ba = (Aba0^Da0); \ + Be = ROL32((Ake1^De0), 22); \ + Bi = ROL32((Asi0^Di1), 22); \ + Bo = ROL32((Ago0^Do1), 11); \ + Bu = ROL32((Amu1^Du0), 7); \ + Aba0 = Ba ^((~Be)& Bi ); \ + Aba0 ^= *(pRoundConstants++); \ + Ake1 = Be ^((~Bi)& Bo ); \ + Asi0 = Bi ^((~Bo)& Bu ); \ + Ago0 = Bo ^((~Bu)& Ba ); \ + Amu1 = Bu ^((~Ba)& Be ); \ + Ba = (Aba1^Da1); \ + Be = ROL32((Ake0^De1), 22); \ + Bi = ROL32((Asi1^Di0), 21); \ + Bo = ROL32((Ago1^Do0), 10); \ + Bu = ROL32((Amu0^Du1), 7); \ + Aba1 = Ba ^((~Be)& Bi ); \ + Aba1 ^= *(pRoundConstants++); \ + Ake0 = Be ^((~Bi)& Bo ); \ + Asi1 = Bi ^((~Bo)& Bu ); \ + Ago1 = Bo ^((~Bu)& Ba ); \ + 
Amu0 = Bu ^((~Ba)& Be ); \ + Bi = ROL32((Ama0^Da1), 2); \ + Bo = ROL32((Abe0^De1), 23); \ + Bu = ROL32((Aki0^Di1), 31); \ + Ba = ROL32((Aso1^Do0), 14); \ + Be = ROL32((Agu0^Du0), 10); \ + Ama0 = Ba ^((~Be)& Bi ); \ + Abe0 = Be ^((~Bi)& Bo ); \ + Aki0 = Bi ^((~Bo)& Bu ); \ + Aso1 = Bo ^((~Bu)& Ba ); \ + Agu0 = Bu ^((~Ba)& Be ); \ + Bi = ROL32((Ama1^Da0), 1); \ + Bo = ROL32((Abe1^De0), 22); \ + Bu = ROL32((Aki1^Di0), 30); \ + Ba = ROL32((Aso0^Do1), 14); \ + Be = ROL32((Agu1^Du1), 10); \ + Ama1 = Ba ^((~Be)& Bi ); \ + Abe1 = Be ^((~Bi)& Bo ); \ + Aki1 = Bi ^((~Bo)& Bu ); \ + Aso0 = Bo ^((~Bu)& Ba ); \ + Agu1 = Bu ^((~Ba)& Be ); \ + Bu = ROL32((Aga1^Da0), 9); \ + Ba = ROL32((Ame0^De1), 1); \ + Be = ROL32((Abi1^Di0), 3); \ + Bi = ROL32((Ako1^Do1), 13); \ + Bo = ROL32((Asu1^Du0), 4); \ + Aga1 = Ba ^((~Be)& Bi ); \ + Ame0 = Be ^((~Bi)& Bo ); \ + Abi1 = Bi ^((~Bo)& Bu ); \ + Ako1 = Bo ^((~Bu)& Ba ); \ + Asu1 = Bu ^((~Ba)& Be ); \ + Bu = ROL32((Aga0^Da1), 9); \ + Ba = (Ame1^De0); \ + Be = ROL32((Abi0^Di1), 3); \ + Bi = ROL32((Ako0^Do0), 12); \ + Bo = ROL32((Asu0^Du1), 4); \ + Aga0 = Ba ^((~Be)& Bi ); \ + Ame1 = Be ^((~Bi)& Bo ); \ + Abi0 = Bi ^((~Bo)& Bu ); \ + Ako0 = Bo ^((~Bu)& Ba ); \ + Asu0 = Bu ^((~Ba)& Be ); \ + Be = ROL32((Asa1^Da0), 18); \ + Bi = ROL32((Age1^De0), 5); \ + Bo = ROL32((Ami1^Di1), 8); \ + Bu = ROL32((Abo1^Do0), 28); \ + Ba = ROL32((Aku0^Du1), 14); \ + Asa1 = Ba ^((~Be)& Bi ); \ + Age1 = Be ^((~Bi)& Bo ); \ + Ami1 = Bi ^((~Bo)& Bu ); \ + Abo1 = Bo ^((~Bu)& Ba ); \ + Aku0 = Bu ^((~Ba)& Be ); \ + Be = ROL32((Asa0^Da1), 18); \ + Bi = ROL32((Age0^De1), 5); \ + Bo = ROL32((Ami0^Di0), 7); \ + Bu = ROL32((Abo0^Do1), 28); \ + Ba = ROL32((Aku1^Du0), 13); \ + Asa0 = Ba ^((~Be)& Bi ); \ + Age0 = Be ^((~Bi)& Bo ); \ + Ami0 = Bi ^((~Bo)& Bu ); \ + Abo0 = Bo ^((~Bu)& Ba ); \ + Aku1 = Bu ^((~Ba)& Be ); \ + Bo = ROL32((Aka0^Da1), 21); \ + Bu = ROL32((Ase0^De0), 1); \ + Ba = ROL32((Agi1^Di0), 31); \ + Be = ROL32((Amo0^Do1), 28); \ + Bi = ROL32((Abu0^Du1), 20); \ + Aka0 
= Ba ^((~Be)& Bi ); \ + Ase0 = Be ^((~Bi)& Bo ); \ + Agi1 = Bi ^((~Bo)& Bu ); \ + Amo0 = Bo ^((~Bu)& Ba ); \ + Abu0 = Bu ^((~Ba)& Be ); \ + Bo = ROL32((Aka1^Da0), 20); \ + Bu = ROL32((Ase1^De1), 1); \ + Ba = ROL32((Agi0^Di1), 31); \ + Be = ROL32((Amo1^Do0), 27); \ + Bi = ROL32((Abu1^Du0), 19); \ + Aka1 = Ba ^((~Be)& Bi ); \ + Ase1 = Be ^((~Bi)& Bo ); \ + Agi0 = Bi ^((~Bo)& Bu ); \ + Amo1 = Bo ^((~Bu)& Ba ); \ + Abu1 = Bu ^((~Ba)& Be ); + +#define KeccakRound3() \ Cx = Amu1^Agu0^Asu1^Aku0^Abu0; \ Du1 = Ake0^Abe1^Ame1^Age0^Ase1; \ Da0 = Cx^ROL32(Du1, 1); \ Cz = Amu0^Agu1^Asu0^Aku1^Abu1; \ Du0 = Ake1^Abe0^Ame0^Age1^Ase0; \ Da1 = Cz^Du0; \ -\ Cw = Asi0^Aki0^Abi1^Ami1^Agi1; \ Do0 = Cw^ROL32(Cz, 1); \ Cy = Asi1^Aki1^Abi0^Ami0^Agi0; \ Do1 = Cy^Cx; \ -\ Cx = Aba0^Ama0^Aga1^Asa1^Aka0; \ De0 = Cx^ROL32(Cy, 1); \ Cz = Aba1^Ama1^Aga0^Asa0^Aka1; \ De1 = Cz^Cw; \ -\ Cy = Ago1^Aso0^Ako0^Abo0^Amo1; \ Di0 = Du0^ROL32(Cy, 1); \ Cw = Ago0^Aso1^Ako1^Abo1^Amo0; \ Di1 = Du1^Cw; \ -\ Du0 = Cw^ROL32(Cz, 1); \ Du1 = Cy^Cx; \ +\ + Ba = (Aba0^Da0); \ + Be = ROL32((Abe0^De0), 22); \ + Bi = ROL32((Abi0^Di1), 22); \ + Bo = ROL32((Abo0^Do1), 11); \ + Bu = ROL32((Abu0^Du0), 7); \ + Aba0 = Ba ^((~Be)& Bi ); \ + Aba0 ^= *(pRoundConstants++); \ + Abe0 = Be ^((~Bi)& Bo ); \ + Abi0 = Bi ^((~Bo)& Bu ); \ + Abo0 = Bo ^((~Bu)& Ba ); \ + Abu0 = Bu ^((~Ba)& Be ); \ + Ba = (Aba1^Da1); \ + Be = ROL32((Abe1^De1), 22); \ + Bi = ROL32((Abi1^Di0), 21); \ + Bo = ROL32((Abo1^Do0), 10); \ + Bu = ROL32((Abu1^Du1), 7); \ + Aba1 = Ba ^((~Be)& Bi ); \ + Aba1 ^= *(pRoundConstants++); \ + Abe1 = Be ^((~Bi)& Bo ); \ + Abi1 = Bi ^((~Bo)& Bu ); \ + Abo1 = Bo ^((~Bu)& Ba ); \ + Abu1 = Bu ^((~Ba)& Be ); \ + Bi = ROL32((Aga0^Da1), 2); \ + Bo = ROL32((Age0^De1), 23); \ + Bu = ROL32((Agi0^Di1), 31); \ + Ba = ROL32((Ago0^Do0), 14); \ + Be = ROL32((Agu0^Du0), 10); \ + Aga0 = Ba ^((~Be)& Bi ); \ + Age0 = Be ^((~Bi)& Bo ); \ + Agi0 = Bi ^((~Bo)& Bu ); \ + Ago0 = Bo ^((~Bu)& Ba ); \ + Agu0 = Bu ^((~Ba)& Be ); \ + Bi = 
ROL32((Aga1^Da0), 1); \ + Bo = ROL32((Age1^De0), 22); \ + Bu = ROL32((Agi1^Di0), 30); \ + Ba = ROL32((Ago1^Do1), 14); \ + Be = ROL32((Agu1^Du1), 10); \ + Aga1 = Ba ^((~Be)& Bi ); \ + Age1 = Be ^((~Bi)& Bo ); \ + Agi1 = Bi ^((~Bo)& Bu ); \ + Ago1 = Bo ^((~Bu)& Ba ); \ + Agu1 = Bu ^((~Ba)& Be ); \ + Bu = ROL32((Aka0^Da0), 9); \ + Ba = ROL32((Ake0^De1), 1); \ + Be = ROL32((Aki0^Di0), 3); \ + Bi = ROL32((Ako0^Do1), 13); \ + Bo = ROL32((Aku0^Du0), 4); \ + Aka0 = Ba ^((~Be)& Bi ); \ + Ake0 = Be ^((~Bi)& Bo ); \ + Aki0 = Bi ^((~Bo)& Bu ); \ + Ako0 = Bo ^((~Bu)& Ba ); \ + Aku0 = Bu ^((~Ba)& Be ); \ + Bu = ROL32((Aka1^Da1), 9); \ + Ba = (Ake1^De0); \ + Be = ROL32((Aki1^Di1), 3); \ + Bi = ROL32((Ako1^Do0), 12); \ + Bo = ROL32((Aku1^Du1), 4); \ + Aka1 = Ba ^((~Be)& Bi ); \ + Ake1 = Be ^((~Bi)& Bo ); \ + Aki1 = Bi ^((~Bo)& Bu ); \ + Ako1 = Bo ^((~Bu)& Ba ); \ + Aku1 = Bu ^((~Ba)& Be ); \ + Be = ROL32((Ama0^Da0), 18); \ + Bi = ROL32((Ame0^De0), 5); \ + Bo = ROL32((Ami0^Di1), 8); \ + Bu = ROL32((Amo0^Do0), 28); \ + Ba = ROL32((Amu0^Du1), 14); \ + Ama0 = Ba ^((~Be)& Bi ); \ + Ame0 = Be ^((~Bi)& Bo ); \ + Ami0 = Bi ^((~Bo)& Bu ); \ + Amo0 = Bo ^((~Bu)& Ba ); \ + Amu0 = Bu ^((~Ba)& Be ); \ + Be = ROL32((Ama1^Da1), 18); \ + Bi = ROL32((Ame1^De1), 5); \ + Bo = ROL32((Ami1^Di0), 7); \ + Bu = ROL32((Amo1^Do1), 28); \ + Ba = ROL32((Amu1^Du0), 13); \ + Ama1 = Ba ^((~Be)& Bi ); \ + Ame1 = Be ^((~Bi)& Bo ); \ + Ami1 = Bi ^((~Bo)& Bu ); \ + Amo1 = Bo ^((~Bu)& Ba ); \ + Amu1 = Bu ^((~Ba)& Be ); \ + Bo = ROL32((Asa0^Da1), 21); \ + Bu = ROL32((Ase0^De0), 1); \ + Ba = ROL32((Asi0^Di0), 31); \ + Be = ROL32((Aso0^Do1), 28); \ + Bi = ROL32((Asu0^Du1), 20); \ + Asa0 = Ba ^((~Be)& Bi ); \ + Ase0 = Be ^((~Bi)& Bo ); \ + Asi0 = Bi ^((~Bo)& Bu ); \ + Aso0 = Bo ^((~Bu)& Ba ); \ + Asu0 = Bu ^((~Ba)& Be ); \ + Bo = ROL32((Asa1^Da0), 20); \ + Bu = ROL32((Ase1^De1), 1); \ + Ba = ROL32((Asi1^Di1), 31); \ + Be = ROL32((Aso1^Do0), 27); \ + Bi = ROL32((Asu1^Du0), 19); \ + Asa1 = Ba ^((~Be)& Bi ); \ + Ase1 = Be 
^((~Bi)& Bo ); \ + Asi1 = Bi ^((~Bo)& Bu ); \ + Aso1 = Bo ^((~Bu)& Ba ); \ + Asu1 = Bu ^((~Ba)& Be ); void KeccakP1600_Permute_Nrounds(void *state, unsigned int nRounds) { + uint32_t Da0, De0, Di0, Do0, Du0; + uint32_t Da1, De1, Di1, Do1, Du1; + uint32_t Ba, Be, Bi, Bo, Bu; + uint32_t Cx, Cy, Cz, Cw; + const uint32_t *pRoundConstants = KeccakF1600RoundConstants_int2+(24-nRounds)*2; + uint32_t *stateAsHalfLanes = (uint32_t*)state; + #define Aba0 stateAsHalfLanes[ 0] + #define Aba1 stateAsHalfLanes[ 1] + #define Abe0 stateAsHalfLanes[ 2] + #define Abe1 stateAsHalfLanes[ 3] + #define Abi0 stateAsHalfLanes[ 4] + #define Abi1 stateAsHalfLanes[ 5] + #define Abo0 stateAsHalfLanes[ 6] + #define Abo1 stateAsHalfLanes[ 7] + #define Abu0 stateAsHalfLanes[ 8] + #define Abu1 stateAsHalfLanes[ 9] + #define Aga0 stateAsHalfLanes[10] + #define Aga1 stateAsHalfLanes[11] + #define Age0 stateAsHalfLanes[12] + #define Age1 stateAsHalfLanes[13] + #define Agi0 stateAsHalfLanes[14] + #define Agi1 stateAsHalfLanes[15] + #define Ago0 stateAsHalfLanes[16] + #define Ago1 stateAsHalfLanes[17] + #define Agu0 stateAsHalfLanes[18] + #define Agu1 stateAsHalfLanes[19] + #define Aka0 stateAsHalfLanes[20] + #define Aka1 stateAsHalfLanes[21] + #define Ake0 stateAsHalfLanes[22] + #define Ake1 stateAsHalfLanes[23] + #define Aki0 stateAsHalfLanes[24] + #define Aki1 stateAsHalfLanes[25] + #define Ako0 stateAsHalfLanes[26] + #define Ako1 stateAsHalfLanes[27] + #define Aku0 stateAsHalfLanes[28] + #define Aku1 stateAsHalfLanes[29] + #define Ama0 stateAsHalfLanes[30] + #define Ama1 stateAsHalfLanes[31] + #define Ame0 stateAsHalfLanes[32] + #define Ame1 stateAsHalfLanes[33] + #define Ami0 stateAsHalfLanes[34] + #define Ami1 stateAsHalfLanes[35] + #define Amo0 stateAsHalfLanes[36] + #define Amo1 stateAsHalfLanes[37] + #define Amu0 stateAsHalfLanes[38] + #define Amu1 stateAsHalfLanes[39] + #define Asa0 stateAsHalfLanes[40] + #define Asa1 stateAsHalfLanes[41] + #define Ase0 stateAsHalfLanes[42] + #define Ase1 
stateAsHalfLanes[43] + #define Asi0 stateAsHalfLanes[44] + #define Asi1 stateAsHalfLanes[45] + #define Aso0 stateAsHalfLanes[46] + #define Aso1 stateAsHalfLanes[47] + #define Asu0 stateAsHalfLanes[48] + #define Asu1 stateAsHalfLanes[49] + + nRounds &= 3; + switch ( nRounds ) + { + #define I0 Ba + #define I1 Be + #define T0 Bi + #define T1 Bo + #define SwapPI13( in0,in1,in2,in3,eo0,eo1,eo2,eo3 ) \ + I0 = (in0)[0]; I1 = (in0)[1]; \ + T0 = (in1)[0]; T1 = (in1)[1]; \ + (in0)[eo0] = T0; (in0)[eo0^1] = T1; \ + T0 = (in2)[0]; T1 = (in2)[1]; \ + (in1)[eo1] = T0; (in1)[eo1^1] = T1; \ + T0 = (in3)[0]; T1 = (in3)[1]; \ + (in2)[eo2] = T0; (in2)[eo2^1] = T1; \ + (in3)[eo3] = I0; (in3)[eo3^1] = I1 + #define SwapPI2( in0,in1,in2,in3 ) \ + I0 = (in0)[0]; I1 = (in0)[1]; \ + T0 = (in1)[0]; T1 = (in1)[1]; \ + (in0)[1] = T0; (in0)[0] = T1; \ + (in1)[1] = I0; (in1)[0] = I1; \ + I0 = (in2)[0]; I1 = (in2)[1]; \ + T0 = (in3)[0]; T1 = (in3)[1]; \ + (in2)[1] = T0; (in2)[0] = T1; \ + (in3)[1] = I0; (in3)[0] = I1 + #define SwapEO( even,odd ) T0 = even; even = odd; odd = T0 + + case 1: + SwapPI13( &Aga0, &Aka0, &Asa0, &Ama0, 1, 0, 1, 0 ); + SwapPI13( &Abe0, &Age0, &Ame0, &Ake0, 0, 1, 0, 1 ); + SwapPI13( &Abi0, &Aki0, &Agi0, &Asi0, 1, 0, 1, 0 ); + SwapEO( Ami0, Ami1 ); + SwapPI13( &Abo0, &Amo0, &Aso0, &Ago0, 1, 0, 1, 0 ); + SwapEO( Ako0, Ako1 ); + SwapPI13( &Abu0, &Asu0, &Aku0, &Amu0, 0, 1, 0, 1 ); + break; + + case 2: + SwapPI2( &Aga0, &Asa0, &Aka0, &Ama0 ); + SwapPI2( &Abe0, &Ame0, &Age0, &Ake0 ); + SwapPI2( &Abi0, &Agi0, &Aki0, &Asi0 ); + SwapPI2( &Abo0, &Aso0, &Ago0, &Amo0 ); + SwapPI2( &Abu0, &Aku0, &Amu0, &Asu0 ); + break; + + case 3: + SwapPI13( &Aga0, &Ama0, &Asa0, &Aka0, 0, 1, 0, 1 ); + SwapPI13( &Abe0, &Ake0, &Ame0, &Age0, 1, 0, 1, 0 ); + SwapPI13( &Abi0, &Asi0, &Agi0, &Aki0, 0, 1, 0, 1 ); + SwapEO( Ami0, Ami1 ); + SwapPI13( &Abo0, &Ago0, &Aso0, &Amo0, 0, 1, 0, 1 ); + SwapEO( Ako0, Ako1 ); + SwapPI13( &Abu0, &Amu0, &Aku0, &Asu0, 1, 0, 1, 0 ); + break; + #undef I0 + #undef I1 + #undef 
T0 + #undef T1 + #undef SwapPI13 + #undef SwapPI2 + #undef SwapEO + } + + do { - UINT32 Da0, De0, Di0, Do0, Du0; - UINT32 Da1, De1, Di1, Do1, Du1; - UINT32 Ca0, Ce0, Ci0, Co0, Cu0; - UINT32 Cx, Cy, Cz, Cw; - #define Ba Ca0 - #define Be Ce0 - #define Bi Ci0 - #define Bo Co0 - #define Bu Cu0 - const UINT32 *pRoundConstants = KeccakF1600RoundConstants_int2+(24-nRounds)*2; - UINT32 *stateAsHalfLanes = (UINT32*)state; - #define Aba0 stateAsHalfLanes[ 0] - #define Aba1 stateAsHalfLanes[ 1] - #define Abe0 stateAsHalfLanes[ 2] - #define Abe1 stateAsHalfLanes[ 3] - #define Abi0 stateAsHalfLanes[ 4] - #define Abi1 stateAsHalfLanes[ 5] - #define Abo0 stateAsHalfLanes[ 6] - #define Abo1 stateAsHalfLanes[ 7] - #define Abu0 stateAsHalfLanes[ 8] - #define Abu1 stateAsHalfLanes[ 9] - #define Aga0 stateAsHalfLanes[10] - #define Aga1 stateAsHalfLanes[11] - #define Age0 stateAsHalfLanes[12] - #define Age1 stateAsHalfLanes[13] - #define Agi0 stateAsHalfLanes[14] - #define Agi1 stateAsHalfLanes[15] - #define Ago0 stateAsHalfLanes[16] - #define Ago1 stateAsHalfLanes[17] - #define Agu0 stateAsHalfLanes[18] - #define Agu1 stateAsHalfLanes[19] - #define Aka0 stateAsHalfLanes[20] - #define Aka1 stateAsHalfLanes[21] - #define Ake0 stateAsHalfLanes[22] - #define Ake1 stateAsHalfLanes[23] - #define Aki0 stateAsHalfLanes[24] - #define Aki1 stateAsHalfLanes[25] - #define Ako0 stateAsHalfLanes[26] - #define Ako1 stateAsHalfLanes[27] - #define Aku0 stateAsHalfLanes[28] - #define Aku1 stateAsHalfLanes[29] - #define Ama0 stateAsHalfLanes[30] - #define Ama1 stateAsHalfLanes[31] - #define Ame0 stateAsHalfLanes[32] - #define Ame1 stateAsHalfLanes[33] - #define Ami0 stateAsHalfLanes[34] - #define Ami1 stateAsHalfLanes[35] - #define Amo0 stateAsHalfLanes[36] - #define Amo1 stateAsHalfLanes[37] - #define Amu0 stateAsHalfLanes[38] - #define Amu1 stateAsHalfLanes[39] - #define Asa0 stateAsHalfLanes[40] - #define Asa1 stateAsHalfLanes[41] - #define Ase0 stateAsHalfLanes[42] - #define Ase1 
stateAsHalfLanes[43] - #define Asi0 stateAsHalfLanes[44] - #define Asi1 stateAsHalfLanes[45] - #define Aso0 stateAsHalfLanes[46] - #define Aso1 stateAsHalfLanes[47] - #define Asu0 stateAsHalfLanes[48] - #define Asu1 stateAsHalfLanes[49] - - do + /* Code for 4 rounds, using factor 2 interleaving, 64-bit lanes mapped to 32-bit words */ + switch ( nRounds ) { - /* --- Code for 4 rounds */ - - /* --- using factor 2 interleaving, 64-bit lanes mapped to 32-bit words */ - - KeccakAtoD_round0(); - - Ba = (Aba0^Da0); - Be = ROL32((Age0^De0), 22); - Bi = ROL32((Aki1^Di1), 22); - Bo = ROL32((Amo1^Do1), 11); - Bu = ROL32((Asu0^Du0), 7); - Aba0 = Ba ^((~Be)& Bi ); - Aba0 ^= *(pRoundConstants++); - Age0 = Be ^((~Bi)& Bo ); - Aki1 = Bi ^((~Bo)& Bu ); - Amo1 = Bo ^((~Bu)& Ba ); - Asu0 = Bu ^((~Ba)& Be ); - - Ba = (Aba1^Da1); - Be = ROL32((Age1^De1), 22); - Bi = ROL32((Aki0^Di0), 21); - Bo = ROL32((Amo0^Do0), 10); - Bu = ROL32((Asu1^Du1), 7); - Aba1 = Ba ^((~Be)& Bi ); - Aba1 ^= *(pRoundConstants++); - Age1 = Be ^((~Bi)& Bo ); - Aki0 = Bi ^((~Bo)& Bu ); - Amo0 = Bo ^((~Bu)& Ba ); - Asu1 = Bu ^((~Ba)& Be ); - - Bi = ROL32((Aka1^Da1), 2); - Bo = ROL32((Ame1^De1), 23); - Bu = ROL32((Asi1^Di1), 31); - Ba = ROL32((Abo0^Do0), 14); - Be = ROL32((Agu0^Du0), 10); - Aka1 = Ba ^((~Be)& Bi ); - Ame1 = Be ^((~Bi)& Bo ); - Asi1 = Bi ^((~Bo)& Bu ); - Abo0 = Bo ^((~Bu)& Ba ); - Agu0 = Bu ^((~Ba)& Be ); - - Bi = ROL32((Aka0^Da0), 1); - Bo = ROL32((Ame0^De0), 22); - Bu = ROL32((Asi0^Di0), 30); - Ba = ROL32((Abo1^Do1), 14); - Be = ROL32((Agu1^Du1), 10); - Aka0 = Ba ^((~Be)& Bi ); - Ame0 = Be ^((~Bi)& Bo ); - Asi0 = Bi ^((~Bo)& Bu ); - Abo1 = Bo ^((~Bu)& Ba ); - Agu1 = Bu ^((~Ba)& Be ); - - Bu = ROL32((Asa0^Da0), 9); - Ba = ROL32((Abe1^De1), 1); - Be = ROL32((Agi0^Di0), 3); - Bi = ROL32((Ako1^Do1), 13); - Bo = ROL32((Amu0^Du0), 4); - Asa0 = Ba ^((~Be)& Bi ); - Abe1 = Be ^((~Bi)& Bo ); - Agi0 = Bi ^((~Bo)& Bu ); - Ako1 = Bo ^((~Bu)& Ba ); - Amu0 = Bu ^((~Ba)& Be ); - - Bu = ROL32((Asa1^Da1), 9); - Ba = 
(Abe0^De0); - Be = ROL32((Agi1^Di1), 3); - Bi = ROL32((Ako0^Do0), 12); - Bo = ROL32((Amu1^Du1), 4); - Asa1 = Ba ^((~Be)& Bi ); - Abe0 = Be ^((~Bi)& Bo ); - Agi1 = Bi ^((~Bo)& Bu ); - Ako0 = Bo ^((~Bu)& Ba ); - Amu1 = Bu ^((~Ba)& Be ); - - Be = ROL32((Aga0^Da0), 18); - Bi = ROL32((Ake0^De0), 5); - Bo = ROL32((Ami1^Di1), 8); - Bu = ROL32((Aso0^Do0), 28); - Ba = ROL32((Abu1^Du1), 14); - Aga0 = Ba ^((~Be)& Bi ); - Ake0 = Be ^((~Bi)& Bo ); - Ami1 = Bi ^((~Bo)& Bu ); - Aso0 = Bo ^((~Bu)& Ba ); - Abu1 = Bu ^((~Ba)& Be ); - - Be = ROL32((Aga1^Da1), 18); - Bi = ROL32((Ake1^De1), 5); - Bo = ROL32((Ami0^Di0), 7); - Bu = ROL32((Aso1^Do1), 28); - Ba = ROL32((Abu0^Du0), 13); - Aga1 = Ba ^((~Be)& Bi ); - Ake1 = Be ^((~Bi)& Bo ); - Ami0 = Bi ^((~Bo)& Bu ); - Aso1 = Bo ^((~Bu)& Ba ); - Abu0 = Bu ^((~Ba)& Be ); - - Bo = ROL32((Ama1^Da1), 21); - Bu = ROL32((Ase0^De0), 1); - Ba = ROL32((Abi0^Di0), 31); - Be = ROL32((Ago1^Do1), 28); - Bi = ROL32((Aku1^Du1), 20); - Ama1 = Ba ^((~Be)& Bi ); - Ase0 = Be ^((~Bi)& Bo ); - Abi0 = Bi ^((~Bo)& Bu ); - Ago1 = Bo ^((~Bu)& Ba ); - Aku1 = Bu ^((~Ba)& Be ); - - Bo = ROL32((Ama0^Da0), 20); - Bu = ROL32((Ase1^De1), 1); - Ba = ROL32((Abi1^Di1), 31); - Be = ROL32((Ago0^Do0), 27); - Bi = ROL32((Aku0^Du0), 19); - Ama0 = Ba ^((~Be)& Bi ); - Ase1 = Be ^((~Bi)& Bo ); - Abi1 = Bi ^((~Bo)& Bu ); - Ago0 = Bo ^((~Bu)& Ba ); - Aku0 = Bu ^((~Ba)& Be ); - - KeccakAtoD_round1(); - - Ba = (Aba0^Da0); - Be = ROL32((Ame1^De0), 22); - Bi = ROL32((Agi1^Di1), 22); - Bo = ROL32((Aso1^Do1), 11); - Bu = ROL32((Aku1^Du0), 7); - Aba0 = Ba ^((~Be)& Bi ); - Aba0 ^= *(pRoundConstants++); - Ame1 = Be ^((~Bi)& Bo ); - Agi1 = Bi ^((~Bo)& Bu ); - Aso1 = Bo ^((~Bu)& Ba ); - Aku1 = Bu ^((~Ba)& Be ); - - Ba = (Aba1^Da1); - Be = ROL32((Ame0^De1), 22); - Bi = ROL32((Agi0^Di0), 21); - Bo = ROL32((Aso0^Do0), 10); - Bu = ROL32((Aku0^Du1), 7); - Aba1 = Ba ^((~Be)& Bi ); - Aba1 ^= *(pRoundConstants++); - Ame0 = Be ^((~Bi)& Bo ); - Agi0 = Bi ^((~Bo)& Bu ); - Aso0 = Bo ^((~Bu)& Ba ); - Aku0 = 
Bu ^((~Ba)& Be ); - - Bi = ROL32((Asa1^Da1), 2); - Bo = ROL32((Ake1^De1), 23); - Bu = ROL32((Abi1^Di1), 31); - Ba = ROL32((Amo1^Do0), 14); - Be = ROL32((Agu0^Du0), 10); - Asa1 = Ba ^((~Be)& Bi ); - Ake1 = Be ^((~Bi)& Bo ); - Abi1 = Bi ^((~Bo)& Bu ); - Amo1 = Bo ^((~Bu)& Ba ); - Agu0 = Bu ^((~Ba)& Be ); - - Bi = ROL32((Asa0^Da0), 1); - Bo = ROL32((Ake0^De0), 22); - Bu = ROL32((Abi0^Di0), 30); - Ba = ROL32((Amo0^Do1), 14); - Be = ROL32((Agu1^Du1), 10); - Asa0 = Ba ^((~Be)& Bi ); - Ake0 = Be ^((~Bi)& Bo ); - Abi0 = Bi ^((~Bo)& Bu ); - Amo0 = Bo ^((~Bu)& Ba ); - Agu1 = Bu ^((~Ba)& Be ); - - Bu = ROL32((Ama1^Da0), 9); - Ba = ROL32((Age1^De1), 1); - Be = ROL32((Asi1^Di0), 3); - Bi = ROL32((Ako0^Do1), 13); - Bo = ROL32((Abu1^Du0), 4); - Ama1 = Ba ^((~Be)& Bi ); - Age1 = Be ^((~Bi)& Bo ); - Asi1 = Bi ^((~Bo)& Bu ); - Ako0 = Bo ^((~Bu)& Ba ); - Abu1 = Bu ^((~Ba)& Be ); - - Bu = ROL32((Ama0^Da1), 9); - Ba = (Age0^De0); - Be = ROL32((Asi0^Di1), 3); - Bi = ROL32((Ako1^Do0), 12); - Bo = ROL32((Abu0^Du1), 4); - Ama0 = Ba ^((~Be)& Bi ); - Age0 = Be ^((~Bi)& Bo ); - Asi0 = Bi ^((~Bo)& Bu ); - Ako1 = Bo ^((~Bu)& Ba ); - Abu0 = Bu ^((~Ba)& Be ); - - Be = ROL32((Aka1^Da0), 18); - Bi = ROL32((Abe1^De0), 5); - Bo = ROL32((Ami0^Di1), 8); - Bu = ROL32((Ago1^Do0), 28); - Ba = ROL32((Asu1^Du1), 14); - Aka1 = Ba ^((~Be)& Bi ); - Abe1 = Be ^((~Bi)& Bo ); - Ami0 = Bi ^((~Bo)& Bu ); - Ago1 = Bo ^((~Bu)& Ba ); - Asu1 = Bu ^((~Ba)& Be ); - - Be = ROL32((Aka0^Da1), 18); - Bi = ROL32((Abe0^De1), 5); - Bo = ROL32((Ami1^Di0), 7); - Bu = ROL32((Ago0^Do1), 28); - Ba = ROL32((Asu0^Du0), 13); - Aka0 = Ba ^((~Be)& Bi ); - Abe0 = Be ^((~Bi)& Bo ); - Ami1 = Bi ^((~Bo)& Bu ); - Ago0 = Bo ^((~Bu)& Ba ); - Asu0 = Bu ^((~Ba)& Be ); - - Bo = ROL32((Aga1^Da1), 21); - Bu = ROL32((Ase0^De0), 1); - Ba = ROL32((Aki1^Di0), 31); - Be = ROL32((Abo1^Do1), 28); - Bi = ROL32((Amu1^Du1), 20); - Aga1 = Ba ^((~Be)& Bi ); - Ase0 = Be ^((~Bi)& Bo ); - Aki1 = Bi ^((~Bo)& Bu ); - Abo1 = Bo ^((~Bu)& Ba ); - Amu1 = Bu ^((~Ba)& Be 
); - - Bo = ROL32((Aga0^Da0), 20); - Bu = ROL32((Ase1^De1), 1); - Ba = ROL32((Aki0^Di1), 31); - Be = ROL32((Abo0^Do0), 27); - Bi = ROL32((Amu0^Du0), 19); - Aga0 = Ba ^((~Be)& Bi ); - Ase1 = Be ^((~Bi)& Bo ); - Aki0 = Bi ^((~Bo)& Bu ); - Abo0 = Bo ^((~Bu)& Ba ); - Amu0 = Bu ^((~Ba)& Be ); - - KeccakAtoD_round2(); - - Ba = (Aba0^Da0); - Be = ROL32((Ake1^De0), 22); - Bi = ROL32((Asi0^Di1), 22); - Bo = ROL32((Ago0^Do1), 11); - Bu = ROL32((Amu1^Du0), 7); - Aba0 = Ba ^((~Be)& Bi ); - Aba0 ^= *(pRoundConstants++); - Ake1 = Be ^((~Bi)& Bo ); - Asi0 = Bi ^((~Bo)& Bu ); - Ago0 = Bo ^((~Bu)& Ba ); - Amu1 = Bu ^((~Ba)& Be ); - - Ba = (Aba1^Da1); - Be = ROL32((Ake0^De1), 22); - Bi = ROL32((Asi1^Di0), 21); - Bo = ROL32((Ago1^Do0), 10); - Bu = ROL32((Amu0^Du1), 7); - Aba1 = Ba ^((~Be)& Bi ); - Aba1 ^= *(pRoundConstants++); - Ake0 = Be ^((~Bi)& Bo ); - Asi1 = Bi ^((~Bo)& Bu ); - Ago1 = Bo ^((~Bu)& Ba ); - Amu0 = Bu ^((~Ba)& Be ); - - Bi = ROL32((Ama0^Da1), 2); - Bo = ROL32((Abe0^De1), 23); - Bu = ROL32((Aki0^Di1), 31); - Ba = ROL32((Aso1^Do0), 14); - Be = ROL32((Agu0^Du0), 10); - Ama0 = Ba ^((~Be)& Bi ); - Abe0 = Be ^((~Bi)& Bo ); - Aki0 = Bi ^((~Bo)& Bu ); - Aso1 = Bo ^((~Bu)& Ba ); - Agu0 = Bu ^((~Ba)& Be ); - - Bi = ROL32((Ama1^Da0), 1); - Bo = ROL32((Abe1^De0), 22); - Bu = ROL32((Aki1^Di0), 30); - Ba = ROL32((Aso0^Do1), 14); - Be = ROL32((Agu1^Du1), 10); - Ama1 = Ba ^((~Be)& Bi ); - Abe1 = Be ^((~Bi)& Bo ); - Aki1 = Bi ^((~Bo)& Bu ); - Aso0 = Bo ^((~Bu)& Ba ); - Agu1 = Bu ^((~Ba)& Be ); - - Bu = ROL32((Aga1^Da0), 9); - Ba = ROL32((Ame0^De1), 1); - Be = ROL32((Abi1^Di0), 3); - Bi = ROL32((Ako1^Do1), 13); - Bo = ROL32((Asu1^Du0), 4); - Aga1 = Ba ^((~Be)& Bi ); - Ame0 = Be ^((~Bi)& Bo ); - Abi1 = Bi ^((~Bo)& Bu ); - Ako1 = Bo ^((~Bu)& Ba ); - Asu1 = Bu ^((~Ba)& Be ); - - Bu = ROL32((Aga0^Da1), 9); - Ba = (Ame1^De0); - Be = ROL32((Abi0^Di1), 3); - Bi = ROL32((Ako0^Do0), 12); - Bo = ROL32((Asu0^Du1), 4); - Aga0 = Ba ^((~Be)& Bi ); - Ame1 = Be ^((~Bi)& Bo ); - Abi0 = Bi ^((~Bo)& Bu 
); - Ako0 = Bo ^((~Bu)& Ba ); - Asu0 = Bu ^((~Ba)& Be ); - - Be = ROL32((Asa1^Da0), 18); - Bi = ROL32((Age1^De0), 5); - Bo = ROL32((Ami1^Di1), 8); - Bu = ROL32((Abo1^Do0), 28); - Ba = ROL32((Aku0^Du1), 14); - Asa1 = Ba ^((~Be)& Bi ); - Age1 = Be ^((~Bi)& Bo ); - Ami1 = Bi ^((~Bo)& Bu ); - Abo1 = Bo ^((~Bu)& Ba ); - Aku0 = Bu ^((~Ba)& Be ); - - Be = ROL32((Asa0^Da1), 18); - Bi = ROL32((Age0^De1), 5); - Bo = ROL32((Ami0^Di0), 7); - Bu = ROL32((Abo0^Do1), 28); - Ba = ROL32((Aku1^Du0), 13); - Asa0 = Ba ^((~Be)& Bi ); - Age0 = Be ^((~Bi)& Bo ); - Ami0 = Bi ^((~Bo)& Bu ); - Abo0 = Bo ^((~Bu)& Ba ); - Aku1 = Bu ^((~Ba)& Be ); - - Bo = ROL32((Aka0^Da1), 21); - Bu = ROL32((Ase0^De0), 1); - Ba = ROL32((Agi1^Di0), 31); - Be = ROL32((Amo0^Do1), 28); - Bi = ROL32((Abu0^Du1), 20); - Aka0 = Ba ^((~Be)& Bi ); - Ase0 = Be ^((~Bi)& Bo ); - Agi1 = Bi ^((~Bo)& Bu ); - Amo0 = Bo ^((~Bu)& Ba ); - Abu0 = Bu ^((~Ba)& Be ); - - Bo = ROL32((Aka1^Da0), 20); - Bu = ROL32((Ase1^De1), 1); - Ba = ROL32((Agi0^Di1), 31); - Be = ROL32((Amo1^Do0), 27); - Bi = ROL32((Abu1^Du0), 19); - Aka1 = Ba ^((~Be)& Bi ); - Ase1 = Be ^((~Bi)& Bo ); - Agi0 = Bi ^((~Bo)& Bu ); - Amo1 = Bo ^((~Bu)& Ba ); - Abu1 = Bu ^((~Ba)& Be ); - - KeccakAtoD_round3(); - - Ba = (Aba0^Da0); - Be = ROL32((Abe0^De0), 22); - Bi = ROL32((Abi0^Di1), 22); - Bo = ROL32((Abo0^Do1), 11); - Bu = ROL32((Abu0^Du0), 7); - Aba0 = Ba ^((~Be)& Bi ); - Aba0 ^= *(pRoundConstants++); - Abe0 = Be ^((~Bi)& Bo ); - Abi0 = Bi ^((~Bo)& Bu ); - Abo0 = Bo ^((~Bu)& Ba ); - Abu0 = Bu ^((~Ba)& Be ); - - Ba = (Aba1^Da1); - Be = ROL32((Abe1^De1), 22); - Bi = ROL32((Abi1^Di0), 21); - Bo = ROL32((Abo1^Do0), 10); - Bu = ROL32((Abu1^Du1), 7); - Aba1 = Ba ^((~Be)& Bi ); - Aba1 ^= *(pRoundConstants++); - Abe1 = Be ^((~Bi)& Bo ); - Abi1 = Bi ^((~Bo)& Bu ); - Abo1 = Bo ^((~Bu)& Ba ); - Abu1 = Bu ^((~Ba)& Be ); - - Bi = ROL32((Aga0^Da1), 2); - Bo = ROL32((Age0^De1), 23); - Bu = ROL32((Agi0^Di1), 31); - Ba = ROL32((Ago0^Do0), 14); - Be = ROL32((Agu0^Du0), 10); - Aga0 = 
Ba ^((~Be)& Bi ); - Age0 = Be ^((~Bi)& Bo ); - Agi0 = Bi ^((~Bo)& Bu ); - Ago0 = Bo ^((~Bu)& Ba ); - Agu0 = Bu ^((~Ba)& Be ); - - Bi = ROL32((Aga1^Da0), 1); - Bo = ROL32((Age1^De0), 22); - Bu = ROL32((Agi1^Di0), 30); - Ba = ROL32((Ago1^Do1), 14); - Be = ROL32((Agu1^Du1), 10); - Aga1 = Ba ^((~Be)& Bi ); - Age1 = Be ^((~Bi)& Bo ); - Agi1 = Bi ^((~Bo)& Bu ); - Ago1 = Bo ^((~Bu)& Ba ); - Agu1 = Bu ^((~Ba)& Be ); - - Bu = ROL32((Aka0^Da0), 9); - Ba = ROL32((Ake0^De1), 1); - Be = ROL32((Aki0^Di0), 3); - Bi = ROL32((Ako0^Do1), 13); - Bo = ROL32((Aku0^Du0), 4); - Aka0 = Ba ^((~Be)& Bi ); - Ake0 = Be ^((~Bi)& Bo ); - Aki0 = Bi ^((~Bo)& Bu ); - Ako0 = Bo ^((~Bu)& Ba ); - Aku0 = Bu ^((~Ba)& Be ); - - Bu = ROL32((Aka1^Da1), 9); - Ba = (Ake1^De0); - Be = ROL32((Aki1^Di1), 3); - Bi = ROL32((Ako1^Do0), 12); - Bo = ROL32((Aku1^Du1), 4); - Aka1 = Ba ^((~Be)& Bi ); - Ake1 = Be ^((~Bi)& Bo ); - Aki1 = Bi ^((~Bo)& Bu ); - Ako1 = Bo ^((~Bu)& Ba ); - Aku1 = Bu ^((~Ba)& Be ); - - Be = ROL32((Ama0^Da0), 18); - Bi = ROL32((Ame0^De0), 5); - Bo = ROL32((Ami0^Di1), 8); - Bu = ROL32((Amo0^Do0), 28); - Ba = ROL32((Amu0^Du1), 14); - Ama0 = Ba ^((~Be)& Bi ); - Ame0 = Be ^((~Bi)& Bo ); - Ami0 = Bi ^((~Bo)& Bu ); - Amo0 = Bo ^((~Bu)& Ba ); - Amu0 = Bu ^((~Ba)& Be ); - - Be = ROL32((Ama1^Da1), 18); - Bi = ROL32((Ame1^De1), 5); - Bo = ROL32((Ami1^Di0), 7); - Bu = ROL32((Amo1^Do1), 28); - Ba = ROL32((Amu1^Du0), 13); - Ama1 = Ba ^((~Be)& Bi ); - Ame1 = Be ^((~Bi)& Bo ); - Ami1 = Bi ^((~Bo)& Bu ); - Amo1 = Bo ^((~Bu)& Ba ); - Amu1 = Bu ^((~Ba)& Be ); - - Bo = ROL32((Asa0^Da1), 21); - Bu = ROL32((Ase0^De0), 1); - Ba = ROL32((Asi0^Di0), 31); - Be = ROL32((Aso0^Do1), 28); - Bi = ROL32((Asu0^Du1), 20); - Asa0 = Ba ^((~Be)& Bi ); - Ase0 = Be ^((~Bi)& Bo ); - Asi0 = Bi ^((~Bo)& Bu ); - Aso0 = Bo ^((~Bu)& Ba ); - Asu0 = Bu ^((~Ba)& Be ); - - Bo = ROL32((Asa1^Da0), 20); - Bu = ROL32((Ase1^De1), 1); - Ba = ROL32((Asi1^Di1), 31); - Be = ROL32((Aso1^Do0), 27); - Bi = ROL32((Asu1^Du0), 19); - Asa1 = Ba ^((~Be)& Bi 
); - Ase1 = Be ^((~Bi)& Bo ); - Asi1 = Bi ^((~Bo)& Bu ); - Aso1 = Bo ^((~Bu)& Ba ); - Asu1 = Bu ^((~Ba)& Be ); + case 0: KeccakRound0(); /* fall through */ + case 3: KeccakRound1(); + case 2: KeccakRound2(); + case 1: KeccakRound3(); } - while ( *pRoundConstants != 0xFF ); - - #undef Aba0 - #undef Aba1 - #undef Abe0 - #undef Abe1 - #undef Abi0 - #undef Abi1 - #undef Abo0 - #undef Abo1 - #undef Abu0 - #undef Abu1 - #undef Aga0 - #undef Aga1 - #undef Age0 - #undef Age1 - #undef Agi0 - #undef Agi1 - #undef Ago0 - #undef Ago1 - #undef Agu0 - #undef Agu1 - #undef Aka0 - #undef Aka1 - #undef Ake0 - #undef Ake1 - #undef Aki0 - #undef Aki1 - #undef Ako0 - #undef Ako1 - #undef Aku0 - #undef Aku1 - #undef Ama0 - #undef Ama1 - #undef Ame0 - #undef Ame1 - #undef Ami0 - #undef Ami1 - #undef Amo0 - #undef Amo1 - #undef Amu0 - #undef Amu1 - #undef Asa0 - #undef Asa1 - #undef Ase0 - #undef Ase1 - #undef Asi0 - #undef Asi1 - #undef Aso0 - #undef Aso1 - #undef Asu0 - #undef Asu1 + nRounds = 0; } + while ( *pRoundConstants != 0xFF ); + + #undef Aba0 + #undef Aba1 + #undef Abe0 + #undef Abe1 + #undef Abi0 + #undef Abi1 + #undef Abo0 + #undef Abo1 + #undef Abu0 + #undef Abu1 + #undef Aga0 + #undef Aga1 + #undef Age0 + #undef Age1 + #undef Agi0 + #undef Agi1 + #undef Ago0 + #undef Ago1 + #undef Agu0 + #undef Agu1 + #undef Aka0 + #undef Aka1 + #undef Ake0 + #undef Ake1 + #undef Aki0 + #undef Aki1 + #undef Ako0 + #undef Ako1 + #undef Aku0 + #undef Aku1 + #undef Ama0 + #undef Ama1 + #undef Ame0 + #undef Ame1 + #undef Ami0 + #undef Ami1 + #undef Amo0 + #undef Amo1 + #undef Amu0 + #undef Amu1 + #undef Asa0 + #undef Asa1 + #undef Ase0 + #undef Ase1 + #undef Asi0 + #undef Asi1 + #undef Aso0 + #undef Aso1 + #undef Asu0 + #undef Asu1 } /* ---------------------------------------------------------------- */ diff --git a/Modules/_sha3/kcp/KeccakP-1600-opt64-config.h b/Modules/_sha3/kcp/KeccakP-1600-opt64-config.h index 9501c64b186aa9..e6f16a4becb4b3 100644 --- 
a/Modules/_sha3/kcp/KeccakP-1600-opt64-config.h +++ b/Modules/_sha3/kcp/KeccakP-1600-opt64-config.h @@ -1,3 +1,7 @@ +/* +This file defines some parameters of the implementation in the parent directory. +*/ + #define KeccakP1600_implementation_config "lane complementing, all rounds unrolled" #define KeccakP1600_fullUnrolling #define KeccakP1600_useLaneComplementing diff --git a/Modules/_sha3/kcp/KeccakP-1600-opt64.c b/Modules/_sha3/kcp/KeccakP-1600-opt64.c index c90010dd9256c1..1673abe877b83e 100644 --- a/Modules/_sha3/kcp/KeccakP-1600-opt64.c +++ b/Modules/_sha3/kcp/KeccakP-1600-opt64.c @@ -1,28 +1,33 @@ /* -Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, -Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby -denoted as "the implementer". +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP -For more information, feedback or questions, please refer to our websites: -http://keccak.noekeon.org/ -http://keyak.noekeon.org/ -http://ketje.noekeon.org/ +The Keccak-p permutations, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche. + +Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ To the extent possible under law, the implementer has waived all copyright and related or neighboring rights to the source code in this file. http://creativecommons.org/publicdomain/zero/1.0/ + +--- + +This file implements Keccak-p[1600] in a SnP-compatible way. +Please refer to SnP-documentation.h for more details. + +This implementation comes with KeccakP-1600-SnP.h in the same folder. +Please refer to LowLevel.build for the exact list of other files it must be combined with. 
*/ +#include #include #include /* #include "brg_endian.h" */ #include "KeccakP-1600-opt64-config.h" -#if NOT_PYTHON -typedef unsigned char UINT8; -/* typedef unsigned long long int UINT64; */ -#endif - #if defined(KeccakP1600_useLaneComplementing) #define UseBebigokimisa #endif @@ -31,13 +36,13 @@ typedef unsigned char UINT8; #define ROL64(a, offset) _rotl64(a, offset) #elif defined(KeccakP1600_useSHLD) #define ROL64(x,N) ({ \ - register UINT64 __out; \ - register UINT64 __in = x; \ + register uint64_t __out; \ + register uint64_t __in = x; \ __asm__ ("shld %2,%0,%0" : "=r"(__out) : "0"(__in), "i"(N)); \ __out; \ }) #else -#define ROL64(a, offset) ((((UINT64)a) << offset) ^ (((UINT64)a) >> (64-offset))) +#define ROL64(a, offset) ((((uint64_t)a) << offset) ^ (((uint64_t)a) >> (64-offset))) #endif #include "KeccakP-1600-64.macros" @@ -49,7 +54,7 @@ typedef unsigned char UINT8; #include "KeccakP-1600-unrolling.macros" #include "SnP-Relaned.h" -static const UINT64 KeccakF1600RoundConstants[24] = { +static const uint64_t KeccakF1600RoundConstants[24] = { 0x0000000000000001ULL, 0x0000000000008082ULL, 0x800000000000808aULL, @@ -81,12 +86,12 @@ void KeccakP1600_Initialize(void *state) { memset(state, 0, 200); #ifdef KeccakP1600_useLaneComplementing - ((UINT64*)state)[ 1] = ~(UINT64)0; - ((UINT64*)state)[ 2] = ~(UINT64)0; - ((UINT64*)state)[ 8] = ~(UINT64)0; - ((UINT64*)state)[12] = ~(UINT64)0; - ((UINT64*)state)[17] = ~(UINT64)0; - ((UINT64*)state)[20] = ~(UINT64)0; + ((uint64_t*)state)[ 1] = ~(uint64_t)0; + ((uint64_t*)state)[ 2] = ~(uint64_t)0; + ((uint64_t*)state)[ 8] = ~(uint64_t)0; + ((uint64_t*)state)[12] = ~(uint64_t)0; + ((uint64_t*)state)[17] = ~(uint64_t)0; + ((uint64_t*)state)[20] = ~(uint64_t)0; #endif } @@ -95,7 +100,7 @@ void KeccakP1600_Initialize(void *state) void KeccakP1600_AddBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length) { #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) - UINT64 
lane; + uint64_t lane; if (length == 0) return; if (length == 1) @@ -106,12 +111,12 @@ void KeccakP1600_AddBytesInLane(void *state, unsigned int lanePosition, const un } lane <<= offset*8; #else - UINT64 lane = 0; + uint64_t lane = 0; unsigned int i; for(i=0; i 1) { - ((UINT64*)data)[ 1] = ~((UINT64*)data)[ 1]; + ((uint64_t*)data)[ 1] = ~((uint64_t*)data)[ 1]; if (laneCount > 2) { - ((UINT64*)data)[ 2] = ~((UINT64*)data)[ 2]; + ((uint64_t*)data)[ 2] = ~((uint64_t*)data)[ 2]; if (laneCount > 8) { - ((UINT64*)data)[ 8] = ~((UINT64*)data)[ 8]; + ((uint64_t*)data)[ 8] = ~((uint64_t*)data)[ 8]; if (laneCount > 12) { - ((UINT64*)data)[12] = ~((UINT64*)data)[12]; + ((uint64_t*)data)[12] = ~((uint64_t*)data)[12]; if (laneCount > 17) { - ((UINT64*)data)[17] = ~((UINT64*)data)[17]; + ((uint64_t*)data)[17] = ~((uint64_t*)data)[17]; if (laneCount > 20) { - ((UINT64*)data)[20] = ~((UINT64*)data)[20]; + ((uint64_t*)data)[20] = ~((uint64_t*)data)[20]; } } } @@ -379,7 +447,7 @@ void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned i void KeccakP1600_ExtractAndAddBytesInLane(const void *state, unsigned int lanePosition, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length) { - UINT64 lane = ((UINT64*)state)[lanePosition]; + uint64_t lane = ((uint64_t*)state)[lanePosition]; #ifdef KeccakP1600_useLaneComplementing if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20)) lane = ~lane; @@ -387,10 +455,10 @@ void KeccakP1600_ExtractAndAddBytesInLane(const void *state, unsigned int lanePo #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) { unsigned int i; - UINT64 lane1[1]; + uint64_t lane1[1]; lane1[0] = lane; for(i=0; i 1) { - ((UINT64*)output)[ 1] = ~((UINT64*)output)[ 1]; + ((uint64_t*)output)[ 1] = ~((uint64_t*)output)[ 1]; if (laneCount > 2) { - ((UINT64*)output)[ 2] = ~((UINT64*)output)[ 2]; + ((uint64_t*)output)[ 2] = 
~((uint64_t*)output)[ 2]; if (laneCount > 8) { - ((UINT64*)output)[ 8] = ~((UINT64*)output)[ 8]; + ((uint64_t*)output)[ 8] = ~((uint64_t*)output)[ 8]; if (laneCount > 12) { - ((UINT64*)output)[12] = ~((UINT64*)output)[12]; + ((uint64_t*)output)[12] = ~((uint64_t*)output)[12]; if (laneCount > 17) { - ((UINT64*)output)[17] = ~((UINT64*)output)[17]; + ((uint64_t*)output)[17] = ~((uint64_t*)output)[17]; if (laneCount > 20) { - ((UINT64*)output)[20] = ~((UINT64*)output)[20]; + ((uint64_t*)output)[20] = ~((uint64_t*)output)[20]; } } } @@ -459,8 +527,8 @@ size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const un #ifndef KeccakP1600_fullUnrolling unsigned int i; #endif - UINT64 *stateAsLanes = (UINT64*)state; - UINT64 *inDataAsLanes = (UINT64*)data; + uint64_t *stateAsLanes = (uint64_t*)state; + uint64_t *inDataAsLanes = (uint64_t*)data; copyFromState(A, stateAsLanes) while(dataByteLen >= laneCount*8) { @@ -472,3 +540,26 @@ size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const un copyToState(stateAsLanes, A) return originalDataByteLen - dataByteLen; } + +/* ---------------------------------------------------------------- */ + +size_t KeccakP1600_12rounds_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen) +{ + size_t originalDataByteLen = dataByteLen; + declareABCDE + #ifndef KeccakP1600_fullUnrolling + unsigned int i; + #endif + uint64_t *stateAsLanes = (uint64_t*)state; + uint64_t *inDataAsLanes = (uint64_t*)data; + + copyFromState(A, stateAsLanes) + while(dataByteLen >= laneCount*8) { + addInput(A, inDataAsLanes, laneCount) + rounds12 + inDataAsLanes += laneCount; + dataByteLen -= laneCount*8; + } + copyToState(stateAsLanes, A) + return originalDataByteLen - dataByteLen; +} diff --git a/Modules/_sha3/kcp/KeccakP-1600-unrolling.macros b/Modules/_sha3/kcp/KeccakP-1600-unrolling.macros index 405ce29724cedd..9f72002262b2ef 100644 --- 
a/Modules/_sha3/kcp/KeccakP-1600-unrolling.macros +++ b/Modules/_sha3/kcp/KeccakP-1600-unrolling.macros @@ -1,12 +1,13 @@ /* -Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, -Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby -denoted as "the implementer". +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP -For more information, feedback or questions, please refer to our websites: -http://keccak.noekeon.org/ -http://keyak.noekeon.org/ -http://ketje.noekeon.org/ +The Keccak-p permutations, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche. + +Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ To the extent possible under law, the implementer has waived all copyright and related or neighboring rights to the source code in this file. @@ -56,6 +57,22 @@ http://creativecommons.org/publicdomain/zero/1.0/ thetaRhoPiChiIotaPrepareTheta(22, A, E) \ thetaRhoPiChiIota(23, E, A) \ +#define rounds6 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta(18, A, E) \ + thetaRhoPiChiIotaPrepareTheta(19, E, A) \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + +#define rounds4 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + #elif (Unrolling == 12) #define rounds24 \ prepareTheta \ @@ -89,6 +106,22 @@ http://creativecommons.org/publicdomain/zero/1.0/ thetaRhoPiChiIotaPrepareTheta(22, A, E) \ thetaRhoPiChiIota(23, E, A) \ +#define rounds6 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta(18, A, E) \ + thetaRhoPiChiIotaPrepareTheta(19, E, A) \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + 
thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + +#define rounds4 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + #elif (Unrolling == 6) #define rounds24 \ prepareTheta \ @@ -112,6 +145,22 @@ http://creativecommons.org/publicdomain/zero/1.0/ thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \ } \ +#define rounds6 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta(18, A, E) \ + thetaRhoPiChiIotaPrepareTheta(19, E, A) \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + +#define rounds4 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + #elif (Unrolling == 4) #define rounds24 \ prepareTheta \ @@ -131,6 +180,20 @@ http://creativecommons.org/publicdomain/zero/1.0/ thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ } \ +#define rounds6 \ + prepareTheta \ + for(i=18; i<24; i+=2) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + } \ + +#define rounds4 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + #elif (Unrolling == 3) #define rounds24 \ prepareTheta \ @@ -150,6 +213,22 @@ http://creativecommons.org/publicdomain/zero/1.0/ copyStateVariables(A, E) \ } \ +#define rounds6 \ + prepareTheta \ + for(i=18; i<24; i+=3) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + copyStateVariables(A, E) \ + } \ + +#define rounds4 \ + prepareTheta \ + for(i=20; i<24; i+=2) { \ + 
thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + } \ + #elif (Unrolling == 2) #define rounds24 \ prepareTheta \ @@ -165,6 +244,20 @@ http://creativecommons.org/publicdomain/zero/1.0/ thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ } \ +#define rounds6 \ + prepareTheta \ + for(i=18; i<24; i+=2) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + } \ + +#define rounds4 \ + prepareTheta \ + for(i=20; i<24; i+=2) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + } \ + #elif (Unrolling == 1) #define rounds24 \ prepareTheta \ @@ -180,6 +273,33 @@ http://creativecommons.org/publicdomain/zero/1.0/ copyStateVariables(A, E) \ } \ +#define rounds6 \ + prepareTheta \ + for(i=18; i<24; i++) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + copyStateVariables(A, E) \ + } \ + +#define rounds4 \ + prepareTheta \ + for(i=20; i<24; i++) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + copyStateVariables(A, E) \ + } \ + #else #error "Unrolling is not correctly specified!" #endif + +#define roundsN(__nrounds) \ + prepareTheta \ + i = 24 - (__nrounds); \ + if ((i&1) != 0) { \ + thetaRhoPiChiIotaPrepareTheta(i, A, E) \ + copyStateVariables(A, E) \ + ++i; \ + } \ + for( /* empty */; i<24; i+=2) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + } diff --git a/Modules/_sha3/kcp/KeccakSponge.c b/Modules/_sha3/kcp/KeccakSponge.c index afdb73172f3478..350df772e426dd 100644 --- a/Modules/_sha3/kcp/KeccakSponge.c +++ b/Modules/_sha3/kcp/KeccakSponge.c @@ -1,12 +1,13 @@ /* -Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, -Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby -denoted as "the implementer". 
+The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP -For more information, feedback or questions, please refer to our websites: -http://keccak.noekeon.org/ -http://keyak.noekeon.org/ -http://ketje.noekeon.org/ +Keccak, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche. + +Implementation by the designers, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ To the extent possible under law, the implementer has waived all copyright and related or neighboring rights to the source code in this file. @@ -19,7 +20,7 @@ and related or neighboring rights to the source code in this file. #include "displayIntermediateValues.h" #endif -#ifndef KeccakP200_excluded +#ifdef XKCP_has_KeccakP200 #include "KeccakP-200-SnP.h" #define prefix KeccakWidth200 @@ -37,7 +38,7 @@ and related or neighboring rights to the source code in this file. #undef SnP_FastLoop_Absorb #endif -#ifndef KeccakP400_excluded +#ifdef XKCP_has_KeccakP400 #include "KeccakP-400-SnP.h" #define prefix KeccakWidth400 @@ -55,7 +56,7 @@ and related or neighboring rights to the source code in this file. #undef SnP_FastLoop_Absorb #endif -#ifndef KeccakP800_excluded +#ifdef XKCP_has_KeccakP800 #include "KeccakP-800-SnP.h" #define prefix KeccakWidth800 @@ -73,7 +74,7 @@ and related or neighboring rights to the source code in this file. #undef SnP_FastLoop_Absorb #endif -#ifndef KeccakP1600_excluded +#ifdef XKCP_has_KeccakP1600 #include "KeccakP-1600-SnP.h" #define prefix KeccakWidth1600 @@ -90,3 +91,21 @@ and related or neighboring rights to the source code in this file. 
#undef SnP_Permute #undef SnP_FastLoop_Absorb #endif + +#ifdef XKCP_has_KeccakP1600 + #include "KeccakP-1600-SnP.h" + + #define prefix KeccakWidth1600_12rounds + #define SnP KeccakP1600 + #define SnP_width 1600 + #define SnP_Permute KeccakP1600_Permute_12rounds + #if defined(KeccakP1600_12rounds_FastLoop_supported) + #define SnP_FastLoop_Absorb KeccakP1600_12rounds_FastLoop_Absorb + #endif + #include "KeccakSponge.inc" + #undef prefix + #undef SnP + #undef SnP_width + #undef SnP_Permute + #undef SnP_FastLoop_Absorb +#endif diff --git a/Modules/_sha3/kcp/KeccakSponge.h b/Modules/_sha3/kcp/KeccakSponge.h index 0f4badcac059e9..5ef9bf29728022 100644 --- a/Modules/_sha3/kcp/KeccakSponge.h +++ b/Modules/_sha3/kcp/KeccakSponge.h @@ -1,12 +1,13 @@ /* -Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, -Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby -denoted as "the implementer". +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP -For more information, feedback or questions, please refer to our websites: -http://keccak.noekeon.org/ -http://keyak.noekeon.org/ -http://ketje.noekeon.org/ +Keccak, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche. + +Implementation by the designers, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ To the extent possible under law, the implementer has waived all copyright and related or neighboring rights to the source code in this file. @@ -16,121 +17,14 @@ and related or neighboring rights to the source code in this file. #ifndef _KeccakSponge_h_ #define _KeccakSponge_h_ -/** General information - * - * The following type and functions are not actually implemented. 
Their - * documentation is generic, with the prefix Prefix replaced by - * - KeccakWidth200 for a sponge function based on Keccak-f[200] - * - KeccakWidth400 for a sponge function based on Keccak-f[400] - * - KeccakWidth800 for a sponge function based on Keccak-f[800] - * - KeccakWidth1600 for a sponge function based on Keccak-f[1600] - * - * In all these functions, the rate and capacity must sum to the width of the - * chosen permutation. For instance, to use the sponge function - * Keccak[r=1344, c=256], one must use KeccakWidth1600_Sponge() or a combination - * of KeccakWidth1600_SpongeInitialize(), KeccakWidth1600_SpongeAbsorb(), - * KeccakWidth1600_SpongeAbsorbLastFewBits() and - * KeccakWidth1600_SpongeSqueeze(). - * - * The Prefix_SpongeInstance contains the sponge instance attributes for use - * with the Prefix_Sponge* functions. - * It gathers the state processed by the permutation as well as the rate, - * the position of input/output bytes in the state and the phase - * (absorbing or squeezing). - */ - -#ifdef DontReallyInclude_DocumentationOnly -/** Function to evaluate the sponge function Keccak[r, c] in a single call. - * @param rate The value of the rate r. - * @param capacity The value of the capacity c. - * @param input Pointer to the input message (before the suffix). - * @param inputByteLen The length of the input message in bytes. - * @param suffix Byte containing from 0 to 7 suffix bits - * that must be absorbed after @a input. - * These n bits must be in the least significant bit positions. - * These bits must be delimited with a bit 1 at position n - * (counting from 0=LSB to 7=MSB) and followed by bits 0 - * from position n+1 to position 7. - * Some examples: - * - If no bits are to be absorbed, then @a suffix must be 0x01. - * - If the 2-bit sequence 0,0 is to be absorbed, @a suffix must be 0x04. - * - If the 5-bit sequence 0,1,0,0,1 is to be absorbed, @a suffix must be 0x32. 
- * - If the 7-bit sequence 1,1,0,1,0,0,0 is to be absorbed, @a suffix must be 0x8B. - * . - * @param output Pointer to the output buffer. - * @param outputByteLen The desired number of output bytes. - * @pre One must have r+c equal to the supported width of this implementation - * and the rate a multiple of 8 bits (one byte) in this implementation. - * @pre @a suffix ≠ 0x00 - * @return Zero if successful, 1 otherwise. - */ -int Prefix_Sponge(unsigned int rate, unsigned int capacity, const unsigned char *input, size_t inputByteLen, unsigned char suffix, unsigned char *output, size_t outputByteLen); - -/** - * Function to initialize the state of the Keccak[r, c] sponge function. - * The phase of the sponge function is set to absorbing. - * @param spongeInstance Pointer to the sponge instance to be initialized. - * @param rate The value of the rate r. - * @param capacity The value of the capacity c. - * @pre One must have r+c equal to the supported width of this implementation - * and the rate a multiple of 8 bits (one byte) in this implementation. - * @return Zero if successful, 1 otherwise. - */ -int Prefix_SpongeInitialize(Prefix_SpongeInstance *spongeInstance, unsigned int rate, unsigned int capacity); - -/** - * Function to give input data bytes for the sponge function to absorb. - * @param spongeInstance Pointer to the sponge instance initialized by Prefix_SpongeInitialize(). - * @param data Pointer to the input data. - * @param dataByteLen The number of input bytes provided in the input data. - * @pre The sponge function must be in the absorbing phase, - * i.e., Prefix_SpongeSqueeze() or Prefix_SpongeAbsorbLastFewBits() - * must not have been called before. - * @return Zero if successful, 1 otherwise. - */ -int Prefix_SpongeAbsorb(Prefix_SpongeInstance *spongeInstance, const unsigned char *data, size_t dataByteLen); - -/** - * Function to give input data bits for the sponge function to absorb - * and then to switch to the squeezing phase. 
- * @param spongeInstance Pointer to the sponge instance initialized by Prefix_SpongeInitialize(). - * @param delimitedData Byte containing from 0 to 7 trailing bits - * that must be absorbed. - * These n bits must be in the least significant bit positions. - * These bits must be delimited with a bit 1 at position n - * (counting from 0=LSB to 7=MSB) and followed by bits 0 - * from position n+1 to position 7. - * Some examples: - * - If no bits are to be absorbed, then @a delimitedData must be 0x01. - * - If the 2-bit sequence 0,0 is to be absorbed, @a delimitedData must be 0x04. - * - If the 5-bit sequence 0,1,0,0,1 is to be absorbed, @a delimitedData must be 0x32. - * - If the 7-bit sequence 1,1,0,1,0,0,0 is to be absorbed, @a delimitedData must be 0x8B. - * . - * @pre The sponge function must be in the absorbing phase, - * i.e., Prefix_SpongeSqueeze() or Prefix_SpongeAbsorbLastFewBits() - * must not have been called before. - * @pre @a delimitedData ≠ 0x00 - * @return Zero if successful, 1 otherwise. - */ -int Prefix_SpongeAbsorbLastFewBits(Prefix_SpongeInstance *spongeInstance, unsigned char delimitedData); - -/** - * Function to squeeze output data from the sponge function. - * If the sponge function was in the absorbing phase, this function - * switches it to the squeezing phase - * as if Prefix_SpongeAbsorbLastFewBits(spongeInstance, 0x01) was called. - * @param spongeInstance Pointer to the sponge instance initialized by Prefix_SpongeInitialize(). - * @param data Pointer to the buffer where to store the output data. - * @param dataByteLen The number of output bytes desired. - * @return Zero if successful, 1 otherwise. 
- */ -int Prefix_SpongeSqueeze(Prefix_SpongeInstance *spongeInstance, unsigned char *data, size_t dataByteLen); -#endif +/* For the documentation, please follow the link: */ +/* #include "KeccakSponge-documentation.h" */ #include #include "align.h" +/* #include "config.h" */ -#define KCP_DeclareSpongeStructure(prefix, size, alignment) \ +#define XKCP_DeclareSpongeStructure(prefix, size, alignment) \ ALIGN(alignment) typedef struct prefix##_SpongeInstanceStruct { \ unsigned char state[size]; \ unsigned int rate; \ @@ -138,35 +32,45 @@ int Prefix_SpongeSqueeze(Prefix_SpongeInstance *spongeInstance, unsigned char *d int squeezing; \ } prefix##_SpongeInstance; -#define KCP_DeclareSpongeFunctions(prefix) \ +#define XKCP_DeclareSpongeFunctions(prefix) \ int prefix##_Sponge(unsigned int rate, unsigned int capacity, const unsigned char *input, size_t inputByteLen, unsigned char suffix, unsigned char *output, size_t outputByteLen); \ int prefix##_SpongeInitialize(prefix##_SpongeInstance *spongeInstance, unsigned int rate, unsigned int capacity); \ int prefix##_SpongeAbsorb(prefix##_SpongeInstance *spongeInstance, const unsigned char *data, size_t dataByteLen); \ int prefix##_SpongeAbsorbLastFewBits(prefix##_SpongeInstance *spongeInstance, unsigned char delimitedData); \ int prefix##_SpongeSqueeze(prefix##_SpongeInstance *spongeInstance, unsigned char *data, size_t dataByteLen); -#ifndef KeccakP200_excluded +#ifdef XKCP_has_KeccakP200 #include "KeccakP-200-SnP.h" - KCP_DeclareSpongeStructure(KeccakWidth200, KeccakP200_stateSizeInBytes, KeccakP200_stateAlignment) - KCP_DeclareSpongeFunctions(KeccakWidth200) + XKCP_DeclareSpongeStructure(KeccakWidth200, KeccakP200_stateSizeInBytes, KeccakP200_stateAlignment) + XKCP_DeclareSpongeFunctions(KeccakWidth200) + #define XKCP_has_Sponge_Keccak_width200 #endif -#ifndef KeccakP400_excluded +#ifdef XKCP_has_KeccakP400 #include "KeccakP-400-SnP.h" - KCP_DeclareSpongeStructure(KeccakWidth400, KeccakP400_stateSizeInBytes, 
KeccakP400_stateAlignment) - KCP_DeclareSpongeFunctions(KeccakWidth400) + XKCP_DeclareSpongeStructure(KeccakWidth400, KeccakP400_stateSizeInBytes, KeccakP400_stateAlignment) + XKCP_DeclareSpongeFunctions(KeccakWidth400) + #define XKCP_has_Sponge_Keccak_width400 #endif -#ifndef KeccakP800_excluded +#ifdef XKCP_has_KeccakP800 #include "KeccakP-800-SnP.h" - KCP_DeclareSpongeStructure(KeccakWidth800, KeccakP800_stateSizeInBytes, KeccakP800_stateAlignment) - KCP_DeclareSpongeFunctions(KeccakWidth800) + XKCP_DeclareSpongeStructure(KeccakWidth800, KeccakP800_stateSizeInBytes, KeccakP800_stateAlignment) + XKCP_DeclareSpongeFunctions(KeccakWidth800) + #define XKCP_has_Sponge_Keccak_width800 +#endif + +#ifdef XKCP_has_KeccakP1600 + #include "KeccakP-1600-SnP.h" + XKCP_DeclareSpongeStructure(KeccakWidth1600, KeccakP1600_stateSizeInBytes, KeccakP1600_stateAlignment) + XKCP_DeclareSpongeFunctions(KeccakWidth1600) + #define XKCP_has_Sponge_Keccak_width1600 #endif -#ifndef KeccakP1600_excluded +#ifdef XKCP_has_KeccakP1600 #include "KeccakP-1600-SnP.h" - KCP_DeclareSpongeStructure(KeccakWidth1600, KeccakP1600_stateSizeInBytes, KeccakP1600_stateAlignment) - KCP_DeclareSpongeFunctions(KeccakWidth1600) + XKCP_DeclareSpongeStructure(KeccakWidth1600_12rounds, KeccakP1600_stateSizeInBytes, KeccakP1600_stateAlignment) + XKCP_DeclareSpongeFunctions(KeccakWidth1600_12rounds) #endif #endif diff --git a/Modules/_sha3/kcp/KeccakSponge.inc b/Modules/_sha3/kcp/KeccakSponge.inc index e10739deafa836..70080923ec7bb2 100644 --- a/Modules/_sha3/kcp/KeccakSponge.inc +++ b/Modules/_sha3/kcp/KeccakSponge.inc @@ -1,12 +1,13 @@ /* -Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, -Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby -denoted as "the implementer". 
+The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP -For more information, feedback or questions, please refer to our websites: -http://keccak.noekeon.org/ -http://keyak.noekeon.org/ -http://ketje.noekeon.org/ +Keccak, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche. + +Implementation by the designers, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ To the extent possible under law, the implementer has waived all copyright and related or neighboring rights to the source code in this file. @@ -47,16 +48,13 @@ int Sponge(unsigned int rate, unsigned int capacity, const unsigned char *input, return 1; /* Initialize the state */ - SnP_StaticInitialize(); SnP_Initialize(state); /* First, absorb whole blocks */ - #ifdef SnP_FastLoop_Absorb if (((rateInBytes % (SnP_width/200)) == 0) && (inputByteLen >= rateInBytes)) { /* fast lane: whole lane rate */ - size_t j; j = SnP_FastLoop_Absorb(state, rateInBytes/(SnP_width/200), curInput, inputByteLen); curInput += j; @@ -74,7 +72,6 @@ int Sponge(unsigned int rate, unsigned int capacity, const unsigned char *input, } /* Then, absorb what remains */ - partialBlock = (unsigned int)inputByteLen; #ifdef KeccakReference displayBytes(1, "Block to be absorbed (part)", curInput, partialBlock); @@ -82,7 +79,6 @@ int Sponge(unsigned int rate, unsigned int capacity, const unsigned char *input, SnP_AddBytes(state, curInput, 0, partialBlock); /* Finally, absorb the suffix */ - #ifdef KeccakReference { unsigned char delimitedData1[1]; @@ -91,14 +87,11 @@ int Sponge(unsigned int rate, unsigned int capacity, const unsigned char *input, } #endif /* Last few bits, whose delimiter coincides with first bit of padding */ - SnP_AddByte(state, suffix, partialBlock); /* If the first bit of padding is at position rate-1, we need a whole new block for the second bit of padding */ - if ((suffix >= 0x80) && 
(partialBlock == (rateInBytes-1))) SnP_Permute(state); /* Second bit of padding */ - SnP_AddByte(state, 0x80, rateInBytes-1); #ifdef KeccakReference { @@ -114,7 +107,6 @@ int Sponge(unsigned int rate, unsigned int capacity, const unsigned char *input, #endif /* First, output whole blocks */ - while(outputByteLen > (size_t)rateInBytes) { SnP_ExtractBytes(state, curOutput, 0, rateInBytes); SnP_Permute(state); @@ -126,7 +118,6 @@ int Sponge(unsigned int rate, unsigned int capacity, const unsigned char *input, } /* Finally, output what remains */ - partialBlock = (unsigned int)outputByteLen; SnP_ExtractBytes(state, curOutput, 0, partialBlock); #ifdef KeccakReference @@ -167,17 +158,14 @@ int SpongeAbsorb(SpongeInstance *instance, const unsigned char *data, size_t dat if (instance->squeezing) return 1; /* Too late for additional input */ - i = 0; curData = data; while(i < dataByteLen) { if ((instance->byteIOIndex == 0) && (dataByteLen >= (i + rateInBytes))) { #ifdef SnP_FastLoop_Absorb /* processing full blocks first */ - if ((rateInBytes % (SnP_width/200)) == 0) { /* fast lane: whole lane rate */ - j = SnP_FastLoop_Absorb(instance->state, rateInBytes/(SnP_width/200), curData, dataByteLen - i); i += j; curData += j; @@ -199,7 +187,6 @@ int SpongeAbsorb(SpongeInstance *instance, const unsigned char *data, size_t dat } else { /* normal lane: using the message queue */ - partialBlock = (unsigned int)(dataByteLen - i); if (partialBlock+instance->byteIOIndex > rateInBytes) partialBlock = rateInBytes-instance->byteIOIndex; @@ -231,7 +218,6 @@ int SpongeAbsorbLastFewBits(SpongeInstance *instance, unsigned char delimitedDat if (instance->squeezing) return 1; /* Too late for additional input */ - #ifdef KeccakReference { unsigned char delimitedData1[1]; @@ -240,14 +226,11 @@ int SpongeAbsorbLastFewBits(SpongeInstance *instance, unsigned char delimitedDat } #endif /* Last few bits, whose delimiter coincides with first bit of padding */ - SnP_AddByte(instance->state, 
delimitedData, instance->byteIOIndex); /* If the first bit of padding is at position rate-1, we need a whole new block for the second bit of padding */ - if ((delimitedData >= 0x80) && (instance->byteIOIndex == (rateInBytes-1))) SnP_Permute(instance->state); /* Second bit of padding */ - SnP_AddByte(instance->state, 0x80, rateInBytes-1); #ifdef KeccakReference { @@ -294,7 +277,6 @@ int SpongeSqueeze(SpongeInstance *instance, unsigned char *data, size_t dataByte } else { /* normal lane: using the message queue */ - if (instance->byteIOIndex == rateInBytes) { SnP_Permute(instance->state); instance->byteIOIndex = 0; diff --git a/Modules/_sha3/kcp/PlSnP-Fallback.inc b/Modules/_sha3/kcp/PlSnP-Fallback.inc deleted file mode 100644 index 3a9119ab4b6aa8..00000000000000 --- a/Modules/_sha3/kcp/PlSnP-Fallback.inc +++ /dev/null @@ -1,257 +0,0 @@ -/* -Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, -Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby -denoted as "the implementer". - -For more information, feedback or questions, please refer to our websites: -http://keccak.noekeon.org/ -http://keyak.noekeon.org/ -http://ketje.noekeon.org/ - -To the extent possible under law, the implementer has waived all copyright -and related or neighboring rights to the source code in this file. 
-http://creativecommons.org/publicdomain/zero/1.0/ -*/ - -/* expect PlSnP_baseParallelism, PlSnP_targetParallelism */ - -/* expect SnP_stateSizeInBytes, SnP_stateAlignment */ - -/* expect prefix */ - -/* expect SnP_* */ - - -#define JOIN0(a, b) a ## b -#define JOIN(a, b) JOIN0(a, b) - -#define PlSnP_StaticInitialize JOIN(prefix, _StaticInitialize) -#define PlSnP_InitializeAll JOIN(prefix, _InitializeAll) -#define PlSnP_AddByte JOIN(prefix, _AddByte) -#define PlSnP_AddBytes JOIN(prefix, _AddBytes) -#define PlSnP_AddLanesAll JOIN(prefix, _AddLanesAll) -#define PlSnP_OverwriteBytes JOIN(prefix, _OverwriteBytes) -#define PlSnP_OverwriteLanesAll JOIN(prefix, _OverwriteLanesAll) -#define PlSnP_OverwriteWithZeroes JOIN(prefix, _OverwriteWithZeroes) -#define PlSnP_ExtractBytes JOIN(prefix, _ExtractBytes) -#define PlSnP_ExtractLanesAll JOIN(prefix, _ExtractLanesAll) -#define PlSnP_ExtractAndAddBytes JOIN(prefix, _ExtractAndAddBytes) -#define PlSnP_ExtractAndAddLanesAll JOIN(prefix, _ExtractAndAddLanesAll) - -#if (PlSnP_baseParallelism == 1) - #define SnP_stateSizeInBytes JOIN(SnP, _stateSizeInBytes) - #define SnP_stateAlignment JOIN(SnP, _stateAlignment) -#else - #define SnP_stateSizeInBytes JOIN(SnP, _statesSizeInBytes) - #define SnP_stateAlignment JOIN(SnP, _statesAlignment) -#endif -#define PlSnP_factor ((PlSnP_targetParallelism)/(PlSnP_baseParallelism)) -#define SnP_stateOffset (((SnP_stateSizeInBytes+(SnP_stateAlignment-1))/SnP_stateAlignment)*SnP_stateAlignment) -#define stateWithIndex(i) ((unsigned char *)states+((i)*SnP_stateOffset)) - -#define SnP_StaticInitialize JOIN(SnP, _StaticInitialize) -#define SnP_Initialize JOIN(SnP, _Initialize) -#define SnP_InitializeAll JOIN(SnP, _InitializeAll) -#define SnP_AddByte JOIN(SnP, _AddByte) -#define SnP_AddBytes JOIN(SnP, _AddBytes) -#define SnP_AddLanesAll JOIN(SnP, _AddLanesAll) -#define SnP_OverwriteBytes JOIN(SnP, _OverwriteBytes) -#define SnP_OverwriteLanesAll JOIN(SnP, _OverwriteLanesAll) -#define 
SnP_OverwriteWithZeroes JOIN(SnP, _OverwriteWithZeroes) -#define SnP_ExtractBytes JOIN(SnP, _ExtractBytes) -#define SnP_ExtractLanesAll JOIN(SnP, _ExtractLanesAll) -#define SnP_ExtractAndAddBytes JOIN(SnP, _ExtractAndAddBytes) -#define SnP_ExtractAndAddLanesAll JOIN(SnP, _ExtractAndAddLanesAll) - -void PlSnP_StaticInitialize( void ) -{ - SnP_StaticInitialize(); -} - -void PlSnP_InitializeAll(void *states) -{ - unsigned int i; - - for(i=0; ihash_state, buf.buf, buf.len * 8); } - if (res != SUCCESS) { + if (res != KECCAK_SUCCESS) { PyErr_SetString(PyExc_RuntimeError, "internal error in SHA3 Update()"); goto error; @@ -322,7 +313,7 @@ _sha3_sha3_224_digest_impl(SHA3object *self) SHA3_copystate(temp, self->hash_state); LEAVE_HASHLIB(self); res = SHA3_done(&temp, digest); - if (res != SUCCESS) { + if (res != KECCAK_SUCCESS) { PyErr_SetString(PyExc_RuntimeError, "internal error in SHA3 Final()"); return NULL; } @@ -350,7 +341,7 @@ _sha3_sha3_224_hexdigest_impl(SHA3object *self) SHA3_copystate(temp, self->hash_state); LEAVE_HASHLIB(self); res = SHA3_done(&temp, digest); - if (res != SUCCESS) { + if (res != KECCAK_SUCCESS) { PyErr_SetString(PyExc_RuntimeError, "internal error in SHA3 Final()"); return NULL; } @@ -396,7 +387,7 @@ _sha3_sha3_224_update(SHA3object *self, PyObject *data) res = SHA3_process(&self->hash_state, buf.buf, buf.len * 8); } - if (res != SUCCESS) { + if (res != KECCAK_SUCCESS) { PyBuffer_Release(&buf); PyErr_SetString(PyExc_RuntimeError, "internal error in SHA3 Update()"); @@ -618,12 +609,12 @@ _SHAKE_digest(SHA3object *self, unsigned long digestlen, int hex) SHA3_copystate(temp, self->hash_state); LEAVE_HASHLIB(self); res = SHA3_done(&temp, NULL); - if (res != SUCCESS) { + if (res != KECCAK_SUCCESS) { PyErr_SetString(PyExc_RuntimeError, "internal error in SHA3 done()"); goto error; } res = SHA3_squeeze(&temp, digest, digestlen * 8); - if (res != SUCCESS) { + if (res != KECCAK_SUCCESS) { PyErr_SetString(PyExc_RuntimeError, "internal error in SHA3 
Squeeze()"); return NULL; }