/* sha1.c */
/**
 * \file
 * This is a drop in replacement for the openssl sha1 code.
 * It has a small performance penalty compared to the openssl code,
 * but that's no surprise at all as the openssl code is highly
 * optimized.
 *
 * \author	Georg Hopp
 *
 * \copyright
 * Copyright © 2014 Georg Hopp
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
#ifndef HAS_OPENSSL

#include <string.h>
#include <sys/types.h>

#ifdef __MINGW32__
#include <winsock.h>
#else
#include <arpa/inet.h>
#endif

#include "tr/sha1.h"

/*
 * The code included here is far beyond my horizon. It's an SSE
 * optimized version of the hashblock functionality taken from
 * here: http://arctic.org/~dean/crypto/sha1.html
 * I am able to understand the intention to vectorize this part
 * of the code and make use of the SIMD functionality in modern
 * CPUs and to integrate it here,
 * but I would never be able to create this nor to even have the
 * idea that the algorithm could be optimized that way.
 * Anyway, if an SSE instruction set is available this code is about
 * 4 times faster than my C implementation...we are still talking
 * about microseconds but if you do a lot of these this matters.
 */
#include "sse/sha1.h"

#ifndef __SSE__
/*
 * Portable block function, defined at the end of this file. It is
 * only declared when the build does not define __SSE__; in that
 * case the functions below call it instead of sha1_step.
 */
static void sha1_hashblock(TR_SHA_CTX *, const unsigned char[64]);
#endif

/**
 * Reset a SHA-1 context to its initial state.
 *
 * Loads the five hash words with the SHA-1 initial hash values,
 * zeroes the 64 byte block buffer and resets both the number of
 * buffered bytes and the number of bytes hashed so far.
 *
 * \param ctx  context to initialize
 * \return     always 1
 */
int
TR_SHA1_Init(TR_SHA_CTX * ctx)
{
	ctx->hash[0] = 0x67452301;
	ctx->hash[1] = 0xefcdab89;
	ctx->hash[2] = 0x98badcfe;
	ctx->hash[3] = 0x10325476;
	ctx->hash[4] = 0xc3d2e1f0;

	/*
	 * Clear the block buffer bytewise. Storing through casted
	 * uint64_t pointers would access the byte buffer through an
	 * incompatible type and depends on an alignment this file
	 * cannot guarantee.
	 */
	memset(ctx->temp_buffer, 0, 64);

	ctx->ntemp_buffer = 0;
	ctx->size         = 0;

	return 1;
}

/**
 * Feed message bytes into a SHA-1 context.
 *
 * Input is hashed in blocks of 64 bytes. Bytes that do not complete
 * a block are kept in ctx->temp_buffer until a later call supplies
 * the rest of the block.
 *
 * \param ctx    context previously set up with TR_SHA1_Init
 * \param data   bytes to add; may be NULL only when ndata is 0
 * \param ndata  number of bytes available at data
 * \return       always 1
 */
int
TR_SHA1_Update(TR_SHA_CTX * ctx, const void * data, size_t ndata)
{
	/* byte view of data: pointer arithmetic on void * is not ISO C */
	const unsigned char * input    = data;
	size_t                pos_data = 0;

	if (0 == ndata) {
		/* nothing to add; also keeps a NULL data away from memcpy */
		return 1;
	}

	if (0 != ctx->ntemp_buffer || 64 > ctx->ntemp_buffer + ndata) {
		/*
		 * either there was already data in temp_buffer or we have
		 * not enough data for a complete block or both.
		 * This means we start with filling up temp_buffer and
		 * if it is full use it as the first block.
		 */
		size_t to_move = ndata < 64 - ctx->ntemp_buffer
			? ndata
			: 64 - ctx->ntemp_buffer;

		memcpy(ctx->temp_buffer + ctx->ntemp_buffer, input, to_move);
		ctx->ntemp_buffer += to_move;
		pos_data           = to_move;

		if (ctx->ntemp_buffer != 64) {
			/*
			 * if temp_buffer is still not full we can stop here
			 */
			return 1;
		}

#ifndef __SSE__
		sha1_hashblock(ctx, ctx->temp_buffer);
#else
		sha1_step(ctx->hash, (uint32_t *)(ctx->temp_buffer), 1);
#endif

		/*
		 * TR_SHA1_Init starts with an all-zero buffer; keep the
		 * bytes beyond ntemp_buffer zero so that padding never
		 * picks up stale input from an already hashed block.
		 */
		memset(ctx->temp_buffer, 0, 64);

		ctx->ntemp_buffer  = 0;
		ctx->size         += 64;
	}

	/*
	 * hash all complete blocks directly from the input, starting
	 * behind the bytes that were moved into temp_buffer above.
	 * pos_data never exceeds ndata, so the subtraction cannot wrap.
	 */
#ifndef __SSE__
	while (ndata - pos_data >= 64) {
		sha1_hashblock(ctx, input + pos_data);
		pos_data  += 64;
		ctx->size += 64;
	}
#else
	{
		size_t nblocks = (ndata - pos_data) / 64;

		/*
		 * NOTE(review): sha1_step is defined outside this file;
		 * it is not called with a block count of 0 here, and it
		 * receives a pointer at an arbitrary byte offset - confirm
		 * that it tolerates unaligned input.
		 */
		if (0 != nblocks) {
			sha1_step(
					ctx->hash,
					(uint32_t *)(input + pos_data),
					nblocks);
			pos_data  += nblocks * 64;
			ctx->size += nblocks * 64;
		}
	}
#endif

	if (pos_data == ndata) {
		return 1;
	}

	/*
	 * keep the incomplete rest (less than 64 bytes) for the next call
	 */
	ctx->ntemp_buffer = ndata - pos_data;
	memcpy(ctx->temp_buffer, input + pos_data, ctx->ntemp_buffer);

	return 1;
}

/**
 * Finish a SHA-1 computation and write out the digest.
 *
 * Appends the 0x80 end marker, zero padding and the message length
 * in bits (64 bit, most significant byte first) to the buffered
 * input, hashes the resulting one or two blocks and stores the five
 * hash words most significant byte first in digest.
 *
 * \param digest  receives 20 bytes; no particular alignment needed
 * \param ctx     context that was fed through TR_SHA1_Update
 * \return        always 1
 */
int
TR_SHA1_Final(unsigned char * const digest, TR_SHA_CTX * ctx)
{
	/*
	 * temp_buffer is never full, as then it would have been
	 * processed in TR_SHA1_Update. So it is safe to set the 0x80
	 * end marker.
	 */
	uint64_t size = (ctx->size + ctx->ntemp_buffer) * 8; // size in bits
	size_t   used;
	int      i;

	ctx->temp_buffer[ctx->ntemp_buffer++] = 0x80;
	used = ctx->ntemp_buffer;

	if ((64 - used) < 8) {
		/*
		 * it might happen that the size in bits does not
		 * fit within the block...in that case it has to be
		 * put in a second, otherwise empty block.
		 */
		memset(ctx->temp_buffer + used, 0, 64 - used);

#ifndef __SSE__
		sha1_hashblock(ctx, ctx->temp_buffer);
#else
		sha1_step(ctx->hash, (uint32_t *)(ctx->temp_buffer), 1);
#endif

		used = 0;
	}

	/*
	 * Zero everything between the used part and the length field
	 * explicitly; the buffer may still hold bytes of an earlier
	 * block, which must not leak into the padding.
	 */
	memset(ctx->temp_buffer + used, 0, 56 - used);

	/*
	 * Store the bit count bytewise, most significant byte first, so
	 * the result does not depend on the byte order of the host.
	 */
	for (i = 0; i < 8; i++) {
		ctx->temp_buffer[56 + i] = (unsigned char)(size >> (8 * (7 - i)));
	}

#ifndef __SSE__
	sha1_hashblock(ctx, ctx->temp_buffer);
#else
	sha1_step(ctx->hash, (uint32_t *)(ctx->temp_buffer), 1);
#endif

	/*
	 * Write the digest bytewise as well: digest is a plain byte
	 * pointer and might not be suitably aligned for 32 bit stores.
	 */
	for (i = 0; i < 5; i++) {
		uint32_t word = ctx->hash[i];

		digest[4 * i]     = (unsigned char)(word >> 24);
		digest[4 * i + 1] = (unsigned char)(word >> 16);
		digest[4 * i + 2] = (unsigned char)(word >> 8);
		digest[4 * i + 3] = (unsigned char)word;
	}

	return 1;
}

#ifndef __SSE__

/*
 * Helper macros for sha1_hashblock. All of them evaluate their
 * arguments more than once, so only pass side-effect free expressions.
 */
/* rotate val left by n bits; written for 32 bit unsigned values, 0 < n < 32 */
#define ROTL(val, n) (((val)<<(n)) | ((val)>>(32-(n))))
/* bitwise choose: takes the bit of y where x is 1 and the bit of z where x is 0 */
#define CH(x, y, z)     ((((y)^(z))&(x))^(z))
/* bitwise xor of all three arguments */
#define PARITY(x, y, z) (((x)^(y))^(z))
/* bitwise majority: a result bit is 1 if it is 1 in at least two arguments */
#define MAJOR(x, y, z)  (((x)&(z))|(((x)|(z))&(y)))

/**
 * Hash one 64 byte block into the running state in ctx->hash.
 *
 * The block is read as sixteen 32 bit words, most significant byte
 * first, expanded to an 80 word schedule and mixed in 80 rounds.
 * The result is added to the five words in ctx->hash.
 *
 * \param ctx    context whose hash words are updated
 * \param block  64 bytes of input; no particular alignment needed
 */
static
void
sha1_hashblock(TR_SHA_CTX * ctx, const unsigned char block[64])
{
	uint32_t work[80];
	int      i;
	uint32_t a = ctx->hash[0];
	uint32_t b = ctx->hash[1];
	uint32_t c = ctx->hash[2];
	uint32_t d = ctx->hash[3];
	uint32_t e = ctx->hash[4];

	for (i=0; i<80; i++) {
		uint32_t T;

		if (i<16) {
			/*
			 * Assemble the word from single bytes. Reading through
			 * a casted uint32_t pointer would drop the const, break
			 * on unaligned blocks and depend on host byte order.
			 */
			const unsigned char * bytes = block + 4 * i;

			work[i] = ((uint32_t)bytes[0] << 24)
				| ((uint32_t)bytes[1] << 16)
				| ((uint32_t)bytes[2] << 8)
				|  (uint32_t)bytes[3];
		} else {
			work[i] = ROTL(work[i-3] ^ work[i-8] ^ work[i-14] ^ work[i-16], 1);
		}

		T = ROTL(a, 5) + e + work[i];

		/*
		 * every group of 20 rounds has its own mixing function
		 * and additive constant
		 */
		switch (i / 20) {
			case 0:
				T += CH(b, c, d) + 0x5a827999;
				break;

			case 1:
				T += PARITY(b, c, d) + 0x6ed9eba1;
				break;

			case 2:
				T += MAJOR(b, c, d) + 0x8f1bbcdc;
				break;

			default: /* i / 20 == 3, rounds 60..79 */
				T += PARITY(b, c, d) + 0xca62c1d6;
				break;
		}

		e = d;
		d = c;
		c = ROTL(b, 30);
		b = a;
		a = T;
	}

	ctx->hash[0] += a;
	ctx->hash[1] += b;
	ctx->hash[2] += c;
	ctx->hash[3] += d;
	ctx->hash[4] += e;
}
#endif // __SSE__
#endif // HAS_OPENSSL

// vim: set ts=4 sw=4: