Implement md5c function, slower than Golang's implementation

keeping it in repo to make further improvements and also rename

        minio-hash ---> crypto
This commit is contained in:
Harshavardhana 2014-12-21 02:12:30 -08:00
parent 958890276d
commit 13650e088c
11 changed files with 449 additions and 17 deletions

View File

@ -22,6 +22,7 @@ build-cpu:
build-md5:
@godep go test -race -coverprofile=cover.out github.com/minio-io/minio/pkgs/crypto/md5/
@godep go test -race -coverprofile=cover.out github.com/minio-io/minio/pkgs/crypto/md5c/
build-sha1:
@godep go test -race -coverprofile=cover.out github.com/minio-io/minio/pkgs/crypto/sha1/
@ -58,7 +59,7 @@ cover: build-erasure build-signify build-split build-crc32c build-cpu build-scsi
install: build-erasure
@godep go install github.com/minio-io/minio/cmd/minio && echo "Installed minio into ${GOPATH}/bin"
@godep go install github.com/minio-io/minio/cmd/minio-cli && echo "Installed minio-cli into ${GOPATH}/bin"
@godep go install github.com/minio-io/minio/cmd/minio-hash && echo "Installed minio-hash into ${GOPATH}/bin"
@godep go install github.com/minio-io/minio/cmd/crypto && echo "Installed crypto into ${GOPATH}/bin"
save: restore
@godep save ./...

1
cmd/crypto/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
crypto

View File

@ -24,8 +24,8 @@ import (
func main() {
app := cli.NewApp()
app.Name = "minio-hash"
app.Usage = ""
app.Name = "crypto"
app.Usage = "calculate cryptosum on a given stream"
app.Commands = Options
app.Run(os.Args)
}

33
cmd/crypto/crypto.md Normal file
View File

@ -0,0 +1,33 @@
% MINIO(1) Minio Manual
% Minio community
% December 2014
# NAME
crypto - calculate crypto sum on a stream
# SYNOPSIS
# DESCRIPTION
```sh
NAME:
crypto - calculate cryptosum on a given stream
USAGE:
crypto [global options] command [command options] [arguments...]
VERSION:
0.0.0
COMMANDS:
md5sum
sha1sum
sha256sum
sha512sum
help, h Shows a list of commands or help for one command
GLOBAL OPTIONS:
--help, -h show help
--version, -v print the version
```
# EXAMPLES
# AUTHORS

View File

@ -1,13 +0,0 @@
% MINIO(1) Minio Manual
% Minio community
% December 2014
# NAME
md5sum -
# SYNOPSIS
# DESCRIPTION
# EXAMPLES
# AUTHORS

View File

@ -8,10 +8,15 @@ import (
func Sum(reader io.Reader) ([]byte, error) {
hash := md5.New()
var err error
var length int
for err == nil {
length := 0
byteBuffer := make([]byte, 1024*1024)
length, err = reader.Read(byteBuffer)
// While hash.Write() wouldn't mind a Nil byteBuffer
// It is necessary for us to verify this and break
if length == 0 {
break
}
byteBuffer = byteBuffer[0:length]
hash.Write(byteBuffer)
}

291
pkgs/crypto/md5c/md5.c Normal file
View File

@ -0,0 +1,291 @@
/*
* This is an OpenSSL-compatible implementation of the RSA Data Security, Inc.
* MD5 Message-Digest Algorithm (RFC 1321).
*
* Homepage:
* http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5
*
* Author:
* Alexander Peslyak, better known as Solar Designer <solar at openwall.com>
*
* This software was written by Alexander Peslyak in 2001. No copyright is
* claimed, and the software is hereby placed in the public domain.
* In case this attempt to disclaim copyright and place the software in the
* public domain is deemed null and void, then the software is
* Copyright (c) 2001 Alexander Peslyak and it is hereby released to the
* general public under the following terms:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted.
*
* There's ABSOLUTELY NO WARRANTY, express or implied.
*
* (This is a heavily cut-down "BSD license".)
*
* This differs from Colin Plumb's older public domain implementation in that
* no exactly 32-bit integer data type is required (any 32-bit or wider
* unsigned integer data type will do), there's no compile-time endianness
* configuration, and the function prototypes match OpenSSL's. No code from
* Colin Plumb's implementation has been reused; this comment merely compares
* the properties of the two independent implementations.
*
* The primary goals of this implementation are portability and ease of use.
* It is meant to be fast, but not as fast as possible. Some known
* optimizations are not included to reduce source code size and avoid
* compile-time configuration.
*/
#include <string.h>
#include "md5.h"
/*
* The basic MD5 functions.
*
* F and G are optimized compared to their RFC 1321 definitions for
* architectures that lack an AND-NOT instruction, just like in Colin Plumb's
* implementation.
*/
#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
#define G(x, y, z) ((y) ^ ((z) & ((x) ^ (y))))
#define H(x, y, z) (((x) ^ (y)) ^ (z))
#define H2(x, y, z) ((x) ^ ((y) ^ (z)))
#define I(x, y, z) ((y) ^ ((x) | ~(z)))
/*
* The MD5 transformation for all four rounds.
*/
#define STEP(f, a, b, c, d, x, t, s) \
(a) += f((b), (c), (d)) + (x) + (t); \
(a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s)))); \
(a) += (b);
/*
* SET reads 4 input bytes in little-endian byte order and stores them
* in a properly aligned word in host byte order.
*
* The check for little-endian architectures that tolerate unaligned
* memory accesses is just an optimization. Nothing will break if it
* doesn't work.
*/
#if defined(__i386__) || defined(__x86_64__) || defined(__vax__)
#define SET(n) \
(*(MD5_u32plus *)&ptr[(n) * 4])
#define GET(n) \
SET(n)
#else
#define SET(n) \
(ctx->block[(n)] = \
(MD5_u32plus)ptr[(n) * 4] | \
((MD5_u32plus)ptr[(n) * 4 + 1] << 8) | \
((MD5_u32plus)ptr[(n) * 4 + 2] << 16) | \
((MD5_u32plus)ptr[(n) * 4 + 3] << 24))
#define GET(n) \
(ctx->block[(n)])
#endif
/*
* This processes one or more 64-byte data blocks, but does NOT update
* the bit counters. There are no alignment requirements.
*/
static const void *body(MD5_CTX *ctx, const void *data, unsigned long size)
{
const unsigned char *ptr;
MD5_u32plus a, b, c, d;
MD5_u32plus saved_a, saved_b, saved_c, saved_d;
ptr = (const unsigned char *)data;
a = ctx->a;
b = ctx->b;
c = ctx->c;
d = ctx->d;
do {
saved_a = a;
saved_b = b;
saved_c = c;
saved_d = d;
/* Round 1 */
STEP(F, a, b, c, d, SET(0), 0xd76aa478, 7)
STEP(F, d, a, b, c, SET(1), 0xe8c7b756, 12)
STEP(F, c, d, a, b, SET(2), 0x242070db, 17)
STEP(F, b, c, d, a, SET(3), 0xc1bdceee, 22)
STEP(F, a, b, c, d, SET(4), 0xf57c0faf, 7)
STEP(F, d, a, b, c, SET(5), 0x4787c62a, 12)
STEP(F, c, d, a, b, SET(6), 0xa8304613, 17)
STEP(F, b, c, d, a, SET(7), 0xfd469501, 22)
STEP(F, a, b, c, d, SET(8), 0x698098d8, 7)
STEP(F, d, a, b, c, SET(9), 0x8b44f7af, 12)
STEP(F, c, d, a, b, SET(10), 0xffff5bb1, 17)
STEP(F, b, c, d, a, SET(11), 0x895cd7be, 22)
STEP(F, a, b, c, d, SET(12), 0x6b901122, 7)
STEP(F, d, a, b, c, SET(13), 0xfd987193, 12)
STEP(F, c, d, a, b, SET(14), 0xa679438e, 17)
STEP(F, b, c, d, a, SET(15), 0x49b40821, 22)
/* Round 2 */
STEP(G, a, b, c, d, GET(1), 0xf61e2562, 5)
STEP(G, d, a, b, c, GET(6), 0xc040b340, 9)
STEP(G, c, d, a, b, GET(11), 0x265e5a51, 14)
STEP(G, b, c, d, a, GET(0), 0xe9b6c7aa, 20)
STEP(G, a, b, c, d, GET(5), 0xd62f105d, 5)
STEP(G, d, a, b, c, GET(10), 0x02441453, 9)
STEP(G, c, d, a, b, GET(15), 0xd8a1e681, 14)
STEP(G, b, c, d, a, GET(4), 0xe7d3fbc8, 20)
STEP(G, a, b, c, d, GET(9), 0x21e1cde6, 5)
STEP(G, d, a, b, c, GET(14), 0xc33707d6, 9)
STEP(G, c, d, a, b, GET(3), 0xf4d50d87, 14)
STEP(G, b, c, d, a, GET(8), 0x455a14ed, 20)
STEP(G, a, b, c, d, GET(13), 0xa9e3e905, 5)
STEP(G, d, a, b, c, GET(2), 0xfcefa3f8, 9)
STEP(G, c, d, a, b, GET(7), 0x676f02d9, 14)
STEP(G, b, c, d, a, GET(12), 0x8d2a4c8a, 20)
/* Round 3 */
STEP(H, a, b, c, d, GET(5), 0xfffa3942, 4)
STEP(H2, d, a, b, c, GET(8), 0x8771f681, 11)
STEP(H, c, d, a, b, GET(11), 0x6d9d6122, 16)
STEP(H2, b, c, d, a, GET(14), 0xfde5380c, 23)
STEP(H, a, b, c, d, GET(1), 0xa4beea44, 4)
STEP(H2, d, a, b, c, GET(4), 0x4bdecfa9, 11)
STEP(H, c, d, a, b, GET(7), 0xf6bb4b60, 16)
STEP(H2, b, c, d, a, GET(10), 0xbebfbc70, 23)
STEP(H, a, b, c, d, GET(13), 0x289b7ec6, 4)
STEP(H2, d, a, b, c, GET(0), 0xeaa127fa, 11)
STEP(H, c, d, a, b, GET(3), 0xd4ef3085, 16)
STEP(H2, b, c, d, a, GET(6), 0x04881d05, 23)
STEP(H, a, b, c, d, GET(9), 0xd9d4d039, 4)
STEP(H2, d, a, b, c, GET(12), 0xe6db99e5, 11)
STEP(H, c, d, a, b, GET(15), 0x1fa27cf8, 16)
STEP(H2, b, c, d, a, GET(2), 0xc4ac5665, 23)
/* Round 4 */
STEP(I, a, b, c, d, GET(0), 0xf4292244, 6)
STEP(I, d, a, b, c, GET(7), 0x432aff97, 10)
STEP(I, c, d, a, b, GET(14), 0xab9423a7, 15)
STEP(I, b, c, d, a, GET(5), 0xfc93a039, 21)
STEP(I, a, b, c, d, GET(12), 0x655b59c3, 6)
STEP(I, d, a, b, c, GET(3), 0x8f0ccc92, 10)
STEP(I, c, d, a, b, GET(10), 0xffeff47d, 15)
STEP(I, b, c, d, a, GET(1), 0x85845dd1, 21)
STEP(I, a, b, c, d, GET(8), 0x6fa87e4f, 6)
STEP(I, d, a, b, c, GET(15), 0xfe2ce6e0, 10)
STEP(I, c, d, a, b, GET(6), 0xa3014314, 15)
STEP(I, b, c, d, a, GET(13), 0x4e0811a1, 21)
STEP(I, a, b, c, d, GET(4), 0xf7537e82, 6)
STEP(I, d, a, b, c, GET(11), 0xbd3af235, 10)
STEP(I, c, d, a, b, GET(2), 0x2ad7d2bb, 15)
STEP(I, b, c, d, a, GET(9), 0xeb86d391, 21)
a += saved_a;
b += saved_b;
c += saved_c;
d += saved_d;
ptr += 64;
} while (size -= 64);
ctx->a = a;
ctx->b = b;
ctx->c = c;
ctx->d = d;
return ptr;
}
void MD5_Init(MD5_CTX *ctx)
{
ctx->a = 0x67452301;
ctx->b = 0xefcdab89;
ctx->c = 0x98badcfe;
ctx->d = 0x10325476;
ctx->lo = 0;
ctx->hi = 0;
}
void MD5_Update(MD5_CTX *ctx, const void *data, unsigned long size)
{
MD5_u32plus saved_lo;
unsigned long used, available;
saved_lo = ctx->lo;
if ((ctx->lo = (saved_lo + size) & 0x1fffffff) < saved_lo)
ctx->hi++;
ctx->hi += size >> 29;
used = saved_lo & 0x3f;
if (used) {
available = 64 - used;
if (size < available) {
memcpy(&ctx->buffer[used], data, size);
return;
}
memcpy(&ctx->buffer[used], data, available);
data = (const unsigned char *)data + available;
size -= available;
body(ctx, ctx->buffer, 64);
}
if (size >= 64) {
data = body(ctx, data, size & ~(unsigned long)0x3f);
size &= 0x3f;
}
memcpy(ctx->buffer, data, size);
}
void MD5_Final(unsigned char *result, MD5_CTX *ctx)
{
unsigned long used, available;
used = ctx->lo & 0x3f;
ctx->buffer[used++] = 0x80;
available = 64 - used;
if (available < 8) {
memset(&ctx->buffer[used], 0, available);
body(ctx, ctx->buffer, 64);
used = 0;
available = 64;
}
memset(&ctx->buffer[used], 0, available - 8);
ctx->lo <<= 3;
ctx->buffer[56] = ctx->lo;
ctx->buffer[57] = ctx->lo >> 8;
ctx->buffer[58] = ctx->lo >> 16;
ctx->buffer[59] = ctx->lo >> 24;
ctx->buffer[60] = ctx->hi;
ctx->buffer[61] = ctx->hi >> 8;
ctx->buffer[62] = ctx->hi >> 16;
ctx->buffer[63] = ctx->hi >> 24;
body(ctx, ctx->buffer, 64);
result[0] = ctx->a;
result[1] = ctx->a >> 8;
result[2] = ctx->a >> 16;
result[3] = ctx->a >> 24;
result[4] = ctx->b;
result[5] = ctx->b >> 8;
result[6] = ctx->b >> 16;
result[7] = ctx->b >> 24;
result[8] = ctx->c;
result[9] = ctx->c >> 8;
result[10] = ctx->c >> 16;
result[11] = ctx->c >> 24;
result[12] = ctx->d;
result[13] = ctx->d >> 8;
result[14] = ctx->d >> 16;
result[15] = ctx->d >> 24;
memset(ctx, 0, sizeof(*ctx));
}

43
pkgs/crypto/md5c/md5.h Normal file
View File

@ -0,0 +1,43 @@
/*
* This is an OpenSSL-compatible implementation of the RSA Data Security, Inc.
* MD5 Message-Digest Algorithm (RFC 1321).
*
* Homepage:
* http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5
*
* Author:
* Alexander Peslyak, better known as Solar Designer <solar at openwall.com>
*
* This software was written by Alexander Peslyak in 2001. No copyright is
* claimed, and the software is hereby placed in the public domain.
* In case this attempt to disclaim copyright and place the software in the
* public domain is deemed null and void, then the software is
* Copyright (c) 2001 Alexander Peslyak and it is hereby released to the
* general public under the following terms:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted.
*
* There's ABSOLUTELY NO WARRANTY, express or implied.
*
* See md5.c for more information.
*/
#ifndef __MD5_H__
#define __MD5_H__
/* Any 32-bit or wider unsigned integer data type will do */
typedef unsigned int MD5_u32plus;
typedef struct {
MD5_u32plus lo, hi;
MD5_u32plus a, b, c, d;
unsigned char buffer[64];
MD5_u32plus block[16];
} MD5_CTX;
extern void MD5_Init(MD5_CTX *ctx);
extern void MD5_Update(MD5_CTX *ctx, const void *data, unsigned long size);
extern void MD5_Final(unsigned char *result, MD5_CTX *ctx);
#endif /* __MD5_H__ */

View File

@ -0,0 +1,23 @@
package md5c
import (
"bytes"
"encoding/hex"
"testing"
. "gopkg.in/check.v1"
)
func Test(t *testing.T) { TestingT(t) }
type MySuite struct{}
var _ = Suite(&MySuite{})
func (s *MySuite) TestMd5sum(c *C) {
testString := []byte("Test string")
expectedHash, _ := hex.DecodeString("0fd3dbec9730101bff92acc820befc34")
hash, err := Sum(bytes.NewBuffer(testString))
c.Assert(err, IsNil)
c.Assert(bytes.Equal(expectedHash, hash), Equals, true)
}

48
pkgs/crypto/md5c/md5c.go Normal file
View File

@ -0,0 +1,48 @@
// !build linux,amd64
package md5c
// #include "md5.h"
import "C"
import (
"io"
"unsafe"
)
func context() *C.MD5_CTX {
var ctx C.MD5_CTX
C.MD5_Init(&ctx)
return &ctx
}
func write(buffer []byte, ctx *C.MD5_CTX) {
size := len(buffer)
data := unsafe.Pointer(&buffer[0])
C.MD5_Update(ctx, data, C.ulong(size))
}
func Sum(reader io.Reader) ([]byte, error) {
ctx := context()
var err error
var length int
for err == nil {
byteBuffer := make([]byte, 1024*1024)
length, err = reader.Read(byteBuffer)
// break here since byteBuffer will go out of range
// when invoking subsequent write() call
if length == 0 {
break
}
byteBuffer = byteBuffer[0:length]
write(byteBuffer, ctx)
}
if err != io.EOF {
return nil, err
}
outputBuffer := make([]byte, 16)
coutputbuff := (*C.uchar)(unsafe.Pointer(&outputBuffer[0]))
C.MD5_Final(coutputbuff, ctx)
return outputBuffer, nil
}