From 13650e088ca5d2d6eb5100d705f69f32aea946a0 Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Sun, 21 Dec 2014 02:12:30 -0800 Subject: [PATCH] Implement md5c function, slower than Golang's implementation keeping it in repo to make further improvements and also rename minio-hash ---> crypto --- Makefile | 3 +- cmd/crypto/.gitignore | 1 + .../crypto-options.go} | 0 .../minio-hash.go => crypto/crypto.go} | 4 +- cmd/crypto/crypto.md | 33 ++ cmd/minio-hash/minio-hash.md | 13 - pkgs/crypto/md5/md5.go | 7 +- pkgs/crypto/md5c/md5.c | 291 ++++++++++++++++++ pkgs/crypto/md5c/md5.h | 43 +++ pkgs/crypto/md5c/md5_test.go | 23 ++ pkgs/crypto/md5c/md5c.go | 48 +++ 11 files changed, 449 insertions(+), 17 deletions(-) create mode 100644 cmd/crypto/.gitignore rename cmd/{minio-hash/minio-hash-options.go => crypto/crypto-options.go} (100%) rename cmd/{minio-hash/minio-hash.go => crypto/crypto.go} (91%) create mode 100644 cmd/crypto/crypto.md delete mode 100644 cmd/minio-hash/minio-hash.md create mode 100644 pkgs/crypto/md5c/md5.c create mode 100644 pkgs/crypto/md5c/md5.h create mode 100644 pkgs/crypto/md5c/md5_test.go create mode 100644 pkgs/crypto/md5c/md5c.go diff --git a/Makefile b/Makefile index 32b037cc1..cf18f04d9 100644 --- a/Makefile +++ b/Makefile @@ -22,6 +22,7 @@ build-cpu: build-md5: @godep go test -race -coverprofile=cover.out github.com/minio-io/minio/pkgs/crypto/md5/ + @godep go test -race -coverprofile=cover.out github.com/minio-io/minio/pkgs/crypto/md5c/ build-sha1: @godep go test -race -coverprofile=cover.out github.com/minio-io/minio/pkgs/crypto/sha1/ @@ -58,7 +59,7 @@ cover: build-erasure build-signify build-split build-crc32c build-cpu build-scsi install: build-erasure @godep go install github.com/minio-io/minio/cmd/minio && echo "Installed minio into ${GOPATH}/bin" @godep go install github.com/minio-io/minio/cmd/minio-cli && echo "Installed minio-cli into ${GOPATH}/bin" - @godep go install github.com/minio-io/minio/cmd/minio-hash && echo "Installed 
minio-hash into ${GOPATH}/bin" + @godep go install github.com/minio-io/minio/cmd/crypto && echo "Installed crypto into ${GOPATH}/bin" save: restore @godep save ./... diff --git a/cmd/crypto/.gitignore b/cmd/crypto/.gitignore new file mode 100644 index 000000000..2e8fb9de5 --- /dev/null +++ b/cmd/crypto/.gitignore @@ -0,0 +1 @@ +crypto \ No newline at end of file diff --git a/cmd/minio-hash/minio-hash-options.go b/cmd/crypto/crypto-options.go similarity index 100% rename from cmd/minio-hash/minio-hash-options.go rename to cmd/crypto/crypto-options.go diff --git a/cmd/minio-hash/minio-hash.go b/cmd/crypto/crypto.go similarity index 91% rename from cmd/minio-hash/minio-hash.go rename to cmd/crypto/crypto.go index 14a60d4e9..2c2c0af92 100644 --- a/cmd/minio-hash/minio-hash.go +++ b/cmd/crypto/crypto.go @@ -24,8 +24,8 @@ import ( func main() { app := cli.NewApp() - app.Name = "minio-hash" - app.Usage = "" + app.Name = "crypto" + app.Usage = "calculate cryptosum on a given stream" app.Commands = Options app.Run(os.Args) } diff --git a/cmd/crypto/crypto.md b/cmd/crypto/crypto.md new file mode 100644 index 000000000..dcc595446 --- /dev/null +++ b/cmd/crypto/crypto.md @@ -0,0 +1,33 @@ +% MINIO(1) Minio Manual +% Minio community +% December 2014 +# NAME +crypto - calculate crypto sum on a stream + +# SYNOPSIS + +# DESCRIPTION +```sh +NAME: + crypto - calculate cryptosum on a given stream + +USAGE: + crypto [global options] command [command options] [arguments...] 
+ +VERSION: + 0.0.0 + +COMMANDS: + md5sum + sha1sum + sha256sum + sha512sum + help, h Shows a list of commands or help for one command + +GLOBAL OPTIONS: + --help, -h show help + --version, -v print the version +``` +# EXAMPLES + +# AUTHORS \ No newline at end of file diff --git a/cmd/minio-hash/minio-hash.md b/cmd/minio-hash/minio-hash.md deleted file mode 100644 index 289db02fa..000000000 --- a/cmd/minio-hash/minio-hash.md +++ /dev/null @@ -1,13 +0,0 @@ -% MINIO(1) Minio Manual -% Minio community -% December 2014 -# NAME -md5sum - - -# SYNOPSIS - -# DESCRIPTION - -# EXAMPLES - -# AUTHORS \ No newline at end of file diff --git a/pkgs/crypto/md5/md5.go b/pkgs/crypto/md5/md5.go index 765949d5b..103f76143 100644 --- a/pkgs/crypto/md5/md5.go +++ b/pkgs/crypto/md5/md5.go @@ -8,10 +8,15 @@ import ( func Sum(reader io.Reader) ([]byte, error) { hash := md5.New() var err error + var length int for err == nil { - length := 0 byteBuffer := make([]byte, 1024*1024) length, err = reader.Read(byteBuffer) + // While hash.Write() wouldn't mind a Nil byteBuffer + // It is necessary for us to verify this and break + if length == 0 { + break + } byteBuffer = byteBuffer[0:length] hash.Write(byteBuffer) } diff --git a/pkgs/crypto/md5c/md5.c b/pkgs/crypto/md5c/md5.c new file mode 100644 index 000000000..43c945d4e --- /dev/null +++ b/pkgs/crypto/md5c/md5.c @@ -0,0 +1,291 @@ +/* + * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc. + * MD5 Message-Digest Algorithm (RFC 1321). + * + * Homepage: + * http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5 + * + * Author: + * Alexander Peslyak, better known as Solar Designer + * + * This software was written by Alexander Peslyak in 2001. No copyright is + * claimed, and the software is hereby placed in the public domain. 
+ * In case this attempt to disclaim copyright and place the software in the + * public domain is deemed null and void, then the software is + * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the + * general public under the following terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted. + * + * There's ABSOLUTELY NO WARRANTY, express or implied. + * + * (This is a heavily cut-down "BSD license".) + * + * This differs from Colin Plumb's older public domain implementation in that + * no exactly 32-bit integer data type is required (any 32-bit or wider + * unsigned integer data type will do), there's no compile-time endianness + * configuration, and the function prototypes match OpenSSL's. No code from + * Colin Plumb's implementation has been reused; this comment merely compares + * the properties of the two independent implementations. + * + * The primary goals of this implementation are portability and ease of use. + * It is meant to be fast, but not as fast as possible. Some known + * optimizations are not included to reduce source code size and avoid + * compile-time configuration. + */ + +#include <string.h> +#include "md5.h" + +/* + * The basic MD5 functions. + * + * F and G are optimized compared to their RFC 1321 definitions for + * architectures that lack an AND-NOT instruction, just like in Colin Plumb's + * implementation. + */ +#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z)))) +#define G(x, y, z) ((y) ^ ((z) & ((x) ^ (y)))) +#define H(x, y, z) (((x) ^ (y)) ^ (z)) +#define H2(x, y, z) ((x) ^ ((y) ^ (z))) +#define I(x, y, z) ((y) ^ ((x) | ~(z))) + +/* + * The MD5 transformation for all four rounds. + */ +#define STEP(f, a, b, c, d, x, t, s) \ + (a) += f((b), (c), (d)) + (x) + (t); \ + (a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s)))); \ + (a) += (b); + +/* + * SET reads 4 input bytes in little-endian byte order and stores them + * in a properly aligned word in host byte order.
+ * + * The check for little-endian architectures that tolerate unaligned + * memory accesses is just an optimization. Nothing will break if it + * doesn't work. + */ +#if defined(__i386__) || defined(__x86_64__) || defined(__vax__) +#define SET(n) \ + (*(MD5_u32plus *)&ptr[(n) * 4]) +#define GET(n) \ + SET(n) +#else +#define SET(n) \ + (ctx->block[(n)] = \ + (MD5_u32plus)ptr[(n) * 4] | \ + ((MD5_u32plus)ptr[(n) * 4 + 1] << 8) | \ + ((MD5_u32plus)ptr[(n) * 4 + 2] << 16) | \ + ((MD5_u32plus)ptr[(n) * 4 + 3] << 24)) +#define GET(n) \ + (ctx->block[(n)]) +#endif + +/* + * This processes one or more 64-byte data blocks, but does NOT update + * the bit counters. There are no alignment requirements. + */ +static const void *body(MD5_CTX *ctx, const void *data, unsigned long size) +{ + const unsigned char *ptr; + MD5_u32plus a, b, c, d; + MD5_u32plus saved_a, saved_b, saved_c, saved_d; + + ptr = (const unsigned char *)data; + + a = ctx->a; + b = ctx->b; + c = ctx->c; + d = ctx->d; + + do { + saved_a = a; + saved_b = b; + saved_c = c; + saved_d = d; + +/* Round 1 */ + STEP(F, a, b, c, d, SET(0), 0xd76aa478, 7) + STEP(F, d, a, b, c, SET(1), 0xe8c7b756, 12) + STEP(F, c, d, a, b, SET(2), 0x242070db, 17) + STEP(F, b, c, d, a, SET(3), 0xc1bdceee, 22) + STEP(F, a, b, c, d, SET(4), 0xf57c0faf, 7) + STEP(F, d, a, b, c, SET(5), 0x4787c62a, 12) + STEP(F, c, d, a, b, SET(6), 0xa8304613, 17) + STEP(F, b, c, d, a, SET(7), 0xfd469501, 22) + STEP(F, a, b, c, d, SET(8), 0x698098d8, 7) + STEP(F, d, a, b, c, SET(9), 0x8b44f7af, 12) + STEP(F, c, d, a, b, SET(10), 0xffff5bb1, 17) + STEP(F, b, c, d, a, SET(11), 0x895cd7be, 22) + STEP(F, a, b, c, d, SET(12), 0x6b901122, 7) + STEP(F, d, a, b, c, SET(13), 0xfd987193, 12) + STEP(F, c, d, a, b, SET(14), 0xa679438e, 17) + STEP(F, b, c, d, a, SET(15), 0x49b40821, 22) + +/* Round 2 */ + STEP(G, a, b, c, d, GET(1), 0xf61e2562, 5) + STEP(G, d, a, b, c, GET(6), 0xc040b340, 9) + STEP(G, c, d, a, b, GET(11), 0x265e5a51, 14) + STEP(G, b, c, d, a, GET(0), 
0xe9b6c7aa, 20) + STEP(G, a, b, c, d, GET(5), 0xd62f105d, 5) + STEP(G, d, a, b, c, GET(10), 0x02441453, 9) + STEP(G, c, d, a, b, GET(15), 0xd8a1e681, 14) + STEP(G, b, c, d, a, GET(4), 0xe7d3fbc8, 20) + STEP(G, a, b, c, d, GET(9), 0x21e1cde6, 5) + STEP(G, d, a, b, c, GET(14), 0xc33707d6, 9) + STEP(G, c, d, a, b, GET(3), 0xf4d50d87, 14) + STEP(G, b, c, d, a, GET(8), 0x455a14ed, 20) + STEP(G, a, b, c, d, GET(13), 0xa9e3e905, 5) + STEP(G, d, a, b, c, GET(2), 0xfcefa3f8, 9) + STEP(G, c, d, a, b, GET(7), 0x676f02d9, 14) + STEP(G, b, c, d, a, GET(12), 0x8d2a4c8a, 20) + +/* Round 3 */ + STEP(H, a, b, c, d, GET(5), 0xfffa3942, 4) + STEP(H2, d, a, b, c, GET(8), 0x8771f681, 11) + STEP(H, c, d, a, b, GET(11), 0x6d9d6122, 16) + STEP(H2, b, c, d, a, GET(14), 0xfde5380c, 23) + STEP(H, a, b, c, d, GET(1), 0xa4beea44, 4) + STEP(H2, d, a, b, c, GET(4), 0x4bdecfa9, 11) + STEP(H, c, d, a, b, GET(7), 0xf6bb4b60, 16) + STEP(H2, b, c, d, a, GET(10), 0xbebfbc70, 23) + STEP(H, a, b, c, d, GET(13), 0x289b7ec6, 4) + STEP(H2, d, a, b, c, GET(0), 0xeaa127fa, 11) + STEP(H, c, d, a, b, GET(3), 0xd4ef3085, 16) + STEP(H2, b, c, d, a, GET(6), 0x04881d05, 23) + STEP(H, a, b, c, d, GET(9), 0xd9d4d039, 4) + STEP(H2, d, a, b, c, GET(12), 0xe6db99e5, 11) + STEP(H, c, d, a, b, GET(15), 0x1fa27cf8, 16) + STEP(H2, b, c, d, a, GET(2), 0xc4ac5665, 23) + +/* Round 4 */ + STEP(I, a, b, c, d, GET(0), 0xf4292244, 6) + STEP(I, d, a, b, c, GET(7), 0x432aff97, 10) + STEP(I, c, d, a, b, GET(14), 0xab9423a7, 15) + STEP(I, b, c, d, a, GET(5), 0xfc93a039, 21) + STEP(I, a, b, c, d, GET(12), 0x655b59c3, 6) + STEP(I, d, a, b, c, GET(3), 0x8f0ccc92, 10) + STEP(I, c, d, a, b, GET(10), 0xffeff47d, 15) + STEP(I, b, c, d, a, GET(1), 0x85845dd1, 21) + STEP(I, a, b, c, d, GET(8), 0x6fa87e4f, 6) + STEP(I, d, a, b, c, GET(15), 0xfe2ce6e0, 10) + STEP(I, c, d, a, b, GET(6), 0xa3014314, 15) + STEP(I, b, c, d, a, GET(13), 0x4e0811a1, 21) + STEP(I, a, b, c, d, GET(4), 0xf7537e82, 6) + STEP(I, d, a, b, c, GET(11), 0xbd3af235, 10) + 
STEP(I, c, d, a, b, GET(2), 0x2ad7d2bb, 15) + STEP(I, b, c, d, a, GET(9), 0xeb86d391, 21) + + a += saved_a; + b += saved_b; + c += saved_c; + d += saved_d; + + ptr += 64; + } while (size -= 64); + + ctx->a = a; + ctx->b = b; + ctx->c = c; + ctx->d = d; + + return ptr; +} + +void MD5_Init(MD5_CTX *ctx) +{ + ctx->a = 0x67452301; + ctx->b = 0xefcdab89; + ctx->c = 0x98badcfe; + ctx->d = 0x10325476; + + ctx->lo = 0; + ctx->hi = 0; +} + +void MD5_Update(MD5_CTX *ctx, const void *data, unsigned long size) +{ + MD5_u32plus saved_lo; + unsigned long used, available; + + saved_lo = ctx->lo; + if ((ctx->lo = (saved_lo + size) & 0x1fffffff) < saved_lo) + ctx->hi++; + ctx->hi += size >> 29; + + used = saved_lo & 0x3f; + + if (used) { + available = 64 - used; + + if (size < available) { + memcpy(&ctx->buffer[used], data, size); + return; + } + + memcpy(&ctx->buffer[used], data, available); + data = (const unsigned char *)data + available; + size -= available; + body(ctx, ctx->buffer, 64); + } + + if (size >= 64) { + data = body(ctx, data, size & ~(unsigned long)0x3f); + size &= 0x3f; + } + + memcpy(ctx->buffer, data, size); +} + +void MD5_Final(unsigned char *result, MD5_CTX *ctx) +{ + unsigned long used, available; + + used = ctx->lo & 0x3f; + + ctx->buffer[used++] = 0x80; + + available = 64 - used; + + if (available < 8) { + memset(&ctx->buffer[used], 0, available); + body(ctx, ctx->buffer, 64); + used = 0; + available = 64; + } + + memset(&ctx->buffer[used], 0, available - 8); + + ctx->lo <<= 3; + ctx->buffer[56] = ctx->lo; + ctx->buffer[57] = ctx->lo >> 8; + ctx->buffer[58] = ctx->lo >> 16; + ctx->buffer[59] = ctx->lo >> 24; + ctx->buffer[60] = ctx->hi; + ctx->buffer[61] = ctx->hi >> 8; + ctx->buffer[62] = ctx->hi >> 16; + ctx->buffer[63] = ctx->hi >> 24; + + body(ctx, ctx->buffer, 64); + + result[0] = ctx->a; + result[1] = ctx->a >> 8; + result[2] = ctx->a >> 16; + result[3] = ctx->a >> 24; + result[4] = ctx->b; + result[5] = ctx->b >> 8; + result[6] = ctx->b >> 16; + 
result[7] = ctx->b >> 24; + result[8] = ctx->c; + result[9] = ctx->c >> 8; + result[10] = ctx->c >> 16; + result[11] = ctx->c >> 24; + result[12] = ctx->d; + result[13] = ctx->d >> 8; + result[14] = ctx->d >> 16; + result[15] = ctx->d >> 24; + + memset(ctx, 0, sizeof(*ctx)); +} diff --git a/pkgs/crypto/md5c/md5.h b/pkgs/crypto/md5c/md5.h new file mode 100644 index 000000000..0553cd21f --- /dev/null +++ b/pkgs/crypto/md5c/md5.h @@ -0,0 +1,43 @@ +/* + * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc. + * MD5 Message-Digest Algorithm (RFC 1321). + * + * Homepage: + * http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5 + * + * Author: + * Alexander Peslyak, better known as Solar Designer + * + * This software was written by Alexander Peslyak in 2001. No copyright is + * claimed, and the software is hereby placed in the public domain. + * In case this attempt to disclaim copyright and place the software in the + * public domain is deemed null and void, then the software is + * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the + * general public under the following terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted. + * + * There's ABSOLUTELY NO WARRANTY, express or implied. + * + * See md5.c for more information. 
+ */ + +#ifndef __MD5_H__ +#define __MD5_H__ + +/* Any 32-bit or wider unsigned integer data type will do */ +typedef unsigned int MD5_u32plus; + +typedef struct { + MD5_u32plus lo, hi; + MD5_u32plus a, b, c, d; + unsigned char buffer[64]; + MD5_u32plus block[16]; +} MD5_CTX; + +extern void MD5_Init(MD5_CTX *ctx); +extern void MD5_Update(MD5_CTX *ctx, const void *data, unsigned long size); +extern void MD5_Final(unsigned char *result, MD5_CTX *ctx); + +#endif /* __MD5_H__ */ diff --git a/pkgs/crypto/md5c/md5_test.go b/pkgs/crypto/md5c/md5_test.go new file mode 100644 index 000000000..5267a1946 --- /dev/null +++ b/pkgs/crypto/md5c/md5_test.go @@ -0,0 +1,23 @@ +package md5c + +import ( + "bytes" + "encoding/hex" + "testing" + + . "gopkg.in/check.v1" +) + +func Test(t *testing.T) { TestingT(t) } + +type MySuite struct{} + +var _ = Suite(&MySuite{}) + +func (s *MySuite) TestMd5sum(c *C) { + testString := []byte("Test string") + expectedHash, _ := hex.DecodeString("0fd3dbec9730101bff92acc820befc34") + hash, err := Sum(bytes.NewBuffer(testString)) + c.Assert(err, IsNil) + c.Assert(bytes.Equal(expectedHash, hash), Equals, true) +} diff --git a/pkgs/crypto/md5c/md5c.go b/pkgs/crypto/md5c/md5c.go new file mode 100644 index 000000000..f79744461 --- /dev/null +++ b/pkgs/crypto/md5c/md5c.go @@ -0,0 +1,48 @@ +// !build linux,amd64 + +package md5c + +// #include "md5.h" +import "C" +import ( + "io" + "unsafe" +) + +func context() *C.MD5_CTX { + var ctx C.MD5_CTX + C.MD5_Init(&ctx) + return &ctx +} + +func write(buffer []byte, ctx *C.MD5_CTX) { + size := len(buffer) + data := unsafe.Pointer(&buffer[0]) + C.MD5_Update(ctx, data, C.ulong(size)) +} + +func Sum(reader io.Reader) ([]byte, error) { + ctx := context() + var err error + var length int + for err == nil { + byteBuffer := make([]byte, 1024*1024) + length, err = reader.Read(byteBuffer) + // break here since byteBuffer will go out of range + // when invoking subsequent write() call + if length == 0 { + break + } + byteBuffer 
= byteBuffer[0:length] + write(byteBuffer, ctx) + } + + if err != io.EOF { + return nil, err + } + + outputBuffer := make([]byte, 16) + coutputbuff := (*C.uchar)(unsafe.Pointer(&outputBuffer[0])) + C.MD5_Final(coutputbuff, ctx) + return outputBuffer, nil +}