pyList

golang 计算大文件md5

以前介绍过用 Python 计算大文件的 MD5 值,这里介绍如何使用 Golang 计算大文件的 MD5。

golang 计算大文件md5

还是使用分片计算的方法:每次只读取固定大小的一块数据并送入哈希,避免把整个文件一次性读入内存。

package main

import (
	"crypto/md5"
	"flag"
	"fmt"
	"io"
	"math"
	"os"
	"time"
)

// filechunk is the number of bytes handled per block: 8192 bytes (8 KB).
const filechunk = 8192

// file_src points at the value of the -file command-line flag, which
// defaults to the empty string.
var file_src = flag.String("file", "", "The file to hash")

// main hashes the file named by the -file flag with MD5, reading it in
// filechunk-sized blocks, then prints the checksum and the elapsed time.
// It panics if the file cannot be opened, stat'ed, or read in full.
func main() {
	flag.Parse()
	fmt.Println("Opening file:" + *file_src)

	t1 := time.Now()

	file, err := os.Open(*file_src)
	if err != nil {
		panic(err.Error())
	}
	defer file.Close()

	// The file size determines how many blocks the loop below reads.
	info, err := file.Stat()
	if err != nil {
		panic(err.Error())
	}
	filesize := info.Size()

	blocks := uint64(math.Ceil(float64(filesize) / float64(filechunk)))

	hash := md5.New()

	// A single buffer is reused for every block instead of allocating one
	// per iteration.
	buf := make([]byte, filechunk)
	remaining := filesize
	for i := uint64(0); i < blocks; i++ {
		// Every block is filechunk bytes except possibly the last one.
		blocksize := int64(filechunk)
		if remaining < blocksize {
			blocksize = remaining
		}
		chunk := buf[:blocksize]

		// io.ReadFull reads until chunk is full or fails; a bare Read may
		// return fewer bytes, which would leave stale data in the buffer.
		if _, err := io.ReadFull(file, chunk); err != nil {
			panic(err.Error())
		}
		hash.Write(chunk) // hash.Hash's Write never returns an error
		remaining -= blocksize
	}

	fmt.Printf("%s checksum is %x\n", file.Name(), hash.Sum(nil))

	t2 := time.Now()
	fmt.Println("消耗时间:", t2.Sub(t1), "秒")
}

单独做一个包

// Package md5 computes MD5 checksum for large files
package md5

import (
	"bufio"
	"crypto/md5"
	"fmt"
	"io"
	"os"
)

// bufferSize is the size in bytes of the bufio.Reader that MD5sum places in
// front of the file.
const bufferSize = 65536

// MD5sum returns the MD5 checksum of the file at filename as a lowercase
// hexadecimal string.
//
// If filename is a directory, MD5sum returns an empty string and a nil
// error. Errors from os.Stat, os.Open, and from reading the file are
// returned unchanged, together with an empty string.
func MD5sum(filename string) (string, error) {
	info, err := os.Stat(filename)
	if err != nil {
		return "", err
	}
	if info.IsDir() {
		return "", nil
	}

	file, err := os.Open(filename)
	if err != nil {
		return "", err
	}
	defer file.Close()

	hash := md5.New()

	// io.Copy streams the file into the hash until EOF and reports any
	// other read error, so no manual read loop or EOF check is needed.
	if _, err := io.Copy(hash, bufio.NewReaderSize(file, bufferSize)); err != nil {
		return "", err
	}

	return fmt.Sprintf("%x", hash.Sum(nil)), nil
}

使用示例

package main

import (
    "fmt"
    "./md5"
)

// main prints the MD5 checksum of a hard-coded sample file, or the error
// reported by md5.MD5sum if the file cannot be hashed.
func main() {
    file := "/Users/wei/Downloads/Win8pro.iso"
    md5sum, err := md5.MD5sum(file)
    if err != nil {
        // Report the failure instead of printing an empty checksum.
        fmt.Println("md5sum:", err)
        return
    }
    fmt.Println(md5sum)
}
package main

import (
	"bufio"
	"crypto/md5"
	"flag"
	"fmt"
	"io"
	"os"
	"time"
)

// file_src points at the value of the -file command-line flag, which
// defaults to the empty string.
var file_src = flag.String("file", "", "The file to hash")

// bufferSize is the size in bytes of the bufio.Reader that MD5sum places in
// front of the file.
const bufferSize = 65536

// MD5sum returns the MD5 checksum of the file at filename as a lowercase
// hexadecimal string.
//
// If filename is a directory, MD5sum returns an empty string and a nil
// error. Errors from os.Stat, os.Open, and from reading the file are
// returned unchanged, together with an empty string.
func MD5sum(filename string) (string, error) {
	info, err := os.Stat(filename)
	if err != nil {
		return "", err
	}
	if info.IsDir() {
		return "", nil
	}

	file, err := os.Open(filename)
	if err != nil {
		return "", err
	}
	defer file.Close()

	hash := md5.New()

	// io.Copy streams the file into the hash until EOF and reports any
	// other read error, so no manual read loop or EOF check is needed.
	if _, err := io.Copy(hash, bufio.NewReaderSize(file, bufferSize)); err != nil {
		return "", err
	}

	return fmt.Sprintf("%x", hash.Sum(nil)), nil
}

// main computes the MD5 checksum of the file named by the -file flag using
// MD5sum, then prints the checksum and the elapsed time. If MD5sum returns
// an error, it is written to standard error and the program exits with
// status 1.
func main() {
	flag.Parse()
	fmt.Println("Opening file:" + *file_src)

	t1 := time.Now()

	md5sum, err := MD5sum(*file_src)
	if err != nil {
		// Report the failure instead of printing an empty checksum.
		fmt.Fprintln(os.Stderr, "md5sum:", err)
		os.Exit(1)
	}
	fmt.Println(md5sum)

	t2 := time.Now()
	fmt.Println("消耗时间:", t2.Sub(t1), "秒")
}

与python 速度比较

对一个 3.8 GB 的文件,Go 比 Python 快约 4 秒:Go 耗时 8.491195742s,Python 耗时 12.5568759441s。

本文标签: golang md5 文件 计算
本文网址: https://pylist.com/t/1456468578 (转载注明出处)
如果你有任何建议或疑问可以在下面 留言
发表第一条评论!
验证码图片
相关推荐