以前介绍过用 Python 计算大文件的 MD5 值,这里介绍如何使用 Golang 计算大文件的 MD5。
这里仍然使用分片(分块读取)计算的方法。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
package main
import (
"crypto/md5"
"flag"
"fmt"
"io"
"math"
"os"
"time"
)
// filechunk is the number of bytes read from the file and fed to the hash
// per iteration (8 KiB).
const filechunk = 8192

// file_src holds the value of the -file flag: the path of the file to hash.
var file_src = flag.String("file", "", "The file to hash")

// main hashes the file named by the -file flag with MD5, reading it in
// filechunk-sized pieces, then prints the checksum and the elapsed time.
// Any failure to open, stat or read the file aborts the program with a panic.
func main() {
	flag.Parse()
	fmt.Println("Opening file:" + *file_src)
	t1 := time.Now()

	file, err := os.Open(*file_src)
	if err != nil {
		panic(err.Error())
	}
	defer file.Close()

	// The file size determines how many chunks are read below.
	info, err := file.Stat()
	if err != nil {
		panic(err.Error())
	}
	filesize := info.Size()
	blocks := uint64(math.Ceil(float64(filesize) / float64(filechunk)))

	hash := md5.New()
	// One buffer is reused for every chunk instead of allocating per iteration.
	buf := make([]byte, filechunk)
	for i := uint64(0); i < blocks; i++ {
		// The last chunk may be shorter than filechunk.
		blocksize := int(math.Min(filechunk, float64(filesize-int64(i*filechunk))))
		chunk := buf[:blocksize]
		// A plain Read may return fewer bytes than requested; io.ReadFull
		// either fills the chunk completely or reports an error, so the
		// hash never sees stale or zero-padded bytes.
		if _, err := io.ReadFull(file, chunk); err != nil {
			panic(err.Error())
		}
		hash.Write(chunk) // hash.Hash.Write never returns an error
	}

	fmt.Printf("%s checksum is %x\n", file.Name(), hash.Sum(nil))
	fmt.Println("消耗时间:", time.Since(t1), "秒")
}
单独做一个包
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
// Package md5 computes MD5 checksum for large files
package md5
import (
"bufio"
"crypto/md5"
"fmt"
"io"
"os"
)
// bufferSize is the size in bytes of the read buffer placed in front of the
// file while it is being hashed.
const bufferSize = 65536

// MD5sum returns the MD5 checksum of the file named by filename as a
// lowercase hexadecimal string.
//
// If filename refers to a directory, MD5sum returns an empty string and a
// nil error. Errors from stat, open or read are returned unchanged together
// with an empty string.
func MD5sum(filename string) (string, error) {
	info, err := os.Stat(filename)
	if err != nil {
		return "", err
	}
	if info.IsDir() {
		// Directories have no content to hash; callers get "" and no error.
		return "", nil
	}

	file, err := os.Open(filename)
	if err != nil {
		return "", err
	}
	defer file.Close()

	hash := md5.New()
	// io.Copy streams the file into the hash until EOF and reports any other
	// read error, so the file is never loaded into memory as a whole.
	if _, err := io.Copy(hash, bufio.NewReaderSize(file, bufferSize)); err != nil {
		return "", err
	}
	return fmt.Sprintf("%x", hash.Sum(nil)), nil
}
使用示例
1
2
3
4
5
6
7
8
9
10
11
12
package main
import (
"fmt"
"./md5"
)
// main prints the MD5 checksum of a hard-coded example file, or the error
// reported by md5.MD5sum when the checksum cannot be computed.
func main() {
	file := "/Users/wei/Downloads/Win8pro.iso"
	md5sum, err := md5.MD5sum(file)
	if err != nil {
		// Report the failure instead of silently printing an empty line.
		fmt.Println("md5sum error:", err)
		return
	}
	fmt.Println(md5sum)
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
package main
import (
"bufio"
"crypto/md5"
"flag"
"fmt"
"io"
"os"
"time"
)
// file_src holds the value of the -file flag: the path of the file to hash.
var file_src = flag.String("file", "", "The file to hash")
// bufferSize is the size in bytes of the read buffer placed in front of the
// file while it is being hashed.
const bufferSize = 65536

// MD5sum returns the MD5 checksum of the file named by filename as a
// lowercase hexadecimal string.
//
// If filename refers to a directory, MD5sum returns an empty string and a
// nil error. Errors from stat, open or read are returned unchanged together
// with an empty string.
func MD5sum(filename string) (string, error) {
	info, err := os.Stat(filename)
	if err != nil {
		return "", err
	}
	if info.IsDir() {
		// Directories have no content to hash; callers get "" and no error.
		return "", nil
	}

	file, err := os.Open(filename)
	if err != nil {
		return "", err
	}
	defer file.Close()

	hash := md5.New()
	// io.Copy streams the file into the hash until EOF and reports any other
	// read error, so the file is never loaded into memory as a whole.
	if _, err := io.Copy(hash, bufio.NewReaderSize(file, bufferSize)); err != nil {
		return "", err
	}
	return fmt.Sprintf("%x", hash.Sum(nil)), nil
}
// main computes the MD5 checksum of the file named by the -file flag, then
// prints the checksum and the elapsed time. If the checksum cannot be
// computed, the error is written to standard error and the program exits
// with status 1.
func main() {
	flag.Parse()
	fmt.Println("Opening file:" + *file_src)
	t1 := time.Now()

	md5sum, err := MD5sum(*file_src)
	if err != nil {
		// Report the failure instead of printing an empty checksum.
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Println(md5sum)
	fmt.Println("消耗时间:", time.Since(t1), "秒")
}
与 Python 的速度比较
对于 3.8G 的文件,Go 比 Python 快约 4 秒:Go 耗时 8.491195742s,Python 耗时 12.5568759441s。
本文网址: https://pylist.com/topic/126.html 转摘请注明来源