删除相同图片,Python实现


原理:读取图片的二进制内容,用 MD5(或 SHA-1)计算散列值作为图片的内容指纹,并与已记录指纹的字典比对;若指纹已存在,则判定为重复图片,将其移出原目录(下面的实现是把重复图片移动到备份目录,而不是直接删除)。
Python实现:
 
01
import os
02
import re
03
import hashlib
04
from time import time
05
 
06
# Root of the directory tree that is scanned for duplicate images.
rootPath = 'F:/Image/照片'
# Directory that duplicate images are moved into (they are kept, not deleted).
backupPath = 'F:/Image/backup'
# Maps the MD5 hex digest of a file's content to the path of the first file
# seen with that content.
picDic = {}
# Matches any path that ends in a supported image extension.  IGNORECASE also
# accepts mixed-case spellings such as ".Jpg", which the explicit
# lower/upper alternation would skip.
regular = re.compile(r'^(.*)\.(jpg|jpeg|bmp|gif|png)$', re.IGNORECASE)
10
 
11
def RemoverRePic(dirPath, backupDir=None, seen=None, pattern=None):
    """Move duplicate images found under ``dirPath`` into a backup directory.

    The directory tree is walked recursively.  Every file whose path matches
    ``pattern`` is hashed by content (MD5).  The first file seen with a given
    digest is recorded in ``seen``; any later file with the same digest is
    renamed into ``backupDir`` under the name
    ``<md5 of its original path><original extension>``.

    Args:
        dirPath: Directory to scan.
        backupDir: Destination for duplicates.  Defaults to the module-level
            ``backupPath``.  Created on demand if it does not exist.
        seen: Dict mapping content digest -> path of the first file with that
            content; updated in place.  Defaults to the module-level
            ``picDic``.
        pattern: Compiled regex selecting which paths count as images.
            Defaults to the module-level ``regular``.

    Returns:
        The number of duplicate files moved, including those found in
        sub-directories.

    Raises:
        OSError: If a directory cannot be listed, a file cannot be read, or a
            duplicate cannot be renamed into ``backupDir``.
    """
    if backupDir is None:
        backupDir = backupPath
    if seen is None:
        seen = picDic
    if pattern is None:
        pattern = regular

    quantity = 0
    # Sorted so that which copy is kept does not depend on the platform's
    # directory listing order.
    for name in sorted(os.listdir(dirPath)):
        childPath = dirPath + '/' + name
        if os.path.isdir(childPath):
            # Accumulate (the original "=+" overwrote the running total).
            quantity += RemoverRePic(childPath, backupDir, seen, pattern)
            continue
        if not pattern.match(childPath):
            continue

        picMd5 = _file_md5(childPath)
        if picMd5 not in seen:
            seen[picMd5] = childPath
            continue

        # hashlib needs bytes; encode text paths so non-ASCII names work.
        if isinstance(childPath, bytes):
            pathBytes = childPath
        else:
            pathBytes = childPath.encode('utf-8')
        # splitext takes only the final extension, so dots in directory
        # names or earlier in the file name do not leak into the new name.
        extension = os.path.splitext(childPath)[1]
        newPath = backupDir + '/' + hashlib.md5(pathBytes).hexdigest() + extension
        if not os.path.isdir(backupDir):
            os.makedirs(backupDir)
        os.rename(childPath, newPath)
        quantity += 1
    return quantity


def _file_md5(path, chunkSize=65536):
    """Return the MD5 hex digest of the file at ``path``, read in chunks."""
    digest = hashlib.md5()
    with open(path, 'rb') as fh:
        for chunk in iter(lambda: fh.read(chunkSize), b''):
            digest.update(chunk)
    return digest.hexdigest()
30
 
31
if __name__ == '__main__':
    # Script entry point: scan rootPath, then report the start timestamp,
    # the number of duplicates moved, and the elapsed wall-clock seconds.
    # Single-argument print(...) calls emit the same text on Python 2 and 3.
    t = time()
    print('start:')
    print(t)
    # RemoverRePic returns how many duplicate images were moved.
    print(RemoverRePic(rootPath))
    print('end:')
    # Elapsed seconds since the scan started.
    print(time() - t)
作者:西麦

相关内容

    暂无相关文章

评论关闭