#!/usr/bin/env python

import bsddb, sha, binascii
import os, sys
from gzip import GzipFile
from StringIO import StringIO

def hash(file, piecesize):
    """Compute SHA-1 digests of a stream, both overall and per piece.

    The stream is consumed with ``file.read(piecesize)`` until a read
    returns an empty result.

    Parameters:
        file: object with a ``read(n)`` method (the caller keeps ownership
            and is responsible for closing it).
        piecesize: number of bytes requested per read, i.e. per piece.

    Returns:
        ``(full_hexdigest, pieces)`` where ``pieces`` is a list of
        ``(piece_hexdigest, piece_length)`` tuples in stream order.  The
        last piece may be shorter than ``piecesize``.
    """
    # Imported locally because the module header only pulls in the
    # deprecated ``sha`` module; hashlib.sha1 yields the same digests.
    import hashlib

    pieces = []
    whole = hashlib.sha1()
    while True:
        block = file.read(piecesize)
        # Falsy test (rather than == "") also terminates on an empty
        # bytes object returned by binary streams.
        if not block:
            break
        pieces.append((hashlib.sha1(block).hexdigest(), len(block)))
        whole.update(block)
    return (whole.hexdigest(), pieces)

# Largest piece size considered, in bytes.
piecesize = 512*1024
# Candidate piece sizes are whole multiples of this many bytes.
chunksize = 16*1024

def optimalpiecesize(size):
    """Pick a piece size for a file of ``size`` bytes.

    Candidates are the multiples of ``chunksize`` from half of
    ``piecesize`` up to ``piecesize`` itself.  With ``n`` being the number
    of whole ``piecesize`` pieces that fit into ``size``, a candidate is
    kept only if what is left after ``n`` candidate-sized pieces fits
    into one more piece.  Among those, the candidate whose leftover is
    closest in length to the candidate itself wins; ties go to the
    smaller candidate.

    Returns the chosen piece size in bytes.
    """
    # Explicit floor division so the result does not depend on whether
    # "/" means integer or true division in the running interpreter.
    whole_pieces = int(size // piecesize)
    max_multiple = piecesize // chunksize

    scored = []
    for multiple in range(max_multiple // 2, max_multiple + 1):
        candidate = multiple * chunksize
        leftover = size - candidate * whole_pieces
        if leftover <= candidate:
            scored.append((abs(candidate - leftover), candidate))

    # Tuples compare by score first, then by candidate size.
    return min(scored)[1]

# Path of the cache database, taken from the first command-line argument.
cache_file = sys.argv[1]
# NOTE(review): not read or written anywhere else in the visible source --
# possibly a leftover; confirm before removing.
pieces = {}

# B-tree database holding one entry per processed file, keyed by
# "<filename>:pc" with the packed piece hashes as value.
# NOTE(review): flag "w" is documented by bsddb as read-write on an existing
# database -- confirm whether the file is expected to exist beforehand.
cache = bsddb.btopen(cache_file, "w")

def str2hash(s):
    """Decode a packed hash record as produced by hash2str().

    Layout of ``s``: the first 20 bytes are the digest of the whole file;
    every following 24-byte record is a 4-byte big-endian length followed
    by a 20-byte piece digest.

    Returns:
        ``(full_hexdigest, pieces)`` with ``pieces`` a list of
        ``(piece_hexdigest, length)`` tuples, or ``(None, [])`` when ``s``
        is empty.

    NOTE: a truncated trailing record is not detected; whatever bytes are
    present are decoded as-is.
    """
    if not s:
        return None, []

    fh = binascii.b2a_hex(s[:20])
    pieces = []
    # Walk the records by offset instead of re-slicing the remainder of
    # the string on every iteration.
    for start in range(20, len(s), 24):
        length = s[start:start + 4]
        digest = s[start + 4:start + 24]
        # int() promotes to long automatically where needed on Python 2.
        pieces.append((binascii.b2a_hex(digest),
                       int(binascii.b2a_hex(length), 16)))
    return fh, pieces

def hash2str(fh, hs):
    """Pack a full digest and its piece digests into one binary string.

    Parameters:
        fh: hex digest of the whole file.
        hs: iterable of ``(piece_hexdigest, length)`` tuples.

    Returns:
        The decoded bytes of ``fh`` followed, for every piece, by the
        length as 4 big-endian bytes and the decoded piece digest.  This
        is the format str2hash() reads back.

    Raises:
        ValueError: if a length does not fit into the 4-byte length field.
    """
    parts = [binascii.a2b_hex(fh)]
    for (h, l) in hs:
        # "%08x" only pads; a larger value would silently widen the
        # length field and shift every following record.
        if not 0 <= l <= 0xFFFFFFFF:
            raise ValueError("piece length %r does not fit in 4 bytes" % (l,))
        parts.append(binascii.a2b_hex("%08x" % l))
        parts.append(binascii.a2b_hex(h))
    # Single join instead of repeated concatenation inside the loop.
    return b"".join(parts)

# Main loop: read one filename per line from stdin and print a
# "SHA1-Pieces" stanza for every file larger than `piecesize`.  Results are
# stored in `cache` under "<filename>:pc" and reused on later runs; files of
# at most `piecesize` bytes are cached as an empty string and print nothing.
try:
    for filename in sys.stdin:
        filename = filename.rstrip()
        fnkey = filename + ":pc"
        if cache.has_key(fnkey):
            sha1, result = str2hash(cache[fnkey])
        else:
            size = os.stat(filename).st_size
            if size <= piecesize:
                # Nothing to split: remember that with an empty entry.
                sha1 = None
                values = ""
                result = []
            else:
                ps = optimalpiecesize(size)
                # Binary mode, so the digests cover the exact bytes on
                # disk on every platform.
                infile = open(filename, "rb")
                try:
                    sha1, result = hash(infile, ps)
                finally:
                    # Release the handle even if hashing fails.
                    infile.close()
                values = hash2str(sha1, result)
            cache[fnkey] = values

        if result:
            # Single-argument print(...) emits the same text whether
            # print is a statement or a function.
            print("Filename: %s" % (filename))
            print("SHA1: %s" % (sha1))
            print("SHA1-Pieces:")
            for x in result:
                print(" %s %d" % x)
            print("")
finally:
    # Flush and close the cache even when a file could not be processed,
    # so entries computed so far are kept.
    cache.sync()
    cache.close()
