Subversion Repositories planix.SVN

Rev

Rev 2 | Blame | Compare with Previous | Last modification | View Log | RSS feed

#include "stdinc.h"
#include "dat.h"
#include "fns.h"

Bloom b;

void
usage(void)
{
        fprint(2, "usage: fmtbloom [-s size] [-n nblocks | -N nhash] file\n");
        threadexitsall(0);
}

void
threadmain(int argc, char *argv[])
{
        Part *part;
        char *file;
        vlong bits, size, size2;
        int nhash;
        vlong nblocks;
        
        ventifmtinstall();
        statsinit();

        size = 0;
        nhash = 0;
        nblocks = 0;
        ARGBEGIN{
        case 'n':
                if(nhash || nblocks)
                        usage();
                nblocks = unittoull(EARGF(usage()));
                break;
        case 'N':
                if(nhash || nblocks)
                        usage();
                nhash = unittoull(EARGF(usage()));
                if(nhash > BloomMaxHash){
                        fprint(2, "maximum possible is -N %d", BloomMaxHash);
                        usage();
                }
                break;
        case 's':
                size = unittoull(ARGF());
                if(size == ~0)
                        usage();
                break;
        default:
                usage();
                break;
        }ARGEND

        if(argc != 1)
                usage();

        file = argv[0];

        part = initpart(file, ORDWR|ODIRECT);
        if(part == nil)
                sysfatal("can't open partition %s: %r", file);

        if(size == 0)
                size = part->size;
        
        if(size < 1024*1024)
                sysfatal("bloom filter too small");

        if(size > MaxBloomSize){
                fprint(2, "warning: not using entire %,lld bytes; using only %,lld bytes\n",
                        size, (vlong)MaxBloomSize);
                size = MaxBloomSize;
        }
        if(size&(size-1)){
                for(size2=1; size2<size; size2*=2)
                        ;
                size = size2/2;
                fprint(2, "warning: size not a power of 2; only using %lldMB\n", size/1024/1024);
        }

        if(nblocks){
                /*
                 * no use for more than 32 bits per block
                 * shoot for less than 64 bits per block
                 */
                size2 = size;
                while(size2*8 >= nblocks*64)
                        size2 >>= 1;
                if(size2 != size){
                        size = size2;
                        fprint(2, "warning: using only %lldMB - not enough blocks to warrant more\n",
                                size/1024/1024);
                }

                /*
                 * optimal is to use ln 2 times as many hash functions as we have bits per blocks.  
                 */
                bits = (8*size)/nblocks;
                nhash = bits*7/10;
                if(nhash > BloomMaxHash)
                        nhash = BloomMaxHash;
        }
        if(!nhash)
                nhash = BloomMaxHash;
        if(bloominit(&b, size, nil) < 0)
                sysfatal("bloominit: %r");
        b.nhash = nhash;
        bits = nhash*10/7;
        nblocks = (8*size)/bits;
        fprint(2, "fmtbloom: using %lldMB, %d hashes/score, best up to %,lld blocks\n", size/1024/1024, nhash, nblocks);
        b.data = vtmallocz(size);
        b.part = part;
        if(writebloom(&b) < 0)
                sysfatal("writing %s: %r", file);
        threadexitsall(0);
}