Multiprocessing на питоне (сравнение файлов)

Jan 27, 2015 17:34

#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Compare two CSV files in parallel with ``multiprocessing``.

The numbers file is split into chunk files, then one process per chunk checks
every record of its chunk against every row of the ranges file and appends the
matches to a per-chunk result file.

Author's note: this does not claim to be ideal code; it is kept as an example
and so that it does not get lost.

The script runs on both Python 2.7 and Python 3.
"""
import codecs
import csv
import io
import sys
import multiprocessing
import mmap
import os
try:
    import StringIO  # Python 2 only; not used below, kept from the original import list
except ImportError:
    StringIO = None  # the module does not exist on Python 3
import fileinput
import time

numbers = u'./numbers_clean.txt'
ranges = u'./ranges_clean.txt'
firsplitfile = u'./output/outputnumbers0'
splitfilename = u'./output/outputnumbers'
outputresult = u'./result/foundnumbers'
#output=u'../output/output_multi.txt'
#outputsplit=u'../output/split/outputnumbers0.txt'
#agicnumber=u'./output/output_magicnumber.txt'
Utf8 = codecs.lookup('utf-8')

_PY2 = sys.version_info[0] == 2


def _open_csv(path, mode):
    """Open *path* ('r' or 'a') the way the running Python's csv module expects."""
    if _PY2:
        # Python 2's csv module works on byte strings and cannot write
        # non-ASCII unicode objects, so the UTF-8 bytes are passed through as-is.
        return open(path, mode + 'b')
    # Python 3's csv module works on text and wants newline='' on the file.
    return io.open(path, mode, encoding=Utf8.name, newline='')


def notworker(x):
    """Print *x*."""
    print(x)


def splitfiles(spf, spfname, whatsplit, lines_per_file=40000):
    """Split the file *whatsplit* into consecutive chunk files.

    The first chunk is written to *spf*; every following chunk is written to
    ``spfname + str(k)`` for k = 1, 2, ...  Lines are copied byte-for-byte.

    The line counter starts at 1, so the first chunk holds
    ``lines_per_file - 1`` lines and each later chunk holds up to
    ``lines_per_file`` lines.  A new chunk file is opened as soon as the
    previous one is full, so the last file can be empty.

    Args:
        spf: path of the first chunk file.
        spfname: path prefix of the remaining chunk files.
        whatsplit: path of the file to split.
        lines_per_file: chunk size used for the rollover test (default 40000).

    Returns:
        The number of chunk files created, the first one included.

    Raises:
        ValueError: if *lines_per_file* is smaller than 1.
    """
    if lines_per_file < 1:
        raise ValueError("lines_per_file must be at least 1")
    spi = 1
    spc = 0
    fout = open(spf, "wb")
    try:
        with open(whatsplit, "rb") as source:
            for line in source:
                fout.write(line)
                spi += 1
                if spi % lines_per_file == 0:
                    fout.close()
                    # // keeps the suffix an integer on Python 3 as well.
                    fout = open(spfname + str(spi // lines_per_file), "wb")
                    spc += 1
    finally:
        # Close the current chunk even when reading or writing fails.
        fout.close()
    # spc counted only the files opened inside the loop; add the first one.
    return spc + 1


def worker(num, fromres, outres, filewithr):
    """Match one chunk file against the ranges file.

    Reads the CSV file ``fromres + str(num)``.  For every record whose column
    0, taken as an integer, lies between columns 0 and 1 (inclusive) of a row
    of *filewithr*, the row ``[record[1], record[0], range[2], range[3]]`` is
    appended to the CSV file ``outres + str(num)``.  The result file is only
    created when there is at least one match.  Empty CSV rows are skipped.

    Args:
        num: chunk number; printed, and used as the file-name suffix.
        fromres: path prefix of the chunk files.
        outres: path prefix of the result files.
        filewithr: path of the ranges CSV file.
    """
    print(num)
    chunk_path = fromres + str(num)
    result_path = outres + str(num)

    # Parse the ranges once per worker instead of once per record.
    with _open_csv(filewithr, 'r') as ranges_file:
        bounds = [(int(row[0]), int(row[1]), row)
                  for row in csv.reader(ranges_file) if row]

    target = None
    writer = None
    try:
        with _open_csv(chunk_path, 'r') as chunk_file:
            for record in csv.reader(chunk_file):
                if not record:
                    continue
                value = int(record[0])
                for low, high, range_row in bounds:
                    if low <= value <= high:
                        if target is None:
                            # Opened lazily, in append mode, on the first match.
                            target = _open_csv(result_path, 'a')
                            writer = csv.writer(target)
                        writer.writerow(
                            [record[1], record[0], range_row[2], range_row[3]])
    finally:
        if target is not None:
            target.close()


if __name__ == '__main__':
    # Before running any jobs the numbers file has to be split.
    countfiles = splitfiles(firsplitfile, splitfilename, numbers)
    print(countfiles)
    # Start one process per chunk file.
    jobs = []
    for i in range(countfiles):
        p = multiprocessing.Process(
            target=worker, args=(i, splitfilename, outputresult, ranges))
        jobs.append(p)
        p.start()
    # Wait for every worker so the script ends only when all results are written.
    for p in jobs:
        p.join()

python

Previous post Next post
Up