Код не претендует на идеальность: выложен для примера и чтобы не потерялся.
#!/usr/bin/python
# -*- coding: utf-8 -*-
import codecs
import csv
import sys
import multiprocessing
import mmap
import os
import StringIO
import fileinput
import time
# Input CSV with the numbers to look up; it is split into chunks before processing.
numbers=u'./numbers_clean.txt'
# Input CSV with the ranges every number is compared against.
ranges=u'./ranges_clean.txt'
# Path of the first split file. It equals splitfilename + '0', so the chunk
# with index 0 follows the same naming scheme as the later chunks.
firsplitfile=u'./output/outputnumbers0'
# Path prefix of the split files; the chunk index is appended to it.
splitfilename=u'./output/outputnumbers'
# Path prefix of the result files; the worker index is appended to it.
outputresult=u'./result/foundnumbers'
# Unused legacy paths, kept commented out.
#output=u'../output/output_multi.txt'
#outputsplit=u'../output/split/outputnumbers0.txt'
#agicnumber=u'./output/output_magicnumber.txt'
# UTF-8 codec looked up once at import time; Utf8.decode(s) returns a
# (decoded_text, consumed_length) pair.
Utf8 = codecs.lookup( 'utf-8' )
def notworker(x):
    """Print ``x`` to standard output.

    Trivial debugging stub; it is not referenced by the other code visible
    in this file.

    :param x: any printable object.
    :returns: None.
    """
    # Call form of print: identical output on Python 2 for a single argument,
    # and valid syntax on Python 3 as well.
    print(x)
def splitfiles(spf, spfname, whatsplit, lines_per_file=40000):
    """Split the file ``whatsplit`` line by line into numbered chunk files.

    The first chunk is written to ``spf``; every following chunk is written
    to ``spfname + str(index)`` with ``index`` starting at 1. Callers are
    expected to pass ``spf == spfname + '0'`` if they want uniform names.

    The running line counter starts at 1 and a new chunk is started every
    time it reaches a multiple of ``lines_per_file``. For
    ``lines_per_file > 1`` the first chunk therefore holds
    ``lines_per_file - 1`` lines and later chunks ``lines_per_file`` lines.
    If the input ends exactly on a rotation point, the last chunk is empty
    but still counted. This layout is kept unchanged for compatibility with
    chunks produced by earlier runs.

    :param spf: path of the first chunk file.
    :param spfname: path prefix for the second and later chunk files.
    :param whatsplit: path of the file to split.
    :param lines_per_file: rotation period in lines (default 40000).
    :returns: number of chunk files written (always at least 1).
    :raises ValueError: if ``lines_per_file`` is smaller than 1.
    """
    if lines_per_file < 1:
        raise ValueError("lines_per_file must be a positive integer")

    spi = 1  # running line counter, deliberately 1-based (see docstring)
    spc = 0  # number of rotations performed so far
    fout = open(spf, "wb")
    try:
        # Read bytes and write bytes: lines are copied verbatim, with no
        # decoding or newline translation on any Python version.
        with open(whatsplit, "rb") as src:
            for line in src:
                fout.write(line)
                spi += 1
                if spi % lines_per_file == 0:
                    fout.close()
                    # Floor division keeps the suffix an integer ("1", not
                    # "1.0") even under true division.
                    fout = open(spfname + str(spi // lines_per_file), "wb")
                    spc += 1
    finally:
        # Closing an already closed file is a no-op, so this is safe even if
        # opening the next chunk failed.
        fout.close()

    # Rotations + the first file = total number of chunk files.
    return spc + 1
def _open_csv(path, mode):
    """Open ``path`` the way the csv module expects on this Python version.

    :param path: file to open.
    :param mode: ``'r'`` or ``'a'`` (without a text/binary suffix).
    :returns: an open file object usable with ``csv.reader``/``csv.writer``.
    """
    if sys.version_info[0] < 3:
        # Python 2: csv works on byte streams.
        return open(path, mode + 'b')
    # Python 3: csv works on text streams with newline translation disabled.
    return open(path, mode, encoding='utf-8', newline='')


def worker(num, fromres, outres, filewithr):
    """Match the numbers of one split file against all ranges.

    Reads the CSV file ``fromres + str(num)``; column 0 of each row is an
    integer, column 1 is copied to the output. Every row is compared with
    every row of the ranges CSV ``filewithr`` (column 0 = lower bound,
    column 1 = upper bound, both inclusive; columns 2 and 3 are copied to the
    output). For each hit the row
    ``[record[1], record[0], range[2], range[3]]`` is appended to
    ``outres + str(num)``. The output file is only created when there is at
    least one hit. Blank CSV rows in either input are skipped.

    Fields are passed through without an intermediate decode step: on
    Python 2 the csv writer cannot serialise non-ASCII ``unicode`` values, so
    decoding the input first would fail on such data.

    :param num: index of the split file to process; also printed as progress.
    :param fromres: path prefix of the split input files.
    :param outres: path prefix of the result files.
    :param filewithr: path of the ranges CSV file.
    :returns: None.
    :raises ValueError: if a number or a range bound is not an integer.
    :raises IndexError: if a non-blank row has too few columns.
    """
    print(num)

    # Parse the ranges once instead of re-reading the file for every record.
    # The original row is kept so the copied columns are only indexed on a hit.
    with _open_csv(filewithr, 'r') as ranges_src:
        bounds = [(int(row[0]), int(row[1]), row)
                  for row in csv.reader(ranges_src) if row]

    target = None
    writer = None
    try:
        with _open_csv(fromres + str(num), 'r') as src:
            for record in csv.reader(src):
                if not record:
                    continue
                number = int(record[0])
                for low, high, row in bounds:
                    if low <= number <= high:
                        if writer is None:
                            # Opened lazily so that a chunk without hits
                            # leaves no (empty) result file behind.
                            target = _open_csv(outres + str(num), 'a')
                            writer = csv.writer(target)
                        writer.writerow([record[1], record[0], row[2], row[3]])
    finally:
        if target is not None:
            target.close()
if __name__ == '__main__':
    # The input must be split before any job runs: each worker process reads
    # exactly one split file.
    countfiles = splitfiles(firsplitfile, splitfilename, numbers)
    print(countfiles)

    # Start one worker process per split file; the file index is the worker id.
    jobs = []
    for i in range(countfiles):
        p = multiprocessing.Process(
            target=worker, args=(i, splitfilename, outputresult, ranges))
        jobs.append(p)
        p.start()

    # Wait explicitly for every worker so the script returns only after all
    # result files have been written.
    for p in jobs:
        p.join()