martes, 30 de septiembre de 2014

Python: Mi script favorito

Este es mi script favorito hasta ahora porque me ahorra un montón de tiempo al ingresar datos de importaciones a mi reporte. Automaticamente suma facturas, cuenta cajas, pesos, obtiene datos de guias aereas y bls y lo carga todo a mi reporte:

print 'Scanning invoices...'
import re, xlrd, os, os.path, openpyxl, pdfminer
#invoice variables
fob = 0
freight = 0
insurance = 0
total = 0
invoice = []
plNum = ''
invoiceFound = 'false'
#pl variables
writeFile = open("d:/pls.txt", "w")
cases =[]
gweight =[]
volume = []
plFound = 'false'
#bl variables
blNumber = ''
containers = []
blCases =[]
blWeight =[]
blVolume =[]
blFound = 'false'
#first set the path
path= "d:/Loader/"
#look for invoices-------------------------------------------------------------------------------------------------------------------------------
for file in os.listdir(path):
    match = re.search(r'.*\.[xlXL]\w+', file)
    if match:
        wb = xlrd.open_workbook(path+file)
        sht = wb.sheet_by_index(0)
        n = sht.nrows
        #identifies invoice based on keyword placement
        if "Huawei" in sht.cell_value(0,2) or "Huawei" in sht.cell_value(4, 2):
            invoiceFound = 'true'
            #Identifies the invoce template
            if "Huawei" in sht.cell_value(0,2): #The old template
                fob = fob + float(sht.cell_value((sht.nrows -4),13))
                freight = freight + float(sht.cell_value((sht.nrows -3),13))
                insurance = insurance + float(sht.cell_value((sht.nrows -2),13))
                total = total + float(sht.cell_value(sht.nrows-1,13))
                plNum = sht.cell_value(7,2)[8:]
            elif 'Huawei Device Co., Ltd.' in sht.cell_value(4,2): #Device template
                cellContent = sht.cell_value(sht.nrows - 7, 8)
                cellContent = cellContent.split()
                for n in range(len(cellContent)):
                    if ',' in cellContent[n]:
                        cellContent[n] = cellContent[n].replace(',', '.')
                fob = fob+float(cellContent[0])
                freight = freight + float(cellContent[1])
                insurance = insurance + float(cellContent[2])
                total = total + float(cellContent[3])
                plNum = sht.cell_value(10,7)
            else: #the new template
                for row in range(n):
                    if "FOB" in sht.cell_value(row, 6):
                        fob = fob + float(sht.cell_value(row,7))
                        freight = freight + float(sht.cell_value(row+1, 7))
                        insurance = insurance + float(sht.cell_value(row+2, 7))
                        #sometimes there is a USD in the Total cell so we need to suppress it
                        if 'USD'in str(sht.cell_value(row+3,7)):
                            total = total + float(sht.cell_value(row+3,7)[3:])
                            total = total + float(sht.cell_value(row+3,7))
                        plNum = sht.cell_value(sht.nrows-2,2)       
if invoiceFound == 'true':
    print "FOB: %f, Flete: %f, seguro: %f, Total: %f" %(fob, freight, insurance, total)
    print 'No invoices found.'
    #print 'There was a problem scanning invoices. No information could be retreived.'
#look for PLs------------------------------------------------------------------------------------------------------------------------------------
print 'Scanning PLs...'
    for file in os.listdir(path):
        match = re.search(r'.*\.[xlXL]\w+', file)
        if match:
            wb = xlrd.open_workbook(path+file)
            sht = wb.sheet_by_index(0)
            n = sht.nrows
            for row in range(n):
                if "Total:" in unicode(sht.cell_value(row,0)):
                    plFound = 'true'
                    writeFile.write(sht.cell_value(row, 0)+"\n")
    if plFound == 'true':
        summary = open("d:/pls.txt")
        for line in summary:
            matchTotalCases = re.search(r'Total:\d*CASES\s', line)
            matchGrossWeight = re.search(r'gross\sweight\(KG\):\d*\.?\d*\s', line)
            matchNetWeight = re.search(r'net\sweight\(KG\):\d*\.?\d*\s', line)
            matchVolume = re.search(r'volume\(CBM\):\d*\.?\d*\s', line)
            if matchTotalCases:
                stringCases = matchTotalCases.group()
            if matchGrossWeight:
                stringGrossWeigth = matchGrossWeight.group()
            if matchNetWeight:
                stringNetWeight = matchNetWeight.group()
            if matchVolume:
                stringVolume = matchVolume.group()
        tcases = sum(cases)
        tgweight = sum(gweight)
        tnweight = sum(nweight)
        tvolume = sum(volume)
        print "Total cases: %i, gross weight: %f,  net weight: %f, volume: %f" %(sum(cases), tgweight, tnweight, tvolume)
        print 'No Packing Lists Found.'
    print 'There was a problem scanning PLs. No information could be retreived.'
#look for BLS-----------------------------------------------------------------------------------------------------------------------
print 'Scanning BL Data...'
for file in os.listdir(path):
    match = re.search(r'.*\.[pP][dD][fF]', file)
    if match:
        blFound= 'true'
        os.system("C:/Python27/Scripts/pdf2txt.py -o d:/bls.txt "+path+file)
        blFile = open("d:/bls.txt").read()
        #identifies if its a bill of lading or air waybill:
        if "BILL OF LADING" in blFile:
            #ok it is a bl from Maersk or KN?
            #now scans the file line by line looking for matches
            blFile = open("d:/bls.txt")
            for line in blFile:
                mBlNumber = re.search(r'^\d{9}\n$',line)
                mTotalCases = re.search(r'\d\s\s\d+\sC[aA]?[sS][eE]?', line) #sometimes it is written 'CASE' or 'CS'
                mGrossWeight = re.search(r'\w\s\s\d+\.*\d*\sKGS', line)
                mVolume = re.search(r'\w\s\s\d+\.*\d*\sCBM', line)
                mContainers = re.search(r'[A-Z]{4}\d{7}', line)
                if mBlNumber: blNumber = mBlNumber.group()[:-1]
                if mTotalCases:
                    casePosition = mTotalCases.group().find('C') #finds the position of leter 'C' in order not to copy it.
                if mGrossWeight: blWeight.append(float(mGrossWeight.group()[3:-3]))
                if mVolume: blVolume.append(float(mVolume.group()[3:-3]))
                if mContainers: containers.append(line[:-13])
            #then if it is an airway bill
            #is it from panalpina or dhl?
            if "PANALPINA" in blFile:
                blFile = open("d:/bls.txt")
                for line in blFile:
                    mBlNumber = re.search(r'SZX\s\s\d{6}',line)
                    mTotalCases = re.search(r'TOTAL:\d+\s', line)
                    mGrossWeight = re.search(r'\d+\.?\d*K\s', line)
                    mVolume = re.search(r'VOLUME\s\s\s\s\d+\.?\d*\s', line)
                    if mBlNumber:
                        blNumber = mBlNumber.group()
                        blNumber = blNumber.replace('  ', '')
                    if mTotalCases: blCases.append(int(mTotalCases.group()[6:-1]))
                    if mGrossWeight: blWeight.append(float(mGrossWeight.group()[:-2]))
                    if mVolume: blVolume.append(float(mVolume.group()[10:-1]))
if blFound == 'true' and blNumber != '':     
    print "BL Number: %s, Total cases: %i, Total Gross Weight: %f, Total Volume: %f" %(blNumber, sum(blCases), sum(blWeight), sum(blVolume))
    print 'Cointainers:'
    for cont in containers:
        print cont
    print 'BL/AWB not found!'
#look for discrepancies----------------------------------------------------------------------------------------------------------------------------
#if (cases - blCases != 0) or (gweight - blWeight!=0) or (volume - blVolume !=0):
if tcases != sum(blCases) or tgweight != sum(blWeight) or sum(volume)!= sum(blVolume):
    print 'WARNING! Some discrepancies found between PLs and BL/AWB'
#create report loader--------------------------------------------------------------------------------------------------------------------------------
print 'Creating loader...'
    template = openpyxl.load_workbook("d:/loadTemplate.xlsx")
    templateSheet = template.get_sheet_by_name("Sheet1")
    templateSheet.cell(row=2, column=10).value = fob
    templateSheet.cell(row=2, column=11).value = freight
    templateSheet.cell(row=2, column=12).value = insurance
    templateSheet.cell(row=2, column=13).value = total
    templateSheet.cell(row=2, column=7).value = ""
    for inv in invoice:
        if templateSheet.cell(row=2, column=7).value == "":
            templateSheet.cell(row=2, column=7).value = inv + " "
            templateSheet.cell(row=2, column=7).value = templateSheet.cell(row=2, column=7).value + inv + " "
    templateSheet.cell(row=2, column=1).value = plNum
    templateSheet.cell(row=2, column=14).value = tcases
    templateSheet.cell(row=2, column=15).value = tgweight
    templateSheet.cell(row=2, column=16).value = tvolume
    templateSheet.cell(row=2, column=20).value = blNumber
    templateSheet.cell(row=2, column=25).value = ''
    for cont in containers:
        if templateSheet.cell(row=2, column=25).value == '':
            templateSheet.cell(row=2, column=25).value = cont + ' '
            templateSheet.cell(row=2, column=25).value = templateSheet.cell(row=2, column=25).value + cont + ' '
    print 'Process complete.'
    print 'WARNING! There was a problem writing the loader. Maybe the file loader.xlsx is open?'
raw_input ()

No hay comentarios:

Publicar un comentario