For Loop in Function in Python

I'm trying to move some code from my main function into a function called read, but somehow the loop stops early and doesn't go through my whole CSV file.
Below are the two scripts and the CSV.
Thank you for your advice and tips, as the learning curve is getting steeper and steeper.
---Code Below---
The script 'NotinFunct' reads the CSV file and prints all of the matching data.
The script 'InFunct' reads the same CSV file but only returns one set of data.
The 'NotinFunct' script is:
# -*- coding: utf-8 -*-
import csv

FILE = 'C://shared//API//NADEV-Numbers_20190220-092956.csv'
NBS = {'5684', '7445477'}
NEW_NBS = {'56847', '74454773'}

def main():
    fields_route = {'Pattern', 'CalledX', 'CalledPrefix', 'CallingX', 'CallingPrefix'}
    for row in csv.DictReader(open(FILE)):
        if row['Type'] == 'RoutePattern':
            for nb in NBS:
                for field in fields_route:
                    if nb in row[field]:
                        for new in NEW_NBS:
                            if nb in new:
                                rp = row['Pattern']
                                pt = row['Partition']
                                newrp = row['Pattern'].replace(nb, new)
                                if row['CalledX'] == 'None':
                                    cedp = row['CalledX'].replace('None', '')
                                else:
                                    cedp = row['CalledX'].replace(nb, new)
                                if row['CalledPrefix'] == 'None':
                                    pced = row['CalledPrefix'].replace('None', '')
                                else:
                                    pced = row['CalledPrefix'].replace(nb, new)
                                if row['CallingX'] == 'None':
                                    cingp = row['CallingX'].replace('None', '')
                                else:
                                    cingp = row['CallingX'].replace(nb, new)
                                if row['CallingPrefix'] == 'None':
                                    pcing = row['CallingPrefix'].replace('None', '')
                                else:
                                    pcing = row['CallingPrefix'].replace(nb, new)
                                print(rp)
                                print(pt)
                                print(newrp)
                                print(cedp)
                                print(pced)
                                print(cingp)
                                print(pcing)
                                print('################')

if __name__ == '__main__':
    main()
The 'InFunct' script is:
# -*- coding: utf-8 -*-
import csv

FILE = 'C://shared//API//NADEV-Numbers_20190220-092956.csv'
NBS = {'5684', '7445477'}
NEW_NBS = {'56847', '74454773'}

def read():
    fields_route = {'Pattern', 'CalledX', 'CalledPrefix', 'CallingX', 'CallingPrefix'}
    for row in csv.DictReader(open(FILE)):
        if row['Type'] == 'RoutePattern':
            for nb in NBS:
                for field in fields_route:
                    if nb in row[field]:
                        for new in NEW_NBS:
                            if nb in new:
                                rp = row['Pattern']
                                pt = row['Partition']
                                newrp = row['Pattern'].replace(nb, new)
                                if row['CalledX'] == 'None':
                                    cedp = row['CalledX'].replace('None', '')
                                else:
                                    cedp = row['CalledX'].replace(nb, new)
                                if row['CalledPrefix'] == 'None':
                                    pced = row['CalledPrefix'].replace('None', '')
                                else:
                                    pced = row['CalledPrefix'].replace(nb, new)
                                if row['CallingX'] == 'None':
                                    cingp = row['CallingX'].replace('None', '')
                                else:
                                    cingp = row['CallingX'].replace(nb, new)
                                if row['CallingPrefix'] == 'None':
                                    pcing = row['CallingPrefix'].replace('None', '')
                                else:
                                    pcing = row['CallingPrefix'].replace(nb, new)
                                return rp, pt, newrp, cedp, pced, cingp, pcing

def main():
    for test in read():
        print(test)

if __name__ == '__main__':
    main()
The CSV is:
Type,Pattern,Partition,Description,CalledX,CalledPrefix,CallingX,CallingPrefix,FwdAll,FwdBusyInt,FwdBusyExt,FwdNAnsInt,FwdNAnsExt,FwdNCovInt,FwdNCovExt,FwdCTIFail,FwdURegInt,FwdURegExt,ExtPNMask,Device
DirectoryNumber,875423,a_nothing_partition,a_nothing_DN,N/A,N/A,N/A,N/A,11,22,33,44,55,66,744547722,77,88,99,9898,SEP798798465143
DirectoryNumber,5684001,a_nothing_partition,None,N/A,N/A,N/A,N/A,None,None,None,None,None,None,None,None,None,None,N/A,N/A
TranslationPattern,568412,a_nothing_partition,a_nothing_tp,None,None,None,5236,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
TranslationPattern,568411,a_nothing_partition,a_nothing_tp,None,None,875421,None,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
TranslationPattern,744547720,a_nothing_partition,a_nothing_tp,961433,None,None,None,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
TranslationPattern,744547721,a_nothing_partition,a_nothing_tp,None,786512,None,None,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
TranslationPattern,47852,a_nothing_partition,a_nothing_tp,None,None,744547711,None,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
TranslationPattern,9632,a_nothing_partition,a_nothing_tp,None,None,None,5684,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
TranslationPattern,897435496,a_nothing_partition,a_nothing_tp,568433,None,None,None,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
TranslationPattern,7896312145697,a_nothing_partition,a_nothing_tp,None,7445477,None,None,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
RoutePattern,6568433,a_nothing_partition,None,None,None,None,None,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
RoutePattern,6568434,a_nothing_partition,None,None,None,None,None,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
RoutePattern,24132,a_nothing_partition,a_nothing_rp,None,None,7445477,None,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
HuntPilot,568444,a_nothing_partition,a_nothing_hunt pilot,88,99,66,77,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
CingPartyX,8787,a_nothing_partition,a_nothing_calling party X,N/A,N/A,11,744547722,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A
CedPartyX,98563,a_nothing_partition,a_nothing_called party X,N/A,N/A,568496,None,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A

Your read function only returns one set of values, because return ends the function as soon as it is reached. If you change the function to a generator you can get all the values.
Change the end of the read function to the following, making sure to align the yield to the innermost loop block:
...
if row['CallingPrefix'] == 'None':
    pcing = row['CallingPrefix'].replace('None', '')
else:
    pcing = row['CallingPrefix'].replace(nb, new)
yield rp, pt, newrp, cedp, pced, cingp, pcing
Then you get:
('6568433', 'a_nothing_partition', '65684733', '', '', '', '')
('6568434', 'a_nothing_partition', '65684734', '', '', '', '')
('24132', 'a_nothing_partition', '24132', '', '', '74454773', '')
Change your main function to the following to get similar output to NotInFunct:
def main():
    for test in read():
        for col in test:
            print(col)
        print('################')
Output:
6568433
a_nothing_partition
65684733
################
6568434
a_nothing_partition
65684734
################
24132
a_nothing_partition
24132
74454773
################
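As a side note (not part of the original answer), the difference between return and yield is worth spelling out: return hands back one value and ends the function, while yield turns the function into a generator that produces a value each time it is reached and then resumes where it left off. A minimal, self-contained sketch with toy data:

def first_even(numbers):
    for n in numbers:
        if n % 2 == 0:
            return n        # stops the function at the first match

def all_evens(numbers):
    for n in numbers:
        if n % 2 == 0:
            yield n         # hands out every match, one at a time

print(first_even([1, 2, 3, 4]))        # 2
print(list(all_evens([1, 2, 3, 4])))   # [2, 4]

This mirrors why InFunct only produces one set of values: the return inside the nested loops ends read() as soon as the first matching row has been processed.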

Related

NameError: free variable 'gender_list' referenced before assignment in enclosing scope

I need to modularize the name_2_sex function, which receives a dataframe. To do that, I call it from a file called test.py, but it gives me this error.
The function receives a dataframe with data on people and returns the dataframe with two extra columns: one with the patient's first name and the other with their gender.
NameError: free variable 'gender_list' referenced before assignment in enclosing scope
The algorithm worked before I modularized it.
name_2_sex code:
import pandas as pd
import operator
import re

def name_2_sex(df):
    def clean_text(txt):
        txt = re.sub("[^a-záéíóúñüäë]", " ", txt.lower())
        txt = re.sub(' +', ' ', txt)
        return txt.strip().split()

    def df_to_dict(df, key_column, val_column):
        """converts two pandas Series into a dictionary"""
        xkey = df[key_column].tolist()
        xval = df[val_column].tolist()
        return dict(zip(xkey, xval))

    def get_gender2(names):
        names = clean_text(names)
        names = [x for x in names if gender_list.get(x, 'a') != 'a']
        gender = {'m': 0, 'f': 0, 'a': 0}
        for i, name in enumerate(names):
            g = gender_list.get(name, 'a')
            gender[g] += 1
            gender[g] += 2 if len(names) > 1 and i == 0 and g != 'a' else 0
        gender['a'] = 0 if (gender['f'] + gender['m']) > 0 else 1
        return max(gender.items(), key=operator.itemgetter(1))[0]

    if __name__ == '__main__':
        path = 'https://www.dropbox.com/s/edm5383iffurv4x/nombres.csv?dl=1'
        gender_list = pd.read_csv(path)
        gender_list = df_to_dict(gender_list, key_column='nombre', val_column='genero')

    df_nombre_completo = df["patient_full_name"]
    pacientes_primer_nombre = []
    for name in df_nombre_completo:
        if (isinstance(name, str)):
            pacientes_primer_nombre.append(name.split(" ")[0])
    for name in df["patient_full_name"]:
        if (isinstance(name, str)):
            df["first_name"] = name.split(" ")[0]
        else:
            df["first_name"] = 0
    df["first_name"] = [str(name).split(" ")[0] for name in df["patient_full_name"]]
    df["gender"] = df["first_name"]
    df["gender"] = [get_gender2(name) for name in df["first_name"]]
    return df
Code of the file from which I want to run it (test.py):
from nombre_a_sexo import name_2_sex
import pandas as pd
df = pd.read_csv("nuevo_dataset.csv", index_col=0)
print(name_2_sex(df))
Both files are in the same folder.
I did not write the algorithm that determines the gender, so I would not know what to edit if the problem comes from there.
You only assign gender_list in this block:
if __name__ == '__main__':
    path = 'https://www.dropbox.com/s/edm5383iffurv4x/nombres.csv?dl=1'
    gender_list = pd.read_csv(path)
    gender_list = df_to_dict(gender_list, key_column='nombre', val_column='genero')
But this condition will only be true if you execute nombre_a_sexo.py as a top-level script, not when you import from it.
So you never assign gender_list before the rest of the code tries to use it.
When the function is called from another file, I think you want to use the df parameter instead of reading from this file. So change it to:
if __name__ == '__main__':
    path = 'https://www.dropbox.com/s/edm5383iffurv4x/nombres.csv?dl=1'
    gender_list = pd.read_csv(path)
    gender_list = df_to_dict(gender_list, key_column='nombre', val_column='genero')
else:
    gender_list = df_to_dict(df, key_column='nombre', val_column='genero')
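For what it's worth, here is a minimal illustration (with a hypothetical module name, not from the original post) of why the guarded block never runs on import: __name__ is only '__main__' in the file you actually execute.

# mymodule.py (hypothetical name, for illustration only)
def greet():
    return "hello"

if __name__ == '__main__':
    # Runs only for "python mymodule.py", never for "import mymodule".
    print(greet())

# test.py
# import mymodule            # the guarded block above is skipped here
# print(mymodule.greet())    # the function itself is still usable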

Convert JSON file to flat table

Good Afternoon,
I am still pretty new to Python but have found it particularly addictive, though there are definitely some "quirks" to Python that have been a pain to get through. I am currently trying to take a JSON file and flatten it out into a table. There are a ton of posts, specifically here on Stack Overflow, on converting it into a flat dict, but that doesn't let me turn it into a table. This has been much harder than I expected.
I am currently getting the following error, which to me seems like it is something wrong with my key generation portion.
Code:
import json
import os
import csv
import copy
from pandas.io.json._normalize import nested_to_record

# Basic variables
scriptDirectory = os.path.dirname(os.path.realpath(__file__))

def getKeys(dictionary: dict, result: list = None, parentKey='', sep='.', skipParent=False) -> list:
    if result == None:
        result = []
    # Loop through all keys and return unique options
    for key in dictionary.keys():
        newKey = (parentKey + sep + key if parentKey else str(key))
        if type(dictionary[key]) == dict:
            result = (getKeys(dictionary[key], result=result, parentKey=('' if skipParent else str(newKey))))
        else:
            if key != "" and newKey not in result:
                result.append(newKey)
    return result

def convertKey(data: str, languageDict: dict):
    try:
        return languageDict[data]
    except KeyError:
        return data

# Read the JSON files.
# Library file
with open(os.path.join(scriptDirectory, 'inventoryItem.json'), "r", encoding='utf-8') as read_file:
    lib = json.load(read_file)
# English dictionary
with open(os.path.join(scriptDirectory, 'en.json'), "r", encoding='utf-8') as read_file:
    en = json.load(read_file)

for key in lib['inventoryItem'].keys():
    print(key)
    dictTemplate = dict.fromkeys(getKeys(dictionary=lib['inventoryItem'][key], skipParent=True), None)
    print(dictTemplate)
    firstItem = 0
    try:
        with open(os.path.join(scriptDirectory, 'export', f"{key}.csv"), "w", newline='', encoding='utf-8') as csvfile:
            for item in lib['inventoryItem'][key]:
                entry = copy.deepcopy(dictTemplate)
                entry.update(nested_to_record(lib['inventoryItem'][key][item], sep='.'))
                if key == 'coin':
                    entry['name'] = convertKey(data=f"LIB_COIN_NAME_{entry['id']}", languageDict=en)
                    entry['description'] = convertKey(data=f"LIB_COIN_DESC_{entry['id']}", languageDict=en)
                    entry['obtainNavigatorData.not_enough_message'] = convertKey(data=entry['obtainNavigatorData.not_enough_message'], languageDict=en)
                    entry['obtainNavigatorData.not_enough_title'] = convertKey(data=entry['obtainNavigatorData.not_enough_title'], languageDict=en)
                    entry['obtainNavigatorData.button_label'] = convertKey(data=entry['obtainNavigatorData.button_label'], languageDict=en)
                elif key == 'consumable':
                    # print(dictTemplate)
                    pass
                elif key == 'scroll':
                    del entry["fragmentMergeCost"]
                    del entry["fragmentSellCost"]
                    del entry["fragmentBuyCost"]
                    del entry["buyCost"]
                if firstItem == 0:
                    firstItem += 1
                    writer = csv.DictWriter(csvfile, fieldnames=entry.keys(), delimiter=';')
                    writer.writeheader()
                writer.writerow(entry)
    except IOError:
        print("I/O error")
Data set that's giving me trouble:
"consumable": {
"96": {
"id": 96,
"rewardType": "",
"rewardAmount": 0,
"effectDescription": {
"": ""
},
"buyCost": null,
"sellCost": null,
"buySpecialCost": null,
"assetAtlas": 4,
"assetTexture": "social_vk",
"iconAssetTexture": "",
"color": 1,
"hidden": 0,
"descLocaleId": "PLAY_AT_HOME_TICKET",
"obtainNavigatorData": null
},
Desired Output.
This is from a section called "coins", but the example data is from "consumables". Certain sections work fine, but others cause issues because the value is set to "","".
I finally figured out the solution. For anyone who wants to properly convert a JSON file to a table, this is the only solution I could come up with :)
import json
import os
import csv
import copy

def processJSON(initialDict: dict, createTemplate: bool = False, existingKeys: dict = None, parentKey: str = None, sep: str = '.', skipParent: bool = False) -> dict:
    outPut = (existingKeys if existingKeys else {})
    # Loop through all keys and return unique options
    for key in initialDict.keys():
        # Create needed variables
        keyTitle = str(parentKey + sep + key if parentKey else key)
        # Loop
        if type(initialDict[key]) == dict:
            parentTitle = ('' if skipParent else keyTitle)
            outPut.update(processJSON(initialDict=initialDict[key], existingKeys=outPut, createTemplate=createTemplate, parentKey=parentTitle, sep=sep))
        elif keyTitle not in outPut.keys():
            keyValue = str('' if createTemplate else initialDict[key])
            outPut[keyTitle] = keyValue
    return dict(outPut)

def convertKey(data: str, languageDict: dict):
    try:
        return languageDict[data]
    except KeyError:
        return data

# Basic variables
scriptDirectory = os.path.dirname(os.path.realpath(__file__))

# Read the JSON files.
# Library file
with open(os.path.join(scriptDirectory, 'inventoryItem.json'), "r", encoding='utf-8') as read_file:
    lib = json.load(read_file)
# English dictionary
with open(os.path.join(scriptDirectory, 'en.json'), "r", encoding='utf-8') as read_file:
    en = json.load(read_file)

for key in lib['inventoryItem'].keys():
    firstItem = True
    header = processJSON(initialDict=lib['inventoryItem'][key], createTemplate=True, skipParent=True)
    try:
        with open(os.path.join(scriptDirectory, 'export', f"{key}.csv"), "w", newline='', encoding='utf-8') as csvfile:
            for item in lib['inventoryItem'][key]:
                # Copy the header template so every row starts with the full set of columns
                row = dict(copy.deepcopy(header))
                row.update(processJSON(initialDict=lib['inventoryItem'][key][item]))
                if key == 'coin':
                    row['name'] = convertKey(data=f"LIB_COIN_NAME_{row['id']}", languageDict=en)
                    row['description'] = convertKey(data=f"LIB_COIN_DESC_{row['id']}", languageDict=en)
                    row['obtainNavigatorData.not_enough_message'] = convertKey(data=row['obtainNavigatorData.not_enough_message'], languageDict=en)
                    row['obtainNavigatorData.not_enough_title'] = convertKey(data=row['obtainNavigatorData.not_enough_title'], languageDict=en)
                    row['obtainNavigatorData.button_label'] = convertKey(data=row['obtainNavigatorData.button_label'], languageDict=en)
                elif key == 'consumable':
                    row['name'] = convertKey(data=f"LIB_CONSUMABLE_NAME_{row['id']}", languageDict=en)
                    row['description'] = convertKey(data=f"LIB_CONSUMABLE_DESC_{row['descLocaleId']}", languageDict=en)
                    row['obtainNavigatorData.button_label'] = convertKey(data=row['obtainNavigatorData.button_label'], languageDict=en)
                    row['obtainNavigatorData.not_enough_message'] = convertKey(data=row['obtainNavigatorData.not_enough_message'], languageDict=en)
                    row['obtainNavigatorData.not_enough_title'] = convertKey(data=row['obtainNavigatorData.not_enough_title'], languageDict=en)
                elif key == 'gear':
                    row['name'] = convertKey(data=f"LIB_GEAR_NAME_{row['id']}", languageDict=en)
                elif key == 'petGear':
                    row['name'] = convertKey(data=f"LIB_PET_GEAR_NAME_{row['id']}", languageDict=en)
                elif key == 'pseudo':
                    row['name'] = convertKey(data=f"LIB_PSEUDO_{row['constName']}", languageDict=en)
                    row['description'] = convertKey(data=f"LIB_PSEUDO_DESC_{row['id']}", languageDict=en)
                elif key == 'scroll':
                    row['name'] = convertKey(data=f"LIB_SCROLL_NAME_{row['id']}", languageDict=en)
                    del row["fragmentMergeCost"]
                    del row["fragmentSellCost"]
                    del row["fragmentBuyCost"]
                    del row["buyCost"]
                else:
                    print(key)
                if firstItem:
                    firstItem = False
                    writer = csv.DictWriter(csvfile, fieldnames=row.keys(), delimiter=',')
                    writer.writeheader()
                writer.writerow(row)
    except IOError:
        print("I/O error")

Not sure what I am doing wrong... (Python)

I am following a tutorial and think I did everything correctly, but when starting the program I am getting an error.
Here is the code in my files:
1) the main file - frs.py
from parser import Parser
from lexer import Lexer

def main():
    filename = 'hello.frs'
    file = open(filename, 'r')
    lexer = Lexer(file)
    parser = Parser(lexer.tokens)
    lexer.tokenizer()
    print ("TOKENS:")
    print (lexer.tokens, "\n")
    parser.build_AST()
    print ("AST:")
    print (parset.AST, "\n")

if __name__ == "__main__":
    main()
2) the Lexer class - lexer.py
class Lexer:
    def __init__(self, data):
        self.data = data
        self.tokens = []
        self.keywords = [
            'tosay'
        ]

    def tokenizer(self):
        for loc in self.data:
            tmp = []
            tid = ''
            for l in loc:
                if l == '"' and tid == '':
                    tid = 'char'
                    tmp = []
                elif l == '"' and tid == 'char':
                    self.tokens.append({'id': tid, 'value': ''.join(tmp)})
                    tid = ''
                    tmp = []
                elif l == ':':
                    self.tokens.append({'id': 'label', 'value': ''.join(tmp)})
                    tmp = []
                elif ''.join(tmp) in self.keywords:
                    self.tokens.append({'id': 'keyword', 'value': ''.join(tmp)})
                    tmp = []
                elif l == ' ' and tid != 'char':
                    continue
                else:
                    tmp.append(l)
3) the Parser class - parser.py
class Parser:
    def __init__(self, tokens):
        self.tokens = tokens
        self.AST = []

    def add_node(self, parent, node):
        for a in self.AST:
            if parent in a:
                a[parent].append(node)

    def build_AST(self):
        saved = {}
        parent = {}
        collect = False
        for token in self.tokens:
            if token['id'] == 'label':
                t = {token['value']: []}
                if parent != t:
                    parent = token['value']
                    self.AST.append(t)
            elif token['id'] == 'keyword':
                if token['value'] == 'stop':
                    t = {token['value']: 0}
                    self.add_node(parent, t)
                else:
                    if collect == False:
                        saved = token
                        collect = True
                    else:
                        t = {saved['value']: token[:value]}
                        self.add_node(parent, t)
                        collect = False
            elif token['id'] == 'char':
                if collect = False:
                    saved = token
                    collect = True
                else:
                    t = {saved['value']: token['value']}
                    self.add_node(parent, t)
                    collect = False
4) the file written in my own language, which is the goal of the tutorial - hello.frs:
commence:
tosay "Hello World"
stop
Basically, until I added the line from parser import Parser, everything worked. But after adding it, I get this error message:
Traceback (most recent call last):
  File "frs.py", line 1, in <module>
    from parser import Parser
ImportError: cannot import name 'Parser'
I tried renaming the class, but it still doesn't work.
Please help me!
Thank you in advance.
There are two errors in your files.
1) File parser.py:
Change:
if collect = False:
to:
if collect == False:
2) File frs.py:
Change:
print (parset.AST, "\n")
to:
print (parser.AST, "\n")
After the above corrections, my output is:
TOKENS:
[{'id': 'label', 'value': 'commence'}, {'id': 'keyword', 'value': 'tosay'}, {'id': 'char', 'value': 'Hello World'}]
AST:
[{'commence': [{'tosay': 'Hello World'}]}]
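As a brief aside on the first fix (my own note, not part of the original answer): a single = assigns a value, == compares two values, and the idiomatic way to write this particular check is simply if not collect:. A tiny example:

collect = False          # assignment: binds the name collect
if collect == False:     # comparison: evaluates to True or False
    print('not collecting yet')
if not collect:          # the more idiomatic spelling of the same check
    print('not collecting yet')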

Collect rows into a single row if Name-row is empty CSV

I have a CSV file that has several columns. Two of the columns are called Namn (Name, the product) and Alla bilder (All images).
Some of the products have several images. These images are inserted on their own rows under the "All images" column:
Example:
Name     All images
Name1    Image1
Name2    Image2
         Image2-1
         Image2-2
Name3    Image3
I am trying to get all images belonging to one product inserted into the product's first image row, separated by a |. Here's the code I've managed to put together with help from a friend, but I just can't get it to work. (I downloaded Python and ran the .py script.) Nothing happens. Please note I have never touched Python before.
import codecs, csv

def main():
    file_input = codecs.open('test.csv', 'r', 'utf-8-sig')
    dictreader = csv.DictReader(file_input, delimiter=',')
    main_rows = []
    fields = []
    temp_row = {}
    for row_item in dictreader:
        if len(fields) == 0:
            # fix the fields header...
            for field in row_item:
                fields.append(field)
        if row_item['Name'] == '':
            # this row probably only contains picture information...
            if 'Alla bilder' in row_item and row_item['Alla bilder'] != '':
                if 'Alla bilder' not in temp_row:
                    temp_row['Alla bilder'] = row_item['Alla bilder']
                elif temp_row['Alla bilder'] == '':
                    temp_row['Alla bilder'] = row_item['Alla bilder']
                else:
                    temp_row['Alla bilder'] = temp_row['Alla bilder'] + '|' + row_item['Alla bilder']
        else:
            # this seems to be a new product row...
            if len(temp_row) != 0:
                # there seems to be something to store....
                print('\nSTORING: ' + str(temp_row))
                main_rows.append(temp_row)
                temp_row = {}
            # print('\nNEW ROW: ' + str(row_item))
            temp_row = row_item
    if len(temp_row) != 0:
        # there seems to be something to store....
        print('\nSTORING: ' + str(temp_row))
        main_rows.append(temp_row)
    file_output = codecs.open('test2.csv', 'w', 'utf-8-sig')
    dictwriter = csv.DictWriter(file_output, delimiter=',', fieldnames=fields)
    dictwriter.writerows(main_rows)

if _name_ == "_main_":
    main()
else:
    print('This file was called from another class...')
What error or message do you get when you start the script from the console?
Try changing
if _name_ == "_main_":
    main()
else:
    print('This file was called from another class...')
to
if __name__ == "__main__":
    main()
else:
    print('This file was called from another class...')
It's the only error I see (if I'm not wrong, it's __name__, not _name_). Changing that works for me. Using this input:
Name,Alla bilder
Name1,Image1
Name2,Image2
,Image2-1
,Image2-2
Name3,Image3
I get the following result:
Name1,Image1
Name2,Image2|Image2-1|Image2-2
Name3,Image3
Is that your problem?
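For reference, here is a more compact sketch of the same grouping idea (my own, assuming the two-column layout of the sample input above and Python 3's built-in open with an encoding argument); it also writes a header row, which the original script does not:

import csv

rows = []
with open('test.csv', encoding='utf-8-sig', newline='') as f:
    for row in csv.DictReader(f):
        if row['Name']:                       # a product row starts a new entry
            rows.append(row)
        elif rows and row['Alla bilder']:     # an image-only row joins the previous product
            rows[-1]['Alla bilder'] += '|' + row['Alla bilder']

with open('test2.csv', 'w', encoding='utf-8-sig', newline='') as f:
    writer = csv.DictWriter(f, fieldnames=['Name', 'Alla bilder'])
    writer.writeheader()
    writer.writerows(rows)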

My function to extract totals is exhausting my input file for future reading

The client includes 3 rows at the bottom of the file that contain totals for me to reconcile against in my program. The only problem is that my program exhausts the input file with readlines() before it can do anything else. Is there a way to keep the file from being exhausted during my get_recon_totals function call?
#!/usr/bin/env python
# pre_process.py
import csv
import sys

def main():
    infile = sys.argv[1]
    outfile = sys.argv[2]
    with open(infile, 'rbU') as in_obj:
        # Create reader object, get fieldnames for later on
        reader, fieldnames = open_reader(in_obj)
        nav_tot_cnt, nav_rec_cnt, nav_erec_cnt = get_recon_totals(in_obj)
        print nav_tot_cnt, nav_rec_cnt, nav_erec_cnt
        # This switches the dictionary to a sorted list... necessary??
        reader_list = sorted(reader, key=lambda key: (key['PEOPLE_ID'],
                                                      key['DON_DATE']))
        # Create a list to contain section header information
        header_list = create_header_list(reader_list)
        # Create dictionary that contains header list as the key,
        # then all rows that match as a list of dictionaries.
        master_dict = map_data(header_list, reader_list)
        # Write data to processed file, create recon counts to compare
        # to footer record
        tot_cnt, rec_cnt, erec_cnt = write_data(master_dict, outfile, fieldnames)
        print tot_cnt, rec_cnt, erec_cnt

def open_reader(file_obj):
    '''
    Uses DictReader from the csv module to take the first header line
    as the fieldnames, then applies them to each element in the file.
    Returns the DictReader object and the fieldnames being used (used
    later when data is printed out with DictWriter.)
    '''
    reader = csv.DictReader(file_obj, delimiter=',')
    return reader, reader.fieldnames

def create_header_list(in_obj):
    p_id_list = []
    for row in in_obj:
        if (row['PEOPLE_ID'], row['DON_DATE']) not in p_id_list:
            p_id_list.append((row['PEOPLE_ID'], row['DON_DATE']))
    return p_id_list

def map_data(header_list, data_obj):
    master_dict = {}
    client_section_list = []
    for element in header_list:
        for row in data_obj:
            if (row['PEOPLE_ID'], row['DON_DATE']) == element:
                client_section_list.append(row)
        element = list(element)
        element_list = [client_section_list[0]['DEDUCT_AMT'],
                        client_section_list[0]['ND_AMT'],
                        client_section_list[0]['DEDUCT_YTD'],
                        client_section_list[0]['NONDEDUCT_YTD']
                        ]
        try:
            element_list.append((float(client_section_list[0]['DEDUCT_YTD']) +
                                 float(client_section_list[0]['NONDEDUCT_YTD'])
                                 ))
        except ValueError:
            pass
        element.extend(element_list)
        element = tuple(element)
        master_dict[element] = client_section_list
        client_section_list = []
    return master_dict

def write_data(in_obj, outfile, in_fieldnames):
    with open(outfile, 'wb') as writer_outfile:
        writer = csv.writer(writer_outfile, delimiter=',')
        dict_writer = csv.DictWriter(writer_outfile,
                                     fieldnames=in_fieldnames,
                                     extrasaction='ignore')
        tot_cnt = 0
        rec_cnt = 0
        email_cnt = 0
        for k, v in in_obj.iteritems():
            writer_outfile.write(' -01- ')
            writer.writerow(k)
            rec_cnt += 1
            for i, e in enumerate(v):
                if v[i]['INT_CODE_EX0006'] != '' or v[i]['INT_CODE_EX0028'] != '':
                    email_cnt += 1
                writer_outfile.write(' -02- ')
                dict_writer.writerow(e)
                tot_cnt += 1
        return tot_cnt, rec_cnt, email_cnt

def get_recon_totals(in_obj):
    print in_obj
    client_tot_cnt = 0
    client_rec_cnt = 0
    client_erec_cnt = 0
    for line in in_obj.readlines():
        line = line.split(',')
        if line[0] == 'T' and line[1] == 'Total Amount':
            print 'Total Amount found.'
            client_tot_cnt = line[2]
        elif line[0] == 'T' and line[1] == 'Receipt Count':
            print 'Receipt Count found.'
            client_rec_cnt = line[2]
        elif line[0] == 'T' and line[1] == 'Email Receipt Count':
            print 'E-Receipt Count Found.'
            client_erec_cnt = line[2]
    return client_tot_cnt, client_rec_cnt, client_erec_cnt

if __name__ == '__main__':
    main()
If your file is not very large, you can convert the reader generator into a list of dictionaries by calling list() on the reader, and then use that list in your code instead of trying to read from the file directly.
Example:
def main():
    infile = sys.argv[1]
    outfile = sys.argv[2]
    with open(infile, 'rbU') as in_obj:
        # Create reader object, get fieldnames for later on
        reader, fieldnames = open_reader(in_obj)
        reader_list = list(reader)
        nav_tot_cnt, nav_rec_cnt, nav_erec_cnt = get_recon_totals(reader_list)
        print nav_tot_cnt, nav_rec_cnt, nav_erec_cnt
        # This switches the dictionary to a sorted list... necessary??
        reader_list = sorted(reader_list, key=lambda key: (key['PEOPLE_ID'],
                                                           key['DON_DATE']))
        .
        .

def get_recon_totals(reader_list):
    print reader_list
    client_tot_cnt = 0
    client_rec_cnt = 0
    client_erec_cnt = 0
    for line in reader_list:  # line here is a dict
        if line[<fieldname for first column>] == 'T' and line[<fieldname for second column>] == 'Total Amount':
            print 'Total Amount found.'
            client_tot_cnt = line[<fieldname for third column>]
        .
        .  # continued like above
        .
    return client_tot_cnt, client_rec_cnt, client_erec_cnt
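A tiny, self-contained illustration (toy data and Python 3 syntax, not the real client file) of why materialising the reader works: the DictReader can only be walked once, but the list built from it can be walked any number of times.

import csv
from io import StringIO

sample = StringIO("PEOPLE_ID,DON_DATE\n1,2015-01-01\nT,Total Amount\n")
rows = list(csv.DictReader(sample))

first_pass = [r['PEOPLE_ID'] for r in rows]
second_pass = [r['PEOPLE_ID'] for r in rows]   # the list is not exhausted
print(first_pass == second_pass)               # True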
