Python 3 csv.reader empty response - python

Hello, I have the following code, but the loop won't work because the csv.reader is empty. The file with the CSV data is opened correctly.
For understanding:
The var pokemon can be any Pokemon name as a string.
bot, logger and event are vars coming from the Hangoutsbot.
All needed libraries are loaded.
Code:
def pkmn_translate(bot, event, pokemon):
    logger.info("translating pokemon name")
    url = "https://raw.githubusercontent.com/PokeAPI/pokeapi/master/data/v2/csv/pokemon_species_names.csv"
    request = urllib.request.Request(url, headers={"User-agent": "Mozilla/5.0", "Accept-Charset": "utf-8"})
    try:
        data = urllib.request.urlopen(request)
        csv_data = data.read()
        csvstr = str(csv_data).strip("b'")
        lines = csvstr.split("\\n")
        f = open('{}/pokemon_species_names.csv'.format(os.path.dirname(os.path.realpath(__file__))), "w", encoding='utf8')
        for line in lines:
            f.write(line + "\n")
        f.close()
        logger.info("translating db saved")
    except urllib.error.URLError as e:
        logger.info("{}: Error: {}".format(event.user.full_name, json.loads(e.read().decode("utf8","ignore"))['detail']))
        yield from bot.coro_send_message(event.conv, "{}: Error: {}".format(event.user.full_name, json.loads(e.read().decode("utf8","ignore"))['detail']))
        return
    pokemon_id = "default"
    f = open('{}/pokemon_species_names.csv'.format(os.path.dirname(os.path.realpath(__file__))), 'r', encoding='utf8')  # opens the csv file
    try:
        logger.info("DEBUG: openFile")
        # Quick and dirty fix because CSV File is very big
        maxInt = sys.maxsize
        decrement = True
        while decrement:
            # decrease the maxInt value by factor 10
            # as long as the OverflowError occurs.
            decrement = False
            try:
                csv.field_size_limit(maxInt)
            except OverflowError:
                maxInt = int(maxInt/10)
                decrement = True
        logger.info("DEBUG: maxInt = {}".format(maxInt))
        reader = csv.reader(f)
        rows = list(reader)
        for row in reader:
            logger.info("DEBUG: row = {}".format(row))
            for column in row:
                if pokemon == column:
                    # DEBUG
                    logger.info("Info: row = {}".format(row))
                    # SET VAR
                    pokemon_id = rows[row][0]
                    # DEBUG
                    logger.info("Info: {}".format(pokemon_id))
                    bot.coro_send_message(event.conv, "Info: {}".format(pokemon_id))
                else:
                    logger.info("Error: Name not in File!")
                    bot.coro_send_message(event.conv, "Error: Name not in File!")
            else:
                logger.info("DEBUG: Loop exited")
        else:
            logger.info("DEBUG: Loop exited")
    except:
        logger.info("Debug: Some error")
    finally:
        f.close()  # closing
        logger.info("Debug func: PokemonID = {}".format(pokemon_id))
    yield from pokemon_id
    return pokemon_id
At the for loop, the reader variable yields no data and the loop fails. I don't know how to get the csv.reader to work.
PS: I am a total noob at Python.

Your list(reader) call consumes the reader, which is therefore already empty by the time the for loop runs.
Just replace
reader = csv.reader(f)
rows = list(reader)
for row in reader:
with
reader = csv.reader(f)
rows = list(reader)
for row in rows:
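To see why, here is a small standalone demonstration (not from the original thread) that a csv.reader is a one-pass iterator: draining it with list() leaves nothing for a second loop, and only rewinding the underlying file lets a fresh reader see the rows again.

import csv
import io

f = io.StringIO("bulbasaur,1\nivysaur,2\n")
reader = csv.reader(f)
rows = list(reader)          # consumes every row
print(rows)                  # [['bulbasaur', '1'], ['ivysaur', '2']]
print(list(reader))          # [] -- the reader is already exhausted
f.seek(0)                    # rewind the underlying file...
print(list(csv.reader(f)))   # ...and a fresh reader sees the rows again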

Related

How to make a file path a global variable in Python?

I've been getting this error: NameError: name 'filep' is not defined. I need filep to be a file path, but every time I run my code I get this error.
I need to make filep a module-level variable instead of a parameter, as well as menulist.
import csv

filep =  # filepath
menulist = []  # global scope

def menu_List():
    global menulist
    menulist = []  # store items
    try:
        with open(filep) as f:  # read file
            reader = f.readlines()
            next(reader, None)  # skip the header
            for row in reader:
                row[2] = int(row[2].strip())    # convert string to int
                row[1] = float(row[1].strip())  # convert string to float
                if row[2] > 100 and row[2] < 200:
                    menulist.append(row)
    except NameError:
        raise ValueError("Variable not set")
    menulist.sort(key=lambda x: x[-1])

menu_List()
You do not need global variables here; your function should accept the path as an argument and return the menu list:
import csv

def menu_List(filep):
    menulist = []  # store items
    try:
        with open(filep) as f:  # read file
            reader = f.readlines()
            next(reader, None)  # skip the header
            for row in reader:
                row[2] = int(row[2].strip())    # convert string to int
                row[1] = float(row[1].strip())  # convert string to float
                if row[2] > 100 and row[2] < 200:
                    menulist.append(row)
    except NameError:
        raise ValueError("Variable not set")
    menulist.sort(key=lambda x: x[-1])
    return menulist

menulist = menu_List("a/path/goes/here")
Unrelated to your question, you can skip the header like you did or like this:
reader = f.readlines()
for row in reader[1:]:  # skip the first line
    ...
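For what it's worth, since the question already imports csv, a csv.reader variant of the same function might look like this. This is only a sketch: it assumes the file really is comma-separated and keeps the column positions from the comments above.

import csv

def menu_list_csv(filep):
    menulist = []  # store items
    with open(filep, newline='') as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the header
        for row in reader:
            row[2] = int(row[2].strip())    # convert string to int
            row[1] = float(row[1].strip())  # convert string to float
            if 100 < row[2] < 200:
                menulist.append(row)
    menulist.sort(key=lambda x: x[-1])
    return menulist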
The answer above (which passes filep as an argument) is the best solution, but if you are determined not to use arguments:
filep = 'file path you want'  # filepath
menulist = []  # global scope

def menu_List():
    global filep  # just add one more global will make it work
    global menulist
    menulist = []  # store items
    try:
        with open(filep) as f:  # read file
            reader = f.readlines()
            next(reader, None)  # skip the header
            for row in reader:
                row[2] = int(row[2].strip())    # convert string to int
                row[1] = float(row[1].strip())  # convert string to float
                if row[2] > 100 and row[2] < 200:
                    menulist.append(row)
    except NameError:
        raise ValueError("Variable not set")
    menulist.sort(key=lambda x: x[-1])

menu_List()
Extra little tip: try to avoid global variables if you can; globals can slow a program down, eat memory, and sometimes lead to messy variable naming.

Search value in column in CSV file using Python

I need to scan line by line through the time column in a CSV file, check whether there was activity in another column during the previous 5 hours, and then add a column with the value 1.
Here is my idea:
import csv
from collections import namedtuple
from contextlib import closing

light3 = pd.read_csv('G:/light3.csv')
light3.Time = pd.to_datetime(light3.Time)
m = light3.Time - DateOffset(hours=5)

def search():
    item = light3[(light3['Time'] > m) | (light3['Time'] == m)]
    raw_data = 'G:/light3.csv'
    failure = 'No matching item could be found with that item code. Please try again.'
    check = False
    with open('G:/project/test.pcap_z/light.csv', 'r') as csvinput:
        with open('G:/light1.csv', 'w') as csvoutput:
            writer = csv.writer(csvoutput, lineterminator='\n')
            reader = csv.reader(csvinput)
            read_data = csv.DictReader(csvinput, delimiter=';')
            item_data = namedtuple(read_data['Time'], read_data.Time)
            all = []
            row = next(reader)
            row.append('f')
            all.append(row)
            while check == False:
                for row in reader:
                    if row.Item == item:
                        row.append('f')
                        all.append(row)
                        row.append(1)
                writer.writerows(all)
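No answer was posted for this one, but a minimal pandas-only sketch of the idea might look like the following. The column name and the reading of "activity during the last 5 hours" are assumptions; adjust them to the real data.

import pandas as pd

light3 = pd.read_csv('G:/light3.csv')
light3['Time'] = pd.to_datetime(light3['Time'])

# flag rows whose timestamp falls within 5 hours of the newest one
cutoff = light3['Time'].max() - pd.DateOffset(hours=5)
light3['f'] = (light3['Time'] >= cutoff).astype(int)  # 1 if recent, else 0

light3.to_csv('G:/light1.csv', index=False)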

How to skip a column when writing to a csv file in python

Sorry if this has been asked, but is it possible to skip a column when writing to a csv file?
Here is the code I have:
with open("list.csv","r") as f:
reader2 = csv.reader(f)
for row in reader2:
url = 'http://peopleus.intelius.com/results.php?ReportType=33&qi=0&qk=10&qp='+row
req = urllib.request.Request(url)
response = urllib.request.urlopen(req)
html = response.read()
retrieved_name = b'class="singleName">(.*?)<\/h1'
retrieved_number = b'<div\sclass="phone">(.*?)<\/div'
retrieved_nothing = b"(Sorry\swe\scouldn\\'t\sfind\sany\sresults)"
if re.search(retrieved_nothing,html):
noth = re.search(retrieved_nothing.decode('utf-8'),html.decode('utf-8')).group(1)
add_list(phone_data, noth)
else:
if re.search(retrieved_name,html):
name_found = re.search(retrieved_name.decode('utf-8'),html.decode('utf-8')).group(1)
else:
name_found = "No name found on peopleus.intelius.com"
if re.search(retrieved_number,html):
number_found = re.search(retrieved_number.decode('utf-8'),html.decode('utf-8')).group(1)
else:
number_found = "No number found on peopleus.intelius.com"
add_list(phone_data, name_found, number_found)
with open('column_skip.csv','a+', newline='') as mess:
writ = csv.writer(mess, dialect='excel')
writ.writerow(phone_data[-1])
time.sleep(10)
Assuming that there is data in the first three rows of column_skip.csv, can I have my program start writing its info in column 4?
Yeah, don't use the csv.writer method; write it as a simple file write operation:
file_path = 'your_csv_file.csv'
with open(file_path, 'w') as fp:
    # following are the data you want to write to csv
    fp.write("%s, %s, %s" % ('Name of col1', 'col2', 'col4'))
    fp.write("\n")
I hope this helps...
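If you would rather keep csv.writer, one common trick (a sketch, not from the original answer) is to pad the skipped columns with empty fields so the real data lands in column 4. The placeholder values stand in for the question's scraped results.

import csv

name_found = "Jane Doe"    # placeholder for the scraped name
number_found = "555-0100"  # placeholder for the scraped number

with open('column_skip.csv', 'a+', newline='') as mess:
    writ = csv.writer(mess, dialect='excel')
    # three empty fields occupy columns 1-3, so the data starts in column 4
    writ.writerow(['', '', '', name_found, number_found])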

How can I select only a particular row in a CSV file?

I have a little program that just needs to read one (and only one) row from a csv file and write the column values to a series of files. The program has three system arguments: the path to the data file, the job id (uuid), and the target row number, i.e. the row in the csv that I want to parse. It's not working, how can I fix it?
import csv
import sys
import itertools

f = sys.argv[1]
uuid = sys.argv[2]
target_row = sys.argv[3]

tmpdir = "/tmp/pagekicker/"
folder = tmpdir + uuid
destination1 = folder + '/csv/row.editedby'
destination3 = folder + '/csv/row.booktitle'
destination4 = folder + '/csv/row.seeds'
destination5 = folder + '/csv/row.imprint'

f = open(f, 'rb')
f1 = open(destination1, 'w')
f3 = open(destination3, 'w')
f4 = open(destination4, 'w')
f5 = open(destination5, 'w')

target_row = int(target_row)
try:
    reader = csv.reader(f)  # creates the reader object
    for row in itertools.islice(reader, 1, 1):  # iterates the rows of the file in order
        editedby = row[0]  # we throw away column 2
        booktitle = row[2]
        print row[2]
        seeds = row[3]
        imprint = row[4]
        f1.write(editedby)
        f3.write(booktitle)
        f4.write(seeds)
        f5.write(imprint)
    f.close()
    f1.close()
    f3.close()
    f4.close()
    f5.close()
finally:
    print 'done'
UPDATE: thanks Graham Bell for his suggested code. There are two "f5"s in the first line of his 'with' statement. My code now looks like this:
import csv
import sys
import itertools

f = sys.argv[1]
uuid = sys.argv[2]
target_row = sys.argv[3]

tmpdir = "/tmp/pagekicker/"
folder = tmpdir + uuid
# os.mkdir(folder)
destination3 = folder + '/csv/row.booktitle'
destination1 = folder + '/csv/row.editedby'
destination4 = folder + '/csv/row.seeds'
destination5 = folder + '/csv/row.imprint'

with open(f, 'rb') as f, open(destination1, 'w') as f1, open(destination3, 'w') as f3, open(destination4, 'w') as f4, open(destination5, 'w') as f5:
    target_row = int(target_row)
    try:
        reader = csv.reader(f)  # creates the reader object
        for row in itertools.islice(reader, 1, 1):  # iterates the rows of the file in order
            editedby = row[0]  # we throw away column 2
            booktitle = row[2]
            print row[2]
            seeds = row[3]
            imprint = row[4]
            f1.write(editedby)
            f3.write(booktitle)
            f4.write(seeds)
            f5.write(imprint)
    except
        print 'done'
Without the except, it generates "unexpected unindent" when I run it. With the except, it says that the except line is invalid syntax.
The csv library's DictReader() object can report the current line number with:
reader = csv.DictReader(csv_file)
reader.line_num
You could iterate through, doing nothing until you reach the line number you need, something like this:
for row in reader:
    if reader.line_num == row_you_want:
        # do something
The DictReader class also lets the first row of your CSV file serve as column titles, and then you can access each field by name:
row["title_of_column1"]
which might save you some work as well. Also, you should use the Python with block when working with files, like so:
with open(f, 'rb') as f, open(destination1, 'w') as f1, open(destination3, 'w') as f3, open(destination4, 'w') as f5, open(destination5, 'w') as f5:
    target_row = int(target_row)
    try:
        reader = csv.reader(f)  # creates the reader object
        for row in itertools.islice(reader, 1, 1):  # iterates the rows of the file in order
            editedby = row[0]  # we throw away column 2
            booktitle = row[2]
            print row[2]
            seeds = row[3]
            imprint = row[4]
            f1.write(editedby)
            f3.write(booktitle)
            f4.write(seeds)
            f5.write(imprint)
This way you don't have to worry about closing them all.
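Putting those pieces together, a minimal runnable sketch of the line_num approach (the file name and column title are assumptions, not from the original question):

import csv

target_row = 3  # 1-based data row to extract

with open('input.csv') as csv_file:
    reader = csv.DictReader(csv_file)
    for row in reader:
        # line_num counts physical lines and the header is line 1,
        # so data row N is reached when line_num == N + 1
        if reader.line_num == target_row + 1:
            print(row['booktitle'])
            break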
Assuming you count rows from 1 (rather than 0), here's a standalone function that will do it:
import csv
from contextlib import contextmanager
import sys
import itertools
import os  # needed for os.path.join below

@contextmanager
def multi_file_manager(files, mode='r'):
    """ Context manager for multiple files. """
    files = [open(file, mode) for file in files]
    yield files
    for file in files:
        file.close()

# This is the standalone function
def csv_read_row(filename, n):
    """ Read and return nth row of a csv file, counting from 1. """
    with open(filename, 'rb') as f:
        reader = csv.reader(f)
        return next(itertools.islice(reader, n-1, n))

if len(sys.argv) != 4:
    print('usage: utility <csv filename> <uuid> <target row>')
    sys.exit(1)

tmpdir = "/tmp/pagekicker"
f = sys.argv[1]
uuid = sys.argv[2]
target_row = int(sys.argv[3])

folder = os.path.join(tmpdir, uuid)
destinations = [folder+dest for dest in ('/csv/row.editedby',
                                         '/csv/row.booktitle',
                                         '/csv/row.seeds',
                                         '/csv/row.imprint')]
with multi_file_manager(destinations, mode='w') as files:
    row = csv_read_row(f, target_row)
    #editedby, booktitle, seeds, imprint = row[0], row[2], row[3], row[4]
    for i,j in zip(range(4), (0, 2, 3, 4)):
        files[i].write(row[j]+'\n')

Removing Duplicate CSV Entries with python

I have just completed a script that (sigh) finally works. It searches twitter for keywords. The results are written to a csv with 4 columns of keyword, Tweet, Lat, Lon (location). The code that I'm using is:
import tweepy
import csv

keywordList = ['McDonalds', 'Taco Bell', 'Burger King']
for keyword in keywordList:
    result = tweepy.api.search(q=keyword, rpp=1000, page=2, geocode="34.085422,-117.900879,500mi")
    with open(r'C:\Temp\results.csv', 'a') as acsv:
        w = csv.writer(acsv)
        for tweet in result:
            lat, lon = tweet.geo if tweet.geo else ('', '')
            try:
                a = tweet.geo['coordinates']
                print a[0], a[1]
                print tweet.text
                w.writerow((keyword, tweet.text, a[0], a[1]))
            except:
                pass
I want to use task manager or Python to run this search every 5 minutes, but it will rewrite duplicates. I was going to use the following code to remove duplicates, but two things happen: results2.csv is blank, and when I go to open the csv, it is locked and I have to view it read-only. I tried f1.close(), writer.close(), etc., but it says 'csv.reader' object has no attribute 'close'.
My biggest concern is getting no duplicates, either by writing to the new csv or by somehow removing from and rewriting the same table on each search. Any suggestions are much appreciated!!
import csv

f1 = csv.reader(open(r'C:\Temp\results.csv', 'rb'))
writer = csv.writer(open(r'C:\Temp\results2.csv', 'wb'))
tweet = set()
for row in f1:
    if row[1] not in tweet:
        writer.writerow(row)
        tweet.add(row[1])
f1.close()
writer.close()
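For what it's worth, the two symptoms (an empty results2.csv and a locked file) come from the file objects never being closed: csv.reader and csv.writer only wrap the file handles, which is why the reader has no close() of its own. A minimal sketch of a fix (assuming Python 2, to match the question's code):

import csv

seen = set()
with open(r'C:\Temp\results.csv', 'rb') as inf, \
        open(r'C:\Temp\results2.csv', 'wb') as outf:
    reader = csv.reader(inf)
    writer = csv.writer(outf)
    for row in reader:
        if row[1] not in seen:  # row[1] is the tweet text
            writer.writerow(row)
            seen.add(row[1])
# both files are flushed and closed when the with block exits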
Here's a refactored version:
Edit: unicode, what fun - I've added a .decode() call in read_csv() and an .encode() call in append_csv(); this should solve your problem (I think - you might need to decide on a string codec).
import tweepy
import csv
from collections import defaultdict
import time

FILE = 'c:/temp/results.csv'
KEYWORDS = ['McDonalds', 'Taco Bell', 'Burger King']
WHERE = "34.085422,-117.900879,500mi"
DELAY = 300  # seconds

def _float(s, err=None):
    try:
        return float(s)
    except ValueError:
        return err

def _str(f, err=""):
    return err if f is None else str(f)

def read_csv(fname=FILE):
    data = defaultdict(dict)
    with open(fname, 'rb') as inf:
        incsv = csv.reader(inf)
        for kw, tw, lat, lon in incsv:
            # added .decode() call to handle saved unicode chars
            data[kw][tw.decode()] = (_float(lat), _float(lon))
    return data

def append_csv(data, fname=FILE):
    with open(fname, "ab") as outf:
        outcsv = csv.writer(outf)
        # added .encode() call to handle saved unicode chars
        outcsv.writerows((kw, tw.encode(), _str(lat), _str(lon))
                         for kw, dat in data.iteritems()
                         for tw, (lat, lon) in dat.iteritems())

def search_twitter(keywords=KEYWORDS, loc=WHERE):
    data = defaultdict(dict)
    for kw in keywords:
        for tweet in tweepy.api.search(q=kw, rpp=1000, page=2, geocode=loc):
            data[kw][tweet.text] = tweet.geo if tweet.geo else (None, None)
    return data

def calc_update(old_data, new_data):
    diff = defaultdict(dict)
    for kw, dat in new_data.iteritems():
        for tw, loc in dat.iteritems():
            if tw not in old_data[kw]:
                diff[kw][tw] = old_data[kw][tw] = loc
    return old_data, diff

def show_data(data):
    for kw, dat in data.iteritems():
        for tw, (lat, lon) in dat.iteritems():
            print("<{},{}> {} [{}]".format(_str(lat, "???"), _str(lon, "???"), tw, kw))

def main():
    data = read_csv()
    while True:
        new_data = search_twitter()
        data, diff = calc_update(data, new_data)
        append_csv(diff)
        show_data(diff)
        time.sleep(DELAY)

if __name__ == "__main__":
    main()
