I have a csv file and I need to mix 2 of its columns:
Sitio, ID_espacio, Espacio, Tamano, Country, Impresiones_exchange, Importe_a_cobrar, eCPM, Subastas, Fill_rate
NUEVO_Infotechnology, 264244, NUEVO_Infotechnology - Home_IT - IT_Header, Variable (1240x90), Bangladesh, 0, 0.00, 0.00, 1, 0.00
NUEVO Apertura, 274837, NUEVO Apertura - Nota_Ap - Right3_300x250, 300x250, Paises Bajos, 0, 0.00, 0.00, 4, 0.00
The problem is I need to mix ID_espaciowith Espacio but in this way:
example:
NUEVO_Infotechnology, 264244, NUEVO_Infotechnology - Home_IT - IT_Header, Variable (1240x90), Bangladesh, 0, 0.00, 0.00, 1, 0.00
What I need:
NUEVO_Infotechnology, 264244 - Home_IT - IT_Header, Variable (1240x90), Bangladesh, 0, 0.00, 0.00, 1, 0.00
As you can see I remove the first name of the Espacio until the '-' and then i put the ID_espacio.
I tried to do it and I could but the now I need to have all the csv and not only my modification:
import csv
lista_ok = []
base = []
with open("test.csv", 'rb') as f:
reader = csv.reader(f)
your_list = list(reader)
for item in your_list[1:]:
a = item[2].split(" - ")
base.append(a)
for item in base:
for itemf in your_list[1:]:
b = []
a = itemf[1] + ' - ' + ' - '.join(item[1:])
b.append(a)
lista_ok.append(b)
Output:
[[' 264244 - Home_IT - IT_Header'], [' 274837 - Home_IT - IT_Header'], [' 264244 - Nota_Ap - Right3_300x250'], [' 274837 - Nota_Ap - Right3_300x250']]
Output I need:
[['Sitio', ' ID_espacio', ' Espacio', ' Tamano', ' Country', ' Impresiones_exchange', ' Importe_a_cobrar', ' eCPM', ' Subastas', ' Fill_rate'], ['NUEVO_Infotechnology', ' 264244 - Home_IT - IT_Header', ' Variable (1240x90)', ' Bangladesh', ' 0', ' 0.00', ' 0.00', ' 1', ' 0.00'], ['NUEVO Apertura', ' 274837 - Nota_Ap - Right3_300x250', ' 300x250', ' Paises Bajos', ' 0', ' 0.00', ' 0.00', ' 4', ' 0.00']]
Here another version:
import csv
lista_ok = []
with open("test.csv", 'rb') as f:
reader = csv.reader(f)
your_list = list(reader)
for item in your_list:
sitio = item[0]
id_espacio = item[1]
item.remove(id_espacio)
espacio_parts = item[1].split(' - ')
if your_list.index(item) > 0:
espacio_parts[0] = espacio_parts[0].lstrip().replace(sitio,id_espacio)
espacio = ' - '.join(espacio_parts)
item[1] = espacio
lista_ok.append(item)
You could write a function that transforms a single row the way you want. Then call that function for each row as you read it from the file and put it in your final list:
def row_transform(row, is_header=False):
if not is_header:
# trim Sitio from Espacio
row[2] = row[2].split(" - ", 1)[1]
# add ID to espacio
row[2] = " - ".join((row[1], espacio))
# remove ID col
del row[1]
return row
with open("test.csv") as fp:
reader = csv.reader(fp)
lista_ok = [row_transform(next(reader), True)]
lista_ok.extend((row_transform(row) for row in reader))
Related
I am looking to extract tweets and write them to a CSV file, however, I cannot figure out how to get it to generate a file. I am using Tweepy to extract the tweets. I would like the CSV file to contain the following cells: User, date, tweet, likes, retweets, total, eng rate, rating, tweet id
import tweepy
import csv
auth = tweepy.OAuthHandler("", "")
auth.set_access_token("", "")
api = tweepy.API(auth)
try:
api.verify_credentials()
print("Authentication OK")
except:
print("Error during authentication")
def timeline(username):
tweets = api.user_timeline(screen_name=username, count = '100', tweet_mode="extended")
for status in (tweets):
eng = round(((status.favorite_count + status.retweet_count)/status.user.followers_count)*100, 2)
if (not status.retweeted) and ('RT #' not in status.full_text) and (eng <= 0.02):
print (status.user.screen_name + ',' + str(status.created_at) + ',' + status.full_text + ",Likes: " + str(status.favorite_count) + ",Retweets: " + str(status.retweet_count) + ',Total: ' + str(status.favorite_count + status.retweet_count) + ',Engagement rate: ' + str(eng) + '%' + 'Rating: Low' + ',Tweet ID: ' + str(status.id))
elif (not status.retweeted) and ('RT #' not in status.full_text) and (0.02 < eng <= 0.09):
print (status.user.screen_name + ',' + str(status.created_at) + ',' + status.full_text + ",Likes: " + str(status.favorite_count) + ",Retweets: " + str(status.retweet_count) + ',Total: ' + str(status.favorite_count + status.retweet_count) + ',Engagement rate: ' + str(eng) + '%' + 'Rating: Good' + ',Tweet ID: ' + str(status.id))
elif (not status.retweeted) and ('RT #' not in status.full_text) and (0.09 < eng <= 0.33):
print (status.user.screen_name + ',' + str(status.created_at) + ',' + status.full_text + ",Likes: " + str(status.favorite_count) + ",Retweets: " + str(status.retweet_count) + ',Total: ' + str(status.favorite_count + status.retweet_count) + ',Engagement rate: ' + str(eng) + '%' + 'Rating: High' + ',Tweet ID: ' + str(status.id))
elif (not status.retweeted) and ('RT #' not in status.full_text) and (0.33 < eng):
print (status.user.screen_name + ',' + str(status.created_at) + ',' + status.full_text + ",Likes: " + str(status.favorite_count) + ",Retweets: " + str(status.retweet_count) + ',Total: ' + str(status.favorite_count + status.retweet_count) + ',Engagement rate: ' + str(eng) + '%' + 'Rating: Very High' + ',Tweet ID: ' + str(status.id))
tweet = timeline("twitter")
with open('tweet.csv', 'w', newline='', encoding='utf-8') as f:
writer = csv.writer(f)
writer.writerow([tweet])
You can look at https://docs.python.org/3/library/csv.html for the info on how to generate a csv file in Python. Quick exmaple:
import csv
with open('some_output.csv', 'w') as csvfile:
writer = csv.writer(csvfile)
writer.writerow(["field1", "field2", "field3"])
Your function get_tweets does not return a value but you are trying to retrieve a value from that function which would result in None. Also it looks like tweet value will be list of strings. writerow method from csv.writer should get list of items and not list of lists. I have modified your code to address those issues. Let me know if it works.
def get_tweets(username):
tweets = api.user_timeline(screen_name=username, count=100)
tweets_for_csv = [tweet.text for tweet in tweets]
print(tweets_for_csv)
return tweets_for_csv
tweet = get_tweets("fazeclan")
with open('tweet.csv', 'w') as f:
writer = csv.writer(f)
writer.writerow(tweet)
I'm iterating API requests for each row of the input CSV file. And I want to add API output results to the existing CSV file.
Input
Desired output
As you can see, I added three headers with corresponding results (latitude, longitude, coordinates)
However, I'm finding difficulty with writing the right query for this. Below is the best I could do.
df=pd.read_csv(r"C:\users\testu\documents\travis_50000_melissa_joined_dropna - Copy2.csv",delimiter=',', na_values="nan")
# Output
with open(r"C:\users\testu\documents\travis_50000_melissa_joined_dropna - Copy2.csv", 'r') as csvin, open (r"C:\users\testu\documents\travis_50000_melissa_joined_dropna - Copy3.csv", 'w', newline='') as out:
csvreader = csv.DictReader(csvin)
fieldnames = csvreader.fieldnames + ["latitude","longitude","coordinates"]
csvwriter = csv.DictWriter(out, fieldnames)
csvwriter.writeheader()
# Iterating requests for each row
for row in df.itertuples():
output = client.geocode(str(row.addressline1) + ', ' + str(row.city) + ', ' + str(row.state) + ', ' + str(row.postalcode)).coords
cord = '(' + str(output[0]) + ', '+ str(output[1]) + ')'
for node, row in enumerate(csvreader, 3):
csvwriter.writerow(dict(3, {'latitude': output[0], 'longitude': output[1], 'coordinates': cord}))
Update:
Here is my new Python query:
df=pd.read_csv(r"C:\users\testu\documents\travis_50000_melissa_joined_dropna - Copy2.csv",delimiter=',', na_values="nan")
# Output
with open(r"C:\users\testu\documents\travis_50000_melissa_joined_dropna - Copy2.csv", 'r') as csvin, open (r"C:\users\testu\documents\travis_50000_melissa_joined_dropna - Copy3.csv", 'w', newline='') as out:
csvreader = csv.DictReader(csvin)
fieldnames = csvreader.fieldnames + ["latitude","longitude","coordinates"]
csvwriter = csv.DictWriter(out, fieldnames)
csvwriter.writeheader()
# Iterating requests for each row
for row in df.itertuples():
output = client.geocode(str(row.addressline1) + ', ' + str(row.city) + ', ' + str(row.state) + ', ' + str(row.postalcode)).coords
cord = '(' + str(output[0]) + ', '+ str(output[1]) + ')'
for node, row1 in enumerate(csvreader, 38):
csvwriter.writerow(dict(row1,latitude= output[0] % node))
for node, row2 in enumerate(csvreader, 39):
csvwriter.writerow(dict(row2,longitude = output[1] % node))
for node, row3 in enumerate(csvreader, 40):
csvwriter.writerow(dict(row3,coordinates= cord % node))
However, I get the following result:
You can more easily accomplish this by using more of pandas features.
Import the data from csv as you have been doing.
import pandas as pd
df = pd.read_csv("input_file.csv")
You can use dataframe.apply(func, axis=1) to apply a function to each row of a dataframe. https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.apply.html
def get_coords(row):
return client.geocode(str(row.addressline1) + ', ' + str(row.city) + ', ' \
+ str(row.state) + ', ' + str(row.postalcode)).coords
coords = df.apply(get_coords, axis=1)
df['latitide'] = coords.values[:,0]
df['longitude'] = coords.values[:,1]
df['coords'] = coords
You can then easily save the dataframe to csv using:
df.to_csv('output_filename.csv')
Hope this help.
p.s. code is untested but should be good :)
I have text file, that store orders info in following format. I try to search an order by first line of the block, that represent ID and print 7 next lines. But my code checking just the first line or print all line's that contain an input number. Could somebody help me?
4735
['Total price: ', 1425.0]
['Type of menu: ', 'BBQ']
['Type of service: ', ' ']
['Amount of customers: ', 25.0]
['Discount: ', '5%', '= RM', 75.0]
['Time: ', '2017-01-08 21:39:19']
3647
['Total price: ', 2000.0]
['Type of menu: ', ' ']
['Type of service: ', 'Tent ']
['Amount of customers: ', 0]
.......
I use the following code to search in text file.
try:
f = open('Bills.txt', 'r')
f.close()
except IOError:
absent_input = (raw_input("|----File was not founded----|\n|----Press 'Enter' to continue...----|\n"))
report_module = ReportModule()
report_module.show_report()
Id_input = (raw_input("Enter ID of order\n"))
with open("Bills.txt", "r") as f:
searchlines = f.readlines()
j = len(searchlines) - 1
for i, line in enumerate(searchlines):
if Id_input in str(line): # I also try to check in this way (Id_input == str(line)), but it didn't work
k = min(i + 7, j)
for l in searchlines[i:k]: print l,
print
else:
absent_input = (raw_input("|----Order was not founded----|\n|----Press 'Enter' to continue...----|\n"))
report_module = ReportModule()
report_module.show_report()
check the following code.
Id_input = (raw_input("Enter ID of order\n")).strip()
try:
f = open("Bills.txt", "r")
print_rows = False
for idline in f:
if idline.strip() == Id_input:
print_rows = True
continue
if print_rows:
if idline.startswith("["):
print idline
else:
break
if not print_rows:
absent_input = (raw_input("|----Order was not founded----|\n|---- Press 'Enter' to continue...----|\n"))
report_module = ReportModule()
report_module.show_report()
except IOError:
absent_input = (raw_input("|----File was not founded----|\n|---- Press 'Enter' to continue...----|\n"))
report_module = ReportModule()
report_module.show_report()
sample data:
id, Name, mail, data1, data2, data3
1, Name1, mail#com, abc, 14, de
1, Name1, mail#com, fgh, 25, kl
1, Name1, mail#com, mno, 38, pq
2, Name2, mail#com, abc, 14, d
I wrote a script that selects the first field is a unique string to clear the duplicates. However, since the data in the fields date1-3 are not repeated, it is necessary to make the result:
1, Name1, mail#com, "abc, 14, de, fgh, 25, kl, mno, 38, pq"
How to merge rows in the array?
My code not work:
import sys
import csv
in_fln = sys.argv[1]
# You can replace here and choose any delimiter:
csv.register_dialect('dlm', delimiter=',')
csv.register_dialect('dmt', delimiter=';')
# if this .csv file do:
if (in_fln[-3:]) == "csv":
out_fln = 'out' + in_fln
inputf = open(in_fln, 'r')
seen = []
outfile = []
nout = {}
#rowun = []
try:
reader = csv.reader(inputf, dialect='dlm')
# select by ContactID
for row in reader:
if row[0] not in seen:
#IT'S work byt temp comment
#rowun = '"' + (row[-4]) + ', ' + (row[-3]) + ', ' + (row[-2]) + '"'
#outfile.append(row[:-5]+[rowun])
outfile.append(row[:-4])
rowun = (row[0])
nout[rowun] = (row[-4:-1])
seen.append(row[0])
print (type(row))
else:
#rowun = '"' + (row[-4]) + ', ' + (row[-3]) + ', ' + (row[-2]) + '"'
#nout.insert(-1,(row[-4:-1]))
print (type(row))
rowun = (row[0])
rowun2 = {rowun:(row[-4:-1])}
nout.update(rowun2)
finally:
#print (nout)
#print (outfile[:-1])
#csv.writer(open(('nout' + in_fln), 'w', newline='')).writerows(nout)
csv.writer(open(out_fln, 'w', newline=''), dialect='dlm').writerows(outfile)
inputf.close()
print ("All done")
This should do the trick.
from collections import defaultdict
import pandas as pd
# recreate your example
df = pd.DataFrame([[1, 'Name1', 'mail#com', 'abc', 14, 'de'],
[1, 'Name1', 'mail#com', 'fgh', 25, 'kl'],
[1, 'Name1', 'mail#com', 'mno', 38, 'pq'],
[2, 'Name2', 'mail#com', 'abc', 14, 'd']
], columns=['id', 'Name', 'mail', 'data1', 'data2','data3'])
res = defaultdict(list)
for ind, row in df.iterrows():
key = (row['id'], row['Name'], row['mail'])
value = (row['data1'], row['data2'], row['data3'])
res[key].append(value)
for key, value in res.items():
print(key, value)
# gives
# (2, 'Name2', 'mail#com') [('abc', 14, 'd')]
# (1, 'Name1', 'mail#com') [('abc', 14, 'de'), ('fgh', 25, 'kl'), ('mno', 38, 'pq')]
My own version is very close to the beter:
Now all work!
#!/usr/bin/env python3
import csv, re
import os, sys
in_fln = sys.argv[1]
# You can replace here and choose any delimiter:
#csv.register_dialect('dlm', delimiter=',')
dm = ','
seen = []
# if this .csv file do:
if (in_fln[-3:]) == "csv":
out_fln = 'out' + in_fln
#create the full structure: output_rows
infile = csv.reader(open(in_fln, 'r'), delimiter=dm, quotechar='"')
output_rows = []
for row in infile:
a = 0
if row[0] not in seen:
seen.append(row[0])
output_rows.append(row[:-4])
#rowun = '"' + row[-4] + ', ' + row[-3] + ', ' + row[-2] + '"'
rowun = row[-4] + ', ' + row[-3] + ', ' + row[-2]
output_rows.append([rowun])
else:
#output_rows.append([row[-4], row[-3], row[-2]])
#rowun = '"' + row[-4] + ', ' + row[-3] + ', ' + row[-2] + '"'
rowun = row[-4] + ', ' + row[-3] + ', ' + row[-2]
#output_rows.insert(-1,[rowun])
#rowun = str(rowun)
#print (rowun)
output_rows[-1].append(rowun)
#Finally save it to a file
csv.writer(open(out_fln, 'w', newline=''), delimiter=dm, quotechar='"').writerows(output_rows)
chng = [
['","',','], # chng "," on ,
['\n"',',"'], # Del new str
]
input_file = open(out_fln).read()
output_file = open(out_fln,'w')
for string in chng:
input_file = re.sub(str(string[0]),str(string[1]),input_file)
output_file.write(input_file)
output_file.close()
print ("All done")
I know how to remove duplicates from a list using set() or two lists, but how do I maintain the same list and add a number at the end for duplicates? I could do it using if, but it´s not pythonic. Thanks guys!!
nome_a = ['Anthony','Rudolph', 'Chuck', 'Chuck', 'Chuck', 'Rudolph', 'Bob']
nomes = []
for item in nome_a:
if item in nomes:
if (str(item) + ' 5') in nomes:
novoitem = str(item) + ' 6'
nomes.append(novoitem)
if (str(item) + ' 4') in nomes:
novoitem = str(item) + ' 5'
nomes.append(novoitem)
if (str(item) + ' 3') in nomes:
novoitem = str(item) + ' 4'
nomes.append(novoitem)
if (str(item) + ' 2') in nomes:
novoitem = str(item) + ' 3'
nomes.append(novoitem)
else:
novoitem = str(item) + ' 2'
nomes.append(novoitem)
if item not in nomes:
nomes.append(item)
print(nomes)
Edit(1): Sorry. I edited for clarification.
You could use the following:
names = ['Anthony','Rudolph', 'Chuck', 'Chuck', 'Chuck', 'Rudolph', 'Bob']
answer = []
name_dict = {}
for name in names:
if name_dict.get(name):
name_dict[name] += 1
answer.append('{}_{}'.format(name, name_dict[name]))
else:
name_dict[name] = 1
answer.append(name)
print(answer)
Output
['Anthony', 'Rudolph', 'Chuck', 'Chuck_2', 'Chuck_3', 'Rudolph_2', 'Bob']