I am building a Twitter sentiment analysis tool. After cleaning the tweets, when I try to write the tweets from the .csv into a .txt file, it writes only the first tweet and repeats it until the end of the file. Consider the following code:
# NOTE(review): pasted without indentation; shown exactly as posted.
f = open('PanamaCase.csv', 'r')
with f:
# First pass over the file: print each cleaned tweet.
reader = csv.DictReader(f)
i=0
for row in reader:
row=str(row['Tweets'])  # rebinds `row` to the tweet text; keeps its last value after the loop
#print(type(row))
print(clean(row))
txt = open('cleanedTweets.txt','w')
#line = 0
with txt:
# BUG (the asked-about behaviour): `f` was already consumed by the first
# DictReader, so this second reader sees no fresh rows, and the write below
# reuses the stale `row` left over from the first loop -- presumably why the
# same tweet repeats in the output file.
reader2 = csv.DictReader(f)
for line in reader2:
txt.write(clean(row) + "\n")
I think your problem is that you are reading the input file twice in your code (or actually 1 + once for each line).
I suggest to try:
# Read the tweets in a single pass, cleaning each one and writing it out
# immediately -- the input file is only consumed once.
with open('PanamaCase.csv', 'r') as f:
    reader = csv.DictReader(f)  # was missing in the original suggestion
    with open('cleanedTweets.txt', 'w') as txt:
        for row in reader:
            tweet = str(row['Tweets'])
            cleaned = clean(tweet)  # clean once, use for both print and write
            print(cleaned)
            txt.write(cleaned + "\n")
Related
I have a text file which contains this data (items corresponds to code,entry1,entry2) :
a,1,2
b,2,3
c,4,5
....
....
Here a,b,c.. will be unique always
Each time, I read this file in Python either to create a new entry (for example, d,6,7) or to update existing values (say, changing a,1,2 to a,4,3).
I use the following code :
def append_record(data, filename="opfile.txt"):
    """Append *data* as one comma-separated line to *filename*.

    Returns True on success, False if the file could not be written.
    (The original fragment had a missing colon on the for-loop, left a
    trailing comma that re-parses as an empty final field, called close()
    inside the with-block, and used a bare except.)
    """
    datastring = ','.join(str(d) for d in data)
    try:
        with open(filename, "a") as f:
            f.write(datastring + '\n')
        return True
    except OSError:  # only I/O failures; don't swallow programming errors
        return False
This appends any data as a new entry.
I am trying something like this which checks the first character of each line:
# Scan opfile.txt looking for a line whose id matches `username`.
f = open("opfile.txt", "r")  # NOTE(review): never closed; prefer a `with` block
for x in f:
# NOTE(review): x[0] is the first *character* of the line, not the first
# comma-separated field -- this only matches while ids are single characters.
if(x[0] == username):
pass
I don't know how to combine these two so that the first item (let's call it the id) is checked: if an entry with that id already exists in the file, it should be replaced with the new data (leaving all other lines unchanged); otherwise the data should be added as a new line.
Read the file into a dictionary that uses the first field as keys. Update the appropriate dictionary, then write it back.
Use the csv module to parse and format the file.
import csv

data = ['a', 5, 6]

# Read every record into a dict keyed on its first field, so an existing
# entry with the same key is replaced and a new key is simply added.
with open("opfile.txt", "r", newline='') as infile:
    incsv = csv.reader(infile)
    d = {row[0]: row for row in incsv if len(row) != 0}
d[data[0]] = data
# newline='' on the output too: csv.writer emits its own line endings, and
# without it Windows would write \r\r\n.
with open("opfile.txt", "w", newline='') as outfile:
    outcsv = csv.writer(outfile)
    outcsv.writerows(d.values())
First, append any new rows to the file.
Second, use write() as follows to update existing rows in your file:
def update_record(file_name, field1, field2, field3):
    """Rewrite *file_name*, replacing the record whose first field equals
    *field1* with 'field1,field2,field3'; every other line is kept as-is.
    """
    with open(file_name, 'r') as f:
        lines = f.readlines()
    with open(file_name, 'w') as f:
        for line in lines:
            # Compare the key field exactly: the original `field1 in line`
            # substring test would also rewrite unrelated rows that merely
            # contain the key anywhere in the line.
            if line.split(',')[0] == field1:
                f.write(field1 + ',' + field2 + ',' + field3 + '\n')
            else:
                f.write(line)
I'm trying to use Python to copy lines from one csv file to another and add data to a new column in the process. The data is being copied correctly to the new file, but it's all being copied to the same line in the new file.
# NOTE(review): Python 2 snippet (print statement), pasted flat.
file = "C:/original_file.csv"
nf = "C:/file_updated.csv"
i = 0
with open(file, 'r') as origFile:
with open(nf, 'w') as newFile:
lineList = []
for line in origFile:
strippedLine = line.strip()
lineList = strippedLine.split(',')
lineList.append("C:/ATT" + str(i) + "_PHOTO 1.jpg")
lineStr = str(lineList)  # NOTE(review): str() of a list keeps brackets and quote marks
lineStr = lineStr.replace("'", "")
# BUG (the asked-about behaviour): write() adds no newline, so every
# row lands on the same output line.
newFile.write(lineStr)
print lineList
i += 1
origFile.close()  # redundant: the with-blocks already close both files
newFile.close()
How can I make it so that each line from the first file copies to a separate line of the new file?
file = "C:/original_file.csv"
nf = "C:/file_updated.csv"

# Copy each CSV row into the new file, appending a numbered photo path as
# an extra column, one output line per input line.
with open(file, 'r') as origFile:
    with open(nf, 'w') as newFile:
        for i, line in enumerate(origFile):
            lineList = line.strip().split(',')
            lineList.append("C:/ATT" + str(i) + "_PHOTO 1.jpg")
            # Join the fields instead of str(list).replace("'", ""): str()
            # keeps the surrounding brackets and a space after each comma,
            # which produces malformed CSV.
            newFile.write(','.join(lineList))
            newFile.write('\n')  # insert a new line per row
            print(lineList)
# No explicit close() calls needed: the with-blocks close both files.
No need to install pandas, the built-in csv library is great for this!!
$ cat tmp.csv
first,second
third,fourth
import csv

to_read = "./tmp.csv"
to_write = "./tmp2.csv"

# Stream rows from one CSV to the other, tagging each with a photo path
# numbered by its position in the file.
with open(to_read, newline="") as src_fp, open(to_write, "w", newline="") as dst_fp:
    rows_in = csv.reader(src_fp)
    rows_out = csv.writer(dst_fp)
    for idx, record in enumerate(rows_in):
        record.append(f"C:/ATT{idx}_PHOTO 1.jpg")
        rows_out.writerow(record)
$ cat tmp2.csv
first,second,C:/ATT0_PHOTO 1.jpg
third,fourth,C:/ATT1_PHOTO 1.jpg
If you want to do it without any imports you could try something like this which adds a new column with the header New Field.
Of course it assumes the original CSV has a header row.
file = "original_file.csv"
nf = "file_updated.csv"

# Parse every line of the original CSV into a list of fields.
with open(file, 'r') as origFile:
    rows = [raw.strip().split(',') for raw in origFile]

# The first row is the header; extend it and every data row with the new column.
header, body = rows[0], rows[1:]
header.append('New Field')
for idx, fields in enumerate(body):
    fields.append(f'C:/ATT{idx}_PHOTO 1.jpg')

# Re-serialise: comma-joined fields, newline-joined rows (no trailing newline).
with open(nf, 'w') as newFile:
    newFile.write('\n'.join(','.join(fields) for fields in [header] + body))
"""
SAMPLE INPUT
Field1,Field2
Data1,Data2
Data3,Data4
SAMPLE OUTPUT
Field1,Field2,New Field
Data1,Data2,C:/ATT0_PHOTO 1.jpg
Data3,Data4,C:/ATT1_PHOTO 1.jpg
"""
I have a JSON file like this: [{"ID": "12345", "Name":"John"}, {"ID":"45321", "Name":"Max"}...] called myclass.json. I used json.load library to get "ID" and "Name" values.
I have another .txt file with the content below. File name is list.txt:
Student,12345,Age 14
Student,45321,Age 15
.
.
.
I'm trying to create a script in python that compares the two files line by line and replace the student ID for the students name in list.txt file, so the new file would be:
Student,John,Age 14
Student,Max,Age 15
.
.
Any ideas?
My code so far:
import json
# Load the JSON records; remember only the last ID/Name pair seen.
with open('/myclass.json') as f:
data = json.load(f)
for key in data:
# NOTE(review): x and z are overwritten on every iteration, so after the
# loop they hold only the values from the *last* record.
x = key['Name']
z = key['ID']
with open('/myclass.json', 'r') as file1:
with open('/list.txt', 'r+') as file2:
for line in file2:
# BUG: this just rebinds the local name x to z; it never modifies
# `line` and never writes anything back to list.txt.
x = z
try this:
import json
import csv

# Build an in-memory copy of list.txt, then rewrite it with IDs replaced
# by the matching names from myclass.json.
with open('myclass.json') as fp:
    data = json.load(fp)

with open('list.txt', 'r') as fp:
    rows = list(csv.reader(fp))

def get_name(id_):
    """Return the Name of the record whose ID equals *id_*, or None."""
    return next((item["Name"] for item in data if item['ID'] == id_), None)

with open('list.txt', 'w') as fp:
    out = csv.writer(fp)
    for row in rows:
        replacement = get_name(id_=row[1])
        if replacement:
            row[1] = replacement
    out.writerows(rows)
Keep in mind that this script technically does not replace the items in the list.txt file one by one; instead it reads the entire file in and then overwrites list.txt entirely, reconstructing it from scratch. I suggest making a backup of list.txt, or giving the new txt file a different name, in case the program crashes on some unexpected input.
One option is individually open each file for each mode while appending a list for matched ID values among those two files as
import json
# Build the replacement lines in memory, then rewrite list.txt.
with open('myclass.json','r') as f_in:
data = json.load(f_in)
j=0
lis=[]
with open('list.txt', 'r') as f_in:
for line in f_in:
# NOTE(review): assumes the JSON records appear in the same order as the
# lines of list.txt -- TODO confirm. data[j] will raise IndexError once j
# passes the last record, and any non-matching line is silently dropped
# from `lis` (and therefore from the rewritten file).
if data[j]['ID']==line.split(',')[1]:
s = line.replace(line.split(',')[1],data[j]['Name'])
lis.append(s)
j+=1
with open('list.txt', 'w') as f_out:
for i in lis:
f_out.write(i)
scenario:
I'm trying to extract tweets from twitter, which is working fine,
next I'm trying to merge 10 files into 1(say file = QW).
# Merge step: for each name listed in `file`, append that name's
# *_tweets.csv rows onto a single output row.  NOTE(review): `file`,
# `writer`, `csv` and `os` must be defined/imported before this fragment.
for line in file:
my_row = [line]
filename = line.rstrip()+"_tweets"+".csv"
if(os.path.exists(filename)):
f = open(filename, "rt")  # NOTE(review): never closed; prefer `with`
reader = csv.reader(f, delimiter="\t")
# NOTE(review): enumerate() yields (index, row) tuples, so my_row collects
# tuples here, probably not the intended plain rows; this also shadows the
# outer loop variable `line`.
for line in enumerate(reader):
my_row.append(line)
writer.writerow(my_row)
else:
print(""+filename+ " doesnt exist")
my csv file looks like this
and then I will process that one file
problem: I want to read specific column of that CSV(QW) file
I tried row[0]
for row in input_file:
name_list = [] ;score = 0;
# BUG source: on an empty/blank row, row[0] raises IndexError
# ("list index out of range"); guard with `if not row: continue`.
name_list.append(row[0])
print(name_list)
for a in row:
# NOTE(review): str.find returns 0 for a match at the start of the cell,
# so `> 0` misses those; `if skill_input in a:` is the usual test.
if a.find(skill_input) > 0 :
score = score+1;
name_list.append(score)
print(name_list)
writer.writerow([name_list])
and that point I get an error
my csv file looks like this
name_list.append(row[0])
IndexError: list index out of range
Try this
# Collect column k from every row, skipping rows too short to have one.
name_list = []
for line in open("csvfile.csv"):
    csv_row = line.split(your_delimiter)  # returns a list, e.g. ["1","50","60"]
    # Check the row length first: on a blank line csv_row is [''], and the
    # original `csv_row[k]` test would itself raise the very IndexError
    # this snippet is meant to avoid.
    if len(csv_row) <= k or not csv_row[k]:
        continue
    name_list.append(csv_row[k])  # csv_row[k] is the kth column
I want to read CSV file which contains following data :
Input.csv-
10,[40000,1][50000,5][60000,14]
20,[40000,5][50000,2][60000,1][70000,1][80000,1][90000,1]
30,[60000,4]
40,[40000,5][50000,14]
I want to parse this CSV file row by row. But these lists contain commas, so I'm not getting the correct result.
Program-Code-
if __name__ == "__main__":
with open(inputfile, "r") as f:
reader = csv.reader(f,skipinitialspace=True)
next(reader,None)  # skip the header row
for read in reader:
no = read[0]
# BUG (the asked-about behaviour): csv.reader splits on *every* comma,
# so read[1] holds only the text up to the first comma inside the
# bracketed list; the remainder lands in read[2:].
splitted_record = read[1]
print splitted_record
Output-
[40000
[40000
[60000
[40000
I understand that csv.reader reads each column only up to the next comma. But how can I read each whole bracketed list as a single column?
Expected Output-
[40000,1][50000,5][60000,14]
[40000,5][50000,2][60000,1][70000,1][80000,1][90000,1]
[60000,4]
[40000,5][50000,14]
Writing stuff to other file-
name_list = ['no','splitted_record']
file_name = 'temp/'+ no +'.csv'
# NOTE(review): both branches open the same file in append mode; the only
# difference is that a header is written when the file did not yet exist.
if not os.path.exists(file_name):
f = open(file_name, 'a')  # NOTE(review): f is never closed in either branch
writer = csv.DictWriter(f,delimiter=',',fieldnames=name_list)
writer.writeheader()
else:
f = open(file_name, 'a')
writer = csv.DictWriter(f,delimiter=',',fieldnames=name_list)
writer.writerow({'no':no,'splitted_record':splitted_record})
How I can write this splitted_record without quote ""?
You can join those items back together, since you know they were split on commas:
if __name__ == "__main__":
    with open(inputfile, "r") as fh:
        records = csv.reader(fh, skipinitialspace=True)
        next(records, None)  # drop the header row
        for record in records:
            no = record[0]
            # Everything after the first field belongs to the bracketed
            # list; re-join it on the commas csv.reader split it on.
            splitted_record = ','.join(record[1:])
            print(splitted_record)
output
[40000,1][50000,5][60000,14]
[40000,5][50000,2][60000,1][70000,1][80000,1][90000,1]
[60000,4]
[40000,5][50000,14]
---update---
data is the above output
# Write each already-joined record back out as a single one-column CSV row.
with open(filepath, 'wb') as out_fp:
    csv_out = csv.writer(out_fp)
    for record in data:
        csv_out.writerow([record])
You can use your own dialect and register it to read as you need.
https://docs.python.org/2/library/csv.html