How to replace a string character pattern using Python in a CSV file - python

I am new to Python. How do I replace the character pattern ," with ,{ and ", with }, across the multiple lines of a .csv file?
Here is the content of my .csv file:
Name, Degree,Some, Occupation, Object
Mr. A,"B.A, M.A",123,"ags,gshs",ass
Mr. ABC,"B.A, M.A",231,"ags,gshs",asas
Mr. D,"BB.A, M.A",44,"ags,gshs",asas
Mr. G,"BBBB.A, M.A",12,"ags,gshs",asasasa
Mr. S,"B.A, MMM.A",10,"ags,gshs",asasas
Mr. R,"B.A, M.A",11,"ags,gshs",asasas
Mr. T,"B.A, M.A",12,"ags,gshs",asasa
Mr. DD,"B.A, M.A",13,"ags,gshs",asasas
So my output should be something like this:
Name, Degree,Some, Occupation, Object
Mr. A,{B.A, M.A},123,{ags,gshs},ass
Mr. ABC,{B.A, M.A},231,{ags,gshs},asas
Mr. D,{BB.A, M.A},44,{ags,gshs},asas
Mr. G,{BBBB.A, M.A},12,{ags,gshs},asasasa
Mr. S,{B.A, MMM.A},10,{ags,gshs},asasas
Mr. R,{B.A, M.A},11,{ags,gshs},asasas
Mr. T,{B.A, M.A},12,{ags,gshs},asasa
Mr. DD,{B.A, M.A},13,{ags,gshs},asasas

After reading the file with file.read(), you can use replace(old, new) to replace the substrings you want. Keep in mind that since the strings ," and ", contain quotes, you must put a \ before the quotes to show that they are part of the string.
EDIT: A comment mentioned you could enclose the string in ' '. If you do this, putting \ before the quotes is not required. For example, both ",\"" and ',"' are valid strings.
data = ""
with open("/path/to/file.csv") as file:
data = file.read().replace(",\"", ",{").replace("\",", "},")
with open("/path/to/new_file.csv") as file:
file.write(data)

If you only need it once, you could use pandas like this:
import io
import pandas as pd

data1 = '''\
Name,Degree,Some,Occupation,Object
Mr. A,"B.A, M.A",123,"ags,gshs",ass
Mr. ABC,"B.A, M.A",231,"ags,gshs",asas
Mr. D,"BB.A, M.A",44,"ags,gshs",asas
Mr. G,"BBBB.A, M.A",12,"ags,gshs",asasasa
Mr. S,"B.A, MMM.A",10,"ags,gshs",asasas
Mr. R,"B.A, M.A",11,"ags,gshs",asasas
Mr. T,"B.A, M.A",12,"ags,gshs",asasa
Mr. DD,"B.A, M.A",13,"ags,gshs",asasas'''

df = pd.read_csv(io.StringIO(data1), sep=',', dtype=object)
#df = pd.read_csv('input.csv', sep=',', dtype=object) # Use this row for real application

df['Degree'] = '{' + df['Degree'] + '}'
df['Occupation'] = '{' + df['Occupation'] + '}'

# Create custom output (df.to_csv would re-quote the fields that now contain commas)
out = '\n'.join([','.join(df.columns), '\n'.join(','.join(i) for i in df.values)])

with open('output.csv', 'w') as f:
    f.write(out)

You can use unpacking:
import csv

with open('filename.csv') as f:
    data = list(filter(None, csv.reader(f)))

rows = [data[0]] + [[a, '{' + b + '}', c, '{' + d + '}', e] for a, b, c, d, e in data[1:]]

# join the fields manually; csv.writer would re-quote the fields that now contain commas
with open('filename.csv', 'w') as f1:
    f1.write('\n'.join(','.join(row) for row in rows) + '\n')
Output:
Name, Degree,Some, Occupation, Object
Mr. A,{B.A, M.A},123,{ags,gshs},ass
Mr. ABC,{B.A, M.A},231,{ags,gshs},asas
Mr. D,{BB.A, M.A},44,{ags,gshs},asas
Mr. G,{BBBB.A, M.A},12,{ags,gshs},asasasa
Mr. S,{B.A, MMM.A},10,{ags,gshs},asasas
Mr. R,{B.A, M.A},11,{ags,gshs},asasas
Mr. T,{B.A, M.A},12,{ags,gshs},asasa
Mr. DD,{B.A, M.A},13,{ags,gshs},asasas

Try:
def find_replace(csv_path, search_characters, replace_with):
    text = open(csv_path, "r")
    text = ''.join([i for i in text]).replace(
        search_characters, replace_with)
    x = open(csv_path, "w")
    x.writelines(text)
    x.close()

if __name__ == '__main__':
    csv_path = "path/to/csv/file.csv"

    search_characters = ',"'
    replace_with = ',{'
    find_replace(csv_path, search_characters, replace_with)

    search_characters = '",'
    replace_with = '},'
    find_replace(csv_path, search_characters, replace_with)
The above code opens the file, reads its contents, performs the replacement, writes the result back and then closes it.
Or, if you prefer passing the patterns as lists, you can use the with statement, which takes care of calling the __exit__ method of the file object even if something bad happens in the code.
def find_replace(csv_path, search_characters, replace_with):
    s_one, s_two = search_characters
    r_one, r_two = replace_with
    with open(csv_path) as file:
        data = file.read().replace(s_one, r_one).replace(s_two, r_two)
    with open(csv_path, 'w') as file:
        file.write(data)

if __name__ == '__main__':
    csv_path = "path/to/csv/file.csv"
    search_characters = [',"', '",']
    replace_with = [',{', '},']
    find_replace(csv_path, search_characters, replace_with)
The main advantage of using a with statement is that it makes sure our file is closed no matter how the nested block exits.
Tested and works nicely on your example.
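As a side note (this sketch is not from the answer above), a with statement behaves roughly like the following try/finally, which is why the file gets closed even if an exception is raised inside the block:
# rough equivalent of: with open("path/to/csv/file.csv") as file: data = file.read()
file = open("path/to/csv/file.csv")
try:
    data = file.read()
finally:
    # runs no matter how the block exits, so the file handle is not leaked
    file.close()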

Replacing string character pattern using python in csv file
text = open("input.csv", "r")
#text = ''.join([i for i in text]).replace("character to be replaced", "character to be replaced with")
text = ''.join([i for i in text]).replace(",\"", ",{")
#Replacing character from replaced text
text1 = text.replace("\",", "},")
x = open("output.csv","w")
x.writelines(text1)
x.close()

Related

Textual parsing

I am a newbie with Python and pandas, but I would like to parse multiple downloaded files (which all have the same format).
In every HTML file there is a section like the one below where the executives are mentioned.
<DIV id=article_participants class="content_part hid">
<P>Redhill Biopharma Ltd. (NASDAQ:<A title="" href="http://seekingalpha.com/symbol/rdhl" symbolSlug="RDHL">RDHL</A>)</P>
<P>Q4 2014 <SPAN class=transcript-search-span style="BACKGROUND-COLOR: yellow">Earnings</SPAN> Conference <SPAN class=transcript-search-span style="BACKGROUND-COLOR: #f38686">Call</SPAN></P>
<P>February 26, 2015 9:00 AM ET</P>
<P><STRONG>Executives</STRONG></P>
<P>Dror Ben Asher - CEO</P>
<P>Ori Shilo - Deputy CEO, Finance and Operations</P>
<P>Guy Goldberg - Chief Business Officer</P>
Further down in the files there is a section DIV id=article_qanda class="content_part hid" where an executive such as Ori Shilo is named, followed by an answer, like:
<P><STRONG><SPAN class=answer>Ori Shilo</SPAN></STRONG></P>
<P>Good morning, Vernon. Both safety which is obvious and fertility analysis under the charter of the data and safety monitoring board will be - will be on up.</P>
So far I have only succeeded in writing an HTML parser that collects all the answers of one individual by name. I am not sure how to proceed and base the code on a variable list of executives. Does someone have a suggestion?
import textwrap
import os
from bs4 import BeautifulSoup

directory = 'C:/Research syntheses - Meta analysis/SeekingAlpha/out'
for filename in os.listdir(directory):
    if filename.endswith('.html'):
        fname = os.path.join(directory, filename)
        with open(fname, 'r') as f:
            soup = BeautifulSoup(f.read(), 'html.parser')
            print('{:<30} {:<70}'.format('Name', 'Answer'))
            print('-' * 101)
            for answer in soup.select('p:contains("Question-and-Answer Session") ~ strong:contains("Dror Ben Asher") + p'):
                txt = answer.get_text(strip=True)
                s = answer.find_next_sibling()
                while s:
                    if s.name == 'strong' or s.find('strong'):
                        break
                    if s.name == 'p':
                        txt += ' ' + s.get_text(strip=True)
                    s = s.find_next_sibling()
                txt = ('\n' + ' '*31).join(textwrap.wrap(txt))
                print('{:<30} {:<70}'.format('Dror Ben Asher - CEO', txt), file=open("output.txt", "a"))
To give some color to my original comment, I'll use a simple example. Let's say you've got some code that is looking for the string "Hello, World!" in a file, and you want the line numbers to be aggregated into a list. Your first attempt might look like:
# where I will aggregate my results
line_numbers = []

with open('path/to/file.txt') as fh:
    for num, line in enumerate(fh):
        if 'Hello, World!' in line:
            line_numbers.append(num)
This code snippet works perfectly well. However, it only works to check 'path/to/file.txt' for 'Hello, World!'.
Now, you want to be able to change the string you are looking for. This is analogous to saying "I want to check for different executives". You could use a function to do this. A function allows you to add flexibility into a piece of code. In this simple example, I would do:
# Now I'm checking for a parameter string_to_search
# that I can change when I call the function
def match_in_file(string_to_search):
    line_numbers = []
    with open('path/to/file.txt') as fh:
        for num, line in enumerate(fh):
            if string_to_search in line:
                line_numbers.append(num)
    return line_numbers

# now I'm just calling that function here
line_numbers = match_in_file("Hello, World!")
You'd still have to make a code change, but this becomes much more powerful if you want to search for lots of strings. I could feasibly use this function in a loop if I wanted to (though I would do things a little differently in practice); for the sake of the example, I now have the power to do:
list_of_strings = [
    "Hello, World!",
    "Python",
    "Functions"
]

for s in list_of_strings:
    line_numbers = match_in_file(s)
    print(f"Found {s} on lines ", *line_numbers)
Generalized to your specific problem, you'll want a parameter for the executive that you want to search for. Your function signature might look like:
def find_executive(soup, executive):
    for answer in soup.select(f'p:contains("Question-and-Answer Session") ~ strong:contains("{executive}") + p'):
        # rest of code
You've already read in the soup, so you don't need to do that again. You only need to change the executive in your select statement. The reason you want a parameter for soup is so you aren't relying on variables in global scope.
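Putting that together, here is a minimal driver sketch (not from the answer; the file name and the list of executives are placeholders) showing how such a function could be reused for a variable list of executives:
from bs4 import BeautifulSoup

# hypothetical values for illustration
fname = 'transcript.html'
executives = ['Dror Ben Asher', 'Ori Shilo', 'Guy Goldberg']

# read the soup once, then reuse it for every executive
with open(fname, 'r') as f:
    soup = BeautifulSoup(f.read(), 'html.parser')

for executive in executives:
    # find_executive is the function sketched above
    find_executive(soup, executive)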
@C.Nivs Would my code then be the following? Because I now get a block error:
import textwrap
import os
from bs4 import BeautifulSoup

directory = 'C:/Research syntheses - Meta analysis/SeekingAlpha/'
for filename in os.listdir(directory):
    if filename.endswith('.html'):
        fname = os.path.join(directory, filename)
        with open(fname, 'r') as f:
            soup = BeautifulSoup(f.read(), 'html.parser')
            print('{:<30} {:<70}'.format('Name', 'Answer'))
            print('-' * 101)

def find_executive(soup, executive):
    for answer in soup.select(f'p:contains("Question-and-Answer Session") ~ strong:contains({executive}) + p'):
        txt = answer.get_text(strip=True)
        s = answer.find_next_sibling()
        while s:
            if s.name == 'strong' or s.find('strong'):
                break
            if s.name == 'p':
                txt += ' ' + s.get_text(strip=True)
            s = s.find_next_sibling()
        txt = ('\n' + ' '*31).join(textwrap.wrap(txt))
        print('{:<30} {:<70}'.format(func, txt), file=open("output.txt", "a"))

Python program that reads a text file for input?

So I'm currently trying to write some code that opens and reads a text file. The text file contains a short paragraph. Within the paragraph there are some words with brackets around them, which could look like: "the boy [past_tense_verb] into the wall." I am trying to write code that looks for the brackets in the text file, displays the bracketed words to the user, and then lets the user write some input that replaces those bracketed words. This is the code I have so far:
f = open('madlib.txt', 'r')
for line in f:
    start = line.find('[')+1
    end = line.find(']')+1
    word = line[start:end+1]
    inputword = input('Enter a ' + word + ': ')
    print(line[:start] + inputword + line[end:])
Any help is greatly appreciated - thanks!
import re

with open('madlib.txt', 'r') as f:
    data = f.read()

words_to_replace = re.findall(r"\[(\w+)\]", data)

replace_with = []
for idx, i in enumerate(words_to_replace):
    print(f"Type here replace \033[1;31m{i}\033[1;m with:", end=" ")
    a = input()
    replace_with.append(a)

for idx, i in enumerate(replace_with):
    # replace the bracketed word, brackets included
    data = data.replace('[' + words_to_replace[idx] + ']', i)

with open('newmadlib.txt', 'w') as f:
    f.write(data)

formatted output to an external txt file

fp = open('data.txt', 'r')
saveto = open('backup.txt', 'w')
someline = fp.readline()
savemodfile = ''
while someline:
    temp_array = someline.split()
    print('temp_array[1] {0:20} temp_array[0] {0:20}'.format(temp_array[1], temp_array[0]), '\trating:', temp_array[len(temp_array)-1])
    someline = fp.readline()
    savemodfile = temp_array[1] + ' ' + temp_array[0] + ',\t\trating:' + temp_array[10]
    saveto.write(savemodfile + '\n')
fp.close()
saveto.close()
The input file data.txt has records of this pattern: firstname Lastname age address
I would like backup.txt to have this format: Lastname firstname address age
How do I store the data in backup.txt in a nicely formatted way? I think I should use the format() method somehow...
I use the print call in the code to show you what I understood about format() so far. Of course, I do not get the desired results.
To answer your question:
You can indeed use the .format() method on a string template; see the documentation: https://docs.python.org/3.5/library/stdtypes.html#str.format
For example:
'the first parameter is {}, the second parameter is {}, the third one is {}'.format("this one", "that one", "there")
Will output: 'the first parameter is this one, the second parameter is that one, the third one is there'
You do not seem to use format() properly in your case: 'temp_array[1] {0:20} temp_array[0] {0:20}'.format(temp_array[1], temp_array[0]) will output something like 'temp_array[1] Lastname temp_array[0] Lastname ' (with each name padded to 20 characters). That is because {0:20} always outputs the first parameter passed to format(), left-aligned and padded on the right with spaces to a width of 20 characters.
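As a quick illustration (the values here are made up), distinct positional indices give each value its own padded column:
# {0} and {1} refer to the first and second argument of format();
# :20 left-aligns each value in a 20-character-wide column
line = '{0:20}{1:20}'.format('Lastname', 'Firstname')
print(repr(line))  # each value is padded on the right with spaces up to 20 characters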
Additionally, there are many things to be improved in your code. I guess you are learning Python, so that's normal. Here is functionally equivalent code that produces the output you want and makes good use of Python features and syntax:
with open('data.txt', 'rt') as finput, \
     open('backup.txt', 'wt') as foutput:
    for line in finput:
        firstname, lastname, age, address = line.strip().split()
        foutput.write("{} {} {} {}\n".format(lastname, firstname, address, age))
This code will give you formatted output on the screen and in the output file:
fp = open('data.txt', 'r')
saveto = open('backup.txt', 'w')
someline = fp.readline()
savemodfile = ''
while someline:
    temp_array = someline.split()
    # lastname, firstname, address, age
    formatted = '{:20}{:20}{:20}{:20}'.format(temp_array[1], temp_array[0], temp_array[3], temp_array[2])
    print(formatted)
    savemodfile = formatted
    saveto.write(savemodfile + '\n')
    someline = fp.readline()
fp.close()
saveto.close()
But this is not a very nice way of working with files; try using the following pattern:
with open('a', 'w') as a, open('b', 'w') as b:
    do_something()
Refer to: How can I open multiple files using "with open" in Python?
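For example, the loop above could be rewritten with that pattern; this is only a sketch, assuming the same data.txt/backup.txt layout as in the question:
# same task, using the with-statement pattern so both files are closed automatically
with open('data.txt', 'r') as fp, open('backup.txt', 'w') as saveto:
    for someline in fp:
        temp_array = someline.split()
        # lastname, firstname, address, age
        saveto.write('{:20}{:20}{:20}{:20}\n'.format(temp_array[1], temp_array[0], temp_array[3], temp_array[2]))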
fp = open('data.txt', 'r')
saveto = open('backup.txt', 'w')
someline = fp.readline()
savemodfile = ''
while someline:
    temp_array = someline.split()
    someline = fp.readline()
    savemodfile = '{:^20} {:^20} {:^20} {:^20}'.format(temp_array[1], temp_array[0], temp_array[3], temp_array[2])
    saveto.write(savemodfile + '\n')
fp.close()
saveto.close()

Python: Add a new line after the first word in a sentence if the first word is all caps

I'm trying to modify a txt file. The file is a movie script in the format:
BEN We’ve discussed this before.
LUKE I can be a Jedi. I’m ready.
I'd like to insert a new line after the character name:
BEN
We’ve discussed this before.
LUKE
I can be a Jedi. I’m ready.
How do I do this in Python? I currently have:
def modify_file(file_name):
    fh = fileinput.input(file_name, inplace=True)
    for line in fh:
        split_line = line.split()
        if(len(split_line)>0):
            first_word = split_line[0]
            replacement = first_word+'\n'
            first_word = first_word.replace(first_word, replacement)
            sys.stdout.write(first_word)
    fh.close()
As suggested in one of the comments, this can be done using split and isupper. An example is provided below:
source_path = 'source_path.txt'

f = open(source_path)
lines = f.readlines()
f.close()

temp = ''
for line in lines:
    words = line.split(' ')
    if words[0].isupper():
        temp += words[0] + '\n' + ' '.join(words[1:])
    else:
        temp += line

f = open(source_path, 'w')
f.write(temp)
f.close()
There are multiple problems with your code.
import fileinput
import sys

def modify_file(file_name):
    fh = fileinput.input(file_name, inplace=True)
    for line in fh:
        split_line = line.split()
        if(len(split_line)>0):
            x = split_line[0] + "\n" + " ".join(split_line[1:]) + "\n"
            sys.stdout.write(x)
    fh.close()  # ==> this cannot be inside the if block; it has to be at the outer level, after the for loop

Python: Formatting a merged txt file

I want to merge two text files: names.txt and studentid.txt
The names.txt file contains:
Timmy Wong, Johnny Willis, Jason Prince
The studentid.txt file contains:
B5216, B5217, B5218
I want to combine them into a new text file called studentlist.txt in the following format; I simply want all the commas to become vertical bars:
Student_Name Student_ID
Timmy Wong | B5216
Johnny Willis | B5217
Jason Prince | B5218
So far I don't really know how to format this. I've been reading up some guides and my book, but it really isn't helping much.
This is what I've done so far:
def main():
    one = open("names.txt", 'r')
    lines = one.readlines()
    two = open("studentid.txt", 'r')
    lines2 = two.readlines()
    outfile = open("studentlist.txt", 'w')
    outfile.write("Student_Name StudentID")
    outfile.writelines(lines + lines2)

main()
and the output becomes
Student_Name StudentIDTimmy Wong, Johnny Willis, Jason Prince
B5216, B5217, B5218
I'm a beginner so go easy on me ><"
names = [n.strip() for n in open("names.txt").read().split(",")]
ids = [i.strip() for i in open("studentid.txt").read().split(",")]

print("Student_Name\t| Student_ID")
for n, i in zip(names, ids):
    print("{}\t| {}".format(n, i))
with open('data.txt') as f1, open('data1.txt') as f2, open('studentlist.txt', 'w') as f3:
    line = f1.readline().strip()             # read the first line of the names file
    names = map(str.strip, line.split(','))  # split the line by "," and then apply strip()
    line = f2.readline().strip()             # read the first line of the ID file
    ids = map(str.strip, line.split(','))    # split the line by "," and then apply strip()
    f3.write("{0:25}{1}\n".format("Student_Name", "Student_Id"))
    for name, i in zip(names, ids):          # use zip() to fetch data from both lists
        f3.write("{0:25}|{1}\n".format(name, i))  # use write() instead of print to write it to a file
output:
Student_Name             Student_Id
Timmy Wong               |B5216
Johnny Willis            |B5217
Jason Prince             |B5218
Untested, but you want something similar to:
import csv

with open('names.txt') as nf, open('studentid.txt') as sf, open('output.txt', 'w', newline='') as pf:
    csvnf = csv.reader(nf)
    csvsf = csv.reader(sf)
    csvpf = csv.writer(pf, delimiter='|')
    for name_row, id_row in zip(csvnf, csvsf):
        # each file has a single comma-separated row; pair up the fields
        for name, student_id in zip(name_row, id_row):
            csvpf.writerow([name.strip(), student_id.strip()])
names = [n.strip() for n in open("names.txt").read().split(",")]
student_ids = [i.strip() for i in open("studentid.txt").read().split(",")]

outfile = open("studentlist.txt", 'w')
outfile.write("Student_Name\tStudent_ID\n")
for current_name, current_id in zip(names, student_ids):
    outfile.write(current_name + "\t|" + current_id + "\n")
outfile.close()
