I am trying to understand the following.
I need to delete a few characters from a string loaded from a .txt file:
# Question code, kept as posted: only the first character of myList is removed.
f = open("my_file.txt")
myList = [".", ",", "-"]
removed = ""
for i in myList:
# The first f.read() consumes the whole file; every later call returns "",
# so nothing is appended for the remaining characters.
removed += f.read().replace(f'{i}', '')
print(removed)
My solution only works for one character. Why?
The first f.read() moves the file position to the end of the file, so every later f.read() returns an empty string. Hence, you need to read the file once and store its contents in a variable:
# Read the file exactly once and keep the text in a variable; all
# replacements are then applied to that in-memory copy.
f = open("my_file.txt")
myList = [".", ",", "-"]
f_data = f.read()
f.close()  # the contents are in f_data, so the handle is no longer needed
for i in myList:
    # Each pass removes one more character from the accumulated result.
    f_data = f_data.replace(i, '')
print(f_data)
Or:
myList = [".", ",", "-"]
# The with-block closes the file as soon as its contents have been read.
with open("my_file.txt") as f:
    f_data = f.read()
for i in myList:
    # i is already a str, so no f-string wrapper is needed.
    f_data = f_data.replace(i, '')
print(f_data)
I would suggest the following code:
import re

# Character class matching every character to strip: '.', ',' and '-'
# ('-' comes last in the class, so it is a literal rather than a range).
pattern = re.compile(r'[\.,-]')
with open('your_file', 'r') as f:
    # One substitution over the whole text instead of growing a string
    # line by line.
    removed = pattern.sub('', f.read())
print(removed)
If it is not mandatory to iterate over myList you could use a regular expression.
Related
What I'm trying to do is grab some text files from a folder, split the text into words, count the words, sort the counts into a list and write the result to a file. All is well except that, instead of splitting into words, it splits the text into letters and counts them. It seems like an easy fix, but I have no clue what I'm doing, so... thanks in advance.
# Question code, kept as posted: it counts characters instead of words.
import os
import os.path
import string
# Collect every .txt file in the sample folder, sorted by name, as full paths.
prefix_path = ("C:/Users/User/Desktop/Python/sampleTexts")
files = [f for f in os.listdir(prefix_path) if f.endswith(".txt")]
files.sort()
files = [os.path.join(prefix_path,name) for name in files]
textOut = open("texthere.txt", "w", encoding="utf-8")
# `file` receives the list of paths built above (see the call at the bottom),
# not a single file.
def readText(file):
for i in file:
with open(i, "r", encoding= "utf-8") as f:
textin = f.read()
first_dict= dict()
# textin is one str, so this loop yields single characters -- this is why
# letters are counted rather than words.
for i in textin:
i = i.strip()
i = i.lower()
# Delete every character listed in string.punctuation.
i = i.translate(i.maketrans("","", string.punctuation))
words = i.split()
for word in words:
if word in first_dict:
first_dict[word] = first_dict[word] + 1
else:
first_dict[word] = 1
# (key, count) pairs ordered from the highest count to the lowest.
sorted_dict = sorted(first_dict.items(), key= lambda x: x[1], reverse=True)
for key, val in sorted_dict:
print(key," :", val)
for key, val in sorted_dict:
textOut.write(key + " :" + str(val) + "\n")
textOut.close()
readText(files)
f.read() gives you a single string containing the entire text file, so when you iterate over it with `for i in textin` you are iterating over each character. What you probably want is:
# Walk the file line by line, then word by word, instead of character by
# character.
for line in f.readlines():
    for word in line.split():
        blah  # placeholder from the answer: handle one word here
I have a list1 = ['hi','world','of']
I have a .txt file
Hellohihowareyou
worldisfullofwonder
How can I check whether 'hi', 'of' and 'world' exist in the file?
pseudo code
# Question pseudo code, kept as posted.
import re
# An empty pattern matches at position 0 of any text, so it cannot tell the
# words in list1 apart.
pattern = r''
for i in list1:
#print (i)
# The file object is not bound to a name here (no "as f"), so the f used on
# the next line is undefined.
with open('file.txt','r'):
content = f.read()
test= re.search(pattern,content)
print (test)
My expected output
['hi','of'], since there is no world in the file
You can also do it with the `in` keyword; use a regex only if your patterns become more advanced.
Normal
list1 = ['hi','world','of']
text = """ Hellohihowareyou
worldisfullofwonder"""
results = []
for element in list1:
    # `in` on two strings is a plain substring test.
    if element in text:
        results.append(element)
# With this sample text all three words are found, because
# "worldisfullofwonder" contains "world".
print(results)
List comprehension
# Same substring filter as the loop version, as a single list comprehension.
results = [word for word in list1 if word in text]
print(results)
use this:
import re

file = 'file.txt'
lst = ['hi','world','of']
with open(file, 'r') as file_handler:
    content = file_handler.read()
# Search for each word separately: a single 'a|b|c' alternation returns
# duplicates and can skip a word that overlaps an earlier match.
# re.escape keeps regex metacharacters in a word literal.
result = [word for word in lst if re.search(re.escape(word), content)]
print(result)
import re

file = r'C:\Users\wind\Desktop\file.txt'
list1 = ['hi','of','wonderw']
# Read the file once; its contents do not change between searches.
with open(file,'r') as f:
    content = f.read()
for i in list1:
    # Case-insensitive, literal search for the current word.
    pattern = re.compile(re.escape(i), re.IGNORECASE)
    test = pattern.search(content)
    # Prints a match object when the word occurs, otherwise None.
    print(test)
I have a file as below, whenever there is a key with empty value, I want to delete the key and the empty quotes
My file
<items="20" product="abc" condition="new">
<items="10" product="" condition="new">
<items="50" product="xyz" condition="">
<items="" product="mno" condition="fair">
desired output
<items="20" product="abc" condition="new">
<items="10" condition="new">
<items="50" product="xyz">
<product="mno" condition="fair">
I tried something like this, but it deleted only the quotes. I want to delete the quotes and the key before the "=".
# Question code (Python 2), kept as posted.
f= open('test.txt','r')
A1=f.read()
# A1 is a single str, so this loop visits one character at a time.
for i in A1:
# A one-character string never equals the two-character string "''".
if i=="''":
# NOTE: str objects have no remove() method.
A1.remove(i)
print A1
break
You could use a regular expression:
import re

# An empty key="" pair: either the last attribute before '>' (together with
# the space in front of it) or any other one (together with the spaces that
# follow it), so no stray space is left behind.
empty_attr = re.compile(r'\s*[\w-]+=""(?=>)|[\w-]+="" *')

with open('test.txt','r') as A1:
    for i in A1:
        # Drop the line's own newline first; print() adds one itself.
        print(empty_attr.sub('', i.rstrip('\n')))
A possible solution could be:
with open('test.txt','r+') as f:
    cleaned = []
    for line in f:
        stripped = line.strip()
        if not stripped:
            # Keep blank lines exactly as they are.
            cleaned.append(line)
            continue
        # Assumes each line has the form <key="value" ...>: drop the
        # surrounding '<' and '>' and split into key="value" tokens.
        pairs = stripped[1:-1].split()
        # '""' (two quote characters) marks an empty value; testing for ""
        # (the empty string) would match every token.
        kept = [k for k in pairs if '""' not in k]
        cleaned.append('<' + ' '.join(kept) + '>\n')
    # Rewrite the file only after it has been read completely.
    f.seek(0)
    f.writelines(cleaned)
    f.truncate()
You could write a function to pass the lines through:
def process_line(line):
    """Return *line* rebuilt as ``<...>`` without its empty ``key=""`` pairs.

    The result always ends with a newline. A line that contains no ``<``
    (for example a blank line) is returned unchanged.
    """
    parts = line.split('<')
    if len(parts) < 2:
        # Nothing that looks like a tag: leave the line alone.
        return line
    # Text between the first '<' and the '>' that follows it.
    inner = parts[1].split('>')[0]
    valids = [val for val in inner.split(' ') if '""' not in val]
    return '<{}>\n'.format(' '.join(valids))


if __name__ == '__main__':
    with open('in_file', 'r') as f:
        lines = f.readlines()
    with open('out_file', 'w') as f:
        f.writelines(process_line(line) for line in lines)
You can use regex,
import re

# An empty key="" pair: the last attribute before '>' takes the whitespace in
# front of it, any other one takes the whitespace that follows it.
_EMPTY_ATTR = re.compile(r'\s*[\w-]+=""(?=>)|[\w-]+=""\s*')


def clean_line(line):
    """Return *line* stripped of outer whitespace and of every key="" pair."""
    return _EMPTY_ATTR.sub('', line.strip())


if __name__ == '__main__':
    # Plain text output: csv.writer would quote each line and double every
    # '"' inside it, which corrupts the attributes.
    with open('tmp.txt', 'r') as f_in, open('tmp_clean.txt', 'w') as f_out:
        for line in f_in:
            f_out.write(clean_line(line) + '\n')
I have a text file containing these lines for example
[:];#;;]wqwww actualnumber 1234 ;;:###
aaaa ''3# allnumber 9876
///qqq |||)))
]][]: best 56
I want to get the value 1234,9876,56. Like this(desired output)
1234
9876
56
I tried with the following script but it did not print out anything
# Question code (Python 2), kept as posted: it prints nothing.
with open("test.txt", "r") as f:
lines = f.readlines()
stripped_lines = [line.strip() for line in lines]
# "actual number" contains a space; the sample file spells it "actualnumber".
word = ["actual number", "potential", "time"]
# stripped_lines is a list, so `item in stripped_lines` is true only when a
# whole line equals item exactly -- it is not a substring search.
if any(item in stripped_lines for item in word):
# NOTE: lists have no split(); these calls would need a single line (a str).
aa = stripped_lines.split("actualnumber ")[1].split(" ")[0]
bb = stripped_lines.split("allnumber ")[1].split(" ")[1]
cc = stripped_lines.split("best ")[1]
print aa
print bb
print cc
Did I miss something?
You can do it with the re module:
import re

with open('f.txt') as f:
    data = f.read()
# Each pattern captures the run of digits that follows its keyword; \s+
# tolerates any amount of whitespace between the two.
act = re.findall(r'actualnumber\s+(\d+)', data)
best = re.findall(r'best\s+(\d+)', data)
allnumber = re.findall(r'allnumber\s+(\d+)', data)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("actualnumber : %s" % (act[0] if act else None))
print("allnumber : %s" % (allnumber[0] if allnumber else None))
print("best : %s" % (best[0] if best else None))
output
actualnumber : 1234
allnumber : 9876
best : 56
The simple way is by using isdigit()
with open('test.txt') as f:
    data = f.read()
# Keep the whitespace-separated tokens that are whole integers;
# lstrip("-") lets a leading minus sign through isdigit().
numbers = [int(s) for s in data.split() if s.lstrip("-").isdigit()]
print(numbers)
Output :
[1234, 9876, 56]
word = ["actualnumber", "potential", "time"]
with open("./abx.txt") as file:
    temp_list = [line.strip() for line in file.readlines()]
# Lines that mention at least one of the trigger words.
list_val = [item for item in temp_list if any(x in item for x in word)]
final_list = []
match_value = ["actualnumber", "allnumber", "best"]
if list_val:
    for val in temp_list:
        # split() without an argument ignores repeated spaces.
        val_list = val.split()
        # Pair every token with the one after it; a keyword that ends the
        # line has no value and is simply skipped.
        for value, following in zip(val_list, val_list[1:]):
            if value in match_value:
                final_list.append(following)
print(final_list)
And at the end you can iterate list and find out your expected values.
You can extract numbers like this:
k = '[:];#;;]wqwww actualnumber 1234 ;;:###'
# Whitespace-separated tokens that consist only of numeric characters.
numbers = [item for item in k.split() if item.isnumeric()]
for item in numbers:
    print(item)
You would use your stripped lines instead: for each line in stripped_lines, split it and check whether any item in the split line is numeric. Note that in Python 2 isnumeric() exists only on unicode objects, not on byte strings (str).
with open("test.txt", "r") as f:
    stripped_lines = [line.strip() for line in f]
words = ['actualnumber', 'allnumber', 'best']
found = {}
for line in stripped_lines:
    current_line = line.split()
    # Pair every token with its successor, so a keyword at the very end of
    # a line (no value after it) cannot index past the list.
    for item, value in zip(current_line, current_line[1:]):
        if item in words:
            found[item] = value
Now that you have them in a dictionary, you can access them as: found['actualnumber']. And do further processing, such as storing them in a database.
I am writing a script that scrapes data from a file (any format: CSV, text, JSON, HTML, etc.), matches it against a list taken from another file, and then replaces the matching strings with values from that other file. Each file contains the same kind of data, and I would like to use a regular expression because I want to capture the text between %% markers (%%string%%) and store those strings in a list.
format of file
file1.txt
{
"alias": "%%demo%%",
"demo": "%%demo%%",
"dns_domain": "googlr.com",
"max_physical_memory": "%%maxmemory%%",
"dataset_uuid": "%%DS_UUID%%",
"nics": [
{
"nic_tag": "stub0",
"ip": "%%ip%%",
"netmask": "255.255.240.0",
"primary": "1"
}
]
}
I want to collect every string that appears between %% markers (%%____%%) into a list.
Python Code
# Question code (Python 2), kept as posted.
import sys
import re
# NOTE: the name `list` shadows the built-in list type.
list = []
list1 = []
i = 0
for n in sys.argv[1:]:
#list = []
#list1 = []
print n
# NOTE: mode "w" truncates the file on open, and a write-only handle
# cannot be iterated for reading further down.
input1 = open(n, "w")
#print input1
output = open(n,"r")
# Collect the text after the first "=" of each line.
for line1 in output:
s = line1.split("=",1)[1:2]
for m in s:
list1.append(m.strip())
for line in input1:
# Capture the names written between %% markers.
a = re.findall(r"%%([^%^\n]+)%%", line)
for val in a:
list.append(val)
# NOTE: the slice [i:0] is empty for every i >= 0, so stext and rtext are
# always empty lists.
stext = list[i:0]
rtext = list1[i:0]
input1.write(line.replace(val, rtext))
i += 1
input1.close()
output.close()
print list and list2 , list2 having values from file2.txt
file2.txt
demo=somehost
demo=somehost2
maxmemory=1025
DS_UUID = 454s5da5d4a
ip=127.0.0.1
I want to replace the placeholders in file1 with the values from file2. Please check my code and let me know how this can be done.
It's easy to find data inside well-known markers using regular expressions:
>>> import re
>>> re.findall(r"%%([^%^\n]+)%%", "hello %%there%% how\n are %%you%%")
['there', 'you']
From your updated example, you can extend the list instead of adding sublists
import fileinput
import re

# Text between a pair of %% markers, e.g. "demo" in %%demo%%.
marker = re.compile(r"%%([^%^\n]+)%%")
array = []
# fileinput reads the files named on the command line (or standard input).
for line in fileinput.input():
    # extend() keeps one flat list instead of one sub-list per line.
    array.extend(marker.findall(line))
# Single-argument print(...) works on both Python 2 and Python 3.
print(array)
fileinput.close()
Thanks to all for your time. I finally achieved what I wanted; my code is below:
import sys
import re

# Text between a pair of %% markers, e.g. "demo" in %%demo%%.
PLACEHOLDER = re.compile(r"%%([^%^\n]+)%%")


def read_values(path):
    """Return the value part of every ``key=value`` line in *path*, in order.

    Lines without ``=`` are skipped. Surrounding whitespace (including the
    trailing newline) is removed so it cannot leak into the output.
    """
    values = []
    with open(path, "r") as handle:
        for line in handle:
            _, sep, value = line.partition("=")
            if sep:
                values.append(value.strip())
    return values


def fill_placeholders(text, values):
    """Replace the n-th ``%%name%%`` marker in *text* with ``values[n]``.

    Matching is positional, so a name that occurs several times receives
    successive values. Markers beyond the end of *values* are left as is.
    """
    remaining = iter(values)

    def substitute(match):
        # Fall back to the marker itself once the values are used up.
        return next(remaining, match.group(0))

    return PLACEHOLDER.sub(substitute, text)


if __name__ == "__main__":
    file1 = 'file1.json'
    file2 = 'test-var.txt'
    with open(file1, "r") as template:
        txt = fill_placeholders(template.read(), read_values(file2))
    print(txt)
    # Overwrite the template with the filled-in text.
    with open(file1, "w") as output:
        output.write(txt)