I am trying to convert multiple .txt file to "table-like" data (with columns and rows). Each .txt file should be considered as a new column.
Consider below content of the .txt file:
File1.txt
Hi there
How are you doing?
What is your name?
File2.txt
Hi
Great!
Oliver, what's yours?
I have created a simple method, that accepts the file and and integer (the file number from another method):
def txtFileToJson(text_file, column):
data = defaultdict(list)
i = int(1)
with open(text_file) as f:
data[column].append(column)
for line in f:
i = i + 1
for line in re.split(r'[\n\r]+', line):
data[column] = line
with open("output.txt", 'a+') as f:
f.write(json.dumps(data))
So above method will run two times (one time for each file, and append the data).
This is the output.txt file after I have run my script:
{"1": "What is your name?"}{"2": "Oliver, what's yours?"}
As you can see, I can only get it to create a new for each file I have, and then add the entire line.
[{
"1": [{
"1": "Hi there",
"2": "How are you doing?",
"3": "\n"
"4": "What is your name?"
},
"2": [{
"1": "Hi"
"2": "Great!",
"3": "\n",
"4": "Oliver, what's yours?"
},
}]
Update:
OK, so I played around a bit and got a bit closer:
myDict = {str(column): []}
i = int(1)
with open(text_file) as f:
for line in f:
# data[column].append(column)
match = re.split(r'[\n\r]+', line)
if match:
myDict[str(column)].append({str(i): line})
i = i + 1
with open(out_file, 'a+') as f:
f.write(json.dumps(myDict[str(column)]))
That gives me below output:
[{"1": "Hi there\n"}, {"2": "How are you doing?\n"}, {"3": "\n"}, {"4": "What is your name?"}]
[{"1": "Hi\n"}, {"2": "Great!\n"}, {"3": "\n"}, {"4": "Oliver, what's yours?"}]
But as you can see, now I have multiple JSON root elements.
Solution
Thanks to jonyfries, I did this:
data = defaultdict(list)
for path in images.values():
column = column + 1
data[str(column)] = txtFileToJson(path, column)
saveJsonFile(path, data)
And then added a new method to save the final combined list:
def saveJsonFile(text_file, data):
basename = os.path.splitext(os.path.basename(text_file))
dir_name = os.path.dirname(text_file) + "/"
text_file = dir_name + basename[0] + "1.txt"
out_file = dir_name + 'table_data.txt'
with open(out_file, 'a+') as f:
f.write(json.dumps(data))
You're creating a new dictionary within the function itself. So each time you pass a text file in it will create a new dictionary.
The easiest solution seems to be returning the dictionary created and add it to an existing dictionary.
def txtFileToJson(text_file, column):
myDict = {str(column): []}
i = int(1)
with open(text_file) as f:
for line in f:
# data[column].append(column)
match = re.split(r'[\n\r]+', line)
if match:
myDict[str(column)].append({str(i): line})
i = i + 1
with open(out_file, 'a+') as f:
f.write(json.dumps(myDict[str(column)]))
return myDict
data = defaultdict(list)
data["1"] = txtFileToJson(text_file, column)
data["2"] = txtFileToJson(other_text_file, other_column)
def read(text_file):
data, i = {}, 0
with open(text_file) as f:
for line in f:
i = i + 1
data['row_%d'%i] = line.rstrip('\n')
return data
res = {}
for i, fname in enumerate([r'File1.txt', r'File2.txt']):
res[i] = read(fname)
with open(out_file, 'w') as f:
json.dump(res, f)
First, if I understand you are trying to get as output a dictionary of dictionaries, then let me observe that what I understand to be your desired output seems to be enclosing the whole thing within a list, Furthermore, you have unbalanced open and closed list brackets within the dictionaries, which I will ignore, as I will the enclosing list.
I think you need something like:
#!python3
import json
import re
def processTxtFile(text_file, n, data):
d = {}
with open(text_file) as f:
i = 0
for line in f:
for line in re.split(r'[\n\r]+', line):
i = i + 1
d[str(i)] = line
data[str(n)] = d
data = dict()
processTxtFile('File1.txt', 1, data)
processTxtFile('File2.txt', 2, data)
with open("output.txt", 'wt') as f:
f.write(json.dumps(data))
If you really need the nested dictionaries to be enclosed within a list, then replace
data[str(n)] = d
with:
data[str(n)] = [d]
Related
I have a file with multiple objects like this:
{
name: (sindey, crosby)
game: "Hockey"
type: athlete
},
{
name: (wayne, gretzky)
game: "Ice Hockey"
type: athlete
}
...and I'd like to convert them to JSON format and output this:
[
{
"name": "(sindey, crosby)",
"game": "Hockey",
"type": "athlete"
},
{
"name": "(wayne, gretzky)",
"game": "Ice Hockey",
"type": "athlete"
}
]
If the input was in this format,
name: (sidney, crosby) | game:"Hockey" | type:athlete
name: (wayne, gretzky) | game:"Ice Hockey" | type:athlete
I could implement using json dump with list and dict and it gives me the desired output
import json
f = open("log.file", "r")
content = f.read()
splitcontent = content.splitlines()
d = []
for v in splitcontent:
l = v.split(' | ')
d.append(dict(s.split(':',1) for s in l))
with open("json_log.json", 'w') as file:
file.write((json.dumps(d, indent=4, sort_keys= False)))
How can I reformat this code to convert my input to JSON format?
With slight changes on the answer given by #sarah Messer. Changes involved
lines without the : separator skipped
Try this
import json
f = open("log.file", "r")
content = f.read()
splitcontent = content.splitlines()
d = []
appendage = {}
for line in splitcontent:
if ('}' in line) or ('{' in line) or ('{' in line) or ('}' in line):
# Append a just-created record and start a new one
if appendage:
d.append(appendage)
appendage = {}
continue
key, val = line.split(':')
if val.endswith(','):
# strip a trailing comma
val = val[:-1]
print(val)
# if val == "":
# pass
# else:
appendage[key] = val
with open("json_log.json", 'w') as file:
file.write((json.dumps(d, indent=4, sort_keys=False)))
Something like this will probably work for most cases - you just have to handle the lines with curly braces separately from the lines with data:
import json
f = open("log.file", "r")
content = f.read()
splitcontent = content.splitlines()
d = []
appendage = {}
for line in splitcontent:
if ('}' in line) or ('{' in line):
# Append a just-created record and start a new one
if appendage:
d.append(appendage)
appendage ={}
else:
key, val = line.split(':',1)
if val.endswith(','):
# strip a trailing comma
val = val[:-1]
appendage[key] = val
with open("json_log.json", 'w') as file:
file.write((json.dumps(d, indent=4, sort_keys= False)))
I might also have some typos in there...
I am writing a code in python where I am removing all the text after a specific word but in output lines are missing. I have a text file in unicode which have 3 lines:
my name is test1
my name is
my name is test 2
What I want is to remove text after word "test" so I could get the output as below
my name is test
my name is
my name is test
I have written a code but it does the task but also removes the second line "my name is"
My code is below
txt = ""
with open(r"test.txt", 'r') as fp:
for line in fp.readlines():
splitStr = "test"
index = line.find(splitStr)
if index > 0:
txt += line[:index + len(splitStr)] + "\n"
with open(r"test.txt", "w") as fp:
fp.write(txt)
It looks like if there is no keyword found the index become -1.
So you are avoiding the lines w/o keyword.
I would modify your if by adding the condition as follows:
txt = ""
with open(r"test.txt", 'r') as fp:
for line in fp.readlines():
splitStr = "test"
index = line.find(splitStr)
if index > 0:
txt += line[:index + len(splitStr)] + "\n"
elif index < 0:
txt += line
with open(r"test.txt", "w") as fp:
fp.write(txt)
No need to add \n because the line already contains it.
Your code does not append the line if the splitStr is not defined.
txt = ""
with open(r"test.txt", 'r') as fp:
for line in fp.readlines():
splitStr = "test"
index = line.find(splitStr)
if index != -1:
txt += line[:index + len(splitStr)] + "\n"
else:
txt += line
with open(r"test.txt", "w") as fp:
fp.write(txt)
In my solution I simulate the input file via io.StringIO. Compared to your code my solution remove the else branch and only use one += operater. Also splitStr is set only one time and not on each iteration. This makes the code more clear and reduces possible errore sources.
import io
# simulates a file for this example
the_file = io.StringIO("""my name is test1
my name is
my name is test 2""")
txt = ""
splitStr = "test"
with the_file as fp:
# each line
for line in fp.readlines():
# cut somoething?
if splitStr in line:
# find index
index = line.find(splitStr)
# cut after 'splitStr' and add newline
line = line[:index + len(splitStr)] + "\n"
# append line to output
txt += line
print(txt)
When handling with files in Python 3 it is recommended to use pathlib for that like this.
import pathlib
file_path = pathlib.Path("test.txt")
# read from wile
with file_path.open('r') as fp:
# do something
# write back to the file
with file_path.open('w') as fp:
# do something
Suggestion:
for line in fp.readlines():
i = line.find('test')
if i != -1:
line = line[:i]
I'm currently trying to make a Chromebook rental application for my high school that stores checkout information in a JSON file. Everything works except removing data from the JSON array. I found a YouTube video(link) that I thought would work as a solution, so I followed along with that. However, whenever there's more than two elements and I enter anything higher than two, it doesn't delete anything. Even worse, when I enter the number one, it deletes everything but the zero index(whenever the array has more than two elements in it).
Here's the Python code:
def view_data(): # Prints JSON Array to screen
with open(filename, "r") as f:
data = json.load(f)
i = 0
for item in data:
name = item["name"]
chromebook = item["chromebook"]
check_out = item["time&date"]
print(f"Index Number: {i}")
print(f"Name : {name}")
print(f"Chromebook : {chromebook}")
print(f"Time Of Checkout: {check_out} ")
print("\n\n")
i = i + 1
def delete_data(): # Deletes an element from the array
view_data()
new_data = []
with open(filename, "r") as f:
data = json.load(f)
data_length = len(data) - 1
print("Which index number would you like to delete?")
delete_option = input(f"Select a number 0-{data_length}: ")
i = 0
for entry in data:
if i == int(delete_option):
pass
i = + 1
else:
new_data.append(entry)
i = + 1
with open(filename, "w") as f:
json.dump(new_data, f, indent=4)
Here's the JSON file code:
[
{
"name": "Tyler",
"chromebook": "12123223",
"time&date": "Check Out Time: 13:33:22 May-11-2021"
},
{
"name": "Craig",
"chromebook": "41222224",
"time&date": "Check Out Time: 13:33:34 May-11-2021"
},
{
"name": "Bill",
"chromebook": "3235223",
"time&date": "Check Out Time: 13:33:46 May-11-2021"
}
]
For example, say the user wanted to remove the second index in the JSON array. Is there a better way to implement that in my Python script?
I'm still a fairly new and learning Python developer, and if there's any better solution I'm open to suggestions. If you need for info, I'll be active.
First question
However, whenever there's more than two elements and I enter anything higher than two, it doesn't delete anything. Even worse, when I enter the number one, it deletes everything but the zero index(whenever the array has more than two elements in it).
Inside delete_data() you have two lines reading i = + 1, which just assignes +1 (i.e., 1) to i. Thus, you're never increasing your index. You probably meant to write either i = i+1 or i += 1.
def delete_data(): # Deletes an element from the array
view_data()
new_data = []
with open(filename, "r") as f:
data = json.load(f)
data_length = len(data) - 1
print("Which index number would you like to delete?")
delete_option = input(f"Select a number 0-{data_length}: ")
i = 0
for entry in data:
if i == int(delete_option):
i += 1 # <-- here
else:
new_data.append(entry)
i += 1 # <-- and here
with open(filename, "w") as f:
json.dump(new_data, f, indent=4)
Second question: further improvements
Is there a better way to implement that in my Python script?
First, you can get rid of manually increasing i by using the builtin enumerate generator. Second, you could make your functions reusable by giving them parameters - where does the filename in your code example come from?
# view_data() should probably receive `filename` as a parameter
def view_data(filename: str): # Prints JSON Array to screen
with open(filename, "r") as f:
data = json.load(f)
# iterate over i and data simultaneously
# alternatively, you could just remove i
for i, item in enumerate(data):
name = item["name"]
chromebook = item["chromebook"]
check_out = item["time&date"]
print(f"Index Number: {i}")
print(f"Name : {name}")
print(f"Chromebook : {chromebook}")
print(f"Time Of Checkout: {check_out} ")
print("\n\n")
# not needed anymore: i = i + 1
# view_data() should probably receive `filename` as a parameter
def delete_data(filename: str): # Deletes an element from the array
view_data()
new_data = []
with open(filename, "r") as f:
data = json.load(f)
data_length = len(data) - 1
print("Which index number would you like to delete?")
delete_option = input(f"Select a number 0-{data_length}: ")
# iterate over i and data simultaneously
for i, entry in enumerate(data):
if i != int(delete_option):
new_data.append(entry)
with open(filename, "w") as f:
json.dump(new_data, f, indent=4)
Furthermore, you could replace that for-loop by a list comprehension, which some may deem more "pythonic":
new_data = [entry for i, entry in enumerate(data) if i != int(delete_option)]
There are easier ways to delete an element by index from a Python list.
Given li = ["a", "b", "c"], you can delete element 1 ("b") by index in (at least) the following ways:
li.pop(1) # pop takes an index (defaults to last) and removes and returns the element at that index
del li[1] # the del keyword will also remove an element from a list
So, here's some updated code:
def view_data(): # Prints JSON Array to screen
with open(filename, "r") as f:
data = json.load(f)
i = 0
for item in data:
name = item["name"]
chromebook = item["chromebook"]
check_out = item["time&date"]
print(f"Index Number: {i}")
print(f"Name : {name}")
print(f"Chromebook : {chromebook}")
print(f"Time Of Checkout: {check_out} ")
print("\n\n")
i = i + 1
def delete_data(): # Deletes an element from the array
view_data()
with open(filename, "r") as f:
data = json.load(f)
data_length = len(data) - 1
print("Which index number would you like to delete?")
delete_option = input(f"Select a number 0-{data_length}: ")
del data[int(delete_option)] # or data.pop(int(delete_option))
with open(filename, "w") as f:
json.dump(data, f, indent=4)
import json
data = json.loads(jsonString) #convert json string to object
delete_option = input(f"Select a number 0-{data_length}: ")
del data[int(delete_option)]
I have this code
with open(newconfig, 'r') as file1: # New File
with open(goldconfig, 'r') as file2: # Standard File
difference = set(file1).difference(file2)
difference.discard('\n')
diff_file = input("INFO: Select what to name the difference(s) : ")
with open(diff_file, 'w') as file_out:
for line in difference:
file_out.write("** WARNING: Difference found in New Config:\n " + line + "\n")
print("WARNING: Difference in file: " + line)
print("\n\n")
print("INFO: Difference File Created: " + diff_file)
but I want to ignore if the file has the same word, but on different lines
so for example
List one:
TOM123
TOM1234
TOM12345
List Two:
TOMA
TOMB
TOM123
TOM1234
TOM12345
Difference:
TOMA
TOMB
If you want to get a line of text into a set, you can do something like this:
text = 'TOM1234 TOM1234 TOM12345 TOM123'
a = set([word for word in text.split()])
print(a)
Output
{'TOM123', 'TOM1234', 'TOM12345'}
If you want to find the items that are only in one of the sets, use symmetric_difference.
a = set(['TOM123', 'TOM1234', 'TOM12345', 'TOM5'])
b = set(['TOMA', 'TOMB', 'TOM123', 'TOM1234', 'TOM12345'])
difference = a ^ b
print(difference)
Output
{'TOM5', 'TOMA', 'TOMB'}
you can try this:
def open_file_and_return_list(file_path):
list = []
with open(file_path, 'r') as f:
line = f.readline()
while line:
list.append(line)
line = f.readline()
return list
def clean_new_line(list):
for i in range(len(list)):
if "\n" in list[i]:
list[i] = list[i].replace("\n", "")
return list
if __name__ == "__main__":
list1 = open_file_and_return_list(r"path\File1.txt")
list2 = open_file_and_return_list(r"path\File2.txt")
list1 = clean_new_line(list1)
list2 = clean_new_line(list2)
diff = []
for obj in list1:
if obj not in list2:
diff.append(obj)
for obj in list2:
if obj not in list1:
diff.append(obj)
print(diff)
I am writing a script for scrap data from file (any format like csv,text,json,html etc.) and match list with another file and then replace that particular string from another file , each file contain same data and i would like to use regular expression because i want to scrap data after %%string%% and then store string in to the list
format of file
file1.txt
{
"alias": "%%demo%%",
"demo": "%%demo%%",
"dns_domain": "googlr.com",
"max_physical_memory": "%%maxmemory%%",
"dataset_uuid": "%%DS_UUID%%",
"nics": [
{
"nic_tag": "stub0",
"ip": "%%ip%%",
"netmask": "255.255.240.0",
"primary": "1"
}
]
}
I want to get all of the string in to the list between %%____%% sign
Python Code
import sys
import re
list = []
list1 = []
i = 0
for n in sys.argv[1:]:
#list = []
#list1 = []
print n
input1 = open(n, "w")
#print input1
output = open(n,"r")
for line1 in output:
s = line1.split("=",1)[1:2]
for m in s:
list1.append(m.strip())
for line in input1:
a = re.findall(r"%%([^%^\n]+)%%", line)
for val in a:
list.append(val)
stext = list[i:0]
rtext = list1[i:0]
input1.write(line.replace(val, rtext))
i += 1
input1.close()
output.close()
print list and list2 , list2 having values from file2.txt
file2.txt
demo=somehost
demo=somehost2
maxmemory=1025
DS_UUID = 454s5da5d4a
ip=127.0.0.1
i want to replace in file1 from file2 , please check my code and let me know how can we do it
It's easy to find data inside well-known markers using regular expressions:
>>> import re
>>> re.findall(r"%%([^%^\n]+)%%", "hello %%there%% how\n are %%you%%")
['there', 'you']
From your updated example, you can extend the list instead of adding sublists
import fileinput
import re
array = []
for line in fileinput.input():
array.extend(re.findall(r"%%([^%^\n]+)%%", line))
print array
fileinput.close()
Thanks to all for your time, finally i achive what i want and my code is below
import sys
import re
list2 = []
file1 = 'file1.json'
file2 = 'test-var.txt'
output = open(file2, "r")
for line1 in output:
s = line1.split("=",1)[1:2]
for m in s:
list2.append(m)
input1 = open(file1, "r")
list1 = []
txt = ''
for line in input1:
a = re.findall(r"%%([^%^\n]+)%%",line)
a = ''.join(a)
if a =='':
txt = txt + line
continue
if any(a in s for s in list1):
val = '%%'+a+"1"+'%%'
line = line.replace('%%'+a+'%%', val)
a = a + "1"
txt = txt + line
list1.append(a)
for i in range(len(list1)):
string1 = '%%'+''.join(list1[i])+'%%'
string2 = ''.join(list2[i])
txt = txt.replace(string1,string2)
input1.close
output.close()
output = open(file1, "w")
print txt
output.write(txt)
output.close()