I'm using API call through which I get data in every iteration but the issue is I'm confused that how I can save data in JSON file of every iteration.
language : Python
Version : 3.9
import virustotal_python
from pprint import pprint
folder_path = 'C:/Users/E-TIME/PycharmProjects/FYP script/263 Hascodes in Txt Format'
count = 0
for file in glob.glob(os.path.join(folder_path, '*.txt')):
with open(file, 'r') as f:
lines = f.read()
l = lines.split(" ")
l = l[0].split('\n')
for file_id in range(0,3):
with virustotal_python.Virustotal(
"ab8421085f362f075cc88cb1468534253239be0bc482da052d8785d422aaabd7") as vtotal:
resp = vtotal.request(f"files/{l[file_id]}/behaviours")
data = resp.data
pprint(data)
I have the following code to extract tables from a word doc and create a list of csv files from the tables:
from docx import Document
import pandas as pd
import csv
import json
import time
document = Document('pathtoFile')
tables = []
for table in document.tables:
df = [['' for i in range(len(table.columns))] for j in range(len(table.rows))]
for i, row in enumerate(table.rows):
for j, cell in enumerate(row.cells):
if cell.text:
df[i][j] = cell.text
tables.append(pd.DataFrame(df))
for nr, i in enumerate(tables):
i.to_csv("table_" + str(nr) + ".csv")
I also have the following script to take a csv file and extract it to JSON:
import csv
import json
import time
def csv_to_json(csvFilePath, jsonFilePath):
jsonArray = []
#read csv file
with open(csvFilePath, encoding='utf-8', errors='ignore') as csvf:
#load csv file data using csv library's dictionary reader
csvReader = csv.DictReader(csvf)
#convert each csv row into python dict
for row in csvReader:
#add this python dict to json array
jsonArray.append(row)
#convert python jsonArray to JSON String and write to file
with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
jsonString = json.dumps(jsonArray, indent=4)
jsonf.write(jsonString)
csvFilePath = r'pathtoFile'
jsonFilePath = r'pathtoFile'
start = time.perf_counter()
csv_to_json(csvFilePath, jsonFilePath)
finish = time.perf_counter()
print(f"Conversion completed successfully in {finish - start:0.4f} seconds")
The main issue is combining the two and figuring out how to go about taking the word document with the tables, extracting them to csv's, then taking the csv and converting to JSON. I may be overcomplicating this but open to suggestions.
I am in the process of doing a conversion of JSON to XML using Python.
I'm giving a presentation of how by starting with one file, CSV, you can convert it through multiple formats in a chain. So, CSV to JSON, that JSON to XML, XML to the next file type in the chain, etc, back to CSV.
I obtained a public domain CSV file from Kaggle (https://www.kaggle.com/canggih/anime-data-score-staff-synopsis-and-genre), then converted it to JSON.
From JSON, I am trying to convert to XML and write to an outfile.
I converted the CSV to JSON using this (no formatting, just a straight conversion):
#This should convert CSV to JSON
import json, os
import pandas as pd
import csv
df = pd.read_csv('dataanime.csv')
df.to_json(r'sassyg_data_Anime.json')
Then, I created my JSON to XML file:
#With help from instructor and CodeSpeedy
#https://www.codespeedy.com/how-to-convert-json-to-xml-using-python/
#Import libraries
import json as j
import xml.etree.ElementTree as et
#load in the json file
with open("sassyg_data_Anime.json") as json_file_format:
d = j.load(json_file_format)
#create the main container element for the entire XML file
r = et.Element("Work")
#creates the subelements for each part of the json file
et.SubElement(r,"Title").text = d["Title"]
et.SubElement(r,"Type").text = d["Type"]
et.SubElement(r,"Episodes").text = d["Episodes"]
et.SubElement(r,"Status").text = d["Status"]
et.SubElement(r,"Start airing").text = str(d["Start airing"])
et.SubElement(r,"End airing").text = str(d["End airing"])
et.SubElement(r,"Starting season").text = d["Starting season"]
et.SubElement(r,"Broadcast time").text = d["Broadcast time"]
et.SubElement(r,"Producers").text = d["Producers"]
et.SubElement(r,"Licensors").text = d["Licensors"]
et.SubElement(r,"Studios").text = d["Studios"]
et.SubElement(r,"Sources").text = d["Sources"]
et.SubElement(r,"Genres").text = d["Genres"]
et.SubElement(r,"Duration").text = str(d["Duration"])
et.SubElement(r,"Rating").text = d["Rating"]
et.SubElement(r,"Score").text = str(d["Score"])
et.SubElement(r,"Scored by").text = str(d["Scored by"])
et.SubElement(r,"Members").text = str(d["Members"])
et.SubElement(r,"Favorites").text = str(d["Favorites"])
et.SubElement(r,"Description").text = d["Description"]
#create the element tree/info for the write file
a = et.ElementTree(r)
#ERROR ERROR
#structure the output for xml via tostring rather than str
#Cannot write an ElementTree to file, errors out
#This was one solution I came up with, still errors out
a_xml_str = et.tostring(a)
print(a_xml_str)
#This might error out as well, I can't get the program to get to this point
#write file it should go to
outfile = open("json_to_xml.xml", 'w', encoding='utf-8')
outfile.write(a_xml_str)
outfile.close()
The error I get is:
Traceback (most recent call last):
File "F:\Data_Int_Final\Gardner_json_to_xml\convert_json_to_xml.py", line 44, in <module>
a_xml_str = et.tostring(a)
File "C:\Users\user\AppData\Local\Programs\Python\Python39\lib\xml\etree\ElementTree.py", line 1109, in tostring
ElementTree(element).write(stream, encoding,
File "C:\Users\user\AppData\Local\Programs\Python\Python39\lib\xml\etree\ElementTree.py", line 748, in write
serialize(write, self._root, qnames, namespaces,
File "C:\Users\user\AppData\Local\Programs\Python\Python39\lib\xml\etree\ElementTree.py", line 873, in _serialize_xml
tag = elem.tag
AttributeError: 'ElementTree' object has no attribute 'tag'
This is the latest version of the code I've tried. Can anyone see a solution?
Update:
I have two other ways to convert to the starting JSON file, would one of these be a better approach?
import json
import csv
def make_json(csvFilePath, jsonFilePath):
data = {}
with open(csvFilePath, encoding='utf-8') as csvf:
csvReader = csv.DictReader(csvf)
for rows in csvReader:
key = rows['Title']
data[key] = rows
with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
jsonf.write(json.dumps(data, indent=4))
csvFilePath = r'dataanime.csv'
jsonFilePath = r'dataAnime.json'
make_json(csvFilePath, jsonFilePath)
which errors out my XML conversion when I use this JSON file with it:
Traceback (most recent call last):
File "F:\Data_Int_Final\convert_json_to_xml.py", line 16, in <module>
et.SubElement(r,"Title").text = d["Title"]
KeyError: 'Title'
or:
import csv
import json
import time
def csv_to_json(csvFilePath, jsonFilePath):
jsonArray = []
#read csv file
with open(csvFilePath, encoding='utf-8') as csvf:
#load csv file data using csv library's dictionary reader
csvReader = csv.DictReader(csvf)
#convert each csv row into python dict
for row in csvReader:
#add this python dict to json array
jsonArray.append(row)
#convert python jsonArray to JSON String and write to file
with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
jsonString = json.dumps(jsonArray, indent=4)
jsonf.write(jsonString)
csvFilePath = r'dataanime.csv'
jsonFilePath = r'g_d_anime.json'
start = time.perf_counter()
csv_to_json(csvFilePath, jsonFilePath)
finish = time.perf_counter()
print(f"Conversion of all rows completed successfully in {finish - start:0.4f} seconds")
which errors out my XML conversion when I use this created JSON file with it:
Traceback (most recent call last):
File "F:\Data_Int_Final\convert_json_to_xml.py", line 16, in <module>
et.SubElement(r,"Title").text = d["Title"]
TypeError: list indices must be integers or slices, not str
It's simpler to work with the CSV file and generate a XML file from that directly.
Try something like this:
import csv
import xml.etree.ElementTree as et
root = et.Element('WorksXML')
tree = et.ElementTree(root)
with open("dataanime.csv", "r", encoding="utf-8") as fin:
reader = csv.DictReader(fin)
for row in reader:
r = et.SubElement(root, "Work")
# iterate over each of the fields and add to the XML element
for field in reader.fieldnames:
et.SubElement(r, field.replace(' ', '_')).text = row[field]
with open("csv_to_xml.xml", 'wb') as fout:
tree.write(fout, xml_declaration=True, encoding='utf-8')
This generates an XML file with each "work" as a separate sub-element under the root element.
<?xml version="1.0" encoding="utf-8"?>
<WorksXML>
<Work>
<Title>Fullmetal Alchemist: Brotherhood</Title>
<Type>TV</Type>
<Episodes>64</Episodes>
<Status>Finished Airing</Status>
<Start_airing>4/5/2009</Start_airing>
<End_airing>7/4/2010</End_airing>
<Starting_season>Spring</Starting_season>
...
For the CSV to JSON conversion, the first approach creates a dictionary with titles as keys and the second approach creates an array with each item an object with all the attributes.
If any of the works have a duplicate title then the first approach will overwrite the duplicate entries. If not then it's just a matter of how you want to access the data in the JSON file as a dictionary or a list. If you want to generate XML from the JSON file then the second approach with an array will be the better option.
To convert the array-based JSON file to XML then this will do the job.
import json
import xml.etree.ElementTree as ET
def json_to_xml(jsonFilePath, xmlFilePath):
root = ET.Element('WorksXML')
tree = ET.ElementTree(root)
with open(jsonFilePath, "r", encoding="utf-8") as fin:
jdata = json.load(fin)
for obj in jdata:
r = ET.SubElement(root, "Work")
for key, value in obj.items():
ET.SubElement(r, key.replace(' ', '_')).text = value
with open(xmlFilePath, 'wb') as fout:
tree.write(fout, xml_declaration=True, encoding='utf-8')
jsonFilePath = 'g_d_anime.json'
xmlFilePath = 'g_d_anime.xml'
json_to_xml(jsonFilePath, xmlFilePath)
I have this simple python code that converts a json file into a csv.
I would like to convert only the first four values of each key, but i couldn't figure out how to do it.
import json
import csv
# Opening JSON file and loading the data
# into the variable data
with open('personas.json') as json_file:
data = json.load(json_file)
employee_data = data['emp_details']
# now we will open a file for writing
data_file = open('data_file.csv', 'w')
# create the csv writer object
csv_writer = csv.writer(data_file)
# Counter variable used for writing
# headers to the CSV file
count = 0
for emp in employee_data:
if count == 0:
# Writing headers of CSV file
header = emp.keys()
csv_writer.writerow(header)
count += 1
# Writing data of CSV file
csv_writer.writerow(emp.values())
data_file.close()
Here is an example of json file's format
{"emp_details":[
{
"DATAID":"6908443",
"FIRST_NAME":"Fernando",
"SECOND_NAME":"Fabbiano",
"THIRD_NAME":"Agustin",
"FOURTH_NAME":"",
"AGE": "21",
"EMAIL": "fer.fab#gmail.com"
}
]}
And as i said, i would like to convert only the fields DATAID, FIRSTNAME, SECONDNAME, THIRD NAME.
I have read file of csv but I have a problem that how to read CSV file and save it in table.html?
import csv
html_about = ''
names = []
with open('filo.csv') as data_file:
csv_data = csv.reader(data_file)
for line in csv_data:
names.append(f'{line[0]}')
html_output = '\n<ul>'
for name in names:
html_output += f'\n\t<li>{name}</li>'
html_output += '\n</ul>'
from prettytable import PrettyTable
x = PrettyTable(line[0])
html_code = x.get_html_string()
html_file = open('table.html','w')
html_file = html_file.write(html_code)
I suggest you use pandas library,
it has pd.read_csv, and also pd.to_html
usage should look like this, let me know if this works for you:
import pandas as pd
df = pd.read_csv('filo.csv')
with open('table.html', 'w') as html_file:
df.to_html(html_file)