Error with json generation with python - data.append attribute error - python

i am tryng to generate a json file with my python script.
The goal is to parse a csv file , get some data, do some operations/elaborations and then generate a json file.
When i run the script the json generations seems to run smoothly but as soon the first row is parsed the scripts stops with the following error :
Traceback (most recent call last): File
"c:\xampp\htdocs\mix_test.py", line 37, in
data.append({"name": file_grab ,"status": "progress"})
^^^^^^^^^^^ AttributeError: 'str' object has no attribute 'append'
Below the code:
import json
import requests
import shutil
from os.path import exists
from pathlib import Path
timestr = time.strftime("%Y%m%d")
dest_folder = (r'C:\Users\Documents\mix_test_python')
filename = []
# read filename and path with pandas extension
df = pd.read_csv(
r'C:\Users\Documents\python_test_mix.csv', delimiter=';')
data = []
for ind in df.index:
mode = (df['Place'][ind])
source_folder = (df['File Path'][ind])
file_grab = (df['File Name'][ind])
code = (df['Event ID'][ind])
local_file_grab = os.path.join(dest_folder, file_grab)
remote_file_grab = os.path.join(source_folder, file_grab)
### generate json ########
##s = json.dumps(test)##
data.append({"name": file_grab ,"status": "progress"})
with open(r'C:\Users\Documents\test.json','w') as f:
json.dump(data, f, indent=4)
f.close
#### detect if it is ftp ######
print(mode, source_folder, remote_file_grab)
Could you help me to understand what i am wrong in ?

Related

json dump not updating the file

I wanted to store some value in a json file
json file gets readed but not getting writed
import json
import os
filepath = os.path.abspath(__file__).replace("test.py", "test.json")
data = json.load(open(filepath, "r"))
out_file = open("test.json", "w")
a = input()
data["cool"] = a
print(data)
json.dump(data,out_file, indent = 6)
out_file.close()

Python: Cretae file if it does not exists in wb mode throws an error

I want to create a csv file and read from it using a random token as a filename, However, it returns an error by creating a file.
import secrets
import os
import pandas as pd
views_data_dir = 'views_data'
def rnd_token():
return str(secrets.token_hex(nbytes=128))
def convert_csv(csv):
rnd_token = rnd_token()
name = f'./{views_data_dir}/{rnd_token}.csv'
print(name)
if not os.path.exists(name):
# error happens here
with open(name, 'wb') as f:
# Perform byte join on the CSV data
f.write(b''.join(csv))
return get_df(rnd_token)
def get_df(rnd_token):
print(rnd_token)
df = pd.read_csv(f'./{views_data_dir}/{rnd_token}.csv', header=0)
return df
FileNotFoundError: [Errno 2] No such file or directory: './views_data/c7f7b45b8fb8261fb021ada872a3885055bfce7b61533dfb53241bd6958f396c6496e8577477e3e5c49dd2adf1f73b1a6f931cca84ad084729f4933c0e97b79a5ac1d1dea29378903d1c85863bb9910d429c4389f5bfbc6df464648c9cb0e74ee0365b
46464275ec07118621aa4e7bf7dd1c821b562ef50d38dac17055e52241.csv'
But when I pass some filename such as "test" or so on it creates the file without errors.
I have tried "wb+" mode but it did not help.
The function creates a file without name ".csv".

Conversion of JSON to XML errors out when I try to write to file

I am in the process of doing a conversion of JSON to XML using Python.
I'm giving a presentation of how by starting with one file, CSV, you can convert it through multiple formats in a chain. So, CSV to JSON, that JSON to XML, XML to the next file type in the chain, etc, back to CSV.
I obtained a public domain CSV file from Kaggle (https://www.kaggle.com/canggih/anime-data-score-staff-synopsis-and-genre), then converted it to JSON.
From JSON, I am trying to convert to XML and write to an outfile.
I converted the CSV to JSON using this (no formatting, just a straight conversion):
#This should convert CSV to JSON
import json, os
import pandas as pd
import csv
df = pd.read_csv('dataanime.csv')
df.to_json(r'sassyg_data_Anime.json')
Then, I created my JSON to XML file:
#With help from instructor and CodeSpeedy
#https://www.codespeedy.com/how-to-convert-json-to-xml-using-python/
#Import libraries
import json as j
import xml.etree.ElementTree as et
#load in the json file
with open("sassyg_data_Anime.json") as json_file_format:
d = j.load(json_file_format)
#create the main container element for the entire XML file
r = et.Element("Work")
#creates the subelements for each part of the json file
et.SubElement(r,"Title").text = d["Title"]
et.SubElement(r,"Type").text = d["Type"]
et.SubElement(r,"Episodes").text = d["Episodes"]
et.SubElement(r,"Status").text = d["Status"]
et.SubElement(r,"Start airing").text = str(d["Start airing"])
et.SubElement(r,"End airing").text = str(d["End airing"])
et.SubElement(r,"Starting season").text = d["Starting season"]
et.SubElement(r,"Broadcast time").text = d["Broadcast time"]
et.SubElement(r,"Producers").text = d["Producers"]
et.SubElement(r,"Licensors").text = d["Licensors"]
et.SubElement(r,"Studios").text = d["Studios"]
et.SubElement(r,"Sources").text = d["Sources"]
et.SubElement(r,"Genres").text = d["Genres"]
et.SubElement(r,"Duration").text = str(d["Duration"])
et.SubElement(r,"Rating").text = d["Rating"]
et.SubElement(r,"Score").text = str(d["Score"])
et.SubElement(r,"Scored by").text = str(d["Scored by"])
et.SubElement(r,"Members").text = str(d["Members"])
et.SubElement(r,"Favorites").text = str(d["Favorites"])
et.SubElement(r,"Description").text = d["Description"]
#create the element tree/info for the write file
a = et.ElementTree(r)
#ERROR ERROR
#structure the output for xml via tostring rather than str
#Cannot write an ElementTree to file, errors out
#This was one solution I came up with, still errors out
a_xml_str = et.tostring(a)
print(a_xml_str)
#This might error out as well, I can't get the program to get to this point
#write file it should go to
outfile = open("json_to_xml.xml", 'w', encoding='utf-8')
outfile.write(a_xml_str)
outfile.close()
The error I get is:
Traceback (most recent call last):
File "F:\Data_Int_Final\Gardner_json_to_xml\convert_json_to_xml.py", line 44, in <module>
a_xml_str = et.tostring(a)
File "C:\Users\user\AppData\Local\Programs\Python\Python39\lib\xml\etree\ElementTree.py", line 1109, in tostring
ElementTree(element).write(stream, encoding,
File "C:\Users\user\AppData\Local\Programs\Python\Python39\lib\xml\etree\ElementTree.py", line 748, in write
serialize(write, self._root, qnames, namespaces,
File "C:\Users\user\AppData\Local\Programs\Python\Python39\lib\xml\etree\ElementTree.py", line 873, in _serialize_xml
tag = elem.tag
AttributeError: 'ElementTree' object has no attribute 'tag'
This is the latest version of the code I've tried. Can anyone see a solution?
Update:
I have two other ways to convert to the starting JSON file, would one of these be a better approach?
import json
import csv
def make_json(csvFilePath, jsonFilePath):
data = {}
with open(csvFilePath, encoding='utf-8') as csvf:
csvReader = csv.DictReader(csvf)
for rows in csvReader:
key = rows['Title']
data[key] = rows
with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
jsonf.write(json.dumps(data, indent=4))
csvFilePath = r'dataanime.csv'
jsonFilePath = r'dataAnime.json'
make_json(csvFilePath, jsonFilePath)
which errors out my XML conversion when I use this JSON file with it:
Traceback (most recent call last):
File "F:\Data_Int_Final\convert_json_to_xml.py", line 16, in <module>
et.SubElement(r,"Title").text = d["Title"]
KeyError: 'Title'
or:
import csv
import json
import time
def csv_to_json(csvFilePath, jsonFilePath):
jsonArray = []
#read csv file
with open(csvFilePath, encoding='utf-8') as csvf:
#load csv file data using csv library's dictionary reader
csvReader = csv.DictReader(csvf)
#convert each csv row into python dict
for row in csvReader:
#add this python dict to json array
jsonArray.append(row)
#convert python jsonArray to JSON String and write to file
with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
jsonString = json.dumps(jsonArray, indent=4)
jsonf.write(jsonString)
csvFilePath = r'dataanime.csv'
jsonFilePath = r'g_d_anime.json'
start = time.perf_counter()
csv_to_json(csvFilePath, jsonFilePath)
finish = time.perf_counter()
print(f"Conversion of all rows completed successfully in {finish - start:0.4f} seconds")
which errors out my XML conversion when I use this created JSON file with it:
Traceback (most recent call last):
File "F:\Data_Int_Final\convert_json_to_xml.py", line 16, in <module>
et.SubElement(r,"Title").text = d["Title"]
TypeError: list indices must be integers or slices, not str
It's simpler to work with the CSV file and generate a XML file from that directly.
Try something like this:
import csv
import xml.etree.ElementTree as et
root = et.Element('WorksXML')
tree = et.ElementTree(root)
with open("dataanime.csv", "r", encoding="utf-8") as fin:
reader = csv.DictReader(fin)
for row in reader:
r = et.SubElement(root, "Work")
# iterate over each of the fields and add to the XML element
for field in reader.fieldnames:
et.SubElement(r, field.replace(' ', '_')).text = row[field]
with open("csv_to_xml.xml", 'wb') as fout:
tree.write(fout, xml_declaration=True, encoding='utf-8')
This generates an XML file with each "work" as a separate sub-element under the root element.
<?xml version="1.0" encoding="utf-8"?>
<WorksXML>
<Work>
<Title>Fullmetal Alchemist: Brotherhood</Title>
<Type>TV</Type>
<Episodes>64</Episodes>
<Status>Finished Airing</Status>
<Start_airing>4/5/2009</Start_airing>
<End_airing>7/4/2010</End_airing>
<Starting_season>Spring</Starting_season>
...
For the CSV to JSON conversion, the first approach creates a dictionary with titles as keys and the second approach creates an array with each item an object with all the attributes.
If any of the works have a duplicate title then the first approach will overwrite the duplicate entries. If not then it's just a matter of how you want to access the data in the JSON file as a dictionary or a list. If you want to generate XML from the JSON file then the second approach with an array will be the better option.
To convert the array-based JSON file to XML then this will do the job.
import json
import xml.etree.ElementTree as ET
def json_to_xml(jsonFilePath, xmlFilePath):
root = ET.Element('WorksXML')
tree = ET.ElementTree(root)
with open(jsonFilePath, "r", encoding="utf-8") as fin:
jdata = json.load(fin)
for obj in jdata:
r = ET.SubElement(root, "Work")
for key, value in obj.items():
ET.SubElement(r, key.replace(' ', '_')).text = value
with open(xmlFilePath, 'wb') as fout:
tree.write(fout, xml_declaration=True, encoding='utf-8')
jsonFilePath = 'g_d_anime.json'
xmlFilePath = 'g_d_anime.xml'
json_to_xml(jsonFilePath, xmlFilePath)

Ignoring specific json files where key [Behavior] is not present

I am working with a huge dataset of Cuckoo sandbox datset having several .JSON files, I have to create a CSV file having API stats in the behavior section of JSOn files, but if a json file doesn't have Specific file it the code stops executing.
here is my program
import pandas as pd
# As of Pandas 1.01, json_normalize as pandas.io.json.json_normalize is deprecated and is now exposed in the top-level namespace.
from pandas.io.json import json_normalize
from pathlib import Path
import json
import os
bkey=[]
infoList=[]
signaturesList=[]
fileOpsList=[]
irmaList=[]
suricataList=[]
virustotalList=[]
sysmonList=[]
resubmitList=[]
snortList=[]
behaviorList=[]
memoryList=[]
debugList=[]
#mispList=[]
targetList=[]
networkList=[]
metadataList=[]
list2=[]
#print(pathList)
path_to_json = 'C:/Users/skdk/Desktop/Ransomware-API/Benign/'
for file_name in [file for file in os.listdir(path_to_json) if file.endswith('.json')]:
with open(path_to_json + file_name, encoding ='utf-8') as json_file:
data = json.load(json_file)
#print(data)
behaviorList.append(str(data['behavior']))
# for path in path_to_json:
# p = Path(path)
# #print(p)
# # read json50
# with p.open('r', encoding='utf-8') as f:
# data = json.loads(f.read())
# #print(p)
# behaviorList.append(str(data['behavior']))
apiStatsList = []
for behavior in behaviorList:
for key,value in eval(behavior)['apistats'].items():
fileName = str(pathList[behaviorList.index(behavior)][:pathList[behaviorList.index(behavior)].index('.json')])+"/" + str(key)
list2.append(fileName)
apiStatsList.append(value)
print(fileName)
dataset2= {}
for key,value in apiStatsList[0].items():
dataset2[key] = [value]
count = 1
for apiStat in apiStatsList[1:]:
for key,value in apiStat.items():
if(key in dataset2):
while(len(dataset2[key])!=count):
dataset2[key].append(0)
dataset2[key].append(apiStat[key])
else:
tempList=[0]*(count)
tempList.append(value)
dataset2[key] = tempList
count=count+1
dataset2['Directory']=list2
df2 = pd.DataFrame.from_dict(dataset2, orient='index')
df2 = df2.transpose()
df2 = df2.fillna(0)
df2=df2.set_index('Directory')
#df2
df2.to_csv('Benign.csv')
I am getting a following error
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-16-fc19a9a3c2d1> in <module>
34 data = json.load(json_file)
35 #print(data)
---> 36 behaviorList.append(str(data['behavior']))
37
38 # for path in path_to_json:
KeyError: 'behavior'
Any Help is appreciated.
Put it inside
try:
'your code'
except KeyError:
'your code in case the json doesn't have behaviour. It could skip to the next file for example.'
```
It would catch any or specified error. And since you've said you are interested only in files with behaviour key, I think it should help you.

How to read from a csv file in zip folder and save data from csv file in database?

import glob
import os
import csv
import zipfile
from io import StringIO
for name in glob.glob('C:/Users/RAMESH SANTHA/Downloads/download-NIFTY 50-01012020.zip'):
base = os.path.basename(name)
filename = os.path.splitext(base)[0]
datadirectory = 'C:/Users/RAMESH SANTHA/Downloads/'
dataFile = filename
archive = '.'.join([dataFile, 'zip'])
fullpath = ''.join([datadirectory, archive])
csv_file = '.'.join([dataFile, 'csv']) #all fixed
filehandle = open(fullpath, 'rb')
zfile = zipfile.ZipFile(filehandle)
data = StringIO.StringIO(zfile.read(csv_file))
reader = csv.reader(data)
for row in reader:
print (row)
I tried following code to read data from zip folder which contains csv file and print rows but got error:
data = StringIO.StringIO(zfile.read(csv_file))
AttributeError: type object '_io.StringIO' has no attribute 'StringIO'
There is no StringIO.StringIO() but io.StringIO()
import io
data = io.StringIO(...)
With your import it will be even without io.
from io import StringIO
data = StringIO(...)
BTW: I think you overcomplicated code using glob and join(). And you can use filename directly with ZipFile without open()
import os
import csv
import zipfile
import io
zip_fullname = 'C:/Users/RAMESH SANTHA/Downloads/download-NIFTY 50-01012020.zip'
zip_file = os.path.basename(zip_fullname)
csv_file = zip_file.replace('.zip', '.csv')
print(zip_file) # download-NIFTY 50-01012020.zip
print(csv_file) # download-NIFTY 50-01012020.csv
zfile = zipfile.ZipFile(zip_fullname)
data = io.StringIO(zfile.read(csv_file).decode('utf-8')) # bytes needs to be converted to string
reader = csv.reader(data)
for row in reader:
print(row)
But with pandas it should be even simpler
import pandas as pd
df = pd.read_csv('C:/Users/RAMESH SANTHA/Downloads/download-NIFTY 50-01012020.zip')
print(df)
Looking at the script you getting error opening the csv file from zip file. Below is python 3 code that I have working for a zip file having few csv's. The directory to extract should exist before you run the script
import zipfile
path_to_zip_file='/tmp/test1.zip' # Assuming this file exist , This path is from mac, but should work for windows as well'
directory_to_extract_to='/tmp/extract/' # Assuming this directory already exist
import csv,os
import codecs
import glob
with zipfile.ZipFile(path_to_zip_file, 'r') as zip_ref:
zip_ref.extractall(directory_to_extract_to)
for file in glob.glob(directory_to_extract_to+'*.csv'):
path = os.path.join(directory_to_extract_to,file)
with open(path, 'rb') as f:
reader = csv.reader(codecs.iterdecode(f, 'utf-8'))
# Below code is print them as arrays
# for row in reader:
# print(row)
# Reading rows as ordered dictionary
dictReader = csv.DictReader(codecs.iterdecode(f, 'utf-8'))
for row in dictReader:
print(row)

Categories