I am working with a huge Cuckoo sandbox dataset consisting of several .json files. I have to create a CSV file with the API stats from the behavior section of the JSON files, but if a JSON file doesn't have the behavior section, the code stops executing.
Here is my program:
import pandas as pd
# As of Pandas 1.01, json_normalize as pandas.io.json.json_normalize is deprecated and is now exposed in the top-level namespace.
from pandas.io.json import json_normalize
from pathlib import Path
import json
import os
bkey=[]
infoList=[]
signaturesList=[]
fileOpsList=[]
irmaList=[]
suricataList=[]
virustotalList=[]
sysmonList=[]
resubmitList=[]
snortList=[]
behaviorList=[]
memoryList=[]
debugList=[]
#mispList=[]
targetList=[]
networkList=[]
metadataList=[]
list2=[]
#print(pathList)
path_to_json = 'C:/Users/skdk/Desktop/Ransomware-API/Benign/'
for file_name in [file for file in os.listdir(path_to_json) if file.endswith('.json')]:
    with open(path_to_json + file_name, encoding='utf-8') as json_file:
        data = json.load(json_file)
        #print(data)
        behaviorList.append(str(data['behavior']))
# for path in path_to_json:
# p = Path(path)
# #print(p)
# # read json50
# with p.open('r', encoding='utf-8') as f:
# data = json.loads(f.read())
# #print(p)
# behaviorList.append(str(data['behavior']))
apiStatsList = []
for behavior in behaviorList:
    for key, value in eval(behavior)['apistats'].items():
        fileName = str(pathList[behaviorList.index(behavior)][:pathList[behaviorList.index(behavior)].index('.json')]) + "/" + str(key)
        list2.append(fileName)
        apiStatsList.append(value)
        print(fileName)
dataset2 = {}
for key, value in apiStatsList[0].items():
    dataset2[key] = [value]
count = 1
for apiStat in apiStatsList[1:]:
    for key, value in apiStat.items():
        if key in dataset2:
            while len(dataset2[key]) != count:
                dataset2[key].append(0)
            dataset2[key].append(apiStat[key])
        else:
            tempList = [0] * count
            tempList.append(value)
            dataset2[key] = tempList
    count = count + 1
dataset2['Directory'] = list2
df2 = pd.DataFrame.from_dict(dataset2, orient='index')
df2 = df2.transpose()
df2 = df2.fillna(0)
df2 = df2.set_index('Directory')
#df2
df2.to_csv('Benign.csv')
I am getting the following error:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-16-fc19a9a3c2d1> in <module>
34 data = json.load(json_file)
35 #print(data)
---> 36 behaviorList.append(str(data['behavior']))
37
38 # for path in path_to_json:
KeyError: 'behavior'
Any help is appreciated.
Put it inside a try/except:

try:
    # your code
except KeyError:
    # your code for the case where the JSON doesn't have the 'behavior' key;
    # it could skip to the next file, for example

It will catch whichever errors you specify, and since you've said you are interested only in files that have the 'behavior' key, I think it should help you.
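For example, applied to the loading loop from the question (a minimal sketch; files without a 'behavior' section are reported and skipped):

for file_name in [file for file in os.listdir(path_to_json) if file.endswith('.json')]:
    with open(path_to_json + file_name, encoding='utf-8') as json_file:
        data = json.load(json_file)
        try:
            behaviorList.append(str(data['behavior']))
        except KeyError:
            # no 'behavior' section in this report; move on to the next file
            print('Skipping ' + file_name + ': no behavior section')
            continue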
Related
I am trying to generate a JSON file with my Python script.
The goal is to parse a CSV file, get some data, do some operations/elaborations, and then generate a JSON file.
When I run the script, the JSON generation seems to run smoothly, but as soon as the first row is parsed the script stops with the following error:
Traceback (most recent call last):
  File "c:\xampp\htdocs\mix_test.py", line 37, in <module>
    data.append({"name": file_grab ,"status": "progress"})
    ^^^^^^^^^^^
AttributeError: 'str' object has no attribute 'append'
Below is the code:
import json
import os
import time
import requests
import shutil
import pandas as pd
from os.path import exists
from pathlib import Path
timestr = time.strftime("%Y%m%d")
dest_folder = (r'C:\Users\Documents\mix_test_python')
filename = []
# read filename and path with pandas extension
df = pd.read_csv(
r'C:\Users\Documents\python_test_mix.csv', delimiter=';')
data = []
for ind in df.index:
    mode = (df['Place'][ind])
    source_folder = (df['File Path'][ind])
    file_grab = (df['File Name'][ind])
    code = (df['Event ID'][ind])
    local_file_grab = os.path.join(dest_folder, file_grab)
    remote_file_grab = os.path.join(source_folder, file_grab)
    ### generate json ########
    ##s = json.dumps(test)##
    data.append({"name": file_grab, "status": "progress"})
    with open(r'C:\Users\Documents\test.json', 'w') as f:
        json.dump(data, f, indent=4)
    f.close
    #### detect if it is ftp ######
    print(mode, source_folder, remote_file_grab)
Could you help me understand what I am doing wrong?
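The traceback says that data is a str by the time line 37 runs, so somewhere between data = [] and the append it must get rebound to a string in code not shown here. A minimal sketch of the intended pattern, using the same column names and paths as the question: keep data a list throughout and write the JSON once, after the loop.

import json
import pandas as pd

df = pd.read_csv(r'C:\Users\Documents\python_test_mix.csv', delimiter=';')

data = []  # must stay a list; never reassign it to a string inside the loop
for ind in df.index:
    file_grab = df['File Name'][ind]
    data.append({"name": file_grab, "status": "progress"})

# write once, after the loop, so the file ends up holding every row
with open(r'C:\Users\Documents\test.json', 'w') as f:
    json.dump(data, f, indent=4)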
I have to convert JSON files as I said; here is the code:
import json
import os
import pandas as pd

def AnalysisJson():
    file_path = 'my_file'
    for root, dirs, files in os.walk(file_path):
        for file in files:
            InputPath = open(file_path + '\\' + file, encoding="utf-8")
            for i in files:
                df = json.load(InputPath)
                demo = pd.json_normalize(df, record_path='label_annotations')
                demo.to_csv('files.csv')
JSONDecodeError: Expecting value: line 1 column 1 (char 0)
I want to convert these files; even if the code is hard to run, I hope someone can give me some advice. Thanks!
I am not sure that I understand correctly what you want, but here is an answer based on my interpretation of your question.
import json
import os
from glob import glob
import pandas as pd
def json_to_csv(dir_path: str) -> None:
    for file_path in glob(os.path.join(dir_path, '*.json')):
        with open(file_path, encoding='utf-8') as f:
            data = json.load(f)
        df = pd.json_normalize(data, record_path='label_annotations')
        df.to_csv(file_path.replace('.json', '.csv'), index=False)
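Call it with the directory holding your JSON files, assuming 'my_file' as in the question:

json_to_csv('my_file')

Each input file then gets its own CSV written next to it, instead of every iteration overwriting a single files.csv.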
I'm calling an API that returns data on every iteration, but I'm confused about how to save each iteration's data to a JSON file.
Language: Python
Version: 3.9
import glob
import os
import virustotal_python
from pprint import pprint

folder_path = 'C:/Users/E-TIME/PycharmProjects/FYP script/263 Hascodes in Txt Format'
count = 0
for file in glob.glob(os.path.join(folder_path, '*.txt')):
    with open(file, 'r') as f:
        lines = f.read()
        l = lines.split(" ")
        l = l[0].split('\n')
        for file_id in range(0, 3):
            with virustotal_python.Virustotal(
                    "ab8421085f362f075cc88cb1468534253239be0bc482da052d8785d422aaabd7") as vtotal:
                resp = vtotal.request(f"files/{l[file_id]}/behaviours")
                data = resp.data
                pprint(data)
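One way to save every iteration (a sketch, assuming resp.data is a JSON-serializable dict or list, which is what virustotal_python returns for this endpoint): write each response to its own file, named after the hash being queried, so no iteration overwrites another. Here hashes and the <YOUR_API_KEY> placeholder stand in for the question's l and hard-coded key.

import glob
import json
import os

import virustotal_python

folder_path = 'C:/Users/E-TIME/PycharmProjects/FYP script/263 Hascodes in Txt Format'
for file in glob.glob(os.path.join(folder_path, '*.txt')):
    with open(file, 'r') as f:
        hashes = f.read().split(" ")[0].split('\n')
    for file_id in range(0, 3):
        with virustotal_python.Virustotal("<YOUR_API_KEY>") as vtotal:
            resp = vtotal.request(f"files/{hashes[file_id]}/behaviours")
            # one output file per hash queried
            out_name = f"{hashes[file_id]}_behaviours.json"
            with open(out_name, 'w', encoding='utf-8') as out:
                json.dump(resp.data, out, indent=4)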
I have JSONs reporting different values, and I want to import only some keys into a CSV.
I have tried two approaches, but both give me problems.
At first, I tried this:
import os, json
import glob
import csv
# Place your JSON data in a directory named 'data/'
src = "MYPATH"
data = []
json_pattern = os.path.join(src, '*.json')
# only json
files = glob.glob(json_pattern, recursive=True)
# Loop through files
for single_file in files:
    with open(single_file, 'r') as f:
        json_file = json.load(f)
        try:
            data.append([
                json_file['name1'],
                json_file['name2'],
                json_file['name3'],
                json_file['name4'],
            ])
        except KeyError:
            continue
# Add headers
data.insert(0, ['title_1', 'title_2', 'title_3'])
# Export to CSV.
# Add the date to the file name to avoid overwriting it each time.
csv_filename = 'name.csv'
with open((src + csv_filename), "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerows(data)
In this way, unfortunately, if a key is missing the code skips the file altogether, while I want it to skip only the missing key.
So I tried this instead:
import os,json
import glob
import csv
# Place your JSON data in a directory named 'data/'
src = "MY_PATH"
data = []
json_pattern = os.path.join(src, '*.json')
# Change the glob if you want to only look through files with specific names
files = glob.glob(json_pattern, recursive=True)
# Loop through files
col_name = ['name1','name2','name4']
for single_file in files:
    with open(single_file, 'r') as f:
        json_file = json.load(f)
        for key in col_name:
            try:
                data.append([json_file[key]])
            except KeyError:
                continue
# Add headers
data.insert(0, ['title_1', 'title_2', 'title_3'])
# Export to CSV.
# Add the date to the file name to avoid overwriting it each time.
csv_filename = 'name.csv'
with open((src + csv_filename), "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerows(data)
But in this case, each value becomes a new row in the CSV, while I want the values from each JSON on a single row.
I am not an expert and I really don't know how to combine the two.
Can someone help me out?
Thanks!
If I understand correctly what you're trying to do, why not just do the following:
# Loop through files
for single_file in files:
    with open(single_file, 'r') as f:
        json_file = json.load(f)
        data.append([
            json_file.get('name1', ''),
            json_file.get('name2', ''),
            json_file.get('name3', ''),
            json_file.get('name4', '')
        ])
By using .get() you can specify the default value in case a key isn't found.
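Put together with the glob and csv code from the question, the whole flow would look something like this (a sketch; note that the question's header row lists three titles for four keys, so a fourth title is assumed here):

import csv
import glob
import json
import os

src = "MY_PATH"
data = [['title_1', 'title_2', 'title_3', 'title_4']]  # header row first
for single_file in glob.glob(os.path.join(src, '*.json')):
    with open(single_file, 'r') as f:
        json_file = json.load(f)
    # one row per JSON file; a missing key becomes an empty cell instead of
    # dropping the whole file or spilling values onto separate rows
    data.append([json_file.get(k, '') for k in ('name1', 'name2', 'name3', 'name4')])

with open(os.path.join(src, 'name.csv'), "w", newline="") as f:
    csv.writer(f).writerows(data)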
I can't iterate over a JSON file to get all the values for a key. I've tried writing this multiple ways, with many errors.
# Import package
from urllib.request import urlretrieve
# Import pandas
import pandas as pd
# Assign url of file: url
url = 'https://data.sfgov.org/resource/wwmu-gmzc.json'
# Save file locally
urlretrieve(url, 'wwmu-gmzc.json')
# Loading JSONs in Python
import json
with open('wwmu-gmzc.json', 'r') as json_file:
    #json_data = json.load(json_file) # type list
    json_data = json.load(json_file)[0] # turn into type dict
    print(type(json_data))
    # Print each key-value pair in json_data
    #for k in json_data.keys():
    #    print(k + ': ', json_data[k])
    for line in json_data['title']:
        print(line)
    #w_title = json_data['title']
    #print(w_title)
    for key, value in json_data.items():
        print(key + ':', value)
    #print(json_data.keys('title') + ':' , jason_data['title'])
The current version of this code only gives the first line of the file:
<class 'dict'>
1
8
0
release_year: 2011
actor_2: Nithya Menon
writer: Umarji Anuradha, Jayendra, Aarthi Sriram, & Suba
locations: Epic Roasthouse (399 Embarcadero)
director: Jayendra
title: 180
production_company: SPI Cinemas
actor_1: Siddarth
actor_3: Priya Anand
Corrected code below, which also accounts for missing keys:
# Loading JSONs in Python
import json

with open('wwmu-gmzc.json', 'r') as json_file:
    content = json_file.read()
    json_data = json.loads(content)
    print(type(json_data))
    for json_i in json_data:
        try:
            print(json_i['locations'])
        except KeyError:
            print('***** NO KEY FOUND *****')
You are loading only the first record in the dataset.
with open('wwmu-gmzc.json', 'r') as json_file:
    json_data = json.load(json_file)  # input is a list of dicts, so load everything

for json_i in json_data:
    print(json_i.get('your_key', 'default_value'))
Your code does not work because the data you are fetching is actually a list. To read each item in the list (each item is a dictionary of key-value pairs) you can do:
# Import package
from urllib.request import urlretrieve
import json
# Assign url of file: url
url = 'https://data.sfgov.org/resource/wwmu-gmzc.json'
# Save file locally
urlretrieve(url, 'wwmu-gmzc.json')
# Loading JSONs in Python
with open('wwmu-gmzc.json', 'r') as json_file:
    content = json_file.read()
    json_data = json.loads(content)

for item in json_data:
    print('======')
    for key, value in item.items():
        print(key + ':', value)