I have JSONs reporting different values, and I want to import only some keys in a csv.
I have tried 2 approaches, but both give me some problems.
At first, I have tried this :
`import os,json
import glob
import csv
# Place your JSON data in a directory named 'data/'
src = "MYPATH"
data = []
json_pattern = os.path.join(src, '*.json')
# only json
files = glob.glob(json_pattern, recursive=True)
# Loop through files
for single_file in files:
with open(single_file, 'r') as f:
json_file = json.load(f)
try:
data.append([
json_file['name1'],
json_file['name2'],
json_file['name3'],
json_file['name4'],
])
except KeyError:
continue
# Add headers
data.insert(0, ['title_1', 'title_2', 'title_3'])
# Export to CSV.
# Add the date to the file name to avoid overwriting it each time.
csv_filename = 'name.csv'
with open((src + csv_filename), "w", newline="") as f:
writer = csv.writer(f)
writer.writerows(data)`
In this way, unfortunately, if a key is not included, the code skip the file altogether, while I want it to skip only the key.
So I tried this, instead:
import os,json
import glob
import csv
# Place your JSON data in a directory named 'data/'
src = "MY_PATH"
data = []
json_pattern = os.path.join(src, '*.json')
# Change the glob if you want to only look through files with specific names
files = glob.glob(json_pattern, recursive=True)
# Loop through files
col_name = ['name1','name2','name4']
for single_file in files:
with open(single_file, 'r') as f:
json_file = json.load(f)
for key in col_name:
try:
data.append([json_file[key]])
except KeyError:
continue
# Add headers
data.insert(0, ['title_1', 'title_2', 'title_3'])
# Export to CSV.
# Add the date to the file name to avoid overwriting it each time.
csv_filename = 'name.csv'
with open((src + csv_filename), "w", newline="") as f:
writer = csv.writer(f)
writer.writerows(data)
But in this case, each value is a new row in the csv, while I want the value from each json in a single row.
I am not an expert and I really don't know how to combine this two.
Can someone help me out?
Thanks!
If I understand what you're trying to do correctly, why not just do
# Loop through files
for single_file in files:
with open(single_file, 'r') as f:
json_file = json.load(f)
data.append([
json_file.get('name1', ''),
json_file.get('name2', ''),
json_file.get('name3', ''),
json_file.get('name4', '')
])
By using .get() you can specify the default value in case a key isn't found.
Related
My goal is to change multiple csv files in a folder into JSON.
First, I needed to list my csv files
for file in os.listdir("C:/Users/folder_to_csv"):
filename = os.fsdecode(file)
if filename.endswith(".csv"):
#check if csv files are listed correctly
print(os.path.join("C:/Users/folder_to_csv", filename))
With this, I was able to call csv files in that folder.
Result:
C:/Users/folder_to_csv\file_1.csv C:/Users/folder_to_csv\file_2.csv C:/Users/folder_to_csv\file_3.csv
Then, I wanted to use all of the csv files in 'csvlist' to jsonObj, however for some reason, my codes are only using the first file (C:/Users/folder_to_csv\file_1.csv)
This is what I have tried so far:
import json
import csv
import requests
import threading
import os
for file in os.listdir("C:/Users/folder_to_csv"):
filename = os.fsdecode(file)
if filename.endswith(".csv"):
csvlist = os.path.join("C:/Users/folder_to_csv", filename)
data = {}
def main():
#loop csv list so my codes can read all csv files
length = len(csvlist)
for i in range(length):
i += 1
path = csvlist
#switch csv to json
with open(path, mode='r') as f:
reader = csv.DictReader(f)
processdata = [row for row in reader]
dlist = processdata
jsonObj = json.dumps(dlist)
})
print(jsonObj)
main()
In the initial loop, you keep redefining the csvlist variable. I suppose you want it to be a list? Then just create an initial empty list and append to it instead of redefining
csvlist = []
...
csvlist.append(os.path.join("C:/Users/folder_to_csv", filename))
I have the following code:
import os
import json
import ipaddress
iplist = []
ipiflist = []
mydict = {}
for filename in os.listdir('data/'):
with open(os.path.join('data/', filename), 'r') as f:
data = json.load(f)
mydict.update(data)
print(mydict)
In the data directory there are several JSON files that I open in this loop.
I update the dict in every loop and for this reason I get the following output:
{'ipif_1001': '10.10.160.129', 'ipif_1002': '10.10.160.142', 'ipif_1003': '10.10.160.169', 'ipif_1004': '10.10.160.173', 'ipif_3334': '10.10.160.194', 'IpIf3337': '10.10.160.126'}
{'ipif_1001': '10.10.160.129', 'ipif_1002': '10.10.160.142', 'ipif_1003': '10.10.160.170', 'ipif_1004': '10.10.160.174', 'ipif_3334': '10.10.160.194', 'IpIf3337': '10.10.160.126', 'ipif_1005': '10.10.160.178', 'ipif_1006': '10.10.160.182'}
{'ipif_1001': '10.10.160.129', 'ipif_1002': '10.10.160.142', 'ipif_1003': '10.10.160.170', 'ipif_1004': '10.10.160.174', 'ipif_3334': '10.10.160.194', 'IpIf3337': '10.10.160.126', 'ipif_1005': '10.10.160.178', 'ipif_1006': '10.10.160.182', 'IpIf1001': '10.10.160.138', 'IpIf1002': '10.10.160.141', 'IpIf1003': '10.10.160.153', 'IpIf1006': '10.10.160.181', 'IpIf_CPEDCN': '10.10.160.241', 'IpIf_DCNMgt': '10.10.191.253', 'ipif1164': '10.10.160.166', 'IpIf1010': '10.10.170.1'}
I only need the summarized output from the last loop. How can I only access this?
Thanks for your help
The for loop in python has an else statement, which will only be executed when the loop was successful. Thus there you can plot your last resut?
for filename in os.listdir('data/'):
with open(os.path.join('data/', filename), 'r') as f:
data = json.load(f)
mydict.update(data)
else:
print(mydict)
import os
import json
import ipaddress
iplist = []
ipiflist = []
mydict = {}
list = os.listdir('data/')
for filename in os.listdir('data/'):
with open(os.path.join('data/', filename), 'r') as f:
data = json.load(f)
if list[list.count-1] == filename: #check last filename in the directory with the current filename in the loop
mydict.update(data)
print(mydict)
Try it like this
I have 200 CSV files in my folder.
What I am trying to do is read first row of each files and write in new csv.
And on top, I want to write [file,field1,field2,...fieldn]
n is maximum number of fields.
import csv
import glob
list=[]
hel=[]
files=glob.glob('C:/dataset/*.csv')
with open('test.csv', 'w',newline='') as testfile:
csv_writer = csv.writer(testfile)
for file in files:
with open(file, 'r') as infile:
file=file[file.rfind('\\')+1:]
file=file.strip('.csv')
reader = csv.reader(infile)
headers = next(reader)
hel.append((len(headers)))
max(hel)
lst = [file] + headers
csv_writer.writerow(lst)
It came out that maximum number of fields of 200 files are 255.
So on top of new csv file, I want to write file, field1, field2 ... field 255.
How can I do this?
import csv
import glob
list=[]
hel=[]
files=glob.glob('C:/dataset/*.csv')
with open('test.csv', 'w',newline='') as testfile:
csv_writer = csv.writer(testfile)
for file in files:
with open(file, 'r') as infile:
file=file[file.rfind('\\')+1:]
file=file.strip('.csv')
reader = csv.reader(infile)
headers = next(reader)
hel.append((len(headers)))
b=['field{}'.format(i) for i in range(1,max(hel)+1)]
lst = [file] + headers
csv_writer.writerow(lst)
Now b is list that looks like this ['field1','field2'...'field255']
I need to insert 'file' before 'field1' and write that row on the top of new csv file. Writing code after csv_writer.writerow(lst) gives me csv file with 'field1','field2'.. every other line. How can I fix this problem
You first need to read all your input files to determine the maximum number of fields is 255. Then you need to construct a list of field names to write into the output file (just once, not in a loop):
['field{}'.format(i) for i in range(1, 256)]
You can pass that list to the csv module to write it.
Read the field count and first line from each file before writing the file.
import glob
from itertools import chain
import os
from os.path import splitext, basename
def first_line(filepath):
with open(filepath) as f:
return next(f)
def write_test_file(dest_file_path, source_path_name):
source_paths = glob.glob(source_path_name)
first_lines = list(map(first_line, source_paths))
max_count = max(l.count(",") for l in first_lines)
field_names = map("field{}".format, range(1, max_count + 2))
header = ",".join(chain(["file"], field_names)) + os.linesep
file_names = (splitext(basename(p))[0] for p in source_paths)
content = chain([header], map(",".join, zip(file_names, first_lines)))
with open(dest_file_path, 'w') as testfile:
testfile.write("".join(content))
write_test_file('test.csv', 'C:/dataset/*.csv')
I have been trying to use the following code to move files that are listed in a csv list. But at most it will copy the last file in the list but not the rest.
I keep hitting this wall with every example I have seen listed what am I doing wrong?
My CVS list will have a list like:
12355,12355.jpg
Here's my code
import os
import shutil
import csv
keys={}
with open('shuttle_image.csv', 'r') as f:
reader = csv.reader(f, delimiter = ',')
for rowDict in reader:
keys[rowDict[0]] = rowDict[1]
print (rowDict)
dir_src = 'C:\\Users\\Willie\\Desktop\\Suppliers Dropship\\hunting\\'
dir_dst = 'C:\\image\\'
for file in os.listdir(dir_src):
src_file = os.path.join(dir_src, file)
dst_file = os.path.join(dir_dst, file)
if file in rowDict[1]:
shutil.move(src_file, dst_file)
I think doing something like this will work (untested):
import os
import shutil
import csv
keys={}
with open('shuttle_image.csv', 'r') as f:
reader = csv.reader(f, delimiter=',')
for rowDict in reader:
keys[rowDict[0]] = rowDict[1]
print(rowDict) # if desired
valid_files = set(keys.values()) # file names found in csv
dir_src = 'C:\\Users\\Willie\\Desktop\\Suppliers Dropship\\hunting\\'
dir_dst = 'C:\\image\\'
for file in os.listdir(dir_src):
if file in valid_files:
src_file = os.path.join(dir_src, file)
dst_file = os.path.join(dir_dst, file)
shutil.move(src_file, dst_file)
As an optimization, unless you need the keys dictionary for other processing, you could change the first part so it just creates the valid_files set variable used in the second for loop:
valid_files = set() # empty set
with open('shuttle_image.csv', 'r') as f:
for rowDict in csv.reader(f, delimiter=','):
valid_files |= {rowDict[1]} # add file name to set
print(rowDict) # if desired
The reason why it's only the last file that could be copied (if it was) is because in this line:
if file in rowDict[1]:
you are referencing rowDict outside of the first for-loop. So at that execution moment, it contains the last value of this loop.
If I understand correctly what you are trying to do you could try something like this (untested code):
import os
import shutil
import csv
dir_src = 'C:\\Users\\Willie\\Desktop\\Suppliers Dropship\\hunting\\'
dir_dst = 'C:\\image\\'
with open('shuttle_image.csv', 'r') as f:
reader = csv.reader(f, delimiter = ',')
for rowDict in reader:
id, filename = rowDict
src_file = os.path.join(dir_src, filename)
if os.path.exists(src_file):
shutil.move(src_file, dir_dst)
So instead of:
Constructing a dictionary with all the values in your CSV file
Somehow check for every file in your source directory that it is included in your dictionary (which is what I interpreted you were trying to do)
And move it if it does.
You could:
For every file extracted from your CSV, check that it exists in your source directory.
If it does, you move it to the destination directory.
Is that what you were trying to do ?
[And if the filename stays the same, you only need to specify the destination directory for the second argument of shutil.move()]
I have a list of tsv files where I am looking to grab column headers for all the files.
with open(os.path.abspath('reference/file.tsv'), 'rU') as file:
reader = csv.reader(file)
row1 = next(reader)
Currently, this snippet only reads 1 file where I have a list of files that needs to be parsed.
dir_path = os.path.abspath('reference/')
files = os.listdir(dir_path)
The name of the files are listed in files. How do I loop through the list of files and grab only the column headers for each file?
I try this and it works.
import os
import csv
dir_path = os.path.abspath('reference/')
files = os.listdir(dir_path)
for f in files:
with open(dir_path +'/'+f, 'rU') as file:
reader = csv.reader(file)
row1 = next(reader)
print row1
The files variable in your code is the content of the reference folder, meaning all files and subfolders of the folder. They are returned in a list of strings, containing only the file or subfolder name. This means that you'll have to prefix the path yourself.
Example:
dir_path = os.path.abspath('reference/')
files = os.listdir(dir_path)
for file in files:
# Skip non-files
if not os.path.isfile(file):
continue
with open(os.path.join(dir_path, file), 'rU') as f:
reader = csv.reader(f)
row1 = next(reader)
An alternative using the pathlib module:
for file in Path('reference/').glob('*'):
if not file.is_file():
continue
with open(str(file.resolve()), 'rU') as f:
reader = csv.reader(f)
row1 = next(reader)
Wouldn't you be better off in reading the first line of each of those files, appending them to a list and then passing them to csvreader?
Example:
lines = []
with open(str(file.resolve()), 'rU') as f:
lines.append(f.readline())
reader = csv.reader(lines)
for row in reader:
# whatever you want to do with the parsed lines