I'm currently working on a script that converts a jsonl to csv format. However, upon running the code on visual studio code's terminal, I get the following error:
Traceback (most recent call last):
File "C:\Users\Natthanon\Documents\Coding 101\Python\test.py", line 24, in <module>
with open(r'C:\Users\Natthanon\Documents\Coding 101\Python\CSV', 'a' , newline='') as f:
PermissionError: [Errno 13] Permission denied: 'C:\\Users\\Natthanon\\Documents\\Coding 101\\Python\\CSV'
This is my python script below. If anyone has any clue on why I am receiving the permission error as shown above, do let me know if there are any solutions to this. I'm quite new to Python and I hope someone experienced will be able to help me out with this issue. Thanks!
import glob
import json
import csv
import time
start = time.time()
#import pandas as pd
from flatten_json import flatten
# Folder that holds the input .jsonl files
File_path = (r'C:\Users\Natthanon\Documents\Coding 101\Python\JSONL')
# Collect the .jsonl files to convert
# NOTE(review): there is no path separator between File_path and "**", so the pattern is "...\JSONL**/*.jsonl" - confirm it matches the intended files
files = [f for f in glob.glob( File_path + "**/*.jsonl", recursive=True)]
# NOTE(review): i is never used in the code shown
i=0
for f in files:
with open(f, 'r') as F:
for line in F:
# Parse one JSON document per line and flatten it with flatten_json.flatten
data = json.loads(line)
data_1=flatten(data)
# This open() is the line the traceback points at: the path given is the CSV folder itself, not a file inside it
with open(r'C:\Users\Natthanon\Documents\Coding 101\Python\CSV', 'a' , newline='') as f:
thewriter = csv.writer(f)
# Write the values stored under 'header1' and 'header2' as one CSV row
thewriter.writerow([data_1['header1'],data_1['header2']])
This seems to be a duplicate of "PermissionError: [Errno 13] in python".
What you are trying to do is to open a directory as a file, which will fail.
I'm guessing you want something like this: create a new .csv file in the CSV folder for every .jsonl file in the JSONL folder.
import glob
import json
import csv
import time
start = time.time()
#import pandas as pd
from flatten_json import flatten
# Folder that holds the input .jsonl files
File_path = (r'C:\Users\Natthanon\Documents\Coding 101\Python\JSONL')
# Collect every .jsonl file in that folder and its sub-folders.
# "**" only recurses when it is a path component of its own, so it must be
# separated from the folder name.
files = glob.glob(File_path + r"\**\*.jsonl", recursive=True)
for f in files:
    with open(f, 'r') as F:
        # One CSV per .jsonl file, named after it and stored in the CSV folder.
        # It is opened once per input file rather than once per row.
        with open(r'C:\Users\Natthanon\Documents\Coding 101\Python\CSV\\' + f.split("\\")[-1] +".csv", 'a' , newline='') as csv_file:
            thewriter = csv.writer(csv_file)
            for line in F:
                # Each line is one JSON document; flatten it before picking columns
                data = json.loads(line)
                data_1 = flatten(data)
                # Write the values stored under the wanted keys as one CSV row
                thewriter.writerow([data_1['header1'], data_1['header2']])
On line
with open(r'C:\Users\Natthanon\Documents\Coding 101\Python\CSV\\' + f.split("\\")[-1] +".csv", 'a' , newline='') as csv_file:
you are taking the name of the .jsonl file (the split strips the directory part of the path and keeps only the file name) and creating a matching file with a .csv extension in the "CSV" folder.
Related
I have 2 folders named as Excel and CSV. The Excel folder contains excel files with prefix "_Updated" that I would like to convert to CSV with UTF-8 encoding, and If the file contains multiple sheets then new csv file will be created based on sheet. However, when I run below code, I am receiving PermissionError as shown below not sure what is wrong with below code ?
Thanks in advance for your time!
Code:
from pathlib import Path
import openpyxl
from openpyxl import load_workbook
# Folder that is searched for the source workbooks
CSV_FILE_PATH='/Excel'
# Folder where the CSV output is meant to go
CSV_FILE_SAVE='/CSV'
# NOTE(review): csv is used below but no `import csv` appears in the code shown
for file in Path(CSV_FILE_PATH).glob('*_Updated.xlsx'):
wb = load_workbook(file)
print(file, wb.active.title)
for sheetname in wb.sheetnames:
# This is the line the traceback points at: both open() calls receive the folder paths defined above, not file paths
with open(CSV_FILE_PATH, 'r+') as f_in, open(CSV_FILE_SAVE, 'w',encoding="utf-8") as f_out:
# content is read but never used in the code shown
content = f_in.read()
spamwriter=csv.writer(f_out)
# Write each row of the current sheet as one CSV row of cell values
for row in wb[sheetname].rows:
spamwriter.writerow([cell.value for cell in row])
Error:
PermissionError Traceback (most recent call last)
<ipython-input-6-2ef7cc7706ca> in <module>
10 print(file, wb.active.title)
11 for sheetname in wb.sheetnames:
---> 12 with open(CSV_FILE_PATH, 'r+') as f_in, open(CSV_FILE_SAVE, 'w',encoding="utf-8") as f_out:
13 content = f_in.read()
14 spamwriter=csv.writer(f_out)
PermissionError: [Errno 13] Permission denied: '/Excel'
You are getting a PermissionDenied error because you are attempting to open a directory for reading as if it were a file. You will get this error on Windows if you attempt a file operation on a directory.
This is where you attempt to read in a directory, in your case, the directory /Excel:
with open(CSV_FILE_PATH, 'r+') as f_in, ... :
f_in is later used in the line content = f_in.read(). However, you aren't using content anywhere, so I suggest you get rid of it, as then you can also get rid of the open(CSV_FILE_PATH, 'r+') as f_in part of the line above. That should sort out your problem:
# CSV_FILE_SAVE is a folder, so opening it directly fails the same way
# CSV_FILE_PATH did. Write each sheet to its own file inside that folder,
# named after the workbook (`file` from the enclosing loop) and the sheet.
out_path = Path(CSV_FILE_SAVE) / f"{file.stem}_{sheetname}.csv"
# newline='' lets the csv module control line endings itself
with open(out_path, 'w', encoding="utf-8", newline='') as f_out:
    spamwriter = csv.writer(f_out)
    # One CSV row per worksheet row, holding the cell values
    for row in wb[sheetname].rows:
        spamwriter.writerow([cell.value for cell in row])
I'm new to Python and the task I am performing is to extract a specific key value from a list of .iris ( which contains the list of nested dictionary format) files in a specific directory.
I wanted to extract the specific value and save it as a new .csv file and repeat it for all other files.
Below is my sample of .iris file from which I should extract only for the these keys ('uid','enabled','login','name').
{"streamType":"user",
"uid":17182,
"enabled":true,
"login":"xyz",
"name":"abcdef",
"comment":"",
"authSms":"",
"email":"",
"phone":"",
"location":"",
"extraLdapOu":"",
"mand":997,
"global":{
"userAccount":"View",
"uid":"",
"retention":"No",
"enabled":"",
"messages":"Change"},
"grants":[{"mand":997,"role":1051,"passOnToSubMand":true}],
I am trying to convert the .iris files to .json and read them one by one, but unfortunately I am not getting the desired output.
Please, could anyone help me?
My code (added from comments):
import os
import csv
# NOTE(review): path is an empty string here - presumably a placeholder for the real folder; os.chdir('') would fail
path = ''
os.chdir(path)
# Read iris File
# Prints the file's contents; nothing is returned, so a call evaluates to None
def read_iris_file(file_path):
with open(file_path, 'r') as f:
print(f.read())
# iterate through all files in the current working directory
for file in os.listdir():
# Check whether file is in iris format or not
if file.endswith(".iris"):
# NOTE(review): "\{" is not a recognized escape sequence; os.path.join(path, file) would avoid it
file_path = f"{path}\{file}"
# call read iris file function
# The outer print() shows the function's return value, which is None
print(read_iris_file(file_path))
Your files contain data in JSON format, so we can use the built-in json module to parse them. To iterate over files with a certain extension you can use Path.glob() from pathlib with the pattern "*.iris". Then we can use csv.DictWriter() and pass "ignore" as the extrasaction argument, which makes DictWriter ignore the keys we don't need and write only those passed in the fieldnames argument.
Code:
import csv
import json
from pathlib import Path
# Folder holding the .iris files; the combined CSV is written there as well
path = Path(r"path/to/folder")
# Columns to keep from each file
keys = ("uid", "enabled", "login", "name")

with (path / "result.csv").open("w", newline="") as csv_out:
    # extrasaction="ignore" drops every key that is not listed in `keys`
    dict_writer = csv.DictWriter(csv_out, keys, extrasaction="ignore")
    dict_writer.writeheader()
    for iris_path in path.glob("*.iris"):
        # Each .iris file is parsed as a single JSON document
        with iris_path.open() as iris_in:
            record = json.load(iris_in)
        dict_writer.writerow(record)
Try the below (the key point here is loading the iris file using ast)
import ast
import csv
import json

# Keys to copy from the .iris file, in output column order
fields = ('uid','enabled','login','name')
# The file content is JSON; its true/false/null literals are rejected by
# ast.literal_eval, so it is parsed with the json module instead.
with open('my.iris') as f1:
    data = json.load(f1)
# newline='' lets the csv module control line endings itself
with open('my.csv', 'w', newline='') as f2:
    writer = csv.writer(f2)
    writer.writerow(fields)
    # Strings are written as-is; other values keep their JSON spelling
    # (e.g. 17182, true) instead of failing in str.join or becoming "True".
    writer.writerow([
        data[f] if isinstance(data[f], str) else json.dumps(data[f])
        for f in fields
    ])
my.csv
uid,enabled,login,name
17182,true,xyz,abcdef
I have with me a script that converts jsonl files in a selected directory to csv files in another specified location. However, upon converting the files to csv format, the final created csv file contains a .jsonl extension before the .csv (Think file.jsonl.csv) Any ideas on how to remove the .jsonl extension before adding the csv extension at the back? I hope I can be able to get rid of the .jsonl extension for the csv file as it may be confusing in future. Thank you!
Sample CSV file created:
20210531_CCXT_FTX_DOGEPERP.jsonl.csv
My script:
import glob
import json
import csv
import time
start = time.time()
#import pandas as pd
from flatten_json import flatten
# Folder that holds the input .jsonl files
File_path = (r'C:\Users\Natthanon\Documents\Coding 101\Python\JSONL')
# Collect the .jsonl files to convert
# NOTE(review): there is no path separator between File_path and "**" - confirm the pattern matches the intended files
files = [f for f in glob.glob( File_path + "**/*.jsonl", recursive=True)]
# NOTE(review): i is never used in the code shown
i = 0
for f in files:
with open(f, 'r') as F:
# Create one CSV per input file; the name is the input's file name (still ending in .jsonl) with ".csv" appended
with open(r'C:\Users\Natthanon\Documents\Coding 101\Python\CSV\\' + f.split("\\")[-1] + ".csv", 'w' , newline='') as csv_file:
thewriter = csv.writer(csv_file)
# Header row, written once per output file
thewriter.writerow(["symbol", "timestamp", "datetime","high","low","bid","bidVolume","ask","askVolume","vwap","open","close","last","previousClose","change","percentage","average","baseVolume","quoteVolume"])
for line in F:
# Parse one JSON document per line and flatten it with flatten_json.flatten
data = json.loads(line)
data_1 = flatten(data)
# Write the values in the same order as the header row above
thewriter.writerow([data_1['symbol'],data_1['timestamp'],data_1['datetime'],data_1['high'],data_1['low'],data_1['bid'],data_1['bidVolume'],data_1['ask'],data_1['askVolume'],data_1['vwap'],data_1['open'],data_1['close'],data_1['last'],data_1['previousClose'],data_1['change'],data_1['percentage'],data_1['average'],data_1['baseVolume'],data_1['quoteVolume']])
The problem is because you are not getting rid of the extension when writing to the new file, something like this to replace your creation of the csv file should fix it
# Keep only the file name, then drop just the trailing ".jsonl" extension.
# str.replace would also delete ".jsonl" occurring elsewhere in the name.
file_name = f.rsplit("\\", 1)[-1]
if file_name.endswith('.jsonl'):
    file_name = file_name[:-len('.jsonl')]
with open(r'C:\Users\Natthanon\Documents\Coding 101\Python\CSV\\' + file_name + ".csv", 'w' , newline='') as csv_file:
I am compiling a load of CSVs into one. The first CSV contains the headers, which I am opening in write mode (maincsv). I am then making a list of all the others which live in a different folder and attempting to append them to the main one.
It works, however it just writes over the headings. I just want to start appending from line 2. I'm sure it's pretty simple but all the next(), etc. things I try just throw errors. The headings and data are aligned if that helps.
import os, csv
# Open the combined output file for writing
# NOTE(review): mode 'w' truncates the file on open, so any headings already stored in it are discarded
maincsv = open(r"C:\Data\OSdata\codepo_gb\CodepointUK.csv", 'w', newline='')
maincsvwriter = csv.writer(maincsv)
# os.chdir() returns None, so curdir is None; the call matters only for changing the working directory
curdir = os.chdir(r"C:\Data\OSdata\codepo_gb\Data\CSV")
# List every entry of the new working directory
csvlist = os.listdir()
csvfiles = []
# Turn each entry name into an absolute path
for file in csvlist:
path = os.path.abspath(file)
csvfiles.append(path)
# Copy every row of every input file into the output
for incsv in csvfiles:
# NOTE(review): the input file is never closed in the code shown
opencsv = open(incsv)
csvreader = csv.reader(opencsv)
for row in csvreader:
maincsvwriter.writerow(row)
maincsv.close()
To simplify things I have the code load all the files in the directory the python code is run in. This will get the first line of the first .csv file and use it as the header.
import os
# Name of the combined file; it lives in the same directory as the inputs
OUTPUT_NAME = 'collection.csv'


def _with_newline(line):
    """Return *line* terminated by a newline so rows from different files never merge."""
    return line if line.endswith('\n') else line + '\n'


# The header is only needed when the combined file is new or empty;
# appending to an existing collection must not repeat it.
header_needed = not os.path.exists(OUTPUT_NAME) or os.path.getsize(OUTPUT_NAME) == 0

with open(OUTPUT_NAME, 'a') as collection:
    for name in sorted(os.listdir('.')):
        # Skip directories, non-csv files and the output file itself
        # (reading it while appending to it would feed it its own rows).
        if not os.path.isfile(name) or not name.endswith('.csv') or name == OUTPUT_NAME:
            continue
        with open(name, 'r') as solecsv:
            # assuming header is 1 line: it is consumed from every input file
            # but written only once, taken from the first file
            header = solecsv.readline()
            if header_needed and header:
                collection.write(_with_newline(header))
                header_needed = False
            for row in solecsv:
                collection.write(_with_newline(row))
I need to read multiple csv files in a zip folder and extract the data from those csv's into a container in Python.
I am new to Python and have only basic knowledge, so a detailed explanation would be appreciated.
Thanks in advance
Sampath
The first thing to do is to open the zip file using module zipfile. Then read the CSV data from each archived file and store it in a container such as a dictionary.
The following will read the data from each file in the zip archive into a dictionary keyed by the file name.
import zipfile
# Map every archive member name to that member's raw contents
with zipfile.ZipFile('/path/to/your/zipfile') as zf:
    container = {name: zf.read(name) for name in zf.namelist()}

# Dump each member, followed by a separator line
for name, payload in container.items():
    print("Contents of file {}:".format(name))
    print(payload)
    print("============================\n")
Optionally you could process the csv data using module csv. Something like this should get you started:
import csv
import io
import zipfile

# Map every archive member name to a csv.reader over that member's text.
# The text is held in memory, so the readers stay usable after the archive
# has been closed.
container = {}
with zipfile.ZipFile('/path/to/your/zipfile') as zf:
    for name in zf.namelist():
        # ZipFile.read() returns bytes; decode them before parsing
        # (UTF-8 is assumed here - adjust to the files' real encoding).
        text = zf.read(name).decode('utf-8')
        # newline='' leaves line-ending handling to the csv module
        container[name] = csv.reader(io.StringIO(text, newline=''))
Now container is a dictionary keyed by file name with csv.reader objects as values.
Here is how you can read all the text inside zip:
import zipfile
archive = 'c:\\test\\archive.zip'
def readZip(archive):
    """Return the lines of every member of the zip file *archive*.

    Members are read in archive order and their lines are concatenated into
    a single list. Each line is a bytes object with its line ending kept,
    because members are opened in binary mode. An archive without members
    yields an empty list.
    """
    lines = []
    # Context managers close the archive and each member, even on error
    with zipfile.ZipFile(archive) as zfile:
        for finfo in zfile.infolist():
            with zfile.open(finfo) as ifile:
                lines.extend(ifile.readlines())
    return lines
print(readZip(archive))
Thanks for the help.
Apart from the code provided above,I have come up with a code which satisfies the question
import os
import csv
from zipfile import ZipFile
ZIP_NAME = "Src_AdventureWorks_Files.zip"
EXTRACT_DIR = "/home/sreddi/workspace/DQAS_Main/Src_AdventureWorks_Files/"

# Extract the archive to the destination folder and remember its member
# names. ZipFile opens the path in binary mode itself, and the with-block
# closes the archive afterwards.
with ZipFile(ZIP_NAME, "r") as zf:
    zf.extractall(EXTRACT_DIR)
    zc = zf.namelist()
print("Extraction successful")
# Meta data of the zipfile
print(zc)

# Loop over each extracted csv file and print its rows
if __name__ == "__main__":
    for csv_path in zc:
        # Directory entries end with "/" and hold no data to read
        if csv_path.endswith("/"):
            continue
        print("###########")
        print(csv_path)
        print("###########")
        # Build the full path instead of changing the working directory;
        # newline="" leaves line-ending handling to the csv module
        with open(os.path.join(EXTRACT_DIR, csv_path), newline="") as f:
            for row in csv.reader(f):
                print(row)