json.dump(list, f) keeps on loading on Google Colab - python

I am trying to perform a json.dump operation as below on Google Colab. However, every time it gets stuck and keeps processing this line only. How do I solve this issue in Google Colab?
with open(fullpath, 'w') as f:
    json.dump(list, f)
EDIT: Adding the complete code:
import generate_gt_from_txt_l
import generate_gt_from_xml_l
#We've shown words are identical for txt and xml so don't do both
import generate_gt_from_txt_w
import load_set
import json
import os
if __name__ == "__main__":
    sets = load_set.load()
    set_names = ['training', 'val1', 'val2', 'test']
    generators = [generate_gt_from_txt_l, generate_gt_from_xml_l, generate_gt_from_txt_w]
    gen_paths = ['lines/txt', 'lines/xml', 'words']

    for s_name, s in zip(set_names, sets):
        for g_path, g in zip(gen_paths, generators):
            fullpath = os.path.join("raw_gts", g_path, s_name + '.json')
            try:
                os.makedirs(os.path.dirname(fullpath))
            except:
                pass
            print(type(g.get_gt(s)))
            with open(fullpath, 'w') as f:
                json.dump(g.get_gt(s), f)
            print(fullpath)
The code runs fine when I run it on my system. It just causes issues on Colab.

Try changing the name of your variable list: list is already the name of a built-in type in Python.
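For illustration, a minimal sketch of that fix, assuming the object being dumped is an ordinary list of records (the variable and file names below are placeholders, not from the original code):

import json

records = ["example", "data"]  # previously named `list`, which shadows the built-in type
with open('out.json', 'w') as f:  # 'out.json' stands in for fullpath
    json.dump(records, f)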

Related

Get partial output from nbconvert.preprocessors.ExecutePreprocessor

Is there a way to get partial output from nbconvert.preprocessors.ExecutePreprocessor? Currently, I use the ExecutePreprocessor to execute my Jupyter notebook programmatically, and it returns the output after executing the entire notebook. However, it would be great to be able to get and save the partial results while the notebook is running. For example, if I have a progress bar in the Jupyter notebook, is there a way to continuously read the updated execution output so that I can see it updating?
This is my current code:
import nbformat
from nbconvert.preprocessors import ExecutePreprocessor
with open('./test2.ipynb') as f:
    nb = nbformat.read(f, as_version=4)

ep = ExecutePreprocessor(timeout=600, kernel_name='python3')
ep.preprocess(nb)
print(nb)

with open('executed_notebook.ipynb', 'w', encoding='utf-8') as f:
    nbformat.write(nb, f)
However, it would be great to be able to continuously read the nb variable and write it to a file while it executes.
I ended up doing something like this
import nbformat
from nbconvert.preprocessors import ExecutePreprocessor
import threading
f = open('./test2.ipynb')
nb = nbformat.read(f, as_version=4)
ep = ExecutePreprocessor(kernel_name='python3')

def save_notebook():
    threading.Timer(1.0, save_notebook).start()
    with open('executed_notebook.ipynb', 'w', encoding='utf-8') as f:
        nbformat.write(nb, f)

save_notebook()
ep.preprocess(nb)
print('ended')
Seems to work pretty well. If anyone has a better solution, feel free to post it as well.
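One caveat with the snippet above is that the threading.Timer chain keeps rescheduling itself even after preprocess returns. A minimal sketch of a variant that stops cleanly, reusing the same file names (this is an illustration, not part of the original answer):

import threading
import nbformat
from nbconvert.preprocessors import ExecutePreprocessor

with open('./test2.ipynb') as f:
    nb = nbformat.read(f, as_version=4)

ep = ExecutePreprocessor(kernel_name='python3')
stop_saving = threading.Event()

def save_periodically():
    # re-save the (mutating) notebook object every second until told to stop
    while not stop_saving.wait(1.0):
        with open('executed_notebook.ipynb', 'w', encoding='utf-8') as out:
            nbformat.write(nb, out)

saver = threading.Thread(target=save_periodically, daemon=True)
saver.start()
try:
    ep.preprocess(nb)
finally:
    stop_saving.set()
    saver.join()
    # one final save after execution completes
    with open('executed_notebook.ipynb', 'w', encoding='utf-8') as out:
        nbformat.write(nb, out)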

Writing to files from jupyter notebook

I tried to run this code:
from tqdm.auto import tqdm
import os
from datasets import load_dataset
dataset = load_dataset('oscar', 'unshuffled_deduplicated_ar', split='train[:25%]')
text_data = []
file_count = 0
for sample in tqdm(dataset['train']):
    sample = sample['text'].replace('\n', ' ')
    text_data.append(sample)
    if len(text_data) == 10_000:
        # once we hit the 10K mark, save to file
        filename = f'/data/text/oscar_ar/text_{file_count}.txt'
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        with open(filename, 'w', encoding='utf-8') as fp:
            fp.write('\n'.join(text_data))
        text_data = []
        file_count += 1
# after saving in 10K chunks, we will have ~2082 leftover samples, we save those now too
with open(f'data/text/oscar_ar/text_{file_count}.txt', 'w', encoding='utf-8') as fp:
    fp.write('\n'.join(text_data))
and I get the following PermissionError:
(screenshot of the PermissionError traceback)
I've tried changing the rights to this directory and running Jupyter with sudo privileges, but it still doesn't work.
You are opening:
with open(f'data/text/oscar_ar/text_{file_count}.txt')
But you are writing:
filename = f'/Dane/text/oscar_ar/text_{file_count}.txt'
And your screenshot says:
filename = f'/date/text/oscar_ar/text_{file_count}.txt'
You have to make a choice between data, /date or /Dane :)
Also, it seems you should remove the leading / in /data/text/oscar_ar/text_{file_count}.txt.
Explanation: when you put a slash (/) at the beginning of a path, it means to look from the root of the filesystem, the top level. If you don't put the slash, the lookup starts from your current directory.
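As an illustration of that explanation, building one relative path string and reusing it for both makedirs and open avoids this kind of mismatch (a minimal sketch; the directory name is taken from the question and the written content is a placeholder):

import os

file_count = 0
filename = f'data/text/oscar_ar/text_{file_count}.txt'  # relative path: resolved from the current working directory
os.makedirs(os.path.dirname(filename), exist_ok=True)   # create data/text/oscar_ar/ if it does not exist yet
with open(filename, 'w', encoding='utf-8') as fp:
    fp.write('example content')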

Azure data lake - read using Python

I am trying to read a file from Azure Data Lake using Python in a Databricks notebook.
This is the code I used:
from azure.storage.filedatalake import DataLakeFileClient

file = DataLakeFileClient.from_connection_string("DefaultEndpointsProtocol=https;AccountName=mydatalake;AccountKey=******;EndpointSuffix=core.windows.net", file_system_name="files", file_path="/2020/50002")

with open("./sample.txt", "wb") as my_file:
    download = file.download_file()
    content = download.readinto(my_file)
    print(content)
The output I get is 0. Can someone point out what I am doing wrong? My expectation is to print the file content.
The from_connection_string method returns a DataLakeFileClient; you cannot use it to download the file this way.
If you want to download a file locally, you can refer to my code below.
import os, uuid, sys
from azure.storage.filedatalake import DataLakeServiceClient

service_client = DataLakeServiceClient.from_connection_string("DefaultEndpointsProtocol=https;AccountName=***;AccountKey=*****;EndpointSuffix=core.windows.net")

file_system_client = service_client.get_file_system_client(file_system="test")
directory_client = file_system_client.get_directory_client("testdirectory")
file_client = directory_client.get_file_client("test.txt")

download = file_client.download_file()
downloaded_bytes = download.readall()

with open("./sample.txt", "wb") as my_file:
    my_file.write(downloaded_bytes)
If you want more sample code, you can refer to this doc: Azure Data Lake Storage Gen2.
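If the goal is just to print the content rather than save a local copy, the same download_file()/readall() calls can feed print directly. A minimal sketch, reusing the file_client from the answer above and assuming the stored file is UTF-8 text:

downloaded_bytes = file_client.download_file().readall()
print(downloaded_bytes.decode('utf-8'))  # decode assumes the file contains UTF-8 text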

Pyomo - how to write model.pprint() to file?

I want to "debug" my Pyomo model. The output of the model.pprint() method looks helpful, but it is too long, so the console only displays and stores the last lines. How can I see the first lines, and how can I store this output in a file?
(I tried pickle, json, and a plain f.write, but since the return value of .pprint() is None I wasn't successful until now. I am also new to Python and learning Python and Pyomo in parallel.)
None of this works:
with open('some_file2.txt', 'w') as f:
    serializer.dump(x, f)

import pickle
object = Object()
filehandler = open('some_file', 'wb')
pickle.dump(x, filehandler)

x = str(instance)
x = str(instance.pprint())

f = open('file6.txt', 'w')
f.write(x)
f.write(instance.pprint())
f.close()
Use the filename keyword argument to the pprint method:
instance.pprint(filename='foo.txt')
instance.pprint() prints to the console (stdout, the standard output stream), but does not return the content (the return value is None, as you said). To have it print to a file, you can redirect the standard output to a file.
Try:
import sys

f = open('file6.txt', 'w')
sys.stdout = f
instance.pprint()
f.close()
sys.stdout = sys.__stdout__  # restore the console as standard output
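A related option (not mentioned in the original answers) is contextlib.redirect_stdout from the standard library, which scopes the redirection and restores stdout automatically; a minimal sketch, assuming an existing Pyomo model named instance:

import contextlib

with open('file6.txt', 'w') as f, contextlib.redirect_stdout(f):
    instance.pprint()  # everything printed inside this block goes to file6.txt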
It looks like there is a cleaner solution from Bethany =)
For me the accepted answer does not work; pprint has a different signature.
help(instance.pprint)
pprint(ostream=None, verbose=False, prefix='') method of pyomo.core.base.PyomoModel.ConcreteModel instance
# working for me:
with open(path, 'w') as output_file:
    instance.pprint(output_file)

Terminal in PyCharm not showing me any output

This is my test code (I have a more elaborate one as well), but neither works. In Python 3.x.
import sys
def main():
    inputfile = 'hi'
    print(inputfile)

if __name__ == '__main__':
    main()
EDIT: This is what I want to use the terminal for (and for syntax errors - same problem):
import csv
import sys
import json
inputfile = sys.argv[1]
outputfile = sys.argv[2]

# reading the csv
with open(inputfile, 'r') as inhandle:  # r is reading while w is writing
    reader = csv.DictReader(inhandle)
    data = []
    for row in reader:
        data.append(row)
    print(data)

# writing the json
with open(outputfile, "w") as outhandle:  # mode must be lowercase "w"
    json.dump(data, outhandle, indent=2)
As far as I understand from the code you've attached, hi must be written as 'hi'. In your original code, hi is treated as another variable being assigned to inputfile, but it is not defined anywhere.
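To illustrate the distinction that answer is drawing (a minimal sketch, not the asker's code):

inputfile = 'hi'   # a string literal: this is what was intended
print(inputfile)   # prints: hi

# By contrast, `inputfile = hi` (without quotes) would raise NameError,
# because hi is read as the name of a variable that was never defined.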
