How to rewrite a JSON file in GitLab from Python

I have a JSON file in a Git repo. I load it into a Python variable and do some manipulations with that variable. How can I update the JSON file in GitLab using this variable?
import gitlab
import json
import io

gl = gitlab.Gitlab(private_token='xxxxxxxxx')
gl.auth()

projects = gl.projects.list(owned=True, search='Python')
raw_content = projects[0].files.raw(file_path='8_JSON_Module/json_HW.json', ref='main')

f = io.BytesIO()
f.write(raw_content)
f.seek(0)

data = json.load(f)  # read from the file
# ... do some manipulations with variable data
I know that in Python we use this command to update the file, but I have no idea how to update it in GitLab:
json.dump(data, open('json_HW.json', 'w'))

Use the files API file object (see the python-gitlab files reference):
f = project.files.get(file_path='README.rst', ref='main')
decoded_content = f.decode()
new_content = modify_content(decoded_content) # you implement this
# update the contents and commit the changes
f.content = new_content
f.save(branch='main', commit_message='Update file')
You can use json.dumps to get a string for the new content.
f.content = json.dumps(data)
f.save(...)
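Putting the pieces together, here is a minimal sketch of the full round trip, assuming the project lookup, file path, and branch name from the question (adjust them to your setup):

import gitlab
import json

gl = gitlab.Gitlab(private_token='xxxxxxxxx')
gl.auth()

project = gl.projects.list(owned=True, search='Python')[0]

# Fetch the file object (not just the raw bytes) so it can be saved back
f = project.files.get(file_path='8_JSON_Module/json_HW.json', ref='main')

# f.decode() returns the decoded file content as bytes; json.loads accepts bytes
data = json.loads(f.decode())

# ... manipulate data here ...

# Serialize back to a string and commit the change
f.content = json.dumps(data, indent=4)
f.save(branch='main', commit_message='Update json_HW.json')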

Related

How to rewrite an XML file in GitLab from Python

I read an XML file from GitLab into a variable, then do some manipulations with it, and I need to rewrite the file in GitLab using that variable. When I use dump, it deletes everything from the file. How can I rewrite an XML file in GitLab from Python?
import gitlab
import io
import xml.etree.ElementTree as ET

gl = gitlab.Gitlab(private_token='xxxxx')
gl.auth()

projects = gl.projects.list(owned=True, search='Python')
raw_content = projects[0].files.raw(file_path='9_XML/XML_hw.xml', ref='main')

f = io.BytesIO()
f.write(raw_content)
f.seek(0)

xml_file = ET.parse(f)  # read file
# ... some manipulations with xml_file
project_id = 111111
project = gl.projects.get(project_id)
f = project.files.get(file_path='9_XML/XML_hw.xml', ref='main')
f.content = ET.dump(xml_file) # IT doesn't rewrite, it deletes everything from the file
f.save(branch='main', commit_message='Update file')
ET.dump doesn't produce a return value. It only prints to stdout. As stated in the docs:
Writes an element tree or element structure to sys.stdout. This function should be used for debugging only.
Hence, you end up setting f.content = None.
Instead of using .dump, use .tostring:
xml_str = ET.tostring(xml_file.getroot(), encoding='unicode')  # tostring expects an Element, so pass the tree's root
f.content = xml_str
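For reference, a minimal end-to-end sketch under the same assumptions as the question (project ID, file path, and branch are placeholders):

import io
import gitlab
import xml.etree.ElementTree as ET

gl = gitlab.Gitlab(private_token='xxxxx')
gl.auth()

project = gl.projects.get(111111)
f = project.files.get(file_path='9_XML/XML_hw.xml', ref='main')

# Parse the decoded file content into an ElementTree
tree = ET.parse(io.BytesIO(f.decode()))

# ... manipulate the tree here ...

# Serialize the root element back to a string and commit it
f.content = ET.tostring(tree.getroot(), encoding='unicode')
f.save(branch='main', commit_message='Update XML_hw.xml')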

Upload .txt file to Dropbox without saving locally first

I have a .txt file that I need to upload into a Dropbox folder. On my PC it works great as it is; however, I need to put the code into a Google Cloud Function, and as the GCP file system is read-only, this method is failing.
Can anyone recommend an alternative way of doing this that doesn't require me to save the data locally before pushing it up into Dropbox?
Here is my current working code for my local version:
import pathlib
import dropbox

api_key = 'XXXXXXXXXX'

# Build String And Save Locally To File
string = ["Item_A", "Item_B", "Item_C", "Item_D"]
string = str(string)
with open('Item_List.txt', 'w') as f:
    f.write(string)

# Define Local File Path
localfolder = pathlib.Path(".")
localpath = localfolder / 'Item_List.txt'

# Define Dropbox Target Location
targetfile = '/Data/' + 'Item_List.txt'

# Initialize Dropbox
d = dropbox.Dropbox(api_key)

# Upload File To Dropbox
with localpath.open("rb") as f:
    d.files_upload(f.read(), targetfile, mode=dropbox.files.WriteMode("overwrite"))
If you simply need byte data, you can use the built-in bytes function to convert a string to bytes (you also need to specify the encoding):
data = ["Item_A", "Item_B", "Item_C", "Item_D"]
string_data = str(data)
byte_data = bytes(string_data, encoding='utf-8')
And then later just use the byte data as the argument:
d.files_upload(byte_data, targetfile, mode=dropbox.files.WriteMode("overwrite"))
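Put together, a minimal sketch of the fully in-memory version (the API key, item list, and target path are placeholders taken from the question), with no local file involved:

import dropbox

api_key = 'XXXXXXXXXX'

# Build the string entirely in memory and encode it to bytes
data = ["Item_A", "Item_B", "Item_C", "Item_D"]
byte_data = bytes(str(data), encoding='utf-8')

# Define the Dropbox target location
targetfile = '/Data/Item_List.txt'

# Initialize Dropbox and upload the bytes directly
d = dropbox.Dropbox(api_key)
d.files_upload(byte_data, targetfile, mode=dropbox.files.WriteMode("overwrite"))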

Azure data lake - read using Python

I am trying to read a file from Azure Data Lake using Python in a Databricks notebook. This is the code I used:
from azure.storage.filedatalake import DataLakeFileClient
file = DataLakeFileClient.from_connection_string("DefaultEndpointsProtocol=https;AccountName=mydatalake;AccountKey=******;EndpointSuffix=core.windows.net",file_system_name="files", file_path="/2020/50002")
with open("./sample.txt", "wb") as my_file:
download = file.download_file()
content = download.readinto(my_file)
print(content)
The output I get is 0. Can someone point out what I am doing wrong? My expectation is to print the file content.
The from_connection_string method returns a DataLakeFileClient; you cannot use it to download the file this way.
If you want to download a file locally, you could refer to the code below.
import os, uuid, sys
from azure.storage.filedatalake import DataLakeServiceClient

service_client = DataLakeServiceClient.from_connection_string("DefaultEndpointsProtocol=https;AccountName=***;AccountKey=*****;EndpointSuffix=core.windows.net")

file_system_client = service_client.get_file_system_client(file_system="test")
directory_client = file_system_client.get_directory_client("testdirectory")
file_client = directory_client.get_file_client("test.txt")

download = file_client.download_file()
downloaded_bytes = download.readall()

with open("./sample.txt", "wb") as my_file:
    my_file.write(downloaded_bytes)
    my_file.close()
If you want more sample code, you could refer to this doc: Azure Data Lake Storage Gen2.
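Since the goal in the question is to print the file content rather than save it locally, here is a minimal sketch that reads the download straight into memory; the connection string, file system, and path are placeholders based on the question:

from azure.storage.filedatalake import DataLakeServiceClient

service_client = DataLakeServiceClient.from_connection_string(
    "DefaultEndpointsProtocol=https;AccountName=***;AccountKey=*****;EndpointSuffix=core.windows.net")
file_system_client = service_client.get_file_system_client(file_system="files")
file_client = file_system_client.get_file_client("2020/50002")

# readall() returns the whole file as bytes; decode it to print as text
content = file_client.download_file().readall().decode("utf-8")
print(content)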

Paramiko Download, process and re-upload the same file

I am using Paramiko to create an SFTP client to create a backup copy of a JSON file, read in the contents of the original, then update the original. I am able to get this snippet of code to work:
# open sftp connection stuff

# read in json create backup copy - but have to 'open' twice
read_file = sftp_client.open(file_path)
settings = json.load(read_file)

read_file = sftp_client.open(file_path)
sftp_client.putfo(read_file, backup_path)

# json stuff and updating
new_settings = json.dumps(settings, indent=4, sort_keys=True)

# update remote json file
with sftp_client.open(file_path, 'w') as f:
    f.write(new_settings)
However when I try to clean up the code and combine the backup file creation and JSON load:
with sftp_client.open(file_path) as f:
    sftp_client.putfo(f, backup_path)
    settings = json.load(f)
The backup file will be created, but json.load will fail due to not having any content. And if I reverse the order, json.load will read in the values, but the backup copy will be empty.
I'm using Python 2.7 on a Windows machine, creating a remote connection to a QNX (Linux) machine. Appreciate any help.
Thanks in advance.
If you want to read the file a second time, you have to seek the file read pointer back to the beginning of the file:
with sftp_client.open(file_path) as f:
    sftp_client.putfo(f, backup_path)
    f.seek(0, 0)
    settings = json.load(f)
Though that is functionally equivalent to your original code with two opens.
If your aim was to optimize the code to avoid downloading the file twice, you will have to read/cache the file to memory and then upload and load the contents from the cache.
from io import BytesIO

f = BytesIO()
sftp_client.getfo(file_path, f)
f.seek(0, 0)
sftp_client.putfo(f, backup_path)
f.seek(0, 0)
settings = json.load(f)
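For completeness, a sketch of the whole backup-modify-reupload flow built on that cached buffer; the paths come from the question, and the in-place edit of settings is a hypothetical placeholder for whatever changes you need:

import json
from io import BytesIO

# Download the remote JSON once into an in-memory buffer
buf = BytesIO()
sftp_client.getfo(file_path, buf)

# Upload the unchanged bytes as the backup copy
buf.seek(0)
sftp_client.putfo(buf, backup_path)

# Parse the cached contents and modify them
buf.seek(0)
settings = json.load(buf)
settings['updated'] = True  # hypothetical change for illustration

# Write the updated JSON back to the original remote path
with sftp_client.open(file_path, 'w') as f:
    f.write(json.dumps(settings, indent=4, sort_keys=True))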

Converting cloud-init logs to json using a script

I am trying to convert the cloud-init logs to JSON so that Filebeat can pick them up and send them to Kibana. I want to do this using a shell script or a Python script. Is there any script that converts such logs to JSON?
My Python script is below:
import json
import subprocess

filename = "/home/umesh/Downloads/scripts/cloud-init.log"

def convert_to_json_log(line):
    """ convert each line to json format """
    log = {}
    log['msg'] = line
    log['logger-name'] = 'cloud-init'
    log['ServiceName'] = 'Contentprocessing'
    return json.dumps(log)

def log_as_json(filename):
    f = subprocess.Popen(['cat', '-F', filename],
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    while True:
        line = f.stdout.readline()
        log = convert_to_json_log(line)
        print log
        with open("/home/umesh/Downloads/outputs/cloud-init-json.log", 'a') as new:
            new.write(log + '\n')

log_as_json(filename)
The script returns a file in JSON format, but the msg field is an empty string. I want to convert each line of the log into the message string.
First, try reading the raw log file using Python's built-in functions rather than running OS commands via subprocess, because:
It will be more portable (works across OSes)
It is faster and less prone to errors
Re-writing your log_as_json function as follows worked for me:
inputfile = "cloud-init.log"
outputfile = "cloud-init-json.log"

def log_as_json(filename):
    # Open cloud-init log file for reading
    with open(inputfile, 'r') as log:
        # Open the output file to append json entries
        with open(outputfile, 'a') as jsonlog:
            # Read line by line
            for line in log.readlines():
                # Convert to json and write to file
                jsonlog.write(convert_to_json_log(line) + "\n")
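For reference, here is a self-contained sketch that combines the question's conversion function with this approach and writes one JSON object per line, the newline-delimited format Filebeat consumes; the input and output paths are placeholders:

import json

inputfile = "/var/log/cloud-init.log"
outputfile = "/var/log/cloud-init-json.log"

def convert_to_json_log(line):
    """Convert one raw log line into a JSON string."""
    return json.dumps({
        'msg': line.rstrip('\n'),
        'logger-name': 'cloud-init',
        'serviceName': 'content-processing',
    })

with open(inputfile, 'r') as log, open(outputfile, 'a') as jsonlog:
    for line in log:
        jsonlog.write(convert_to_json_log(line) + '\n')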
After taking some time to prepare the customised script, I finally made the script below. It might be helpful to many others.
import json

def convert_to_json_log(line):
    """ convert each line to json format """
    log = {}
    log['msg'] = json.dumps(line)
    log['logger-name'] = 'cloud-init'
    log['serviceName'] = 'content-processing'
    return json.dumps(log)

# Open the file with read only permit
f = open('/var/log/cloud-init.log', "r")
# use readlines to read all lines in the file
# The variable "lines" is a list containing all lines in the file
lines = f.readlines()
# close the file after reading the lines.
f.close()

jsonData = ''
for line in lines:
    jsonLine = convert_to_json_log(line)
    jsonData = jsonData + "\n" + jsonLine

with open("/var/log/cloud-init/cloud-init-json.log", 'w') as new:
    new.write(jsonData)
