I have a script in which I am trying to extract two different flows from one site and write them to two different files. Unfortunately only one output file is generated and I get the error message:
Cleaning data..._LiveChat_agents
Traceback (most recent call last):
File "C:\Python_Prog\Customer_Care\LiveChat+Chats.py", line 45, in <module>
with open(messages, 'w') as outfile:
IOError: [Errno 13] Permission denied: 'LiveChat_agents14-12-2018.json'
This is my script:
from azure.storage.blob import BlockBlobService
from azure.storage.blob import ContentSettings
import requests
import re
import os
import json
import datetime
import logging

# authentication, service-to-service
block_blob_service = BlockBlobService(account_name='datalake', account_key='XXXXXX')

now = datetime.datetime.now()
today = now.strftime("%d-%m-%Y")

logs = 'C:\\Python_Execution\\Log\\'
os.chdir(logs)
if not os.path.exists(today):
    os.makedirs(today)

file_out = "C:\\Python_Execution\\Temp"
os.chdir(file_out)

# authenticate
#url = 'https://api.livechatinc.com/v2/chats'
headers = {'X-API-Version': '2'}

# extract agents
response_agents = requests.get('https://api.livechatinc.com/agents', headers=headers, auth=('jdoe@whatever.com', '000'))
#print(response_agents.text)
response_agents_toclean = response_agents.text
response_agents_clean = response_agents_toclean.encode('utf-8')
logging.info('Writing data to local file')
response_agents_final = json.loads(response_agents_clean)
messages = 'LiveChat_agents' + today + '.json'
print('Cleaning data..._LiveChat_agents')
response_agents_final = json.loads(response_agents_clean)
with open(messages, 'w') as outfile:
    json.dump(response_agents_final, outfile)

# extract chats
response_chats = requests.get('https://api.livechatinc.com/chats', headers=headers, auth=('jdoe@whatever.com', '000'))
#print(response_chats.text)
response_chats_toclean = response_chats.text
response_chats_clean = response_chats_toclean.encode('utf-8')
logging.info('Writing data to local file')
response_chats_final = json.loads(response_chats_clean)
messages = 'LiveChat_chats' + today + '.json'
print('Cleaning data...LiveChat_chats')
response_chats_final = json.loads(response_chats_clean)
with open(messages, 'w') as outfile:
    json.dump(response_chats_final, outfile)

print('Uploading to blob storage...')
logging.info('Uploading to blob storage')
blob_container = 'landing-livechat'
# Upload the JSON file to Azure blob storage
block_blob_service.create_blob_from_path(
    blob_container,
    messages,
    file_out + '\\' + messages,
    content_settings=ContentSettings(content_type='application/json')
)

# Check the list of blobs
generator = block_blob_service.list_blobs(blob_container)
for blob in generator:
    print(blob.name)
logging.info('Upload completed successfully')

print('Deleting temp file...')
logging.info('Deleting temp file')
os.chdir(file_out)
os.remove(messages)
logging.info('Completed successfully!')
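For reference, a minimal hedged sketch of the two-request / two-file pattern the script is aiming for, with the fetch-and-write step factored into one helper (the endpoints come from the script above; the credentials and labels are placeholders, and this is a sketch only, not a diagnosis of the Errno 13):
import datetime
import json
import requests

def fetch_to_file(url, label, auth, headers):
    # fetch one endpoint and write its JSON payload to a date-stamped file
    today = datetime.datetime.now().strftime("%d-%m-%Y")
    filename = '{}{}.json'.format(label, today)
    data = requests.get(url, headers=headers, auth=auth).json()
    with open(filename, 'w') as outfile:
        json.dump(data, outfile)
    return filename

headers = {'X-API-Version': '2'}
auth = ('user@example.com', 'secret')
agents_file = fetch_to_file('https://api.livechatinc.com/agents', 'LiveChat_agents', auth, headers)
chats_file = fetch_to_file('https://api.livechatinc.com/chats', 'LiveChat_chats', auth, headers)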
Appreciate your help.
Thanks,
M
I am trying to generate a JSON file with my Python script.
The goal is to parse a CSV file, get some data, do some operations/elaborations, and then generate a JSON file.
When I run the script the JSON generation seems to run smoothly, but as soon as the first row is parsed the script stops with the following error:
Traceback (most recent call last):
  File "c:\xampp\htdocs\mix_test.py", line 37, in <module>
    data.append({"name": file_grab ,"status": "progress"})
AttributeError: 'str' object has no attribute 'append'
Below is the code:
import json
import time
import os
import requests
import shutil
import pandas as pd
from os.path import exists
from pathlib import Path

timestr = time.strftime("%Y%m%d")
dest_folder = (r'C:\Users\Documents\mix_test_python')
filename = []

# read filename and path with pandas
df = pd.read_csv(
    r'C:\Users\Documents\python_test_mix.csv', delimiter=';')

data = []
for ind in df.index:
    mode = (df['Place'][ind])
    source_folder = (df['File Path'][ind])
    file_grab = (df['File Name'][ind])
    code = (df['Event ID'][ind])
    local_file_grab = os.path.join(dest_folder, file_grab)
    remote_file_grab = os.path.join(source_folder, file_grab)

    ### generate json ########
    ##s = json.dumps(test)##
    data.append({"name": file_grab, "status": "progress"})
    with open(r'C:\Users\Documents\test.json', 'w') as f:
        json.dump(data, f, indent=4)
    f.close

    #### detect if it is ftp ######
    print(mode, source_folder, remote_file_grab)
Could you help me understand what I am doing wrong?
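For comparison, a hedged sketch of the accumulate-then-dump pattern the loop seems to be aiming for (paths and column names are taken from the snippet above; the key point is that data must stay a list for .append to work, and the dump can happen once after the loop):
import json
import pandas as pd

df = pd.read_csv(r'C:\Users\Documents\python_test_mix.csv', delimiter=';')

data = []  # must remain a list; rebinding it to a string causes the AttributeError above
for ind in df.index:
    file_grab = df['File Name'][ind]
    data.append({"name": file_grab, "status": "progress"})

with open(r'C:\Users\Documents\test.json', 'w') as f:
    json.dump(data, f, indent=4)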
I am trying to unzip a password-protected file in GCS, but I am getting an error in the code below. The code works fine with normal .gz files but fails to unzip password-protected files.
from google.cloud import storage
from zipfile import ZipFile, is_zipfile
import io

storage_client = storage.Client()
source_bucket = 'bucket'
source_bucket1 = storage_client.bucket(source_bucket)
blob = source_bucket1.blob("path/filename.gz")
zipbytes = io.BytesIO(blob.download_as_string())
print(zipbytes)

if is_zipfile(zipbytes):
    with ZipFile(zipbytes, 'r') as myzip:
        for contentfilename in myzip.namelist():
            contentfile = myzip.read(contentfilename)
            contentfilename = contentfilename[:-3]
            blob1 = bucket.blob(contentfilename)
            blob1.upload_from_string(contentfile)
            print(f'File decompressed from {zipfilename_with_path} to {contentfilename}')
        blob.delete()
You can use Python, e.g. from a Cloud Function:
from google.cloud import storage
from zipfile import ZipFile
from zipfile import is_zipfile
import io

def zipextract(bucketname, zipfilename_with_path):
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucketname)

    destination_blob_pathname = zipfilename_with_path

    blob = bucket.blob(destination_blob_pathname)
    zipbytes = io.BytesIO(blob.download_as_string())

    if is_zipfile(zipbytes):
        with ZipFile(zipbytes, 'r') as myzip:
            for contentfilename in myzip.namelist():
                contentfile = myzip.read(contentfilename)
                blob = bucket.blob(zipfilename_with_path + "/" + contentfilename)
                blob.upload_from_string(contentfile)

zipextract("mybucket", "path/file.zip")  # if the file is gs://mybucket/path/file.zip
I am able to read a password-protected .csv.gz file using the logic below. All of this is done in memory. It has performance issues if the file is huge, but it works fine.
storage_client = storage.Client()
source_bucket = '<bucket-name>'
source_bucket1 = storage_client.bucket(source_bucket)
bucket_folder = '/unzip'

blob = source_bucket1.blob(path)
zipbytes = io.BytesIO(blob.download_as_string())
with ZipFile(zipbytes, 'r') as myzip:
    print("Inside the zipfiles loop")
    with myzip.open('filename.csv', pwd=b'password') as myfile:
        print("Inside zip 2 loop")
        contentfile = myfile.read()
        contentfilename = bucket_folder + destination_file_path
        blob1 = source_bucket1.blob(contentfilename)
        blob1.upload_from_string(contentfile)
(Error screenshot omitted.)
import sys
import os, time
import cognitive_face as CF
import global_variables as global_var
import urllib
import sqlite3
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

Key = global_var.key
CF.Key.set(Key)

BASE_URL = global_var.BASE_URL  # Replace with your regional Base URL
CF.BaseUrl.set(BASE_URL)

def get_person_id():
    person_id = ''
    extractId = str(sys.argv[1])[-2:]
    connect = sqlite3.connect("Face-DataBase")
    c = connect.cursor()
    cmd = "SELECT * FROM Students WHERE ID = " + extractId
    c.execute(cmd)
    row = c.fetchone()
    person_id = row[3]
    connect.close()
    return person_id

if len(sys.argv) != 1:
    currentDir = os.path.dirname(os.path.abspath(__file__))
    imageFolder = os.path.join(currentDir, "dataset/" + str(sys.argv[1]))
    person_id = get_person_id()
    for filename in os.listdir(imageFolder):
        if filename.endswith(".jpg"):
            print(filename)
            imgurl = urllib.request.pathname2url(os.path.join(imageFolder, filename))
            imgurl = imgurl[3:]
            print("imageurl = {}".format(imgurl))
            res = CF.face.detect(imgurl)
            if len(res) != 1:
                print("No face detected in image")
            else:
                res = CF.person.add_face(imgurl, global_var.personGroupId, person_id)
                print(res)
                time.sleep(6)
else:
    print("supply attributes please from dataset folder")
I think the images should be converted to a byte array, but I don't know how to do that. The local images have to be uploaded to the Cognitive Services API. I have tried many ways but cannot resolve the error.
imgurl = urllib.request.pathname2url(os.path.join(imageFolder, filename))
The line above is where the error occurs.
Welcome to Stack Overflow, @arun.
First of all, as per here, the API you're using is deprecated, and you should switch to this one instead.
Second, this new API has a method called detect_with_stream (ref here) that sends the request to the Face Recognition endpoint using a byte stream instead of a URL (it uses different request headers than the URL-based method). The method accepts a stream of bytes containing your image. I've worked with another Cognitive Services API that performs text recognition, so I've faced this same choice between sending an image URL and sending the image byte stream. You can generate a byte stream from the file as follows:
image_data = open(image_path, "rb").read()
The variable image_data can be passed to the method.
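For instance, a minimal hedged sketch (assuming a FaceClient instance named face_client, configured as in the Edit below, and image_path pointing at a local .jpg):
# pass the opened file (a byte stream) to detect_with_stream
with open(image_path, "rb") as image_stream:
    detected_faces = face_client.face.detect_with_stream(image_stream)
print(detected_faces)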
Edit: Instructions on how to use the new API with the image bytestream
First, install the following pip package:
pip install azure-cognitiveservices-vision-face
Then, you can try this approach.
import sys
import os, time
import global_variables as global_var

from azure.cognitiveservices.vision.face import FaceClient
from msrest.authentication import CognitiveServicesCredentials

import urllib
import sqlite3
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

KEY = global_var.key
ENDPOINT = global_var.endpoint
face_client = FaceClient(ENDPOINT, CognitiveServicesCredentials(KEY))

def get_person_id():
    person_id = ''
    extractId = str(sys.argv[1])[-2:]
    connect = sqlite3.connect("Face-DataBase")
    c = connect.cursor()
    cmd = "SELECT * FROM Students WHERE ID = " + extractId
    c.execute(cmd)
    row = c.fetchone()
    person_id = row[3]
    connect.close()
    return person_id

if len(sys.argv) != 1:
    currentDir = os.path.dirname(os.path.abspath(__file__))
    imageFolder = os.path.join(currentDir, "dataset/" + str(sys.argv[1]))
    person_id = get_person_id()
    for filename in os.listdir(imageFolder):
        if filename.endswith(".jpg"):
            print(filename)
            img_data = open(filename, "rb").read()
            res = face_client.face.detect_with_stream(img_data)
            if not res:
                print('No face detected from image {}'.format(filename))
                continue
            res = face_client.person_group_person.add_face_from_stream(global_var.personGroupId, person_id, img_data)
            print(res)
            time.sleep(6)
else:
    print("supply attributes please from dataset folder")
Edit 2: Notes on traversing all the files in a directory
Ok @arun, your current problem stems from the fact that you're using os.listdir, which only lists the filenames, so you don't have their full paths. The quickest solution would be to open each image inside the loop with:
img_data = open(os.path.join(imageFolder, filename), "rb").read()
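As a hedged alternative (my suggestion, not part of the original answer), pathlib yields full paths directly, so no manual join is needed:
from pathlib import Path

# Path.glob returns full paths, unlike os.listdir
for img_path in Path(imageFolder).glob("*.jpg"):
    with open(img_path, "rb") as image_file:
        img_data = image_file.read()
    # img_data can then be passed to detect_with_stream as before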
I am querying an API from a website. The API will be down for maintenance from time to time, and there may also be no data available for querying at times. I have written the code to keep forcing the program to query the API even after an error; however, it doesn't seem to be working.
The following is the code:
import threading
import json
import urllib
from urllib.parse import urlparse
import httplib2 as http  # External library
import datetime
import pyodbc as db
import os
import gzip
import csv
import shutil

def task():
    # Authentication parameters
    headers = {'AccountKey': 'secret',
               'accept': 'application/json'}  # this is by default

    # API parameters
    uri = 'http://somewebsite.com/'  # Resource URL
    path = '/something/TrafficIncidents?'

    # Build query string & specify type of API call
    target = urlparse(uri + path)
    print(target.geturl())
    method = 'GET'
    body = ''

    # Get handle to http
    h = http.Http()

    # Obtain results
    response, content = h.request(target.geturl(), method, body, headers)
    api_call_time = datetime.datetime.now()

    filename = "traffic_incidents_" + str(datetime.datetime.today().strftime('%Y-%m-%d'))
    createHeader = 1
    if os.path.exists(filename + '.csv'):
        csvFile = open(filename + '.csv', 'a')
        createHeader = 0
    else:
        # compress previous day's file
        prev_filename = "traffic_incidents_" + (datetime.datetime.today() - datetime.timedelta(days=1)).strftime('%Y-%m-%d')
        if os.path.exists(prev_filename + '.csv'):
            with open(prev_filename + '.csv', 'rb') as f_in, gzip.open(prev_filename + '.csv.gz', 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)
            os.remove(prev_filename + '.csv')
        # create new csv file for writing
        csvFile = open(filename + '.csv', 'w')

    # Parse JSON to print
    jsonObj = json.loads(content)
    print(json.dumps(jsonObj, sort_keys=True, indent=4))
    with open("traffic_incidents.json", "w") as outfile:
        # Saving jsonObj["d"]
        json.dump(jsonObj, outfile, sort_keys=True, indent=4, ensure_ascii=False)

    for i in range(len(jsonObj["value"])):
        jsonObj["value"][i]["IncidentTime"] = jsonObj["value"][i]["Message"].split(' ', 1)[0]
        jsonObj["value"][i]["Message"] = jsonObj["value"][i]["Message"].split(' ', 1)[1]
        jsonObj["value"][i]["ApiCallTime"] = api_call_time

    # Save to csv file
    header = jsonObj["value"][0].keys()
    csvwriter = csv.writer(csvFile, lineterminator='\n')
    if createHeader == 1:
        csvwriter.writerow(header)
    for i in range(len(jsonObj["value"])):
        csvwriter.writerow(jsonObj["value"][i].values())
    csvFile.close()

    t = threading.Timer(120, task)
    t.start()

while True:
    try:
        task()
    except IndexError:
        pass
    else:
        break
I get the following error and the program stops:
"header = jsonObj["value"][0].keys()
IndexError: list index out of range"
I would like the program to keep running even after the IndexError has occurred.
How can I edit the code to achieve that?
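For what it's worth, a minimal hedged sketch of one possible shape (an assumption on my part, not the poster's final code): reschedule from a small wrapper so an IndexError inside task() cannot stop future runs, and drop the threading.Timer call at the end of task():
import threading

def run_task():
    try:
        task()  # assumes task() no longer schedules itself at the end
    except IndexError:
        # the API returned an empty "value" list this cycle; just wait for the next run
        print("No incidents returned; retrying in 120 seconds")
    threading.Timer(120, run_task).start()

run_task()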
I'm trying to import a zip file into Confluence with the RPC importSpace object, but it keeps giving errors. Atlassian has the following documentation that you can use for this:
public boolean importSpace(String token, byte[] importData)
I have created a small Python script that loops through a file where the zip filenames are saved:
#!/usr/bin/python
import xmlrpclib
import time
import urllib
import os

confluence2site = "https://confluence"
server = xmlrpclib.ServerProxy(confluence2site + '/rpc/xmlrpc')
username = ''
password = ''
token = server.confluence2.login(username, password)
loginString = "?os_username=" + username + "&os_password=" + password
filelist = ""
failureList = []
start = True

with open('exportedspaces.txt') as f:
    for file in f:
        try:
            print file
            f = open(os.curdir + "\\zips\\" + file, 'rb')
            fileHandle = f.read()
            f.close()
            server.confluence2.importSpace(token, xmlrpclib.Binary(fileHandle))
        except:
            print file + " failed to restore"
            failureList.append(file)
Where does it go wrong?
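For debugging, a hedged sketch (my assumption, reusing server, token, os and xmlrpclib from the script above): strip the trailing newline that each line read from exportedspaces.txt carries, and narrow the bare except so the actual XML-RPC fault is printed instead of being swallowed:
with open('exportedspaces.txt') as listing:
    for name in listing:
        name = name.strip()  # each line ends with '\n', which would corrupt the zip path
        if not name:
            continue
        zip_path = os.path.join(os.curdir, "zips", name)
        with open(zip_path, 'rb') as zip_file:
            data = zip_file.read()
        try:
            server.confluence2.importSpace(token, xmlrpclib.Binary(data))
        except xmlrpclib.Fault as err:
            print name + " failed to restore: " + err.faultString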