I'm trying to do an upload to Podio but it fails. Here is an extract:
c = api.OAuthClient(
    podio_pw.client_id,
    podio_pw.client_secret,
    podio_pw.username,
    podio_pw.password,
)
source = "dit is een test"  # Dutch for "this is a test"
attributes = {
    'filename': 'test.txt',
    'source': source
}
filep = c.transport.POST(url='/file/v2/', body=attributes, type='multipart/form-data')
This always results in the following error:
Traceback (most recent call last):
File "C:\Python34\libs\podio-py-master\attach_invoices.py", line 43, in <module>
filep = c.transport.POST(url='/file/v2/',body=attributes,type='multipart/form-data')
File "C:\Python34\libs\podio-py-master\pypodio2\transport.py", line 135, in __call__
body = "".join(body)
File "C:\Python34\libs\podio-py-master\pypodio2\encode.py", line 376, in __next__
return next(self)
File "C:\Python34\libs\podio-py-master\pypodio2\encode.py", line 352, in __next__
block = next(self.param_iter)
File "C:\Python34\libs\podio-py-master\pypodio2\encode.py", line 245, in iter_encode
block = self.encode(boundary)
File "C:\Python34\libs\podio-py-master\pypodio2\encode.py", line 233, in encode
if re.search("^--%s$" % re.escape(boundary), value, re.M):
File "C:\Python34\lib\re.py", line 166, in search
return _compile(pattern, flags).search(string)
TypeError: can't use a string pattern on a bytes-like object
I know it has something to do with byte encoding etc., but I have no idea how to handle it. Even if I try to make that source a file, a raw file, or whatever, the POST fails.
This worked for me:
c = api.OAuthClient(
    client_id,
    client_secret,
    username,
    password,
)

filename = 'screenie.png'
filedata = open(filename, 'rb')  # binary mode, so the raw bytes are uploaded unchanged

# Create a file from raw data
attributes = {'filename': filename,
              'source': filedata}
file_upload = c.transport.POST(url='/file/v2/', body=attributes, type='multipart/form-data')
print(file_upload)
I lifted the code from here: https://github.com/podio/podio-py/blob/master/pypodio2/areas.py
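The same upload also works wrapped in a small helper (a sketch, assuming the same client c as above), which makes sure the file handle is closed once the POST returns:

def upload_file(c, path):
    # Binary mode matters: the multipart encoder works with bytes, not str
    with open(path, 'rb') as source:
        attributes = {'filename': path, 'source': source}
        return c.transport.POST(url='/file/v2/',
                                body=attributes,
                                type='multipart/form-data')

print(upload_file(c, 'screenie.png'))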
To execute the file upload process in Python 3.x, you have to update two files in pypodio.
Step 1
Replace the file encode.py with the script below.
import mimetypes
import codecs
import uuid
import io
import sys


def multipart_encode(fields, files):
    content_type, body = MultipartFormdataEncoder().encode(fields, files)
    return body, content_type


class MultipartFormdataEncoder(object):
    def __init__(self):
        self.boundary = uuid.uuid4().hex
        self.content_type = 'multipart/form-data; boundary={}'.format(self.boundary)

    @classmethod
    def u(cls, s):
        if sys.hexversion < 0x03000000 and isinstance(s, str):
            s = s.decode('utf-8')
        if sys.hexversion >= 0x03000000 and isinstance(s, bytes):
            s = s.decode('utf-8')
        return s

    def iter(self, fields, files):
        """
        fields is a sequence of (name, value) elements for regular form fields.
        files is a sequence of (name, filename, file-path) elements for data to be uploaded as files.
        Yields the body's chunks as bytes.
        """
        encoder = codecs.getencoder('utf-8')
        for (key, value) in fields:
            key = self.u(key)
            yield encoder('--{}\r\n'.format(self.boundary))
            yield encoder(self.u('Content-Disposition: form-data; name="{}"\r\n').format(key))
            yield encoder('\r\n')
            if isinstance(value, int) or isinstance(value, float):
                value = str(value)
            yield encoder(self.u(value))
            yield encoder('\r\n')
        for (key, filename, fpath) in files:
            key = self.u(key)
            filename = self.u(filename)
            yield encoder('--{}\r\n'.format(self.boundary))
            yield encoder(self.u('Content-Disposition: form-data; name="{}"; filename="{}"\r\n').format(key, filename))
            yield encoder(
                'Content-Type: {}\r\n'.format(mimetypes.guess_type(filename)[0] or 'application/octet-stream'))
            yield encoder('\r\n')
            with open(fpath, 'rb') as fd:
                buff = fd.read()
                yield (buff, len(buff))
            yield encoder('\r\n')
        yield encoder('--{}--\r\n'.format(self.boundary))

    def encode(self, fields, files):
        body = io.BytesIO()
        for chunk, chunk_len in self.iter(fields, files):
            body.write(chunk)
        return self.content_type, body.getvalue()
Code snippet from here
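To sanity-check the encoder in isolation (a sketch; assumes a file test.txt exists in the working directory):

body, content_type = multipart_encode(
    fields=[('filename', 'test.txt')],
    files=[('source', 'test.txt', 'test.txt')],  # (name, filename, path on disk)
)
print(content_type)  # multipart/form-data; boundary=...
print(body[:60])     # first bytes of the encoded body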
Step 2
Update transport.py at line 186:
if kwargs['type'] == 'multipart/form-data':
    fields = [('filename', kwargs['body']['filename'])]
    files = [('source', kwargs['body']['filename'], kwargs['body']['source'])]
    body, content_type = multipart_encode(fields, files)
    headers.update({'Content-Type': content_type, })
else:
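With both files updated, an upload in Python 3 looks like this (a sketch, reusing the client c from the question; note that 'source' must now be a path to a file on disk, because the new encoder opens it with open(fpath, 'rb')):

attributes = {
    'filename': 'test.txt',
    'source': '/path/to/test.txt',  # hypothetical path; point this at a real file
}
filep = c.transport.POST(url='/file/v2/', body=attributes, type='multipart/form-data')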
Returns "can only join an iterable error"
attributes={'filename': 'mx.txt', 'source': 'hello uyur92wyhfr ruptgpwyoer8t9u'}
try:
item = c.transport.POST(url=url,
body=attributes,
type='multipart/form-data')
except Exception as e:
print(e)
I am using the Python code below (a Lambda function) for data transformation with Kinesis Data Firehose, and I am getting the error below.
Code:
# This function is created to transform the data from Kinesis Data Firehose -> S3 bucket.
# It converts single-line JSON to multi-line JSON, as expected by AWS Athena best practice.
# It also removes special characters from JSON keys (column names in Athena), as Athena
# expects column names without special characters.
import json
import boto3
import base64
import string
from typing import Optional, Iterable, Union

delete_dict = {sp_character: '' for sp_character in string.punctuation}
PUNCT_TABLE = str.maketrans(delete_dict)
output = []


def lambda_handler(event, context):
    for record in event['records']:
        payload = base64.b64decode(record['data']).decode('utf-8')
        remove_special_char = json.loads(payload, object_pairs_hook=clean_keys)
        row_w_newline = str(remove_special_char) + "\n"
        row_w_newline = base64.b64encode(row_w_newline.encode('utf-8'))
        output_record = {
            'recordId': record['recordId'],
            'result': 'Ok',
            'data': row_w_newline
        }
        output.append(output_record)
    print('Processed {} records.'.format(len(event['records'])))
    return {'records': output}


def strip_punctuation(s: str,
                      exclude_chars: Optional[Union[str, Iterable]] = None) -> str:
    """
    Remove punctuation and spaces from a string.
    If `exclude_chars` is passed, certain characters will not be removed
    from the string.
    """
    punct_table = PUNCT_TABLE.copy()
    if exclude_chars:
        for char in exclude_chars:
            punct_table.pop(ord(char), None)
    # Next, remove the desired punctuation from the string
    return s.translate(punct_table)


def clean_keys(o):
    return {strip_punctuation(k): v for k, v in o}
Error:
An error occurred during JSON serialization of response: b'eyd2ZXJzaW9uJzogJzAnLCAnaWQnOiAnNjFhMGI4YjQtOGRhYS0xNGMwLTllOTMtNzhhNjk0MTY0MDgxJywgJ2RldGFpbHR5cGUnOiAnQVdTIEFQSSBDYWxsIHZpYSBDbG91ZFRyYWlsJywgJ3NvdXJjZSc6ICdhd3Muc2VjdXJpdHlodWInLCAnYWNjb3VudCc6ICc5MzQ3NTU5ODkxNzYnLCAndGltZSc6ICcyMDIxLTExLTIzVDE1OjQxOjQ3WicsICdyZWdpb24nOiAndXMtZWFzdC0xJywgJ3Jlc291cmNlcyc6IFtdLCAnZGV0YWlsJzogeydldmVudFZlcnNpb24nOiAnMS4wOCcsICd1c2VySWRlbnRpdH'
is not JSON serializable
Traceback (most recent call last):
File "/var/lang/lib/python3.6/json/__init__.py", line 238, in dumps
**kw).encode(obj)
File "/var/lang/lib/python3.6/json/encoder.py", line 199, in encode
chunks = self.iterencode(o, _one_shot=True)
File "/var/lang/lib/python3.6/json/encoder.py", line 257, in iterencode
return _iterencode(o, 0)
File "/var/runtime/bootstrap.py", line 135, in decimal_serializer
raise TypeError(repr(o) + " is not JSON serializable")
Event:
{'recordId': '49623720050963652954313901532126731765249603147428528130000000', 'approximateArrivalTimestamp': 1637711607661, 'data': 'eyJ2ZXJzaW9uIjoiMCIsImlkIjoiMzFkOGE3MmItYWUxNC02ZDYzLWRjODUtMTZmNWViMzk3ZTAyIiwiZGV0YWlsLXR5cGUiOiJBV1MgQVBJIENhbGwgdmlhIENsb3VkVHJhaWwiLCJzb3VyY2UiOiJhd3Muc2VjdXJpdHlodWIiLCJhY2NvdW50IjoiMjIwMzA3MjAyMzYyIiwidGltZSI6IjIwMjEtMTEtMjNUMjM6NTM6MTdaIiwicmVnaW9uIjoidXMtd2VzdC0yIiwicmVzb3VyY2VzIjpbXSwiZGV0YWlsIjp7ImV2ZW50VmVyc2lvbiI6IjEuMDgiLCJ1c2VySWRlbnRpdHkiOnsidHlwZSI6IlJvb3QiLCJwcmluY2lwYWxJZCI6IjIyMDMwNzIwMjM2MiIsImFybiI6ImFybjphd3M6aWFtOjoyMjAzMDcyMDIzNjI6cm9vdCIsImFjY291bnRJZCI6IjIyMDMwNzIwMjM2MiIsImFjY2Vzc0tleUlkIjoiQVNJQVRHUzJWRUU1TEQ2TUZFRlYiLCJzZXNzaW9uQ29udGV4dCI6eyJzZXNzaW9uSXNzdWVyIjp7fSwid2ViSWRGZWRlcmF0aW9uRGF0YSI6e30sImF0dHJpYnV0ZXMiOnsiY3JlYXRpb25EYXRlIjoiMjAyMS0xMS0yM1QxNToxMDo1N1oiLCJtZmFBdXRoZW50aWNhdGVkIjoiZmFsc2UifX19LCJldmVudFRpbWUiOiIyMDIxLTExLTIzVDIzOjUzOjE3WiIsImV2ZW50U291cmNlIjoic2VjdXJpdHlodWIuYW1hem9uYXdzLmNvbSIsImV2ZW50TmFtZSI6IkJhdGNoRGlzYWJsZVN0YW5kYXJkcyIsImF3c1JlZ2lvbiI6InVzLXdlc3QtMiIsInNvdXJjZUlQQWRkcmVzcyI6IjEwNC4xMjkuMTk4LjEwMSIsInVzZXJBZ2VudCI6ImF3cy1pbnRlcm5hbC8zIGF3cy1zZGstamF2YS8xLjEyLjExMiBMaW51eC81LjQuMTU2LTk0LjI3My5hbXpuMmludC54ODZfNjQgT3BlbkpES182NC1CaXRfU2VydmVyX1ZNLzI1LjMxMi1iMDcgamF2YS8xLjguMF8zMTIgdmVuZG9yL09yYWNsZV9Db3Jwb3JhdGlvbiBjZmcvcmV0cnktbW9kZS9zdGFuZGFyZCIsInJlcXVlc3RQYXJhbWV0ZXJzIjp7IlN0YW5kYXJkc1N1YnNjcmlwdGlvbkFybnMiOlsiYXJuOmF3czpzZWN1cml0eWh1Yjp1cy13ZXN0LTI6MjIwMzA3MjAyMzYyOnN1YnNjcmlwdGlvbi9hd3MtZm91bmRhdGlvbmFsLXNlY3VyaXR5LWJlc3QtcHJhY3RpY2VzL3YvMS4wLjAiXX0sInJlc3BvbnNlRWxlbWVudHMiOnsiU3RhbmRhcmRzU3Vic2NyaXB0aW9ucyI6W3siU3RhbmRhcmRzQXJuIjoiYXJuOmF3czpzZWN1cml0eWh1Yjp1cy13ZXN0LTI6OnN0YW5kYXJkcy9hd3MtZm91bmRhdGlvbmFsLXNlY3VyaXR5LWJlc3QtcHJhY3RpY2VzL3YvMS4wLjAiLCJTdGFuZGFyZHNJbnB1dCI6e30sIlN0YW5kYXJkc1N0YXR1cyI6IkRFTEVUSU5HIiwiU3RhbmRhcmRzU3Vic2NyaXB0aW9uQXJuIjoiYXJuOmF3czpzZWN1cml0eWh1Yjp1cy13ZXN0LTI6MjIwMzA3MjAyMzYyOnN1YnNjcmlwdGlvbi9hd3MtZm91bmRhdGlvbmFsLXNlY3VyaXR5LWJlc3QtcHJhY3RpY2VzL3YvMS4wLjAiLCJTdGFuZGFyZHNTdGF0dXNSZWFzb24iOnsiU3RhdHVzUmVhc29uQ29kZSI6Ik5PX0FWQUlMQUJMRV9DT05GSUdVUkFUSU9OX1JFQ09SREVSIn19XX0sInJlcXVlc3RJRCI6IjcyYzVjODYyLTJmOWEtNDBjYS05NDExLTY2YzIxMTcyNjIxMCIsImV2ZW50SUQiOiI3YWY4NjFiZS03YjExLTRmOTQtOWZlYS0yYTgyZjg5NDIxNWYiLCJyZWFkT25seSI6ZmFsc2UsImV2ZW50VHlwZSI6IkF3c0FwaUNhbGwiLCJtYW5hZ2VtZW50RXZlbnQiOnRydWUsInJlY2lwaWVudEFjY291bnRJZCI6IjIyMDMwNzIwMjM2MiIsImV2ZW50Q2F0ZWdvcnkiOiJNYW5hZ2VtZW50In19'}
This code helped me with the above issue. The fixes are to serialize the cleaned record with json.dumps() instead of str() (so the output is valid JSON), and to decode the base64-encoded bytes back to a string so the returned record itself is JSON serializable:
def lambda_handler(event, context):
    for record in event['records']:
        payload = base64.b64decode(record['data']).decode('utf-8')
        remove_special_char = json.loads(payload, object_pairs_hook=clean_keys)
        output_record = {
            'recordId': record['recordId'],
            'result': 'Ok',
            'data': base64.b64encode(json.dumps(remove_special_char).encode('utf-8') + b'\n').decode('utf-8')
        }
        output.append(output_record)
    print('Processed {} records.'.format(len(event['records'])))
    return {'records': output}
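A quick local check (a sketch with a fake record, not a real Firehose event) confirms the round trip now yields newline-terminated JSON with cleaned keys:

import base64, json

fake_event = {'records': [{
    'recordId': '1',
    'data': base64.b64encode(json.dumps({'a-b': 1}).encode('utf-8')).decode('utf-8'),
}]}
result = lambda_handler(fake_event, None)
print(base64.b64decode(result['records'][0]['data']))  # b'{"ab": 1}\n'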
I have a script in which I take a PDF file and convert it to a txt file. The converted text file comes back as an _io.TextIOWrapper. When I call itertools.islice() with that _io.TextIOWrapper object, it raises ValueError: I/O operation on closed file. With the 'plain' file format, the io.TextIOWrapper doesn't give any error. I also tried to open the TextIOWrapper object again, but that raises an error saying open() expects only str, bytes or None, not TextIOWrapper.
views.py
def save_file(cls, user, file, file_format, project_id, file1):
    project = get_object_or_404(Project, pk=project_id)
    parser = cls.select_parser(file_format)
    if file_format == 'pdf':
        path = default_storage.save('text_pdf_file.pdf', ContentFile(file.read()))
        return_file = convert_pdf_txt(path, file_format)
        print(type(return_file))  # _io.TextIOWrapper
        file = return_file
    data = parser.parse(file, file_format)
    storage = project.get_storage(data)
    storage.save(user)
utils.py
class PlainTextParser(FileParser):
    def parse(self, file, file_format):
        if file_format == 'plain':
            file = EncodedIO(file)
            file = io.TextIOWrapper(file, encoding=file.encoding)
        while True:
            batch = list(itertools.islice(file, settings.IMPORT_BATCH_SIZE))
            if not batch:
                break
            yield [{'text': line.strip()} for line in batch]
convert.py
import os
from docx import Document
import pdfplumber as pp
import io
import unidecode
import re


def remove_accented_chars(text):
    text = unidecode.unidecode(text)
    return text


def remove_extra_spaces(line):
    return re.sub(' +|\t+', ' ', line)


def remove_special_char(line):
    return re.sub(r"[^a-zA-Z0-9%.#]+", ' ', line)


def preprocessing(lines, fileExtension):
    example_text_file = "ex_txt.txt"
    for line in lines:
        if fileExtension == "docx":
            x = str(line.text)
        elif fileExtension == "pdf":
            x = line.extract_text()
        else:
            x = line
        x = remove_accented_chars(x)
        x = remove_extra_spaces(x)
        x = remove_special_char(x)
        with open(example_text_file, "a+") as pre:
            if len(x) >= 5:
                pre.write(x + "\n")
    return pre


def convert_pdf_txt(path, fileExtension):
    with pp.open(path) as pdf:
        pages = pdf.pages
        pre = preprocessing(pages, fileExtension)
    return pre
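For reference, the ValueError comes from preprocessing: it opens ex_txt.txt in a with block inside the loop and then returns pre after that block has already closed the handle, so parse ends up calling itertools.islice on a closed file. A minimal sketch of one fix (assuming the caller only needs to read the text back) writes everything first and returns a freshly opened handle:

def preprocessing(lines, fileExtension):
    example_text_file = "ex_txt.txt"
    with open(example_text_file, "w") as pre:  # open once, before the loop
        for line in lines:
            if fileExtension == "docx":
                x = str(line.text)
            elif fileExtension == "pdf":
                x = line.extract_text()
            else:
                x = line
            x = remove_special_char(remove_extra_spaces(remove_accented_chars(x)))
            if len(x) >= 5:
                pre.write(x + "\n")
    return open(example_text_file, "r")  # reopen for reading; the caller should close it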
I got json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0) when I tried to access the values from a JSON file I created. I ran the runfile below, and it seems there is this decoder issue; however, when the JSON file was created, I made the encrypted content, which is supposed to be added to the JSON file's dictionary, a string. Could someone help me spot where the problem is?
Error is:
{
"youtube": {
"key": "D5IPLv06NGXut4kKdScNAP47AieP8wqeUINr6EFLXFs=",
"content": "gAAAAABclST8_XmHrAAfEbgrX-r6wwrJf7IAtDoLSkahXAraPjvoXeLl3HLkuHbW0uj5XpR4_jmkgk0ICmT8ZKP267-nnjnCpw=="
},
"facebook": {
"key": "YexP5dpgxwKhD8Flr6hbJhMiAB1nmzZXi2IMMO3agXg=",
"content": "gAAAAABclST8zSRen_0sur79NQk9Pin16PZcg95kEHnFt5vjKENMPckpnK9JQctayouQ8tHHeRNu--s58Jj3IPsPbrLoeOwr-mwdU5KvvaXLY-g6bUwnIp4="
},
"instagram": {
"key": "ew2bl0tKdlgwiWfhB0jjSrOZDb41F88HULCQ_21EDGU=",
"content": "gAAAAABclST8FKcZqasiXfARRfbGPqb3pdDj4aKuxeJoRvgIPbVIOZEa5s34f0c_H3_itv5iG1O7u8vvlT8lAPTgAp3ez8OBh4T2OfBG-ObljYmIt7exi0Q="
}
}
Traceback (most recent call last):
File "C:\Users\YOURNAME\Desktop\randomprojects\content_key_writer.py", line 65, in <module>
main()
File "C:\Users\YOURNAME\Desktop\randomprojects\content_key_writer.py", line 60, in main
data_content = json.load(data_file)
File "C:\Users\YOURNAME\AppData\Local\Programs\Python\Python37\lib\json\__init__.py", line 296, in load
parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, **kw)
File "C:\Users\YOURNAME\AppData\Local\Programs\Python\Python37\lib\json\__init__.py", line 348, in loads
return _default_decoder.decode(s)
File "C:\Users\YOURNAME\AppData\Local\Programs\Python\Python37\lib\json\decoder.py", line 337, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "C:\Users\YOURNAME\AppData\Local\Programs\Python\Python37\lib\json\decoder.py", line 355, in raw_decode
raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
The original code is pasted here; call this the runfile:
import sys
import os
from cryptography.fernet import Fernet
import json
import pathlib
from encipher_decipher import encrypt, decrypt, bytes_to_str, str_to_bytes


def content_key_writer(path, filename, account, content):
    """Generate key corresponding to an account, save in json"""
    # make the path a Path object
    path = pathlib.Path(path)
    file_path = os.path.join(path, filename)
    # generate a key using Fernet
    key = Fernet.generate_key()
    # json doesn't support bytes, so convert to string
    key = bytes_to_str(key)
    # with file_path, see if the file exists
    if not os.path.exists(file_path):
        # build the dictionary to hold key and content
        data = {}
        data[account] = {}
        data[account]['key'] = key
        data[account]['content'] = encrypt(content, key)
        # if the file doesn't exist, build the new json file
        with open(file_path, 'w') as f:
            json.dump(data, f)
    else:
        # if the file does exist
        with open(file_path, 'r') as f:
            data = json.load(f)
        data[account] = {}  # <--- add the account
        data[account]['key'] = key
        data[account]['content'] = encrypt(content, key)
        os.remove(file_path)  # <--- remove the file and rewrite it
        with open(file_path, 'w') as f:
            json.dump(data, f, indent=4)


def main():
    path = "C:/Users/YOURNAME/Desktop/randomprojects"
    name = 'content.json'
    account = 'youtube'
    content = 'youtubepassword'
    account2 = 'facebook'
    content2 = 'facebookpassword'
    account3 = 'instagram'
    content3 = 'instagrampassword'
    content_key_writer(path, name, account, content)
    content_key_writer(path, name, account2, content2)
    content_key_writer(path, name, account3, content3)
    new_path = os.path.join(pathlib.Path(path), name)
    with open(new_path) as data_file:
        data = data_file.read()
        print(data)
        data_content = json.load(data_file)
    value = data_content['youtube']['content']
    print(value)


if __name__ == '__main__':
    main()
The module imported in the code above is encipher_decipher:
"""
Given an information, encrypt and decrypt using the given key
"""
from cryptography.fernet import Fernet
import os
def encrypt(information, key):
"""encrypt information and return as string"""
f = Fernet(key)
information_bytes = str_to_bytes(information)
encrypted_info = f.encrypt(information_bytes) #<--- returns bytes
encrypted_info = bytes_to_str(encrypted_info) #<--- to save in json requires str not bytes
return encrypted_info
def decrypt(information, key):
"""decrypt information and return as string"""
f = Fernet(key)
information_bytes = str_to_bytes(information)
decrypted_info = f.decrypt(information_bytes) #<--- returns bytes
decrypted_info = bytes_to_str(decrypted_info) #<--- converts to string
return decrypted_info
def bytes_to_str(byte_stuff):
"""Convert bytes to string"""
return byte_stuff.decode('utf-8')
def str_to_bytes(str_stuff):
"""Converts string to bytes"""
return bytes(str_stuff, 'utf-8') # or str_stuff.encode('utf-8')
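A quick round-trip check (a sketch) suggests the helpers in this module behave as intended, so the problem is elsewhere:

from cryptography.fernet import Fernet

key = bytes_to_str(Fernet.generate_key())
token = encrypt('some secret', key)
assert decrypt(token, key) == 'some secret'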
The problem is this piece of code:
with open(new_path) as data_file:
    data = data_file.read()
    print(data)
    data_content = json.load(data_file)
You are reading the contents of the file into data, printing it, and then asking json.load() to read from the file handle again. However, at that point the file pointer is already at the end of the file, so there is no more data to read, hence the JSON error: Expecting value.
Do this instead:
with open(new_path) as data_file:
    data = data_file.read()
    print(data)
    data_content = json.loads(data)
You already have your data read into data, so you can just feed that string into json.loads().
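An equivalent alternative is to rewind the file pointer before handing the handle back to json.load():

with open(new_path) as data_file:
    data = data_file.read()
    print(data)
    data_file.seek(0)  # move the file pointer back to the beginning
    data_content = json.load(data_file)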
I've got a CSV file with URLs and I need to scrape metadata from those websites. I'm using Python requests for that, with the code below:
from tempfile import NamedTemporaryFile
import shutil
import csv
from bs4 import BeautifulSoup
import requests
import re
import html5lib
import sys
#import logging

filename = 'TestWTF.csv'
#logging.basicConfig(level=logging.DEBUG)

#Get filename (with extension) from terminal
#filename = sys.argv[1]
tempfile = NamedTemporaryFile(delete=False)
read_timeout = 1.0

#Does the actual scraping, returns metaTag data
def getMetadata(url, metaTag):
    r = requests.get("http://" + url, timeout=2)
    data = r.text
    soup = BeautifulSoup(data, 'html5lib')
    metadata = soup.findAll(attrs={"name": metaTag})
    return metadata

#Gets either keywords or description
def addDescription(row):
    scrapedKeywordsData = getMetadata(row, 'keywords')
    if not scrapedKeywordsData:
        print row + ' NO KEYWORDS'
        scrapedKeywordsData = getMetadata(row, 'description')
        if not scrapedKeywordsData:
            return ''
    return scrapedKeywordsData[0]

def prepareString(data):
    output = data
    #Get rid of opening meta content
    if output.startswith('<meta content="'):
        output = data[15:]
    #Get rid of closing meta content (keywords)
    if output.endswith('" name="keywords"/>'):
        output = output[:-19]
    #Get rid of closing meta content (description)
    if output.endswith('" name="description"/>'):
        output = output[:-22]
    return output

def iterator():
    with open(filename, 'rb') as csvFile, tempfile:
        reader = csv.reader(csvFile, delimiter=',', quotechar='"')
        writer = csv.writer(tempfile, delimiter=',', quotechar='"')
        i = 0
        for row in reader:
            try:
                data = str(addDescription(row[1]))
                row[3] = prepareString(data)
            except requests.exceptions.RequestException as e:
                print e
            except requests.exceptions.Timeout as e:
                print e
            except requests.exceptions.ReadTimeout as e:
                print "lol"
            except requests.exceptions.ConnectionError as e:
                print "These aren't the domains we're looking for."
            except requests.exceptions.ConnectTimeout as e:
                print "Too slow Mojo!"
            writer.writerow(row)
            i = i + 1
            print i
    shutil.move(tempfile.name, filename)

def main():
    iterator()

#Defining main function
if __name__ == '__main__':
    main()
It works just fine, but at some URLs (maybe 2-3 out of 3000) it suddenly stops and never progresses to the next one, even after the timeout. So I have to kill it using Ctrl+C, which results in the file not being saved.
I know it's a problem with catching exceptions, but I cannot figure out which one, or what to do about it. I'm more than happy to simply ignore the URL it gets stuck on.
EDIT:
Added traceback:
^CTraceback (most recent call last):
File "blacklist.py", line 90, in <module>
main()
File "blacklist.py", line 85, in main
iterator()
File "blacklist.py", line 62, in iterator
data = str(addDescription (row[1] ))
File "blacklist.py", line 30, in addDescription
scrapedKeywordsData = getMetadata(row, 'keywords')
File "blacklist.py", line 25, in getMetadata
metadata = soup.findAll(attrs={"name":metaTag})
File "/Library/Python/2.7/site-packages/bs4/element.py", line 1259, in find_all
return self._find_all(name, attrs, text, limit, generator, **kwargs)
File "/Library/Python/2.7/site-packages/bs4/element.py", line 537, in _find_all
found = strainer.search(i)
File "/Library/Python/2.7/site-packages/bs4/element.py", line 1654, in search
found = self.search_tag(markup)
File "/Library/Python/2.7/site-packages/bs4/element.py", line 1626, in search_tag
if not self._matches(attr_value, match_against):
File "/Library/Python/2.7/site-packages/bs4/element.py", line 1696, in _matches
if isinstance(markup, Tag):
KeyboardInterrupt
EDIT 2:
Example website for which the script doesn't work: miniusa.com
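Worth knowing: the timeout passed to requests only bounds the connect and each individual socket read, not the whole request, so a server that keeps dripping bytes never trips it; the traceback also shows the process busy inside BeautifulSoup's parsing when interrupted. One blunt workaround (a sketch, assuming a Unix-like OS such as the macOS suggested by the traceback paths; signal.alarm is unavailable on Windows) is a hard per-URL time limit via SIGALRM:

import signal

class URLTimeout(Exception):
    pass

def _raise_timeout(signum, frame):
    raise URLTimeout()

def withHardLimit(func, arg, limit=10):
    #Raise URLTimeout if func(arg) takes more than `limit` seconds in total
    signal.signal(signal.SIGALRM, _raise_timeout)
    signal.alarm(limit)
    try:
        return func(arg)
    finally:
        signal.alarm(0)  #always cancel the pending alarm

Then call data = str(withHardLimit(addDescription, row[1])) in iterator() and add except URLTimeout: pass alongside the requests handlers, so a stuck URL is written through unchanged instead of hanging the run.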
I just want to download an image and then upload it to Amazon S3, but it's not working:
'builtin_function_or_method' object has no attribute 'StringIO'
Traceback (most recent call last):
File "flickrDump.py", line 16, in <module>
imgpath = s3.upload_thumbnail(thumbnail_name=tools.randomString(10), thumbnail_data=tdata,bucket="fabletest")
File "../lib/s3.py", line 52, in upload_thumbnail
k.set_contents_from_string(thumbnail_data)
File "/usr/lib/pymodules/python2.6/boto/s3/key.py", line 539, in set_contents_from_string
self.set_contents_from_file(fp, headers, replace, cb, num_cb, policy)
File "/usr/lib/pymodules/python2.6/boto/s3/key.py", line 455, in set_contents_from_file
self.send_file(fp, headers, cb, num_cb)
File "/usr/lib/pymodules/python2.6/boto/s3/key.py", line 366, in send_file
return self.bucket.connection.make_request('PUT', self.bucket.name,
AttributeError: 'str' object has no attribute 'connection'
My code to download it and upload it is this:
tdata = tools.download("http://farm5.static.flickr.com/4148/5124630813_c11b05e6da_z.jpg")
imgpath = s3.upload_thumbnail(thumbnail_name=tools.randomString(10), thumbnail_data=tdata,bucket="fabletest")
print imgpath
The library I'm using is the s3 library. I downloaded this somewhere, so it should be standard.
from boto.s3.connection import S3Connection
from boto.s3.key import Key
from boto.s3.bucket import Bucket
import datetime

ACCESSKEY = 'MYKEY'
SECRETKEY = 'MYSECRET'


def get_bucket_path(bucket, filename, https=False):
    path = None
    if isinstance(bucket, Bucket):
        path = bucket.name
    else:
        path = bucket
    if https:
        return "https://s3.amazonaws.com/%s/%s" % (path, filename)
    else:
        return "http://s3.amazonaws.com/%s/%s" % (path, filename)


def _aws_keys():
    return ACCESSKEY, SECRETKEY


def _conn():
    key, secret = _aws_keys()
    return S3Connection(key, secret)


def cache_bucket(conn=_conn()):
    bucket = conn.create_bucket('mimvicache')
    bucket.make_public()
    return bucket


class AwsException(Exception):
    def __init__(self, value):
        self.errorval = value

    def __str__(self):
        return repr(self.errorval)


def upload_thumbnail(thumbnail_name, thumbnail_data=None, thumbnail_path=None,
                     bucket=cache_bucket(), conn=_conn(), notes=None, image_id=None):
    k = Key(bucket)
    k.key = thumbnail_name
    if notes is not None:
        k.set_metadata("notes", notes)
    if image_id is not None:
        k.set_metadata("image_id", image_id)
    if thumbnail_data is not None:
        k.set_contents_from_string(thumbnail_data)
    elif thumbnail_path is not None:
        k.set_contents_from_filename(thumbnail_path)
    else:
        raise AwsException("No file name")
    k.set_acl('public-read')
    return get_bucket_path(bucket.name, k.key)
Can someone help me upload this image to S3?
In your code:
return self.bucket.connection.make_request('PUT', self.bucket.name,......
AttributeError: 'str' object has no attribute 'connection'
This means that somehow self.bucket evaluates to a string, and you obviously cannot access connection on it.
For further analysis, look at the function upload_thumbnail: it expects bucket=cache_bucket() as an argument, i.e. it expects a Bucket object.
def upload_thumbnail(thumbnail_name, thumbnail_data=None, thumbnail_path=None,
                     bucket=cache_bucket(), conn=_conn(), notes=None, image_id=None)
What you are passing in your code is a string: bucket="fabletest".
imgpath = s3.upload_thumbnail(thumbnail_name=tools.randomString(10), thumbnail_data=tdata,bucket="fabletest")
Your code should be something like this. You might have to sanitize it, but the key is to pass the bucket and connection objects to the upload_thumbnail function.
from boto.s3.connection import S3Connection

connection = S3Connection('your access key', 'your secret key')  # boto connection, matching s3.py above
buck = connection.create_bucket('mybucketname')

tdata = tools.download("http://farm5.static.flickr.com/4148/5124630813_c11b05e6da_z.jpg")
imgpath = s3.upload_thumbnail(thumbnail_name=tools.randomString(10), thumbnail_data=tdata,
                              bucket=buck, conn=connection)
print imgpath
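As a side note (a sketch, not a claim about the library's intended design): default arguments like bucket=cache_bucket() and conn=_conn() are evaluated once, at import time, so merely importing s3.py opens an S3 connection and creates the cache bucket. Deferring them to call time avoids that:

def upload_thumbnail(thumbnail_name, thumbnail_data=None, thumbnail_path=None,
                     bucket=None, conn=None, notes=None, image_id=None):
    if conn is None:
        conn = _conn()
    if bucket is None:
        bucket = cache_bucket(conn)
    # ... then proceed exactly as in the original function body ...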