looking at the example provided by wordpresslib, its very straight forward on how to upload images to the media library. However, the attachment of images looks like it was never finished. Has anyone successfully attached the images?
#!/usr/bin/env python
"""
Small example script that publish post with JPEG image
"""
# import library
import wordpresslib
print 'Example of posting.'
print
url = raw_input('Wordpress URL (xmlrpc.php will be added):')
user = raw_input('Username:')
password = raw_input('Password:')
# prepare client object
wp = wordpresslib.WordPressClient(url+"xmlrpc.php", user, password)
# select blog id
wp.selectBlog(0)
# upload image for post
# imageSrc = wp.newMediaObject('python.jpg')
# FIXME if imageSrc:
# create post object
post = wordpresslib.WordPressPost()
post.title = 'Test post'
post.description = '''
Python is the best programming language in the earth !
No image BROKEN FIXME <img src="" />
'''
#post.categories = (wp.getCategoryIdFromName('Python'),)
# Add tags
post.tags = ["python", "snake"]
# do not publish post
idNewPost = wp.newPost(post, False)
print
print 'Posting successfull! (Post has not been published though)'
WordPressPost class:
class WordPressPost:
"""Represents post item
"""
def __init__(self):
self.id = 0
self.title = ''
self.date = None
self.permaLink = ''
self.description = ''
self.textMore = ''
self.excerpt = ''
self.link = ''
self.categories = []
self.user = ''
self.allowPings = False
self.allowComments = False
self.tags = []
self.customFields = []
def addCustomField(self, key, value):
kv = {'key':key, 'value':value}
self.customFields.append(kv)
Wordpress saves images as website.com/wp-content/uploads/YEAR/MONTH/FILENAME
Adding a simple image tag with the above format in to post.description display the image on the post.
where YEAR is the current year with a 4 digit format (ex. 2015)
and MONTH is the current month with a leading zero (ex. 01,02,... 12)
and FILENAME is the file name submitted via imageSrc = wp.newMediaObject('python.jpg')
Example file name: website.com/wp-content/uploads/2015/06/image.jpg
Here is how I posted my image:
import time
import wordpresslib
import Image
from datetime import datetime
time = datetime.now()
h = str(time.strftime('%H'))
m = str(time.strftime('%M'))
s = str(time.strftime('%S'))
mo = str(time.strftime('%m'))
yr = str(time.strftime('%Y'))
url = 'WORDPRESSURL.xmlrpc.php'
wp = wordpresslib.WordPressClient(url,'USERNAME','PASSWORD')
wp.selectBlog(0)
imageSrc = wp.newMediaObject('testimage'+h+m+s'.jpg') #Used this format so that if i post images with the same name its unlikely they will override eachother
img = 'http://WORDPRESSURL/wp-content/uploads/'+yr+'/'+mo+'/testimage'+h+m+s+'.jpg'
post=wordpresslib.WordPressPost()
post.title='title'
post.description='<img src="'+img+'"/>'
idPost=wp.newPost(post,true)
Related
I'm writing a Form Creation/Filling out forms app, and I'm to the point where I'm taking canvas data from the front-end and filled out fields of text input and draw them on a picture of the form with the pillow library. The problem is when I try to save the data for the form I get this error from "serializer.error" from django-rest-framework: "'FilledForm': [ErrorDetail(string='The submitted data was not a file. Check the encoding type on the form.', code='invalid')]}"
Here is the Api view:
#api_view(['POST'])
def postForm(request):
dataDict = request.data
FieldCoords = {}
# Define Formtype
selectedForm = dataDict["SelectedForm"]
del dataDict["SelectedForm"]
# Get Datalist for form creation
datalist = list(FormData.objects.filter(FormType=selectedForm).values())
#Set Signature Images on Form and Fill form with inputs
for field in datalist:
#Get Field Positions of Model
if field["FieldType"] == "SignatureField":
FieldCoords[field["FieldName"]] = (field["StartX"],field["StartY"])
elif field["FieldType"] == "TextField":
FieldCoords[field["FieldName"]] = (field["StartX"],field["StartY"],abs(field["height"]))
elif field["FieldType"] == "DateField":
FieldCoords[field["FieldName"]] = (field["StartX"],field["StartY"])
#print(FieldCoords)
#Place Signature Fields On FormImage
sigFields = json.loads(dataDict["Signatures"])
FormImage = Image.open(f"{MEDIA_ROOT}/{selectedForm}")
del dataDict["Signatures"]
for field in sigFields:
datauri = sigFields[field]
uri = datauri.split("base64,")
sNameselect = field.split(" - ")
bytes = base64.b64decode(uri[1])
img = Image.open(io.BytesIO(bytes))
for keyCoordSet in FieldCoords:
print(keyCoordSet)
print("Name" + sNameselect[0])
if sNameselect[0] == keyCoordSet:
print("Im here")
FormImage.paste(img, FieldCoords[keyCoordSet], mask=img)
#Place Text Fields On FormImage
d1 = ImageDraw.Draw(FormImage)
for field in dataDict:
for keyCoordSet in FieldCoords:
if field == keyCoordSet:
myFont = ImageFont.truetype("LiberationMono-Regular.ttf",size=FieldCoords[keyCoordSet][2] - 10)
d1.text((FieldCoords[keyCoordSet][0]+5,FieldCoords[keyCoordSet][1]+5), dataDict[field], fill =(255, 255, 255),font=myFont)
time = str(timezone.now()).split(" ")
image_bytes = io.BytesIO(FormImage.tobytes())
imageFile = ContentFile(image_bytes.getvalue())
print(imageFile.read())
data = { # Final data structure
"FormType": selectedForm,
"DateCreated": time[0],
"Data": json.dumps(dataDict, indent = 4),
"FilledForm": (str(selectedForm),imageFile)
}
serializer = FilledFormdataSerializer(data=data)
print(type(imageFile))
if serializer.is_valid():
print("Im Valid")
#serializer.save()
else:
print(serializer.errors)
return Response("Returned Data")
Here is the Model
class FilledForm(models.Model):
FormType = models.CharField(max_length=100)
DateCreated = models.DateField(default=timezone.now())
Data = models.JSONField()
FilledForm = models.ImageField()
Here is the serializer
class FilledFormdataSerializer(serializers.ModelSerializer):
class Meta:
model = FilledForm
fields = ["FormType", "DateCreated", "Data", "FilledForm"]
def create(self, validated_data):
print(validated_data)
return FilledForm.objects.create(**validated_data)
Heres the part of the Code where the issue is
#Place Text Fields On FormImage
d1 = ImageDraw.Draw(FormImage)
for field in dataDict:
for keyCoordSet in FieldCoords:
if field == keyCoordSet:
myFont = ImageFont.truetype("LiberationMono-Regular.ttf",size=FieldCoords[keyCoordSet][2] - 10)
d1.text((FieldCoords[keyCoordSet][0]+5,FieldCoords[keyCoordSet][1]+5), dataDict[field], fill =(255, 255, 255),font=myFont)
time = str(timezone.now()).split(" ")
image_bytes = io.BytesIO(FormImage.tobytes())
imageFile = ContentFile(image_bytes.getvalue())
print(imageFile.read())
data = { # Final data structure
"FormType": selectedForm,
"DateCreated": time[0],
"Data": json.dumps(dataDict, indent = 4),
"FilledForm": (str(selectedForm),imageFile)
}
serializer = FilledFormdataSerializer(data=data)
print(type(imageFile))
if serializer.is_valid():
print("Im Valid")
#serializer.save()
else:
print(serializer.errors)
Here's all the module imported
from rest_framework.response import Response
from rest_framework.decorators import api_view
from api.serializer import DataSerializer, FilledFormdataSerializer
from django.core.files import File
from django.utils import timezone
from backend.models import FormData
from django.core.files.base import ContentFile
import json
from PIL import Image, ImageDraw, ImageFont
import base64
import io
from FormApp.settings import BASE_DIR, MEDIA_ROOT
For some reason it's saying its not reading it as a file even though I'm converting it to bytes and rapping it in the ContentFile() object. Can I get some advice on why it's not saving to the image field?
I have created a small script that scraped a webpage that scrapes all items name, link, image and price from a product table.
I am currently facing problem where I am not able to store multiple dataclasses where I want to first of all see if there is a new URL found in the webpage and if there is a new change, I want to print out the name, image and price of the new url that has been found.
import time
from typing import Optional
import attr
import requests
from selectolax.parser import HTMLParser
#attr.dataclass
class Info:
store: str = attr.ib(factory=str)
link: str = attr.ib(factory=str)
name: Optional[str] = attr.ib(factory=str)
price: Optional[str] = attr.ib(factory=str)
image: Optional[str] = attr.ib(factory=str)
# -------------------------------------------------------------------------
# Get all latest products found in the webpage
# -------------------------------------------------------------------------
def from_page():
with requests.get("https://www.footish.se/sneakers", timeout=5) as rep:
if rep.status_code in (200, 404):
doc = HTMLParser(rep.text)
for product in doc.css('article.product-wrapper'):
name = product.css_first('div.product-image > a').attributes.get('title')
link = product.css_first('div.product-image > a').attributes.get('href')
image = product.css_first('div.product-image > a > img').attributes.get('data-original')
price = product.css_first('span.price-amount')
return Info(
store="Footish",
link=link,
name=name,
image=image,
price=price
)
if __name__ == '__main__':
all_found_products = set()
while True:
get_all_products: Info = from_page()
diff = set(get_all_products.link) - all_found_products
for new_urls in diff:
print(f"Found new url! {new_urls}")
print(f"Name: {get_all_products.name}")
print(f"image: {get_all_products.image}")
print(f"price: {get_all_products.price}")
print("Sleeping 120 sec")
time.sleep(120)
My problem is that I dont know how return dataclasses that is looped from a for loop for product in doc.css('article.product-wrapper'): as there is multiple products on the webpage and I want to store all found products and then compare to see if there is a new url found and if there is then I would like to print out the name, price and image of the new url.
You should use a list to store multiple Info instances, then return them all
def from_page():
with requests.get("https://www.footish.se/sneakers", timeout=5) as rep:
if rep.status_code in (200, 404):
doc = HTMLParser(rep.text)
infos = []
for product in doc.css('article.product-wrapper'):
name = product.css_first('div.product-image > a').attributes.get('title')
link = product.css_first('div.product-image > a').attributes.get('href')
image = product.css_first('div.product-image > a > img').attributes.get('data-original')
price = product.css_first('span.price-amount')
infos.append(Info(store="Footish", link=link, name=name,
image=image, price=price))
return infos
And for the main, it would be more something like
all_found_urls = set()
while True:
get_all_products = from_page()
for info in get_all_products:
if info.link not in all_found_urls:
print(f"Found new url! {info.link}")
print(f"Name: {info.name}")
print(f"image: {info.image}")
print(f"price: {info.price}")
all_found_urls.add(info.link)
print("Sleeping 120 sec")
time.sleep(120)
So I have this craigslist scraper project I am working on and I am running into a potential problem. I have this file url.py that has a UrlObj class and getters and setters. In my main.py, I am instantiating that object and getting the completed url back to be sent to the Job class in my main.py to do its scraping stuff.
I would like to deploy this in the cloud in the future and have it run on a time interval (i.e. every day, 4 hours, etc), but I have noticed a problem. Every time this program is ran, the UrlObj class will be called, prompting the user to enter the relevant data to construct the URL. Since this will be in the cloud running in the background, no one will be able to input the prompts every time its built and ran.
What I want is for url.py and UrlObj to be called only once, in the beginning to allow the user to input and populate the necessary fields to construct the url. Then, every time the program is built and ran, the url the user made in the beginning should be used, not calling url.py and UrlObj to prompt the user again to type in inputs since it will be running in the cloud and on a time interval.
Is it too naive to think to set conditions around url = UrlObj().url to make sure it runs once. Like an if statement or while loop?
url.py:
class UrlObj:
def __init__(self):
self.location = self.get_location() # Location(i.e. City) being searched
self.postal_code = self.get_postal_code() # Postal code of location being searched
self.query = self.get_query() # Search for the type of items that will be searched
self.max_price = self.get_max_price() # Max price of the items that will be searched
self.radius = self.get_radius() # Radius of the area searched derived from the postal code given previously
self.url = f"https://{self.location}.craigslist.org/search/sss?&max_price={self.max_price}&postal={self.postal_code}&query={self.query}&20card&search_distance={self.radius}"
def get_location(self):
location = input("Please enter the location: ")
return location
def get_postal_code(self):
postal_code = input("Please enter the postal code: ")
return postal_code
def get_query(self):
query = input("Please enter the item: ")
return query
def get_max_price(self):
max_price = input("Please enter the max price: ")
return max_price
def get_radius(self):
radius = input("Please enter the radius: ")
return radius
main.py:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import pandas as pd
from url import *
class Job():
def __init__(self):
self.driver = webdriver.Chrome(r"C:\Program Files\chromedriver") # Path of Chrome web driver
self.delay = 5 # The delay the driver gives when loading the web page
# Load up the web page
# Gets all relevant data on the page
# Goes to next page until we are at the last page
def load_craigslist_url(self, url):
data = []
self.driver.get(url)
while True:
try:
wait = WebDriverWait(self.driver, self.delay)
wait.until(EC.presence_of_element_located((By.ID, "searchform")))
data.append(self.extract_post_titles())
WebDriverWait(self.driver, 2).until(
EC.element_to_be_clickable((By.XPATH, '//*[#id="searchform"]/div[3]/div[3]/span[2]/a[3]'))).click()
except:
break
return data
# # Extracts all relevant information from the web-page and returns them as individual lists
def extract_post_titles(self):
all_posts = self.driver.find_elements_by_class_name("result-row")
dates_list = []
titles_list = []
prices_list = []
distance_list = []
for post in all_posts:
title = post.text.split("$")
if title[0] == '':
title = title[1]
else:
title = title[0]
title = title.split("\n")
price = title[0]
title = title[-1]
title = title.split(" ")
month = title[0]
day = title[1]
title = ' '.join(title[2:])
date = month + " " + day
if not price[:1].isdigit():
price = "0"
int(price)
raw_distance = post.find_element_by_class_name(
'maptag').text
distance = raw_distance[:-2]
titles_list.append(title)
prices_list.append(price)
dates_list.append(date)
distance_list.append(distance)
return titles_list, prices_list, dates_list, distance_list
# # Kills browser
def kill(self):
self.driver.close()
#staticmethod
def organizeResults(results):
titles_list = results[0][0]
prices_list = list(map(int, results[0][1]))
dates_list = results[0][2]
distance_list = list(map(float, results[0][3]))
list_of_attributes = []
for i in range(len(titles_list)):
content = {'Listing': titles_list[i], 'Price': prices_list[i], 'Date posted': dates_list[i],
'Distance from zip': distance_list[i]}
list_of_attributes.append(content)
list_of_attributes.sort(key=lambda x: x['Distance from zip'])
return list_of_attributes
#staticmethod
def to_csv(dictionary):
df = pd.DataFrame(dictionary)
df.to_csv('data.csv', index=False)
if __name__ == '__main__':
# This should be called only once!!!
# Then the 'url' should be used every time main.py is built and ran, and not be constructed again by calling 'UrlObj().url'
url = UrlObj().url
scraper = Job()
results = scraper.load_craigslist_url(url)
scraper.kill()
dictionary_of_listings = scraper.organizeResults(results)
scraper.to_csv(dictionary_of_listings)
Here is my code so far:
import flickrapi
import osmapi
import geopy
from geopy.geocoders import Nominatim
import overpy
import requests
import xmltodict
from time import sleep
api_key = "xxxxxxxxxxxxxxxxxxxx"
secret_api_key = "xxxxxxx"
flickr = flickrapi.FlickrAPI(api_key, secret_api_key)
def obtainImages3():
group_list = flickr.groups.search (api_key=api_key, text = 'Paris', per_page = 10)
for group in group_list[0]:
group_images = flickr.groups.pools.getPhotos (api_key=api_key, group_id = group.attrib['nsid'], extras = 'geo, tags, url_s')
for image in group_images[0]:
url = str(image.attrib['url_s'])
tags = (image.attrib['tags']).encode('utf-8')
#ONLY RUN THIS CODE IF IMAGE HAS GEO INFO
photo_location = flickr.photos_geo_getLocation(photo_id=image.attrib['id'])
lat = float(photo_location[0][0].attrib['latitude'])
lon = float(photo_location[0][0].attrib['longitude'])
geolocator = Nominatim()
location = geolocator.reverse("{}, {}".format(lat, lon))
dict = location.raw
osmid = dict.get('osm_id', 'default_value_if_null_here')
osmtype = dict.get('osm_type', 'default_value_if_null_here')
osmaddress = dict.get('display_name', 'default_value_if_null_here')
sleep(1)
if(osmtype == 'node'):
node_info = requests.get("http://api.openstreetmap.org/api/0.6/node/"+ osmid)
#print node_info.content
d = xmltodict.parse(node_info.content)
amenity_tags = [tag for tag in d['osm']['node']['tag'] if tag['#k'] == 'amenity']
amenity_name = [tag for tag in d['osm']['node']['tag'] if tag['#k'] == 'name']
if len(amenity_tags) != 0:
amenity_type = amenity_tags[0]['#v']
amenity_type_name = amenity_name[0]['#v']
print amenity_type
print amenity_type_name
print url
obtainImages3()
I want to run the code following the comment if and only if geo-info is available on the image obtained. In my group_images api call, I am asking for geo info to be returned if it it present.
I have tried for example if image.attrib['geo'] != None, and have tried a try/ exception block but all are returning errors, especially key errors.
Can anyone suggest an easy way to figure out if the image has geo info, so that the code below the comment will only run if it is available?
I have a form in google app engine where I want to upload an image and all my text at the same time. Do I have to seperate this into two seperate pages and actions?
Here is my upload handler:
class UploadHandler(blobstore_handlers.BlobstoreUploadHandler):
def upload(self, reseller_id, imgfile):
upload_files = imgfile
blob_info = upload_files[0]
key = blob_info.key()
r = Reseller.get_by_id(reseller_id)
r.blob_key_logo = str(key)
r.put();
Here is my creation of a new reseller object:
class NewReseller(BaseHandler):
def get(self):
if self.user:
self.render("new_reseller.html")
else:
self.redirect("/display_resellers")
def post(self):
name = self.request.get('name')
website = self.request.get('website')
information = self.request.get('information')
address = self.request.get('address')
city = self.request.get('city')
state = self.request.get('state')
zipcode = self.request.get('zipcode')
email = self.request.get('email')
phone = self.request.get('phone')
r = Reseller( name = name,
website = website,
information = information,
address = address,
city = city,
state = state,
zipcode = zipcode,
email = email,
phone = phone)
r.put()
theresellerid = r.key().id()
#And then Upload the image
u = UploadHandler()
logo_img = u.get_uploads('logo_img')
u.upload(theid, logo_img)
self.redirect('/display_resellers')
I think my problem here is this line:
logo_img = u.get_uploads('logo_img')
it pops out the error message
for key, value in self.request.params.items():
AttributeError: 'NoneType' object has no attribute 'params'
Somehow I need this NewReseller class to inherit the .getuploads from BlobstoreUploadHandler so I can do:
logo_img = self.get_uploads('logo_img')
Or there is probably a better way because this seems a little messy.
So my question is how to upload files and data in one form on just one page. I could do it with two seperate pages. One for adding the reseller and one for adding the image but that seems over complicated.
I tried to follow some steps and clues from this question:
Upload files in Google App Engine
******Edit***** Working Implementation Below:
class EditReseller(BaseHandler, blobstore_handlers.BlobstoreUploadHandler):
def get(self, reseller_id):
if self.user:
reseller = Reseller.get_by_id(int(reseller_id))
upload_url = blobstore.create_upload_url('/upload')
image = True
if reseller.blob_key_logo is None:
image = False
self.render('edit_reseller.html', r=reseller, reseller_id=reseller_id, upload_url=upload_url, image=image)
else:
self.redirect('/admin')
class UploadHandler(blobstore_handlers.BlobstoreUploadHandler):
def post(self):
reseller_id = self.request.get('reseller_id')
upload_files = self.get_uploads('logo_img')
if upload_files:
blob_info = upload_files[0]
key = blob_info.key()
r = Reseller.get_by_id(int(reseller_id))
r.blob_key_logo = str(key)
r.put();
name = self.request.get('name')
website = self.request.get('website')
information = self.request.get('information')
address = self.request.get('address')
city = self.request.get('city')
state = self.request.get('state')
zipcode = self.request.get('zipcode')
email = self.request.get('email')
phone = self.request.get('phone')
if name and website and information and email and phone and address and city and state and zipcode:
r = Reseller.get_by_id(int(reseller_id))
r.name = name
r.website = website
r.information = information
r.address = address
r.city = city
r.state = state
r.zipcode = zipcode
r.email = email
r.phone = phone
r.put()
else:
error = "Looks like your missing some critical info"
self.render("edit_reseller.html", name=name, website=website, information=information, address=address, city=city, zipcode=zipcode, email=email, phone=phone, error=error)
self.redirect("/edit_reseller/" + reseller_id)
You just need to put the logic of the UploadHandler inside the Reseller(BaseHandler) and make Reseller inherit from blobstore_handlers.BlobstoreUploadHandler.
The call to get_uploads fails, as the NewReseller Class does not inherit from BlobstoreUploadHandler. The BlobstoreUploadHandler class takes over the upload operation so you do not need to create a post method, just add the corresponding logic from post ( name = self.request.get('name'), r = Reseller(), r.put(), etc. ) and add it to the upload method.
You should not call or create a new a handler instance by hand (unless you know what you are doing), as it would be missing the things that make it work.
The complete app sample at the official docs, might also be helpful.