How do I download videos from Pexels API? - python

I have this code that can pull images off of Pexels, but I don't know how to change it to download videos. I haven't seen anyone do this before, and any help would be greatly appreciated. I tried switching all the photo tags to videos, but that did not work. I've also tried adding more libraries, but that doesn't seem to work either.
import argparse
import json
import os
import time
import requests
import tqdm
from pexels_api import API
# Pexels API key, read from the PEXELS_KEY environment variable (KeyError if it is unset).
PEXELS_API_KEY = os.environ['PEXELS_KEY']
# Upper bound on the number of results collected for one query.
MAX_IMAGES_PER_QUERY = 100
# Page size requested on each search call.
RESULTS_PER_PAGE = 10
# Number of pages to request for one query.
# NOTE(review): true division makes this a float (10.0 here); it is only used in a `<=` comparison.
PAGE_LIMIT = MAX_IMAGES_PER_QUERY / RESULTS_PER_PAGE
def get_sleep(t):
    """Return a zero-argument callable that pauses for ``t`` seconds when called."""
    return lambda: time.sleep(t)
def _download_file(url, destination, timeout=30):
    """Stream ``url`` into ``destination`` without leaving a partial file behind."""
    partial_path = destination + '.part'
    try:
        with requests.get(url, stream=True, timeout=timeout) as response:
            # An HTTP error must not be saved as if it were the image.
            response.raise_for_status()
            with open(partial_path, 'wb') as outfile:
                for chunk in response.iter_content(chunk_size=64 * 1024):
                    outfile.write(chunk)
        # Only a complete download gets the final name, so the
        # "already downloaded" check never sees a truncated file.
        os.replace(partial_path, destination)
    finally:
        if os.path.exists(partial_path):
            os.remove(partial_path)


def main(args):
    """Search Pexels for ``args.query`` and download every photo found.

    Args:
        args: parsed command-line namespace providing ``query`` (search
            term), ``path`` (output directory), ``resolution`` (key into
            each photo's ``src`` mapping) and ``sleep`` (pause in seconds
            between page requests).

    The collected metadata is written to ``<path>/<query>.json`` and each
    image is saved next to it. Files that already exist are skipped; a
    failed download is reported and the run continues.
    """
    sleep = get_sleep(args.sleep)
    api = API(PEXELS_API_KEY)
    query = args.query
    page = 1
    counter = 0
    photos_dict = {}

    # Step 1: Getting urls and meta information
    while page <= PAGE_LIMIT:
        api.search(query, page=page, results_per_page=RESULTS_PER_PAGE)
        # Treat a missing result set as an empty page instead of crashing tqdm.
        photos = api.get_entries() or []
        for photo in tqdm.tqdm(photos):
            # Reads the name-mangled private attribute of the photo object;
            # presumably the raw JSON record for that photo — TODO confirm.
            photos_dict[photo.id] = vars(photo)['_Photo__photo']
            counter += 1
        if not api.has_next_page:
            break
        page += 1
        sleep()
    print(f"Finishing at page: {page}")
    print(f"Images were processed: {counter}")

    # Step 2: Downloading
    if not photos_dict:
        return
    os.makedirs(args.path, exist_ok=True)
    # Saving dict
    with open(os.path.join(args.path, f'{query}.json'), 'w') as fout:
        json.dump(photos_dict, fout)
    for val in tqdm.tqdm(photos_dict.values()):
        url = val['src'][args.resolution]
        # The file name always comes from the original URL, whatever
        # resolution is actually downloaded.
        fname = os.path.basename(val['src']['original'])
        image_path = os.path.join(args.path, fname)
        if os.path.isfile(image_path):  # ignore if already downloaded
            print(f"File exists: {image_path}")
            continue
        try:
            _download_file(url, image_path)
        except requests.RequestException as error:
            print(f"Download failed: {url} ({error})")
if __name__ == '__main__':
    # Command-line entry point: collect the options and hand them to main().
    resolution_choices = ['original', 'large2x', 'large', 'medium', 'small',
                          'portrait', 'landscape', 'tiny']
    cli = argparse.ArgumentParser()
    cli.add_argument('--query', type=str, required=True)
    cli.add_argument('--path', type=str, default='./results_pexels')
    cli.add_argument('--resolution', choices=resolution_choices,
                     default='original')
    cli.add_argument('--sleep', type=float, default=0.1)
    main(cli.parse_args())

Sorry for bumping this question. I just faced a similar situation when downloading videos from Pexels using the Python API wrapper pexelsPy. This may be helpful:
I retrieved the ID of each video and then built the download URL, which has the following structure: "https://www.pexels.com/video/" + ID + "/download".
See the following example:
def download_video(type_of_videos):
    """Download one not-yet-retrieved landscape video for a random tag.

    Args:
        type_of_videos: sequence of search tags, e.g.
            ``['happy', 'sad', 'relax']``; one is chosen at random.

    Returns:
        The file name of the saved ``.mp4``, or ``None`` when the search
        results are exhausted without finding a suitable video.

    Raises:
        requests.HTTPError: if the download request is rejected.
    """
    video_tag = random.choice(type_of_videos)
    PEXELS_API = '-'  # please add your API Key here
    api = API(PEXELS_API)
    retrieved_videos = read_already_download_files('downloaded_files.txt')
    num_page = 1
    while True:
        api.search_videos(video_tag, page=num_page, results_per_page=10)
        videos = api.get_videos()
        if not videos:
            # Out of results: stop instead of requesting pages forever.
            return None
        for data in videos:
            if data.width <= data.height:  # keep horizontal orientation only
                continue
            if data.url in retrieved_videos:
                continue
            # Recording the URL is left disabled, as in the original:
            # write_file('downloaded_files.txt', data.url)
            # create the url with the video id
            url_video = 'https://www.pexels.com/video/' + str(data.id) + '/download'
            r = requests.get(url_video, timeout=60)
            # Do not save an HTTP error page as if it were the video.
            r.raise_for_status()
            file_name = data.url.split('/')[-2] + '.mp4'
            with open(file_name, 'wb') as outfile:
                outfile.write(r.content)
            return file_name
        num_page += 1
The download_video function takes a list of strings with several tags, e.g. ['happy','sad','relax'], and randomly chooses one of these tags.
PEXELS_API should contain your API Key.
read_already_download_files('downloaded_files.txt'): Retrieves already downloaded files to check if the current found file is already downloaded.

from pypexels import PyPexels
import requests
api_key = 'api id'
# instantiate PyPexels object
py_pexel = PyPexels(api_key=api_key)
search_videos_page = py_pexel.videos_search(query="love", per_page=40)
# Only the first search hit is downloaded. To walk further pages, loop while
# search_videos_page.has_next and call search_videos_page.get_next_page().
for video in search_videos_page.entries:
    print(video.id, video.user.get('name'), video.url)
    data_url = 'https://www.pexels.com/video/' + str(video.id) + '/download'
    r = requests.get(data_url, timeout=60)
    # Fail loudly rather than writing an HTTP error page to sample.mp4.
    r.raise_for_status()
    print(r.headers.get('content-type'))
    with open('sample.mp4', 'wb') as outfile:
        outfile.write(r.content)
    break

I just tried to do the same. When I was looking for it, I wanted a simple example. All other fancy stuff I was sure I could add myself. So, I built upon inou's answer. The shown example is very basic and requests one page with only 5 results using the 'Tiger' tag in the search query. I download the first video using its id provided by the response and simply write it to the source folder. The api is provided by pexelsPy and the request is executed using the standard requests package. To get access to the API, you need to create a key on pexels website (see here). Once you get your own API key, you should be able to simply substitute the shown example key and run the code as a test.
import pexelsPy
import requests
# Example key only: substitute your own Pexels API key.
PEXELS_API = '16gv62567257256iu78krtuzwqsddudrtjberzabzwzjsrtgswnr'
api = pexelsPy.API(PEXELS_API)
api.search_videos('Tiger', page=1, results_per_page=5)
videos = api.get_videos()
if not videos:
    # Indexing an empty (or missing) result would otherwise fail obscurely.
    raise SystemExit('No videos found for the search query.')
# Build the download URL from the id of the first hit.
url_video = 'https://www.pexels.com/video/' + str(videos[0].id) + '/download'
r = requests.get(url_video, timeout=60)
# Do not save an HTTP error page as test.mp4.
r.raise_for_status()
with open('test.mp4', 'wb') as outfile:
    outfile.write(r.content)

You can download multiple videos with this code :
import pexelsPy
import requests
PEXELS_API = '-'  # put your own Pexels API key here
api = pexelsPy.API(PEXELS_API)
# NOTE(review): the Pexels API documents a maximum of 80 results per page,
# so fewer than the 100 requested here may come back — confirm.
api.search_videos('nature', page=2, results_per_page=100, orientation='landscape')
videos = api.get_videos() or []
for i, video in enumerate(videos):
    url_video = 'https://www.pexels.com/video/' + str(video.id) + '/download'
    r = requests.get(url_video, timeout=60)
    # Stop on an HTTP error instead of saving the error page as a video.
    r.raise_for_status()
    with open(f'test_{i}.mp4', 'wb') as outfile:
        outfile.write(r.content)
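This requests 100 videos from the second page of results (note that the Pexels API documents a maximum of 80 results per page, so fewer may be returned). Each video is written to a separate file named test_0.mp4, test_1.mp4, and so on.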

Related

Uploading local images to microsoft cognitive face

Error Screenshot
import sys
import os, time
import cognitive_face as CF
import global_variables as global_var
import urllib
import sqlite3
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning
# Silence urllib3's InsecureRequestWarning for the whole process.
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
# Subscription key, taken from the project's global_variables module.
Key = global_var.key
CF.Key.set(Key)
# Service base URL, also taken from global_variables.
BASE_URL = global_var.BASE_URL # Replace with your regional Base URL
CF.BaseUrl.set(BASE_URL)
def get_person_id():
    """Look up the person id of the student named on the command line.

    The last two characters of ``sys.argv[1]`` are taken as the numeric
    student ID. The fourth column of the matching ``Students`` row in the
    ``Face-DataBase`` SQLite file is returned.

    Raises:
        ValueError: if those two characters are not a number.
        LookupError: if no student has that ID.
    """
    extract_id = str(sys.argv[1])[-2:]
    try:
        student_id = int(extract_id)
    except ValueError:
        raise ValueError("student ID must be numeric, got %r" % extract_id)
    connect = sqlite3.connect("Face-DataBase")
    try:
        # Bind the ID as a parameter rather than concatenating it into the
        # SQL text, so command-line input can never alter the statement.
        row = connect.execute(
            "SELECT * FROM Students WHERE ID = ?", (student_id,)
        ).fetchone()
    finally:
        # Close the connection even when the query fails.
        connect.close()
    if row is None:
        raise LookupError("no student with ID %d" % student_id)
    return row[3]
# Script body: runs when at least one command-line argument (the dataset
# sub-folder name) is given; otherwise prints a usage hint.
# NOTE(review): `is not 1` compares identity, not value; `!= 1` is presumably what is meant.
if len(sys.argv) is not 1:
currentDir = os.path.dirname(os.path.abspath(__file__))
imageFolder = os.path.join(currentDir, "dataset/" + str(sys.argv[1]))
person_id = get_person_id()
for filename in os.listdir(imageFolder):
if filename.endswith(".jpg"):
print(filename)
# Converts the local file path to URL-path form. The question text below
# identifies this line as where the error occurs.
# NOTE(review): only `import urllib` is visible above; `urllib.request` may not be loaded — confirm.
imgurl = urllib.request.pathname2url(os.path.join(imageFolder, filename))
# Drops the first three characters of the converted path.
imgurl = imgurl[3:]
print("imageurl = {}".format(imgurl))
res = CF.face.detect(imgurl)
# Only images with exactly one detection result are added to the person.
if len(res) != 1:
print("No face detected in image")
else:
res = CF.person.add_face(imgurl, global_var.personGroupId, person_id)
print(res)
# Pause between images; presumably for service rate limiting — TODO confirm.
time.sleep(6)
else:
print("supply attributes please from dataset folder")
I think the images should be converted to a byte array, but I don't know how to do it. Local images have to be uploaded to the Cognitive Services API. I have tried many ways but cannot solve the error.
imgurl = urllib.request.pathname2url(os.path.join(imageFolder, filename))
Above line is where error exists
Welcome to Stack Overflow, @arun.
First of all, as per here, the API you're using is deprecated, and you should switch to this one instead.
Second, in this new API, there is a method called detect_with_stream (ref here), that will make a request to the Face Recognition endpoint, using the byte stream instead of an URL (it will use different request headers than the URL-based method). This method accepts a stream of bytes containing your image. I've worked with another cognitive services API that performs text recognition, and so I've faced this problem of sending an image URL or the image byte stream. You can generate a bytestream from the file as follows:
# Read the whole image as bytes; the with-block closes the file handle.
with open(image_path, "rb") as image_file:
    image_data = image_file.read()
The variable image_data can be passed to the method.
Edit: Instructions on how to use the new API with the image bytestream
First, install the following pip package:
pip install azure-cognitiveservices-vision-face
Then, you can try this approach.
import sys
import os, time
import global_variables as global_var
from azure.cognitiveservices.vision.face import FaceClient
from msrest.authentication import CognitiveServicesCredentials
import urllib
import sqlite3
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning
# Silence urllib3's InsecureRequestWarning for the whole process.
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
# Subscription key and service endpoint, taken from the project's global_variables module.
KEY = global_var.key
ENDPOINT = global_var.endpoint
# Client object used for all Face API calls below.
face_client = FaceClient(ENDPOINT,CognitiveServicesCredentials(KEY))
def get_person_id():
    """Look up the person id of the student named on the command line.

    The last two characters of ``sys.argv[1]`` are taken as the numeric
    student ID. The fourth column of the matching ``Students`` row in the
    ``Face-DataBase`` SQLite file is returned.

    Raises:
        ValueError: if those two characters are not a number.
        LookupError: if no student has that ID.
    """
    extract_id = str(sys.argv[1])[-2:]
    try:
        student_id = int(extract_id)
    except ValueError:
        raise ValueError("student ID must be numeric, got %r" % extract_id)
    connect = sqlite3.connect("Face-DataBase")
    try:
        # Bind the ID as a parameter rather than concatenating it into the
        # SQL text, so command-line input can never alter the statement.
        row = connect.execute(
            "SELECT * FROM Students WHERE ID = ?", (student_id,)
        ).fetchone()
    finally:
        # Close the connection even when the query fails.
        connect.close()
    if row is None:
        raise LookupError("no student with ID %d" % student_id)
    return row[3]
# Script body: needs the dataset sub-folder name as the first CLI argument.
if len(sys.argv) != 1:  # value comparison; `is not 1` tested identity
    currentDir = os.path.dirname(os.path.abspath(__file__))
    imageFolder = os.path.join(currentDir, "dataset/" + str(sys.argv[1]))
    person_id = get_person_id()
    for filename in os.listdir(imageFolder):
        if not filename.endswith(".jpg"):
            continue
        print(filename)
        # os.listdir returns bare names, so join with the folder before opening.
        with open(os.path.join(imageFolder, filename), "rb") as image_stream:
            # The *_stream methods take a readable stream, so the open file
            # object is passed rather than its bytes.
            res = face_client.face.detect_with_stream(image_stream)
            if not res:
                print('No face detected from image {}'.format(filename))
                continue
            # Rewind: the detect call has consumed the stream.
            image_stream.seek(0)
            res = face_client.person_group_person.add_face_from_stream(
                global_var.personGroupId, person_id, image_stream)
        print(res)
        # Pause between uploads, as in the original script.
        time.sleep(6)
else:
    print("supply attributes please from dataset folder")
Edit 2: Notes on traversing all the files in a directory
OK @arun, your current problem stems from the fact that you're using os.listdir, which only lists the file names, so you don't have their full paths. The quickest solution is to open each image inside the loop with:
# Join the folder and the bare file name, then read the image as bytes;
# the with-block closes the file handle.
with open(os.path.join(imageFolder, filename), "rb") as image_file:
    img_data = image_file.read()

Recording youtube live stream to file in python

I want to record youtube live stream and save it to file using python.
I tried with pytube library but it probably works for videos, not for live streams.
I want to record stream and save it to file with video format as avi or something like this.
This is a modification based on @wownis's answer.
(I tried that answer, however, it doesn't work.)
# pip install urllib
# pip install m3u8
# pip install streamlink
import urllib
import m3u8
import streamlink
def get_stream(url):
    """
    Return the first segment of the playlist behind the "best" stream of ``url``.
    """
    best_stream = streamlink.streams(url)["best"]
    playlist = m3u8.load(best_stream.args['url'])
    first_segment = playlist.segments[0]
    return first_segment
def dl_stream(url, filename, chunks):
    """
    Poll a live stream and save each newly seen segment to its own file.

    The playlist is fetched ``chunks + 1`` times. Whenever the first
    segment's timestamp differs from the one saved last, that segment is
    appended to ``<filename>_<YYYYmmdd-HHMMSS>.ts``.
    """
    # Plain `import urllib` does not guarantee the submodule is loaded.
    import urllib.request

    pre_time_stamp = 0
    for _ in range(chunks + 1):
        stream_segment = get_stream(url)
        cur_time_stamp = \
            stream_segment.program_date_time.strftime("%Y%m%d-%H%M%S")
        if cur_time_stamp == pre_time_stamp:
            # Same segment as the last one saved: nothing new yet.
            continue
        print(cur_time_stamp)
        target = filename + '_' + str(cur_time_stamp) + '.ts'
        # Both the HTTP response and the output file are closed on exit.
        with urllib.request.urlopen(stream_segment.uri) as response, \
                open(target, 'ab+') as file:
            file.write(response.read())
        pre_time_stamp = cur_time_stamp
# Example run: poll this YouTube URL with chunks=15 and write files prefixed "live".
url = "https://www.youtube.com/watch?v=2U3JnFbD-es"
dl_stream(url, "live", 15)
Output like this:
./
live_20200713-103739.ts
live_20200713-103744.ts
...
I found a solution; here is my code in Python:
import urllib
import m3u8
import streamlink
def record_stream(url, filename, iterations):
    """Append newly published segments of a live stream to ``<filename>.ts``.

    The playlist is polled ``iterations + 1`` times. The first poll only
    records the timestamp of the newest segment. Each later poll downloads,
    oldest first, every segment listed after the newest one seen by the
    previous poll. If that segment is no longer listed, all listed segments
    are downloaded.
    """
    # Plain `import urllib` does not guarantee the submodule is loaded.
    import urllib.request

    last_part = 0
    for poll in range(iterations + 1):
        streams = streamlink.streams(url)
        stream_url = streams["best"]
        print(stream_url.args['url'])
        m3u8_obj = m3u8.load(stream_url.args['url'])
        previous_part_time = last_part
        last_part = m3u8_obj.segments[-1].program_date_time
        if poll == 0:
            # Nothing to compare against yet; just remember the newest segment.
            continue
        # Walk back from the newest segment until the one already seen.
        new_segments = []
        for segment in reversed(m3u8_obj.segments):
            if segment.program_date_time == previous_part_time:
                break
            new_segments.append(segment)
        print(len(new_segments))
        with open(filename + ".ts", "ab+") as file:
            # Restore chronological order before appending.
            for segment in reversed(new_segments):
                with urllib.request.urlopen(segment.uri) as response:
                    file.write(response.read())
# Example run: record this YouTube URL into "file.ts" with iterations=10.
url = "https://www.youtube.com/watch?v=BgKGctL0u1U"
record_stream(url,"file",10)
Here 10 is the number of iterations: if each chunk is 2 s long, this records about 20 s of the stream.

Working with Tenor's API

My problem is that I don't know how to work with the result of the search of a gif. I used an example, I know how to modify some parameters but I don't know how to build the gifs of the result. Code:
import requests
import json
# Tenor API key and the number of results to request.
apikey = "MYKEY" # test value
lmt = 8

# A returning user's anonymous ID would be loaded from cookies or other
# storage. For a first-time user it is requested here and should be stored
# for later re-use; an empty ID is used when the request fails.
r = requests.get("https://api.tenor.com/v1/anonid?key=%s" % apikey)
anon_id = json.loads(r.content)["anon_id"] if r.status_code == 200 else ""

# Search term for the test query.
search_term = "love"

# Request the top GIFs for the search term.
search_url = ("https://api.tenor.com/v1/search?q=%s&key=%s&limit=%s&anon_id=%s" %
              (search_term, apikey, lmt, anon_id))
r = requests.get(search_url)
if r.status_code != 200:
    top_8gifs = None
else:
    # Decoded JSON response describing the matching GIFs.
    top_8gifs = json.loads(r.content)
    print (top_8gifs)
I would like to download the files. I know I can do it with urllib and requests, but the problem is that I don't even know what top_8gifs is.
I hope someone can help me. Thanks for your attention!
First of all you have to use a legitimate key instead of MYKEY. Once you have done that you'll observe this code will print the output of the GET request that you have sent. It is a json file which is similar to a dictionary in python. So now you can exploit this dictionary and obtain the urls. The best strategy is to simply print out the output of json and observe the structure of dictionary carefully and extract the url from it. If you want more clarity we can use pprint module in python. It is pretty awesome and will show you how a json file looks properly. Here is the modified version of your code which pretty prints the json file, prints the gif urls and downloads the gif files. You can improve upon it and play with it if you want.
import requests
import json
import urllib.request,urllib.parse,urllib.error
import pprint
# set the apikey and limit
apikey = "YOURKEY" # test value
lmt = 8

# A returning user's anonymous ID would be loaded from cookies or other
# storage. For a first-time user it is requested here and should be stored
# for later re-use; an empty ID is used when the request fails.
r = requests.get("https://api.tenor.com/v1/anonid", params={"key": apikey})
if r.status_code == 200:
    anon_id = json.loads(r.content)["anon_id"]
else:
    anon_id = ""

# our test search
search_term = "love"

# Get the top GIFs for the search term. Passing the values through `params`
# lets requests URL-encode them, so terms with spaces or '&' work.
r = requests.get(
    "https://api.tenor.com/v1/search",
    params={"q": search_term, "key": apikey, "limit": lmt, "anon_id": anon_id})

if r.status_code == 200:
    pp = pprint.PrettyPrinter(indent=4)
    top_8gifs = json.loads(r.content)
    pp.pprint(top_8gifs)  # pretty prints the decoded JSON
    for i, result in enumerate(top_8gifs['results']):
        url = result['media'][0]['gif']['url']  # the GIF url from the JSON
        print (url)
        urllib.request.urlretrieve(url, str(i)+'.gif')  # downloads the gif file
else:
    top_8gifs = None

Get movie information from IMDb API website

I used scrapy spiders to crawl the IMDb ID from the IMDb website already.
So now, I am going to use the IMDb API website & the IMDb ID I have collected to build a dictionary and save it into a json file.
import requests
import json
def query_url(id):
    """Return the OMDb API query URL for the IMDb ID ``id``.

    The parameter keeps its original name (which shadows the builtin) so
    existing keyword callers continue to work.
    """
    # The assignment was commented out, so the function returned itself.
    return 'http://www.omdbapi.com/?i=' + id + '&plot=short&r=json'
def get_movie_ids(input_file):
    """Read IMDb IDs from ``input_file``, one per line.

    Returns:
        A list such as ``['tt0407887', 'tt1212123', ...]``. Surrounding
        whitespace is stripped and blank lines are skipped.
    """
    # The body was commented out, so `id_list` raised NameError.
    with open(input_file, 'r') as f:
        stripped_lines = (line.strip() for line in f)
        return [movie_id for movie_id in stripped_lines if movie_id]
def get_all_data(in_file, out_file):
    """Fetch the record for every IMDb ID in ``in_file`` and save them as JSON.

    Args:
        in_file: path of the text file holding one IMDb ID per line.
        out_file: path of the JSON file to write.

    Records are keyed by a running counter starting at 0. An ID whose
    response is not valid JSON is skipped.
    """
    movie_data_dict = {}
    movie_ids = get_movie_ids(in_file)
    id_counter = 0
    # The with-block closes the session's connections when done.
    with requests.Session() as session:
        for movie_id in movie_ids:
            url = query_url(movie_id)
            try:
                movie_data = session.get(url).json()
            except ValueError:
                # Corrupted JSON: skip this ID rather than storing an
                # undefined or stale record.
                continue
            movie_data_dict[id_counter] = movie_data
            id_counter += 1
    with open(out_file, 'w') as f:
        json.dump(movie_data_dict, f)
# Script entry point: read the crawled ID file and write the collected data as JSON.
if __name__ == '__main__':
movie_id_file = r'../IMDbIDCrawler/movie_id10-15' # the IMDb ID crawled by Scrapy
movie_data_file = 'IMDb2010-2015.json'
get_all_data(movie_id_file, movie_data_file)
When I run the code from the command prompt, it simply doesn't run.
I don't know what I have done wrong in the above script.
This is Python homework. The lines marked with # were written by me, while the rest of the code was provided. The pace of the course is very fast and I can't keep up, and I have no one to ask. So please forgive me if I am asking a really basic beginner question.

Why can't I upload a glitched image to Tumblr with Python?

My goal is to have a program that downloads an image, glitches it and then uploads the glitched image. So far my code looks like this:
import urllib
import random
import pytumblr
from tumblr_keys import
from BeautifulSoup import BeautifulStoneSoup
# Authenticate via OAuth
# NOTE(review): the four credential names are presumably meant to come from
# tumblr_keys; the import line above is incomplete as shown — confirm.
client = pytumblr.TumblrRestClient(
consumer_key,
consumer_secret,
token_key,
token_secret
)
def download_an_image(image_url):
    """Save the image at ``image_url`` into the working directory.

    The local file name is the last path component of the URL; it is
    returned to the caller.
    """
    local_name = image_url.rsplit('/', 1)[-1]
    #filefinal = filename[:-4 ] + '.png'
    urllib.urlretrieve(image_url, local_name)
    return local_name
def get_random_start_and_end_points_in_file(file_data, header_size=2600):
    """Pick a random ``(start, end)`` range inside ``file_data``.

    Args:
        file_data: sized sequence (string or bytes) holding the file content.
        header_size: number of leading items never chosen as the start, so
            the beginning of the file is left untouched.

    Returns:
        A tuple ``(start_point, end_point)`` with
        ``min(header_size, len(file_data)) <= start_point <= end_point
        <= len(file_data)``.
    """
    data_length = len(file_data)
    # Clamp the lower bound: for data shorter than header_size,
    # randint(header_size, data_length) would raise on an empty range.
    first_allowed = min(header_size, data_length)
    start_point = random.randint(first_allowed, data_length)
    end_point = start_point + random.randint(0, data_length - start_point)
    return start_point, end_point
def splice_a_chunk_in_a_file(file_data):
    """Replace a random range of ``file_data`` with a randomly chosen chunk.

    A random section of the data is selected. With equal chance either that
    section or nothing is then inserted in place of a second, independently
    chosen random range. The modified data is returned.
    """
    chunk_start, chunk_end = get_random_start_and_end_points_in_file(file_data)
    section = file_data[chunk_start:chunk_end]
    # randint(1, 2) == 2 is the only case in which the chunk is kept.
    repeated = section if random.randint(1, 2) == 2 else ''
    cut_start, cut_end = get_random_start_and_end_points_in_file(file_data)
    head = file_data[:cut_start]
    tail = file_data[cut_end:]
    return head + repeated + tail
def glitch_an_image(local_image):
    """Corrupt the image file at ``local_image`` in place and return its path.

    The file is read, passed through ``splice_a_chunk_in_a_file`` zero or
    one times, and written back under the same name.
    """
    # Binary mode: image bytes must not go through newline or encoding
    # translation. The with-blocks close the file; the original
    # `file_handler.close` lacked the call parentheses.
    with open(local_image, 'rb') as file_handler:
        file_data = file_handler.read()
    # range(1, randint(0, 2)) is empty unless randint returns 2, so at most
    # one splice is applied.
    for _ in range(1, random.randint(0, 2)):
        file_data = splice_a_chunk_in_a_file(file_data)
    with open(local_image, 'wb') as file_handler:
        file_handler.write(file_data)
    return local_image
# Script entry point: download one image, glitch it in place, then post it
# to the 'glitchingimages' blog as a published photo tagged "glitch".
if __name__ == '__main__':
image_url = "https://41.media.tumblr.com/179e82abf626f870cb0b8fe93919eb67/tumblr_o4t9wtxwO31vq0p00o1_1280.png"
local_image = download_an_image(image_url)
image_glitch_file = glitch_an_image(local_image)
# `data` is given the path of the glitched file returned above.
client.create_photo('glitchingimages', state="published", tags=["glitch"], data= image_glitch_file)
To make sure the downloaded picture is always saved as a .png-file I tried running the second line in the "def download_an_image(image_url):" section. For some reason, Tumblr still would not let me upload the glitched image. I even tried uploading it and it gave me an error. But I was able to upload it to Flickr. Only if I export the .png-file as .png again, I can upload it to Tumblr.
Do you know a way to avoid exporting the image manually? Is there maybe a better way to make sure the downloaded image is save as a .png-file?
Thank you!

Categories