Save video instead of saving images while using a Basler camera and Python

I'm using a Basler camera and Python to record some video. I can successfully capture individual frames, but I don't know how to record a video.
Following is my code:
import os
import pypylon
from imageio import imwrite
import time
start=time.time()
print('Sampling rate (Hz):')
fsamp = input()
fsamp = float(fsamp)
time_exposure = 1000000*(1/fsamp)
available_cameras = pypylon.factory.find_devices()
cam = pypylon.factory.create_device(available_cameras[0])
cam.open()
#cam.properties['AcquisitionFrameRateEnable'] = True
#cam.properties['AcquisitionFrameRate'] = 1000
cam.properties['ExposureTime'] = time_exposure
buffer = tuple(cam.grab_images(2000))
for count, image in enumerate(buffer):
    filename = 'I:/Example/{}.png'.format(count)
    imwrite(filename, image)
del buffer

I haven't found a way to record a video using pypylon; it seems to be a pretty light wrapper around Pylon. However, I have found a way to save a video using imageio:
from imageio import get_writer
with get_writer('I:/output-filename.mp4', fps=fps) as writer:
    # Some stuff with the frames
The above can be used with .mov, .avi, .mpg, .mpeg, .mp4, .mkv or .wmv, so long as the FFmpeg program is available. How you install this program depends on your operating system. See the imageio ffmpeg documentation for details on the parameters you can use.
Then, simply replace the call to imwrite with:
writer.append_data(image)
ensuring that this occurs in the with block.
An example implementation:
import os
import pypylon
from imageio import get_writer
while True:
    try:
        fsamp = float(input('Sampling rate (Hz): '))
        break
    except ValueError:
        print('Invalid input.')
time_exposure = 1000000 / fsamp
available_cameras = pypylon.factory.find_devices()
cam = pypylon.factory.create_device(available_cameras[0])
cam.open()
cam.properties['ExposureTime'] = time_exposure
buffer = tuple(cam.grab_images(2000))
with get_writer(
    'I:/output-filename.mkv',  # mkv players often support H.264
    fps=fsamp,                 # FPS is in units Hz; should be real-time.
    codec='libx264',           # When used properly, this is basically
                               # "PNG for video" (i.e. lossless)
    quality=None,              # disables variable compression
    pixelformat='rgb24',       # keep it as RGB colours
    ffmpeg_params=[            # compatibility with older library versions
        '-preset',             # set to faster, veryfast, superfast, ultrafast
        'fast',                # for higher speed but worse compression
        '-crf',                # quality; set to 0 for lossless, but keep in mind
        '11'                   # that the camera probably adds static anyway
    ]
) as writer:
    for image in buffer:
        writer.append_data(image)
del buffer

Related

Why do I only get the Desktop wallpaper when trying to take a screenshot on a Mac?

I'm trying to take a screenshot with python. But every option I try only seems to return the desktop wallpaper and not the programs on top.
Here's a run through of what I've tried and how I'm doing it.
First I used autopy to try and get pixels from the screen using autopy.color.hex_to_rgb(autopy.screen.get_color(x, y)). But it was only telling me the pixels of the desktop background.
Then I tried PIL(Pillow) using this code:
from PIL import ImageGrab
im = ImageGrab.grab()
im.save('screenshot.png')
This only returned the desktop wallpaper.
Finally, I've tried using this script which I found on this thread.
import Quartz
import LaunchServices
from Cocoa import NSURL
import Quartz.CoreGraphics as CG
def screenshot(path, region=None):
    """region should be a CGRect, something like:

    >>> import Quartz.CoreGraphics as CG
    >>> region = CG.CGRectMake(0, 0, 100, 100)
    >>> sp = ScreenPixel()
    >>> sp.capture(region=region)

    The default region is CG.CGRectInfinite (captures the full screen)
    """
    if region is None:
        region = CG.CGRectInfinite

    # Create screenshot as CGImage
    image = CG.CGWindowListCreateImage(
        region,
        CG.kCGWindowListOptionOnScreenOnly,
        CG.kCGNullWindowID,
        CG.kCGWindowImageDefault)

    dpi = 72  # FIXME: Should query this from somewhere, e.g. for retina displays

    url = NSURL.fileURLWithPath_(path)
    dest = Quartz.CGImageDestinationCreateWithURL(
        url,
        LaunchServices.kUTTypePNG,  # file type
        1,                          # 1 image in file
        None
    )

    properties = {
        Quartz.kCGImagePropertyDPIWidth: dpi,
        Quartz.kCGImagePropertyDPIHeight: dpi,
    }

    # Add the image to the destination, characterizing the image with
    # the properties dictionary.
    Quartz.CGImageDestinationAddImage(dest, image, properties)

    # When all the images (only 1 in this example) are added to the destination,
    # finalize the CGImageDestination object.
    Quartz.CGImageDestinationFinalize(dest)

if __name__ == '__main__':
    # Capture full screen
    screenshot("/tmp/testscreenshot_full.png")

    # Capture region (100x100 box from top-left)
    region = CG.CGRectMake(0, 0, 100, 100)
    screenshot("/tmp/testscreenshot_partial.png", region=region)
Again it returns my desktop wallpaper.
Why is it doing this? How can I get a screenshot the same way I would by pressing 'cmd + shift + 3'?
This is a privacy feature: macOS prevents arbitrary apps from viewing the contents of other app windows. To get permission, your app needs the Screen Recording permission in macOS preferences (System Preferences → Security & Privacy → Privacy → Screen Recording).
Since this is a Python script, I think the app you're running the script from needs to be given permission, so either Terminal or whatever IDE you're using.
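If you want the script to check for (or trigger) that permission itself, macOS 10.15+ exposes a pair of CoreGraphics calls for exactly this. A minimal sketch, assuming your pyobjc build wraps these functions:
import Quartz.CoreGraphics as CG

# True if this process already has the Screen Recording permission.
if not CG.CGPreflightScreenCaptureAccess():
    # Shows the system prompt once and adds the app to the permission list;
    # the user may still need to enable it manually in System Preferences.
    granted = CG.CGRequestScreenCaptureAccess()
    print('Screen recording permission granted:', granted)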

Adding a pause in Google-text-to-speech

I am looking for a small pause, wait, break or anything that will allow for a short break (about 2 seconds, ideally configurable) when speaking out the desired text.
People online have said that adding three full stops followed by a space creates a break, but I don't seem to be getting that. The code below is my test, which sadly has no pauses. Any ideas or suggestions?
Edit: It would be ideal if there is some command from gTTS that would allow me to do this, or maybe some trick like using the three full stops if that actually worked.
from gtts import gTTS
import os
tts = gTTS(text=" Testing ... if there is a pause ... ... ... ... ... longer pause? ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... insane pause " , lang='en', slow=False)
tts.save("temp.mp3")
os.system("temp.mp3")
OK, you need Speech Synthesis Markup Language (SSML) to achieve this. Be aware that you need to set up Google Cloud Platform credentials first. Install the client library from your shell:
pip install --upgrade google-cloud-texttospeech
Then here is the code:
import html
from google.cloud import texttospeech

def ssml_to_audio(ssml_text, outfile):
    # Instantiates a client
    client = texttospeech.TextToSpeechClient()

    # Sets the text input to be synthesized
    synthesis_input = texttospeech.SynthesisInput(ssml=ssml_text)

    # Builds the voice request, selects the language code ("en-US") and
    # the SSML voice gender ("MALE")
    voice = texttospeech.VoiceSelectionParams(
        language_code="en-US", ssml_gender=texttospeech.SsmlVoiceGender.MALE
    )

    # Selects the type of audio file to return
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3
    )

    # Performs the text-to-speech request on the text input with the selected
    # voice parameters and audio file type
    response = client.synthesize_speech(
        input=synthesis_input, voice=voice, audio_config=audio_config
    )

    # Writes the synthetic audio to the output file.
    with open(outfile, "wb") as out:
        out.write(response.audio_content)
    print("Audio content written to file " + outfile)

def text_to_ssml(inputfile):
    raw_lines = inputfile

    # Replace special characters with HTML Ampersand Character Codes.
    # These codes prevent the API from confusing text with SSML commands.
    # For example, '<' --> '&lt;' and '&' --> '&amp;'
    escaped_lines = html.escape(raw_lines)

    # Convert plaintext to SSML.
    # Wait two seconds between each line.
    ssml = "<speak>{}</speak>".format(
        escaped_lines.replace("\n", '\n<break time="2s"/>')
    )

    # Return the concatenated string of SSML script
    return ssml

# Note: text_to_ssml() escapes its input, so any SSML tags already present in
# the text (like the ones below) will be read out as literal text; pass plain
# text if you only want the per-line breaks.
text = """Here are <say-as interpret-as="characters">SSML</say-as> samples.
I can pause <break time="3s"/>.
I can play a sound"""

ssml = text_to_ssml(text)
ssml_to_audio(ssml, "test.mp3")
More documentation:
Speaking addresses with SSML
But if you don't have Google Cloud Platform credentials, the cheaper and easier way is to use the time.sleep() method.
If any background waits are required, you can use the time module as below.
import time

# SLEEP FOR 5 SECONDS AND THEN START THE PROCESS
time.sleep(5)
Or you can retry something three times, waiting between attempts:
import time

for tries in range(3):
    if someprocess() is False:  # someprocess() stands in for whatever you are checking
        time.sleep(3)
You can save multiple mp3 files, then use time.sleep() to call each with your desired amount of pause:
from gtts import gTTS
import os
from time import sleep
tts1 = gTTS(text="Testing", lang='en', slow=False)
tts2 = gTTS(text="if there is a pause", lang='en', slow=False)
tts3 = gTTS(text="insane pause", lang='en', slow=False)
tts1.save("temp1.mp3")
tts2.save("temp2.mp3")
tts3.save("temp3.mp3")
os.system("temp1.mp3")
sleep(2)
os.system("temp2.mp3")
sleep(3)
os.system("temp3.mp3")
Sadly, the answer is no: the gTTS package has no additional function for pause (an issue was already created in 2018 asking for one), but it is smart enough to add natural pauses via its tokenizer.
What is a tokenizer?
A function that takes text and returns it split into a list of tokens (strings). In the gTTS context, its goal is to cut the text into smaller segments that do not exceed the maximum character size allowed (100) for each TTS API request, while making the speech sound natural and continuous. It does so by splitting text where speech would naturally pause (for example on ".") while handling cases where it should not (for example on "10.5" or "U.S.A."). Such rules are called tokenizer cases, which it takes a list of.
Here is an example:
text = "regular text speed no pause regular text speed comma pause, regular text speed period pause. regular text speed exclamation pause! regular text speed ellipses pause... regular text speed new line pause \n regular text speed "
So in this case, adding a sleep() seems like the only answer. But tricking the tokenizer is worth mentioning.
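If you want to hear those natural pauses for yourself, you can synthesize the example text above with plain gTTS; a minimal sketch (the output filename is arbitrary):
from gtts import gTTS

text = ("regular text speed no pause regular text speed comma pause, "
        "regular text speed period pause. regular text speed exclamation pause! "
        "regular text speed ellipses pause... regular text speed new line pause \n"
        "regular text speed")

# The tokenizer splits on the punctuation above, so the synthesized
# speech pauses briefly at each split point.
gTTS(text=text, lang='en', slow=False).save('tokenizer_demo.mp3')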
You can add an arbitrary pause with pydub by saving and concatenating temporary mp3 files, using a silent audio file for the pause.
You can use any break-point symbol of your choice to mark where you want a pause (here $):
from pydub import AudioSegment
from gtts import gTTS

contents = "Hello with $$ 2 seconds pause"
parts = contents.split("$")  # I have chosen this symbol for the pause.

pause2s = AudioSegment.from_mp3("silent.mp3")  # silent.mp3 contains 2s of blank audio

combined = AudioSegment.empty()
cnt = 0
for p in parts:
    # The pause will happen for each empty element of the list
    if not p:
        combined += pause2s
    else:
        tts = gTTS(text=p, lang='en', slow=False)
        tmpFileName = "tmp" + str(cnt) + ".mp3"
        tts.save(tmpFileName)
        combined += AudioSegment.from_mp3(tmpFileName)
        cnt += 1
combined.export("out.mp3", format="mp3")
Late to the party here, but you might consider trying out the audio_program_generator package. You provide a text file comprised of individual phrases, each of which has a configurable pause at the end. In return, it gives you an mp3 file that 'stitches together' all the phrases and their pauses into one continuous audio file. You can optionally mix in a background sound-file, as well. And it implements several of the other bells and whistles that Google TTS provides, like accents, slow-play-speech, etc.
Disclaimer: I am the author of the package.
I had the same problem, and didn't want to use lots of temporary files on disk. This code parses an SSML file, and creates silence whenever a <break> tag is found:
import io

from gtts import gTTS
import lxml.etree as etree
import pydub

ssml_filename = 'Section12.35-edited.ssml'
mp3_filename = 'Section12.35-edited.mp3'
events = ('end',)
DEFAULT_BREAK_TIME = 250  # milliseconds

all_audio = pydub.AudioSegment.silent(100)

for event, element in etree.iterparse(
    ssml_filename,
    events=events,
    remove_comments=True,
    remove_pis=True,
    attribute_defaults=True,
):
    tag = etree.QName(element).localname
    if tag in ['p', 's'] and element.text:
        tts = gTTS(element.text, lang='en', tld='com.au')
        with io.BytesIO() as temp_bytes:
            tts.write_to_fp(temp_bytes)
            temp_bytes.seek(0)
            audio = pydub.AudioSegment.from_mp3(temp_bytes)
        all_audio = all_audio.append(audio)
    elif tag == 'break':
        # Write silence to the file.
        time = element.attrib.get('time', None)  # Shouldn't be possible to have no time value.
        if time:
            if time.endswith('ms'):
                time_value = int(time.removesuffix('ms'))
            elif time.endswith('s'):
                time_value = int(time.removesuffix('s')) * 1000
            else:
                time_value = DEFAULT_BREAK_TIME
        else:
            time_value = DEFAULT_BREAK_TIME
        silence = pydub.AudioSegment.silent(time_value)
        all_audio = all_audio.append(silence)

with open(mp3_filename, 'wb') as output_file:
    all_audio.export(output_file, format='mp3')
I know 4Rom1 used this method above, but to put it more simply: I found this worked really well for me. Get a 1-second silent mp3 (I found one by googling "1 sec silent mp3"), then use pydub to add audio segments together however many times you need. For example, to add 3 seconds of silence:
from pydub import AudioSegment
seconds = 3
output = AudioSegment.from_file("yourfile.mp3")
output += AudioSegment.from_file("1sec_silence.mp3") * seconds
output.export("newaudio.mp3", format="mp3")

How to save/display giphy gif using python API?

I am creating one of those cool moving photograph frames, eventually with my own pictures, but for now I just want to search giphy and save/display a gif.
Here's the code I gathered would be useful from their API.
import giphy_client as gc
from giphy_client.rest import ApiException
from random import randint
api_instance = gc.DefaultApi()
api_key = 'MY_API_KEY'
query = 'art'
fmt = 'gif'
try:
    response = api_instance.gifs_search_get(api_key, query, limit=1, offset=randint(1, 10), fmt=fmt)
    gif_id = response.data[0]
except ApiException as e:
    print("Exception when calling DefaultApi->gifs_search_get: %s\n" % e)

with open('test.txt', 'w') as f:
    f.write(str(type(gif_id)))
I get an object of type: class 'giphy_client.models.gif.Gif'. I want to save this gif and display it on a monitor. I understand that I am a long way off on this, but I am still learning about APIs and how to use them. If anyone can help me find a way to save this gif or display it directly from their website, that would be much appreciated!
Welcome dbarth!
I see your code does successfully retrieve a random image; that is good.
There are 3 steps needed to get the image:
Get the GIF URL.
The giphy_client client you are using is made with Swagger, so you can access the REST response elements like any other object, or print them.
For example:
>>> print(gif_id.images.downsized.url)
'https://media0.giphy.com/media/l3nWlvtvAFHcDFKXm/giphy-downsized.gif?cid=e1bb72ff5c7dc1c67732476c2e69b2ff'
Note that when I print this, I get a URL. The Gif object you got, called gif_id, has a bunch of URLs to download the GIF or MP4 at different resolutions. In this case, I went with the downsized GIF. You can see all the elements retrieved using print(gif_id).
So, I will add this to your code:
url_gif = gif_id.images.downsized.url
Download the GIF
Now that you have a URL, it's time to download the GIF. I will use the requests library to do this; install it with pip if you don't have it in your environment. It seems that you already tried to do this, but with an error.
import requests
[...]
with open('test.gif', 'wb') as f:
    f.write(requests.get(url_gif).content)
Display the GIF
There are a bunch of GUIs for Python to do this, or you can even invoke a browser to show it. You need to investigate which GUI adapts better to your needs. For this case, I will use the example posted here, with a few modifications, to display the GIF using Tkinter. Install Tkinter if it isn't included with your Python installation.
Final code:
import giphy_client as gc
from giphy_client.rest import ApiException
from random import randint
import requests
from tkinter import *
import time
import os
root = Tk()
api_instance = gc.DefaultApi()
api_key = 'YOUR_OWN_API_KEY'
query = 'art'
fmt = 'gif'
try:
    response = api_instance.gifs_search_get(api_key, query, limit=1, offset=randint(1, 10), fmt=fmt)
    gif_id = response.data[0]
    url_gif = gif_id.images.downsized.url
except ApiException as e:
    print("Exception when calling DefaultApi->gifs_search_get: %s\n" % e)

with open('test.gif', 'wb') as f:
    f.write(requests.get(url_gif).content)

frames = []
i = 0
while True:  # Add frames until out of range
    try:
        frames.append(PhotoImage(file='test.gif', format='gif -index %i' % (i)))
        i = i + 1
    except TclError:
        break

def update(ind):  # Display and loop the GIF
    if ind >= len(frames):
        ind = 0
    frame = frames[ind]
    ind += 1
    label.configure(image=frame)
    root.after(100, update, ind)

label = Label(root)
label.pack()
root.after(0, update, 0)
root.mainloop()
Keep learning how to use a REST API, and Swagger, if you want to keep using the giphy_client library. If not, you can make the requests directly using the requests library.
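For reference, a minimal sketch of querying Giphy's public search endpoint directly with requests, without the Swagger client (the API key is a placeholder):
import requests

API_KEY = 'YOUR_OWN_API_KEY'  # placeholder

# The same search the Swagger client performs, as a plain REST call.
resp = requests.get(
    'https://api.giphy.com/v1/gifs/search',
    params={'api_key': API_KEY, 'q': 'art', 'limit': 1},
)
resp.raise_for_status()
data = resp.json()['data']

if data:
    url_gif = data[0]['images']['downsized']['url']
    with open('test.gif', 'wb') as f:
        f.write(requests.get(url_gif).content)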

Play mp4 video with python and gstreamer

I'm trying to play a video in mp4 format but it's not working.
In the console I execute this line and it works:
gst-launch playbin uri=rtmp://localhost:1935/files/video.mp4
But if I change to version 1.0, only the audio works:
gst-launch-1.0 playbin uri=rtmp://localhost:1935/files/video.mp4
In Python I have the following code:
self.player = Gst.Pipeline.new("player")
source = Gst.ElementFactory.make("filesrc", "file-source")
demuxer = Gst.ElementFactory.make("mp4mux", "demuxer")
demuxer.connect("pad-added", self.demuxer_callback)
self.video_decoder = Gst.ElementFactory.make("x264enc", "video-decoder")
self.audio_decoder = Gst.ElementFactory.make("vorbisdec", "audio-decoder")
audioconv = Gst.ElementFactory.make("audioconvert", "converter")
audiosink = Gst.ElementFactory.make("autoaudiosink", "audio-output")
videosink = Gst.ElementFactory.make("autovideosink", "video-output")
self.queuea = Gst.ElementFactory.make("queue", "queuea")
self.queuev = Gst.ElementFactory.make("queue", "queuev")
colorspace = Gst.ElementFactory.make("videoconvert", "colorspace")
self.player.add(source)
self.player.add(demuxer)
self.player.add(self.video_decoder)
self.player.add(self.audio_decoder)
self.player.add(audioconv)
self.player.add(audiosink)
self.player.add(videosink)
self.player.add(self.queuea)
self.player.add(self.queuev)
self.player.add(colorspace)
source.link(demuxer)
self.queuev.link(self.video_decoder)
self.video_decoder.link(colorspace)
colorspace.link(videosink)
self.queuea.link(self.audio_decoder)
self.audio_decoder.link(audioconv)
audioconv.link(audiosink)
but I get this error:
Error: Error in the internal data flow. gstbasesrc.c(2865): gst_base_src_loop (): /GstPipeline:player/GstFileSrc:file-source:
streaming task paused, reason not-linked (-1)
What can be happening? I think I am not decoding properly.
You are missing the links from the demuxer pads to your queues. Demuxers have 'sometimes' pads, so you need to listen to their pad-added signal and link in the callback. Remember to check the pad caps once you get them and link to the appropriate branch of your pipeline.
You can read about dynamic pads here: http://gstreamer.freedesktop.org/data/doc/gstreamer/head/manual/html/chapter-pads.html#section-pads-dynamic
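A minimal sketch of such a callback, matching the element names in the question's code (treat the caps check as a sketch; real caps strings look like "audio/mpeg" or "video/x-h264"):
def demuxer_callback(self, demuxer, pad):
    # Inspect the caps of the newly added pad to pick the right branch.
    caps = pad.query_caps(None)
    name = caps.to_string()
    if name.startswith('audio/'):
        pad.link(self.queuea.get_static_pad('sink'))
    elif name.startswith('video/'):
        pad.link(self.queuev.get_static_pad('sink'))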
You have in your code:
demuxer = Gst.ElementFactory.make("mp4mux", "demuxer")
demuxer.connect("pad-added", self.demuxer_callback)
I hope this is a cut/paste error, as demuxing with a mux will not work. I believe for an .mp4 file, the normal demuxer (if you are choosing one by hand) is qtdemux.
You could also use decodebin to decode the file for you.
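If picking elements by hand isn't a requirement, the simplest route is to let playbin build the whole pipeline for you, just like the working gst-launch command line does. A minimal sketch (the URI is the one from the question):
import gi
gi.require_version('Gst', '1.0')
from gi.repository import Gst, GLib

Gst.init(None)

# playbin assembles the demuxer, decoders and sinks automatically,
# mirroring the working `gst-launch-1.0 playbin uri=...` command.
player = Gst.ElementFactory.make('playbin', 'player')
player.set_property('uri', 'rtmp://localhost:1935/files/video.mp4')
player.set_state(Gst.State.PLAYING)

loop = GLib.MainLoop()
try:
    loop.run()
finally:
    player.set_state(Gst.State.NULL)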

reading a h264 RTSP stream into python and opencv

Here is the problem: I have an IP camera that streams h264 video using the RTSP protocol. All I want to do is read this stream and pass it to OpenCV to decode it, using this function:
cv2.imdecode()
How?
Update
I solved this problem. Here is the solution: Convert YUVj420p pixel format to RGB888 using gstreamer
import gi
gi.require_version('Gst', '1.0')
from gi.repository import GObject, Gst
import numpy as np
import cv2

GObject.threads_init()
Gst.init(None)

def YUV_stream2RGB_frame(data):
    w = 640
    h = 368
    size = w * h
    stream = np.frombuffer(data, np.uint8)  # convert data from bytes to a numpy array
    # Y bytes will start from 0 and end at size-1
    y = stream[0:size].reshape(h, w)  # create the y channel, same size as the image
    # U bytes will start from size and end at size+size/4, as its size = framesize/4
    u = stream[size:(size + (size // 4))].reshape((h // 2), (w // 2))
    # up-sample the u channel to the same size as the y channel using pyrUp
    u_upsize = cv2.pyrUp(u)
    # do the same for the v channel
    v = stream[(size + (size // 4)):].reshape((h // 2), (w // 2))
    v_upsize = cv2.pyrUp(v)
    # create the 3-channel frame using cv2.merge; watch for the order
    yuv = cv2.merge((y, u_upsize, v_upsize))
    # convert to RGB format
    rgb = cv2.cvtColor(yuv, cv2.COLOR_YCrCb2RGB)
    # show frame
    cv2.imshow("show", rgb)
    cv2.waitKey(5)

def on_new_buffer(appsink):
    sample = appsink.emit('pull-sample')
    # get the buffer
    buf = sample.get_buffer()
    # extract the data stream as bytes
    data = buf.extract_dup(0, buf.get_size())
    YUV_stream2RGB_frame(data)
    return False

def Init():
    CLI = "rtspsrc name=src location=rtsp://192.168.1.20:554/live/ch01_0 latency=10 ! decodebin ! appsink name=sink"
    # simplest way to create a pipeline
    pipline = Gst.parse_launch(CLI)
    # getting the sink by the name set in CLI
    appsink = pipline.get_by_name("sink")
    # setting some important properties of appsink
    appsink.set_property("max-buffers", 20)       # prevent the app from consuming huge amounts of memory
    appsink.set_property('emit-signals', True)    # tell the sink to emit signals
    appsink.set_property('sync', False)           # no sync, to make decoding as fast as possible
    appsink.connect('new-sample', on_new_buffer)  # connect the signal to a callable func
    return pipline

def run(pipline):
    pipline.set_state(Gst.State.PLAYING)
    GObject.MainLoop().run()

run(Init())
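For completeness: if your OpenCV build includes FFmpeg (the prebuilt opencv-python wheels do), you can skip GStreamer entirely and let VideoCapture decode the RTSP stream for you. A minimal sketch against the same camera URL:
import cv2

# OpenCV's FFmpeg backend handles the RTSP transport and h264 decoding;
# frames arrive already decoded as BGR arrays, no cv2.imdecode() needed.
cap = cv2.VideoCapture("rtsp://192.168.1.20:554/live/ch01_0")
while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    cv2.imshow("show", frame)
    if cv2.waitKey(5) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()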
