I am implementing an FTP upload in my Python script. Everything works except the FTP upload itself. I have used chmod -R 777 on the folder that contains my images and the script, and the permissions on the FTP server are all set to 777. Please note that ftpDir is intentionally left blank because it is the root folder for the account on the FTP server. I do not know what else I can do to fix this issue.
This is the error I get:
placeFile - Start FTP to ftp.brisbaneskycams.com
placeFile - ERROR FTP transfer Failed ...
Filename : /home/pi/pimotion/images/capture-20160503-044806.jpg
Error Msg: ['[Errno', '-2] Name or service not known']
Please Investigate Problem ...
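Note: "[Errno -2] Name or service not known" comes from the socket layer and means the FTP hostname could not be resolved; it is not a permissions problem, so chmod will not affect it. A minimal diagnostic sketch (not part of the original script) is to check name resolution first:
import socket
ftpServer = "ftp.brisbaneskycams.com"  # the host shown in the error output above
try:
    print(socket.gethostbyname(ftpServer))  # prints the resolved IP address on success
except socket.gaierror as err:
    print("Cannot resolve %s: %s" % (ftpServer, err))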
This is the script:
#!/usr/bin/python
#
# Lightweight Motion Detection using python picamera libraries
# based on code from raspberry pi forum by user utpalc
# modified by Claude Pageau for this working example
# ------------------------------------------------------------
# original code on github https://github.com/pageauc/picamera-motion
# This is sample code that can be used for further development
verbose = True
if verbose:
print "Loading python libraries ....."
else:
print "verbose output has been disabled verbose=False"
import picamera
import picamera.array
import datetime
import time
from PIL import Image
from PIL import ImageFont
from PIL import ImageDraw
from fractions import Fraction
import ftplib
from ftplib import FTP
import os
#Constants
SECONDS2MICRO = 1000000 # Constant for converting Shutter Speed in Seconds to Microseconds
# User Customizable Settings
imageDir = "images"
imagePath = "/home/pi/pimotion/" + imageDir
imageNamePrefix = 'capture-' # Prefix for all image file names. Eg front-
imageWidth = 1980
imageHeight = 1080
imageVFlip = False # Flip image Vertically
imageHFlip = False # Flip image Horizontally
imagePreview = False
# FTP Server location variables
ftpOn = True
ftpServer = "ftphostname"
ftpDir = ""
ftpUser = "ftpusername"
ftpPass = "XXXXXXXXXX"
numberSequence = False
threshold = 10 # How Much pixel changes
sensitivity = 100 # How many pixels change
nightISO = 800
nightShutSpeed = 6 * SECONDS2MICRO # seconds times conversion to microseconds constant
# Advanced Settings not normally changed
testWidth = 100
testHeight = 75
def checkImagePath(imagedir):
# Find the path of this python script and set some global variables
mypath=os.path.abspath(__file__)
baseDir=mypath[0:mypath.rfind("/")+1]
baseFileName=mypath[mypath.rfind("/")+1:mypath.rfind(".")]
# Setup imagePath and create folder if it Does Not Exist.
imagePath = baseDir + imagedir # Where to save the images
# if imagePath does not exist create the folder
if not os.path.isdir(imagePath):
if verbose:
print "%s - Image Storage folder not found." % (progName)
print "%s - Creating image storage folder %s " % (progName, imagePath)
os.makedirs(imagePath)
return imagePath
def takeDayImage(imageWidth, imageHeight, filename):
if verbose:
print "takeDayImage - Working ....."
with picamera.PiCamera() as camera:
camera.resolution = (imageWidth, imageHeight)
# camera.rotation = cameraRotate #Note use imageVFlip and imageHFlip variables
if imagePreview:
camera.start_preview()
camera.vflip = imageVFlip
camera.hflip = imageHFlip
# Day Automatic Mode
camera.exposure_mode = 'auto'
camera.awb_mode = 'auto'
camera.capture(filename)
def placeFile(filepath):
path, filename = os.path.split(filepath)
os.chdir(path)
print filename
print path
if verbose:
print("placeFile - Start FTP to %s" % ftpServer )
try:
ftp = FTP(ftpServer)
ftp.login(user=ftpUser, passwd = ftpPass)
ftp.cwd(ftpDir)
ftp.storbinary('STOR ' + filename, open(filename, 'rb'))
ftp.quit()
if verbose:
print("placeFile - SUCCESSFUL FTP Transfer")
print(" Filename : %s " % (filepath))
except ftplib.all_errors as e:
errorcode_string = str(e).split(None, 1)
if verbose:
print("placeFile - ERROR FTP transfer Failed ...")
print(" Filename : %s " % (filepath))
print(" Error Msg: %s" % ( errorcode_string ))
print(" Please Investigate Problem ...")
def takeNightImage(imageWidth, imageHeight, filename):
if verbose:
print "takeNightImage - Working ....."
with picamera.PiCamera() as camera:
camera.resolution = (imageWidth, imageHeight)
if imagePreview:
camera.start_preview()
camera.vflip = imageVFlip
camera.hflip = imageHFlip
# Night time low light settings have long exposure times
# Settings for Low Light Conditions
# Set a frame rate of 1/6 fps, then set shutter
# speed to 6s and ISO to approx 800 per nightISO variable
camera.framerate = Fraction(1, 6)
camera.shutter_speed = nightShutSpeed
camera.exposure_mode = 'off'
camera.iso = nightISO
# Give the camera a good long time to measure AWB
# (you may wish to use fixed AWB instead)
time.sleep(10)
camera.capture(filename)
if verbose:
print "checkNightMode - Captured %s" % (filename)
return filename
def takeMotionImage(width, height, daymode):
with picamera.PiCamera() as camera:
time.sleep(1)
camera.resolution = (width, height)
with picamera.array.PiRGBArray(camera) as stream:
if daymode:
camera.exposure_mode = 'auto'
camera.awb_mode = 'auto'
else:
# Take Low Light image
# Set a framerate of 1/6 fps, then set shutter
# speed to 6s and ISO to 800
camera.framerate = Fraction(1, 6)
camera.shutter_speed = nightShutSpeed
camera.exposure_mode = 'off'
camera.iso = nightISO
# Give the camera a good long time to measure AWB
# (you may wish to use fixed AWB instead)
time.sleep( 10 )
camera.capture(stream, format='rgb')
return stream.array
def scanIfDay(width, height, daymode):
data1 = takeMotionImage(width, height, daymode)
while not motionFound:
data2 = takeMotionImage(width, height, daymode)
pCnt = 0L;
diffCount = 0L;
for w in range(0, width):
for h in range(0, height):
# get the diff of the pixel. Conversion to int
# is required to avoid unsigned short overflow.
diff = abs(int(data1[h][w][1]) - int(data2[h][w][1]))
if diff > threshold:
diffCount += 1
if diffCount > sensitivity:
break; #break outer loop.
if diffCount > sensitivity:
motionFound = True
else:
# print "Sum of all pixels=", pxCnt
data2 = data1
return motionFound
def scanMotion(width, height, daymode):
motionFound = False
data1 = takeMotionImage(width, height, daymode)
while not motionFound:
data2 = takeMotionImage(width, height, daymode)
diffCount = 0L;
for w in range(0, width):
for h in range(0, height):
# get the diff of the pixel. Conversion to int
# is required to avoid unsigned short overflow.
diff = abs(int(data1[h][w][1]) - int(data2[h][w][1]))
if diff > threshold:
diffCount += 1
if diffCount > sensitivity:
break; #break outer loop.
if diffCount > sensitivity:
motionFound = True
else:
data2 = data1
return motionFound
def getFileName(imagePath, imageNamePrefix, currentCount):
rightNow = datetime.datetime.now()
if numberSequence :
filename = imagePath + "/" + imageNamePrefix + str(currentCount) + ".jpg"
else:
filename = "%s/%s%04d%02d%02d-%02d%02d%02d.jpg" % ( imagePath, imageNamePrefix ,rightNow.year, rightNow.month, rightNow.day, rightNow.hour, rightNow.minute, rightNow.second)
return filename
def motionDetection():
print "Scanning for Motion threshold=%i sensitivity=%i ......" % (threshold, sensitivity)
isDay = True
currentCount= 1000
while True:
if scanMotion(testWidth, testHeight, isDay):
filename = getFileName(imagePath, imageNamePrefix, currentCount)
if numberSequence:
currentCount += 1
if isDay:
takeDayImage( imageWidth, imageHeight, filename )
else:
takeNightImage( imageWidth, imageHeight, filename )
if verbose:
print("MotionDetection - Saved %s" % filename)
if ftpOn:
placeFile(filename)
if __name__ == '__main__':
try:
motionDetection()
finally:
print ""
print "+++++++++++++++"
print "Exiting Program"
print "+++++++++++++++"
print ""
I want to take a series of images in Python with different webcams, stack them, and afterwards save them in different folders numbered 1 to 14, one for each camera. Stacking is a process in which the corresponding pixels of the different pictures are summed up and, at the end, divided by the number of pictures.
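As an illustration of the stacking step on its own (a minimal numpy sketch, separate from the camera code below):
import numpy as np

def stack_images(images):
    # Sum all frames pixel-wise in float, then divide by the number of frames.
    total = np.zeros(images[0].shape, dtype=np.float64)
    for img in images:
        total += img.astype(np.float64)
    avg = total / len(images)
    return np.round(avg).astype(np.uint8)  # round and cast back to 8-bit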
Here is my code:
import numpy as np
import time
from time import strftime, localtime, gmtime
import subprocess
import sys
import glob
def install(package):
subprocess.check_call([sys.executable, "-m", "pip", "install", package])
install("opencv-python")
import cv2
###################### Camport Access #####################################################################################
def list_ports():
# Test the ports and returns a tuple with the available ports and the ones that are working.
is_working = True
dev_port = 0
working_ports = []
available_ports = []
while is_working:
camera = cv2.VideoCapture(dev_port)
if not camera.isOpened():
is_working = False
print("Port %s is not working." % dev_port)
else:
is_reading, img = camera.read()
w = camera.get(3)
h = camera.get(4)
if is_reading:
print("Port %s is working and reads images (%s x %s)" % (dev_port, h, w))
working_ports.append(dev_port)
else:
print("Port %s for camera ( %s x %s) is present but does not reads." % (dev_port, h, w))
available_ports.append(dev_port)
dev_port += 1
return available_ports, working_ports
available, camports = list_ports()
print(camports)
picture_count = 5
hours = int(input("amount of hours: "))
minute = localtime().tm_min
for i in range(hours):
hour = localtime().tm_hour
print("starting hour: ", i)
for j in camports:
cam_port = j
print("currently taking images at camport: ", j + 1)
cam = cv2.VideoCapture(cam_port, cv2.CAP_DSHOW)
# take pictures here and save them
Images = []
timestr = time.strftime("%d.%m.%Y-%H_%M_%S")
for k in range(picture_count):
cam.set(cv2.CAP_PROP_AUTOFOCUS, 0)
time.sleep(0.5)
result, image = cam.read()
if result:
Images.append(image)
img_sum = np.zeros(np.shape(image)) # Sum of images
img_cnt = np.full(image.shape, picture_count)
for img in Images:
img_sum = img_sum + img.astype(float) # Sum images
avg_img = img_sum / img_cnt # Divide sum by count, for computing the average.
avg_img = np.round(avg_img).astype(np.uint8) # Round an cast to uint8
else:
print("failure at: ", j + 1, k + 1)
cv2.imwrite(f"path\\{j + 1}\\pic {i + 1} {timestr}.tiff", avg_img)
print("waiting for the next hour")
while localtime().tm_min is not minute or localtime().tm_hour == hour:
time.sleep(1)
I have a problem while accessing the cameras. Regardless of which camera I'm using, the 4th, 6th, 8th, 10th, 12th and 14th camports won't work and give me the message that every picture taken has failed (see "failure at:").
Do you have an idea how this could be fixed?
I am currently looking for a way to do video steganography. I succeeded in splitting frames from a video file and hiding messages inside them. But when I combine these frames back into a video and try to extract the info from the hidden video, I always fail. I guess the problem is video compression.
Here is my code.
from stegano import lsb
from os.path import isfile, join
import time # install time ,opencv,numpy modules
import cv2
import numpy as np
import math
import os
import shutil
from moviepy.editor import *
from subprocess import call, STDOUT
def split_string(s_str, count=10):
per_c = math.ceil(len(s_str)/count)
c_cout = 0
out_str = ''
split_list = []
for s in s_str:
out_str += s
c_cout += 1
if c_cout == per_c:
split_list.append(out_str)
out_str = ''
c_cout = 0
if c_cout != 0:
split_list.append(out_str)
return split_list
def frame_extraction(video):
if not os.path.exists("./tmp"):
os.makedirs("tmp")
temp_folder = "./tmp"
print("[INFO] tmp directory is created")
vidcap = cv2.VideoCapture(video)
count = 0
while True:
success, image = vidcap.read()
if not success:
break
cv2.imwrite(os.path.join(temp_folder, "{:d}.png".format(count)), image)
count += 1
print("[INFO] frame {} is extracted".format(count))
def encode_string(input_string, root="./tmp/"):
split_string_list = split_string(input_string)
for i in range(0, len(split_string_list)):
f_name = "{}{}.png".format(root, i)
secret_enc = lsb.hide(f_name, split_string_list[i])
secret_enc.save(f_name)
print("[INFO] frame {} holds {}".format(f_name, lsb.reveal(f_name)))
def decode_string(video):
frame_extraction(video)
secret = []
root = "./tmp/"
for i in range(len(os.listdir(root))):
f_name = "{}{}.png".format(root, i)
print("[INFO] frame {} is decoding".format(f_name))
secret_dec = lsb.reveal(f_name)
if secret_dec == None:
break
secret.append(secret_dec)
print("[INFO] secret is {}".format("".join(secret)))
print(''.join([i for i in secret]))
# clean_tmp()
def clean_tmp(path="./tmp"):
if os.path.exists(path):
shutil.rmtree(path)
print("[INFO] tmp files are cleaned up")
def main():
input_string = input("Enter the input string: ")
f_name = input("enter the name of video: ")
# Extract the frames from the source file
frame_extraction(f_name)
# Split the file path and extension
file_path, file_extraction = os.path.splitext(f_name)
# Create the output audio file
audio_path = file_path + "_temp.mp3"
video = VideoFileClip(f_name)
video.audio.write_audiofile(audio_path)
# Encode the message characters
encode_string(input_string)
# Create a video without sound from the images in the tmp folder
fps=30
img_root = r"./tmp/"
# fourcc = cv2.VideoWriter_fourcc(*'mp4v')
fourcc = cv2.VideoWriter_fourcc(*'XVID')
video_file_path = file_path + "_temp.avi"
# Get the size of the first image in the tmp folder
img = cv2.imread(img_root + "0.png")
height, width, layers = img.shape
size=(width,height)
videoWriter = cv2.VideoWriter(video_file_path,fourcc=fourcc,fps=fps,frameSize=size)
for i in range(len(os.listdir(img_root))):
frame = cv2.imread(img_root+str(i)+'.png')
videoWriter.write(frame)
videoWriter.release()
# Merge the video and the audio (audio_path, video_file_path)
video = VideoFileClip(video_file_path)
audio_clip = AudioFileClip(audio_path)
video = video.set_audio(audio_clip)
video.write_videofile(file_path + "_hide.avi")
clean_tmp()
if __name__ == "__main__":
while True:
print("1.Hide a message in video 2.Reveal the secret from video")
print("any other value to exit")
choice = input()
if choice == '1':
main()
elif choice == '2':
decode_string(input("enter the name of video with extension: "))
else:
break
I have tried the mp4, avi and mov formats, but none of them worked.
If you have any ideas or suggestions, I would be very grateful.
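One way to test the compression guess (an assumption on my part, not something verified against this exact code) is to rebuild the video with a lossless codec such as FFV1, since lossy codecs like XVID or mp4v discard the least-significant bits that carry the hidden message:
import os
import cv2

# Sketch: rebuild the video from the ./tmp frames with a lossless codec so the LSBs survive.
img = cv2.imread("./tmp/0.png")
height, width = img.shape[:2]
fourcc = cv2.VideoWriter_fourcc(*'FFV1')  # lossless codec (needs FFmpeg support in the OpenCV build)
writer = cv2.VideoWriter("out_lossless.avi", fourcc, 30, (width, height))
for i in range(len(os.listdir("./tmp"))):
    writer.write(cv2.imread("./tmp/{}.png".format(i)))
writer.release()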
The server sends videos one after another over the same RTSP URL (rtsp://192.168.0.2:8554/).
I can capture and display video using OpenCV.
import os
import numpy as np
import cv2 as cv
os.environ["OPENCV_FFMPEG_CAPTURE_OPTIONS"] = "rtsp_transport;udp"
cap = cv.VideoCapture('rtsp://192.168.0.2:8554/')
while cap.isOpened():
ret, frame = cap.read()
# if frame is read correctly ret is True
if not ret:
print("Can't receive frame (stream end?). Exiting ...")
break
cv.imshow('frame', frame)
if cv.waitKey(1) == ord('q'):
break
cap.release()
cv.destroyAllWindows()
This program returns an error when the stream moves on to the next video.
I tried the following, but it didn't work.
import cv2 as cv
import os
import time
os.environ["OPENCV_FFMPEG_CAPTURE_OPTIONS"] = "rtsp_transport;udp"
cap = cv.VideoCapture('rtsp://192.168.0.26:8554/')
if not cap.isOpened():
print("Cannot open camera")
exit()
while True:
try:
time.sleep(2)
# Capture frame-by-frame
ret, frame = cap.read()
# if frame is read correctly ret is True
# Our operations on the frame come here
# Display the resulting frame
cv.imshow('frame',frame)
if cv.waitKey(1) == ord('q'):
break
except:
print("Exception!!")
# When everything done, release the capture
cap.release()
cv.destroyAllWindows()
Can I get some help?
Thanks in advance!
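For reference, a simpler sketch (an assumption, not the multi-threaded solution described below) is to recreate the VideoCapture whenever read() starts failing, so the next video on the same URL gets a fresh connection:
import os
import time
import cv2 as cv

os.environ["OPENCV_FFMPEG_CAPTURE_OPTIONS"] = "rtsp_transport;udp"
url = 'rtsp://192.168.0.2:8554/'
cap = cv.VideoCapture(url)
while True:
    ret, frame = cap.read()
    if not ret:
        # current video ended (or the stream dropped): reopen and try again
        cap.release()
        time.sleep(1)
        cap = cv.VideoCapture(url)
        continue
    cv.imshow('frame', frame)
    if cv.waitKey(1) == ord('q'):
        break
cap.release()
cv.destroyAllWindows()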
I solved this by using a multi-threaded program.
Main file
from datasets import LoadStreams
import threading
import os
import logging
import cv2
import torch
import time
logger = logging.getLogger(__name__)
def select_device(device='', batch_size=None):
# device = 'cpu' or '0' or '0,1,2,3'
cpu_request = device.lower() == 'cpu'
if device and not cpu_request: # if device requested other than 'cpu'
os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable
assert torch.cuda.is_available(), f'CUDA unavailable, invalid device {device} requested' # check availablity
cuda = False if cpu_request else torch.cuda.is_available()
if cuda:
c = 1024 ** 2 # bytes to MB
ng = torch.cuda.device_count()
if ng > 1 and batch_size: # check that batch_size is compatible with device_count
assert batch_size % ng == 0, f'batch-size {batch_size} not multiple of GPU count {ng}'
x = [torch.cuda.get_device_properties(i) for i in range(ng)]
s = f'Using torch {torch.__version__} '
for i, d in enumerate((device or '0').split(',')):
if i == 1:
s = ' ' * len(s)
logger.info(f"{s}CUDA:{d} ({x[i].name}, {x[i].total_memory / c}MB)")
else:
logger.info(f'Using torch {torch.__version__} CPU')
logger.info('') # skip a line
return torch.device('cuda:0' if cuda else 'cpu')
def detect(rtsp_url):
dataset = LoadStreams(rtsp_url)
device = select_device('')
count = 0
view_img = True
# img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img
try:
for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset): # for every frame
count += 1
im0 = im0s[0].copy()
if view_img:
cv2.imshow(str(path), im0)
# if cv2.waitKey(1) == ord('q'): # q to quit
# raise StopIteration
except:
print("finish execption")
dataset.stop()
return "good"
if __name__ == '__main__':
rtsp_url = "rtsp://192.168.0.26:8554/"
while True:
for thread in threading.enumerate():
print(thread.name)
print(detect(rtsp_url))
dataset class file
import glob
import logging
import math
import os
import random
import shutil
import time
import re
from itertools import repeat
from multiprocessing.pool import ThreadPool
from pathlib import Path
from threading import Thread
import cv2
import numpy as np
import torch
class LoadStreams: # multiple IP or RTSP cameras
def __init__(self, sources='streams.txt', img_size=640):
self.mode = 'stream'
self.img_size = img_size
self.capture = None
self.my_thread = None
self.stopFlag = False
if os.path.isfile(sources):
with open(sources, 'r') as f:
sources = [x.strip() for x in f.read().strip().splitlines() if len(x.strip())]
else:
sources = [sources]
n = len(sources)
self.imgs = [None] * n
self.sources = [clean_str(x) for x in sources] # clean source names for later
s = sources[0]
# for i, s in enumerate(sources):
# Start the thread to read frames from the video stream
# print('%g/%g: %s... ' % (i + 1, n, s), end='')
cap = cv2.VideoCapture(eval(s) if s.isnumeric() else s)
assert cap.isOpened(), 'Failed to open %s' % s
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS) % 100
self.ret, self.imgs[0] = cap.read() # guarantee first frame
thread = Thread(target=self.update, args=([0, cap]), daemon=True)
print(' success (%gx%g at %.2f FPS).' % (w, h, fps))
thread.start()
self.capture = cap
self.my_thread = thread
print('') # newline
# check for common shapes
s = np.stack([letterbox(x, new_shape=self.img_size)[0].shape for x in self.imgs], 0) # inference shapes
self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal
if not self.rect:
print('WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.')
def update(self, index, cap):
# Read next stream frame in a daemon thread
n = 0
while cap.isOpened() and not self.stopFlag:
n += 1
# _, self.imgs[index] = cap.read()
cap.grab()
if n == 4: # read every 4th frame
_, self.imgs[index] = cap.retrieve()
n = 0
time.sleep(0.01) # wait time
def stop(self):
self.stopFlag = True
try:
# self.capture.release()
# self.my_thrsead.join()
print("stop thread!!")
except:
print("ERROR stopping thread!!")
def __iter__(self):
self.count = -1
return self
def __next__(self):
self.count += 1
img0 = self.imgs.copy()
if cv2.waitKey(1) == ord('q'): # q to quit
cv2.destroyAllWindows()
raise StopIteration
if not self.ret:
print("error!!!")
self.stop()
# Letterbox
img = [letterbox(x, new_shape=self.img_size, auto=self.rect)[0] for x in img0]
# Stack
img = np.stack(img, 0)
# Convert
img = img[:, :, :, ::-1].transpose(0, 3, 1, 2) # BGR to RGB, to bsx3x416x416
img = np.ascontiguousarray(img)
return self.sources, img, img0, None
def __len__(self):
return 0 # 1E12 frames = 32 streams at 30 FPS for 30 years
# def stop(self):
def clean_str(s):
# Cleans a string by replacing special characters with underscore _
return re.sub(pattern="[|##!¡·$€%&()=?¿^*;:,¨´><+]", repl="_", string=s)
def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
# Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232
shape = img.shape[:2] # current shape [height, width]
if isinstance(new_shape, int):
new_shape = (new_shape, new_shape)
# Scale ratio (new / old)
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
if not scaleup: # only scale down, do not scale up (for better test mAP)
r = min(r, 1.0)
# Compute padding
ratio = r, r # width, height ratios
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
if auto: # minimum rectangle
dw, dh = np.mod(dw, 32), np.mod(dh, 32) # wh padding
elif scaleFill: # stretch
dw, dh = 0.0, 0.0
new_unpad = (new_shape[1], new_shape[0])
ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios
dw /= 2 # divide padding into 2 sides
dh /= 2
if shape[::-1] != new_unpad: # resize
img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
return img, ratio, (dw, dh)
while cap.isOpened() and not self.stopFlag:
This line is especially important: without it, the reader threads keep piling up and the program eventually runs into a memory error as they accumulate.
I have recently been working with PocketSphinx in Python. I have successfully got the example below to work, recognising a recorded WAV file.
#!/usr/bin/env python
import sys,os
def decodeSpeech(hmmd,lmdir,dictp,wavfile):
"""
Decodes a speech file
"""
try:
import pocketsphinx as ps
import sphinxbase
except:
print """Pocket sphinx and sphixbase is not installed
in your system. Please install it with package manager.
"""
speechRec = ps.Decoder(hmm = hmmd, lm = lmdir, dict = dictp)
wavFile = file(wavfile,'rb')
wavFile.seek(44)
speechRec.decode_raw(wavFile)
result = speechRec.get_hyp()
return result[0]
if __name__ == "__main__":
hmdir = "/home/jaganadhg/Desktop/Docs_New/kgisl/model/hmm/wsj1"
lmd = "/home/jaganadhg/Desktop/Docs_New/kgisl/model/lm/wsj/wlist5o.3e-7.vp.tg.lm.DMP"
dictd = "/home/jaganadhg/Desktop/Docs_New/kgisl/model/lm/wsj/wlist5o.dic"
wavfile = "/home/jaganadhg/Desktop/Docs_New/kgisl/sa1.wav"
recognised = decodeSpeech(hmdir,lmd,dictd,wavfile)
print "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
print recognised
print "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
The problem is: how can I do real-time speech recognition from a microphone? Ideally in a while loop with an if statement, so that if a set word is recognised from the microphone, a function can be called.
The code for realtime recognition looks like this:
config = Decoder.default_config()
config.set_string('-hmm', path.join(MODELDIR, 'en-us/en-us'))
config.set_string('-lm', path.join(MODELDIR, 'en-us/en-us.lm.bin'))
config.set_string('-dict', path.join(MODELDIR, 'en-us/cmudict-en-us.dict'))
config.set_string('-logfn', '/dev/null')
decoder = Decoder(config)
import pyaudio
p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=1024)
stream.start_stream()
in_speech_bf = False
decoder.start_utt()
while True:
buf = stream.read(1024)
if buf:
decoder.process_raw(buf, False, False)
if decoder.get_in_speech() != in_speech_bf:
in_speech_bf = decoder.get_in_speech()
if not in_speech_bf:
decoder.end_utt()
print 'Result:', decoder.hyp().hypstr
decoder.start_utt()
else:
break
decoder.end_utt()
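To call a function when a set word is recognised (the original question), one possible extension of the loop above is a small check on the hypothesis string right after end_utt(); the keyword 'hello' and the helper below are placeholders:
def check_keyword(decoder, keyword='hello'):
    # Call this right after decoder.end_utt() in the loop above.
    hyp = decoder.hyp()
    if hyp is not None and keyword in hyp.hypstr.lower():
        print 'Keyword "%s" detected' % keyword
        return True
    return False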
You can also use the GStreamer Python bindings with pocketsphinx; check livedemo.py. Pocketsphinx is now available as a GStreamer plugin.
This is code I found on the internet, and I've modified a few things, but it listens for the words very badly and slowly. Can you help me modify it so it works well? It is built on Ubuntu 16.04 LTS. I do not know much about programming and am looking forward to your help.
# -*- encoding: utf-8 -*-
#!/usr/bin/env python
from pocketsphinx.pocketsphinx import *
from sphinxbase.sphinxbase import *
import os
import pyaudio
import wave
import audioop
from collections import deque
import time
import math;import Mic
"""
Written by Sophie Li, 2016
http://blog.justsophie.com/python-speech-to-text-with-pocketsphinx/
"""
class SpeechDetector:
def __init__(self):
# Microphone stream config.
self.CHUNK = 1024 # CHUNKS of bytes to read each time from mic
self.FORMAT = pyaudio.paInt16
self.CHANNELS = 1
self.RATE = 16000
self.SILENCE_LIMIT = 1 # Silence limit in seconds. The max ammount of seconds where
# only silence is recorded. When this time passes the
# recording finishes and the file is decoded
self.PREV_AUDIO = 0.5 # Previous audio (in seconds) to prepend. When noise
# is detected, how much of previously recorded audio is
# prepended. This helps to prevent chopping the beginning
# of the phrase.
self.THRESHOLD = 4500
self.num_phrases = -1
# These will need to be modified according to where the pocketsphinx folder is
MODELDIR = "/home/l/Desktop/pocketsphinx/model/en-us"
# Create a decoder with certain model
config = Decoder.default_config()
config.set_string('-hmm', os.path.join(MODELDIR, '/home/l/Desktop/pocketsphinx/model/en-us/en-us/'))
config.set_string('-lm', os.path.join(MODELDIR, '/home/l/Desktop/pocketsphinx/model/en-us/en-us.lm.bin'))
config.set_string('-dict', os.path.join(MODELDIR, '/home/l/Desktop/pocketsphinx/model/en-us/cmudict-en-us.dict'))
config.set_string('-keyphrase', 'no one')
config.set_float('-kws_threshold', 1e+20)
# Creates decoder object for streaming data.
self.decoder = Decoder(config)
def setup_mic(self, num_samples=50):
""" Gets average audio intensity of your mic sound. You can use it to get
average intensities while you're talking and/or silent. The average
is the avg of the .2 of the largest intensities recorded.
"""
#print "Getting intensity values from mic."
p = pyaudio.PyAudio()
stream = p.open(format=self.FORMAT,
channels=self.CHANNELS,
rate=self.RATE,
input=True,
frames_per_buffer=self.CHUNK)
values = [math.sqrt(abs(audioop.avg(stream.read(self.CHUNK), 4)))
for x in range(num_samples)]
values = sorted(values, reverse=True)
r = sum(values[:int(num_samples * 0.2)]) / int(num_samples * 0.2)
#print " Finished "
#print " Average audio intensity is ", r
stream.close()
p.terminate()
if r < 3000:
self.THRESHOLD = 3500
else:
self.THRESHOLD = r + 100
def save_speech(self, data, p):
"""
Saves mic data to temporary WAV file. Returns filename of saved
file
"""
filename = 'output_'+str(int(time.time()))
# writes data to WAV file
data = ''.join(data)
wf = wave.open(filename + '.wav', 'wb')
wf.setnchannels(1)
wf.setsampwidth(p.get_sample_size(pyaudio.paInt16))
wf.setframerate(16000) # TODO make this value a function parameter?
wf.writeframes(data)
wf.close()
return filename + '.wav'
def decode_phrase(self, wav_file):
self.decoder.start_utt()
stream = open(wav_file, "rb")
while True:
buf = stream.read(1024)
if buf:
self.decoder.process_raw(buf, False, False)
else:
break
self.decoder.end_utt()
words = []
[words.append(seg.word) for seg in self.decoder.seg()]
return words
def run(self):
"""
Listens to Microphone, extracts phrases from it and calls pocketsphinx
to decode the sound
"""
self.setup_mic()
#Open stream
p = pyaudio.PyAudio()
stream = p.open(format=self.FORMAT,
channels=self.CHANNELS,
rate=self.RATE,
input=True,
frames_per_buffer=self.CHUNK)
audio2send = []
cur_data = '' # current chunk of audio data
rel = self.RATE/self.CHUNK
slid_win = deque(maxlen=self.SILENCE_LIMIT * rel)
#Prepend audio from 0.5 seconds before noise was detected
prev_audio = deque(maxlen=self.PREV_AUDIO * rel)
started = False
while True:
cur_data = stream.read(self.CHUNK)
slid_win.append(math.sqrt(abs(audioop.avg(cur_data, 4))))
if sum([x > self.THRESHOLD for x in slid_win]) > 0:
if started == False:
print "Bắt đầu ghi âm"
started = True
audio2send.append(cur_data)
elif started:
print "Hoàn thành ghi âm"
filename = self.save_speech(list(prev_audio) + audio2send, p)
r = self.decode_phrase(filename)
print "RESULT: ", r
# hotword for me is "no one": if r contains both "no" and "one", end the program
if r.count("one") > 0 and r.count("no") > 0:
Mic.playaudiofromAudio().play("/home/l/Desktop/PROJECT/Audio/beep_hi.wav")
os.remove(filename)
return
# Removes temp audio file
os.remove(filename)
# Reset all
started = False
slid_win = deque(maxlen=self.SILENCE_LIMIT * rel)
prev_audio = deque(maxlen= 0.5 * rel)
audio2send = []
print "Chế độ nghe ..."
else:
prev_audio.append(cur_data)
print "* Hoàn thành nghe"
stream.close()
p.terminate()
I'm looking for a solution, either on Linux or on Windows, that allows me to:
record video (+ audio) from my webcam and microphone, simultaneously,
save it as an .AVI file (or mpg or whatever),
display the video on the screen while recording it.
Compression is NOT an issue in my case, and I actually prefer to capture RAW and compress it later.
So far I've done it with an ActiveX component in VB which took care of everything, and I'd like to progress with Python (the VB solution is unstable and unreliable).
So far I've seen code that captures VIDEO only, or individual frames...
So far I've looked at:
OpenCV - couldn't find audio capture there
PyGame - no simultaneous audio capture (AFAIK)
VideoCapture - provides only single frames
SimpleCV - no audio
VLC - binding the VideoLAN program into wxPython - hopefully it will do (still investigating this option)
kivy - just heard about it, didn't manage to get it working under Windows so far
The question: is there a video and audio capture library for Python? Or, what are the other options, if any?
Answer: No. There is no single library/solution in python to do video/audio recording simultaneously. You have to implement both separately and merge the audio and video signal in a smart way to end up with a video/audio file.
I got a solution for the problem you present. My code addresses your three issues:
Records video + audio from webcam and microphone simultaneously.
It saves the final video/audio file as .AVI
Un-commenting lines 76, 77 and 78 will make the video be displayed on screen while recording.
My solution uses pyaudio for audio recording, opencv for video recording, and ffmpeg for muxing the two signals. To be able to record both simultaneously, I use multithreading: one thread records video, and a second one the audio. I have uploaded my code to GitHub and have also included all the essential parts here.
https://github.com/JRodrigoF/AVrecordeR
Note: OpenCV is not able to control the fps at which the webcam records. It can only specify the desired final fps in the encoding of the file, but the webcam usually behaves differently depending on its specifications and the light conditions (I found). So the fps has to be controlled at the level of the code.
import cv2
import pyaudio
import wave
import threading
import time
import subprocess
import os
class VideoRecorder():
# Video class based on openCV
def __init__(self):
self.open = True
self.device_index = 0
self.fps = 6 # fps should be the minimum constant rate at which the camera can
self.fourcc = "MJPG" # capture images (with no decrease in speed over time; testing is required)
self.frameSize = (640,480) # video formats and sizes also depend and vary according to the camera used
self.video_filename = "temp_video.avi"
self.video_cap = cv2.VideoCapture(self.device_index)
self.video_writer = cv2.VideoWriter_fourcc(*self.fourcc)
self.video_out = cv2.VideoWriter(self.video_filename, self.video_writer, self.fps, self.frameSize)
self.frame_counts = 1
self.start_time = time.time()
# Video starts being recorded
def record(self):
# counter = 1
timer_start = time.time()
timer_current = 0
while(self.open==True):
ret, video_frame = self.video_cap.read()
if (ret==True):
self.video_out.write(video_frame)
# print str(counter) + " " + str(self.frame_counts) + " frames written " + str(timer_current)
self.frame_counts += 1
# counter += 1
# timer_current = time.time() - timer_start
time.sleep(0.16)
# gray = cv2.cvtColor(video_frame, cv2.COLOR_BGR2GRAY)
# cv2.imshow('video_frame', gray)
# cv2.waitKey(1)
else:
break
# 0.16 delay -> 6 fps
#
# Finishes the video recording therefore the thread too
def stop(self):
if self.open==True:
self.open=False
self.video_out.release()
self.video_cap.release()
cv2.destroyAllWindows()
else:
pass
# Launches the video recording function using a thread
def start(self):
video_thread = threading.Thread(target=self.record)
video_thread.start()
class AudioRecorder():
# Audio class based on pyAudio and Wave
def __init__(self):
self.open = True
self.rate = 44100
self.frames_per_buffer = 1024
self.channels = 2
self.format = pyaudio.paInt16
self.audio_filename = "temp_audio.wav"
self.audio = pyaudio.PyAudio()
self.stream = self.audio.open(format=self.format,
channels=self.channels,
rate=self.rate,
input=True,
frames_per_buffer = self.frames_per_buffer)
self.audio_frames = []
# Audio starts being recorded
def record(self):
self.stream.start_stream()
while(self.open == True):
data = self.stream.read(self.frames_per_buffer)
self.audio_frames.append(data)
if self.open==False:
break
# Finishes the audio recording therefore the thread too
def stop(self):
if self.open==True:
self.open = False
self.stream.stop_stream()
self.stream.close()
self.audio.terminate()
waveFile = wave.open(self.audio_filename, 'wb')
waveFile.setnchannels(self.channels)
waveFile.setsampwidth(self.audio.get_sample_size(self.format))
waveFile.setframerate(self.rate)
waveFile.writeframes(b''.join(self.audio_frames))
waveFile.close()
pass
# Launches the audio recording function using a thread
def start(self):
audio_thread = threading.Thread(target=self.record)
audio_thread.start()
def start_AVrecording(filename):
global video_thread
global audio_thread
video_thread = VideoRecorder()
audio_thread = AudioRecorder()
audio_thread.start()
video_thread.start()
return filename
def start_video_recording(filename):
global video_thread
video_thread = VideoRecorder()
video_thread.start()
return filename
def start_audio_recording(filename):
global audio_thread
audio_thread = AudioRecorder()
audio_thread.start()
return filename
def stop_AVrecording(filename):
audio_thread.stop()
frame_counts = video_thread.frame_counts
elapsed_time = time.time() - video_thread.start_time
recorded_fps = frame_counts / elapsed_time
print "total frames " + str(frame_counts)
print "elapsed time " + str(elapsed_time)
print "recorded fps " + str(recorded_fps)
video_thread.stop()
# Makes sure the threads have finished
while threading.active_count() > 1:
time.sleep(1)
# Merging audio and video signal
if abs(recorded_fps - 6) >= 0.01: # If the fps rate was higher/lower than expected, re-encode it to the expected
print "Re-encoding"
cmd = "ffmpeg -r " + str(recorded_fps) + " -i temp_video.avi -pix_fmt yuv420p -r 6 temp_video2.avi"
subprocess.call(cmd, shell=True)
print "Muxing"
cmd = "ffmpeg -ac 2 -channel_layout stereo -i temp_audio.wav -i temp_video2.avi -pix_fmt yuv420p " + filename + ".avi"
subprocess.call(cmd, shell=True)
else:
print "Normal recording\nMuxing"
cmd = "ffmpeg -ac 2 -channel_layout stereo -i temp_audio.wav -i temp_video.avi -pix_fmt yuv420p " + filename + ".avi"
subprocess.call(cmd, shell=True)
print ".."
# Required and wanted processing of final files
def file_manager(filename):
local_path = os.getcwd()
if os.path.exists(str(local_path) + "/temp_audio.wav"):
os.remove(str(local_path) + "/temp_audio.wav")
if os.path.exists(str(local_path) + "/temp_video.avi"):
os.remove(str(local_path) + "/temp_video.avi")
if os.path.exists(str(local_path) + "/temp_video2.avi"):
os.remove(str(local_path) + "/temp_video2.avi")
if os.path.exists(str(local_path) + "/" + filename + ".avi"):
os.remove(str(local_path) + "/" + filename + ".avi")
To the questions asked above: yes, the code should also work under Python 3. I adjusted it a little and it now works for Python 2 and Python 3 (tested on Windows 7 with 2.7 and 3.6). You need to have ffmpeg installed, or at least the executable ffmpeg.exe in the same directory; you can get it here: https://www.ffmpeg.org/download.html. Of course you also need all the other libraries (cv2, numpy, pyaudio) installed, for example with:
pip install opencv-python numpy pyaudio
You can now run the code directly:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# VideoRecorder.py
from __future__ import print_function, division
import numpy as np
import cv2
import pyaudio
import wave
import threading
import time
import subprocess
import os
class VideoRecorder():
"Video class based on openCV"
def __init__(self, name="temp_video.avi", fourcc="MJPG", sizex=640, sizey=480, camindex=0, fps=30):
self.open = True
self.device_index = camindex
self.fps = fps # fps should be the minimum constant rate at which the camera can
self.fourcc = fourcc # capture images (with no decrease in speed over time; testing is required)
self.frameSize = (sizex, sizey) # video formats and sizes also depend and vary according to the camera used
self.video_filename = name
self.video_cap = cv2.VideoCapture(self.device_index)
self.video_writer = cv2.VideoWriter_fourcc(*self.fourcc)
self.video_out = cv2.VideoWriter(self.video_filename, self.video_writer, self.fps, self.frameSize)
self.frame_counts = 1
self.start_time = time.time()
def record(self):
"Video starts being recorded"
# counter = 1
timer_start = time.time()
timer_current = 0
while self.open:
ret, video_frame = self.video_cap.read()
if ret:
self.video_out.write(video_frame)
# print(str(counter) + " " + str(self.frame_counts) + " frames written " + str(timer_current))
self.frame_counts += 1
# counter += 1
# timer_current = time.time() - timer_start
time.sleep(1/self.fps)
# gray = cv2.cvtColor(video_frame, cv2.COLOR_BGR2GRAY)
# cv2.imshow('video_frame', gray)
# cv2.waitKey(1)
else:
break
def stop(self):
"Finishes the video recording therefore the thread too"
if self.open:
self.open=False
self.video_out.release()
self.video_cap.release()
cv2.destroyAllWindows()
def start(self):
"Launches the video recording function using a thread"
video_thread = threading.Thread(target=self.record)
video_thread.start()
class AudioRecorder():
"Audio class based on pyAudio and Wave"
def __init__(self, filename="temp_audio.wav", rate=44100, fpb=1024, channels=2):
self.open = True
self.rate = rate
self.frames_per_buffer = fpb
self.channels = channels
self.format = pyaudio.paInt16
self.audio_filename = filename
self.audio = pyaudio.PyAudio()
self.stream = self.audio.open(format=self.format,
channels=self.channels,
rate=self.rate,
input=True,
frames_per_buffer = self.frames_per_buffer)
self.audio_frames = []
def record(self):
"Audio starts being recorded"
self.stream.start_stream()
while self.open:
data = self.stream.read(self.frames_per_buffer)
self.audio_frames.append(data)
if not self.open:
break
def stop(self):
"Finishes the audio recording therefore the thread too"
if self.open:
self.open = False
self.stream.stop_stream()
self.stream.close()
self.audio.terminate()
waveFile = wave.open(self.audio_filename, 'wb')
waveFile.setnchannels(self.channels)
waveFile.setsampwidth(self.audio.get_sample_size(self.format))
waveFile.setframerate(self.rate)
waveFile.writeframes(b''.join(self.audio_frames))
waveFile.close()
def start(self):
"Launches the audio recording function using a thread"
audio_thread = threading.Thread(target=self.record)
audio_thread.start()
def start_AVrecording(filename="test"):
global video_thread
global audio_thread
video_thread = VideoRecorder()
audio_thread = AudioRecorder()
audio_thread.start()
video_thread.start()
return filename
def start_video_recording(filename="test"):
global video_thread
video_thread = VideoRecorder()
video_thread.start()
return filename
def start_audio_recording(filename="test"):
global audio_thread
audio_thread = AudioRecorder()
audio_thread.start()
return filename
def stop_AVrecording(filename="test"):
audio_thread.stop()
frame_counts = video_thread.frame_counts
elapsed_time = time.time() - video_thread.start_time
recorded_fps = frame_counts / elapsed_time
print("total frames " + str(frame_counts))
print("elapsed time " + str(elapsed_time))
print("recorded fps " + str(recorded_fps))
video_thread.stop()
# Makes sure the threads have finished
while threading.active_count() > 1:
time.sleep(1)
# Merging audio and video signal
if abs(recorded_fps - 6) >= 0.01: # If the fps rate was higher/lower than expected, re-encode it to the expected
print("Re-encoding")
cmd = "ffmpeg -r " + str(recorded_fps) + " -i temp_video.avi -pix_fmt yuv420p -r 6 temp_video2.avi"
subprocess.call(cmd, shell=True)
print("Muxing")
cmd = "ffmpeg -y -ac 2 -channel_layout stereo -i temp_audio.wav -i temp_video2.avi -pix_fmt yuv420p " + filename + ".avi"
subprocess.call(cmd, shell=True)
else:
print("Normal recording\nMuxing")
cmd = "ffmpeg -y -ac 2 -channel_layout stereo -i temp_audio.wav -i temp_video.avi -pix_fmt yuv420p " + filename + ".avi"
subprocess.call(cmd, shell=True)
print("..")
def file_manager(filename="test"):
"Required and wanted processing of final files"
local_path = os.getcwd()
if os.path.exists(str(local_path) + "/temp_audio.wav"):
os.remove(str(local_path) + "/temp_audio.wav")
if os.path.exists(str(local_path) + "/temp_video.avi"):
os.remove(str(local_path) + "/temp_video.avi")
if os.path.exists(str(local_path) + "/temp_video2.avi"):
os.remove(str(local_path) + "/temp_video2.avi")
# if os.path.exists(str(local_path) + "/" + filename + ".avi"):
# os.remove(str(local_path) + "/" + filename + ".avi")
if __name__ == '__main__':
start_AVrecording()
time.sleep(5)
stop_AVrecording()
file_manager()
I would recommend ffmpeg. There is a python wrapper.
http://code.google.com/p/pyffmpeg/
I've been looking around for a good answer to this, and I think it is GStreamer...
The documentation for the Python bindings is extremely light, and most of it seemed centered around the old 0.10 version of GStreamer instead of the new 1.X versions, but GStreamer is an extremely powerful, cross-platform multimedia framework that can stream, mux, transcode, and display just about anything.
I used JRodrigoF's script for a while on a project. However, I noticed that sometimes the threads would hang and it would cause the program to crash. Another issue is that openCV does not capture video frames at a reliable rate and ffmpeg would distort my video when re-encoding.
I came up with a new solution that records much more reliably and with much higher quality for my application. It presently only works on Windows because it uses pywinauto and the built-in Windows Camera app. The last bit of the script does some error-checking to confirm the video recorded successfully by checking the timestamp in the video's file name.
https://gist.github.com/mjdargen/956cc968864f38bfc4e20c9798c7d670
import pywinauto
import time
import subprocess
import os
import datetime
def win_record(duration):
subprocess.run('start microsoft.windows.camera:', shell=True) # open camera app
# focus window by getting handle using title and class name
# subprocess call opens camera and gets focus, but this provides alternate way
# t, c = 'Camera', 'ApplicationFrameWindow'
# handle = pywinauto.findwindows.find_windows(title=t, class_name=c)[0]
# # get app and window
# app = pywinauto.application.Application().connect(handle=handle)
# window = app.window(handle=handle)
# window.set_focus() # set focus
time.sleep(2) # have to sleep
# take control of camera window to take video
desktop = pywinauto.Desktop(backend="uia")
cam = desktop['Camera']
# cam.print_control_identifiers()
# make sure in video mode
if cam.child_window(title="Switch to Video mode", auto_id="CaptureButton_1", control_type="Button").exists():
cam.child_window(title="Switch to Video mode", auto_id="CaptureButton_1", control_type="Button").click()
time.sleep(1)
# start then stop video
cam.child_window(title="Take Video", auto_id="CaptureButton_1", control_type="Button").click()
time.sleep(duration+2)
cam.child_window(title="Stop taking Video", auto_id="CaptureButton_1", control_type="Button").click()
# retrieve vids from camera roll and sort
dir = 'C:/Users/m/Pictures/Camera Roll'
all_contents = list(os.listdir(dir))
vids = [f for f in all_contents if "_Pro.mp4" in f]
vids.sort()
vid = vids[-1] # get last vid
# compute time difference
vid_time = vid.replace('WIN_', '').replace('_Pro.mp4', '')
vid_time = datetime.datetime.strptime(vid_time, '%Y%m%d_%H_%M_%S')
now = datetime.datetime.now()
diff = now - vid_time
# time difference greater than 2 minutes: assume something went wrong & quit
if diff.seconds > 120:
quit()
subprocess.run('Taskkill /IM WindowsCamera.exe /F', shell=True) # close camera app
print('Recorded successfully!')
win_record(2)
If you notice misalignment between video and audio with the code above, please see my solution below.
I think the most upvoted answer above does a great job. However, it did not work perfectly for me, especially when using a low fps rate (say 10). The main issue is with the video recording. In order to properly synchronize video and audio recording with ffmpeg, one has to make sure that cv2.VideoCapture() and cv2.VideoWriter() share the exact same FPS, because the recorded video's length is determined solely by the fps rate and the number of frames.
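A worked example of that arithmetic (illustrative numbers only):
# What happens when the capture fps and the declared writer fps differ.
captured_fps = 10.0        # rate at which the camera actually delivered frames
declared_fps = 30.0        # rate passed to cv2.VideoWriter
seconds_recorded = 10.0
frames_written = captured_fps * seconds_recorded     # 100 frames on disk
video_playback = frames_written / declared_fps       # plays back in ~3.3 s
audio_playback = seconds_recorded                    # audio track is still 10 s
print(video_playback, audio_playback)                # 3.33 vs 10.0 -> audio/video drift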
Following is my suggested update:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# VideoRecorder.py
from __future__ import print_function, division
import numpy as np
import cv2
import pyaudio
import wave
import threading
import time
import subprocess
import os
import ffmpeg
class VideoRecorder():
"Video class based on openCV"
def __init__(self, name="temp_video.avi", fourcc="MJPG", sizex=640, sizey=480, camindex=0, fps=30):
self.open = True
self.device_index = camindex
self.fps = fps # fps should be the minimum constant rate at which the camera can
self.fourcc = fourcc # capture images (with no decrease in speed over time; testing is required)
self.frameSize = (sizex, sizey) # video formats and sizes also depend and vary according to the camera used
self.video_filename = name
self.video_cap = cv2.VideoCapture(self.device_index)
self.video_cap.set(cv2.CAP_PROP_FPS, self.fps)
self.video_writer = cv2.VideoWriter_fourcc(*self.fourcc)
self.video_out = cv2.VideoWriter(self.video_filename, self.video_writer, self.fps, self.frameSize)
self.frame_counts = 1
self.start_time = time.time()
def record(self):
"Video starts being recorded"
# counter = 1
timer_start = time.time()
timer_current = 0
while self.open:
ret, video_frame = self.video_cap.read()
if ret:
self.video_out.write(video_frame)
# print(str(counter) + " " + str(self.frame_counts) + " frames written " + str(timer_current))
self.frame_counts += 1
# print(self.frame_counts)
# counter += 1
# timer_current = time.time() - timer_start
# time.sleep(1/self.fps)
# gray = cv2.cvtColor(video_frame, cv2.COLOR_BGR2GRAY)
# cv2.imshow('video_frame', gray)
# cv2.waitKey(1)
else:
break
def stop(self):
"Finishes the video recording therefore the thread too"
if self.open:
self.open=False
self.video_out.release()
self.video_cap.release()
cv2.destroyAllWindows()
def start(self):
"Launches the video recording function using a thread"
video_thread = threading.Thread(target=self.record)
video_thread.start()
class AudioRecorder():
"Audio class based on pyAudio and Wave"
def __init__(self, filename="temp_audio.wav", rate=44100, fpb=1024, channels=2):
self.open = True
self.rate = rate
self.frames_per_buffer = fpb
self.channels = channels
self.format = pyaudio.paInt16
self.audio_filename = filename
self.audio = pyaudio.PyAudio()
self.stream = self.audio.open(format=self.format,
channels=self.channels,
rate=self.rate,
input=True,
frames_per_buffer = self.frames_per_buffer)
self.audio_frames = []
def record(self):
"Audio starts being recorded"
self.stream.start_stream()
while self.open:
data = self.stream.read(self.frames_per_buffer)
self.audio_frames.append(data)
if not self.open:
break
def stop(self):
"Finishes the audio recording therefore the thread too"
if self.open:
self.open = False
self.stream.stop_stream()
self.stream.close()
self.audio.terminate()
waveFile = wave.open(self.audio_filename, 'wb')
waveFile.setnchannels(self.channels)
waveFile.setsampwidth(self.audio.get_sample_size(self.format))
waveFile.setframerate(self.rate)
waveFile.writeframes(b''.join(self.audio_frames))
waveFile.close()
def start(self):
"Launches the audio recording function using a thread"
audio_thread = threading.Thread(target=self.record)
audio_thread.start()
def start_AVrecording(filename="test"):
global video_thread
global audio_thread
video_thread = VideoRecorder()
audio_thread = AudioRecorder()
audio_thread.start()
video_thread.start()
return filename
def start_video_recording(filename="test"):
global video_thread
video_thread = VideoRecorder()
video_thread.start()
return filename
def start_audio_recording(filename="test"):
global audio_thread
audio_thread = AudioRecorder()
audio_thread.start()
return filename
def stop_AVrecording(filename="test"):
audio_thread.stop()
frame_counts = video_thread.frame_counts
elapsed_time = time.time() - video_thread.start_time
recorded_fps = frame_counts / elapsed_time
print("total frames " + str(frame_counts))
print("elapsed time " + str(elapsed_time))
print("recorded fps " + str(recorded_fps))
video_thread.stop()
# Makes sure the threads have finished
while threading.active_count() > 1:
time.sleep(1)
video_stream = ffmpeg.input(video_thread.video_filename)
audio_stream = ffmpeg.input(audio_thread.audio_filename)
ffmpeg.output(audio_stream, video_stream, 'out.mp4').run(overwrite_output=True)
# # Merging audio and video signal
# if abs(recorded_fps - 6) >= 0.01: # If the fps rate was higher/lower than expected, re-encode it to the expected
# print("Re-encoding")
# cmd = "ffmpeg -r " + str(recorded_fps) + " -i temp_video.avi -pix_fmt yuv420p -r 6 temp_video2.avi"
# subprocess.call(cmd, shell=True)
# print("Muxing")
# cmd = "ffmpeg -y -ac 2 -channel_layout stereo -i temp_audio.wav -i temp_video2.avi -pix_fmt yuv420p " + filename + ".avi"
# subprocess.call(cmd, shell=True)
# else:
# print("Normal recording\nMuxing")
# cmd = "ffmpeg -y -ac 2 -channel_layout stereo -i temp_audio.wav -i temp_video.avi -pix_fmt yuv420p " + filename + ".avi"
# subprocess.call(cmd, shell=True)
# print("..")
def file_manager(filename="test"):
"Required and wanted processing of final files"
local_path = os.getcwd()
if os.path.exists(str(local_path) + "/temp_audio.wav"):
os.remove(str(local_path) + "/temp_audio.wav")
if os.path.exists(str(local_path) + "/temp_video.avi"):
os.remove(str(local_path) + "/temp_video.avi")
if os.path.exists(str(local_path) + "/temp_video2.avi"):
os.remove(str(local_path) + "/temp_video2.avi")
# if os.path.exists(str(local_path) + "/" + filename + ".avi"):
# os.remove(str(local_path) + "/" + filename + ".avi")
if __name__ == '__main__':
start_AVrecording()
# try:
# while True:
# pass
# except KeyboardInterrupt:
# stop_AVrecording()
time.sleep(10)
stop_AVrecording()
print("finishing recording")
file_manager()
Using everyone's contributions and following the suggestion of Paul, I was able to come up with the following code:
Recorder.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# VideoRecorder.py
from __future__ import print_function, division
import numpy as np
import sys
import cv2
import pyaudio
import wave
import threading
import time
import subprocess
import os
import ffmpeg
REC_FOLDER = "recordings/"
class Recorder():
def __init__(self, filename):
self.filename = filename
self.video_thread = self.VideoRecorder(self, REC_FOLDER + filename)
self.audio_thread = self.AudioRecorder(self, REC_FOLDER + filename)
def startRecording(self):
self.video_thread.start()
self.audio_thread.start()
def stopRecording(self):
self.video_thread.stop()
self.audio_thread.stop()
def saveRecording(self):
#Save audio / Show video resume
self.audio_thread.saveAudio()
self.video_thread.showFramesResume()
#Merges both streams and writes
video_stream = ffmpeg.input(self.video_thread.video_filename)
audio_stream = ffmpeg.input(self.audio_thread.audio_filename)
while (not os.path.exists(self.audio_thread.audio_filename)):
print("waiting for audio file to exit...")
stream = ffmpeg.output(video_stream, audio_stream, REC_FOLDER + self.filename +".mp4")
try:
ffmpeg.run(stream, capture_stdout=True, capture_stderr=True, overwrite_output=True)
except ffmpeg.Error as e:
print(e.stdout, file=sys.stderr)
print(e.stderr, file=sys.stderr)
class VideoRecorder():
"Video class based on openCV"
def __init__(self, recorder, name, fourcc="MJPG", frameSize=(640,480), camindex=0, fps=15):
self.recorder = recorder
self.open = True
self.duration = 0
self.device_index = camindex
self.fps = fps # fps should be the minimum constant rate at which the camera can
self.fourcc = fourcc # capture images (with no decrease in speed over time; testing is required)
self.video_filename = name + ".avi" # video formats and sizes also depend and vary according to the camera used
self.video_cap = cv2.VideoCapture(self.device_index, cv2.CAP_DSHOW)
self.video_writer = cv2.VideoWriter_fourcc(*fourcc)
self.video_out = cv2.VideoWriter(self.video_filename, self.video_writer, self.fps, frameSize)
self.frame_counts = 1
self.start_time = time.time()
def record(self):
"Video starts being recorded"
counter = 1
while self.open:
ret, video_frame = self.video_cap.read()
if ret:
self.video_out.write(video_frame)
self.frame_counts += 1
counter += 1
self.duration += 1/self.fps
if (video_frame is None): print("I WAS NONEEEEEEEEEEEEEEEEEEEEEE")
gray = cv2.cvtColor(video_frame, cv2.COLOR_BGR2GRAY)
cv2.imshow('video_frame', gray)
cv2.waitKey(1)
while(self.duration - self.recorder.audio_thread.duration >= 0.2 and self.recorder.audio_thread.open):
time.sleep(0.2)
else:
break
#Release Video
self.video_out.release()
self.video_cap.release()
cv2.destroyAllWindows()
self.video_out = None
def stop(self):
"Finishes the video recording therefore the thread too"
self.open=False
def start(self):
"Launches the video recording function using a thread"
self.thread = threading.Thread(target=self.record)
self.thread.start()
def showFramesResume(self):
#Only stop if video has all frames
frame_counts = self.frame_counts
elapsed_time = time.time() - self.start_time
recorded_fps = self.frame_counts / elapsed_time
print("total frames " + str(frame_counts))
print("elapsed time " + str(elapsed_time))
print("recorded fps " + str(recorded_fps))
class AudioRecorder():
"Audio class based on pyAudio and Wave"
def __init__(self, recorder, filename, rate=44100, fpb=1024, channels=1, audio_index=0):
self.recorder = recorder
self.open = True
self.rate = rate
self.duration = 0
self.frames_per_buffer = fpb
self.channels = channels
self.format = pyaudio.paInt16
self.audio_filename = filename + ".wav"
self.audio = pyaudio.PyAudio()
self.stream = self.audio.open(format=self.format,
channels=self.channels,
rate=self.rate,
input=True,
input_device_index=audio_index,
frames_per_buffer = self.frames_per_buffer)
self.audio_frames = []
def record(self):
"Audio starts being recorded"
self.stream.start_stream()
t_start = time.time_ns()
while self.open:
try:
self.duration += self.frames_per_buffer / self.rate
data = self.stream.read(self.frames_per_buffer)
self.audio_frames.append(data)
except Exception as e:
print('\n' + '*'*80)
print('PyAudio read exception at %.1fms\n' % ((time.time_ns() - t_start)/10**6))
print(e)
print('*'*80 + '\n')
while(self.duration - self.recorder.video_thread.duration >= 0.5):
time.sleep(0.5)
#Closes audio stream
self.stream.stop_stream()
self.stream.close()
self.audio.terminate()
def stop(self):
"Finishes the audio recording therefore the thread too"
self.open = False
def start(self):
"Launches the audio recording function using a thread"
self.thread = threading.Thread(target=self.record)
self.thread.start()
def saveAudio(self):
#Save Audio File
waveFile = wave.open(self.audio_filename, 'wb')
waveFile.setnchannels(self.channels)
waveFile.setsampwidth(self.audio.get_sample_size(self.format))
waveFile.setframerate(self.rate)
waveFile.writeframes(b''.join(self.audio_frames))
waveFile.close()
Main.py
from recorder import Recorder
import time
recorder = Recorder("test1")
recorder.startRecording()
time.sleep(240)
recorder.stopRecording()
recorder.saveRecording()
With this solution, the camera and the audio will wait for each other.
I also tried the FFmpeg Re-encoding and Muxing and even though it was able to synchronize the audio with video, the video had a massive quality drop.
You can write an offline HTML/JS page that does video-with-audio recording and open that page with the Python library pywebview. It should work fine.
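A minimal sketch of that idea (assuming a local recorder.html page that uses getUserMedia/MediaRecorder; the file name is a placeholder):
import webview  # pip install pywebview

# Open the local HTML/JS recorder page in a native window.
webview.create_window('Recorder', 'recorder.html')
webview.start()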
I was randomly getting "[Errno -9999] Unanticipated host error" while using JRodrigoF's solution and found that it's due to a race condition where the audio stream can be closed just before being read for the last time inside record() of the AudioRecorder class.
I modified it slightly so that all the closing procedures are done after the while loop, and added a function list_audio_devices() that shows the list of audio devices to select from. I also added an audio device index as a parameter for choosing an audio device.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# VideoRecorder.py
from __future__ import print_function, division
import numpy as np
import cv2
import pyaudio
import wave
import threading
import time
import subprocess
import os
class VideoRecorder():
"Video class based on openCV"
def __init__(self, name="temp_video.avi", fourcc="MJPG", sizex=640, sizey=480, camindex=0, fps=30):
self.open = True
self.device_index = camindex
self.fps = fps # fps should be the minimum constant rate at which the camera can
self.fourcc = fourcc # capture images (with no decrease in speed over time; testing is required)
self.frameSize = (sizex, sizey) # video formats and sizes also depend and vary according to the camera used
self.video_filename = name
self.video_cap = cv2.VideoCapture(self.device_index)
self.video_writer = cv2.VideoWriter_fourcc(*self.fourcc)
self.video_out = cv2.VideoWriter(self.video_filename, self.video_writer, self.fps, self.frameSize)
self.frame_counts = 1
self.start_time = time.time()
def record(self):
"Video starts being recorded"
# counter = 1
timer_start = time.time()
timer_current = 0
while self.open:
ret, video_frame = self.video_cap.read()
if ret:
self.video_out.write(video_frame)
# print(str(counter) + " " + str(self.frame_counts) + " frames written " + str(timer_current))
self.frame_counts += 1
# counter += 1
# timer_current = time.time() - timer_start
time.sleep(1/self.fps)
# gray = cv2.cvtColor(video_frame, cv2.COLOR_BGR2GRAY)
# cv2.imshow('video_frame', gray)
# cv2.waitKey(1)
else:
break
def stop(self):
"Finishes the video recording therefore the thread too"
if self.open:
self.open=False
self.video_out.release()
self.video_cap.release()
cv2.destroyAllWindows()
def start(self):
"Launches the video recording function using a thread"
video_thread = threading.Thread(target=self.record)
video_thread.start()
class AudioRecorder():
"Audio class based on pyAudio and Wave"
def __init__(self, filename="temp_audio.wav", rate=44100, fpb=2**12, channels=1, audio_index=0):
self.open = True
self.rate = rate
self.frames_per_buffer = fpb
self.channels = channels
self.format = pyaudio.paInt16
self.audio_filename = filename
self.audio = pyaudio.PyAudio()
self.stream = self.audio.open(format=self.format,
channels=self.channels,
rate=self.rate,
input=True,
input_device_index=audio_index,
frames_per_buffer = self.frames_per_buffer)
self.audio_frames = []
def record(self):
"Audio starts being recorded"
self.stream.start_stream()
t_start = time.time_ns()
while self.open:
try:
data = self.stream.read(self.frames_per_buffer)
self.audio_frames.append(data)
except Exception as e:
print('\n' + '*'*80)
print('PyAudio read exception at %.1fms\n' % ((time.time_ns() - t_start)/10**6))
print(e)
print('*'*80 + '\n')
time.sleep(0.01)
self.stream.stop_stream()
self.stream.close()
self.audio.terminate()
waveFile = wave.open(self.audio_filename, 'wb')
waveFile.setnchannels(self.channels)
waveFile.setsampwidth(self.audio.get_sample_size(self.format))
waveFile.setframerate(self.rate)
waveFile.writeframes(b''.join(self.audio_frames))
waveFile.close()
def stop(self):
"Finishes the audio recording therefore the thread too"
if self.open:
self.open = False
def start(self):
"Launches the audio recording function using a thread"
audio_thread = threading.Thread(target=self.record)
audio_thread.start()
def start_AVrecording(filename="test", audio_index=0, sample_rate=44100):
global video_thread
global audio_thread
video_thread = VideoRecorder()
audio_thread = AudioRecorder(audio_index=audio_index, rate=sample_rate)
audio_thread.start()
video_thread.start()
return filename
def start_video_recording(filename="test"):
global video_thread
video_thread = VideoRecorder()
video_thread.start()
return filename
def start_audio_recording(filename="test", audio_index=0, sample_rate=44100):
global audio_thread
audio_thread = AudioRecorder(audio_index=audio_index, rate=sample_rate)
audio_thread.start()
return filename
def stop_AVrecording(filename="test"):
audio_thread.stop()
frame_counts = video_thread.frame_counts
elapsed_time = time.time() - video_thread.start_time
recorded_fps = frame_counts / elapsed_time
print("total frames " + str(frame_counts))
print("elapsed time " + str(elapsed_time))
print("recorded fps " + str(recorded_fps))
video_thread.stop()
# Makes sure the threads have finished
while threading.active_count() > 1:
time.sleep(1)
# Merging audio and video signal
if abs(recorded_fps - 6) >= 0.01: # If the fps rate was higher/lower than expected, re-encode it to the expected
print("Re-encoding")
cmd = "ffmpeg -r " + str(recorded_fps) + " -i temp_video.avi -pix_fmt yuv420p -r 6 temp_video2.avi"
subprocess.call(cmd, shell=True)
print("Muxing")
cmd = "ffmpeg -y -ac 2 -channel_layout stereo -i temp_audio.wav -i temp_video2.avi -pix_fmt yuv420p " + filename + ".avi"
subprocess.call(cmd, shell=True)
else:
print("Normal recording\nMuxing")
cmd = "ffmpeg -y -ac 2 -channel_layout stereo -i temp_audio.wav -i temp_video.avi -pix_fmt yuv420p " + filename + ".avi"
subprocess.call(cmd, shell=True)
print("..")
def file_manager(filename="test"):
"Required and wanted processing of final files"
local_path = os.getcwd()
if os.path.exists(str(local_path) + "/temp_audio.wav"):
os.remove(str(local_path) + "/temp_audio.wav")
if os.path.exists(str(local_path) + "/temp_video.avi"):
os.remove(str(local_path) + "/temp_video.avi")
if os.path.exists(str(local_path) + "/temp_video2.avi"):
os.remove(str(local_path) + "/temp_video2.avi")
# if os.path.exists(str(local_path) + "/" + filename + ".avi"):
# os.remove(str(local_path) + "/" + filename + ".avi")
def list_audio_devices(name_filter=None):
pa = pyaudio.PyAudio()
device_index = None
sample_rate = None
for x in range(pa.get_device_count()):
info = pa.get_device_info_by_index(x)
print(pa.get_device_info_by_index(x))
if name_filter is not None and name_filter in info['name']:
device_index = info['index']
sample_rate = int(info['defaultSampleRate'])
break
return device_index, sample_rate
if __name__ == '__main__':
start_AVrecording()
time.sleep(5)
stop_AVrecording()
file_manager()