Python-Download video from youtube and convert it to mp3 - python

This code downloads the video and converts it to an MP3 file. However, the MP3 audio ends up twice as long as the original video. How can I solve this problem?
import pafy
import os
import moviepy.editor as mp
# Download one YouTube stream with pafy and save its audio track as an MP3.
# Single-argument print(...) calls behave the same on Python 2 and Python 3.
print("[+] Welcome to Youtube downloader.")
download_url = raw_input("URL :")
video = pafy.new(download_url)

# Take the first stream pafy lists for this video.
file_name = video.streams[0]
print(file_name)

# Create the output folder on first use.
directory = "downloaded-music"
if not os.path.exists(directory):
    os.makedirs(directory)

# The script relies on download() returning the path of the saved file.
x = file_name.download(filepath=directory)

clip = mp.VideoFileClip(x)
try:
    # Printed for information only; the value is not used afterwards.
    print(clip.size)
    clip.audio.write_audiofile(x + ".mp3")
finally:
    # Close the clip before deleting its source: an open reader can keep the
    # file locked, which makes os.remove fail on Windows.
    clip.close()

# The downloaded video is only an intermediate file.
os.remove(x)

Is it the value of "clip.size" that is twice as large as the real one, or is it the actual length of the file?

Related

I receive a WinError2 whenever I try to download a playlist using pytube

I'm trying to create an MP3 playlist downloader using Python and the 'pytube' module, but whenever I run the code I get '[WinError 2] The system cannot find the file specified'. Here's the code I'm working on:
from pytube import Playlist
import os
# Build the playlist from a URL typed by the user.
yt = Playlist((input("Enter the URL of the video you want to download: \n>> ")))
for video in yt.videos:
print('videos to be downloaded:')
print(video.title)
print("Enter the destination (leave blank for current directory)")
# An empty answer falls back to '.', the current directory.
destination = str(input(">> ")) or '.'
# Download the first audio-only stream; the code below treats the return value
# as the path of the saved file.
out_file = video.streams.filter(only_audio=True).first().download(destination)
# NOTE(review): out_file is used as a path string below, so this loop runs once
# per character of that path. After the first pass the file has already been
# renamed, so the next os.rename(out_file, ...) has no source file left - a
# likely cause of the reported WinError 2. The loop variable also shadows the
# builtin len().
for len in out_file:
base, ext = os.path.splitext(out_file)
new_file = base + '.mp3'
# Renaming only changes the extension; no audio re-encoding happens here.
os.rename(out_file, new_file)
print(video.title + " has been successfully downloaded.")
It only downloads one video and then fails with this error. If I remove it from the 'for' loop, it downloads all the songs, but only the last downloaded video gets converted into an .mp3 while the rest remain .mp4 files. I was expecting it to download all the songs while telling the user which song has finished downloading.

How to compress WAV file in python?

I have converted MP3 files to WAV format, but how can I compress the WAV files to a very small size (the same as or smaller than the MP3 files) without changing the file format?
from pydub import AudioSegment
import os
# Source folder with the MP3 files and destination folder for the WAV files.
src_folder = "D:/projects/data/mp3"
dst_folder = "D:/projects/data/wav"

# Every entry of the source folder is attempted.
files = os.listdir(src_folder)
for name in files:
    # Output base name: drop only a trailing ".mp3" so that the same text
    # elsewhere in the file name is left alone.
    wav_name = name[:-len(".mp3")] if name.endswith(".mp3") else name
    try:
        # Convert MP3 to WAV: decode the source and re-export it.
        sound = AudioSegment.from_mp3("{}/{}".format(src_folder, name))
        sound.export("{}/{}.wav".format(dst_folder, wav_name), format="wav")
    except Exception as e:
        # Keep converting the remaining files, but report the failure instead
        # of hiding it.
        print("Could not convert {}: {}".format(name, e))
# Export as MP3 with extra encoder arguments: as ffmpeg options, "-ac 2" asks
# for two audio channels and "-ar 8000" for an 8000 Hz sample rate; the low
# sample rate is presumably what shrinks the file.
# NOTE(review): s1 is not defined in this snippet - presumably a pydub
# AudioSegment loaded earlier; confirm against the full answer.
s1.export("output.mp3", format='mp3', parameters=["-ac","2","-ar","8000"])
The line of code managed to reduce my audio size by half its previous size. Hope this is helpful to someone

Reg : How to Download YT playlist and convert them compressed Audio file using Python?

Can someone please help me download a playlist from YouTube and convert it into compressed audio files? I tried the code shown below.
from pytube import YouTube
from pytube import Playlist
from pydub import AudioSegment
import os
import moviepy.editor as mp
import re
YOUTUBE_STREAM_AUDIO = '140' # modify the value to download a different stream
DOWNLOAD_DIR = 'C:\\MyData\\NewAudio'
playlist = Playlist("https://www.youtube.com/watch?v=z9bZufPHFLU&list=PLfqMhTWNBTe0b2nM6JHVCnAkhQRGiZMSJ")
# this fixes the empty playlist.videos list
#playlist._video_regex = re.compile(r"\"url\":\"(/watch\?v=[\w-]*)")

# Show how many videos were found and list their URLs.
print(len(playlist.video_urls))
for url in playlist.video_urls:
    print(url)

# physically downloading the audio track
for video in playlist.videos:
    audioStream = video.streams.get_by_itag(YOUTUBE_STREAM_AUDIO)
    if audioStream is None:
        # get_by_itag returns None when the video does not offer that itag;
        # skip the video instead of crashing on None.download(...).
        print("No stream with itag " + YOUTUBE_STREAM_AUDIO + " for " + video.watch_url)
        continue
    audioStream.download(output_path=DOWNLOAD_DIR)

# Convert every downloaded .mp4 to .mp3 and delete the source afterwards.
for file in os.listdir(DOWNLOAD_DIR):
    # Match on the extension only, so a name that merely contains "mp4"
    # (for example an already converted "mp4 demo.mp3") is left alone.
    if not file.lower().endswith('.mp4'):
        continue
    mp4_path = os.path.join(DOWNLOAD_DIR, file)
    mp3_path = os.path.join(DOWNLOAD_DIR, os.path.splitext(file)[0] + '.mp3')
    new_file = mp.AudioFileClip(mp4_path)
    try:
        new_file.write_audiofile(mp3_path)
    finally:
        # Release the reader before deleting; an open clip can keep the file
        # locked on Windows.
        new_file.close()
    # Only reached when the conversion succeeded.
    os.remove(mp4_path)
If those warnings are preventing your code from functioning properly, you'll want to install FFmpeg by either:
Placing ffmpeg.exe in your project's directory
Putting ffmpeg.exe into PATH
You can find downloads for FFMPEG here, and you can find the ffmpeg.exe in the bin folder of the .zip/.7z file downloaded from there.

How to batch rename video files with quality information using python?

I was trying to write a program to rename all the video files of a folder. I just wanted to add video quality or dimension like (720p) or (1080p) or something like that to the end of the current file name. But I'm getting the following error:
Traceback (most recent call last):
File "f:\Python Projects\Practice\mm.py", line 17, in <module>
os.rename(file_name, f'{file_title} ({height}p){file_extension}')
PermissionError: [WinError 32] The process cannot access the file because it is being used by another process: 'Video 1.mp4' -> 'Video 1 (1080p).mp4'
Here is my code:
import os
from cv2 import cv2
# Work inside the video folder so the bare names from os.listdir() resolve.
os.chdir(r'F:\Python Projects\Practice\Temp Files')
for file_name in os.listdir():
# Getting Video Resolution
# The file is opened only so that str(f) can be split on single quotes to pull
# the file name back out of the file object's text representation.
with open(file_name, 'r') as f:
f_string = str(f).split('\'')[1]
video_path = f'F:\\Python Projects\\Practice\\Temp Files\\{f_string}'
video = cv2.VideoCapture(video_path)
height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Getting the title
file_title, file_extension = os.path.splitext(file_name)
# NOTE(review): `video` is never released before this rename, so the capture
# object presumably still holds the file open - consistent with the reported
# PermissionError (WinError 32).
os.rename(file_name, f'{file_title} ({height}p){file_extension}')
Can anyone tell me how I can fix this problem? Thanks in advance... :)
The problem is that cv2.VideoCapture(video_path) opens your file as well. As long as this object exists, the file stays open (even though the handle from your open(...) as f: is closed once you exit the with block).
So you have to release it explicitly with:
video.release()
Something like this. Tested. Use
video.release()
to close a file opened with cv2.
import os
from cv2 import cv2
# Append the frame height, e.g. " (1080p)", to every file name in the folder.
os.chdir(r'F:\Python Projects\Practice\Temp Files')
for file_name in os.listdir():
    # Getting Video Resolution
    # os.listdir() already yields the file name. Opening the file and parsing
    # str(f) to recover it is unnecessary and breaks on names that contain an
    # apostrophe.
    video_path = f'F:\\Python Projects\\Practice\\Temp Files\\{file_name}'
    video = cv2.VideoCapture(video_path)
    try:
        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    finally:
        # Always release the capture so the file is no longer open when it is
        # renamed (otherwise Windows raises WinError 32).
        video.release()
    # Getting the title
    file_title, file_extension = os.path.splitext(file_name)
    os.rename(file_name, f'{file_title} ({height}p){file_extension}')
I've simplified the code and it works quite well. I'm sharing it here. If anyone benefits from it, it will be a matter of pride for me... :)
import os
from cv2 import cv2
# Append the frame height, e.g. " (1080p)", to every video in video_folder.
video_folder = r'F:\Python Projects\Practice\Temp Files'
os.chdir(video_folder)
for file_name in os.listdir():
    # Getting video quality
    video_path = os.path.join(video_folder, file_name)
    video = cv2.VideoCapture(video_path)
    try:
        readable = video.isOpened()
        # CAP_PROP_FRAME_HEIGHT is the frame height, so name the value that.
        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    finally:
        # Release before renaming; an open capture keeps the file locked.
        video.release()
    if not readable or height <= 0:
        # Not something OpenCV can read as a video - do not tag it "(0p)".
        continue
    # Getting title
    file_title, file_extension = os.path.splitext(file_name)
    quality_tag = f' ({height}p)'
    if file_title.endswith(quality_tag):
        # Already renamed on an earlier run; avoid "name (720p) (720p)".
        continue
    new_file_name = f'{file_title}{quality_tag}{file_extension}'
    # Renaming the file
    os.rename(file_name, new_file_name)
print('Rename Successful!')

Python Wand Eating all available Disk Space on Mac when converting PDFs using OCR

I believe this is my first StackOverflow question, so please be nice.
I am OCRing a repository of PDFs (~1GB in total) ranging from 50-200 pages each and found that suddenly all of the available 100GB of remaining harddrive space on my Macbook Pro were gone. Based on a previous post, it seems that ImageMagick is the culprit as shown here.
I found that these files are called 'magick-*' and are stored in /private/var/tmp. For only 23 PDFs it had created 3576 files totaling 181GB.
How can I delete these files immediately within the code after they are no longer needed? Thank you in advance for any suggestions to remedy this issue.
Here is the code:
import io, os
import json
import unicodedata
from PIL import Image as PI
import pyocr
import pyocr.builders
from wand.image import Image
from tqdm import tqdm
# Folder that is scanned for PDFs and that receives the generated text files
destination_folder = 'contract_data/Contracts_Backlog/'

# NFKC-normalised names of the PDFs found in the folder
pdfs = []
for entry in os.listdir(destination_folder):
    if entry.lower().endswith('.pdf'):
        pdfs.append(unicodedata.normalize('NFKC', entry.decode('utf8')))

# NFKC-normalised names of the text files already present
txt_files = []
for entry in os.listdir(destination_folder):
    if entry.lower().endswith('.txt'):
        txt_files.append(unicodedata.normalize('NFKC', entry.decode('utf8')))
### Perform OCR on PDFs
# Run OCR over the document at `filename` and return a list of recognised text,
# one entry per item of the converted image sequence (presumably one per PDF
# page - confirm against Wand's behaviour).
def ocr_pdf_to_text(filename):
# Use the first OCR tool that pyocr reports as available.
tool = pyocr.get_available_tools()[0]
# Language code handed to the OCR tool.
lang = 'spa'
# JPEG blobs, one per sequence entry.
req_image = []
# Recognised text, in the same order as req_image.
final_text = []
# NOTE(review): image_pdf, image_jpeg and every img_page are created without a
# `with` block and are never closed in this function, so their resources are
# not released here - presumably the source of the leftover magick-* temp
# files described in the question.
image_pdf = Image(filename=filename, resolution=300)
image_jpeg = image_pdf.convert('jpeg')
for img in image_jpeg.sequence:
img_page = Image(image=img)
req_image.append(img_page.make_blob('jpeg'))
# Second pass: OCR each JPEG blob through PIL.
for img in req_image:
txt = tool.image_to_string(
PI.open(io.BytesIO(img)),
lang=lang,
builder=pyocr.builders.TextBuilder()
)
final_text.append(txt)
return final_text
# OCR every PDF that does not yet have a matching .txt file next to it.
# Single-argument print(...) calls behave the same on Python 2 and Python 3.
for filename in tqdm(pdfs):
    # "name.pdf" -> "name.txt"
    txt_file = filename[:-3] + 'txt'
    txt_filename = destination_folder + txt_file
    if txt_file in txt_files:
        # Already converted on an earlier run.
        continue
    print('Converting ' + filename)
    try:
        ocr_txt = ocr_pdf_to_text(destination_folder + filename)
        # One JSON object per line: {index: text}. The with block closes the
        # file, so no explicit close() is needed.
        with open(txt_filename, 'w') as f:
            for i, page_text in enumerate(ocr_txt):
                f.write(json.dumps({i: page_text.encode('utf8')}))
                f.write('\n')
    except Exception as exc:
        # A bare "except:" would also trap KeyboardInterrupt and SystemExit,
        # making the batch impossible to stop; catch real errors only and
        # report why the file failed before moving on to the next one.
        print("Could not OCR " + filename + " (" + repr(exc) + ")")
A hacky way of dealing with this was to add an os.remove() statement within the main loop to remove the tmp files after creation.
# Remove ImageMagick's leftover scratch files from the temp folder.
tempdir = '/private/var/tmp/'
files = os.listdir(tempdir)
for file in files:
    # The scratch files are named "magick-*"; match the prefix so unrelated
    # files that merely contain "magick" in their name are not deleted.
    if file.startswith("magick-"):
        try:
            os.remove(os.path.join(tempdir, file))
        except OSError:
            # Best-effort cleanup: a file that is still in use, already gone or
            # owned by someone else must not abort the surrounding OCR run.
            pass
Image should be used as a context manager, because Wand uses it to decide when to dispose of resources such as temporary files and in-memory buffers. The with block tells Wand the boundaries within which these Image objects are still needed and when they become unnecessary.
See also the official docs.

Categories