This code downloads the video and convert it to mp3 file. However, the mp3 audio will become 2 times longer than normal video. How can I solve this problem?
import pafy
import os
import moviepy.editor as mp
print "[+] Welcome to Youtube downloader."
download_url = raw_input("URL :")
video = pafy.new(download_url)
best = video.streams
file_name = video.streams[0]
print file_name
directory = "downloaded-music"
if not os.path.exists(directory):
os.makedirs(directory)
x = file_name.download(filepath = directory)
clip = mp.VideoFileClip(x)
print clip.size
clip.audio.write_audiofile(x + ".mp3")
os.remove(x)
It's the value "clip.size" that is twice bigger than the real one, or it's the real lenght of the file ?
Related
I'm trying to create a mp3 playlist downloader using python and the module 'pytube' Though whenever I run the code I get '[WinError 2] The system cannot find the file specified'. Here's the code I'm working on:
from pytube import Playlist
import os
yt = Playlist((input("Enter the URL of the video you want to download: \n>> ")))
for video in yt.videos:
print('videos to be downloaded:')
print(video.title)
print("Enter the destination (leave blank for current directory)")
destination = str(input(">> ")) or '.'
out_file = video.streams.filter(only_audio=True).first().download(destination)
for len in out_file:
base, ext = os.path.splitext(out_file)
new_file = base + '.mp3'
os.rename(out_file, new_file)
print(video.title + " has been successfully downloaded.")
It could only download one video and result in this error. If I remove it from the 'for' loop, it downloads all the song but only the last downloaded video gets converted into an mp3 while the rest is an .mp4 file type. I was expecting it to download all the song while updating the user of what song has finished downloading.
I have converted MP3 files to WAV format but how can I compress WAV file to very small size less or same size that of MP3 size without changing the file format
from pydub import AudioSegment
import os
# files
src_folder = "D:/projects/data/mp3"
dst_folder = "D:/projects/data/wav"
#get all audio file
files = os.listdir(src_folder)
for name in files:
#name of the file
wav_name = name.replace(".mp3", "")
try:
# convert wav to mp3
sound = AudioSegment.from_mp3("{}/{}".format(src_folder, name))
sound.export("{}/{}.wav".format(dst_folder, wav_name), format="wav")
except Exception as e:
pass
s1.export("output.mp3", format='mp3', parameters=["-ac","2","-ar","8000"])
The line of code managed to reduce my audio size by half its previous size. Hope this is helpful to someone
Can someone please help me in Downloading a Playlist from YT and compressing them into Audio file please.. I tried with below shown code..
from pytube import YouTube
from pytube import Playlist
from pydub import AudioSegment
import os
import moviepy.editor as mp
import re
YOUTUBE_STREAM_AUDIO = '140' # modify the value to download a different stream
DOWNLOAD_DIR = 'C:\\MyData\\NewAudio'
playlist = Playlist("https://www.youtube.com/watch?v=z9bZufPHFLU&list=PLfqMhTWNBTe0b2nM6JHVCnAkhQRGiZMSJ")
# this fixes the empty playlist.videos list
#playlist._video_regex = re.compile(r"\"url\":\"(/watch\?v=[\w-]*)")
print(len(playlist.video_urls))
for url in playlist.video_urls:
print(url)
# physically downloading the audio track
for video in playlist.videos:
audioStream = video.streams.get_by_itag(YOUTUBE_STREAM_AUDIO)
audioStream.download(output_path=DOWNLOAD_DIR)
for file in os.listdir(DOWNLOAD_DIR):
if re.search('mp4', file):
mp4_path = os.path.join(DOWNLOAD_DIR, file)
mp3_path = os.path.join(DOWNLOAD_DIR, os.path.splitext(file)[0] + '.mp3')
new_file = mp.AudioFileClip(mp4_path)
new_file.write_audiofile(mp3_path)
os.remove(mp4_path)
If those warnings are preventing your code to function properly... You'll want to install FFMPEG by either:
Placing ffmpeg.exe in your project's directory
Putting ffmpeg.exe into PATH
You can find downloads for FFMPEG here, and you can find the ffmpeg.exe in the bin folder of the .zip/.7z file downloaded from there.
I was trying to write a program to rename all the video files of a folder. I just wanted to add video quality or dimension like (720p) or (1080p) or something like that to the end of the current file name. But I'm getting the following error:
Traceback (most recent call last):
File "f:\Python Projects\Practice\mm.py", line 17, in <module>
os.rename(file_name, f'{file_title} ({height}p){file_extension}')
PermissionError: [WinError 32] The process cannot access the file because it is being used by another process: 'Video 1.mp4' -> 'Video 1 (1080p).mp4'
Here is my code:
import os
from cv2 import cv2
os.chdir(r'F:\Python Projects\Practice\Temp Files')
for file_name in os.listdir():
# Getting Video Resolution
with open(file_name, 'r') as f:
f_string = str(f).split('\'')[1]
video_path = f'F:\\Python Projects\\Practice\\Temp Files\\{f_string}'
video = cv2.VideoCapture(video_path)
height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Getting the title
file_title, file_extension = os.path.splitext(file_name)
os.rename(file_name, f'{file_title} ({height}p){file_extension}')
Can anyone tell me how I can fix this problem? Thanks in advance... :)
The problem is that cv2.VideoCapture(video_path) opens your file as well. As this object continues to exist, the file is still open (even if it no longer is by your open(...) as f: once you exit the with block.)
So, you have to do it explicitely with:
video.release()
Something like this. Tested. Use
video.release()
to close a file opened with cv2.
import os
from cv2 import cv2
os.chdir(r'F:\Python Projects\Practice\Temp Files')
for file_name in os.listdir():
# Getting Video Resolution
f = open(file_name, 'r')
f_string = str(f).split('\'')[1]
f.close()
video_path = f'F:\\Python Projects\\Practice\\Temp Files\\{f_string}'
video = cv2.VideoCapture(video_path)
height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
video.release()
# Getting the title
file_title, file_extension = os.path.splitext(file_name)
os.rename(file_name, f'{file_title} ({height}p){file_extension}')
I've simplified the code and it works quite fine. I'm sharing my code here. If anyone becomes benefited from my code, it will be a matter of pride for me... :)
import os
from cv2 import cv2
video_folder = r'F:\Python Projects\Practice\Temp Files'
os.chdir(video_folder)
for file_name in os.listdir():
# Getting video quality
video_path = f'{video_folder}\\{file_name}'
video = cv2.VideoCapture(video_path)
width = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
video.release()
# Getting title
file_title, file_extension = os.path.splitext(file_name)
new_file_name = f'{file_title} ({width}p){file_extension}'
# Renaming the file
os.rename(file_name, new_file_name)
print('Rename Successful!')
I believe this is my first StackOverflow question, so please be nice.
I am OCRing a repository of PDFs (~1GB in total) ranging from 50-200 pages each and found that suddenly all of the available 100GB of remaining harddrive space on my Macbook Pro were gone. Based on a previous post, it seems that ImageMagick is the culprit as shown here.
I found that these files are called 'magick-*' and are stored in /private/var/tmp. For only 23 PDFs it had created 3576 files totaling 181GB.
How can I delete these files immediately within the code after they are no longer needed? Thank you in advance for any suggestions to remedy this issue.
Here is the code:
import io, os
import json
import unicodedata
from PIL import Image as PI
import pyocr
import pyocr.builders
from wand.image import Image
from tqdm import tqdm
# Where you want to save the PDFs
destination_folder = 'contract_data/Contracts_Backlog/'
pdfs = [unicodedata.normalize('NFKC',f.decode('utf8')) for f in os.listdir(destination_folder) if f.lower().endswith('.pdf')]
txt_files = [unicodedata.normalize('NFKC',f.decode('utf8')) for f in os.listdir(destination_folder) if f.lower().endswith('.txt')]
### Perform OCR on PDFs
def ocr_pdf_to_text(filename):
tool = pyocr.get_available_tools()[0]
lang = 'spa'
req_image = []
final_text = []
image_pdf = Image(filename=filename, resolution=300)
image_jpeg = image_pdf.convert('jpeg')
for img in image_jpeg.sequence:
img_page = Image(image=img)
req_image.append(img_page.make_blob('jpeg'))
for img in req_image:
txt = tool.image_to_string(
PI.open(io.BytesIO(img)),
lang=lang,
builder=pyocr.builders.TextBuilder()
)
final_text.append(txt)
return final_text
for filename in tqdm(pdfs):
txt_file = filename[:-3] +'txt'
txt_filename = destination_folder + txt_file
if not txt_file in txt_files:
print 'Converting ' + filename
try:
ocr_txt = ocr_pdf_to_text(destination_folder + filename)
with open(txt_filename,'w') as f:
for i in range(len(ocr_txt)):
f.write(json.dumps({i:ocr_txt[i].encode('utf8')}))
f.write('\n')
f.close()
except:
print "Could not OCR " + filename
A hacky way of dealing with this was to add an os.remove() statement within the main loop to remove the tmp files after creation.
tempdir = '/private/var/tmp/'
files = os.listdir(tempdir)
for file in files:
if "magick" in file:
os.remove(os.path.join(tempdir,file))
Image should be used as a context manager, because Wand determine timings to dispose resources including temporary files, in-memory buffers, and so on. with block help Wand to know boundaries when these Image objects are still needed and when they are now unnecessary.
See also the official docs.