pycurl: download and get content from an mp3 file - Python

I'm trying to get the content of a downloaded file. If I save the file as .mp3 it even plays, but when I read the content back I get a lot of "terrible" characters, e.g. "û dInfo ... LAME3.96 ..." (a long run of raw mp3 bytes, truncated here). This is my code:
fp = open('audio.txt', "wb")
ch = curl.Curl()
ch.setopt(curl.URL, url)
ch.setopt(curl.TRANSFERTEXT, True)
ch.setopt(curl.AUTOREFERER, True)
ch.setopt(curl.FOLLOWLOCATION, True)
ch.setopt(curl.POST, False)
ch.setopt(curl.HTTPHEADER, ['REMOTE_ADDR:' + self.ip, 'HTTP_X_FORWARDED_FOR:' + self.ip])
ch.setopt(curl.USERAGENT, self.useragent)
ch.setopt(curl.CONNECTTIMEOUT, self.connect_timeout)
ch.setopt(curl.TIMEOUT, self.curl_timeout)
ch.setopt(curl.SSL_VERIFYPEER, False)
ch.setopt(curl.COOKIE, "JSESSIONID=" + sessionid)
ch.setopt(curl.WRITEDATA, fp)
try:
    result = ch.perform()
except curl.error as error:
    # errno, errstr = error
    ch.close()
    return 'Error reading the mp3 file from the FMS service.'
fp.close()
with open('audio.txt', 'r', encoding="ISO-8859-1") as content_file:
    content_file.seek(0)
    content = content_file.read()
return content
How can I get normal characters (in UTF-8)? Thanks.

Try removing ch.setopt(curl.TRANSFERTEXT, True), because an mp3 is binary, not text.
After that, change fp = open('audio.txt', "wb") to fp = open('audio.mp3', "wb").
Now you'll have a correct mp3 saved on disk.
Then try using the following:
import mp3play
filename = r'audio.mp3'
mp3 = mp3play.load(filename)
mp3.play()
You need to have the mp3play package installed.
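Also note that an mp3 has no "normal" UTF-8 characters to recover; the bytes you pasted are the audio data itself. If you need the content in memory (say, to hash it or re-upload it), read it as bytes instead of decoding it. A minimal sketch, using the file name from above:
# Read the downloaded file as raw bytes; no text decoding involved.
with open('audio.mp3', 'rb') as f:
    content = f.read()  # a bytes object, e.g. b'ID3...' or b'\xff\xfb...'
print(len(content), 'bytes')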


How to know the cause of assertion errors in Python?

I am compiling a script for adding a custom property to PDF files using PdfMerger() in PyPDF2. It worked fine for almost all the files except a few, and the error occurs in some function inside PdfMerger. I don't understand what exactly is causing this error or how to rectify it. Here is the entire program; I'm not sure if giving a snippet would be helpful.
import os
import pandas as pd
from PyPDF2 import PdfReader, PdfMerger

df = pd.read_excel('S:\\USERS\\VINTON\\F001A - Item Master (Stock and Cost)- 270001.xlsx')
folder_path = "U:\\BMP"
pdf_files = [os.path.splitext(f)[0] for f in os.listdir(folder_path) if f.endswith('.pdf')]
for EachFile in pdf_files:
    search_value = EachFile
    print(EachFile)
    search_result = df[df['Item Number 02'] == search_value]
    # Find the corresponding value in the "Name" column of the same row
    if not search_result.empty:
        print("Found in JDE")
        Revision = search_result['Rev'].values[0]
        Description = search_result['Item Description 01'].values[0]
        FileName = "U:\\BMP\\" + search_value + ".pdf"
        # Get the file from the BMP folder
        file_in = open(FileName, 'rb')
        pdf_reader = PdfReader(file_in)
        if pdf_reader.is_encrypted:
            print("Encrypted")
            continue
        metadata = pdf_reader.metadata
        # Adding the entire existing file to the new file created
        pdf_merger = PdfMerger()
        pdf_merger.append(file_in)
        pdf_merger.add_metadata({
            '/Revision': Revision,
            '/Description': Description
        })
        file_out = open("S:\\USERS\\VINTON\\BMP-Rev\\" + search_value ".pdf", 'wb')
        pdf_merger.write(file_out)
        file_in.close()
        file_out.close()
print("All Done!!")
I cannot figure out how to overcome the assertion errors, because they occur several layers below my simplified syntax.
There is "+" sign missing in this line before ".pdf"
file_out = open("S:\USERS\VINTON\BMP-Rev\" + search_value ".pdf", 'wb')
try this:
file_out = open("S:\USERS\VINTON\BMP-Rev\" + search_value + ".pdf", 'wb')
hope it works
Use try and except statements when reading or merging PDF files so that exception messages are reported when something fails. It's always good practice to surface errors and exceptions when working with files or memory during development.
import os
import pandas as pd
from PyPDF2 import PdfReader, PdfMerger

df = pd.read_excel('S:\\USERS\\VINTON\\F001A - Item Master (Stock and Cost)- 270001.xlsx')
folder_path = "U:\\BMP"
pdf_files = [os.path.splitext(f)[0] for f in os.listdir(folder_path) if f.endswith('.pdf')]
for EachFile in pdf_files:
    search_value = EachFile
    print(EachFile)
    search_result = df[df['Item Number 02'] == search_value]
    # Find the corresponding value in the "Name" column of the same row
    if not search_result.empty:
        print("Found in JDE")
        Revision = search_result['Rev'].values[0]
        Description = search_result['Item Description 01'].values[0]
        FileName = "U:\\BMP\\" + search_value + ".pdf"
        # Get the file from the BMP folder
        file_in = open(FileName, 'rb')
        try:
            pdf_reader = PdfReader(file_in)
            if pdf_reader.is_encrypted:
                print("Encrypted")
                continue
            metadata = pdf_reader.metadata
            # Adding the entire existing file to the new file created
            pdf_merger = PdfMerger()
            pdf_merger.append(file_in)
            pdf_merger.add_metadata({
                '/Revision': Revision,
                '/Description': Description
            })
        except Exception as e:
            print(e)
            continue  # skip this file; pdf_merger is not usable after a failure
        file_out = open("S:\\USERS\\VINTON\\BMP-Rev\\" + search_value + ".pdf", 'wb')
        pdf_merger.write(file_out)
        file_in.close()
        file_out.close()
print("All Done!!")

Respond with binary data to download

I am trying to input a file from an input field, save it temporarily to the disk and reply with a response to re-download the same file.
In order to do this, I've read that I need to reply to the browser with a content-type : application/octet-stream and a content-disposition: attachment; "filename=myfile.extension".
I can store and listen to my music file in the /tmp folder so I know that the input part of it works.
This is my code in Pyramid:
@view_config(route_name='process')
def process_file(request):
    input_file = request.POST['file'].file
    input_file.seek(0)
    file_path = os.path.join('/tmp', '%s.mp3' % uuid.uuid4())
    with open(file_path, 'wb') as output_file:
        shutil.copyfileobj(input_file, output_file)
    print(f"Wrote: {file_path}")
    filename = file_path.split('/')[-1]
    print(filename)
    f = open(file_path, 'rb')
    return Response(body_file=f, charset='UTF-8', content_type='application/octet-stream', content_disposition=f'attachment; "filename={filename}"')
(Screenshots of the response headers and response body omitted.)
However Chrome/Firefox do not start the download of my binary file. What am I doing wrong?
UPDATE
I also tried FileResponse from Pyramid without success; I still do not get the download popup.
@view_config(route_name='process')
def process_file(request):
    input_file = request.POST['file'].file
    input_file.seek(0)
    file_path = os.path.join('/tmp', '%s.mp3' % uuid.uuid4())
    with open(file_path, 'wb') as output_file:
        shutil.copyfileobj(input_file, output_file)
    print(f"Wrote: {file_path}")
    return FileResponse(file_path, request=request)
Apparently I was approaching this the wrong way. I need to return a plain response when the file is uploaded through /process, and have the client make another request to a separate /download endpoint that returns the file. Building that endpoint and returning the file response from there fixed the issue.
Example:
@view_config(route_name='process')
def process_file(request):
    input_file = request.POST['file'].file
    db = request.POST['volume']
    input_file.seek(0)
    filename = '%s.mp3' % uuid.uuid4()
    file_path = os.path.join('/tmp', filename)
    with open(file_path, 'wb') as output_file:
        shutil.copyfileobj(input_file, output_file)
    if boost_track(file_path, filename, db):
        return Response(json_body={'filename': filename})

@view_config(route_name='download')
def download_file(request):
    filename = request.GET['filename']
    file_path = os.path.join('/tmp', filename)
    f = open(file_path, 'rb')
    return Response(body_file=f, charset='UTF-8', content_type='application/download', content_disposition=f'attachment; filename="{filename}"')
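For illustration, the client side of this two-step flow might look like the sketch below (my addition; it assumes the requests library, Pyramid's default port 6543, and an invented local test file):
import requests

# Step 1: upload the file to /process and get the generated filename back
with open('song.mp3', 'rb') as f:
    r = requests.post('http://localhost:6543/process',
                      files={'file': f}, data={'volume': '3'})
filename = r.json()['filename']

# Step 2: hit /download, which answers with a Content-Disposition: attachment header
r = requests.get('http://localhost:6543/download', params={'filename': filename})
with open(filename, 'wb') as out:
    out.write(r.content)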

How to create a loop with for over a temporary file?

I am working with an encrypted file, but I can't manage to create a for loop that reads it before it gets closed and removed.
My intention is to read the data in the encrypted file and loop over it, assigning each line to a variable.
Whenever I execute my code, Python just runs straight to the end without working with the decrypted data; I believe the with statement closes the file before the loop starts.
This is what I want; it is not working, but there are no errors either:
with open(input_file, 'rb') as fp:
    data = fp.read()
fernet = Fernet(key)
encrypted = fernet.decrypt(data)
with tempfile.TemporaryFile() as fp:
    fp.write(encrypted)
    for url in fp:  # Python ignores the tempfile. I believe it is closed in the previous line.
        segment = url.strip()
        url = 'https://docs.python.org/3.3/tutorial/' + segment
        filename = segment + '.html'
        filePath = pjoin('Data/' + filename)
        response = urlopen(url)
        webContent = response.read()
        html_content = urlopen(url).read()
        matches = re.findall(b'string', html_content)
        if len(matches) == 0:
            print(segment + ' unchanged.')
        else:
            with open(filePath, 'wb') as w:
                w.write(webContent)
This is the working code (sorry, I tried to make it shorter but couldn't):
with open(input_file, 'rb') as fp:
    data = fp.read()
fernet = Fernet(key)
encrypted = fernet.decrypt(data)
with open(output_file, 'wb') as fp:
    fp.write(encrypted)
with open(output_file) as fp:
    for url in fp:
        segment = url.strip()
        url = 'https://docs.python.org/3.3/tutorial/' + segment
        filename = segment + '.html'
        filePath = pjoin('Data/' + filename)
        response = urlopen(url)
        webContent = response.read()
        html_content = urlopen(url).read()
        matches = re.findall(b'string', html_content)
        if len(matches) == 0:
            print(segment + ' unchanged.')
        else:
            with open(filePath, 'wb') as w:
                w.write(webContent)
Header shared by both examples (kept separate to make them shorter):
#python 3.6.6
from urllib.request import urlopen
import urllib.request
from os.path import join as pjoin
import re, os, sys, tempfile, six, ctypes, time, fileinput
from cryptography.fernet import Fernet
print("[*] Checking list.dat for consistency . . .")
key = b'wTmVBRLytAmlfkctCuEf59K0LDCXa3sGas3kPg3r4fs=' #Decrypt list.dat
input_file = 'List.dat'
output_file = 'List.txt'
List.txt content:
errors
classes
stdlib
Any hints?
The problem is that once you have written to the file, the "file pointer" is at the end of the file. There's nothing to read.
You can use the seek method to reposition the file pointer at the beginning. Alternatively, closing and re-opening the file (as in your working code) will position the pointer at the beginning of the file.
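Concretely, the non-working example only needs the file pointer rewound before the loop. One extra detail (my note): TemporaryFile() defaults to binary mode, so each line comes back as bytes and must be decoded before string concatenation. A minimal sketch of the fix:
with tempfile.TemporaryFile() as fp:
    fp.write(encrypted)
    fp.seek(0)                           # rewind to the start of the file
    for line in fp:                      # binary mode: each line is bytes
        segment = line.decode().strip()  # decode before building the URL
        url = 'https://docs.python.org/3.3/tutorial/' + segment
        # ... rest of the loop body as in the question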
@LarryLustig pretty much answered why your code wasn't working, but IMO, if you eliminate the temp file altogether (which shouldn't be necessary), you don't even need to worry about the cursor. See the commented changes to your desired code below.
# We'll use os.linesep to get the line terminator string for your os.
import os
...
with open(input_file, 'rb') as fp:
    data = fp.read()
fernet = Fernet(key)
# Decode your decrypted bytes into strings. Change 'utf-8' to whichever file encoding you're using if necessary.
decrypted = fernet.decrypt(data).decode('utf-8')
# Don't write to a temp file.
# Iterate directly over each line of the decrypted data.
for url in decrypted.split(os.linesep):
    segment = url.strip()
    url = 'https://docs.python.org/3.3/tutorial/' + segment
    filename = segment + '.html'
    filePath = pjoin('Data/' + filename)
    response = urlopen(url)
    webContent = response.read()
    html_content = urlopen(url).read()
    matches = re.findall(b'string', html_content)
    if len(matches) == 0:
        print(segment + ' unchanged.')
    else:
        with open(filePath, 'wb') as w:
            w.write(webContent)
Alternatively, if you know for sure which line terminator the file uses (e.g. \r\n or \n), you can skip os.linesep altogether.
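A further note (my addition, not in the original answer): str.splitlines() sidesteps the line-terminator question entirely, since it splits on \n, \r\n, and \r alike:
# splitlines() handles \n, \r\n and \r uniformly, so no os.linesep guesswork
for url in decrypted.splitlines():
    segment = url.strip()
    # ... same loop body as above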

PyPDF2: TypeError: coercing to Unicode: need string or buffer, PdfFileWriter found

I'm reworking code to use context managers via with statements, but I am receiving a traceback (using Python 2.7 on Windows):
Traceback (most recent call last):
File "CommissionSecurity.py", line 52, in <module>
with open(output, 'w') as output_stream :
TypeError: coercing to Unicode: need string or buffer, PdfFileWriter found
I'm not sure how to fix this. I am trying to use PyPDF2 to encrypt PDF files, and I'm having trouble figuring out exactly what I've done wrong here. Any guidance is appreciated.
import os
from PyPDF2 import PdfWriter, PdfReader

for ID in file_dict:
    # print REP
    # print ID  # debug: REP always coming over 764
    if ID in email_dict:
        # print file_dict[ID]
        path = "C:\\Apps\\CorVu\\DATA\\Reports\\AlliD\\Monthly Commission Reports\\Output\\pdcom1\\"
        file_path = os.path.join(path + file_dict[ID])
        writer = PdfWriter()
        reader = PdfReader(file_path)
        output_stream = (file_path, "wb")
        with open(file_path, "rb") as reader:
            with open(writer, "w") as output_stream:
                writer.encrypt(email_dict[ID][1])
                writer.write(output_stream)
    else:
        continue
The code has several issues:
You don't use reader
You overwrite several variables
I guess what you want to do is this:
import os
from PyPDF2 import PdfWriter, PdfReader

for ID in file_dict:
    # print REP
    # print ID  # debug: REP always coming over 764
    if ID in email_dict:
        # print file_dict[ID]
        path = "C:\\Apps\\CorVu\\DATA\\Reports\\AlliD\\Monthly Commission Reports\\Output\\pdcom1\\"
        file_path = os.path.join(path + file_dict[ID])
        writer = PdfWriter()
        reader = PdfReader(file_path)
        with open("target.pdf", "wb") as fp:   # this is
            writer.encrypt(email_dict[ID][1])  # the important
            writer.write(fp)                   # part ("wb": PDFs must be written in binary mode)
    else:
        continue
There are surely other things to adjust, but this should get you past the TypeError.
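For reference, a minimal self-contained sketch of encrypting one PDF with this API (the file names and password are placeholders I made up):
from PyPDF2 import PdfReader, PdfWriter

reader = PdfReader("input.pdf")           # placeholder input file
writer = PdfWriter()
for page in reader.pages:                 # copy every page into the writer
    writer.add_page(page)
writer.encrypt("s3cret")                  # placeholder password
with open("encrypted.pdf", "wb") as fp:   # binary mode is required
    writer.write(fp)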

How to convert a file to utf-8 in Python?

I need to convert a bunch of files to utf-8 in Python, and I have trouble with the "converting the file" part.
I'd like to do the equivalent of:
iconv -t utf-8 $file > converted/$file # this is shell code
Thanks!
You can use the codecs module, like this:
import codecs

BLOCKSIZE = 1048576  # or some other desired size in bytes
with codecs.open(sourceFileName, "r", "your-source-encoding") as sourceFile:
    with codecs.open(targetFileName, "w", "utf-8") as targetFile:
        while True:
            contents = sourceFile.read(BLOCKSIZE)
            if not contents:
                break
            targetFile.write(contents)
EDIT: added BLOCKSIZE parameter to control file chunk size.
This worked for me in a small test:
sourceEncoding = "iso-8859-1"
targetEncoding = "utf-8"
source = open("source")
target = open("target", "w")
target.write(unicode(source.read(), sourceEncoding).encode(targetEncoding))
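Note that unicode() exists only in Python 2. A Python 3 equivalent of the same small test (my sketch, using the same file names and encodings):
# Python 3: let open() handle decoding and encoding explicitly
with open("source", encoding="iso-8859-1") as source, \
     open("target", "w", encoding="utf-8") as target:
    target.write(source.read())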
Thanks for the replies, it works!
And since the source files are in mixed formats, I added a list of source formats to be tried in sequence (sourceFormats), and on UnicodeDecodeError I try the next format:
from __future__ import with_statement
import os
import sys
import codecs
from chardet.universaldetector import UniversalDetector

targetFormat = 'utf-8'
outputDir = 'converted'
detector = UniversalDetector()

def get_encoding_type(current_file):
    detector.reset()
    for line in file(current_file):
        detector.feed(line)
        if detector.done: break
    detector.close()
    return detector.result['encoding']

def convertFileBestGuess(fileName):
    sourceFormats = ['ascii', 'iso-8859-1']
    for format in sourceFormats:
        try:
            with codecs.open(fileName, 'rU', format) as sourceFile:
                writeConversion(sourceFile, fileName)
                print('Done.')
                return
        except UnicodeDecodeError:
            pass

def convertFileWithDetection(fileName):
    print("Converting '" + fileName + "'...")
    format = get_encoding_type(fileName)
    try:
        with codecs.open(fileName, 'rU', format) as sourceFile:
            writeConversion(sourceFile, fileName)
            print('Done.')
            return
    except UnicodeDecodeError:
        pass
    print("Error: failed to convert '" + fileName + "'.")

def writeConversion(file, fileName):
    with codecs.open(outputDir + '/' + fileName, 'w', targetFormat) as targetFile:
        for line in file:
            targetFile.write(line)

# Off topic: get the file list and call convertFile on each file
# ...
(EDIT by Rudro Badhon: this incorporates the original approach of trying multiple formats until none raises an exception, as well as an alternate approach that uses chardet.universaldetector.)
Answer for unknown source encoding type
Based on @Sébastien RoccaSerra's answer.
python3.6
import os
from chardet import detect

# get file encoding type
def get_encoding_type(file):
    with open(file, 'rb') as f:
        rawdata = f.read()
    return detect(rawdata)['encoding']

from_codec = get_encoding_type(srcfile)

# add try: except block for reliability
try:
    with open(srcfile, 'r', encoding=from_codec) as f, open(trgfile, 'w', encoding='utf-8') as e:
        text = f.read()  # for small files; for big files, read in chunks
        e.write(text)
    os.remove(srcfile)           # remove the file in the old encoding
    os.rename(trgfile, srcfile)  # rename the newly encoded file
except UnicodeDecodeError:
    print('Decode Error')
except UnicodeEncodeError:
    print('Encode Error')
You can use this one-liner (assuming you want to convert from UTF-16 to UTF-8):
python -c "from pathlib import Path; path = Path('yourfile.txt') ; path.write_text(path.read_text(encoding='utf16'), encoding='utf8')"
Where yourfile.txt is a path to your $file.
For this to work you need Python 3.4 or newer (which you probably have nowadays).
Below is a more readable version of the code above:
from pathlib import Path
path = Path("yourfile.txt")
path.write_text(path.read_text(encoding="utf16"), encoding="utf8")
This is a Python 3 function for converting any text file into one with UTF-8 encoding (without using unnecessary packages):
def correctSubtitleEncoding(filename, newFilename, encoding_from, encoding_to='UTF-8'):
    with open(filename, 'r', encoding=encoding_from) as fr:
        with open(newFilename, 'w', encoding=encoding_to) as fw:
            for line in fr:
                fw.write(line[:-1] + '\r\n')
You can use it easily in a loop to convert a list of files.
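For instance (my sketch; the file names and source encoding are invented):
# convert a few subtitle files in one go
for name in ['ep1.srt', 'ep2.srt', 'ep3.srt']:
    correctSubtitleEncoding(name, 'utf8_' + name, encoding_from='cp1252')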
To guess the source encoding you can use the *nix file command.
Example:
$ file --mime jumper.xml
jumper.xml: application/xml; charset=utf-8
This is my brute force method. It also takes care of mingled \n and \r\n in the input.
try:
    # open the CSV file (this snippet comes from a larger class:
    # cfg, self.outf and self.loadedwitherrors are defined there)
    inputfile = open(filelocation, 'rb')
    outputfile = open(outputfilelocation, 'w', encoding='utf-8')
    for line in inputfile:
        if line[-2:] == b'\r\n' or line[-2:] == b'\n\r':
            output = line[:-2].decode('utf-8', 'replace') + '\n'
        elif line[-1:] == b'\r' or line[-1:] == b'\n':
            output = line[:-1].decode('utf-8', 'replace') + '\n'
        else:
            output = line.decode('utf-8', 'replace') + '\n'
        outputfile.write(output)
    outputfile.close()
except BaseException as error:
    cfg.log(self.outf, "Error(18): opening CSV-file " + filelocation + " failed: " + str(error))
    self.loadedwitherrors = 1
    return ([])
try:
    # open the CSV file of this source table
    csvreader = csv.reader(open(outputfilelocation, "rU"), delimiter=delimitervalue, quoting=quotevalue, dialect=csv.excel_tab)
except BaseException as error:
    cfg.log(self.outf, "Error(19): reading CSV-file " + filelocation + " failed: " + str(error))
Convert all files in a directory to UTF-8 encoding. It is recursive and can filter files by suffix. Thanks @Sole Sensei.
# pip install -i https://pypi.tuna.tsinghua.edu.cn/simple chardet
import os
import re
from chardet import detect

def get_file_list(d):
    result = []
    for root, dirs, files in os.walk(d):
        dirs[:] = [d for d in dirs if d not in ['venv', 'cmake-build-debug']]
        for filename in files:
            # your filter
            if re.search(r'(\.c|\.cpp|\.h|\.txt)$', filename):
                result.append(os.path.join(root, filename))
    return result

# get file encoding type
def get_encoding_type(file):
    with open(file, 'rb') as f:
        raw_data = f.read()
    return detect(raw_data)['encoding']

if __name__ == "__main__":
    file_list = get_file_list('.')
    for src_file in file_list:
        print(src_file)
        trg_file = src_file + '.swp'
        from_codec = get_encoding_type(src_file)
        try:
            with open(src_file, 'r', encoding=from_codec) as f, open(trg_file, 'w', encoding='utf-8') as e:
                text = f.read()
                e.write(text)
            os.remove(src_file)
            os.rename(trg_file, src_file)
        except UnicodeDecodeError:
            print('Decode Error')
        except UnicodeEncodeError:
            print('Encode Error')
