Printing Arabic characters in python/django

I have a script that modifies data in a Django app. I have data in an Excel file that I process and then update my models with; some of the data is in Arabic, and when I execute the script I get the following error:
Traceback (most recent call last):
  File "script.py", line 77, in <module>
    update_locations(path)
  File "script.py", line 36, in update_locations
    household.location = new_location
  File "/data/envs/ve.maidea/lib/python2.7/site-packages/django/db/models/fields/related_descriptors.py", line 207, in __set__
    self.field.remote_field.model._meta.object_name,
ValueError: Cannot assign "'\xd8\xa7\xd9\x84\xd8\xa8\xd8\xad\xd9\x8a\xd8\xb1\xd9\x87'": "Household.location" must be a "Location" instance.
I think the error is being raised by these Arabic characters.
Here is my script:
import django
django.setup()

import sys
reload(sys)  # to re-enable sys.setdefaultencoding()
sys.setdefaultencoding('utf-8')

import xlrd
from django.db import transaction
from foodnet.apps.registration.models import Household
from geo.models import Location

log_file = "/opt/cv_instances/cv1/autodeploy/branches/nboreports/maidea/egypt/data_import_files/egypt_beheira_locations.txt"
logfile_to_write = open(log_file, "w")


def process_file(path):
    book = xlrd.open_workbook(path)
    print("Got {0} number of sheets.".format(book.nsheets))
    hh_counter = 0
    for sheet_num in range(book.nsheets-1, -1, -1):
        sheet = book.sheet_by_index(sheet_num)
        print("Processing sheet number {0} ({1})".format(sheet_num, sheet.name))
        for row_idx in range(1, sheet.nrows):
            with transaction.atomic():
                try:
                    household_name = str(sheet.row_values(row_idx)[0]).strip().replace(".0","")
                    # old_location = str(sheet.row_values(row_idx)[1]).strip().replace(".0","")
                    new_location = str(sheet.row_values(row_idx)[2]).strip().replace(".0","")
                    if household_name:
                        household = Household.objects.get(office__slug='eg-co', name=household_name)
                        # print(household.name, household.location)
                        # update new locations
                        household.location = new_location
                        household.save()
                        hh_counter += 1
                        logfile_to_write.write("Household {0} updated to location {1}".format(household, household.location))
                except Household.DoesNotExist:
                    continue
    print("Done looping and updating locations")
    print("================================================================================================================================")


def delete_old_locations(path):
    """
    Delete old locations no longer needed by the country office
    """
    book = xlrd.open_workbook(path)
    print("Got {0} number of sheets.".format(book.nsheets))
    location_counter = 0
    for sheet_num in range(book.nsheets-1, -1, -1):
        sheet = book.sheet_by_index(sheet_num)
        print("Processing sheet number {0} ({1})".format(sheet_num, sheet.name))
        for row_idx in range(1, sheet.nrows):
            with transaction.atomic():
                try:
                    old_location = str(sheet.row_values(row_idx)[1]).strip().replace(".0","")
                    if old_location:
                        location = Location.objects.get(country__name="Egypt", name=old_location)
                        # print(location.name, location.country)
                        location.delete()
                        location_counter += 1
                        logfile_to_write.write("Location {0} deleted ".format(location))
                except Location.DoesNotExist:
                    continue
    print("Done looping and deleting locations")
    print("================================================================================================================================")


# call the process_file function
if __name__ == "__main__":
    path = "/opt/cv_instances/cv1/autodeploy/branches/nboreports/maidea/egypt/data_import_files/egypt-sf-beheira-enrolments.xlsx"
    process_file(path)
    delete_old_locations(path)
    print("Done processing file")
I kindly need advice on the best way of printing these Arabic characters. Thanks in advance.

This has nothing to do with Arabic characters. As the error says, you need to assign an instance of Location there, not a string.
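One way to do that is to look the Location up by the value from the spreadsheet and assign the resulting object instead of the raw string, reusing the same lookup the script already uses in delete_old_locations. A minimal sketch of the relevant part of process_file:

new_location_name = str(sheet.row_values(row_idx)[2]).strip().replace(".0","")
if household_name:
    household = Household.objects.get(office__slug='eg-co', name=household_name)
    try:
        # Fetch the Location object and assign the instance, not the string.
        location = Location.objects.get(country__name="Egypt", name=new_location_name)
    except Location.DoesNotExist:
        continue  # or log the unknown location and move on
    household.location = location
    household.save()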

Related

JSONDecodeError when parsing through data, works half the time?

Heyo, I've been working on this code:
import requests as r
import datetime
import openpyxl
import json
import matplotlib #i was gonna use this for graphing later lol
from openpyxl.chart.axis import DateAxis
from openpyxl import load_workbook
from openpyxl import Workbook
from openpyxl.chart import BarChart, Reference, Series, LineChart
#i dont thinks we have to do it this way, i just saw that this is how the guy on stackoverflow was doing it and didnt argue lol
try:
from openpyxl.cell import get_column_letter
except ImportError:
from openpyxl.utils import get_column_letter
from openpyxl.utils import column_index_from_string
myname = input('Enter your name: ')
def do_the_work(myname):
userLink = 'https://api.faceit.com/users/v1/nicknames/'+myname #Inputting the name given here to get the UUID of the user
print("Getting user . . . this may take a while")
reqUserLink = r.get(userLink).json() #requesting the link's JSON data
print("UUID Found; ",str(reqUserLink['payload']['id']))
reqUserList = []
reqUserList.append(str(reqUserLink['payload']['id'])) #adding it to our own list because python HATES nested stuff
print(reqUserList)
userStatsLink = ""
userStatsLink = f"https://api.faceit.com/stats/v1/stats/time/users/{reqUserList[0]}/games/csgo"
print(userStatsLink)
params = {"page": 0, "size": 30} #we have to do this because the data prevents us from having more than 30 matches a page
matches = [] #creating our own dictionary to grab only relevant data from the matches
totalRankedM = 1 #showing ranked matches (aka matches that have an ELO value)
while True:
reqStatsPage = r.get(userStatsLink, params=params) #Requesting
statsPageJSON = reqStatsPage.json()
print(statsPageJSON)
for match in statsPageJSON:
elo = match.get("elo") #elo variable of the stats
if elo is not None: #checking if elo does exist (Faceit removes this parameter from matches that have no elo)
matches.append({ #adding them to our own list with the relevant data wanted
'match_id': match["matchId"],
'played_on': match["date"],
'map': match["i1"],
'elo': match["elo"]
})
totalRankedM = totalRankedM + 1 #each time we have a match that counts elo, it is a ranked match
else:
matches.append({
'match_id': match["matchId"],
'played_on': match["date"],
'map': match["i1"],
'elo': '' #just putting nothing tbh, we replace this later to "No elo recorded" but i like the freedom
})
if len(statsPageJSON) < params['size']: #check if we went thru all the pages
break
params['page'] += 1 #changing page
print(f'Total number of matches: {len(matches)}') #print the total number of matches
print(f'Total number of ranked matches: {totalRankedM}') #print total ranked matches
matches.reverse() #since we start at the top of the pages aka most recent matches and go down from there, we reverse this to make it easier for the excel sheet graph so it's in order
workbook = openpyxl.Workbook()
worksheet = workbook.active
worksheet.append(['Match ID', 'Played On', 'Map', 'ELO', 'Total Ranked Matches']) #those are the columns
for match in matches: #adding the data from the matches=[] list to the sheet
worksheet.append([match['match_id'], match['played_on'], match['map'], match['elo']])
worksheet["E2"].value = totalRankedM
worksheet["F1"].value = 'To view the game room of a specific match, simply use this link: https://www.faceit.com/en/csgo/room/REPLACE THIS WITH MATCHID/scoreboard'
for cell in worksheet['D']: #to make it pretty in the excel sheet and get rid of errors, we convert the strings that are numbers to actual numbers in the sheet (excel considers them strings, so it throws an error when its a string number)
if isinstance(cell.value, str):
try:
number = int(cell.value)
cell.data_type = 'n'
cell.value = number
except ValueError:
pass #just ignoring if we can't make it a number
for cell in worksheet['B']: #the data we are given for the dates are in unix so we just convert
if isinstance(cell.value, int):
date = datetime.datetime.fromtimestamp(cell.value / 1000)
date_str = date.strftime('%d/%m/%Y')
cell.value = date_str
for column_cells in worksheet.columns: #this is just to make each cell size fit the length of the text in it
new_column_length = max(len(str(cell.value)) for cell in column_cells)
new_column_letter = (get_column_letter(column_cells[0].column))
if new_column_length > 0:
worksheet.column_dimensions[new_column_letter].width = new_column_length*1.23
#and we save the final product xd
workbook.save('matches.xlsx')
do_the_work(myname)
#fancy stuff
print("\n")
print("\n")
print("\n")
print("You may now look at the excel sheet.")
That basically:
- lets you input a name
- uses Faceit's API calls to get the data from said account and gets the UUID (because this is the only way you can see the stats page of a user in the link)
- adds that UUID to our own list (I think I made it that way when trying to figure out why I was getting the error, but that part still works fine - I think, since I had the same error before it)
- then adds that UUID to the stats link so we can parse through it
- grabs the relevant data and adds it to our own list
- prints that list
- adds it to an Excel sheet
Now the weird part: when I run the program the first time, it works, loads everything correctly, and adds it to the Excel sheet.
But when I run it again with the same username (it should just overwrite), it stops halfway through with this error:
Traceback (most recent call last):
  File "C:\Users\SAM\AppData\Local\Programs\Python\Python310\lib\site-packages\requests\models.py", line 971, in json
    return complexjson.loads(self.text, **kwargs)
  File "C:\Users\SAM\AppData\Local\Programs\Python\Python310\lib\json\__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "C:\Users\SAM\AppData\Local\Programs\Python\Python310\lib\json\decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "C:\Users\SAM\AppData\Local\Programs\Python\Python310\lib\json\decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting property name enclosed in double quotes: line 7 column 10 (char 107)

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\SAM\Desktop\elo-scrape\test3.py", line 111, in <module>
    do_the_work(myname)
  File "C:\Users\SAM\Desktop\elo-scrape\test3.py", line 43, in do_the_work
    statsPageJSON = reqStatsPage.json()
  File "C:\Users\SAM\AppData\Local\Programs\Python\Python310\lib\site-packages\requests\models.py", line 975, in json
    raise RequestsJSONDecodeError(e.msg, e.doc, e.pos)
requests.exceptions.JSONDecodeError: Expecting property name enclosed in double quotes: line 7 column 10 (char 107)
For context, the account I am loading the data from here has a total of 1605 ranked matches. When I try it with my own account (328 ranked matches) over and over, it works every time.
Does it just stop working when there is a huge amount of data? If so, how could I improve this code? I've been spending a while trying to fix it and I'm out of ideas, which is why I'm here.
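A likely explanation is that one of the paginated requests comes back as something other than JSON (for example an HTML error or rate-limit page), so reqStatsPage.json() blows up; with 1605 matches you make far more requests than with 328, so you are more likely to hit it. One way to confirm and work around it is to inspect the body when parsing fails and retry after a pause. A minimal sketch (the helper name get_json_with_retry and the retry/backoff values are illustrative, not part of the Faceit API):

import time
import requests

def get_json_with_retry(url, params, retries=3, backoff=2.0):
    for attempt in range(retries):
        resp = requests.get(url, params=params, timeout=10)
        try:
            return resp.json()
        except ValueError:  # json.JSONDecodeError is a subclass of ValueError
            # Print what actually came back so the failing page can be inspected.
            print(f"Non-JSON response (HTTP {resp.status_code}) on attempt {attempt + 1}:")
            print(resp.text[:200])
            time.sleep(backoff * (attempt + 1))
    raise RuntimeError("Giving up: the endpoint kept returning non-JSON data")

You could then call statsPageJSON = get_json_with_retry(userStatsLink, params) inside the while loop instead of calling r.get(...).json() directly.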

Index Error when using pushbullet and python

I'm really new to these things, so I followed a GeeksforGeeks tutorial. I have some experience in Python, but I couldn't figure out what the problem is.
This is the code.
# Import following modules
import urllib.request
import pandas as pd
from pushbullet import PushBullet

# Get Access Token from pushbullet.com
Access_token = "#.########################"

# Authentication
pb = PushBullet(Access_token)

# All pushes created by you
all_pushes = pb.get_pushes()

# Get the latest push
latest_one = all_pushes[0]

# Fetch the latest file URL link
url = latest_one['file_url']

# Create a new text file for storing
# all the chats
Text_file = "All_Chats.txt"

# Retrieve all the data store into
# Text file
urllib.request.urlretrieve(url, Text_file)

# Create an empty chat list
chat_list = []

# Open the Text file in read mode and
# read all the data
with open(Text_file, mode='r', encoding='utf8') as f:
    # Read all the data line-by-line
    data = f.readlines()

# Excluded the first item of the list
# first items contains some garbage
# data
final_data_set = data[1:]

# Run a loop and read all the data
# line-by-line
for line in final_data_set:
    # Extract the date, time, name,
    # message
    date = line.split(",")[0]
    tim = line.split("-")[0].split(",")[1]
    name = line.split(":")[1].split("-")[1]
    message = line.split(":")[2][:-0]  ##### THIS IS THE LINE 53 #####
    # Append all the data in a List
    chat_list.append([date, tim, name, message])

# Create a dataframe, for storing
# all the data in a excel file
df = pd.DataFrame(chat_list,
                  columns=['Date', 'Time',
                           'Name', 'Message'])
df.to_excel("BackUp.xlsx", index=False)
This is the error message I am getting.
Traceback (most recent call last):
  File "d:\#adress to the file location", line 53, in <module>
    message = line.split(":")[2][:-0]
IndexError: list index out of range
I have marked line 53 with a comment. As I am just getting started, please excuse any silly mistakes and point out anything you see. I just want to figure this out.
Thanks in advance. 🥲
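The IndexError means the particular line being processed does not contain at least three ":"-separated parts, which is what happens for export lines such as system notifications that have no "name: message" portion. (Separately, [:-0] is the same as [:0] and always produces an empty string, so the message would be lost even on good lines.) A hedged sketch of a more defensive parse, assuming a WhatsApp-style "date, time - name: message" layout; adjust the separators to your actual export:

for line in final_data_set:
    parts = line.split(" - ", 1)           # "date, time" and "name: message"
    if len(parts) != 2 or ":" not in parts[1]:
        # Lines without a "name: message" part (e.g. system notifications)
        # are what make line.split(":")[2] raise IndexError, so skip them.
        continue
    date_time, rest = parts
    date = date_time.split(",")[0]
    tim = date_time.split(",")[1].strip() if "," in date_time else ""
    name, message = rest.split(":", 1)     # split only on the first colon
    chat_list.append([date, tim, name.strip(), message.strip()])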

Reads And Updates XML in pycharm but not command line

I am very new to Python and SO. The script opens XML files inside a folder. Using os.walk I iterate over the collection, open each file, and call a function that iterates over the XML and updates it, rewriting the updated file over the original with .writexml. The problem is that when I run this program from the command line it reports an error:
Traceback (most recent call last):
  File "./XMLParser.py", line 67, in <module>
    xmldoc = minidom.parse(xml)
  File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/xml/dom/minidom.py", line 1918, in parse
    return expatbuilder.parse(file)
  File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/xml/dom/expatbuilder.py", line 928, in parse
    result = builder.parseFile(file)
  File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/xml/dom/expatbuilder.py", line 207, in parseFile
    parser.Parse(buffer, 0)
UnicodeEncodeError: 'ascii' codec can't encode character u'\xa0' in position 5614: ordinal not in range(128)
CODE:
from xml.dom import minidom
import os
import codecs

'''
Function to iterate over the directory that contains the work items
params:
    CoreID of new author,
    x is the path to the workItem.xml file,
    p is the path to the workItem.xml that will be overwritten with new data
'''
def changeauthor(coreid, x, p):
    # Gets the content of the xml based within the work item tag.
    testcase = x.getElementsByTagName("work-item")[0]
    # All fields are stored as a <field> tag with the id attribute being the
    # differentiators between them. Fields is a list of all the field tags in the
    # document.
    fields = testcase.getElementsByTagName("field")
    # Loop iterates over the field tags and looks for the one tag where the id
    # attribute has a value of author. when this tag is found the tags value is
    # updated to the core id passed to the function.
    for field in fields:
        attribute = field.attributes['id'].value
        if attribute == "author":
            # print the current author.
            print("Previous Author: ", field.firstChild.data)
            # Set the author to the core id entered into the script
            field.firstChild.data = coreid
            # Print the updated author field
            print("New Author: ", field.firstChild.data)
    # Create a temp file with the same path as the source
    tmp_config = p
    # Open the new temp file with the write mode set.
    with codecs.open(tmp_config, 'w', "utf-8") as f:
        # f = open(tmp_config, 'w')
        # Write the xml into the file at the same location as the orginal
        x.writexml(f)
    # Close the file
    # f.close()
    return


while True:
    core = str(input("Enter Core ID of the new author: "))
    core = core.upper()
    spath = str(input("Please enter the full path to the directory of test cases: "))
    count = 0
    confirm = str(input("Confirm path and core id (Y/N or Exit to leave script): "))
    confirm = confirm.upper()
    if confirm == "Y":
        '''Hard code path here and comment out line above asking for input either will work.'''
        # spath = "/Users/Evan/Desktop/workitems-r583233"
        # Loop iterates over the directory. Whenever a workitem.xml file is found the path is stored and the file is
        # parsed. the core ID entered and the path as well as the parsed xml doc are passed to the change author
        # function.
        for roots, dirs, files in os.walk(spath):
            for file in files:
                title = file.title()
                if title == "Workitem.Xml":
                    path = os.path.join(roots, file)
                    with codecs.open(path, 'r+', "utf-8") as xml:
                        xmldoc = minidom.parse(xml)
                    lst = path.split('/')
                    wi = lst[5]
                    print("Updating: ", wi)
                    changeauthor(core, xmldoc, path)
                    count += 1
                    print(wi, "updated succesfully.")
                    print("-------------------------------")
        if count > 0:
            # Print how many test cases were updated.
            print("All Done", count, "workItems updated!")
        else:
            print("Please double check path and try again no workItems found to update!")
    elif confirm == "N":
        continue
    elif confirm == "EXIT":
        break
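The traceback shows the Python 2 expat parser choking when it tries to re-encode the already-decoded unicode text that codecs.open(path, 'r+', "utf-8") hands it. A common workaround is to let minidom read the raw bytes itself, either by passing the path or a binary file object, so the parser applies the encoding declared in the XML prolog. A minimal sketch under that assumption:

from xml.dom import minidom

# Let the parser read raw bytes and honour the encoding declared in the XML
# prolog, instead of feeding it already-decoded unicode text.
xmldoc = minidom.parse(path)

# Equivalent alternative with an explicit binary file object:
with open(path, 'rb') as xml_file:
    xmldoc = minidom.parse(xml_file)

The codecs.open(tmp_config, 'w', "utf-8") used for writing in changeauthor can stay as it is.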

Python requests stops working mid-file

I've got a CSV file with URLs and I need to scrape metadata from those websites. I'm using Python requests for that, with the code below:
from tempfile import NamedTemporaryFile
import shutil
import csv
from bs4 import BeautifulSoup
import requests
import re
import html5lib
import sys
#import logging

filename = 'TestWTF.csv'
#logging.basicConfig(level=logging.DEBUG)

#Get filename (with extension) from terminal
#filename = sys.argv[1]
tempfile = NamedTemporaryFile(delete=False)
read_timeout = 1.0

#Does actual scraping done, returns metaTag data
def getMetadata (url, metaTag):
    r = requests.get("http://" + url, timeout=2)
    data = r.text
    soup = BeautifulSoup(data, 'html5lib')
    metadata = soup.findAll(attrs={"name":metaTag})
    return metadata

#Gets either keyword or description
def addDescription ( row ):
    scrapedKeywordsData = getMetadata(row, 'keywords')
    if not scrapedKeywordsData:
        print row + ' NO KEYWORDS'
        scrapedKeywordsData = getMetadata(row, 'description')
        if not scrapedKeywordsData:
            return ''
    return scrapedKeywordsData[0]

def prepareString ( data ):
    output = data
    #Get rid of opening meta content
    if output.startswith( '<meta content="' ):
        output = data[15:]
    #Get rid of closing meta content (keywords)
    if output.endswith( '" name="keywords"/>' ):
        output = output[:-19]
    #Get rid of closing meta content (description)
    if output.endswith( '" name="description"/>' ):
        output = output[:-22]
    return output

def iterator():
    with open(filename, 'rb') as csvFile, tempfile:
        reader = csv.reader(csvFile, delimiter=',', quotechar='"')
        writer = csv.writer(tempfile, delimiter=',', quotechar='"')
        i = 0
        for row in reader:
            try:
                data = str(addDescription (row[1] ))
                row[3] = prepareString( data )
            except requests.exceptions.RequestException as e:
                print e
            except requests.exceptions.Timeout as e:
                print e
            except requests.exceptions.ReadTimeout as e:
                print "lol"
            except requests.exceptions.ConnectionError as e:
                print "These aren't the domains we're looking for."
            except requests.exceptions.ConnectTimeout as e:
                print "Too slow Mojo!"
            writer.writerow(row)
            i = i + 1
            print i
    shutil.move(tempfile.name, filename)

def main():
    iterator()

#Defining main function
if __name__ == '__main__':
    main()
It works just fine, but at a few URLs (maybe 2-3 out of 3000) it suddenly stops and never moves on to the next one after the timeout, so I have to kill it with Ctrl+C, which means the file is not saved.
I know it's a problem with catching exceptions, but I cannot figure out which one or what to do about it. I'm more than happy to simply skip the URL it gets stuck on.
EDIT:
Added traceback:
^CTraceback (most recent call last):
  File "blacklist.py", line 90, in <module>
    main()
  File "blacklist.py", line 85, in main
    iterator()
  File "blacklist.py", line 62, in iterator
    data = str(addDescription (row[1] ))
  File "blacklist.py", line 30, in addDescription
    scrapedKeywordsData = getMetadata(row, 'keywords')
  File "blacklist.py", line 25, in getMetadata
    metadata = soup.findAll(attrs={"name":metaTag})
  File "/Library/Python/2.7/site-packages/bs4/element.py", line 1259, in find_all
    return self._find_all(name, attrs, text, limit, generator, **kwargs)
  File "/Library/Python/2.7/site-packages/bs4/element.py", line 537, in _find_all
    found = strainer.search(i)
  File "/Library/Python/2.7/site-packages/bs4/element.py", line 1654, in search
    found = self.search_tag(markup)
  File "/Library/Python/2.7/site-packages/bs4/element.py", line 1626, in search_tag
    if not self._matches(attr_value, match_against):
  File "/Library/Python/2.7/site-packages/bs4/element.py", line 1696, in _matches
    if isinstance(markup, Tag):
KeyboardInterrupt
EDIT 2:
Example website for which script doesn't work: miniusa.com
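The Ctrl+C traceback shows the interpreter inside BeautifulSoup's find_all rather than inside requests, so the 2-second timeout never fires: it only bounds the connection and the gaps between reads, not how long a response takes to parse or how long a server can keep trickling data. One blunt, Unix-only way to guarantee that every URL is abandoned after a fixed wall-clock budget is signal.alarm; a sketch, where the 30-second budget and the UrlTimeout name are illustrative:

import signal

class UrlTimeout(Exception):
    pass

def _alarm_handler(signum, frame):
    raise UrlTimeout()

signal.signal(signal.SIGALRM, _alarm_handler)

# inside iterator(), wrap the per-row work:
for row in reader:
    signal.alarm(30)          # hard 30-second budget for this URL
    try:
        data = str(addDescription(row[1]))
        row[3] = prepareString(data)
    except UrlTimeout:
        print 'Skipped ' + row[1] + ' (took too long)'
    except requests.exceptions.RequestException as e:
        # Base class that covers Timeout, ConnectionError, etc.
        print e
    finally:
        signal.alarm(0)       # cancel the pending alarm once the row is done
    writer.writerow(row)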

Python detecting file type before operation

I'm working on this piece of code and a weird bug showed up at the try block near the end of the code. The whole script is aimed at .flac files, but sometimes it would read .jpg files in the folders and blow up. Simply enough, I went ahead and added if (".flac" or ".FLAC" in Song): before the try, so that it would only process the correct file type. However, this made absolutely no difference and I kept getting the following error:
Traceback (most recent call last):
  File ".\musync.py", line 190, in <module>
    match_metadata(CurrentAlbum + Song, CoAlbum + Song)
  File ".\musync.py", line 152, in match_metadata
    TagSource = FLAC(SrcFile)
  File "C:\Python34\lib\site-packages\mutagen\_file.py", line 41, in __init__
    self.load(filename, *args, **kwargs)
  File "C:\Python34\lib\site-packages\mutagen\flac.py", line 721, in load
    self.__check_header(fileobj)
  File "C:\Python34\lib\site-packages\mutagen\flac.py", line 844, in __check_header
    "%r is not a valid FLAC file" % fileobj.name)
mutagen.flac.FLACNoHeaderError: 'C:/Users/berna/Desktop/Lib/Andrew Bird/Armchair Apocrypha/cover.jpg' is not a valid FLAC file

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File ".\musync.py", line 194, in <module>
    check_song(CurrentAlbum + Song, CoAlbum)
  File ".\musync.py", line 83, in check_song
    TagSource = FLAC(SrcFile)
  File "C:\Python34\lib\site-packages\mutagen\_file.py", line 41, in __init__
    self.load(filename, *args, **kwargs)
  File "C:\Python34\lib\site-packages\mutagen\flac.py", line 721, in load
    self.__check_header(fileobj)
  File "C:\Python34\lib\site-packages\mutagen\flac.py", line 844, in __check_header
    "%r is not a valid FLAC file" % fileobj.name)
mutagen.flac.FLACNoHeaderError: 'C:/Users/berna/Desktop/Lib/Andrew Bird/Armchair Apocrypha/cover.jpg' is not a valid FLAC file
Why is the if condition not doing its job and how can I fix it? The code is currently as follows:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import shutil
import os
from mutagen.flac import FLAC  # Used for metadata handling.
from os import listdir  # Used for general operations.
from fuzzywuzzy import fuzz  # Last resource name association.

# Insert here the root directory of your library and device respectively.
lib = 'C:/Users/berna/Desktop/Lib/'
dev = 'C:/Users/berna/Desktop/Dev/'


# Faster file copying function, arguments go as follows: Source file location,
# target directory, whether to keep the filename intact and whether to create
# the target directory in case it doesn't exist.
def copy_file(SrcFile, TgtDir, KeepName=True, MakeDir=True):
    SourceFile = None
    TargetFile = None
    KeepGoing = False
    # Checks is TgtDir is valid and creates if needed.
    if MakeDir and not os.path.isdir(TgtDir):
        os.makedirs(TgtDir)
    # Processes TgtDir depending on filename choice.
    if KeepName is True:
        TgtDir += os.path.basename(SrcFile)
    print(TgtDir)
    try:
        SourceFile = open(SrcFile, 'rb')
        TargetFile = open(TgtDir, 'wb')
        KeepGoing = True
        Count = 0
        while KeepGoing:
            # Read blocks of size 2**20 = 1048576
            Buffer = SourceFile.read(2 ** 20)
            if not Buffer:
                break
            TargetFile.write(Buffer)
            Count += len(Buffer)
    finally:
        if TargetFile:
            TargetFile.close()
        if SourceFile:
            SourceFile.close()
    return KeepGoing


# XXX TODO
# Copies a directory (SrcDir) to TgtDir, if Replace is True will delete same
# name directory and replace with new one.
def copy_tree(SrcDir, TgtDir, Replace=True):
    if not os.path.isdir(TgtDir):
        os.makedirs(TgtDir)
    Target = format_dir(TgtDir, os.path.basename(SrcDir))
    if os.path.isdir(Target) and Replace:
        shutil.rmtree(Target)
    if not os.path.isdir(Target):
        os.makedirs(Target)
    for File in listdir(SrcDir):
        FileDir = format_dir(SrcDir, File)
        # copy_file(FileDir, Tgt)
    return()


# Checks for new and deleted folders and returns their name.
def check_folder(SrcDir, TgtDir):
    # Lists Source and Target folder.
    Source = listdir(SrcDir)
    Target = listdir(TgtDir)
    # Then creates a list of deprecated and new directories.
    Deleted = [FileName for FileName in Target if FileName not in Source]
    Added = [FileName for FileName in Source if FileName not in Target]
    # Returns both lists.
    return (Added, Deleted)


# Checks for song in case there's a name mismatch or missing file.
def check_song(SrcFile, TgtDir):
    Matches = []
    # Invariably the new name will be that of the source file, the issue here
    # is finding which song is the correct one.
    NewName = TgtDir + '/' + os.path.basename(SrcFile)
    TagSource = FLAC(SrcFile)
    # Grabs the number of samples in the original file.
    SourceSamples = TagSource.info.total_samples
    # Checks if any song has a matching sample number and if true appends the
    # song's filename to Matches[]
    for Song in listdir(TgtDir):
        SongInfo = FLAC(TgtDir + '/' + Song)
        if (SongInfo.info.total_samples == SourceSamples):
            Matches.append(Song)
    # If two songs have the same sample rate (44100Hz for CDs) and the same
    # length it matches them to the source by filename similarity.
    if (Matches.count > 1):
        Diffs = []
        for Song in Matches:
            Diffs.append(fuzz.ratio(Song, os.path.basename(SrcFile)))
        if (max(Diffs) > 0.8):
            BestMatch = TgtDir + '/' + Matches[Diffs.index(max(Diffs))]
            os.rename(BestMatch, NewName)
        else:
            shutil.copy(SrcFile, TgtDir)
    # If there's no match at all simply copy over the missing file.
    elif (Matches.count == 0):
        shutil.copy(SrcFile, TgtDir)
    # If a single match is found the filename will be the first item on the
    # Matches[] list.
    else:
        os.rename(TgtDir + '/' + Matches[0], NewName)


# Syncs folders in a directory and return the change count.
def sync(SrcDir, TgtDir):
    AddCount = 0
    DeleteCount = 0
    # Grabs the folders to be added and deleted.
    NewDir, OldDir = check_folder(SrcDir, TgtDir)
    # Checks if any and then does add/rm.
    if OldDir:
        for Folder in OldDir:
            shutil.rmtree(TgtDir + Folder)
            DeleteCount += 1
    if NewDir:
        for Folder in NewDir:
            shutil.copytree(SrcDir + Folder, TgtDir + Folder)
            AddCount += 1
    return(AddCount, DeleteCount)


# Fixes missing metadata fields.
def fix_metadata(SrcFile, TgtFile):
    TagSource = FLAC(TgtFile)
    TagTarget = FLAC(SrcFile)
    # Checks for deleted tags on source file and deletes them from target.
    if (set(TagTarget) - set(TagSource)):
        OldTags = list(set(TagTarget) - set(TagSource))
        for Tag in OldTags:
            # TODO Right now I haven't quite figured out how to delete
            # specific tags, so workaround is to delete them all.
            TagTarget.delete()
    # Checks for new tags on source file and transfers them to target.
    if (set(TagSource) != set(TagTarget)):
        NewTags = list(set(TagSource) - set(TagTarget))
        for Tag in NewTags:
            TagTarget["%s" % Tag] = TagSource[Tag]
        TagTarget.save(TgtFile)


# Does metadata transfer between two files.
def match_metadata(SrcFile, TgtFile):
    Altered = 0
    TagSource = FLAC(SrcFile)
    TagTarget = FLAC(TgtFile)
    # For every different Tag in source song copy it to target and save.
    for Tag in TagSource:
        if TagSource[Tag] != TagTarget[Tag]:
            Altered += 1
            TagTarget[Tag] = TagSource[Tag]
            TagTarget.save(TgtFile)
    return(Altered)


# Simply does directory formatting to make things easier.
def format_dir(Main, Second, Third=""):
    # Replaces \ with /
    Main = Main.replace('\\', '/')
    # Adds a / to the end of Main and concatenates Main and Second.
    if(Main[len(Main) - 1] != '/'):
        Main += '/'
    Main += Second + '/'
    # Concatenates Main and Third if necessary.
    if (Third):
        Main += Third + '/'
    return (Main)


# Sync main folders in lib with dev.
sync(lib, dev)

# For every Artist in lib sync it's Albums
for Artist in listdir(lib):
    sync(format_dir(lib, Artist), format_dir(dev, Artist))
    # For every Album in Artist match songs
    for Album in listdir(format_dir(lib, Artist)):
        # Declares lib Album and dev Album to make function calls shorter.
        CurrentAlbum = format_dir(lib, Artist, Album)
        CoAlbum = format_dir(dev, Artist, Album)
        for Song in listdir(CurrentAlbum):
            if (".flac" or ".FLAC" in Song):
                try:
                    # Tries to match lib and dev song's metadata.
                    match_metadata(CurrentAlbum + Song, CoAlbum + Song)
                except:
                    # If that fails will try to fix both Filename and Tag
                    # fields.
                    check_song(CurrentAlbum + Song, CoAlbum)
                    fix_metadata(CurrentAlbum + Song, CoAlbum + Song)
                    try:
                        # Try again after fix.
                        match_metadata(CurrentAlbum + Song, CoAlbum + Song)
                    except Exception as e:
                        # If it still doesn't work there's black magic in place
                        # go sleep, drink a beer and try again later.
                        print("""Ehm, something happened and your sync failed.\n
                        Error:{}""".format(e))
                        raise SystemExit(0)
Try it:
Songs = ["a.flac", "a.mp3", "b.FLAC"]
flac_files = [s for s in Songs if s.lower().endswith('.flac')]
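For reference, the reason the original test never filters anything: or binds looser than in, so ".flac" or ".FLAC" in Song is parsed as ".flac" or (".FLAC" in Song), and a non-empty string literal is always truthy. A tiny demonstration:

Song = "cover.jpg"
print(bool(".flac" or ".FLAC" in Song))    # True  - the buggy test passes even for a .jpg
print(".flac" in Song or ".FLAC" in Song)  # False - the intended test
print(Song.lower().endswith(".flac"))      # False - the suggestion above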
As pointed out by @EliKorvigo, the error was caused by a simple slip in the if condition; the fix looks as follows:
for Song in listdir(CurrentAlbum):
    if (".flac" in Song or ".FLAC" in Song):
        try:
            # Tries to match lib and dev song's metadata.
            match_metadata(CurrentAlbum + Song, CoAlbum + Song)
        except:
            # If that fails will try to fix both Filename and Tag
            # fields.
            check_song(CurrentAlbum + Song, CoAlbum)
            fix_metadata(CurrentAlbum + Song, CoAlbum + Song)
            try:
                # Try again after fix.
                match_metadata(CurrentAlbum + Song, CoAlbum + Song)
            except Exception as e:
                # If it still doesn't work there's black magic in place
                # go sleep, drink a beer and try again later.
                print("""Ehm, something happened and your sync failed.\n
                Error:{}""".format(e))
                raise SystemExit(0)
