function write in python for a json file - python

I'm a beginner in Python. I have a program that classifies tweets into different categories (sport, sante, culture, ...) using keywords, and I would like to copy every line of the JSON file that belongs to a certain category into a file named text1.
I did the following:
but I guess I did it the wrong way, since I keep getting the same error.
Any suggestion on how to solve this problem would be appreciated!
import json
import mysql.connector
'''
python -m pip install unicode
'''
# Running totals for the classification pass below:
# c counts every line read from the JSON file, n counts the tweets whose
# text contains at least one keyword from the covid list.
c = 0
n = 0
# One counter per category; each is incremented when a tweet is assigned
# to that category.
sportcount=0
religcount=0
santecount=0
educcount=0
cultcount=0
socicount=0
policount=0
covid = ['كوفيد','MEDECIN','كورونا','CORONA', 'COVID','VACCIN', 'PANDEMIE', 'CONFINEMENT', 'PANDEMIE', 'CHU', 'GEL','ﻣﺎﺳﻚ' ,'CHINA','ANTIVIRALES','LAVAGE DE MAINS','VIRUS','اﻟﺤﺮاﻙ','اﻟﺤﺠﺮ','CHLOROQUINE','FATIGUE','كماما','STAYSAFE','EPIDEMIE','STAYHOME','منظمة الصحة',' pas de prière']
sport=['مولودية','WORKOUT','بايرن','ارسنال','ليفربول','منتخب','تتويج','ميسي','PSG','FIFA', 'FOOT','FEKIR', 'BOUGER', 'DANSER', 'STADE','بونجاح','JOUEUR', 'COMPETION', 'SPORT', 'SALLE', 'SPONSOR', 'PISCINE', 'PUMA', 'GYM', 'TEN', 'MATCH', 'CHAMPION', 'BASKET', 'NOVAK', 'DJOKOVIC', 'MESSI', 'OLYMPIQUE']
religion=['ALLAH','المساجد','مصل','HAMDULILAH','المسلم','فتوى','لله','EID','الله','MOSQ','دين']
sante=['controle','إصاب','OXYGENE','بوناطيرو','حالات','مؤكد','IMMUNIT','CAS','صح','DOCTEUR','مخبر','حصيلة','صحة','أطباء','تسجل','FATIG','مستشف','HOPITAUX','سعال','لقاح','SOUCHES','MALADE','حصيلة','FUMEURS', 'DIABETE', 'EPIDEMIE', 'DEPISTAGE', 'SOIGNANT', 'INJECTION','GEL','SANTE', 'FIEVRE', 'KAWASAKI', 'RESPIRATOIRE', 'PATIENT', 'TEST', 'TRAITEMENT','فحص','كماما', 'CHU','منظمة الصحة', 'MEDECINE', 'POSITIF', 'PHARMACE', 'INFECTES', 'IMMUN', 'VACCIN', 'PFIZER', 'PCR', 'PANDÉM', 'PANADEMI', 'ÉPIDÉMI', 'EPIDEMIC', 'MASQUE', 'BAVETTE', 'MASK', 'MÉDICAL', 'MEDICAL', 'HÔPITAL', 'HOSPITAL', 'INFECT', 'TRANSMISSION', 'SURVIVANT', 'SURVIVORS', 'DIAGNOSTIC', 'DIAGNOSIS', 'SANTÉ', 'HEALTH', 'MÉDECIN', 'DOCTOR', 'MÉDICAMENT', 'MEDICIN', 'AMBULANCE', 'DÉPISTAGE', 'DEPISTAGE', 'STATISTI', 'MALAD', 'SICK', 'CONFIN', 'PROPAGATION', 'PRÉVENTION', 'PREVENTION', 'CONTAGION', 'SYMPT', 'MESUR', 'MEASUR', 'MICROB', 'WASH', 'ISOL']
education=['سنة','collaboration','bac','EDUC','ÉCOLE','PROF','بتدائي','تعليم','أساتذة','دراس','طلبة']
culture=['LIVRE', 'BOOK', 'SHOP', 'FILM', 'MOVIE', 'MUSIC', 'TV', 'VOYAGE', 'CINEMA', 'ART', 'BLOG', 'SONG']
social=['الشباب','TWITTER','تغريد','متابعة','SOCI','NETFLIX','YOUTUBE','JOURNAL','solde','liquidationy']
politique=['manifestation','AFFAIRE', 'PUBLIQUE', 'AMBASSADE', 'CIRCONSPECTION', 'CIVI', 'COMBINAISON', 'DÉMAGOGIE', 'DÉMOCRATIE', 'DIPLOMATE', 'ÉCONOMIE', 'ÉTAT', 'FÉDÉRALISME', 'GOUVERNEMENT', 'LEGATION', 'MACHIAVÉLIQUE', 'MACHIAVÉLISME', 'MANŒUVRIER', 'NÉGOCI', 'POLICE', 'POUVOIR', 'PRUDE', 'PUBLIC', 'STRATÉGIE', 'TRACTATION', 'RÉPUBLIQUE', 'SONDAGE', 'OPINION', 'PARLEMENT', 'CITO', 'DÉPUTÉ', 'DIRIGEANT', 'MAIRE', 'MINIST', 'SECRÉTAIRE', 'SÉNATEUR', 'CONSEILLER', 'MAIRE', 'COMMUNES', 'MEMBRE', 'DU', 'PARLEMENT', 'CONGRÈS', 'SÉNAT', 'PROTESTATION', 'PROCURATION', 'POUVOIR', 'FRAUDE','النفط','وزير','HOLLANDE','خدعة','تبون','الغلق','BORIS JOHNSON','وزار','رئيس','DROITS','مؤامرة','والي','TRUMP','GOUVERNEMEN', 'POLITI', 'OUYAHIA', 'ERAK', 'IRAK', 'REINE', 'MACRON', 'MINIST', 'AMBASSAD', 'MANIFEST', 'PRESIDENT', 'SELLAL', 'NATIONAL', 'مظاهر','MILITAIRE', 'DICTATEUR','اﻟﺤﺮاﻙ']
# Classify each tweet (one JSON document per line) by keyword and copy the
# matching lines to the result file.
myJsonFile = open('tweet.json', encoding="utf-16")
# NOTE(review): no encoding is given here, so the platform default codec is
# used when writing; the traceback quoted below shows cp1252 failing on
# characters it cannot represent. No close() call is visible in this snippet.
resultat = open('texte.txt', 'w')
for line in myJsonFile:
data = json.loads(line)
c = c+1
# The text is upper-cased before matching.
# NOTE(review): keywords written in lower case in the lists above
# (e.g. 'controle', 'bac', 'solde') can therefore never match.
text = data['raw_text'].upper()
# True when at least one covid keyword occurs as a substring of the text.
tweet = any(ele in text for ele in covid )
if tweet == True:
n=n+1
#print(str(n) + " the " +str(c)+" tweet---------------------------------------------------------------------------------------")
# Categories are tested in a fixed order (sport, sante, politique, culture,
# religion, education, social); the first list with a matching keyword wins.
in_sport = any(ele in text for ele in sport )
if in_sport:
# NOTE(review): the category is stored on the parsed dict, but the original
# `line` is what gets written, so "cat" does not reach the output file.
data["cat"]='sport'
resultat.write(line)
sportcount=sportcount+1
print(data)
else :
in_sante = any(ele in text for ele in sante )
if in_sante :
data["cat"]='sante'
resultat.write(line)
santecount=santecount+1
print(data)
else :
in_politique = any(ele in text for ele in politique)
if in_politique:
data["cat"]='politique'
policount=policount+1
resultat.write(line)
print(data)
else:
in_culture = any(ele in text for ele in culture )
if in_culture:
data["cat"]='culture'
cultcount=cultcount+1
resultat.write(line)
print(data)
else:
in_religion = any(ele in text for ele in religion)
if in_religion:
data["cat"]='religion'
religcount=religcount+1
resultat.write(line)
print(data)
else:
in_education = any(ele in text for ele in education )
if in_education:
data["cat"]='education'
educcount=educcount+1
resultat.write(line)
print(data)
else:
in_social = any(ele in text for ele in social)
if in_social:
data["cat"]='social'
socicount=socicount+1
resultat.write(line)
print(data)
else:
# No category keyword matched: only a separator line is printed.
print(" the tweet---------------------------------------------------------------------------------------")
# Summary: lines read, covid matches, per-category totals, and the covid
# tweets that matched no category (n minus the sum of the category counters).
print("a partir de",c," le nombre de tweets concernant le covid sont ",n)
print("sport ",sportcount," sante ",santecount," politique ",policount," culture ",cultcount," religion ",religcount," education ",educcount,"social",socicount)
print("les tweets non classifies",n-(educcount+religcount+cultcount+policount+santecount+sportcount+socicount))
error:
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "C:/Users/NIHAD/PycharmProjects/pythonProject3/classification.py", line 51, in <module>
resultat.write(line)
File "C:\Users\NIHAD\AppData\Local\Programs\Python\Python39\lib\encodings\cp1252.py", line 19, in encode
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
UnicodeEncodeError: 'charmap' codec can't encode characters in position 451-454: character maps to <undefined>

This might be a very simple case of fixing the encoding.
Your error says:
UnicodeEncodeError: 'charmap' codec can't encode characters in position 451-454: character maps to <undefined>
If it does not have to be encoded in utf-16, try opening the json file like:
myJsonFile = open('tweet.json', encoding="utf-8")
and specifying the encoding in the result file:
# 'texte.txt' is the output file used in the question's script; writing it as
# UTF-8 lets it hold characters the platform default codec cannot encode.
resultat = open('texte.txt', 'w', encoding='utf-8')
You might get lucky and that will fix it!
If it doesn't fix it, the answers to the following question go into more detail about diagnosing the problem and potential solutions:
UnicodeEncodeError: 'charmap' codec can't encode characters

Related

I am getting a KeyError and am not sure how to fix it

I have written out my code and when I run it, I get a KeyError:
Traceback (most recent call last):
File "C:/Users/sagar/Desktop/Sagar CS131B Files/convert_to_fixed.py", line 21, in <module>
birthdate = sample['Birthdate']
KeyError: 'Birthdate'
my code:
# Read ':'-separated records, expand two-digit birth years to four digits,
# swap the first two columns and write the result to 'fixed.data'.
inputFile = 'raw.data.py'
data = list()
columns = ['First name','Last name','Telephone','Address','City','State','Birthdate']
for line in open(inputFile):
# Assuming comments in the text file as '#'
if line.startswith('#'): continue
row = line.strip().split(':')
# zip() stops at the shorter sequence, so a line with fewer than seven
# ':'-separated fields produces a dict that lacks the trailing keys.
data.append(dict(zip(columns, row)))
#print(data)
formatted_data = list()
for sample in data:
# Raises KeyError for any row too short to have received a 'Birthdate' entry.
birthdate = sample['Birthdate']
mm,dd,yy = birthdate.split('/')
# Two-digit years are expanded by prefixing '19'.
if len(yy)==2:
yy = '19' + yy
birthdate = '/'.join([mm,dd,yy])
sample['Birthdate'] = birthdate
# The output row puts the last name before the first name.
modified_row = ':'.join(
[sample['Last name'], sample['First name'],
sample['Telephone'], sample['Address'],
sample['City'], sample['State'], sample['Birthdate']])
formatted_data.append(modified_row + '\n')
with open('fixed.data','w') as f:
f.writelines(formatted_data)
I have looked up how to fix it, but I am not sure how to write a try-except block. If someone could help me out with this, that would be amazing.
This is what is inside the file given:
'Betty:Boop:245-836-8357:635 Cutesy Lane:Hollywood:CA:6/23/1923',
'Ephram:Hardy:293-259-5395:235 Carlton Lane:Joliet:IL:8/12/1920',
'Fred:Fardbarkle:674-843-1385:20 Parak Lane:DeLuth:MN:4/12/23',
'Igor:Chevsky:385-375-8395:3567 Populus Place:Caldwell:NJ:6/18/68',
'James:Ikeda:834-938-8376:23445 Aster Ave.:Allentown:NJ:12/1/1938',
'Jennifer:Cowan:548-834-2348:408 Laurel Ave.:Kingsville:TX:10/1/35',
'Jesse:Neal:408-233-8971:45 Rose Terrace:San Francisco:CA:2/3/2001',
'Jon:DeLoach:408-253-3122:123 Park St.:San Jose:CA:7/25/53',
'Jose:Santiago:385-898-8357:38 Fife Way:Abilene:TX:1/5/58',
'Karen:Evich:284-758-2867:23 Edgecliff Place:Lincoln:NB:11/3/35',
'Lesley:Kirstin:408-456-1234:4 Harvard Square:Boston:MA:4/22/2001',
'Lori:Gortz:327-832-5728:3465 Mirlo Street:Peabody:MA:10/2/65',
'Norma:Corder:397-857-2735:74 Pine Street:Dearborn:MI:3/28/45',
'Paco:Gutierrez:835-365-1284:454 Easy Street:Decatur:IL:2/28/53',
'Popeye:Sailor:156-454-3322:945 Bluto Street:Anywhere:USA:3/19/35',
'Sir:Lancelot:837-835-8257:474 Camelot Boulevard:Bath:WY:5/13/69',
'Steve:Blenheim:238-923-7366:95 Latham Lane:Easton:PA:11/12/1956',
'Tommy:Savage:408-724-0140:1222 Oxbow Court:Sunnyvale:CA:5/19/66',
'Vinh:Tranh:438-910-7449:8235 Maple Street:Wilmington:VM:9/23/63',
'William:Kopf:846-836-2837:6937 Ware Road:Milton:PA:9/21/46',
'Yukio:Takeshida:387-827-1095:13 Uno Lane:Ashville:NC:7/1/29',
'Zippy:Pinhead:834-823-8319:2356 Bizarro Ave.:Farmount:IL:1/1/67',
'Andy:Warhol:212-321-7654:231 East 47th Street:New York City:NY:8/6/1928'
zip() only produces results up to the shorter iterables length:
print(list(zip([1,2],[1,2,3,4,5,6]))) # [(1, 1), (2, 2)]
Your source data has at least one line with fewer elements in it; that is why one of your dicts does not have the 'Birthdate' key (the last one).
You can guard against it:
# Parse ':'-separated records from inputFile into a list of dicts keyed by
# column name, refusing lines that do not have exactly one value per column.
data = list()
columns = ['First name', 'Last name', 'Telephone',
           'Address', 'City', 'State', 'Birthdate']

# Use a context manager so the file is closed even when a bad line raises.
with open(inputFile) as f:
    for line in f:
        stripped = line.strip()
        # Skip empty lines and comment lines (assumed to start with '#').
        if not stripped or line.startswith('#'):
            continue
        row = stripped.split(':')
        # zip() silently truncates to the shorter input, so a wrong field
        # count must be detected here rather than surfacing later as a
        # KeyError. ValueError is the conventional type for malformed data.
        if len(row) != len(columns):
            raise ValueError(
                "Expected %d fields but found %d in line: %r"
                % (len(columns), len(row), line))
        data.append(dict(zip(columns, row)))

error with regex matching over 2 source files, expected string or buffer

I would like to create two dictionaries from an input.txt file.
For example, here is a sample of the input.txt file:
#. VAR #first=Billy
#. VAR #last=Bob
#. PRINT VARS
#. VAR #petName=Gato
#. VAR #street="1234 Home Street"
#. VAR #city="New York"
#. VAR #state=NY
#. VAR #zip=21236
#. VAR #title=Dr.
#. PRINT VARS
#. FORMAT LM=5 JUST=LEFT
#. PRINT FORMAT
so VAR #varName=value
i.e in the case of #first=Billy you would get something like varDict = {"first": "Billy"} right?
Now I want to know how to do that through the entire file.
There are two dictionaries that I would need to populate: one for the variables, and one for FORMAT, which just holds values and doesn't actually do anything for now.
As for the desired output: the input file contains commands that, when read, will either add variables to the variable dictionary, print that dictionary, or add to the format dictionary. I would use the pprint function like this, pprint.pprint(varDict , width=30), and it would output something like this:
{'first': 'Billy',
'last': 'Bob'}
{'city': 'New York',
'first': 'Billy',
'last': 'Bob',
'petName': 'Gato',
'state': 'NY',
'street': '1234 Home Street',
'title': 'Dr.',
'zip': '21236'}
{'BULLET': 'o',
'FLOW': 'YES',
'JUST': 'LEFT',
'LM': '5',
'RM': '80'}
Unfortunately I keep getting errors all over the place in the driver and the source file:
AttributeError: 'list' object has no attribute 'groups'
TypeError: expected string or buffer
Driver.py
# Driver: read the command file named on the command line and dispatch each
# "#. COMMAND arguments" line to the matching handler.
input = sys.argv[1]

# Group 1. VAR
# Group 2. #first=Mae or JUST=RIGHT FLOW=NO
# pass Group 2 as atString
# The dot after '#' is escaped so that only a literal "#." prefix matches,
# consistent with the other patterns below.
regexSearch = re.compile(r'^#\. ([A-Z]+) (.*)', re.MULTILINE)
regexPrintVAR = re.compile(r'^#\.\s*PRINT\s(VARS)', re.MULTILINE)
regexPrintFORMAT = re.compile(r'^#\.\s*PRINT\s(FORMAT)', re.MULTILINE)
regexERRCheck = re.compile(r'^#\.\s*FORMAT\s+BAD', re.MULTILINE)

varDictionary = dict()
formatDictionary = {"FLOW": "YES", "LM": "1", "RM": "80", "JUST": "LEFT", "BULLET": "o"}

# Iterating over the file visits every line exactly once and stops at EOF;
# the `with` block closes the file even if a handler raises.
with open(input, "r") as file:
    for inputLine in file:
        matchObj = regexSearch.search(inputLine)
        if matchObj is None:
            # Not a "#. COMMAND ..." line (blank line, plain text): skip it
            # instead of calling .groups() on None.
            continue
        command, atString = matchObj.groups()
        if command == "VAR":
            setVariable(atString, varDictionary)
        elif command == "FORMAT":
            if regexERRCheck.search(inputLine) is not None:
                # Presumably a rejected FORMAT line must not be applied.
                print("*** Not a recognizable command")
                continue
            setFormat(atString, formatDictionary)
        elif command == "PRINT":
            if regexPrintVAR.search(inputLine) is not None:
                pprint.pprint(varDictionary, width=30)
            elif regexPrintFORMAT.search(inputLine) is not None:
                pprint.pprint(formatDictionary, width=30)
importFileIUse.py
# The atString is the remainder of the string after the VAR or FORMAT key word.
varDictionary = dict()
formatDictionary = {"FLOW":"YES", "LM":"1", "RM":"80","JUST":"LEFT","BULLET":"o"}
def setFormat(atString, formatDictionary):
    """Update *formatDictionary* in place from the KEY=VALUE pairs in *atString*.

    Args:
        atString: remainder of a FORMAT line, e.g. ``"LM=5 JUST=LEFT"``.
        formatDictionary: dict of format settings; matching keys are
            overwritten and new keys are added.

    Only the trailing run of well-formed ``word=word`` pairs is applied; an
    empty string leaves the dictionary unchanged.
    """
    regexFormat = re.compile(r'((?:(?:\w+)=(?:\w+)\s*)*)$')
    # Search the string itself: a regex cannot be applied to the list that
    # re.split() returns ("expected string or buffer").
    formatList = regexFormat.search(atString)
    if formatList:
        # .group(1) rather than formatList[0]: match objects are only
        # subscriptable on Python 3.6+.
        for param in formatList.group(1).split():
            key, _, value = param.partition('=')
            formatDictionary[key] = value
def setVariable(atString, varDictionary):
    """Store every ``#name=value`` assignment from *atString* in *varDictionary*.

    Args:
        atString: remainder of a VAR line, e.g. ``'#street="1234 Home Street"'``.
        formatDictionary-style dict *varDictionary* is updated in place.

    Trailing whitespace is dropped and a value wrapped in double quotes is
    stored without the quotes, so ``#street="1234 Home Street"`` yields
    ``{'street': '1234 Home Street'}``.
    """
    # Lazy value group so that trailing whitespace is left to \s* and is not
    # captured as part of the value.
    regexVAR = re.compile(r'#(\w+)=(.*?)\s*$', re.MULTILINE)
    for key, value in regexVAR.findall(atString):
        # Quotes only delimit values that contain spaces; they are not data.
        if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
            value = value[1:-1]
        varDictionary[key] = value

separate text with semicolons in python

I have a text file in this format:
subscriber=admin lname="adamec22a" password="kofola1224" first-name="Anton net na M.lehote,zapajal si to sam!!" last-name="Adamec 1.3.2012 skoncil zmluvu" phone="00421917499086" location="NB, Sturova 18, 2pos." rate-limit=" 1M/3M" last-seen=never
What I need to do in Python is separate the fields of each line with semicolons; if a field is missing (like first-name, or some other), the script should leave a blank space between the two semicolons.
Assuming that the input lines are consistently formatted, and that I understand what you're asking, you can recover the data in the way indicated here. Then you can output it in any way that suits you.
>>> pieces = '''subscriber=admin lname="adamec22a" password="kofola1224" first-name="Anton net na M.lehote,zapajal si to sam!!" last-name="Adamec 1.3.2012 skoncil zmluvu" phone="00421917499086" location="NB, Sturova 18, 2pos." rate-limit=" 1M/3M" last-seen=never'''.split('=')
>>> fieldNames = [ pieces[0] ]
>>> for i in range(1, -1+len(pieces)):
... fieldNames.append(pieces[i][1+pieces[i].rfind(' '):])
...
>>> fieldNames
['subscriber', 'lname', 'password', 'first-name', 'last-name', 'phone', 'location', 'rate-limit', 'last-seen']
>>> fieldValues = [ pieces[-1]]
>>> for i in range(-2+len(pieces),0,-1):
... fieldValues.append(pieces[i][:pieces[i].rfind(' ')])
...
>>> fieldValues.reverse()
>>> fieldValues
['admin', '"adamec22a"', '"kofola1224"', '"Anton net na M.lehote,zapajal si to sam!!"', '"Adamec 1.3.2012 skoncil zmluvu"', '"00421917499086"', '"NB, Sturova 18, 2pos."', '" 1M/3M"', 'never']
>>> for fieldName, fieldValue in zip(fieldNames, fieldValues):
... fieldName, fieldValue
...
('subscriber', 'admin')
('lname', '"adamec22a"')
('password', '"kofola1224"')
('first-name', '"Anton net na M.lehote,zapajal si to sam!!"')
('last-name', '"Adamec 1.3.2012 skoncil zmluvu"')
('phone', '"00421917499086"')
('location', '"NB, Sturova 18, 2pos."')
('rate-limit', '" 1M/3M"')
('last-seen', 'never')

python 2.7 variable string to bytes savReaderWriter

I know that python 2.7 str and bytes are the same(according to Changing string to byte type in Python 2.7)
but I'm using savReaderWriter to write in a .sav file, that is, first column will be a string, and the others are numbers..
my definition is basically the same as in the documentation of savReaderWriter:
# Minimal SavWriter example: each record holds a bytes string followed by
# two numbers, matching the three variable names below.
savFileName = 'someFile.sav'
records = [[b'Test1', 1, 1], [b'Test2', 2, 1]]
varNames = ['var1', 'v2', 'v3']
# presumably 0 marks a numeric variable and a positive value is the string
# width (5 = len('Test1')) -- TODO confirm against the savReaderWriter docs
varTypes = {'var1': 5, 'v2': 0, 'v3': 0}
with SavWriter(savFileName, varNames, varTypes) as writer:
for record in records:
# one record (a list of cell values) per writerow() call
writer.writerow(record)
on this, they use b'' to convert the string to bytes, but that's a string example, that was written with the code..
but I have a list of strings strings = ["string1", "string2", ...] and each of the strings will be on the my "records"...something like this:
records = [
[strings[0], 0, 0, ...],
[strings[1], 1, 0, ...],
...
]
So, how do I convert the variable to bytes?
I'm getting 'str' object does not support item assignment
Edit: my code is below; the error that my exception handler prints is the one quoted above.
def exporta_sav(matrizes, listas):
    """Write the group and level-8 matrices to two SPSS ``.sav`` files.

    Args:
        matrizes: ``[matriz_grupo, matriz_n8]`` -- for each matrix, one row
            of values per patent.
        listas: ``[cpcs_grupo, patentes_grupo, cpcs_n8, patentes_n8]`` --
            the column names and the row labels of the two matrices.

    A matrix whose row count differs from its list of patents is written
    with no records. Errors raised while writing are caught and reported
    with ``print`` rather than propagated.
    """
    savFile1 = "matrizgrupo.sav"
    savFile2 = "matrizsubgrupo.sav"
    stringpatente = 'PATENTE'

    # Variable types: 7 for the label column, 0 for every CPC column
    # (presumably string width vs. numeric -- TODO confirm that 7 is wide
    # enough for the patent labels).
    varTypesGrupo = {stringpatente: 7}
    for cpc in listas[0]:
        varTypesGrupo[cpc] = 0
    varTypesNivel8 = {stringpatente: 7}
    for cpc in listas[2]:
        varTypesNivel8[cpc] = 0

    # Column order: the label column first, then the CPC codes.
    cpcsGrupoAux = [stringpatente] + list(listas[0])
    cpcsN8Aux = [stringpatente] + list(listas[2])

    # With the columns defined, build one record per patent:
    # [label, value, value, ...].
    matrizGrupoAux = []
    if len(matrizes[0]) == len(listas[1]):
        for patente, linha in zip(listas[1], matrizes[0]):
            matrizGrupoAux.append([bytes(patente)] + list(linha))
    matrizN8Aux = []
    if len(matrizes[1]) == len(listas[3]):
        for patente, linha in zip(listas[3], matrizes[1]):
            matrizN8Aux.append([bytes(patente)] + list(linha))

    # Drop the references to the inputs before the write phase.
    del listas
    del matrizes

    # Write the files.
    try:
        with SavWriter(savFile1, cpcsGrupoAux, varTypesGrupo) as writer:
            # writerows() expects the whole list of records. Handing it a
            # single record makes it treat every cell as a record, which
            # fails with "'str' object does not support item assignment".
            writer.writerows(matrizGrupoAux)
        with SavWriter(savFile2, cpcsN8Aux, varTypesNivel8) as writer:
            writer.writerows(matrizN8Aux)
    except Exception as error:
        print("Ocorreu um erro ao escrever nos arquivos: " + str(error))
Since it's asked, the full traceback(I removed the handling):
Traceback (most recent call last):
File "matrizbinaria.py", line 242, in (module)
exporta_sav(resultados[0], resultados[1])
File "matrizbinaria.py", line 233, in exporta_sav
writer.writerows(patente)
File "C:\Python27\lib\site-packages\savReaderWriter\savWriter.py", line 429 in writerows
self.writerow(record)
File "C:\Python27\lib\site-packages\savReaderWriter\savWriter.py", line 387 in writerow
self._pyWriterow(record)
File "C:\Python27\lib\site-packages\savReaderWriter\savWriter.py", line 379 in _pyWriterow
record[i] = value
TypeError: 'str' object does not support item assignment

Convert a csv into category-subcategory using array

Above is the input table i have in csv
I am trying to use arrays and while loops in Python; I am new to this language. The loop should run twice to give the Category\sub-category\sub-category_1 order. I am trying to use split(). The output should look like the example below.
import csv
# Attempt to read the CSV and create one empty entry per row.
# NOTE(review): 'rb' is the Python 2 way to open a file for the csv module;
# Python 3 expects a text-mode file.
with open('D:\\test.csv', 'rb') as f:
# NOTE(review): csv requires a 1-character delimiter, so '' is rejected.
reader = csv.reader(f, delimiter='',quotechar='|')
data = []
for name in reader:
# NOTE(review): data is a list and name is a row (a list), so this
# subscript assignment raises TypeError; a dict keyed by one of the
# columns was presumably intended.
data[name] = []
And if you read the lines of your csv and access the data then you can manipulate the way you want later.
# Maps the integer category number (first column) to the list of the
# remaining '|'-separated columns of that row. (Python 2 syntax.)
cats = {}
with open('my.csv', "r") as ins:
# check each line of the file
for line in ins:
# remove double quotes: replace('"', '')
# remove break line : rstrip()
a = str(line).replace('"', '').rstrip().split('|')
# skip the row whose first column is 'CatNo' (presumably the header)
if a[0] != 'CatNo':
cats[int(a[0])] = a[1:];
for p in cats:
print 'cat_id: %d, value: %s' % (p, cats[p])
# you can access the value by the int ID
print cats[1001]
the output:
cat_id: 100, value: ['Best Sellers', 'Best Sellers']
cat_id: 1001, value: ['New this Month', 'New Products\\New this Month']
cat_id: 10, value: ['New Products', 'New Products']
cat_id: 1003, value: ['Previous Months', 'New Products\\Previous Months']
cat_id: 110, value: ['Promotional Material', 'Promotional Material']
cat_id: 120, value: ['Discounted Products & Special Offers', 'Discounted Products & Special Offers']
cat_id: 1002, value: ['Last Month', 'New Products\\Last Month']
['New this Month', 'New Products\\New this Month']
Updated script for your question:
categories = {}
def get_parent_category(cat_id):
    """Return the parent id of *cat_id*, or '' when it has none.

    Ids of at most two characters are treated as top level; for longer ids
    the parent is the id with its last character removed.
    """
    return cat_id[:-1] if len(cat_id) > 2 else ''
# Load every category row into `categories`, keyed by the category id kept
# as a string, then print each category prefixed by its chain of parents.
# (Python 2 syntax.)
with open('my.csv', "r") as ins:
for line in ins:
# remove double quotes: replace('"', '')
# remove break line : rstrip()
a = str(line).replace('"', '').rstrip().split('|')
cat_id = a[0]
# skip the row whose first column is 'CatNo' (presumably the header)
if cat_id != 'CatNo':
categories[cat_id] = {
'parent': get_parent_category(cat_id),
'desc': a[1],
'long_desc': a[2]
};
print 'Categories relations:'
for p in categories:
parent = categories[p]['parent']
output = categories[p]['desc']
# Walk up the parent chain, prepending each ancestor's description.
# NOTE(review): raises KeyError if a derived parent id is not in the file.
while parent != '':
output = categories[parent]['desc'] + ' \\ ' + output
parent = categories[parent]['parent']
print '\t', output
output:
Categories relations:
New Products
New Products \ Best Sellers
New Products \ Discounted Products & Special Offers
New Products \ Best Sellers \ Previous Months
New Products \ Best Sellers \ Last Month
New Products \ Best Sellers \ New this Month

Categories