I just converted my YAML file to JSON with Python.
Here is the YAML file:
timetable:
  subject1:
    day: Пн
    time: 08:20-09:50
    room: 0
    lesson: Физическая Культура
    teacher: Трифонов
    location: Онлайн
    parity: False
  subject2:
    day: Пн
    time: 10:00-11:30
    room: 11210
    lesson: Математика (Практические занятия)
    teacher: Игоревич
    location: д.9, лит.
    parity: False
Here is my conversion code:
inputfile = open('saturday.yaml', 'r', encoding = "utf-8")
outputfile = open('timetable.json', 'w', encoding = "utf-8")
newline = inputfile.readline()
data = list()
lines = 0
list1 = list()
stringo = list()
while newline:
    data.append(newline)
    lines += 1
    newline = inputfile.readline()
inputfile.close()
start_k = len(data[0]) - len(data[0].lstrip())
outputfile.write("{\n")
for i in range(0, lines - 1):
    if data[i].lstrip()[0] == '-':
        list1.append(' "' + data[i].lstrip().lstrip('-'))
        outputfile.write(' "' + data[i].lstrip().lstrip('-'))
    else:
        stringo = data[i].lstrip().split(':', maxsplit = 1)
        outputfile.write(' "' + stringo[0] + '":' + stringo[1].lstrip())
    end_k = len(data[i + 1]) - len(data[i + 1].lstrip())
    if end_k < start_k:
        outputfile.write(" },"'\n')
    if end_k > start_k:
        outputfile.write('\n'" {"'\n')
    start_k = end_k
outputfile.write(' }\n }\n}')
inputfile.close()
outputfile.close()
Now I need to convert the YAML file to JSON using regular expressions, and I am stuck there. Any suggestions?
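For what it is worth, here is a minimal sketch of one regex-driven approach. It assumes the file stays a simple two-level mapping like the sample (no lists, no quoting, no comments): a single regular expression splits each line into indentation, key and value, nesting is tracked by indentation depth, and the json module does the output formatting.

import re
import json

# Sketch: capture indentation, key and value with one regular expression,
# track nesting by indentation depth, and let json handle the output.
line_re = re.compile(r'^(\s*)([^:]+):\s*(.*)$')

root = {}
stack = [(-1, root)]  # (indent, container) pairs

with open('saturday.yaml', 'r', encoding='utf-8') as f:
    for raw in f:
        if not raw.strip():
            continue
        indent, key, value = line_re.match(raw.rstrip('\n')).groups()
        indent = len(indent)
        while indent <= stack[-1][0]:
            stack.pop()
        parent = stack[-1][1]
        if value == '':
            child = {}
            parent[key] = child
            stack.append((indent, child))
        else:
            parent[key] = value

with open('timetable.json', 'w', encoding='utf-8') as f:
    json.dump(root, f, ensure_ascii=False, indent=2)

Note that every value stays a string here (room, parity, and so on); turning them into numbers or booleans would need extra handling.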
Related
I have an array that I need to convert to a JSON file. There is a text file that holds the data. But I don't understand why it only adds one record.
import collections
import json

list = []
with open("file.txt") as f:
    for line in f:
        info = line.split()
        lists = ("ip" + " " + info[0].replace(":", " ").split()[0] + " " + "port" + " " + info[0].replace(":", " ").split()[1] + " " + "region" + " " + info[1].replace("-", " ").split()[0]).split()
        list.append(lists)

d = collections.defaultdict(dict)
for l in list:
    d[l[0]] = l[1]
    d[l[2]] = l[3]
    d[l[4]] = l[5]

print(json.dumps(d))
with open("proxy.json", "w") as f:
    f.write(json.dumps(d))
Example of a text file:
154.0.5.178:8080 ZA-N-S! -
119.28.156.115:3128 KR-N -
207.144.111.230:8080 US-H -
3.20.236.208:49205 US-H-S -
217.60.194.43:8080 IR-N! -
190.61.41.106:999 CO-N-S +
What I get (output screenshot omitted) is only a single record in proxy.json.
info[1].replace("-", " ").split()[0]
will always return a single value! Try this:
import json

alist = []
with open("file.txt") as f:
    for line in f:
        info = line.split()
        data = {"ip": info[0].split(":")[0], "port": info[0].split(":")[1], "region": info[1].split("-")}
        alist.append(data)

print(json.dumps(alist))
with open("proxy.json", "w") as f:
    f.write(json.dumps(alist))
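For the first sample line ("154.0.5.178:8080 ZA-N-S! -") the first element written to proxy.json comes out roughly like this; note that "region" becomes a list because of the split("-"):

{"ip": "154.0.5.178", "port": "8080", "region": ["ZA", "N", "S!"]}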
I want to read in logData as binary and then parse the binary data in the second for loop the same way the string version is parsed. Is this possible?
import re
import sys

logData = open(sys.argv[1]).readlines()
processedSources = sys.stdin.readlines()

stringDictionary = {}
for line in processedSources:
    # Match data looking for MODULE_ID, LOG_LINE, ARG_COUNT, FILE_NAME, DATA_STRING
    match = re.search("(\d+),\s+(\d+),\s+(\d+),\s+(.*),\s+(\".*\")", line)
    if match:
        moduleId = int(match.group(1))
        logLine = int(match.group(2))
        argCount = int(match.group(3))
        fileName = match.group(4)
        outputString = match.group(5)
        stringDictionary[(moduleId, logLine)] = [moduleId, logLine, argCount, fileName, outputString]
    else:
        print "Failed string dictionary on: " + line

for line in logData:
    # Match data looking for MODULE_ID, LOG_LINE, ARG_COUNT, ARGUMENTS
    matchLogData = re.split("\s+", line)
    if matchLogData:
        moduleId = int(matchLogData[0], 16)
        logLine = int(matchLogData[1], 16)
        argCount = int(matchLogData[2], 16)
        if stringDictionary[(moduleId, logLine)]:
            processedData = stringDictionary[(moduleId, logLine)]
            if argCount != processedData[2]:
                print "Argument count mismatch on : " + line
                print " expected %d found %d" % (argCount, processedData[2])
            else:
                index = 0
                logString = "%02x:%4d:%s:" + processedData[4]
                logData = (processedData[0], processedData[1], processedData[3])
                while index < argCount:
                    logData = logData + (int(matchLogData[index+3], 16),)
                    index = index + 1
                print logString % logData
        else:
            print "ModuleId:%d Line:%d, not found in source dictionary" % (moduleId, logLine)
            print " Line data: " + line
    else:
        print "Expected log input data mismatch MODULE_ID LOG_LINE ARG_COUNT ARGS"
        print "Line: " + line
So I have this Python file which looks for all the "label" tags in an XML file and does some modification to them. A label is a string spanning at most three lines. The code manipulates the XML file.
#1 label="Number of Packets Transmitted by the Source
Node of the Path to the Destination Node Of
the Path"
#2 label="Number of Packets Transmitted by the Source
node of the path to the destination node of
the path"
Notice that in label #2 the words in the second and third lines are not in title case, which is not what I want. I want help correcting the logic of my program so that the label is not written twice.
import os
from io import StringIO, BytesIO

def splitAndMakeTitleCase(line):
    # does something not relevant to context

fileList = open("AllFiles")
for fileStr in fileList:
    fileName = fileStr.rstrip('\n')
    openFile = open(fileName)
    openNewFile = open(fileName + 'TitleCase.xml', 'w')
    lines = openFile.readlines()
    for lineIndex in range(0, len(lines)):
        line = lines[lineIndex]
        skip = 0
        if "label=" in line and "const" not in line:
            segs = line.split('"')
            if len(segs) >= 3:
                pass
            else:
                openNewFile.write(lines[lineIndex])
                secondTitleCaseLine = splitAndMakeTitleCase(lines[lineIndex + 1])
                skip = lineIndex + 1
                openNewFile.write(secondTitleCaseLine)
                if '"' not in lines[lineIndex + 1]:
                    thirdTitleCaseLine = splitAndMakeTitleCase(lines[lineIndex + 2])
                    skip = lineIndex + 1
                    openNewFile.write(thirdTitleCaseLine)
        openNewFile.write(lines[lineIndex])
    openFile.close()
    openNewFile.close()
    #cmd = "mv " + fileName + "TitleCase.xml " + fileName
    #os.system(cmd)
In your for loop you have the first if, and within that you do some writing to the file. Then after that you write the line to the file again. I think you probably want that last write in an else, like this:
for fileStr in fileList:
    fileName = fileStr.rstrip('\n')
    openFile = open(fileName)
    openNewFile = open(fileName + 'TitleCase.xml', 'w')
    lines = openFile.readlines()
    for lineIndex in range(0, len(lines)):
        line = lines[lineIndex]
        skip = 0
        if "label=" in line and "const" not in line:
            segs = line.split('"')
            if len(segs) >= 3:
                pass
            else:
                openNewFile.write(lines[lineIndex])
                secondTitleCaseLine = splitAndMakeTitleCase(lines[lineIndex + 1])
                skip = lineIndex + 1
                openNewFile.write(secondTitleCaseLine)
                if '"' not in lines[lineIndex + 1]:
                    thirdTitleCaseLine = splitAndMakeTitleCase(lines[lineIndex + 2])
                    skip = lineIndex + 1
                    openNewFile.write(thirdTitleCaseLine)
        else:
            openNewFile.write(lines[lineIndex])
    openFile.close()
    openNewFile.close()
    #cmd = "mv " + fileName + "TitleCase.xml " + fileName
    #os.system(cmd)
So I've been working with the CC-CEDICT, a free downloadable Chinese-English dictionary. I've been using python to make some small changes and reformat the dictionary. When I ran code that just reorganized the dictionary as a csv file, I had no issues and the characters were written into the file as expected. Here is the code for that:
filename = 'cedict_ts.u8.txt'
newname = 'cedict_ts.u8.csv'

f = open(filename, 'r')
allLines = f.readlines()
f.close()

newf = open(newname, 'w')
endofhash = False
for i in range(0, len(allLines)):
    curLine = allLines[i]
    if curLine[0] == '#':
        newf.write(curLine)
    else:
        if(not endofhash):
            newarr = ['Traditional', 'Simplified', 'Pinyin', 'Definition(s)\r\n']
            newline = ','.join(newarr)
            newf.write(newline)
            endofhash = True
        firstws = curLine.find(' ')
        lsbrack = curLine.find('[')
        rsbrack = curLine.find(']')
        fslash = curLine.find('/')
        lslash = curLine.rfind('/')
        trad = curLine[0:firstws]
        simp = curLine[firstws+1:lsbrack-1]
        piny = curLine[lsbrack+1:rsbrack]
        defin = curLine[fslash+1:lslash]
        defin = defin.replace('/', '; ')
        defin = defin + '\r\n'
        newarr = [trad, simp, piny, defin]
        newline = ','.join(newarr)
        newf.write(newline)
newf.close()
However, when I run a program that also changes the pinyin system and adds it to the dictionary, the content of the text file is gobbledygook. But as a test I had the program print out each line before it was written to the text file, and it prints to the terminal as expected. Here is the code that does that:
from pinyinConverter import *

filename = 'cedict_ts.u8.txt'
newname = 'cedict_ts_wpym.u8.csv'

f = open(filename, 'r')
allLines = f.readlines()
f.close()

apy = readPinyinTextfile('pinyinchars.txt')

newf = open(newname, 'w')
endofhash = False
for i in range(0, len(allLines)):
    curLine = allLines[i]
    if curLine[0] == '#':
        newf.write(curLine)
    else:
        if(not endofhash):
            newarr = ['Traditional', 'Simplified', 'Pinyin', 'PinyinWithMarks', 'Definition(s)\r\n']
            newline = ','.join(newarr)
            newf.write(newline)
            endofhash = True
        firstws = curLine.find(' ')
        lsbrack = curLine.find('[')
        rsbrack = curLine.find(']')
        fslash = curLine.find('/')
        lslash = curLine.rfind('/')
        trad = curLine[0:firstws]
        simp = curLine[firstws+1:lsbrack-1]
        piny = curLine[lsbrack+1:rsbrack]
        split_piny = piny.split(' ')
        for i in range(0, len(split_piny)):
            curPin = split_piny[i]
            newPin = convertPinyinSystem(curPin, apy)
            split_piny[i] = newPin
        pnwm = ' '.join(split_piny)
        defin = curLine[fslash+1:lslash]
        defin = defin.replace('/', '; ')
        defin = defin + '\r\n'
        newarr = [trad, simp, piny, pnwm, defin]
        newline = ','.join(newarr)
        newf.write(newline)
newf.close()
And here is the pinyinConverter file code:
def convertPinyinSystem(inputString, allPinyin):
    chars = ['a', 'e', 'i', 'o', 'u', 'u:']
    tone = grabTone(inputString)
    toneIdx = (tone - 1) * 2
    hasIdx = -1
    for i in range(0, len(chars)):
        if(chars[i] in inputString):
            hasIdx = i
    newString = inputString
    newString = newString.replace(str(tone), '')
    if(not ('iu' in inputString)):
        newChar = allPinyin[hasIdx][toneIdx:toneIdx+2]
    else:
        newChar = allPinyin[4][toneIdx:toneIdx+2]
    newString = newString.replace(chars[hasIdx], newChar)
    if(tone == 5):
        newString = inputString
        newString = newString.replace(str(tone), '')
        return newString
    elif(tone == -1):
        return inputString
    else:
        return newString

def readPinyinTextfile(pinyintextfile):
    f = open(pinyintextfile, 'r')
    allLines = f.readlines()
    f.close()
    for i in range(0, len(allLines)):
        curLine = allLines[i]
        curLine = curLine[0:len(curLine)-1]
        allLines[i] = curLine
    return allLines

def grabTone(inputText):
    isToneIdx = False
    idx = 0
    while(not isToneIdx):
        isToneIdx = is_int(inputText[idx])
        if(isToneIdx):
            break
        else:
            idx += 1
        if(idx == len(inputText)):
            return -1
    return int(inputText[idx])

def is_int(s):
    try:
        int(s)
        return True
    except ValueError:
        return False
And the content of the pinyinchars.txt file is this:
āáăà
ēéĕè
īíĭì
ōóŏò
ūúŭù
ǖǘǚǜ
I'm on a 2009 MacBook Pro running OS X 10.8.5, Python is version 2.7.6, and the encoding of the dictionary is UTF-8. Also, I know some of the code for doing the pinyin conversion is not optimized, but for this it doesn't really matter.
If your pinyin file is encoded as utf-8, you might want to try using the codecs package, which is part of the standard library, like this:
import codecs
...

def readPinyinTextfile(pinyintextfile):
    f = codecs.open(pinyintextfile, 'r', 'utf-8')
If it looks okay in the terminal, then it's likely that you need to specifically change the writing function to use the codecs package:
apy = readPinyinTextfile('pinyinchars.txt')
newf = codecs.open(newname, 'w', 'utf-8')
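If the terminal output looks right but the file does not, the read side of the main dictionary file is the other place raw byte strings can sneak into the join. A minimal sketch of the same pattern applied end to end (assuming every input file is UTF-8):

import codecs

# Decode every input as UTF-8 and encode the output as UTF-8,
# so only unicode strings get joined and written.
f = codecs.open(filename, 'r', 'utf-8')
allLines = f.readlines()
f.close()

apy = readPinyinTextfile('pinyinchars.txt')  # uses the codecs version above
newf = codecs.open(newname, 'w', 'utf-8')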
I am working on a fairly basic encoder/decoder where you can input your own text file (as a string) and your own encoder (also as a string: it must be a text file).
Here is my decoder function:
def cDecode(file_name, encoder='standard_encoder.txt', save_new=True): # does not decode multi-lines correctly -- everything goes on a single line. See next comment
    '''Decodes <'file_name'> with the reverse method of <'encoder'>.'''
    if type(file_name) != str or type(encoder) != str: raise TypeError("<'file_name'> and <'encoder'> must be of type <'str'>.")
    if type(save_new) != bool: raise TypeError("<'save_new'> must be of type <'bool'>.")
    if file_name[-4:] != '.txt': file_name += '.txt'
    if encoder[-4:] != '.txt': encoder += '.txt'
    decoder_set = {}
    try:
        with open(encoder, 'r') as encoding_file:
            for line in encoding_file:
                line_parts = line.split(': ')
                my_key, my_value = line_parts[1], line_parts[0]
I think the error is in here:
I have to remove the '\n' because every character (in the decoding file) is on its own line, like so: 'A: Ð'.
                if '\n' in my_key:
                    loc = my_key.find('\n') # this may be the cause of the single-line of the decoding.
                    my_key = my_key[:loc] + my_key[loc + 1:]
                decoder_set[my_key] = my_value
            encoding_file.close()
    except IOError:
        encoder = 'standard_encoder.txt'
        with open(encoder, 'r') as encoding_file:
            for line in encoding_file:
                line_parts = line.split(': ')
                my_key, my_value = line_parts[1], line_parts[0]
                # every key has a new line character automatically because it's on a different line
                if '\n' in my_key:
                    loc = my_key.find('\n')
                    my_key = my_key[:loc] + my_key[loc + 1:]
                decoder_set[my_key] = my_value
            encoding_file.close()
    decodingKeys = decoder_set.keys()
Here is the rest of the function:
    if save_new:
        try:
            decoded_file_name = file_name[:-12] + '_decoded' + file_name[-4:]
            encoded_file = open(decoded_file_name, 'a+')
            with open(file_name, 'r') as my_file:
                for line in my_file:
                    de_line = ''
                    for char in line:
                        if char in decodingKeys: de_char = decoder_set[char]
                        else: de_char = char
                        de_line += de_char
                    encoded_file.write(de_line)
        except IOError:
            raise NameError(file_name + ' was not found. Decoding process terminated.')
    else:
        try:
            import os
            encoded_file = file_name[:-12] + '_decoded' + file_name[-4:]
            with open(file_name, 'r+') as my_file:
                for line in my_file:
                    de_line = ''
                    for char in line:
                        if char in decodingKeys: en_char = decoding_set[char]
                        else: de_char = char
                        de_line += de_char
                    encoded_file.write(de_line)
            os.remove(file_name)
            os.rename(encoded_file, file_name)
        except IOError:
            raise NameError(file_name + ' was not found. Decoding process terminated.')
Say I have a multi-line text-file:
This is a test.
As is this one.
Good bye!
When encoded and then decoded afterward, it shows up like this: This is a test.As is this one.Good bye!.
How can I fix this? I'm expecting it to show up like:
This is a test.
As is this one.
Good bye!
Thanks!
Add a '\n' when writing the line back to the file:
encoded_file.write(de_line+'\n')
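If de_line can already end with a newline (it will whenever the '\n' from the input line is not one of the decoder keys and so passes through unchanged), a slightly more defensive variant of the same idea, only a sketch, is to strip any trailing newline first:

encoded_file.write(de_line.rstrip('\n') + '\n')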