Reading and editing a font file and using a dictionary - python

I have to take the values from a text file which contains the co-ordinates to draw characters out in TurtleWorld, an example of the text file is the following:
<character=B, width=21, code=66>
4 21
4 0
-1 -1
4 21
13 21
16 20
17 19
18 17
18 15
17 13
16 12
13 11
-1 -1
4 11
13 11
16 10
17 9
18 7
18 4
17 2
16 1
13 0
4 0
</character>
I have to then write a function to take all of these points and then convert them into a dictionary where a key is the character and the corresponding values are the set of points which can be used to draw that character in TurtleWorld.
The code I have tried is the following:
def read_font():
"""
Read the text from font.txt and convert the lines into instructions for how to plot specific characters
"""
filename = raw_input("\n\nInsert a file path to read the text of that file (or press any letter to use the default font.txt): ")
if len(filename) == 1:
filename = 'E:\words.txt'
words = open(filename, 'r')
else:
words = open(filename, 'r')
while True: # Restarts the function if the file path is invalid
line = words.readline()
line = line.strip()
if line[0] == '#' or line[0] == ' ': # Used to omit the any unwanted lines of text
continue
elif line[0] == '<' and line[1] == '/': # Conditional used for the end of each character
font_dictionary[character] = numbers_list
elif line[0] == '<' and line[1] != '/':

take a look at http://oreilly.com/catalog/pythonxml/chapter/ch01.html :: specifically, hit up the example titled :: Example 1-1: bookhandler.py
you can more or less credit/copy that and tweak it to read your particular xml. once you get the 'guts'(your coords), you can split it into a list of x/y coords really easily
such as
# Sample input: one "x y" pair per line.
a = "1 3\n23 4\n3 9\n"
# Materialise the result as a list: on Python 3 a bare map() is a one-shot
# iterator, which cannot be sliced into (x, y) groups afterwards.
coords = list(map(int, a.split()))
and then chunk it into a list of groups of 2 (see the question "How do you split a list into evenly sized chunks?")
and store the result letters[letter] = result
or you can do the chunking more funky using the re module
import re

a = "1 13\n4 5\n"
# Raw string so the backslashes reach the regex engine untouched.
# "-?" keeps negative values (the sample font data contains "-1 -1" rows),
# and " +" requires a real gap so a lone number is never read as a pair.
pair_pattern = r"-?\d+ +-?\d+"
b = re.findall(pair_pattern, a)
# list(...) so each pair is a real list on Python 3 as well as Python 2.
c = [list(map(int, item.split())) for item in b]
c
[[1, 13], [4, 5]]

Related

Moving contents from one file to a new one in python

I want to write all the even numbers to the "even" file with single spaces between them, e.g. 12 6 20 10 and not 1262010, with no spaces at the front or back. How can I do this?
def write_positive_even_to_file(filename):
    """Append the even integers found in *filename* to ``xxx.txt``.

    The input is read as whitespace-separated integer tokens. The even ones
    are written on a single line, separated by single spaces, with no
    leading or trailing space. Nothing is written when there are no even
    numbers.

    Raises:
        ValueError: if a token in the input is not an integer.
    """
    with open(filename, 'r') as orginal, open('xxx.txt', 'a') as even:
        # Tokens are strings; they must be converted before the parity test
        # ("15" % 2 is string formatting, not arithmetic).
        evens = [token for token in orginal.read().split()
                 if int(token) % 2 == 0]
        if evens:
            # join() puts the separator only *between* numbers; the newline
            # keeps successive appends from running into each other.
            even.write(" ".join(evens) + "\n")
Input file:
15 12 6
7 20 9 10
13 17
3
You need to split each input line into tokens (assumed to represent integers) convert to int then determine if any value is even.
Something like this:
def write_positives(infile, outfile, mode='a'):
    """Copy the even integers from *infile* to *outfile* on one line.

    Args:
        infile: path of a text file holding whitespace-separated integers.
        outfile: path of the file to write to.
        mode: mode used to open *outfile* (appends by default).

    Raises:
        ValueError: if a token in *infile* is not an integer.
    """
    with open(infile) as fin, open(outfile, mode) as fout:
        evens = [x for x in map(int, fin.read().split()) if x % 2 == 0]
        if evens:
            # Printing the unpacked list separates the values with single
            # spaces and ends the line with a newline.
            print(*evens, file=fout)
By printing the unpacked list you will, by default, have space separation

How to split a list in python [duplicate]

This question already has answers here:
How do I split a list into equally-sized chunks?
(66 answers)
Closed 1 year ago.
I'm trying to get a python script working that will take an .txt file with number inputs
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
and turn it into
Hello 10 and 11 and 12 and 13....
and after x numbers it should start again like
Hello 10 and 11 and 12 and 13
Hello 14 and 15 and 16 and 17
Hello 18 ....
what ive got is this
def idcommands(contentsplit):
    """Write ``Hello <id> and <id> and ...`` for all ids to command.txt.

    Every entry of *contentsplit* (a list of strings) ends up on one single
    line; the file is overwritten on each call.
    """
    command = ' and '.join(contentsplit)
    # The with-block closes the file even if the write fails.
    with open('command.txt', 'w') as textfile:
        textfile.write("Hello " + command)
def main():
    """Read the ids from ids.txt and pass them on to idcommands()."""
    with open("ids.txt", "r") as id_file:
        content = id_file.read()
    # split() without an argument also discards the trailing newline and
    # collapses repeated blanks, which split(' ') would pass on as ids.
    contentsplit = content.split()
    idcommands(contentsplit)
So it will do it all in one line but I don't know how to split after x numbers.
It's not clear if you want a separate file for each line or one file for them all.
This code will create one file.
def idcommands(contentsplit, n = 4):
    """Write the ids to command.txt, *n* ids per ``Hello ...`` line.

    Args:
        contentsplit: list of id strings.
        n: how many ids go on each line; must be at least 1.

    Raises:
        ValueError: if *n* is smaller than 1.
    """
    # Validate before opening the file so a bad n never truncates it.
    if n < 1:
        raise ValueError("n must be at least 1")
    commands = [
        'Hello ' + ' and '.join(contentsplit[idx:idx + n])
        for idx in range(0, len(contentsplit), n)
    ]
    with open('command.txt', 'w') as textfile:
        textfile.write('\n'.join(commands))
def main():
    """Read the ids from ids.txt and write them out two per line."""
    with open("ids.txt", "r") as id_file:
        # split() without an argument also discards the trailing newline and
        # collapses repeated blanks, which split(' ') would pass on as ids.
        contentsplit = id_file.read().split()
    idcommands(contentsplit, 2)


# Run only when executed as a script, so importing has no side effects.
if __name__ == "__main__":
    main()

Separating Values from a file in Python

So let's say I have this txt file formatted (value)(space)(value) and there's a second set of numbers separated with a (tab). An example file is given here:
Header
5 5 6 7 8 7 8 9 0 1
7 6 3 4 1 1 3 6 8 1
8 7 4 1 3 1 9 8 5 1
Now I'm using this code to print all the values shown in the txt file:
NEWLINE = "\n"


def readBoardFromFile():
    """Prompt for a file name until one can be opened, then load it.

    The first line of the file is skipped. Every remaining line becomes one
    row holding each character of the line (spaces and tabs included) except
    the newline.

    Returns:
        A tuple ``(aBoard, numRows, numColumns)``; ``numColumns`` is the
        length of the first row, or 0 when the file has no data rows.
    """
    inputFileOK = False
    aBoard = []
    while not inputFileOK:
        try:
            inputFileName = input("Enter the name of your file: ")
            # The with-block guarantees the file is closed again.
            with open(inputFileName, "r") as inputFile:
                print("Opening File " + inputFileName + " for reading")
                # Skip the first line; the default avoids StopIteration on
                # an empty file.
                next(inputFile, None)
                # Build the rows afresh so a failed attempt leaves nothing
                # behind in the board.
                aBoard = [
                    [ch for ch in line if ch != NEWLINE]
                    for line in inputFile
                ]
            inputFileOK = True
            print("Completed reading of file " + inputFileName)
        except IOError:
            print("Error: File couldn't be opened")
    numRows = len(aBoard)
    numColumns = len(aBoard[0]) if aBoard else 0
    return (aBoard, numRows, numColumns)
def display(aBoard, numRows, numColumns):
    """Print the board under a DISPLAY heading, followed by a row of stars.

    Args:
        aBoard: list of rows, each a list of printable cells.
        numRows: number of rows to print.
        numColumns: number of cells to print per row; also the footer width.
    """
    print("DISPLAY")
    for currentRow in range(numRows):
        # Cells are printed back to back, exactly as they were stored.
        print("".join(
            "%s" % aBoard[currentRow][currentColumn]
            for currentColumn in range(numColumns)
        ))
    print("*" * numColumns)
    # One blank line after the footer.
    print()
def start():
    """Load a board from a user-chosen file and print it."""
    aBoard, numRows, numColumns = readBoardFromFile()
    display(aBoard, numRows, numColumns)


# Run only when executed as a script, so importing never prompts for input.
if __name__ == "__main__":
    start()
Normally when I run this code this is the output:
DISPLAY
5 5 6 7 8 7 8 9 0 1
7 6 3 4 1 1 3 6 8 1
8 7 4 1 3 1 9 8 5 1
*******************
How do I make it so that the output is:
DISPLAY
5 5 6 7 8
7 6 3 4 1
8 7 4 1 3
Only displaying the numbers in the left half?
Perhaps you can try using the csv module and open the file with tab delimiter.
Then assuming you go with the list approach you could only print the first element from each row.
Something like:
import csv

# newline="" lets the csv module do its own newline handling, as its
# documentation recommends for files passed to csv.reader.
with open("a.txt", newline="") as my_file:
    reader = csv.reader(my_file, delimiter='\t')
    next(reader, None)  # to skip header if exists (no error on an empty file)
    for line in reader:
        # Blank lines come back as empty lists; skip them instead of
        # failing on line[0].
        if line:
            print(line[0])
From what I can see you aren't taking the tab character into account in your code which is probably why you have the additional characters in your output.
Here's the approach I would take, utilising Python's power when it comes to processing strings.
I would encourage you to use this type of approach when writing Python, as it will make your life much easier.
NEWLINE = "\n"


def read_board_from_file():
    """Prompt for a file name until one can be opened, then load the board.

    Lines containing "header" (case-insensitive) are skipped. For every
    other line only the part before the first tab is kept, split at
    whitespace into a list of entries.

    Returns:
        The board as a list of rows, each row a list of strings.
    """
    input_file_OK = False
    a_board = []
    while not input_file_OK:
        try:
            input_file_name = input("Enter the name of your file: ")
            # Start from an empty board on every attempt so a failed read
            # cannot leave rows behind.
            a_board = []
            with open(input_file_name, "r") as input_file:
                # A file-like object (as returned by open())
                # can be simply iterated
                for line in input_file:
                    # Skip header line if present
                    if "header" in line.lower():
                        continue
                    # Strip NEWLINE from each line,
                    # then split the line at the tab character.
                    parts = line.strip(NEWLINE).split("\t")
                    # parts is a list, we are only interested
                    # in the first bit.
                    first_part = parts[0]
                    # Split the left part at all whitespaces.
                    entries = first_part.split()
                    # entries is a list, so we just need to append it
                    # to the board
                    a_board.append(entries)
            input_file_OK = True
            print(f"Completed reading of file {input_file_name}")
        except IOError:
            # f-string, so the actual file name is shown in the message.
            print(f"Error: File {input_file_name} couldn't be opened.")
    return a_board
def display_board(a_board):
    """Print the board under a DISPLAY heading with an asterisk footer.

    Args:
        a_board: list of rows, each row a list of strings.
    """
    print("DISPLAY")
    # a_board is a list of lists; str.join() puts a space between the
    # entries of a row and formats it nicely.
    row_strings = [" ".join(row) for row in a_board]
    for row_str in row_strings:
        print(row_str)
    # The footer is the asterisk character repeated as many times as the
    # longest row is wide (nothing for an empty board).
    footer = "*" * max(map(len, row_strings), default=0)
    # Footer line followed by one blank line.
    print(footer, end="\n\n")
def start():
    """Load a board from a user-chosen file and print it."""
    a_board = read_board_from_file()
    display_board(a_board)


# Run only when executed as a script, so importing never prompts for input.
if __name__ == "__main__":
    start()
I'd do this with a bit more separation between inputs, outputs, and data processing. Your input is the name of the file, and a secondary input is the actual file contents. The data processing step is taking the file contents, and returning some internal representation of a collection of boards. The output is displaying the first board.
from typing import Iterable, List
def parse(lines: Iterable[str], board_sep: str = "\t") -> List[List[List[str]]]:
    """Split side-by-side boards into one list of rows per board.

    Each input line holds the same row of every board, separated by
    *board_sep*; the entries of a row are separated by whitespace.

    Args:
        lines: the text lines to parse.
        board_sep: the separator between boards on a line.

    Returns:
        A list of boards; each board is a list of rows and each row a list
        of string entries. Empty input gives an empty list.
    """
    boards: List[List[List[str]]] = []
    for line in lines:
        # board_line is the current row of board number board_idx
        for board_idx, board_line in enumerate(line.split(board_sep)):
            # Create a board the first time its column shows up; this also
            # covers a later line that has more sections than the first.
            if board_idx == len(boards):
                boards.append([])
            boards[board_idx].append(board_line.split())
    return boards
def show_board(board: List[List[str]]) -> None:
    """Print every row of *board* on its own line, entries space-separated."""
    for row in board:
        print(" ".join(row))
Now we can put that all together. We need to:
Get the filename
Open the file
Filter out the "Header" and any blank lines
Pass the rest of the lines to the parse() function
Get the first board
Print it with the context
from typing import Tuple
def get_board_dimensions(board: List[List[str]]) -> Tuple[int, int]:
    """Return a tuple of (rows, cols).

    The column count is taken from the first row; an empty board is (0, 0).
    """
    if not board:
        return 0, 0
    return len(board), len(board[0])
def get_filtered_file(filename: str) -> Iterable[str]:
    """Yield the lines of *filename*, leaving out blank and "header" lines.

    Lines are yielded unchanged (line ending included); the header check
    ignores case and surrounding whitespace.
    """
    with open(filename) as f:
        for line in f:
            # Lines read from a file still carry their "\n", so the
            # comparison has to be made on the stripped text.
            stripped = line.strip()
            if not stripped or stripped.lower() == "header":
                continue
            yield line
def main():
    """Ask for a file name and display the first board found in that file."""
    filename = input("Enter the name of your file: ")
    filtered_lines = get_filtered_file(filename)
    boards = parse(filtered_lines)
    # A file without any data lines yields no boards at all.
    if not boards:
        print("No board data found in " + filename)
        return
    # now we can show the first one
    b = boards[0]
    _, cols = get_board_dimensions(b)
    print("DISPLAY")
    show_board(b)
    print("*" * (2 * cols - 1))  # columns plus gaps

How to print specific lines of a file in Python

I have a .txt file and I would like to print lines 3, 7, 11, 15,...
So, after printing the third line, I would like to print every 4th line afterward.
I began by looking at the modulus operator:
# Open the file
with open('file.txt') as file:
    # Iterate through lines
    for i, line in enumerate(file):
        # Choose every third line in a file
        if i % 3 == 0:
            print(line)
# Close the file when you're done
file.close()
but that approach prints every third line. If i % 3 == 1 that prints lines 1, 4, 7, 10, 13 etc.
Instead of using modulo, simply just use addition, start it with the first line you want to show, and then add 4 to it
next_line = 2  # Line 3 is index 2
with open('file.txt') as file:
    for i, line in enumerate(file):
        if i == next_line:
            print(line)
            # The next wanted line is four lines further down.
            next_line = next_line + 4
Your code is almost fine, except for the modulo: you want the remainder of the division by 4 to be 3.
with open('file.txt') as file:
    # Count lines from 1 so that i is the real line number; lines
    # 3, 7, 11, ... are then exactly those with remainder 3 modulo 4.
    for i, line in enumerate(file, start=1):
        if i % 4 == 3:
            print(line)
Note that you don't need to explicitly close your file at the end: that's what with is intended for, it makes sure that your file gets closed whatever happens.
So you want to something to happen every fourth time, that means modulo 4. Try changing your if to if i % 4 == N: with a good number for N.
By the way, when using the with statement you don't have to call close(); it does so automatically.
How about:
# Fetch all lines from the file
with open('20 - Modular OS - lang_en_vs2.srt') as srt_file:
    lines = srt_file.readlines()
# Print the 3rd line
print(lines[2])
# throw away the first 3 lines, so the modulo (below) works ok
del lines[:3]
# print every 4th line after that; counting from 1 makes the 4th, 8th, ...
# remaining lines (original lines 7, 11, ...) the multiples of 4
for i, line in enumerate(lines, start=1):
    if i % 4 == 0:
        print(line)
Read every line into an array.
Output the 3rd line.
We then need every fourth line, so by deleting the first 3 elements, it's easy to simply test against modulo 4 (the "% 4") and output the line.
# Start the counter at 1 so that it first reaches 4 on line 3; after every
# print it restarts at 0 and therefore fires again four lines later.
x = 1
with open('file.txt') as file:
    # Iterate through lines
    for line in file:
        x += 1
        # Choose line 3 and every fourth line after it
        if x == 4:
            print(line)
            x = 0
# No explicit close() needed: the with-block has already closed the file.
Result
>>> i = 0
>>> for x in range(0, 100):
... i += 1
... if i is 4:
... print(x)
... i = 0
3
7
11
15
19
23
27
31
35
39
43
47
51
55
59
63
67
71
75
79
83
87
91
95
99
# A file object cannot be indexed, so read the lines into a list first.
with open('file.txt') as file:
    lines = file.readlines()
# Lines 3, 7, 11, ... sit at indices 2, 6, 10, ...: start at index 2 and
# step by 4.
for line in lines[2::4]:
    print(line)
This works, but isn't super elegant.

how to keep ordered rows in dictionary?

I wrote the following script to retrieve the gene count for each contig. It works well, but the order of the ID list that I use as input is not conserved in the output.
I need to keep the same order as in my input contig list, because that list is ordered by level of expression.
Can anyone help me?
Thanks for your help.
from collections import defaultdict
import numpy as np
# Map the first column of idlist.txt to its second column.
gene_list = {}
with open('idlist.txt') as id_file:
    for line in id_file:
        columns = line.strip().split()
        gene_list[columns[0]] = columns[1]

# Running totals per gene: six counters each, starting at zero.
gene_count = defaultdict(lambda: np.zeros(6, dtype=int))

with open('Aquilonia.txt') as esem_file, open('out.txt', 'w') as out_file:
    esem_file.readline()  # skip the first line
    for line in esem_file:
        fields = line.strip().split()
        exon = fields[0]
        numbers = [float(field) for field in fields[1:]]
        if exon in gene_list:
            gene = gene_list[exon]
            # NOTE(review): adding floats into an int array in place may be
            # rejected by newer numpy releases - confirm with the version in use.
            gene_count[gene] += numbers
            # Same text as "print >> out_file, gene, gene_count[gene]", but
            # valid on both Python 2 and Python 3.
            out_file.write("%s %s\n" % (gene, gene_count[gene]))
input file:
comp54678_c0_seq3
comp56871_c2_seq8
comp56466_c0_seq5
comp57004_c0_seq1
comp54990_c0_seq11
...
output file comes back in numerical order:
comp100235_c0_seq1 [22 13 15 6 15 16]
comp101274_c0_seq1 [55 2 27 26 6 6]
comp101915_c0_seq1 [20 2 34 12 8 7]
comp101956_c0_seq1 [13 21 11 17 17 28]
comp101964_c0_seq1 [30 73 45 36 0 1]
Use collections.OrderedDict(); it preserves entries in input order.
from collections import OrderedDict

with open('idlist.txt') as idlist:
    # Keep the first two whitespace-separated columns of each line.
    # Splitting on all whitespace removes the trailing newline from the
    # value; lines with fewer than two columns are skipped.
    gene_list = OrderedDict(
        fields[:2] for fields in map(str.split, idlist) if len(fields) >= 2
    )
The above code reads your gene_list ordered dictionary using one line.
However, it looks as if you generate the output file purely based on the order of the input file lines:
for line in esem_file:
# ...
if exon in gene_list: # no need to call `.keys()` here
gene = gene_list[exon]
gene_count[gene] += numbers
print >> out_file, gene, gene_count[gene]
Rework your code to first collect the counts, then use a separate loop to write out your data:
# First pass: collect the counts.
with open('Aquilonia.txt') as esem_file:
    next(esem_file, None)  # skip first line
    for line in esem_file:
        fields = line.split()
        if not fields:
            continue  # ignore blank lines
        exon = fields[0]
        numbers = [float(field) for field in fields[1:]]
        if exon in gene_list:
            gene_count[gene_list[exon]] += numbers

# Second pass: write the totals in the order of the id list.
with open('out.txt', 'w') as out_file:
    written = set()
    # gene_count is keyed by the *values* of gene_list, so iterate those;
    # iterating the keys would look up names that were never counted.
    for gene in gene_list.values():
        if gene in written:
            continue  # several ids may map to the same gene
        written.add(gene)
        # Same text as "print >> out_file, gene, gene_count[gene]", but
        # valid on both Python 2 and Python 3.
        out_file.write("%s %s\n" % (gene, gene_count[gene]))

Categories