Python select a file from a list - python

I have a folder that contains several log files that I will parse with Python.
I would like to show the list of files contained in the folder, like:
[1] FileName1.log
[2] FileName2.log
Then the user can choose the right file by typing its number in the list.
For instance, to parse the file "FileName2.log" the user presses 2.
In my script I can show the list of files, but I don't know how to pick a file from the list by index.
This is my script
import os
import sys
items = os.listdir("D:/Logs")
fileList = []
for names in items:
if names.endswith(".log"):
fileList.append(names)
cnt = 0
for fileName in fileList:
sys.stdout.write( "[%d] %s\n\r" %(cnt, fileName) )
cnt = cnt + 1
fileName = raw_input("\n\rSelect log file [0 -" + str(cnt) + " ]: ")
Thanks for the help!

import os
import sys

items = os.listdir("D:/Logs")
fileList = [name for name in items if name.endswith(".log")]
if not fileList:
    # Nothing to choose from; bail out before building a menu and prompt
    # for an empty list.
    sys.exit("No .log files found")

# Show a 1-based menu of the available log files.
for cnt, fileName in enumerate(fileList, 1):
    sys.stdout.write("[%d] %s\n\r" % (cnt, fileName))

choice = int(input("Select log file[1-%s]: " % len(fileList)))
if not 1 <= choice <= len(fileList):
    # Reject 0 and negatives too: they would silently index from the end.
    sys.exit("Selection out of range")

# The menu is numbered from 1 but list indices start at 0, so shift by one.
print(fileList[choice - 1])
This is your own version of the code with a few modifications; I hope it serves your purpose.

If you have the names in an array like this:
fileList = ['FileName1.log','FileName2.log']
You can pull them out by using their index (remember that arrays are 0-indexed), so fileList[0] would be 'FileName1.log'.
When you ask the user to input a number (e.g. 0, 1, 2), you then use that number to get the file you want, like this:
fileToRead=fileList[userInput]
if you asked for 1,2,3 you would need to use userInput-1 to make sure it is correctly 0-indexed.
then you open the file you now have:
f=open(fileToRead, 'r')
you can read more about open here

If fileList is a list of files, and fileName is the user input, convert the input to an integer (raw_input returns a string) and use it as the index to reference the file the user chose:
fileList[int(fileName)]

import glob
import os


def log_number(fpath, prefix):
    """Return the integer that follows *prefix* in the file name of *fpath*.

    Only the base name is inspected, so the directory part of the path
    (whatever its length, and whether or not it contains dots) cannot
    corrupt the result.  For example, "D:/Logs/FileName12.log" with the
    prefix "FileName" gives 12.

    Raises ValueError if the text between the prefix and the first dot of
    the base name is not a number.
    """
    stem = os.path.basename(fpath).split('.', 1)[0]
    return int(stem[len(prefix):])


if __name__ == "__main__":
    dirpath = r"D:\Logs"  # the directory that contains the log files
    prefix = "FileName"
    fpaths = glob.glob(os.path.join(dirpath, "{}*.log".format(prefix)))  # get all the log files
    fpaths.sort(key=lambda fpath: log_number(fpath, prefix))  # sort the log files by number
    print("Select a file to view:")
    for i, fpath in enumerate(fpaths, 1):
        print("[{}]: {}".format(i, os.path.basename(fpath)))
    choice = int(input("Enter a selection number: "))  # assuming valid inputs
    choice -= 1  # correcting for python's 0-indexing
    print("You have chosen", os.path.basename(fpaths[choice]))

Just add in the end something like this...
sys.stdout.write(fileList[int(fileName)])

Indexing in python as in many other languages starts from 0. Try this:
import os
import sys

# Collect every *.log entry found in the log directory.
fileList = [entry for entry in os.listdir("D:/Logs") if entry.endswith(".log")]

# Print a 0-based menu: the number shown is the list index itself.
for position, logName in enumerate(fileList):
    sys.stdout.write("[%d] %s\n\r" % (position, logName))

lastIndex = len(fileList) - 1
prompt = "\n\rSelect log file [0 - " + str(lastIndex) + "]: "
# raw_input() returns a string, so convert it before indexing the list.
fileName = int(raw_input(prompt))
print(fileList[fileName])
You need to cast input from raw_input() to int. And then you can use the obtained number as index for your list. 0 is the first file, 1 is the second file etc.

Related

Filename Numbering in Python

I want to put a proper sequence number in my file names using Python. It is working partially, but not fully. Suppose there are 3 files in a particular folder. The names should become Num1_.doc, Num2_.pdf, Num3_.doc. That works fine.
But suppose a new file arrives in that folder: how can it keep the proper sequence?
My code is -
import os
os.chdir('C:\\Users\\Project\\')
print(os.getcwd())
for count, f in enumerate(os.listdir()):
f_name, f_ext = os.path.splitext(f)
f_name = "Num" + str(count) + '_' + f_name
new_name = f'{f_name}{f_ext}'
os.rename(f, new_name)
It generates Num1_.doc, Num2_.pdf, Num3_.doc, etc.
Now a new file is added to that folder. Its name should become Num4_.doc. How can I do this in Python?
Find what the maximum number is out of the current files, then rename any new files with a number 1 higher
import os
import re

# Files that were already numbered look like "Num<N>_<original name>".
numbered = re.compile(r"^Num(\d+)_")

names = os.listdir()

# First pass: find the highest number handed out so far.
current_max = 0
for name in names:
    match = numbered.match(name)
    if match:
        # The number in this filename
        num = int(match.group(1))
        if num > current_max:
            current_max = num

# Second pass: give every not-yet-numbered file the next number, keeping
# its original name and extension after the "Num<N>_" prefix.
for name in names:
    if not numbered.match(name):
        current_max += 1
        os.rename(name, f"Num{current_max}_{name}")
Try to first get the maximum number already assigned from previous renamings, and then rename the remaining files starting from that number.
import os
import re

os.chdir('C:\\Users\\Project\\')
print(os.getcwd())

# Names produced by an earlier run look like "Num<N>_<original name>".
# Requiring the digits and the underscore keeps names such as
# "Number.txt" from being mistaken for an already-numbered file.
numbered = re.compile(r'^Num(\d+)_')

# find max number in filenames, if already named
all_names = os.listdir()
used_numbers = [int(m.group(1)) for m in map(numbered.match, all_names) if m]
max_n_file = max(used_numbers, default=0) + 1

# apply same algorithm but rename only the files that are not numbered yet
for f in all_names:
    if numbered.match(f):
        continue
    os.rename(f, 'Num' + str(max_n_file) + '_' + f)
    max_n_file += 1

Counting reads and bases from a list of fastq files

I trimmed my Illumina short reads, forward and reverse, by using Trimmomatic. The Trimmomatic's outputs were: paired_1 - unpaired_1, and paired_2 - unpaired_2.fastq.gz files. I want to know how big was the impact of trimming by counting the number of reads and bases of each file in my directory. I had made a script to count the number of bases and reads for each file in my directory; however, I have problems in if __name__=='__main__'. When I do the for loop I don't know the order of the files that will be run, how can I make it to call the files by the order I see from the screen? Additionally, I also need help with correcting the script as I don't get any stdout.
Thank you in advance for your help.
#!/usr/bin/env python
from sys import argv
import os
def get_num_bases(file_content):
total = []
for linenumber, line in enumerate(file_content):
mod=linenumber%4
if mod==0:
ID = line.strip()[1:]
#print(ID)
if mod==1:
seq = line.strip()
counting = 0
counting += seq.count("T")+ seq.count("A") + seq.count("C") + seq.count("G")
total.append(counting)
allbases = sum(total)
print("Number of bases are: " , allbases)
def get_num_reads(file_content):
total_1 = []
for line in file_content:
num_reads = 0
num_reads += content.count(line)
total_1.append(num_reads)
print("Number of reads are: ", sum(total_1)/int(4))
if __name__=='__main__':
path = os.getcwd()
dir_files = os.listdir(path)
list_files = []
for file in dir_files:
if file.endswith("fastq.gz"):
if file not in list_files:
file_content = open(file, "r").readlines()
list_files.append(file)
print("This is the filename: ", file, get_num_bases(file_content), get_num_reads(file_content))

How do I create different txt.name in other execute [duplicate]

This question already has answers here:
Create file but if name exists add number
(16 answers)
Closed 3 years ago.
i = 1
while i <=10:
f = open("txtfile.txt",+str(i) "a+")
f.write("111\n")
f.write("222\n")
i = i + 1
f.close()
I want to create a new txt file on each run, but the code above does not do what I have in mind.
I want to create txt.file1 and, if it already exists, the next execution should create txt.file2.
Assuming, on every run new file (txtfileNN.txt) with incremented number need to be created ( NN is one or two digit number), try below code:
import os
import re

file_base_name = 'txtfile'


def _series_pattern(base):
    """Compile a pattern matching exactly base.txt or base<digits>.txt."""
    return re.compile(re.escape(base) + r'(\d*)\.txt')


def next_file_name(existing_names, base=file_base_name):
    """Return the next unused name in the series base.txt, base1.txt, ...

    existing_names is any iterable of file names; names that are not part
    of the series are ignored.  The numbers are compared as integers, so
    'txtfile10.txt' correctly ranks above 'txtfile9.txt' (a plain string
    sort would put it below).
    """
    pattern = _series_pattern(base)
    numbers = []
    for name in existing_names:
        found = pattern.fullmatch(name)
        if found:
            # base.txt carries no digits; count it as number 0.
            numbers.append(int(found.group(1) or 0))
    if not numbers:
        # File does not exist yet
        return base + '.txt'
    return base + str(max(numbers) + 1) + '.txt'


if __name__ == '__main__':
    pattern = _series_pattern(file_base_name)
    all_files_in_dir = sorted(i for i in os.listdir() if pattern.fullmatch(i))
    print('Existing files in directory: {}'.format(all_files_in_dir))
    # Existing files in directory: ['txtfile.txt', 'txtfile1.txt', 'txtfile10.txt']
    out_file = next_file_name(all_files_in_dir)
    print('Next file name : {}'.format(out_file))
    # Next file name : txtfile11.txt
    # Now write text in new file
    with open(out_file, 'a') as f:
        f.write("111\n")
        f.write("222\n")
Here is a simple way to create 10 files in Python 3.6+, named from file.txt01 to file.txt10:
from pathlib import Path

# Write the same two lines into file.txt01 ... file.txt10; the counter is
# zero-padded to two digits.
for number in range(1, 11):
    Path('file.txt' + format(number, '02d')).write_text('111\n222\n')
If you want to create a new file on every run, sequentially numbered ad infinitum, do this:
from pathlib import Path

# Probe file.txt1, file.txt2, ... and stop at the first name not taken yet.
i = 1
candidate = Path(f'file.txt{i}')
while candidate.exists():
    i += 1
    candidate = Path(f'file.txt{i}')
candidate.write_text('111\n222\n')
But that is very inefficient though.
So maybe this is a better solution:
from itertools import count
from pathlib import Path


def first_free_slot(used):
    """Return the smallest integer >= 1 that is not in *used*.

    Gaps are filled first: with slots 1, 2 and 4 taken the result is 3;
    with nothing taken it is 1.
    """
    return next(n for n in count(1) if n not in used)


if __name__ == '__main__':
    source = Path('/home/accdias/temp')
    prefix = 'file.txt'
    # Keep only names of the form "<prefix><digits>".  A file called exactly
    # "file.txt", or one with a non-numeric suffix, is skipped instead of
    # making int() raise ValueError.
    suffixes = (p.name[len(prefix):] for p in source.glob(f'{prefix}*'))
    slots = {int(s) for s in suffixes if s.isdecimal()}
    filename = source / f'{prefix}{first_free_slot(slots)}'
    filename.write_text('111\n222\n')
The solution above is nice because it takes into account any gaps that may exist and picks the lowest slot number available.

how to fix the error displayed on python shell ?

I have code written in Python that lets the user select the path of a folder that contains PDF files and converts them to text files.
The system works perfectly when the content is not Arabic.
error displayed :
Traceback (most recent call last): File
"C:\Users\test\Downloads\pdf-txt\text maker.py", line 32, in
path=list[i] IndexError: list index out of range
code:
import os
from os import chdir, getcwd, listdir, path
import codecs
import pyPdf
from time import strftime
def check_path(prompt):
''' (str) -> str
Verifies if the provided absolute path does exist.
'''
abs_path = raw_input(prompt)
while path.exists(abs_path) != True:
print "\nThe specified path does not exist.\n"
abs_path = raw_input(prompt)
return abs_path
print "\n"
folder = check_path("Provide absolute path for the folder: ")
list=[]
directory=folder
for root,dirs,files in os.walk(directory):
for filename in files:
if filename.endswith('.pdf'):
t=os.path.join(directory,filename)
list.append(t)
m=len(list)
i=0
while i<=len(list):
path=list[i]
head,tail=os.path.split(path)
var="\\"
tail=tail.replace(".pdf",".txt")
name=head+var+tail
content = ""
# Load PDF into pyPDF
##pdf = pyPdf.PdfFileReader(file(path, "rb"))
pdf = pyPdf.PdfFileReader(codecs.open(path, "rb", encoding='UTF-8'))
# Iterate pages
for i in range(0, pdf.getNumPages()):
# Extract text from page and add to content
content += pdf.getPage(i).extractText() + "\n"
print strftime("%H:%M:%S"), " pdf -> txt "
f=open(name,'w')
f.decode(content.encode('UTF-8'))
## f.write(content.encode("UTF-8"))
f.write(content)
f.close
the error can probably be solved by just changing
while i<=len(list):
to:
while i<len(list):
because in python allowed indices for a list with N elements are:
0,1,...,N-1
while trying to access the element N gives an IndexError.
If a list's last index is n, then the len of the list is n+1.
This means that when you want to access a list, you do NOT want to access list[length of list] aka n+1 as this does not exist!
I believe the only wrong line in your code is the while, it should be:
while i < len(list):
And not
while i <= len(list):
You do not want i to take the value len(list).

Using python script to search in multiple files and outputting an individual file for each one

I am trying to get a program up and running that takes astronomical data files with the extension .fits and takes all of the files with that extension in a folder and searches for specific header information, and subsequently places it into a text folder corresponding to each file. I am using a while loop, and please forgive me if this code is badly formatted, it is my first time using python! My main problem is that I can only get the program to read one file before it closes itself.
#!/usr/bin/env python
#This code properly imports all '.fits' files in a specified directory and
#outputs them into a .txt format that allows several headers and their contained
#data to be read.
import copy
import sys
import pyfits
import string
import glob
import os.path
import fnmatch
import numpy as np
DIR = raw_input("Please input a valid directory : ") #-----> This prompts for input from the user to find the '.fits' files
os.chdir(DIR)
initialcheck = 0 #Initiates the global counter for the number of '.fits' files in the specified directory
targetcheck = 0 #Initiates the global counter for the amount of files that have been processed
def checkinitial(TD):
#This counts the number of '.fits' files in your directory
for files in glob.iglob('*.fits'):
check = len(glob.glob1(TD,"*.fits"))
global initialcheck
initialcheck = check
if initialcheck == 0:
print 'There are no .FITS files in this directory! Try Again...'
sys.exit()
return initialcheck
def sorter(TD, targcheck, inicheck):
#This function will call the two counters and compare them until the number of processed files is greater than the files in the #directory, thereby finishing the loop
global initialcheck
inicheck = initialcheck
global targetcheck
targcheck = targetcheck
while targcheck <= inicheck:
os.walk(TD)
for allfiles in glob.iglob('*.fits'):
print allfiles #This prints out the filenames the porgram is currently processing
with pyfits.open(allfiles) as HDU:
#This block outlines all of the search terms in their respective headers, you will need to set the indices #below to search in the correct header for the specified term you are looking for, however no alterations to #the header definitions should be made.
HDU_HD_0 = HDU[0].header
HDU_HD_1 = HDU[1].header
#HDU_HD_2 = HDU[2].header -----> Not usually needed, can be activated if data from this header is required
#HDU_HD_3 = HDU[3].header -----> Use this if the '.fits' file contains a third header (unlikely but possible)
KeplerIDIndex = HDU_HD_0.index('KEPLERID')
ChannelIndex = HDU_HD_0.index('SKYGROUP')
TTYPE1Index = HDU_HD_1.index('TTYPE1')
TTYPE8Index = HDU_HD_1.index('TTYPE8')
TTYPE9Index = HDU_HD_1.index('TTYPE9')
TTYPE11Index = HDU_HD_1.index('TTYPE11')
TTYPE12Index = HDU_HD_1.index('TTYPE12')
TTYPE13Index = HDU_HD_1.index('TTYPE13')
TTYPE14Index = HDU_HD_1.index('TTYPE14')
TUNIT1Index = HDU_HD_1.index('TUNIT1')
TUNIT8Index = HDU_HD_1.index('TUNIT8')
TUNIT9Index = HDU_HD_1.index('TUNIT9')
TUNIT11Index = HDU_HD_1.index('TUNIT11')
TUNIT12Index = HDU_HD_1.index('TUNIT12')
TUNIT13Index = HDU_HD_1.index('TUNIT13')
TUNIT14Index = HDU_HD_1.index('TUNIT14')
#The below variables are an index search for the data found in the specified indices above, allowing the data #to be found in teh numpy array that '.fits' files use
File_Data_KID = list( HDU_HD_0[i] for i in [KeplerIDIndex])
File_Data_CHAN = list( HDU_HD_0[i] for i in [ChannelIndex])
Astro_Data_1 = list( HDU_HD_1[i] for i in [TTYPE1Index])
Astro_Data_8 = list( HDU_HD_1[i] for i in [TTYPE8Index])
Astro_Data_9 = list( HDU_HD_1[i] for i in [TTYPE9Index])
Astro_Data_11 = list( HDU_HD_1[i] for i in [TTYPE11Index])
Astro_Data_12 = list( HDU_HD_1[i] for i in [TTYPE12Index])
Astro_Data_13 = list( HDU_HD_1[i] for i in [TTYPE13Index])
Astro_Data_14 = list( HDU_HD_1[i] for i in [TTYPE14Index])
Astro_Data_Unit_1 = list( HDU_HD_1[i] for i in [TUNIT1Index])
Astro_Data_Unit_8 = list( HDU_HD_1[i] for i in [TUNIT8Index])
Astro_Data_Unit_9 = list( HDU_HD_1[i] for i in [TUNIT9Index])
Astro_Data_Unit_11 = list( HDU_HD_1[i] for i in [TUNIT11Index])
Astro_Data_Unit_12 = list( HDU_HD_1[i] for i in [TUNIT12Index])
Astro_Data_Unit_13 = list( HDU_HD_1[i] for i in [TUNIT13Index])
Astro_Data_Unit_14 = list( HDU_HD_1[i] for i in [TUNIT14Index])
HDU.close()
with open('Processed ' + allfiles + ".txt", "w") as copy:
targetcheck += 1
Title1_Format = '{0}-----{1}'.format('Kepler I.D.','Channel')
Title2_Format = '-{0}--------{1}------------{2}------------{3}------------{4}------------{5}-------------{6}-'.format('TTYPE1','TTYPE8','TTYPE9','TTYPE11','TTYPE12','TTYPE13','TTYPE14')
File_Format = '{0}--------{1}'.format(File_Data_KID, File_Data_CHAN)
Astro_Format = '{0}---{1}---{2}---{3}---{4}---{5}---{6}'.format(Astro_Data_1, Astro_Data_8, Astro_Data_9, Astro_Data_11, Astro_Data_12, Astro_Data_13, Astro_Data_14)
Astro_Format_Units = '{0} {1} {2} {3} {4} {5} {6}'.format(Astro_Data_Unit_1, Astro_Data_Unit_8, Astro_Data_Unit_9, Astro_Data_Unit_11, Astro_Data_Unit_12, Astro_Data_Unit_13, Astro_Data_Unit_14)
copy.writelines("%s\n" % Title1_Format)
copy.writelines( "%s\n" % File_Format)
copy.writelines('\n')
copy.writelines("%s\n" % Title2_Format)
copy.writelines( "%s\n" % Astro_Format)
copy.writelines('\n')
copy.writelines( "%s\n" % Astro_Format_Units)
Results = copy
return Results
checkinitial(DIR)
sorter(DIR, targetcheck, initialcheck)
I think you keep getting confused between a single file and a list of files. Try something like this:
def checkinitial(TD):
    """Return the number of '.fits' files in directory TD.

    Prints a message and exits the program (via sys.exit) when the
    directory contains no such file.
    """
    # This counts the number of '.fits' files in your directory.
    # glob.glob1 is an undocumented helper; the documented glob.glob with a
    # joined pattern yields the same number of matches.
    check = len(glob.glob(os.path.join(TD, "*.fits")))
    if not check:
        # Parenthesised single-argument form works as a Python 2 print
        # statement and as a Python 3 function call.
        print('There are no .FITS files in this directory! Try Again...')
        sys.exit()
    return check
def sorter(TD, targcheck, inicheck):
    """Process every '.fits' file in directory TD, one output file each.

    For each input file a text file named 'Processed_<input name>.txt' is
    opened for writing in TD.  Iterating over the glob results visits each
    file exactly once, so no counters are needed to end the loop;
    targcheck and inicheck are unused and kept only so the existing call
    sorter(DIR, targetcheck, initialcheck) keeps working.
    """
    for in_file in glob.iglob(os.path.join(TD, '*.fits')):
        print(in_file)  # This prints out the filenames the program is currently processing
        with pyfits.open(in_file) as HDU:
            # <Process input file HDU here>
            out_file_name = 'Processed_' + os.path.basename(in_file) + ".txt"
            with open(os.path.join(TD, out_file_name), "w") as copy:
                # <Write stuff to your output file copy here>
                pass  # placeholder so the skeleton is valid Python

Categories