Combining multiple CSV files into one - Python

I have Python code that takes multiple text files as input and generates output in separate CSV files, so if my text files are ABC.txt and XYZ.txt, my code generates output in two CSV files, ABC.csv and XYZ.csv. My ultimate goal is to get one single CSV file with all the outputs. Since I am more comfortable with SQL, I was thinking about uploading all the files to a database and then combining them with SQL, but I was wondering if I can modify my Python code below to generate one single CSV file containing all the output. Here is my code:
import json
from watson_developer_cloud import ToneAnalyzerV3Beta
import urllib.request
import codecs
import csv
import os
import re
import sys
import collections
import glob
import xlwt
from bs4 import BeautifulSoup

ipath = 'C:/TEMP/'  # input folder
opath = 'C:/TEMP/'  # output folder

reader = codecs.getreader("utf-8")

tone_analyzer = ToneAnalyzerV3Beta(
    url='https://gateway.watsonplatform.net/tone-analyzer/api',
    username='1f2fd51b-d0fb-45d8-aba2-08e22777b77d',
    password='DykYfXjV4UXP',
    version='2016-02-11')

path = 'C:/TEMP/*.html'
file = glob.glob(path)

writer = csv.writer(open('C:/TEMP/test', mode='w'))

# iterate over the list getting each file
for fle in file:
    # open the file and then call .read() to get the text
    with open(fle) as f:
        ...
        # output tone name and score to file
        for i in tonename:
            writer.writerows((tone['tone_name'], tone['score']) for tone in cat['tones'])

Modifying your existing code as little as possible: you simply need to open the output CSV file once, before entering the loop that reads the text files:
...
path = 'C:/TEMP/*.html'
file = glob.glob(path)

# !! open our output csv
writer = csv.writer(open('our-merged-data', mode='w'))

# iterate over the list getting each file
for fle in file:
    # open the file and then call .read() to get the text
    with open(fle) as f:
        ...
        # output tone name and score to file
        for i in tonename:
            writer.writerows((tone['tone_name'], tone['score'], Date, Title) for tone in cat['tones'])
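As a general note for Python 3, it is safer to open the output CSV in a with block and with newline='' (otherwise the csv module can emit blank rows on Windows), and to give the file a .csv extension. A minimal sketch of the same pattern; the name our-merged-data.csv is just an illustration, and the loop body stays as in the original:

import csv
import glob

# Open the merged output once; every input file appends rows to the same writer.
with open('C:/TEMP/our-merged-data.csv', mode='w', newline='') as out_f:
    writer = csv.writer(out_f)
    for fle in glob.glob('C:/TEMP/*.html'):
        with open(fle) as f:
            ...  # analyze the text and write rows exactly as in the original loop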

Related

Read files from a folder, extract specific keys from each file, and save them to a CSV file

I'm new to Python, and the task I am performing is to extract specific key values from a set of .iris files (which contain data in a nested-dictionary format) in a specific directory.
I want to extract the specific values, save them to a new .csv file, and repeat this for all the other files.
Below is a sample .iris file, from which I should extract only these keys: 'uid', 'enabled', 'login', 'name'.
{"streamType":"user",
"uid":17182,
"enabled":true,
"login":"xyz",
"name":"abcdef",
"comment":"",
"authSms":"",
"email":"",
"phone":"",
"location":"",
"extraLdapOu":"",
"mand":997,
"global":{
"userAccount":"View",
"uid":"",
"retention":"No",
"enabled":"",
"messages":"Change"},
"grants":[{"mand":997,"role":1051,"passOnToSubMand":true}],
I am trying to convert the .iris files to .json and read the files one by one, but unfortunately I am not getting the exact output I want.
Please, could anyone help me?
My code (added from comments):
import os
import csv

path = ''
os.chdir(path)

# Read iris file
def read_iris_file(file_path):
    with open(file_path, 'r') as f:
        print(f.read())

# iterate through all files
for file in os.listdir():
    # Check whether file is in iris format or not
    if file.endswith(".iris"):
        file_path = f"{path}\{file}"
        # call read iris file function
        print(read_iris_file(file_path))
Your files contain data in JSON format, so we can use the built-in json module to parse it. To iterate over files with a certain extension you can use Path.glob() with the pattern "*.iris". Then we can use csv.DictWriter() and pass "ignore" to the extrasaction argument, which makes DictWriter ignore keys we don't need and write only those passed to the fieldnames argument.
Code:
import csv
import json
from pathlib import Path

path = Path(r"path/to/folder")
keys = "uid", "enabled", "login", "name"

with open(path / "result.csv", "w", newline="") as out_f:
    writer = csv.DictWriter(out_f, fieldnames=keys, extrasaction='ignore')
    writer.writeheader()
    for file in path.glob("*.iris"):
        with open(file) as inp_f:
            data = json.load(inp_f)
        writer.writerow(data)
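With the sample .iris file above, result.csv would come out roughly like this (DictWriter writes the parsed boolean back as True):

uid,enabled,login,name
17182,True,xyz,abcdef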
Try the below (the key point here is loading the iris file using ast; note the str() around each field, since uid is an integer):

import ast

fields = ('uid', 'enabled', 'login', 'name')

with open('my.iris') as f1:
    data = ast.literal_eval(f1.read())

with open('my.csv', 'w') as f2:
    f2.write(','.join(fields) + '\n')
    f2.write(','.join(str(data[f]) for f in fields) + '\n')
my.csv
uid,enabled,login,name
17182,true,xyz,abcdef
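Note: ast.literal_eval() only parses Python literals, so it will choke on strict JSON containing lowercase true/false (as in the sample above); in that case json.loads() from the previous answer is the safer choice.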

Changing a value in an ASCII file and saving it with a different name - Python

I have one ASCII file with a .dat extension. The file has data as shown below:
MPOL3_VPROFILE
{
ID="mpvp_1" Cycle="(720)[deg]" Lift="(9)[mm]" Period="(240)[deg]"
Phase="(0)[deg]" TimingHeight="(1.0)[mm]" RampTypeO="Const Velo"
RampHO="(0.3)[mm]" RampVO="(0.00625)[mm/deg]" RampTypeC="auto"
RampHC="(auto)[mm]" RampVC="(auto)[mm/deg]" bO="0.7" cO="0.6" dO="1.0"
eO="1.5" bC="auto" cC="auto" dC="auto" eC="auto" th1O="(14)[deg]"
Now I would like to read this file in Python, change the value RampHO="(0.3)[mm]" to, let's say, RampHO="(0.2)[mm]", and save it as a new .dat file. How can I do this?
Currently I am able to read the file and find the line successfully using the code below:
import sys
import re
import shutil
import os
import glob
import argparse
import copy
import fileinput

rampOpen = 'RampHO='

file = open('flatFollower_GenCam.dat', 'r')
#data = file.readlines()
#print (data)

for line in file:
    line.strip().split('\n')
    if rampOpen in line:
        print(line[4:22])
But I am now stuck on how to change the float value and save the file under a different name.
First up, you should post your code inside your text and not in separate images. Just indent each line with four spaces to format it as code.
You can simply read in a file line by line, change the lines you want to change and then write the output.
with open(infile, 'r') as f_in, open(outfile, 'w') as f_out:
    for line in f_in:
        output_line = edit_line(line)
        f_out.write(output_line)
Then you just have to write a function that does the string replacement.
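For the RampHO case, a minimal sketch of that function, assuming the value always sits between double quotes right after the key (the function name and default arguments are just illustrative):

import re

def edit_line(line, key='RampHO', new_value='(0.2)[mm]'):
    # Replace whatever currently sits between the quotes after key= with new_value.
    return re.sub(rf'{key}="[^"]*"', f'{key}="{new_value}"', line)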

Why can't I create a csv file with the csv module?

I am trying to write a list to a CSV file.
The following code runs and returns no error, but it doesn't work, in that it doesn't actually populate the CSV file with the contents of the list. I am probably doing it wrong because I don't understand something.
import newspaper
import os
from newspaper import article

libya_newspaperlist = []
libya_newspaper = newspaper.build('https://www.cnn.com', memoize_article=False)
for article in libya_newspaper.articles:
    libya_newspaperlist.append(article.url)

import csv
os.chdir("/users/patrickharned/")
libya_newspaper.csv = "/users/patrickharned/libya_newspaper.csv"

def write_list_to_file(libya_newspaperlist):
    """Write the list to csv file."""
    with open("/users/patrickharned/libya_newspaper.csv") as outfile:
        outfile.write(libya_newspaperlist)
So I changed the code to this.
import newspaper
import os
from newspaper import article

libya_newspaperlist = []
libya_newspaper = newspaper.build('https://www.cnn.com', memoize_article=False)
for article in libya_newspaper.articles:
    libya_newspaperlist.append(article.url)

import csv
os.chdir("/users/patrickharned/")
libya_newspaper.csv = "/users/patrickharned/libya_newspaper.csv"

with open("/users/patrickharned/libya_newspaper.csv", "w") as outfile:
    outfile.write(str(libya_newspaperlist))
Now it does output to the CSV file, but it only outputs the first entry and won't do the rest. Any suggestions?
You have to open the file in write mode, and use csv.writer to write one URL per row (writing the list object itself raises a TypeError, and writing str() of it dumps everything into one line):

import csv

with open("/users/patrickharned/libya_newspaper.csv", "w", newline="") as outfile:
    csv.writer(outfile).writerows([url] for url in libya_newspaperlist)

Running a data parser on multiple files in a folder - Python

Long-time lurker, but never posted here. Sorry if this isn't a good post... I made a program that uses regex to pull the names and emails out of resumes. I can get it to open a specific file in my resume folder, but getting the program to iterate over all of the files in the folder has me stumped. Here's the pseudo-code for what I'm doing:
open resume folder
read file1.txt
execute nameFinder
execute emailFinder
create new dictionary candidateData
Export to Excel
read file2.txt
...
Here's the code:
import re
import os
import pprint

with open('John Doe -Resume.txt', 'r') as f:
    # This pulls the first line of the resume,
    # which is generally the name.
    first_line_name = f.readline().strip()
    # This pulls the email from the resume.
    bulkemails = f.read()

r = re.compile(r'(\b[\w.]+@+[\w.]+.+[\w.]\b)')
candidateEmail = r.findall(bulkemails)

emails = ""
for x in candidateEmail:
    emails += str(x) + "\n"

# This creates the dictionary data
candidateData = {'candidateEmail': str(candidateEmail),
                 'candidateName': str(first_line_name)}
pprint.pprint(candidateData)
Then, I get this as an output:
{'candidateEmail': "['JohnDoe@gmail.com']",
 'candidateName': 'John Doe'}
All ready to be exported into Excel.
SO HERE"S MY QUESTION FOR YOU! How do I get it to do this for ALL of the .txt files in my resume folder, and not just the file I specify? Also, any cod critique would be greatly appreciated, Thanks guys! :D
You can use glob to iterate over all .txt files in your directory and then run the function on each file. Add this to the start:
import re
import os
import glob
import pprint

os.chdir("resumes")

for file in glob.glob("*.txt"):
    with open(file, 'r') as f:
        # Rest of your execution code here
EDIT: In answer to your question in the comments:
import re
import os
import glob
import pprint

candidateDataList = []

for file in glob.glob("*.txt"):
    with open(file, 'r') as f:
        # This pulls the first line of the resume,
        # which is generally the name.
        first_line_name = f.readline().strip()
        # This pulls the email from the resume.
        bulkemails = f.read()
        r = re.compile(r'(\b[\w.]+@+[\w.]+.+[\w.]\b)')
        candidateDataList.append({'name': str(first_line_name),
                                  'email': r.findall(bulkemails)})

pprint.pprint(candidateDataList)
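Since the pseudo-code ends with an Excel export step, here is one hedged way to dump candidateDataList to a CSV file that Excel opens directly (the file name resumes.csv is just an example):

import csv

with open('resumes.csv', 'w', newline='') as out_f:
    writer = csv.DictWriter(out_f, fieldnames=('name', 'email'))
    writer.writeheader()
    # 'email' is a list; join it first if you want a plain string per cell.
    writer.writerows(candidateDataList)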
@Jakob's answer is spot on. I only wanted to mention a nice alternative which I usually prefer myself, pathlib:
import re
import pprint
from pathlib import Path

resumes_dir = Path("resumes")

for path in resumes_dir.glob("*.txt"):
    with path.open() as f:
        # Rest of your execution code here

How to read multiple CSV files in a zip folder using Python

I need to read multiple CSV files in a zip folder and extract the data from those CSVs into a container in Python.
I am new to Python, with only basic knowledge, so a detailed explanation would be appreciated.
Thanks in advance
Sampath
The first thing to do is to open the zip file using the zipfile module. Then read the CSV data from each archived file and store it in a container such as a dictionary.
The following reads the data from each file in the zip archive into a dictionary keyed by the file name.
import zipfile

container = {}

with zipfile.ZipFile('/path/to/your/zipfile') as zf:
    for name in zf.namelist():
        container[name] = zf.read(name)

for name in container:
    print("Contents of file {}:".format(name))
    print(container[name])
    print("============================\n")
Optionally you could process the CSV data using the csv module. Something like this should get you started (on Python 3, io.StringIO replaces the old cStringIO, and the bytes from zf.read() must be decoded):
import csv
import io
import zipfile

container = {}

with zipfile.ZipFile('/path/to/your/zipfile') as zf:
    for name in zf.namelist():
        # assumes the archived files are UTF-8 encoded
        container[name] = csv.reader(io.StringIO(zf.read(name).decode('utf-8')))
Now container is a dictionary keyed by file name with csv.reader objects as values.
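A quick usage sketch, iterating over the parsed rows per file:

for name, reader in container.items():
    print("Rows in {}:".format(name))
    for row in reader:
        print(row)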
Here is how you can read all the lines inside a zip:

import zipfile

archive = 'c:\\test\\archive.zip'

def readZip(archive):
    lines = []
    zfile = zipfile.ZipFile(archive)
    for finfo in zfile.infolist():
        ifile = zfile.open(finfo)
        lines.extend(ifile.readlines())
    return lines

print(readZip(archive))
Thanks for the help.
Apart from the code provided above, I have come up with code that satisfies the question:
import os
import csv
from zipfile import ZipFile

# Extract the files in the zip file to a specified destination
ze = ZipFile("Src_AdventureWorks_Files.zip", "r")
ze.extractall("/home/sreddi/workspace/DQAS_Main/Src_AdventureWorks_Files/")
print("Extraction successful")

# Metadata of the zipfile
zf = ZipFile('Src_AdventureWorks_Files.zip', 'r')
zc = zf.namelist()
print(zc)

# Loop over each csv file and print the data
if __name__ == "__main__":
    os.chdir('/home/sreddi/workspace/DQAS_Main/Src_AdventureWorks_Files')
    for csv_path in zc:
        print("###########")
        print(csv_path)
        print("###########")
        with open(csv_path) as f:
            for row in csv.reader(f):
                print(row)
