I'm trying to count the occurrences of a word in a text file.
sub = 'Date:'
#opening and reading the input file
#In path to input file use '\' as escape character
with open ("C:\\Users\\md_sarfaraz\\Desktop\\ctl_Files.txt", "r") as myfile:
val=myfile.read().replace('\n', ' ')
#val
#len(val)
occurence = str.count(sub, 0, len(val))
I'm getting this error :--
>>> occurence = str.count('Date:', 0,len(val))
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: expected a character buffer object
>>> occurence = str.count('Date:', 0,20)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: expected a character buffer object
You are over-complicating it:
open(file).read().count(WORD)
You're using count wrong. Try this:
occurence = val.count(sub)
If you want to know how many times the word Date: occurs in the text file, this is one way to do it:
# Read the whole file into memory; the with-block closes the handle as soon
# as the read finishes instead of leaving it open.
with open("C:\\Users\\md_sarfaraz\\Desktop\\ctl_Files.txt", "r") as handle:
    myfile = handle.read()
sub = "Date:"
# str.count returns the number of non-overlapping occurrences of sub.
occurence = myfile.count(sub)
print(occurence)
Related
How can I get a string between 2 specified strings in Python3 using regex?
b'SSH-2.0-OpenSSH_6.6.1p1 Ubuntu-2ubuntu2.11\r\n'
desired output:
>>> SSH-2.0-OpenSSH_6.6.1p1 Ubuntu-2ubuntu2.11
# ipsort.py
import re
f = open("ssh.txt", "r")
line = f.readline()
for line in f:
version = re.search(r"b'(.*?)\r\n'", line)
new_file = open("ssh_versions.txt", "a")
new_file.write(version)
new_file.close()
Traceback (most recent call last):
File "ipsort.py", line 11, in <module>
new_file.write(version)
TypeError: write() argument must be str, not None
You need to double the \ in the regexp, because otherwise it's matching CR and LF characters rather than literal \r and \n.
You also need to use version.group(1) to get the string that the capture group matched.
# The backslashes are doubled so the pattern matches the literal two-character
# sequences \r and \n in the text rather than CR and LF control characters.
version = re.search(r"b'(.*?)\\r\\n'", line)
if version:  # re.search returns None when the line does not match
    with open("ssh_versions.txt", "a") as new_file:
        # group(1) is the text captured between b' and \r\n'.
        new_file.write(version.group(1))
I have a text file containing some data; among these data there is a JSON object packed with MessagePack.
I am able to unpack it on https://toolslick.com/conversion/data/messagepack-to-json but I can't get it to work in Python.
So far I have tried the following:
def parseAndSplit(path):
with open(path) as f:
fContent = f.read()
for subf in fContent.split('Payload: '):
'''for ssubf in subf.split('DataChunkMsg'):
print(ssubf)'''
return subf.split('DataChunkMsg')[0]
fpath = "path/to/file"
t = parseAndSplit(fpath)
l = t.split("-")
s = ""
for i in l:
s=s+i
print(s)
a = msgpack.unpackb(bytes(s,"UTF-8"), raw=False)
print(a)
but the output is
import msgpack
Traceback (most recent call last):
File "C:/Users/Marco/PycharmProjects/codeTest/msgPack.py", line 19, in <module>
a = msgpack.unpackb(bytes(s,"UTF-8"), raw=False)
File "msgpack\_unpacker.pyx", line 202, in msgpack._cmsgpack.unpackb
msgpack.exceptions.ExtraData: unpack(b) received extra data.
9392AA6E722D736230322D3032AC4F444D44617...(string goes on)
I am quite sure that it's an encoding problem of some sort, but I am having no luck, whether in the docs or by trial and error.
Thank you very much for the attention
I found the solution in the end:
# bytes.fromhex converts the hexadecimal text into raw bytes for msgpack.
msgpack.unpackb(bytes.fromhex(hexstring))
where hexstring is the string read from the file.
I want to remove all the special characters from the CSV file. I tried many ways but couldn't fix it.
import re
data=("C:/Users/Niroshima/Desktop/Research/post.csv")
for i in data.values():
i = re.sub(r'[^\x00-\x7F]', '', i)
print(i)
And this error came up
AttributeError
Traceback (most recent call last)
<ipython-input-17-ee7352e82dd3> in <module>
----> 1 for i in data.values():
2 i=re.sub(r'[^\x00-\x7F]','',i)
3 print(i)
AttributeError: 'str' object has no attribute 'values'
`data` is just your file name; try opening the file and changing each line like so:
file_name = "C:/Users/Niroshima/Desktop/Research/post.csv"
# Open the file itself; iterating over it yields one line of text at a time.
with open(file_name) as f:
    for line in f:
        # Drop every character outside the ASCII range \x00-\x7F.
        l = re.sub(r'[^\x00-\x7F]','', line)
        print(l)
If you want this data in another file, then you have to write each l to a different file
I tried to use the below program
import os
HOME= os.getcwd()
STORE_INFO_FILE = os.path.join(HOME,'storeInfo')
def searchStr(STORE_INFO_FILE, storeId):
with open (STORE_INFO_FILE, 'r') as storeInfoFile:
for storeLine in storeInfoFile:
## print storeLine.split(r'\s+')[0]
if storeLine.split()[0] == storeId:
print storeLine
searchStr(STORE_INFO_FILE, 'Star001')
An example line in the file:
Star001 Sunnyvale 9.00 USD Los_angeles/America sunnvaleStarb#startb.com
But it gives the below error
./searchStore.py Traceback (most recent call last): File
"./searchStore.py", line 21, in
searchStr(STORE_INFO_FILE, 'Star001') File "./searchStore.py", line 17, in searchStr
if storeLine.split()[0] == storeId: IndexError: list index out of range
I have tried printing using split function on the command line and I was able to print it.
It looks like you have an empty or blank line in your file:
>>> 'abc def hij\n'.split()
['abc', 'def', 'hij']
>>> ' \n'.split() # a blank line containing white space
[]
>>> '\n'.split() # an empty line
[]
The last 2 cases show that an empty list can be returned by split(). Trying to index that list raises an exception:
>>> '\n'.split()[0]
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
IndexError: list index out of range
You can fix the problem by checking for empty and blank lines. Try this code:
def searchStr(store_info_file, store_id):
    """Print every line of a store file whose first field equals store_id.

    Args:
        store_info_file: Path of the text file to scan.
        store_id: Value compared against the first whitespace-separated
            field of each line.

    Empty lines and lines containing only whitespace are skipped, because
    split() returns an empty list for them and indexing it would raise
    IndexError.
    """
    with open (store_info_file) as f:
        for line in f:
            # line.strip() is an empty (falsy) string for blank lines, so the
            # split()[0] lookup is only evaluated when a first field exists.
            if line.strip() and (line.split()[0] == store_id):
                # The call form of print works on both Python 2 and Python 3.
                print(line)
Adding line.strip() allows you to ignore empty lines and lines containing only whitespace.
The code fails if the split method returns an empty list.
You can change the code that calls split and add error handling.
The following can be done:
storeLineWords = storeLine.split()
if len(storeLineWords) > 0 and storeLineWords[0] == storeId:
I am doing a sentiment analysis and I want to add NOT to every word between a negation and the following punctuation. I am running the following code:
import re
fin=open("aboveE1.txt",'r', encoding='UTF-8')
transformed = re.sub(r'\b(?:never|no|nothing|nowhere|noone|none|not|havent|hasnt|hadnt|cant|couldnt|shouldnt|wont|wouldnt|dont|doesnt|didnt|isnt|arent|aint)\b[\w\s]+[^\w\s]',
lambda match: re.sub(r'(\s+)(\w+)', r'\1NEG_\2', match.group(0)),
fin,
flags=re.IGNORECASE)
Traceback (most recent call last):
line 14, in
flags=re.IGNORECASE)
line 182, in sub return _compile(pattern, flags).sub(repl, string, count)
TypeError: expected string or bytes-like object
I don't know how to fix the error. Can you help me?
re.sub takes in a string, not a file object. Documentation here.
import re

# re.sub needs a string, so the file is fed to it one line at a time.
# The with-block closes the file even if a substitution raises.
with open("aboveE1.txt",'r', encoding='UTF-8') as fin:
    transformed = ''
    for line in fin:
        # The outer pattern matches a negation word plus everything up to the
        # next punctuation character; the inner substitution prefixes each
        # word that follows whitespace inside that match with NEG_.
        transformed += re.sub(r'\b(?:never|no|nothing|nowhere|noone|none|not|havent|hasnt|hadnt|cant|couldnt|shouldnt|wont|wouldnt|dont|doesnt|didnt|isnt|arent|aint)\b[\w\s]+[^\w\s]',
                              lambda match: re.sub(r'(\s+)(\w+)', r'\1NEG_\2', match.group(0)),
                              line,
                              flags=re.IGNORECASE)
        # No need to append '\n' to 'transformed'
        # because the line returned via the iterator includes the '\n'
Also remember to always close the file you open.