TypeError while loading the files - Python

train_dir = os.path.join(X_train, y_train)
test_dir = os.path.join(X_test, y_test)

if not os.path.exists(train_dir):
    os.makedirs(train_dir)
if not os.path.exists(test_dir):
    os.makedirs(test_dir)
This is my piece of code to load the train and test files, but I got the following TypeError:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-39-144de685caf8> in <module>
----> 1 train_dir = os.path.join(X_train,y_train)
2 test_dir = os.path.join(X_test, y_test)
3
4 if not os.path.exists(train_dir):
5 os.makedirs(train_dir)
~/anaconda3/lib/python3.7/posixpath.py in join(a, *p)
78 will be discarded. An empty last part will result in a path that
79 ends with a separator."""
---> 80 a = os.fspath(a)
81 sep = _get_sep(a)
82 path = a
TypeError: expected str, bytes or os.PathLike object, not list
I have tried everything I can think of to correct this; please help me out.
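os.path.join builds a path from strings, but X_train and y_train here are lists (presumably the data arrays themselves), which is why os.fspath raises this TypeError. A minimal sketch of what was probably intended, assuming the goal is to create train/test directories under a base folder; base_dir and the 'train'/'test' names are placeholders:

import os

base_dir = './data'  # hypothetical base folder
train_dir = os.path.join(base_dir, 'train')
test_dir = os.path.join(base_dir, 'test')

# exist_ok=True replaces the separate os.path.exists checks
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

The arrays X_train, y_train, etc. would then be saved into those directories (for example with np.save), rather than being used as path components themselves.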

Related

Generator raised StopIteration in find_job_titles package

I am trying to run this code:
from find_job_titles import FinderAcora
finder=FinderAcora()
finder.findall('IT Audit & Governance')
But it gives me this error every time:
---------------------------------------------------------------------------
StopIteration Traceback (most recent call last)
/usr/local/lib/python3.8/dist-packages/find_job_titles/__init__.py in longest_match(matches)
48 """
---> 49 longest = next(matches)
50
StopIteration:
The above exception was the direct cause of the following exception:
RuntimeError Traceback (most recent call last)
1 frames
<ipython-input-31-5b965ac3d7be> in <module>
----> 1 finder.findall('IT Audit & Governance')
/usr/local/lib/python3.8/dist-packages/find_job_titles/__init__.py in findall(self, string, use_longest)
82 else return all overlapping matches
83 :returns: list of matches of type `Match`
---> 84 """
85 return list(self.finditer(string, use_longest=use_longest))
86
RuntimeError: generator raised StopIteration
I tried using the suggestions from this Stack Overflow post but it didn't work.
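This is PEP 479 behavior: from Python 3.7 on, a StopIteration that escapes a generator is re-raised as a RuntimeError. The package's longest_match calls next(matches) on an iterator that is empty whenever no job title matches the input string, and that StopIteration propagates out of a generator. A minimal sketch of the mechanism, independent of the package itself:

def broken(items):
    # next() on an empty iterator raises StopIteration; letting it escape
    # a generator raises RuntimeError under PEP 479 (Python 3.7+)
    yield next(iter(items))

def fixed(items):
    try:
        first = next(iter(items))
    except StopIteration:
        return  # end the generator cleanly instead
    yield first

list(broken([]))  # RuntimeError: generator raised StopIteration
list(fixed([]))   # []

Until the package handles the empty-match case, one workaround is to wrap the findall call in try/except RuntimeError and treat that as "no titles found".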

RuntimeError: stack expects a non-empty TensorList

I am trying to create an embedding to use for a matching technique of words but I get the following error:
RuntimeError Traceback (most recent call last)
/var/folders/k1/jt1nfyks4cx689d50f5mtg0w0000gp/T/ipykernel_1349/3490519318.py in <module>
53 #Compute embedding for both lists
54
---> 55 embeddings1 = model.encode(fifteen_percent_list, convert_to_tensor=True)
56
57
/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sentence_transformers/SentenceTransformer.py in encode(self, sentences, batch_size, show_progress_bar, output_value, convert_to_numpy, convert_to_tensor, device, normalize_embeddings)
185
186 if convert_to_tensor:
--> 187 all_embeddings = torch.stack(all_embeddings)
188 elif convert_to_numpy:
189 all_embeddings = np.asarray([emb.numpy() for emb in all_embeddings])
RuntimeError: stack expects a non-empty TensorList
I do not understand why this happens, since my second embedding (embeddings2) goes through just fine without any errors.
Here is some of the code if that helps:
from sentence_transformers import SentenceTransformer, util
model = SentenceTransformer('distilbert-base-nli-stsb-mean-tokens')
fifteen_percent_list = list(fiften_percent)
#Compute embedding for both lists
embeddings1 = model.encode(fifteen_percent_list, convert_to_tensor=True)
# try on a smaller set of 10k, as it takes too long to run on full set of queries
rest_of_queries_list = list(set(rest_of_queries))[:10000]
embeddings2 = model.encode(rest_of_queries_list, convert_to_tensor=True)
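torch.stack raises exactly this error when it is handed an empty list, and SentenceTransformer.encode returns an empty list of embeddings when its input is empty. So the likely cause is that fifteen_percent_list is empty; note it is built from a variable spelled fiften_percent, which is worth double-checking. A small guard to confirm, as a sketch:

fifteen_percent_list = list(fiften_percent)
print(len(fifteen_percent_list))  # if this prints 0, that explains the error

if not fifteen_percent_list:
    raise ValueError("fifteen_percent_list is empty; nothing to encode")

embeddings1 = model.encode(fifteen_percent_list, convert_to_tensor=True)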

OpenCV TypeError: Expected Ptr<cv::UMat> when iterating over files in a directory (Python)

I have a problem converting the files in a directory: I get a TypeError and I'm not sure how to fix it.
Below is my code along with the output.
directory = './Input/'
for filename in os.scandir(directory):
    print(filename)
    print(type(filename))
    # image = cv2.imread(filename)
    result = sr.upsample(filename)
    cv2.imwrite("./Output/image.png", result)
my output:
<DirEntry '1.png'>
<class 'posix.DirEntry'>
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-14-e9096dfd223e> in <module>()
28
29 # image = cv2.imread(filename)
---> 30 result = sr.upsample(filename)
31 # cv2.imwrite("./Output/image.png", result)
32 # else:
TypeError: Expected Ptr<cv::UMat> for argument 'img'
The upsample function takes an image Mat, not a DirEntry or a path; you need to read the image first.
Maybe try:
directory = './Input/'
for filename in os.scandir(directory):
    if filename.path.endswith(".jpg") or filename.path.endswith(".png"):
        print(filename.path)
        image = cv2.imread(filename.path)
        result = sr.upsample(image)
        # write each result under its own name, otherwise every
        # iteration overwrites ./Output/image.png
        cv2.imwrite("./Output/" + filename.name, result)

How to write a proper dataset_fn in tff.simulation.FilePerUserClientData?

I'm currently implementing federated learning using tff.
Because the dataset is very large, we split it into many .npy files, and I'm currently putting the dataset together using tff.simulation.FilePerUserClientData.
This is what I'm trying to do:
client_ids_to_files = dict()
for i in range(len(train_filepaths)):
    client_ids_to_files[str(i)] = train_filepaths[i]

def dataset_fn(filepath):
    print(filepath)
    dataSample = np.load(filepath)
    label = filepath[:-4].strip().split('_')[-1]
    return tf.data.Dataset.from_tensor_slices((dataSample, label))

train_filePerClient = tff.simulation.FilePerUserClientData(client_ids_to_files, dataset_fn)
However, it doesn't seem to work well: the filepath passed to the callback function is a tensor with dtype string. The value of filepath is: Tensor("hash_table_Lookup/LookupTableFindV2:0", shape=(), dtype=string)
Instead of the path from client_ids_to_files, the tensor seems to contain what looks like an error message. Am I doing something wrong? How can I write a proper dataset_fn for tff.simulation.FilePerUserClientData using npy files?
EDIT:
Here is the error log. The error itself is not really related to the question I'm asking, but you can find the called functions:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-46-e61ddbe06cdb> in <module>
22 return tf.data.Dataset.from_tensor_slices(filepath)
23
---> 24 train_filePerClient = tff.simulation.FilePerUserClientData(client_ids_to_files,dataset_fn)
25
~/fasttext-venv/lib/python3.6/site-packages/tensorflow_federated/python/simulation/file_per_user_client_data.py in __init__(self, client_ids_to_files, dataset_fn)
52 return dataset_fn(client_ids_to_files[client_id])
53
---> 54 @computations.tf_computation(tf.string)
55 def dataset_computation(client_id):
56 client_ids_to_path = tf.lookup.StaticHashTable(
~/fasttext-venv/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/wrappers/computation_wrapper.py in __call__(self, tff_internal_types, *args)
405 parameter_type)
406 args, kwargs = unpack_arguments_fn(next(wrapped_fn_generator))
--> 407 result = fn_to_wrap(*args, **kwargs)
408 if result is None:
409 raise ComputationReturnedNoneError(fn_to_wrap)
~/fasttext-venv/lib/python3.6/site-packages/tensorflow_federated/python/simulation/file_per_user_client_data.py in dataset_computation(client_id)
59 list(client_ids_to_files.values())), '')
60 client_path = client_ids_to_path.lookup(client_id)
---> 61 return dataset_fn(client_path)
62
63 self._create_tf_dataset_fn = create_dataset_for_filename_fn
<ipython-input-46-e61ddbe06cdb> in dataset_fn(filepath)
17 filepath = tf.print(filepath)
18 print(filepath)
---> 19 dataSample = np.load(filepath)
20 print(dataSample)
21 label = filepath[:-4].strip().split('_')[-1]
~/fasttext-venv/lib/python3.6/site-packages/numpy/lib/npyio.py in load(file, mmap_mode, allow_pickle, fix_imports, encoding)
426 own_fid = False
427 else:
--> 428 fid = open(os_fspath(file), "rb")
429 own_fid = True
430
TypeError: expected str, bytes or os.PathLike object, not Operation
The problem is that dataset_fn must be serializable as a tf.Graph. This is required because TFF uses TensorFlow graphs to execute logic on remote machines.
In this case, np.load is not serializable to a graph operation. It looks like numpy is used to load from disk into memory, and then tf.data.Dataset.from_tensor_slices is used to create a dataset from an in-memory object. It may be possible to save the files in a different format and use a native tf.data.Dataset operation to load from disk, rather than using Python. Some options could be tf.data.TFRecordDataset, tf.data.TextLineDataset, or tf.data.experimental.SqlDataset.
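A minimal sketch of the TFRecordDataset route, assuming each client's .npy file is first re-exported as a TFRecord of serialized tf.train.Example protos; INPUT_DIM, the feature_spec keys, and the integer label encoding are placeholders to adapt to the real data:

import tensorflow as tf
import tensorflow_federated as tff

INPUT_DIM = 128  # hypothetical feature vector length

# Expected layout of each serialized tf.train.Example
feature_spec = {
    'x': tf.io.FixedLenFeature([INPUT_DIM], tf.float32),
    'y': tf.io.FixedLenFeature([], tf.int64),
}

def dataset_fn(filepath):
    # filepath arrives as a scalar string tensor; TFRecordDataset accepts
    # it directly, so the whole function stays serializable as a tf.Graph
    raw = tf.data.TFRecordDataset(filepath)
    return raw.map(
        lambda rec: tf.io.parse_single_example(rec, feature_spec))

train_filePerClient = tff.simulation.FilePerUserClientData(
    client_ids_to_files, dataset_fn)

Storing the label inside each record also sidesteps the original attempt to parse it out of the file name, which cannot work once the path is a symbolic tensor.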

How to derive weights for bucketized_column in tf.estimator.LinearRegressor in tensorflow?

I am studying the Google Machine Learning Crash Course.
I'm having trouble with the "Feature Crosses" chapter:
https://developers.google.com/machine-learning/crash-course/feature-crosses/programming-exercise
I tried to get the weights of a crossed feature from the linear regressor.
# here I change _ to linear_model
linear_model = train_model(
    learning_rate=1.0,
    steps=500,
    batch_size=100,
    feature_columns=construct_feature_columns(),
    training_examples=training_examples,
    training_targets=training_targets,
    validation_examples=validation_examples,
    validation_targets=validation_targets)
Weight_bucketized_longitude= linear_model.get_variable_value('linear/linear_model/bucketized_longitude/weights')
print(Weight_bucketized_longitude)
However, I got error message as below:
Error Message:
NotFoundError: Key linear/linear_model/bucketized_longitude/weights
not found in checkpoint
It looks like the path is wrong.
The path works for numeric_column, but it doesn’t for bucketized_column.
Could you help to indicate the correct path?
Thanks.
EDIT:
I tried Geeocode's method. However, I still got an error message:
Weight_bucketized_longitude = linear_model.get_variable_value(["linear", "linear_model", "bucketized_longitude", "weights"])
AttributeError Traceback (most recent call last)
in ()
----> 1 Weight_bucketized_longitude = linear_model.get_variable_value(["linear", "linear_model", "bucketized_longitude", "weights"])
/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/estimator.pyc in get_variable_value(self, name)
252 _check_checkpoint_available(self.model_dir)
253 with context.graph_mode():
--> 254 return training.load_variable(self.model_dir, name)
255
256 def get_variable_names(self):
/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/checkpoint_utils.pyc in load_variable(ckpt_dir_or_file, name)
77 """
78 # TODO(b/29227106): Fix this in the right place and remove this.
---> 79 if name.endswith(":0"):
80 name = name[:-2]
81 reader = load_checkpoint(ckpt_dir_or_file)
AttributeError: 'list' object has no attribute 'endswith'
The problem is that linear_model.get_variable_value() has to be passed the variable's name as a string or a list of strings. From the documentation:
get_variable_value
get_variable_value(name)
Returns value of the variable given by name.
Args: name: string or a list of string, name of the tensor. Returns:
Numpy array - value of the tensor.
Raises: ValueError: If the Estimator has not produced a checkpoint
yet.
Thus your code should change as follows:
Weight_bucketized_longitude= linear_model.get_variable_value(["linear", "linear_model", "bucketized_longitude", "weights"])
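If the list form also fails (as in the traceback above), a more reliable route is to list every variable name the estimator actually wrote to its checkpoint and copy the exact key from there. A sketch; the final key is an assumption based on tf.feature_column.bucketized_column naming its output "<source_column>_bucketized":

# Print every variable name stored in the checkpoint
for name in linear_model.get_variable_names():
    print(name)

# Then pass the matching key as a single string, e.g. (hypothetical key):
Weight_bucketized_longitude = linear_model.get_variable_value(
    'linear/linear_model/longitude_bucketized/weights')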
