22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
class Loader:
    """
    Loads, extracts, and preprocesses an image dataset packaged as a zip archive,
    producing a torch ``DataLoader`` ready for model training.

    Attributes:
        image_path (str | None): Path to the zip file containing the dataset.
        batch_size (int): Number of images per batch. Default 64.
        image_height (int): Height images are resized to. Default 64.
        image_width (int): Width images are resized to. Default 64.
        normalized (bool): If True, apply per-channel normalization to
            mean/std (0.5, 0.5, 0.5) when building the dataloader. Default True.
        raw_image_path (str): Directory the archive was extracted to; set by
            ``extract_features`` and empty until then.
    """

    # Base directory for all dataset artifacts ("raw/" and "processed/").
    DATA_DIR = "./data"

    def __init__(
        self,
        image_path=None,
        batch_size=64,
        image_height=64,
        image_width=64,
        normalized=True,
    ):
        """
        Initializes the Loader with the dataset path, batch size, image
        dimensions, and normalization flag. Performs no I/O.
        """
        self.image_path = image_path
        self.batch_size = batch_size
        self.image_height = image_height
        self.image_width = image_width
        self.normalized = normalized
        self.raw_image_path = ""

    def unzip_dataset(self, extract_to=None):
        """
        Extracts the dataset zip archive (``self.image_path``) into a directory.

        Parameters:
            extract_to (str): Destination directory for the archive contents.

        Raises:
            Exception: If ``extract_to`` is None.
        """
        # Guard clause replaces the original if/else pyramid; message unchanged.
        if extract_to is None:
            raise Exception(
                "Path is not defined properly in unzip_dataset method".capitalize()
            )
        with zipfile.ZipFile(file=self.image_path, mode="r") as zip_ref:
            zip_ref.extractall(path=extract_to)

    def extract_features(self):
        """
        Ensures ``<DATA_DIR>/raw/`` exists, extracts the dataset into it, and
        records the location in ``self.raw_image_path``.

        Extraction errors are reported (best-effort) rather than raised, in
        which case ``raw_image_path`` is left unchanged.
        """
        # Compute the target path once instead of re-joining it in every branch.
        extract_to = os.path.join(self.DATA_DIR, "raw/")

        if os.path.exists(extract_to):
            logging.info("raw folder already exists".title())
        else:
            logging.info("raw folder does not exists and is about to create".title())
            try:
                os.makedirs(extract_to)
            except Exception as e:
                print("Error - {}".format(e))
                return  # Cannot extract without a destination directory.

        try:
            self.unzip_dataset(extract_to=extract_to)
        except Exception as e:
            print("Error - {}".format(e))
        else:
            # Only record the path once extraction actually succeeded.
            self.raw_image_path = extract_to

    def saved_dataloader(self, dataloader=None):
        """
        Pickles the dataloader to ``<DATA_DIR>/processed/dataloader.pkl``,
        creating the directory if needed.

        Parameters:
            dataloader: The DataLoader object to persist.

        Raises:
            Exception: If ``dataloader`` is None.
        """
        if dataloader is None:
            raise Exception(
                "Dataloader is not defined properly in saved_dataloader method".capitalize()
            )

        processed_dir = os.path.join(self.DATA_DIR, "processed")
        if not os.path.exists(processed_dir):
            logging.info(
                "Processed data folder is not exists and is about to create".capitalize()
            )
            os.makedirs(processed_dir)

        # Single pickling call (the original duplicated this in both branches).
        # NOTE(review): create_pickle is a project helper defined elsewhere;
        # presumably it serializes `value` to `filename` — confirm at call site.
        try:
            create_pickle(
                value=dataloader,
                filename=os.path.join(processed_dir, "dataloader.pkl"),
            )
            logging.info("done to create pickle file".title())
        except Exception as e:
            print("Error - {}".format(e))

    def create_dataloader(self):
        """
        Builds a shuffled DataLoader over the extracted image folder, resizing
        images to (image_height, image_width) and optionally normalizing them.

        Returns:
            DataLoader: Batched dataset ready for training or evaluation.
        """
        steps = [
            transforms.Resize((self.image_height, self.image_width)),
            transforms.ToTensor(),
        ]
        # BUG FIX: the `normalized` flag was previously ignored and
        # Normalize was applied unconditionally. Default (True) behavior
        # is unchanged; normalized=False now actually skips it.
        if self.normalized:
            steps.append(transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)))
        transform = transforms.Compose(steps)

        dataset = datasets.ImageFolder(root=self.raw_image_path, transform=transform)
        dataloader = DataLoader(dataset, batch_size=self.batch_size, shuffle=True)
        self.saved_dataloader(dataloader=dataloader)
        return dataloader
|