|
|
@@ -24,21 +24,21 @@ class CodeSet:
|
|
|
B = 2
|
|
|
C = 3
|
|
|
|
|
|
-class Normalize:
|
|
|
- def __init__(self, mean=(0.45, 0.45, 0.45), std=(0.24, 0.24, 0.24)):
|
|
|
- if not (isinstance(mean, (list, tuple)) and isinstance(std, (list, tuple))):
|
|
|
- raise ValueError("mean and std should be of type list or tuple.")
|
|
|
- self.mean = np.array(mean, dtype=np.float32)
|
|
|
- self.std = np.array(std, dtype=np.float32)
|
|
|
-
|
|
|
- # Reshape for broadcasting to apply mean and std across the spatial dimensions of an image
|
|
|
- self.mean = self.mean.reshape((1, 1, 3))
|
|
|
- self.std = self.std.reshape((1, 1, 3))
|
|
|
-
|
|
|
- def __call__(self, img):
|
|
|
- img = img.astype(np.float32) / 255.0 # Scale pixel values to [0, 1]
|
|
|
- img = (img - self.mean) / self.std # Normalize
|
|
|
- return img
|
|
|
+# class Normalize:
|
|
|
+# def __init__(self, mean=(0.45, 0.45, 0.45), std=(0.25, 0.25, 0.25)):
|
|
|
+# if not (isinstance(mean, (list, tuple)) and isinstance(std, (list, tuple))):
|
|
|
+# raise ValueError("mean and std should be of type list or tuple.")
|
|
|
+# self.mean = np.array(mean, dtype=np.float32)
|
|
|
+# self.std = np.array(std, dtype=np.float32)
|
|
|
+
|
|
|
+# # Reshape for broadcasting to apply mean and std across the spatial dimensions of an image
|
|
|
+# self.mean = self.mean.reshape((1, 1, 3))
|
|
|
+# self.std = self.std.reshape((1, 1, 3))
|
|
|
+
|
|
|
+# def __call__(self, img):
|
|
|
+# img = img.astype(np.float32) / 255.0 # Scale pixel values to [0, 1]
|
|
|
+# img = (img - self.mean) / self.std # Normalize
|
|
|
+# return img
|
|
|
|
|
|
class BarcodeDecodeModel:
|
|
|
def __init__(self, decoding_model_path=None):
|
|
|
@@ -52,8 +52,8 @@ class BarcodeDecodeModel:
|
|
|
self.decoding_net = self.ie.read_model(model=decoding_model_path)
|
|
|
self.decoding_sess = self.ie.compile_model(model=self.decoding_net, device_name="CPU")
|
|
|
|
|
|
- self.decoding_input_shape = (1, 3, 32, 256)
|
|
|
- self.normalize = Normalize() # Normalization instance
|
|
|
+ self.decoding_input_shape = (1, 3, 32, 512)
|
|
|
+ # self.normalize = Normalize() # Normalization instance
|
|
|
self._lock = threading.Lock()
|
|
|
self._image_embedding_cache = collections.OrderedDict()
|
|
|
self._max_cache_size = 10
|
|
|
@@ -70,19 +70,23 @@ class BarcodeDecodeModel:
|
|
|
# self.pixmap = pixmap
|
|
|
# logger.debug("Pixmap set successfully in BarcodeDecodeModel.")
|
|
|
|
|
|
- def preprocess_image(self, normalized_img):
|
|
|
- norm = Normalize(mean=(0.45, 0.45, 0.45), std=(0.25, 0.25, 0.25))
|
|
|
- resized_image = cv2.resize(normalized_img, (self.decoding_input_shape[3], self.decoding_input_shape[2]))
|
|
|
- resized_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2RGB)
|
|
|
- resized_image = norm(resized_image)
|
|
|
-
|
|
|
- # Resize image for detection model input size
|
|
|
- logger.debug(f"Preprocessing image for detection: {normalized_img.shape}")
|
|
|
- # resized_image = resized_image.astype('float32') / 255.0
|
|
|
-
|
|
|
- input_tensor = resized_image.transpose(2, 0, 1) # Convert HWC to CHW
|
|
|
- input_tensor = np.expand_dims(input_tensor, 0) # Add batch dimension
|
|
|
+ def preprocess_image(self, img):
|
|
|
+ logger.debug(f"Preprocessing image for decoding: {img.shape}")
|
|
|
+ img = cv2.resize(
|
|
|
+ img,
|
|
|
+ (self.decoding_input_shape[3], self.decoding_input_shape[2]),
|
|
|
+ interpolation=cv2.INTER_NEAREST
|
|
|
+ )
|
|
|
+ img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
|
|
|
+ img = img.astype("float32") / 255.0
|
|
|
+ mean = np.array([0.45, 0.45, 0.45], dtype=np.float32)
|
|
|
+ std = np.array([0.25, 0.25, 0.25], dtype=np.float32)
|
|
|
+ img = (img - mean) / std
|
|
|
+ img = img.transpose(2, 0, 1)
|
|
|
+ img = img.astype("float32")
|
|
|
+ input_tensor = np.expand_dims(img, 0)
|
|
|
logger.debug(f"Processed image shape: {input_tensor.shape}")
|
|
|
+
|
|
|
return input_tensor
|
|
|
|
|
|
def decode_from_points(self, points, detection_idx, original_image):
|
|
|
@@ -95,33 +99,34 @@ class BarcodeDecodeModel:
|
|
|
str: Decoded text from the decoding model.
|
|
|
"""
|
|
|
try:
|
|
|
-
|
|
|
# Convert scaled_points to a numpy array
|
|
|
polygon = np.array(points, dtype=np.int32)
|
|
|
|
|
|
# Create a mask of the same size as the original image
|
|
|
# original_image = labelme.utils.img_qt_to_arr(self.pixmap.toImage())
|
|
|
# cv2.imwrite(f"original_image{detection_idx + 1}.png", original_image)
|
|
|
- mask = np.zeros(original_image.shape[:2], dtype=np.uint8)
|
|
|
- cv2.fillPoly(mask, [polygon], 255) # Fill the polygon with white
|
|
|
+
|
|
|
+ # mask = np.zeros(original_image.shape[:2], dtype=np.uint8)
|
|
|
+ # cv2.fillPoly(mask, [polygon], 255) # Fill the polygon with white
|
|
|
|
|
|
# Apply the mask to the original image
|
|
|
- masked_image = cv2.bitwise_and(original_image, original_image, mask=mask)
|
|
|
+ # masked_image = cv2.bitwise_and(original_image, original_image, mask=mask)
|
|
|
+ # cv2.imwrite(f"masked_image{detection_idx + 1}.png", masked_image)
|
|
|
|
|
|
# Get the bounding rectangle of the polygon to crop the ROI
|
|
|
x, y, w, h = cv2.boundingRect(polygon)
|
|
|
- cropped_image_dec = masked_image[y:y+h, x:x+w]
|
|
|
-
|
|
|
+ # cropped_image_dec = masked_image[y:y+h, x:x+w]
|
|
|
# cv2.imwrite(f"cropped_exact_{detection_idx + 1}.png", cropped_image_dec)
|
|
|
- logger.debug(f"cropped_exact image saved at {detection_idx + 1}.")
|
|
|
-
|
|
|
+ # logger.debug(f"cropped_exact image saved at {detection_idx + 1}.")
|
|
|
+
|
|
|
src_points = np.float32(points)
|
|
|
+
|
|
|
|
|
|
# Calculate the width and height of the barcode based on scaled_points
|
|
|
- width = int(np.linalg.norm(src_points[0] - src_points[1]))
|
|
|
- # print(width)
|
|
|
- height = int(np.linalg.norm(src_points[1] - src_points[2]))
|
|
|
- # print(height)
|
|
|
+ width = int(np.ceil(np.linalg.norm(src_points[0] - src_points[1])))
|
|
|
+ logger.debug(f"width: {width}")
|
|
|
+ height = int(np.ceil(np.linalg.norm(src_points[1] - src_points[2])))
|
|
|
+ logger.debug(f"height: {height}")
|
|
|
|
|
|
# Correct width/height if needed
|
|
|
if width < height:
|
|
|
@@ -133,23 +138,32 @@ class BarcodeDecodeModel:
|
|
|
src_points[3], # Bottom-right becomes bottom-left
|
|
|
src_points[0] # Bottom-left becomes top-left
|
|
|
])
|
|
|
+ logger.debug(f"src_points: {src_points}")
|
|
|
|
|
|
# Define destination points for the flattened barcode
|
|
|
dst_points = np.float32([
|
|
|
[0, 0],
|
|
|
- [width - 1, 0],
|
|
|
- [width - 1, height - 1],
|
|
|
- [0, height - 1]
|
|
|
+ [width, 0],
|
|
|
+ [width, height],
|
|
|
+ [0, height]
|
|
|
])
|
|
|
|
|
|
# Calculate the perspective transformation matrix
|
|
|
- M = cv2.getPerspectiveTransform(src_points, dst_points)
|
|
|
-
|
|
|
+ M = cv2.getPerspectiveTransform(src_points, dst_points)
|
|
|
+ logger.debug(f"perspective transform M: {M}")
|
|
|
+ # t_start = time.perf_counter()
|
|
|
+
|
|
|
# Apply the perspective transformation
|
|
|
- aligned_barcode = cv2.warpPerspective(original_image, M, (width, height), flags=cv2.INTER_LINEAR)
|
|
|
+ aligned_barcode = cv2.warpPerspective(original_image, M, (width, height), flags=cv2.INTER_LANCZOS4)
|
|
|
+
|
|
|
+ # t_end = time.perf_counter()
|
|
|
+ # print(
|
|
|
+ # f"[TIMING] Total crop+align time: {(t_end - t_start) * 1000:.3f} ms | "
|
|
|
+ # f"aligned_size=({height},{width})"
|
|
|
+ # )
|
|
|
|
|
|
# Save the aligned barcode image
|
|
|
- # cv2.imwrite(f"decoding_barcode_{detection_idx + 1}.png", aligned_barcode)
|
|
|
+ # cv2.imwrite(f"old_aligned_barcode_decoding_barcode_{detection_idx + 1}.png", aligned_barcode)
|
|
|
# logger.debug(f"Aligned barcode saved at {detection_idx + 1}.")
|
|
|
|
|
|
# Normalize the image to scale pixel intensities to the range [0, 255]
|
|
|
@@ -157,9 +171,9 @@ class BarcodeDecodeModel:
|
|
|
cv2.normalize(aligned_barcode, normalized_img, 0, 255, cv2.NORM_MINMAX)
|
|
|
logger.debug("Image normalized.")
|
|
|
|
|
|
- # Save the cropped image
|
|
|
- cv2.imwrite(f"cropped_image_decoding_normalized{detection_idx + 1}.png",normalized_img)
|
|
|
- logger.debug(f"Saved normalized image for decoding : {detection_idx + 1}")
|
|
|
+ # # Save the cropped image
|
|
|
+ # cv2.imwrite(f"cropped_image__normalized{detection_idx + 1}.png", normalized_img)
|
|
|
+ # logger.debug(f"Saved normalized image for decoding : {detection_idx + 1}")
|
|
|
|
|
|
# Run decoding model
|
|
|
# confidence = None
|
|
|
@@ -195,10 +209,10 @@ class BarcodeDecodeModel:
|
|
|
decode_result = self.decoding_sess.infer_new_request({'x': preprocessed_img})
|
|
|
output_tensor = decode_result['save_infer_model/scale_0.tmp_0']
|
|
|
logger.debug(f"Output tensor shape: {output_tensor.shape}")
|
|
|
- print(f"decode_result: {decode_result}")
|
|
|
+ # print(f"decode_result: {decode_result}")
|
|
|
output_indices_batch = np.argmax(output_tensor, axis=2)
|
|
|
output_probs_batch = np.max(output_tensor, axis=2)
|
|
|
- print(f"output_probs_batch:{output_probs_batch}")
|
|
|
+ # print(f"output_probs_batch:{output_probs_batch}")
|
|
|
# Decode text from indices
|
|
|
|
|
|
def preprocess_output_indices(output_indices_batch, output_probs_batch):
|
|
|
@@ -206,7 +220,7 @@ class BarcodeDecodeModel:
|
|
|
if output_indices_batch is None or len(output_indices_batch) == 0:
|
|
|
return False, "Empty output indices batch", None
|
|
|
|
|
|
- print(f"output_indices_batch: {output_indices_batch}")
|
|
|
+ # print(f"output_indices_batch: {output_indices_batch}")
|
|
|
first_row = output_indices_batch[0]
|
|
|
first_row_probs = output_probs_batch[0]
|
|
|
if first_row is None or len(first_row) == 0:
|