
Added BarcodeDecodeModel for barcode decoding. Implemented a manual fallback for barcode detection: when automatic detection fails, the user can specify the general area by hand. Reorganized the code structure to streamline the execution flow and improve maintainability of the segmentation and decoding operations.

Tricolops, 3 months ago
parent
commit
6f2e5e49e1
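
For context, the new --decoding_model flag introduced in this commit is used alongside the existing model flags; a hypothetical invocation (paths are placeholders, mirroring the README example below) might be:

    labelme data_annotated --labels labels.txt --nodata \
      --model Detection.xml \
      --segmentation_model segmentation.xml \
      --decoding_model decoding.xml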

+ 1 - 0
examples/semantic_segmentation/README.md

@@ -8,6 +8,7 @@ labelme data_annotated --labels labels.txt --nodata --validatelabel exact --conf
 
 ![](.readme/annotation.jpg)
 
+labelme "/media/tricolops/New Volume/Merangue/" --labels labels.txt --nodata --validatelabel exact --config '{shift_auto_shape_color: -2}' --model /home/tricolops/PaddleDetection/Exported_model/openvino_new_50/Detection.xml --segmentation_model /home/tricolops/PaddleSeg/output_bisenetv2/openvino_2023.1_segmentaion/segmentation.xml
 
 ## Convert to VOC-format Dataset
 

+ 1 - 20
examples/semantic_segmentation/labels.txt

@@ -1,22 +1,3 @@
 __ignore__
 _background_
-aeroplane
-bicycle
-bird
-boat
-bottle
-bus
-car
-cat
-chair
-cow
-diningtable
-dog
-horse
-motorbike
-person
-potted plant
-sheep
-sofa
-train
-tv/monitor
+barcode

+ 6 - 0
labelme/__main__.py

@@ -114,6 +114,11 @@ def main():
         type=str,
         help="Path to the segmentation model"
     )
+    parser.add_argument(
+        "--decoding_model",
+        type=str,
+        help="Path to the decoding_model model"
+    )
     args = parser.parse_args()
 
     if args.version:
@@ -152,6 +157,7 @@ def main():
     config = get_config(config_file_or_yaml, config_from_args)
     config["model"]=config_from_args.pop("model")
     config["segmentation_model"]=config_from_args.pop("segmentation_model")
+    config["decoding_model"]=config_from_args.pop("decoding_model")
     if not config["labels"] and config["validate_label"]:
         logger.error(
             "--labels must be specified with --validatelabel or "

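A minimal standalone sketch of how the new flag flows into the config (placeholder values; a hedged sketch, since the real main() pops the value from the parsed-args dict in the same way):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--decoding_model", type=str, help="Path to the decoding model")
    args = vars(parser.parse_args(["--decoding_model", "decoding.xml"]))  # example input

    config = {}
    config["decoding_model"] = args.pop("decoding_model")  # mirrors the hunk above
    print(config)  # {'decoding_model': 'decoding.xml'}
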
+ 15 - 5
labelme/ai/__init__.py

@@ -1,16 +1,25 @@
 import gdown
 from .efficient_sam import EfficientSam
 from .segment_anything_model import SegmentAnythingModel
-from .barcode_model import BarcodePredictModel
+from .barcode_detect import BarcodeDetectModel
+from .barcode_decode import BarcodeDecodeModel
 
-class BarcodePredict(BarcodePredictModel):
-    name="BarcodePredict(ov)"
+class BarcodeDetect(BarcodeDetectModel):
+    name="BarcodeDetect(ov)"
     def __init__(self, detection_model_path=None, segmentation_model_path=None):
         super().__init__(
             detection_model_path=detection_model_path,
-            segmentation_model_path=segmentation_model_path
+            segmentation_model_path=segmentation_model_path,
+            # decoding_model_path=decoding_model_path
         )
 
+class BarcodeDecode(BarcodeDecodeModel):
+    name="BarcodeDecode(ov)"
+    def __init__(self, decoding_model_path=None):
+        super().__init__(
+            decoding_model_path=decoding_model_path
+        )
+        
 class SegmentAnythingModelVitB(SegmentAnythingModel):
     name = "SegmentAnything (speed)"
 
@@ -97,5 +106,6 @@ MODELS = [
     SegmentAnythingModelVitH,
     EfficientSamVitT,
     EfficientSamVitS,
-    BarcodePredict,
+    BarcodeDetect,
+    BarcodeDecode,
 ]
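
The MODELS list acts as a plain registry; a minimal sketch of a name-based lookup (hypothetical helper, assuming only that each entry defines a class-level name attribute, as the wrappers above do):

    def find_model(name, models=MODELS):
        for cls in models:
            if cls.name == name:
                return cls
        raise KeyError(f"No model registered under {name!r}")

    # find_model("BarcodeDecode(ov)") would return the BarcodeDecode wrapper above.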

+ 341 - 0
labelme/ai/barcode_decode.py

@@ -0,0 +1,341 @@
+import imgviz
+from qtpy import QtCore
+from qtpy import QtGui
+from qtpy import QtWidgets
+import labelme.ai
+import labelme.utils
+from labelme import QT5
+from labelme.logger import logger
+from labelme.shape import Shape
+import collections
+import threading
+import numpy as np
+import openvino as ov
+import os.path as osp
+import cv2
+from labelme.utils import img_qt_to_arr
+from labelme.utils import load_barcode_dict
+
+class CodeSet:
+    NONE = 0
+    A = 1
+    B = 2
+    C = 3
+
+class Normalize:
+    def __init__(self, mean=(0.15525904, 0.15525904, 0.15525904), std=(0.12552188, 0.12552188, 0.12552188)):
+        if not (isinstance(mean, (list, tuple)) and isinstance(std, (list, tuple))):
+            raise ValueError("mean and std should be of type list or tuple.")
+        self.mean = np.array(mean, dtype=np.float32)
+        self.std = np.array(std, dtype=np.float32)
+
+        # Reshape for broadcasting to apply mean and std across the spatial dimensions of an image
+        self.mean = self.mean.reshape((1, 1, 3))
+        self.std = self.std.reshape((1, 1, 3))
+
+    def __call__(self, img):
+        img = img.astype(np.float32) / 255.0  # Scale pixel values to [0, 1]
+        img = (img - self.mean) / self.std  # Normalize
+        return img
+
+class BarcodeDecodeModel:
+    def __init__(self, decoding_model_path=None):
+        self.ie = ov.Core()
+        self.pixmap = QtGui.QPixmap()
+        #Load Decoding model if provided
+        self.decoding_net = None
+        self.decoding_sess = None
+        self._characters = load_barcode_dict() 
+        if decoding_model_path:
+            self.decoding_net = self.ie.read_model(model=decoding_model_path)
+            self.decoding_sess = self.ie.compile_model(model=self.decoding_net, device_name="CPU")
+
+        self.decoding_input_shape = (1, 3, 32, 256)
+        self.normalize = Normalize()  # Normalization instance
+        self._lock = threading.Lock()
+        self._image_embedding_cache = collections.OrderedDict()
+        self._max_cache_size = 10
+        self.pixmap = QtGui.QPixmap()
+
+    # def set_pixmap(self, pixmap: QtGui.QPixmap):
+    #     """
+    #     Set the QPixmap object for decoding.
+    #     Args:
+    #         pixmap (QtGui.QPixmap): The QPixmap object containing the image.
+    #     """
+    #     if pixmap is None or pixmap.isNull():
+    #         raise ValueError("Invalid QPixmap provided.")
+    #     self.pixmap = pixmap
+    #     logger.debug("Pixmap set successfully in BarcodeDecodeModel.")
+
+    def preprocess_image(self, image):
+        norm = Normalize(mean=(0.44948044,0.44948044,0.44948044), std=(0.22099442,0.22099442,0.22099442))
+        resized_image = cv2.resize(image, (self.decoding_input_shape[3], self.decoding_input_shape[2]))
+        resized_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2RGB)
+        resized_image = norm(resized_image)
+        logger.debug(f"Preprocessing image for decoding: {image.shape}")
+
+        input_tensor = resized_image.transpose(2, 0, 1)  # Convert HWC to CHW
+        input_tensor = np.expand_dims(input_tensor, 0)  # Add batch dimension
+        logger.debug(f"Processed image shape: {input_tensor.shape}")
+        return input_tensor
+    
+    def decode_from_points(self, points, detection_idx, original_image):
+        """
+        Decodes the cropped image based on points and returns the decoded text.
+        Args:
+            points (list): List of points defining the bounding box.
+            pixmap (QPixmap): Original image pixmap to crop from.
+        Returns:
+            str: Decoded text from the decoding model.
+        """
+        try:
+
+            # Convert scaled_points to a numpy array
+            polygon = np.array(points, dtype=np.int32)
+
+            # Create a mask of the same size as the original image
+            # original_image = labelme.utils.img_qt_to_arr(self.pixmap.toImage())
+            cv2.imwrite(f"original_image{detection_idx + 1}.png", original_image)
+            mask = np.zeros(original_image.shape[:2], dtype=np.uint8)
+            cv2.fillPoly(mask, [polygon], 255)  # Fill the polygon with white
+
+            # Apply the mask to the original image
+            masked_image = cv2.bitwise_and(original_image, original_image, mask=mask)
+
+            # Get the bounding rectangle of the polygon to crop the ROI
+            x, y, w, h = cv2.boundingRect(polygon)
+            cropped_image_dec = masked_image[y:y+h, x:x+w]
+
+            cv2.imwrite(f"cropped_exact_{detection_idx + 1}.png", cropped_image_dec)
+            logger.debug(f"cropped_exact image saved at  {detection_idx + 1}.")
+             
+            src_points = np.float32(points)
+
+            # Calculate the width and height of the barcode based on scaled_points
+            width = int(np.linalg.norm(src_points[0] - src_points[1]))
+            # print(width)
+            height = int(np.linalg.norm(src_points[1] - src_points[2]))
+            # print(height)
+
+            # Correct width/height if needed
+            if width < height:
+                width, height = height, width
+                # Reorder src_points to ensure the transformation aligns the longer side to the width
+                src_points = np.float32([
+                    src_points[1],  # Top-left becomes top-right
+                    src_points[2],  # Top-right becomes bottom-right
+                    src_points[3],  # Bottom-right becomes bottom-left
+                    src_points[0]   # Bottom-left becomes top-left
+                ])
+
+            # Define destination points for the flattened barcode
+            dst_points = np.float32([
+                [0, 0],
+                [width - 1, 0],
+                [width - 1, height - 1],
+                [0, height - 1]
+            ])
+
+            # Calculate the perspective transformation matrix
+            M = cv2.getPerspectiveTransform(src_points, dst_points)
+
+            # Apply the perspective transformation
+            aligned_barcode = cv2.warpPerspective(original_image, M, (width, height), flags=cv2.INTER_LINEAR)
+
+            # Save the aligned barcode image
+            cv2.imwrite(f"decoding_barcode_{detection_idx + 1}.png", aligned_barcode)
+            logger.debug(f"Aligned barcode saved at  {detection_idx + 1}.")
+
+            # Normalize the image to scale pixel intensities to the range [0, 255]
+            normalized_img = np.zeros(aligned_barcode.shape, aligned_barcode.dtype)
+            cv2.normalize(aligned_barcode, normalized_img, 0, 255, cv2.NORM_MINMAX)
+            logger.debug("Image normalized.")
+
+            # Save the cropped image
+            cv2.imwrite(f"cropped_image_decoding_normalized{detection_idx + 1}.png",normalized_img)
+            logger.debug(f"Saved normalized image for decoding : {detection_idx + 1}")
+
+            # Run decoding model
+            confidence = None
+            # Run decoding on the normalized image
+            decoded_text, confidence = self.run_decoding(normalized_img, detection_idx, confidence)
+
+            # Validate checksum
+            if decoded_text:
+                checksum_valid, validated_result = self.validate_code128_checksum(decoded_text, detection_idx)
+                if checksum_valid:
+                    logger.debug(f"Validated result for detection {detection_idx + 1}: {validated_result}")
+                    return validated_result  # Return validated result
+                else:
+                    logger.error(f"Checksum validation failed for detection {detection_idx + 1}. Retrying with 180° rotation.")
+
+                    # Rotate image 180 degrees and retry
+                    rotated_image = cv2.rotate(normalized_img, cv2.ROTATE_180)
+                    decoded_text, confidence = self.run_decoding(rotated_image, detection_idx, confidence)
+
+                    # Validate checksum again
+                    if decoded_text:
+                        checksum_valid, validated_result = self.validate_code128_checksum(decoded_text, detection_idx)
+                        if checksum_valid:
+                            logger.debug(f"Validated result after rotation for detection {detection_idx + 1}: {validated_result}")
+                            return validated_result
+                        else:
+                            logger.error(f"Checksum validation failed after rotation for detection {detection_idx + 1}. Error: {validated_result}")
+                            return "Decoding failed"
+            return "Decoding failed"
+        except Exception as e:
+            logger.error(f"Error in decode_from_points: {e}")
+            return "Error: Decoding failed"
+        
+    def run_decoding(self, image_np, detection_idx, confidence):
+        """Helper to run decoding on the given image."""
+        preprocessed_img = self.preprocess_image(
+            image_np
+        )
+        decode_result = self.decoding_sess.infer_new_request({'x': preprocessed_img})
+        output_tensor = decode_result['save_infer_model/scale_0.tmp_0']
+        logger.debug(f"Output tensor shape: {output_tensor.shape}")
+
+        output_indices = np.argmax(output_tensor, axis=2)
+        output_probs = np.max(output_tensor, axis=2)
+        
+        # Decode text from indices
+        decoded_text, confidence = self.decode_text(output_indices, output_probs, detection_idx)
+        logger.debug(f"Raw barcode: {decoded_text}, Confidence: {confidence:.2f}")
+        return decoded_text, confidence
+    
+    def decode_text(self, text_indices, text_probs, detection_idx):
+        """
+        Converts model output indices into text using the character dictionary.
+        Args:
+            text_indices (np.ndarray): Output indices from the decoding model.
+            text_probs (np.ndarray): Probabilities corresponding to the indices.
+        Returns:
+            tuple: Decoded text and its confidence score.
+        """
+        try:
+            max_index = len(self._characters) - 1
+            logger.debug(f"Loaded barcode dictionary with {len(self._characters)} characters.")
+
+            result_list = []
+            
+            for batch_idx in range(text_indices.shape[0]):  # Loop through batches
+                char_list = []
+                conf_list = []
+                for step_idx in range(text_indices.shape[1]):  # Loop through sequence length
+                    char_idx = int(text_indices[batch_idx, step_idx])
+                    if char_idx > max_index:
+                        logger.warning(f"Index {char_idx} is out of bounds for dictionary size {len(self._characters)}")
+                        continue  # Skip invalid indices
+
+                    char = self._characters[char_idx]
+                    # print("char",char)
+                    if char == "</s>":  # End token
+                        break
+                    char_list.append(char)
+                    conf_list.append(text_probs[batch_idx, step_idx])
+
+                text = ''.join(char_list)
+                confidence = np.mean(conf_list) if conf_list else 0.0
+                result_list.append((text, confidence))
+
+            # Return the first result (assuming batch size of 1 for now)
+            return result_list[0] if result_list else ("", 0.0)
+        except Exception as e:
+            logger.error(f"Error in decode_text: {e}")
+            return "Error: Decoding failed", 0.0
+
+
+    def validate_code128_checksum(self, decoded_text, detection_idx):
+        # Convert characters to their corresponding Code 128 values using the index in _characters
+        # print(self._characters)
+        code128Values = [self._characters.index(char, 1) - 1 if char in self._characters[1:] else -1 for char in decoded_text]
+        logger.debug(f"code128Values:{code128Values}")
+        result = ""
+        err_msg = ""
+        currentCodeSet = CodeSet.B  # Default to Code Set B, assuming start code is included in decoded_text
+
+        if code128Values[0] in [103, 104, 105]:
+            start_codes = {103: CodeSet.A, 104: CodeSet.B, 105: CodeSet.C}
+            currentCodeSet = start_codes[code128Values[0]]
+            # print("currentCodeSet",currentCodeSet)
+        else:
+            err_msg = f"No start code detected, first code is {code128Values[0]}"
+            return False, err_msg
+
+        checksum_expected = code128Values[-2]
+        # print("Expected checksum:", checksum_expected)
+
+        # Calculate the checksum using the formula
+        checksum_calculated = code128Values[0]  # Start with the start code value
+        for i, value in enumerate(code128Values[1:-2], start=1):  # Exclude stop code
+            weighted_value = value * i
+            checksum_calculated += weighted_value
+            # logger.debug(f"Position {i}, Value {value}, Weighted Value {weighted_value}, Running Checksum {checksum_calculated}")
+
+        checksum_calculated %= 103
+        logger.debug(f"Final Calculated Checksum (mod 103): {checksum_calculated}")
+        if checksum_calculated != checksum_expected:
+            err_msg = f"Invalid checksum value, supposed to be {checksum_calculated} but got {checksum_expected}"
+            return False, err_msg
+
+        # Verify the stop code
+        if code128Values[-1] != 106:
+            err_msg = "No valid stop code detected at the end of the sequence."
+            return False, err_msg
+        
+        result = ""
+        i = 1  # Start after the start code
+        while i < len(code128Values) - 2:  # Exclude checksum and stop code
+            value = code128Values[i]
+
+            # Handle special functions and code set shifts
+            if value == 102:  # FNC1 for GS1-128
+                logger.debug(f"Detected FNC1 at position {i}, treated as AI separator.")
+                # result += "|"  # Optional: Add a delimiter for AI parsing
+                i += 1
+                continue
+            elif value == 99:  # Switch to Code Set C
+                currentCodeSet = CodeSet.C
+                logger.debug(f"Switched to Code Set C at position {i}")
+                i += 1
+                continue
+            elif value == 100:  # Switch to Code Set B
+                currentCodeSet = CodeSet.B
+                logger.debug(f"Switched to Code Set B at position {i}")
+                i += 1
+                continue
+
+            # Decode based on the current Code Set
+            if currentCodeSet == CodeSet.C:
+                result += f"{value:02}"
+                # logger.debug(f"Added Code Set C value {value:02} at position {i}")
+                i += 1
+            elif currentCodeSet == CodeSet.B:
+                if 0 <= value <= 95:
+                    char = self._characters[value + 1]  # Map using the single dictionary
+                    result += char
+                    # logger.debug(f"Added Code Set B char {char} at position {i}")
+                    i += 1
+                else:
+                    err_msg = f"Invalid Code Set B value: {value}"
+                    logger.error(err_msg)
+                    return False, err_msg
+            elif currentCodeSet == CodeSet.A:
+                if 0 <= value <= 95:
+                    char = self._characters[value + 1]  # Map using the single dictionary
+                    result += char
+                    # logger.debug(f"Added Code Set A char {char} at position {i}")
+                    i += 1
+                else:
+                    err_msg = f"Invalid Code Set A value: {value}"
+                    logger.error(err_msg)
+                    return False, err_msg
+
+        # logger.debug(f"Decoded result after processing: {result}")
+        # logger.debug(f"Result indices for {detection_idx + 1}: {result_indices}")
+        return True, result       
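
As a sanity check on the checksum arithmetic in validate_code128_checksum, a self-contained example for the Code Set B message "ABC" (standard Code 128 symbol values, not the model's dictionary indices):

    values = [104, 33, 34, 35]                 # Start B, then 'A', 'B', 'C' (ASCII - 32)
    weighted = sum(v * i for i, v in enumerate(values[1:], start=1))
    checksum = (values[0] + weighted) % 103    # (104 + 33 + 68 + 105) % 103 == 1
    sequence = values + [checksum, 106]        # append checksum and stop code
    print(sequence)                            # [104, 33, 34, 35, 1, 106]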

+ 44 - 41
labelme/ai/barcode_model.py → labelme/ai/barcode_detect.py

@@ -11,21 +11,22 @@ from qtpy import QtGui
 
 
 class Normalize:
-    def __init__(self, mean=(0.5,), std=(0.5,)):
+    def __init__(self, mean=(0.15525904, 0.15525904, 0.15525904), std=(0.12552188, 0.12552188, 0.12552188)):
         if not (isinstance(mean, (list, tuple)) and isinstance(std, (list, tuple))):
             raise ValueError("mean and std should be of type list or tuple.")
         self.mean = np.array(mean, dtype=np.float32)
         self.std = np.array(std, dtype=np.float32)
 
-        if np.any(self.std == 0):
-            raise ValueError("std should not contain zero values.")
-    
+        # Reshape for broadcasting to apply mean and std across the spatial dimensions of an image
+        self.mean = self.mean.reshape((1, 1, 3))
+        self.std = self.std.reshape((1, 1, 3))
+
     def __call__(self, img):
         img = img.astype(np.float32) / 255.0  # Scale pixel values to [0, 1]
         img = (img - self.mean) / self.std  # Normalize
         return img
-    
-class BarcodePredictModel:
+
+class BarcodeDetectModel:
     def __init__(self, detection_model_path, segmentation_model_path=None):
         self.ie = ov.Core()
 
@@ -33,13 +34,14 @@ class BarcodePredictModel:
         self.detection_net = self.ie.read_model(model=detection_model_path)
         self.detection_sess = self.ie.compile_model(model=self.detection_net, device_name="CPU")
         self.detection_request = self.detection_sess.create_infer_request()
+        
         # Load segmentation model if provided
         self.segmentation_net = None
         self.segmentation_sess = None
         if segmentation_model_path:
             self.segmentation_net = self.ie.read_model(model=segmentation_model_path)
             self.segmentation_sess = self.ie.compile_model(model=self.segmentation_net, device_name="CPU")
-
+        
         self._lock = threading.Lock()
         self.input_height = 640  # Input shape for detection model (example size)
         self.input_width = 640
@@ -66,20 +68,21 @@ class BarcodePredictModel:
     def preprocess_image(self, image, for_segmentation=False):
         if for_segmentation:
             # Resize image to segmentation model input size
-            # logger.debug(f"Preprocessing image for segmentation: {image.shape}")
+            logger.debug(f"Preprocessing image for segmentation: {image.shape}")
+            norm = Normalize(mean=(0.447365,0.447365,0.447365), std=(0.17667491,0.17667491,0.17667491))
             resized_image = cv2.resize(image, (self.segmentation_input_shape[3], self.segmentation_input_shape[2]))  # Width, Height
             resized_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2RGB)
-            resized_image = self.normalize(resized_image)  # Normalize for segmentation model
+            resized_image = norm(resized_image)  # Normalize for segmentation model
         else:
             # Resize image for detection model input size
             logger.debug(f"Preprocessing image for detection: {image.shape}")
             resized_image = cv2.resize(image, (self.input_width, self.input_height))
             resized_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2RGB)
-            resized_image = resized_image.astype('float32') / 255.0
-        
+            resized_image = self.normalize(resized_image)
+            # resized_image = resized_image.astype('float32') / 255.0
         input_tensor = resized_image.transpose(2, 0, 1)  # Convert HWC to CHW
         input_tensor = np.expand_dims(input_tensor, 0)  # Add batch dimension
-        logger.debug(f"Processed image shape: {input_tensor.shape}")
+        # logger.debug(f"Processed image shape: {input_tensor.shape}")
         return input_tensor
 
     def _compute_and_cache_image_embedding(self):
@@ -103,7 +106,7 @@ class BarcodePredictModel:
             return new_result
 
     def predict_mask_from_points(self,points=None,point_labels=None):
-        return _collect_result_from_output(
+        return self._collect_result_from_output(
             outputs=self._get_image_embedding(),
             raw_width=self.raw_width,
             raw_height=self.raw_height,
@@ -113,33 +116,33 @@ class BarcodePredictModel:
         result_list=self.predict_mask_from_points(points,point_labels)
         return result_list
 
-def _collect_result_from_output(outputs, raw_width, raw_height):
-    # Extract the desired output array from outputs dictionary
-    output_array = None
-    for key in outputs:
-        if 'save_infer_model/scale_0.tmp_0' in key.names:
-            output_array = outputs[key]
-            break
-    if output_array is None:
-        raise ValueError("Desired output not found in outputs")
+    def _collect_result_from_output(self, outputs, raw_width, raw_height):
+        # Extract the desired output array from outputs dictionary
+        output_array = None
+        for key in outputs:
+            if 'save_infer_model/scale_0.tmp_0' in key.names:
+                output_array = outputs[key]
+                break
+        if output_array is None:
+            raise ValueError("Desired output not found in outputs")
 
-    outputs = output_array  # shape [50,6]
-    point_list = []
-    thresh_hold = 0.7
+        outputs = output_array  # shape [50,6]
+        point_list = []
+        thresh_hold = 0.7
 
-    for bbox_info in outputs:
-        score = bbox_info[1]
-        if score > thresh_hold:
-            x1_raw = bbox_info[2]
-            y1_raw = bbox_info[3]
-            x2_raw = bbox_info[4]
-            y2_raw = bbox_info[5]
-            print(f"Raw bbox coordinates: x1={x1_raw}, y1={y1_raw}, x2={x2_raw}, y2={y2_raw}")
-            x1 = max(min(int(x1_raw), raw_width - 1), 0)
-            y1 = max(min(int(y1_raw), raw_height - 1), 0)
-            x2 = max(min(int(x2_raw), raw_width - 1), 0)
-            y2 = max(min(int(y2_raw), raw_height - 1), 0)
-            print(f"Clamped bbox coordinates: x1={x1}, y1={y1}, x2={x2}, y2={y2}")
-            point_xy = [[x1, y1], [x2, y1], [x2, y2], [x1, y2]]
-            point_list.append(point_xy)
-    return point_list
+        for bbox_info in outputs:
+            score = bbox_info[1]
+            if score > thresh_hold:
+                x1_raw = bbox_info[2]
+                y1_raw = bbox_info[3]
+                x2_raw = bbox_info[4]
+                y2_raw = bbox_info[5]
+                # print(f"Raw bbox coordinates: x1={x1_raw}, y1={y1_raw}, x2={x2_raw}, y2={y2_raw}")
+                x1 = max(min(int(x1_raw), raw_width - 1), 0)
+                y1 = max(min(int(y1_raw), raw_height - 1), 0)
+                x2 = max(min(int(x2_raw), raw_width - 1), 0)
+                y2 = max(min(int(y2_raw), raw_height - 1), 0)
+                # print(f"Clamped bbox coordinates: x1={x1}, y1={y1}, x2={x2}, y2={y2}")
+                point_xy = [[x1, y1], [x2, y1], [x2, y2], [x1, y2]]
+                point_list.append(point_xy)
+        return point_list
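
The score filter in _collect_result_from_output reduces to a few lines over the [N, 6] output ([class, score, x1, y1, x2, y2]); a simplified standalone version (clamping to image bounds omitted):

    import numpy as np

    outputs = np.array([[0, 0.91, 10.2, 5.0, 120.7, 60.3],
                        [0, 0.40, 30.0, 8.0,  90.0, 40.0]])  # toy detections
    point_list = []
    for _cls, score, x1, y1, x2, y2 in outputs:
        if score > 0.7:                                      # same threshold as above
            x1, y1, x2, y2 = (int(v) for v in (x1, y1, x2, y2))
            point_list.append([[x1, y1], [x2, y1], [x2, y2], [x1, y2]])
    print(point_list)                                        # only the first box survives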

+ 3 - 2
labelme/app.py

@@ -168,6 +168,7 @@ class MainWindow(QtWidgets.QMainWindow):
             crosshair=self._config["canvas"]["crosshair"],
             detection_model_path=self._config["model"],
             segmentation_model_path=self._config["segmentation_model"],
+            decoding_model_path=self._config["decoding_model"],
         )
         self.canvas.zoomRequest.connect(self.zoomRequest)
         self.canvas.mouseMoved.connect(
@@ -1457,10 +1458,10 @@ class MainWindow(QtWidgets.QMainWindow):
             text = items[0].data(Qt.UserRole)
         flags = {}
         group_id = None
-        description = ""
+        description = self.canvas.decoded_barcode
         if self._config["display_label_popup"] or not text:
             previous_text = self.labelDialog.edit.text()
-            text, flags, group_id, description = self.labelDialog.popUp(text)
+            text, flags, group_id, description = self.labelDialog.popUp(text, description=description)
             if not text:
                 self.labelDialog.edit.setText(previous_text)
 

+ 12 - 0
labelme/utils/__init__.py

@@ -27,3 +27,15 @@ from .qt import struct
 from .qt import distance
 from .qt import distancetoline
 from .qt import fmtShortcut
+
+import os
+
+def load_barcode_dict():
+    """Load the barcode dictionary from the utils folder."""
+    dict_path = os.path.join(os.path.dirname(__file__), "barcode_dict.txt")
+    try:
+        with open(dict_path, "r", encoding="utf-8") as f:
+            characters = f.read().splitlines()
+        return ["</s>"] + characters  # Add special character
+    except Exception as e:
+        raise FileNotFoundError(f"Error loading barcode_dict.txt: {e}")
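
A usage sketch (assuming barcode_dict.txt ships next to the module); index 0 is reserved for the end-of-sequence token that decode_text stops on:

    from labelme.utils import load_barcode_dict

    characters = load_barcode_dict()
    assert characters[0] == "</s>"   # special end token prepended by the helper
    print(len(characters))           # dictionary size reported in the debug logs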

+ 150 - 48
labelme/widgets/canvas.py

@@ -2,7 +2,8 @@ import imgviz
 from qtpy import QtCore
 from qtpy import QtGui
 from qtpy import QtWidgets
-
+import cv2
+import numpy as np
 import labelme.ai
 import labelme.utils
 from labelme import QT5
@@ -10,6 +11,8 @@ from labelme.logger import logger
 from labelme.shape import Shape
 import numpy as np
 import cv2
+from labelme.utils import load_barcode_dict
+from labelme.widgets.label_dialog import LabelDialog
 # TODO(unknown):
 # - [maybe] Find optimal epsilon value.
 
@@ -22,7 +25,6 @@ CURSOR_GRAB = QtCore.Qt.OpenHandCursor
 
 MOVE_SPEED = 5.0
 
-
 class Canvas(QtWidgets.QWidget):
     zoomRequest = QtCore.Signal(int, QtCore.QPoint)
     scrollRequest = QtCore.Signal(int, int)
@@ -44,7 +46,10 @@ class Canvas(QtWidgets.QWidget):
         self.epsilon = kwargs.pop("epsilon", 10.0)
         self.detection_model_path=kwargs.pop("detection_model_path",None)
         self.segmentation_model_path=kwargs.pop("segmentation_model_path",None)
+        self.decoding_model_path=kwargs.pop("decoding_model_path", None)
         self.double_click = kwargs.pop("double_click", "close")
+        self._characters = load_barcode_dict()  # Load dictionary once
+        logger.debug(f"Loaded barcode dictionary with {len(self._characters)} characters.")
         if self.double_click not in [None, "close"]:
             raise ValueError(
                 "Unexpected value for double_click event: {}".format(self.double_click)
@@ -99,6 +104,7 @@ class Canvas(QtWidgets.QWidget):
         self.draw_pred=False
         self.pred_bbox_points=None
         self.current_bbox_point=None
+        self.decoded_barcode = None
         # Menus:
         # 0: right-click without selection and dragging of shapes
         # 1: right-click with selection and dragging of shapes
@@ -110,6 +116,7 @@ class Canvas(QtWidgets.QWidget):
         self._ai_model = None
         self._detection_model = None
         self._segmentation_model = None
+        self._decoding_model = None
     def fillDrawing(self):
         return self._fill_drawing
 
@@ -135,18 +142,25 @@ class Canvas(QtWidgets.QWidget):
             raise ValueError("Unsupported createMode: %s" % value)
         self._createMode = value
 
-    def initializeBarcodeModel(self, detection_model_path, segmentation_model_path=None):
+    def initializeBarcodeModel(self, detection_model_path, segmentation_model_path=None, decoding_model_path=None):
         if not detection_model_path:
             raise ValueError("Detection model path is required.")
 
         logger.debug("Initializing only detection model: %r" % "BarcodePredictModel")
-        self._detection_model = labelme.ai.BarcodePredictModel(detection_model_path)
+        self._detection_model = labelme.ai.BarcodeDetectModel(detection_model_path)
 
         if segmentation_model_path:
             logger.debug("Initializing barcode detection  & Segmentation model: %r" % "BarcodePredictModel")
-            self._segmentation_model = labelme.ai.BarcodePredictModel(
+            self._segmentation_model = labelme.ai.BarcodeDetectModel(
                 detection_model_path, segmentation_model_path
             )
+
+        if decoding_model_path:
+            logger.debug("Initializing barcode detection,  Segmentation model, Decoding_model: %r" % "BarcodePredictModel")
+            self._decoding_model = labelme.ai.BarcodeDecodeModel(
+                decoding_model_path
+            )
+
         if self.pixmap is None:
             logger.warning("Pixmap is not set yet")
             return
@@ -198,7 +212,7 @@ class Canvas(QtWidgets.QWidget):
         # and app.py::loadShapes and our own Canvas::loadShapes function.
         if not self.isShapeRestorable:
             return
-        print(f"shape is restorable")
+        # print(f"shape is restorable")
         self.shapesBackups.pop()  # latest
 
         # The application will eventually call Canvas.loadShapes which will
@@ -273,6 +287,7 @@ class Canvas(QtWidgets.QWidget):
 
         # Polygon drawing.
         if self.drawing():
+            
             if self.createMode in ["ai_polygon", "ai_mask"]:
                 self.line.shape_type = "points"
             else:
@@ -323,6 +338,7 @@ class Canvas(QtWidgets.QWidget):
                 self.line.points = [self.current[0]]
                 self.line.point_labels = [1]
                 self.line.close()
+            # print("self.line.points", self.line.points)
             assert len(self.line.points) == len(self.line.point_labels)
             self.repaint()
             self.current.highlightClear()
@@ -846,6 +862,18 @@ class Canvas(QtWidgets.QWidget):
     def finalise(self):
         if(self.current is None):
             return
+        # If in manual mode and points are finalized
+        if self.createMode in ["polygon", "rectangle"] and len(self.current.points) > 0:
+            if self._detection_model is None:
+                logger.info(f"Initializing AI model")
+                self.initializeBarcodeModel(self.detection_model_path, self.segmentation_model_path, self.decoding_model_path)
+            # Extract the points from the manually drawn shape
+            manual_points = [[point.x(), point.y()] for point in self.current.points]
+            # print(manual_points)
+            logger.debug(f"Manually drawn points: {manual_points}")
+            self.detect_and_segment(manual_points=manual_points)
+            self.mode = self.EDIT  # Or any appropriate mode to disable drawing behavior
+            return
         if self.createMode == "ai_polygon":
             # convert points to polygon by an AI model
             assert self.current.shape_type == "points"
@@ -853,6 +881,7 @@ class Canvas(QtWidgets.QWidget):
                 points=[[point.x(), point.y()] for point in self.current.points],
                 point_labels=self.current.point_labels,
             )
+            
             self.current.setShapeRefined(
                 points=[QtCore.QPointF(point[0], point[1]) for point in points],
                 point_labels=[1] * len(points),
@@ -882,11 +911,15 @@ class Canvas(QtWidgets.QWidget):
                     points=[QtCore.QPointF(point[0], point[1]) for point in bbox_point],
                     point_labels=[1]*len(bbox_point)
                 )
+                drawing_shape.description = self.decoded_barcode
                 drawing_shape.close()
                 self.shapes.append(drawing_shape)
                 self.storeShapes()
                 self.update()
                 self.newShape.emit()
+            # self.pred_bbox_points = None
+            # self.draw_pred = False
+            # self.detect_and_segment()  # Process the next detection
             current_copy.close()
             current_copy=None
             if(self.current):
@@ -1032,7 +1065,7 @@ class Canvas(QtWidgets.QWidget):
                 )
                         
                 if self._detection_model:
-                    if self._segmentation_model is None:
+                    if self.segmentation_model_path is None:
                         logger.info(f"Performing detection only.")
                         # Get prediction from model
                         self.pred_bbox_points = self._detection_model.predict_polygon_from_points()
@@ -1043,8 +1076,9 @@ class Canvas(QtWidgets.QWidget):
                         else:
                             print("No bounding boxes detected.")    
                     else:
-                            logger.info(f"Performing detection and segmentation.")
-                            self.detect_and_segment() 
+                            logger.info("Performing detection, segmentation and decoding.")
+                            self.initializeBarcodeModel(self.detection_model_path, self.segmentation_model_path, self.decoding_model_path)
+                            self.detect_and_segment(manual_points=None) 
         elif self.editing():
             if key == QtCore.Qt.Key_Up:
                 self.moveByKeyboard(QtCore.QPointF(0.0, -MOVE_SPEED))
@@ -1055,13 +1089,57 @@ class Canvas(QtWidgets.QWidget):
             elif key == QtCore.Qt.Key_Right:
                 self.moveByKeyboard(QtCore.QPointF(MOVE_SPEED, 0.0))
 
-    
-    def scale_points(self, approx, mask_shape, cropped_shape, x_min, y_min):
+    def scale_points(self, points, mask_shape, cropped_shape, x_min, y_min):
+        """
+        Scale the points from the segmentation mask space back to the original cropped image coordinates.
+        
+        Args:
+            points (np.ndarray): Points to scale (Nx2 array or list of lists).
+            mask_shape (tuple): Shape of the segmentation mask (height, width).
+            cropped_shape (tuple): Shape of the cropped image in the original image (height, width).
+            x_min (int): Minimum x-coordinate of the cropped region in the original image.
+            y_min (int): Minimum y-coordinate of the cropped region in the original image.
+
+        Returns:
+            scaled_points (list): List of scaled points as [[x1, y1], [x2, y2], ...].
+        """
+        # Compute scaling factors
         scale_x = cropped_shape[1] / mask_shape[1]  # Scale factor for x-axis
         scale_y = cropped_shape[0] / mask_shape[0]  # Scale factor for y-axis
-        return [[int(pt[0][0] * scale_x) + x_min, int(pt[0][1] * scale_y) + y_min] for pt in approx]
+
+        # Scale and translate the points
+        scaled_points = []
+        for pt in points:
+            scaled_x = int(pt[0] * scale_x) + x_min
+            scaled_y = int(pt[1] * scale_y) + y_min
+            scaled_points.append([scaled_x, scaled_y])
+        # logger.debug(f"scaled points: {scaled_points}")
+        return scaled_points
     
-    def detect_and_segment(self):
+    def expand_bbox(self, x_min, y_min, x_max, y_max, factor=1.3):
+        """
+        Expands the bounding box by a given factor.
+        """
+        center_x = (x_max + x_min) / 2
+        center_y = (y_max + y_min) / 2
+        width = (x_max - x_min) * factor
+        height = (y_max - y_min) * factor
+
+        new_x_min = int(center_x - width / 2)
+        new_y_min = int(center_y - height / 2)
+        new_x_max = int(center_x + width / 2)
+        new_y_max = int(center_y + height / 2)
+
+        # Ensure the coordinates do not go out of image bounds
+        new_x_min = max(0, new_x_min)
+        new_y_min = max(0, new_y_min)
+        new_x_max = min(self.pixmap.width() - 1, new_x_max)
+        new_y_max = min(self.pixmap.height() - 1, new_y_max)
+
+        return new_x_min, new_y_min, new_x_max, new_y_max
+
+
+    def detect_and_segment(self, manual_points=None):
         """
         Perform detection and segmentation (if both models are available).
         """
@@ -1069,10 +1147,11 @@ class Canvas(QtWidgets.QWidget):
         self.current = Shape(
             shape_type="points" if self.createMode in ["ai_polygon", "ai_mask"] else self.createMode
         )
+        print("self.current",self.current)
 
         # Step 1: detection bounding box points
-        detection_results = self._detection_model.predict_polygon_from_points()
-
+        detection_results = [manual_points] if manual_points else self._detection_model.predict_polygon_from_points()
+        # print(detection_results)
         if not detection_results or len(detection_results) == 0:
             logger.warning("No detection found")
             return
@@ -1099,7 +1178,7 @@ class Canvas(QtWidgets.QWidget):
                 continue
 
             # Converting bounding box values to integers for cropping
-            x_min, y_min, x_max, y_max = int(x_min), int(y_min), int(x_max), int(y_max)
+            x_min, y_min, x_max, y_max = self.expand_bbox(x_min, y_min, x_max, y_max)
 
             # Step 3: Cropping image based on detection output
             try:
@@ -1153,14 +1232,20 @@ class Canvas(QtWidgets.QWidget):
                 # Normalize the mask to 0 and 255 and convert to uint8
                 mask = (mask * 255).astype(np.uint8)
 
-                logger.debug(f"Converted mask shape for detection {detection_idx + 1}: {mask.shape}, dtype: {mask.dtype}")
-                cv2.imwrite(f"segmentation_mask_{detection_idx + 1}.png", mask)
+                # logger.debug(f"Converted mask shape for detection {detection_idx + 1}: {mask.shape}, dtype: {mask.dtype}")
+                # cv2.imwrite(f"segmentation_mask_{detection_idx + 1}.png", mask)
+                
+                # Scale mask to original cropped image size
+                scaled_mask = cv2.resize(mask, (orig_cropped_shape[1], orig_cropped_shape[0]), interpolation=cv2.INTER_NEAREST)
+  
                 if rotated:
                     cropped_image = cv2.rotate(cropped_image, cv2.ROTATE_90_COUNTERCLOCKWISE)
-                    mask = cv2.rotate(mask, cv2.ROTATE_90_COUNTERCLOCKWISE)
+                    mask = cv2.rotate(scaled_mask, cv2.ROTATE_90_COUNTERCLOCKWISE)
                     rotated_cropped_shape = cropped_image.shape[:2]
+                else:
+                    mask = scaled_mask
                 
-                # cv2.imwrite(f"segmentation_mask_{detection_idx + 1}.png", mask)
+                cv2.imwrite(f"scaled_segmentation_mask_{detection_idx + 1}.png", mask)
                 logger.debug(f"Saved segmentation mask for detection {detection_idx + 1}.")
                 
                 # Step 7: Find contours
@@ -1168,31 +1253,53 @@ class Canvas(QtWidgets.QWidget):
                 logger.debug(f"Found {len(contours)} contours in the mask for detection {detection_idx + 1}.")
 
                 if len(contours) > 0:
+                    # Get the largest contour
                     largest_contour = max(contours, key=cv2.contourArea)
-                    
-                    # Step 8: Approximate a polygon with exactly 4 points (quadrilateral)
-                    epsilon = 0.02 * cv2.arcLength(largest_contour, True)  # epsilon for precision
-                    approx = cv2.approxPolyDP(largest_contour, epsilon, True)
-
-                    # If the approximation doesn't result in 4 points, force it
-                    if len(approx) != 4:
-                        # Using boundingRect as fallback in case of insufficient points
-                        print("log here")
-                        x, y, w, h = cv2.boundingRect(largest_contour)
-                        point_xy = [
-                            [x + x_min, y + y_min],          # Top-left
-                            [x + w + x_min, y + y_min],      # Top-right
-                            [x + w + x_min, y + h + y_min],  # Bottom-right
-                            [x + x_min, y + h + y_min]       # Bottom-left
-                        ]
+
+                    # Get bounding and rotated rectangles
+                    rot_rect = cv2.minAreaRect(largest_contour)
+                    box_points = cv2.boxPoints(rot_rect)
+                    box_points = np.int32(box_points)
+
+                    # Log the rotated rectangle points
+                    # logger.debug(f"Rotated corners (box_points) for detection {detection_idx + 1}: {box_points}")
+
+                    # Step 1: Draw the rotated rectangle on the cropped image
+                    cropped_with_rects = cropped_image.copy()
+                    cv2.drawContours(cropped_with_rects, [box_points], -1, (0, 255, 0), 2)
+                    cv2.imwrite(f"cropped_with_rects_{detection_idx + 1}.png", cropped_with_rects)
+                    logger.debug(f"Saved cropped image with rectangles for detection {detection_idx + 1}.")
+
+                    if rotated:
+                        # Scale points from mask space to the original image space with rotation
+                        scaled_points = self.scale_points(box_points, mask.shape, rotated_cropped_shape, x_min, y_min)
                     else:
-                        if rotated:
-                            point_xy = self.scale_points(approx, mask.shape, rotated_cropped_shape, x_min, y_min)
-                        else:
-                            point_xy = self.scale_points(approx, mask.shape, orig_cropped_shape, x_min, y_min)
-                    logger.debug(f"Generated 4 corner points for the polygon for detection {detection_idx + 1}: {point_xy}")
-                    self.pred_bbox_points = [point_xy]
-                    logger.debug(f"Predicted Bounding Box Points for detection {detection_idx + 1}: {self.pred_bbox_points}")
+                        # Scale points without rotation
+                        scaled_points = self.scale_points(box_points, mask.shape, orig_cropped_shape, x_min, y_min)
+
+                    self.pred_bbox_points = [scaled_points]
+                    logger.debug(f"Predicted Bounding Box Points for barcode {detection_idx + 1}: {self.pred_bbox_points}")
+                  
+                    if self.pred_bbox_points:
+                        for bbox_points in self.pred_bbox_points:
+                            try:
+                                original_image = labelme.utils.img_qt_to_arr(self.pixmap.toImage())
+                                # Call the decoding function
+                                decoded_output = self._decoding_model.decode_from_points(
+                                    points=bbox_points,
+                                    detection_idx=detection_idx,
+                                    original_image = original_image
+                                )
+                                logger.debug(f"Decoded output for detection {detection_idx + 1}: {decoded_output}")
+                                # passed to the popup through app.py and label_dialog.py
+                                self.decoded_barcode = decoded_output
+                                # (Optional) Use the decoded output for updating UI or further processing
+                                # Example: Print or log the decoded text
+                                # print(f"Decoded output: {decoded_output}")
+                            except Exception as e:
+                                logger.error(f"Error during decoding for bbox points {bbox_points}: {e}")
+                    
+                    # Each shape's points were passed to the decoding model above; the result is surfaced via the label popup.
                     if self.pred_bbox_points:
                         self.draw_pred = True
                         self.finalise()
@@ -1212,7 +1319,6 @@ class Canvas(QtWidgets.QWidget):
         if all_segmentation_results:
             logger.info(f"Segmentation results for all detections: {all_segmentation_results}")
 
-
     def keyReleaseEvent(self, ev):
         modifiers = ev.modifiers()
         if self.drawing():
@@ -1264,10 +1370,6 @@ class Canvas(QtWidgets.QWidget):
 
     def loadPixmap(self, pixmap, clear_shapes=True):
         self.pixmap = pixmap
-        if self._detection_model:
-            self._detection_model.set_image(
-                image=labelme.utils.img_qt_to_arr(self.pixmap.toImage())
-            )
         if clear_shapes:
             self.shapes = []
         self.update()
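
The contour step above swaps 4-point polygon approximation for a rotated rectangle; the core OpenCV calls on a toy mask (a minimal sketch, not the canvas code itself):

    import cv2
    import numpy as np

    mask = np.zeros((100, 100), dtype=np.uint8)
    cv2.rectangle(mask, (20, 30), (80, 60), 255, -1)   # stand-in segmentation mask
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    largest = max(contours, key=cv2.contourArea)
    box_points = np.int32(cv2.boxPoints(cv2.minAreaRect(largest)))  # 4 rotated corners
    print(box_points)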

+ 2 - 1
labelme/widgets/label_dialog.py

@@ -211,9 +211,10 @@ class LabelDialog(QtWidgets.QDialog):
         # if text is None, the previous label in self.edit is kept
         if text is None:
             text = self.edit.text()
-        # description is always initialized by empty text c.f., self.edit.text
+        # description is always initialized by empty text c.f., self.edit.text;
+        # the barcode value is passed in from app.py newShape() via the description keyword
         if description is None:
             description = ""
         self.editDescription.setPlainText(description)
         if flags:
             self.setFlags(flags)