Friday, September 12, 2025
Home › Artificial Intelligence › How to Build a Multilingual OCR AI Agent in Python with...

How to Build a Multilingual OCR AI Agent in Python with EasyOCR and OpenCV

class AdvancedOCRAgent:
    """
    Advanced OCR AI agent with image preprocessing, multi-language support,
    and intelligent text-extraction capabilities.

    Uses EasyOCR for recognition, OpenCV for preprocessing, and matplotlib
    for visualization. ``upload_image`` assumes a Google Colab environment
    (``google.colab.files``); every other method is environment-agnostic.
    """

    def __init__(self, languages: Optional[List[str]] = None, gpu: bool = True):
        """Initialize the OCR agent with the given EasyOCR language codes.

        Args:
            languages: EasyOCR language codes (defaults to ['en']).
            gpu: Whether EasyOCR should try to use a GPU.
        """
        print("🤖 Initializing Advanced OCR Agent...")
        # Avoid a mutable default argument; None means "English only".
        self.languages = ['en'] if languages is None else languages
        self.reader = easyocr.Reader(self.languages, gpu=gpu)
        # Detections below this confidence are dropped by extract_text().
        self.confidence_threshold = 0.5
        print(f"✅ OCR Agent ready! Languages: {self.languages}")

    def upload_image(self) -> Optional[str]:
        """Upload an image file via the Colab interface.

        Returns:
            The name of the first uploaded file, or None if nothing was uploaded.
        """
        print("📁 Upload your image file:")
        uploaded = files.upload()  # google.colab.files
        if not uploaded:
            return None
        filename = next(iter(uploaded))
        print(f"✅ Uploaded: {filename}")
        return filename

    def preprocess_image(self, image: np.ndarray, enhance: bool = True) -> np.ndarray:
        """Preprocess an image for better OCR accuracy.

        Args:
            image: BGR or grayscale image array.
            enhance: Apply CLAHE, denoising, and sharpening before binarization.

        Returns:
            A binarized (adaptive-thresholded) grayscale image.
        """
        # Convert to grayscale if the input is a 3-channel (color) image.
        if len(image.shape) == 3:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            gray = image.copy()

        if enhance:
            # CLAHE boosts local contrast without blowing out bright regions.
            clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
            gray = clahe.apply(gray)

            # Remove noise that would confuse character segmentation.
            gray = cv2.fastNlMeansDenoising(gray)

            # Sharpen edges with the standard 3x3 sharpening kernel.
            kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
            gray = cv2.filter2D(gray, -1, kernel)

        # Adaptive thresholding copes with uneven lighting better than a
        # single global threshold.
        binary = cv2.adaptiveThreshold(
            gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
        )

        return binary

    def extract_text(self, image_path: str, preprocess: bool = True) -> Dict:
        """Extract text from an image with optional preprocessing.

        Args:
            image_path: Path to the image file on disk.
            preprocess: Whether to run preprocess_image() first.

        Returns:
            Dict with raw EasyOCR results, confidence-filtered results,
            the joined full text, confidence statistics, and word/line counts.

        Raises:
            ValueError: If the image cannot be loaded.
        """
        print(f"🔍 Processing image: {image_path}")

        image = cv2.imread(image_path)
        if image is None:
            raise ValueError(f"Could not load image: {image_path}")

        processed_image = self.preprocess_image(image) if preprocess else image

        results = self.reader.readtext(processed_image)

        extracted_data = {
            'raw_results': results,
            'filtered_results': [],
            'full_text': '',
            'confidence_stats': {},
            'word_count': 0,
            'line_count': 0,
        }

        high_confidence_text = []
        confidences = []

        # Keep only detections at or above the confidence threshold.
        for bbox, text, confidence in results:
            if confidence >= self.confidence_threshold:
                extracted_data['filtered_results'].append({
                    'text': text,
                    'confidence': confidence,
                    'bbox': bbox,
                })
                high_confidence_text.append(text)
                confidences.append(confidence)

        extracted_data['full_text'] = ' '.join(high_confidence_text)
        extracted_data['word_count'] = len(extracted_data['full_text'].split())
        extracted_data['line_count'] = len(high_confidence_text)

        if confidences:
            # Cast numpy scalars to plain floats so the dict stays
            # JSON-serializable in export_results().
            extracted_data['confidence_stats'] = {
                'mean': float(np.mean(confidences)),
                'min': float(np.min(confidences)),
                'max': float(np.max(confidences)),
                'std': float(np.std(confidences)),
            }

        return extracted_data

    def visualize_results(self, image_path: str, results: Dict, show_bbox: bool = True):
        """Visualize OCR results: bounding boxes, the preprocessed image,
        a confidence histogram, and summary confidence statistics."""
        image = cv2.imread(image_path)
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        plt.figure(figsize=(15, 10))

        if show_bbox:
            plt.subplot(2, 2, 1)
            img_with_boxes = image_rgb.copy()

            for item in results['filtered_results']:
                bbox = np.array(item['bbox']).astype(int)
                cv2.polylines(img_with_boxes, [bbox], True, (255, 0, 0), 2)

                # Label each box with its confidence, just above the top-left corner.
                x, y = bbox[0]
                cv2.putText(img_with_boxes, f"{item['confidence']:.2f}",
                            (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1)

            plt.imshow(img_with_boxes)
            plt.title("OCR Results with Bounding Boxes")
            plt.axis('off')

        plt.subplot(2, 2, 2)
        processed = self.preprocess_image(image)
        plt.imshow(processed, cmap='gray')
        plt.title("Preprocessed Image")
        plt.axis('off')

        plt.subplot(2, 2, 3)
        confidences = [item['confidence'] for item in results['filtered_results']]
        if confidences:
            plt.hist(confidences, bins=20, alpha=0.7, color='blue')
            plt.xlabel('Confidence Score')
            plt.ylabel('Frequency')
            plt.title('Confidence Score Distribution')
            plt.axvline(self.confidence_threshold, color='red', linestyle='--',
                        label=f'Threshold: {self.confidence_threshold}')
            plt.legend()

        plt.subplot(2, 2, 4)
        stats = results['confidence_stats']
        if stats:
            labels = ['Mean', 'Min', 'Max']
            values = [stats['mean'], stats['min'], stats['max']]
            plt.bar(labels, values, color=['green', 'red', 'blue'])
            plt.ylabel('Confidence Score')
            plt.title('Confidence Statistics')
            plt.ylim(0, 1)

        plt.tight_layout()
        plt.show()

    def smart_text_analysis(self, text: str) -> Dict:
        """Perform lightweight analysis of extracted text.

        Detects common patterns (emails, phone numbers, URLs, dates),
        classifies the text type from whichever patterns matched, and makes
        a rough script-based guess at the language.
        """
        analysis = {
            'language_detection': 'unknown',
            'text_type': 'unknown',
            'key_info': {},
            'patterns': [],
        }

        email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
        phone_pattern = r'(\+\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}'
        url_pattern = r'https?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
        date_pattern = r'\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b'

        patterns = {
            'emails': re.findall(email_pattern, text, re.IGNORECASE),
            'phones': re.findall(phone_pattern, text),
            'urls': re.findall(url_pattern, text, re.IGNORECASE),
            'dates': re.findall(date_pattern, text),
        }

        # Keep only the pattern categories that actually matched something.
        analysis['patterns'] = {k: v for k, v in patterns.items() if v}

        if any(patterns.values()):
            if patterns.get('emails') or patterns.get('phones'):
                analysis['text_type'] = 'contact_info'
            elif patterns.get('urls'):
                analysis['text_type'] = 'web_content'
            elif patterns.get('dates'):
                analysis['text_type'] = 'document_with_dates'

        # Very rough script-based language heuristics, checked in priority order.
        if re.search(r'[а-яё]', text.lower()):
            analysis['language_detection'] = 'russian'
        elif re.search(r'[àáâãäåæçèéêëìíîïñòóôõöøùúûüý]', text.lower()):
            analysis['language_detection'] = 'romance_language'
        elif re.search(r'[一-龯]', text):  # CJK unified ideographs
            analysis['language_detection'] = 'chinese'
        elif re.search(r'[\u3040-\u30ff]', text):  # hiragana + katakana ranges
            analysis['language_detection'] = 'japanese'
        elif re.search(r'[a-zA-Z]', text):
            analysis['language_detection'] = 'latin_based'

        return analysis

    def process_batch(self, image_folder: str) -> List[Dict]:
        """Process every supported image in a folder.

        Returns:
            A list of extract_text() result dicts, each tagged with its
            'filename'. Failures are reported and skipped (best-effort batch).
        """
        results = []
        supported_formats = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff')

        for filename in os.listdir(image_folder):
            if not filename.lower().endswith(supported_formats):
                continue
            image_path = os.path.join(image_folder, filename)
            try:
                result = self.extract_text(image_path)
                result['filename'] = filename
                results.append(result)
                print(f"✅ Processed: {filename}")
            except Exception as e:
                # Best-effort batch: report the failure and keep going.
                print(f"❌ Error processing {filename}: {e}")

        return results

    def export_results(self, results: Dict, format: str = 'json') -> str:
        """Export results to 'ocr_results.json' or 'extracted_text.txt'.

        Args:
            results: A result dict as produced by extract_text().
            format: 'json' (full results) or 'txt' (extracted text only).

        Returns:
            The filename written.

        Raises:
            ValueError: For an unsupported format.
        """
        fmt = format.lower()
        if fmt == 'json':
            # default=str handles non-JSON-serializable numpy values that
            # may appear in 'raw_results'.
            output = json.dumps(results, indent=2, ensure_ascii=False, default=str)
            filename = 'ocr_results.json'
        elif fmt == 'txt':
            output = results['full_text']
            filename = 'extracted_text.txt'
        else:
            raise ValueError("Supported formats: 'json', 'txt'")

        with open(filename, 'w', encoding='utf-8') as f:
            f.write(output)

        print(f"📄 Results exported to: {filename}")
        return filename

RELATED ARTICLES

LEAVE A REPLY

Please enter your comment!
Please enter your name here

Most Popular

Recent Comments