Friday, September 12, 2025
Home › Artificial Intelligence › How to Build a Multilingual OCR AI Agent in Python with...

How to Build a Multilingual OCR AI Agent in Python with EasyOCR and OpenCV

class AdvancedOCRAgent:
    """
    Advanced OCR AI agent with image preprocessing, multi-language support,
    and intelligent text-extraction capabilities.

    Uses EasyOCR for recognition, OpenCV for preprocessing, and matplotlib
    for visualization. ``upload_image`` assumes a Google Colab environment
    (``google.colab.files``); every other method is environment-agnostic.
    """

    def __init__(self, languages: Optional[List[str]] = None, gpu: bool = True):
        """Initialize the OCR agent with the given EasyOCR language codes.

        Args:
            languages: EasyOCR language codes (defaults to ['en']).
            gpu: Whether EasyOCR should try to use a GPU.
        """
        print("🤖 Initializing Advanced OCR Agent...")
        # Avoid a mutable default argument; None means "English only".
        self.languages = ['en'] if languages is None else languages
        self.reader = easyocr.Reader(self.languages, gpu=gpu)
        # Detections below this confidence are dropped by extract_text().
        self.confidence_threshold = 0.5
        print(f"✅ OCR Agent ready! Languages: {self.languages}")

    def upload_image(self) -> Optional[str]:
        """Upload an image file via the Colab interface.

        Returns:
            The name of the first uploaded file, or None if nothing was uploaded.
        """
        print("📁 Upload your image file:")
        uploaded = files.upload()  # google.colab.files
        if not uploaded:
            return None
        filename = next(iter(uploaded))
        print(f"✅ Uploaded: {filename}")
        return filename

    def preprocess_image(self, image: np.ndarray, enhance: bool = True) -> np.ndarray:
        """Preprocess an image for better OCR accuracy.

        Args:
            image: BGR or grayscale image array.
            enhance: Apply CLAHE, denoising, and sharpening before binarization.

        Returns:
            A binarized (adaptive-thresholded) grayscale image.
        """
        # Convert to grayscale if the input is a 3-channel (color) image.
        if len(image.shape) == 3:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            gray = image.copy()

        if enhance:
            # CLAHE boosts local contrast without blowing out bright regions.
            clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
            gray = clahe.apply(gray)

            # Remove noise that would confuse character segmentation.
            gray = cv2.fastNlMeansDenoising(gray)

            # Sharpen edges with the standard 3x3 sharpening kernel.
            kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
            gray = cv2.filter2D(gray, -1, kernel)

        # Adaptive thresholding copes with uneven lighting better than a
        # single global threshold.
        binary = cv2.adaptiveThreshold(
            gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
        )

        return binary

    def extract_text(self, image_path: str, preprocess: bool = True) -> Dict:
        """Extract text from an image with optional preprocessing.

        Args:
            image_path: Path to the image file on disk.
            preprocess: Whether to run preprocess_image() first.

        Returns:
            Dict with raw EasyOCR results, confidence-filtered results,
            the joined full text, confidence statistics, and word/line counts.

        Raises:
            ValueError: If the image cannot be loaded.
        """
        print(f"🔍 Processing image: {image_path}")

        image = cv2.imread(image_path)
        if image is None:
            raise ValueError(f"Could not load image: {image_path}")

        processed_image = self.preprocess_image(image) if preprocess else image

        results = self.reader.readtext(processed_image)

        extracted_data = {
            'raw_results': results,
            'filtered_results': [],
            'full_text': '',
            'confidence_stats': {},
            'word_count': 0,
            'line_count': 0,
        }

        high_confidence_text = []
        confidences = []

        # Keep only detections at or above the confidence threshold.
        for bbox, text, confidence in results:
            if confidence >= self.confidence_threshold:
                extracted_data['filtered_results'].append({
                    'text': text,
                    'confidence': confidence,
                    'bbox': bbox,
                })
                high_confidence_text.append(text)
                confidences.append(confidence)

        extracted_data['full_text'] = ' '.join(high_confidence_text)
        extracted_data['word_count'] = len(extracted_data['full_text'].split())
        extracted_data['line_count'] = len(high_confidence_text)

        if confidences:
            # Cast numpy scalars to plain floats so the dict stays
            # JSON-serializable in export_results().
            extracted_data['confidence_stats'] = {
                'mean': float(np.mean(confidences)),
                'min': float(np.min(confidences)),
                'max': float(np.max(confidences)),
                'std': float(np.std(confidences)),
            }

        return extracted_data

    def visualize_results(self, image_path: str, results: Dict, show_bbox: bool = True):
        """Visualize OCR results: bounding boxes, the preprocessed image,
        a confidence histogram, and summary confidence statistics."""
        image = cv2.imread(image_path)
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        plt.figure(figsize=(15, 10))

        if show_bbox:
            plt.subplot(2, 2, 1)
            img_with_boxes = image_rgb.copy()

            for item in results['filtered_results']:
                bbox = np.array(item['bbox']).astype(int)
                cv2.polylines(img_with_boxes, [bbox], True, (255, 0, 0), 2)

                # Label each box with its confidence, just above the top-left corner.
                x, y = bbox[0]
                cv2.putText(img_with_boxes, f"{item['confidence']:.2f}",
                            (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1)

            plt.imshow(img_with_boxes)
            plt.title("OCR Results with Bounding Boxes")
            plt.axis('off')

        plt.subplot(2, 2, 2)
        processed = self.preprocess_image(image)
        plt.imshow(processed, cmap='gray')
        plt.title("Preprocessed Image")
        plt.axis('off')

        plt.subplot(2, 2, 3)
        confidences = [item['confidence'] for item in results['filtered_results']]
        if confidences:
            plt.hist(confidences, bins=20, alpha=0.7, color='blue')
            plt.xlabel('Confidence Score')
            plt.ylabel('Frequency')
            plt.title('Confidence Score Distribution')
            plt.axvline(self.confidence_threshold, color='red', linestyle='--',
                        label=f'Threshold: {self.confidence_threshold}')
            plt.legend()

        plt.subplot(2, 2, 4)
        stats = results['confidence_stats']
        if stats:
            labels = ['Mean', 'Min', 'Max']
            values = [stats['mean'], stats['min'], stats['max']]
            plt.bar(labels, values, color=['green', 'red', 'blue'])
            plt.ylabel('Confidence Score')
            plt.title('Confidence Statistics')
            plt.ylim(0, 1)

        plt.tight_layout()
        plt.show()

    def smart_text_analysis(self, text: str) -> Dict:
        """Perform lightweight analysis of extracted text.

        Detects common patterns (emails, phone numbers, URLs, dates),
        classifies the text type from whichever patterns matched, and makes
        a rough script-based guess at the language.
        """
        analysis = {
            'language_detection': 'unknown',
            'text_type': 'unknown',
            'key_info': {},
            'patterns': [],
        }

        email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
        phone_pattern = r'(\+\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}'
        url_pattern = r'https?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
        date_pattern = r'\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b'

        patterns = {
            'emails': re.findall(email_pattern, text, re.IGNORECASE),
            'phones': re.findall(phone_pattern, text),
            'urls': re.findall(url_pattern, text, re.IGNORECASE),
            'dates': re.findall(date_pattern, text),
        }

        # Keep only the pattern categories that actually matched something.
        analysis['patterns'] = {k: v for k, v in patterns.items() if v}

        if any(patterns.values()):
            if patterns.get('emails') or patterns.get('phones'):
                analysis['text_type'] = 'contact_info'
            elif patterns.get('urls'):
                analysis['text_type'] = 'web_content'
            elif patterns.get('dates'):
                analysis['text_type'] = 'document_with_dates'

        # Very rough script-based language heuristics, checked in priority order.
        if re.search(r'[а-яё]', text.lower()):
            analysis['language_detection'] = 'russian'
        elif re.search(r'[àáâãäåæçèéêëìíîïñòóôõöøùúûüý]', text.lower()):
            analysis['language_detection'] = 'romance_language'
        elif re.search(r'[一-龯]', text):  # CJK unified ideographs
            analysis['language_detection'] = 'chinese'
        elif re.search(r'[\u3040-\u30ff]', text):  # hiragana + katakana ranges
            analysis['language_detection'] = 'japanese'
        elif re.search(r'[a-zA-Z]', text):
            analysis['language_detection'] = 'latin_based'

        return analysis

    def process_batch(self, image_folder: str) -> List[Dict]:
        """Process every supported image in a folder.

        Returns:
            A list of extract_text() result dicts, each tagged with its
            'filename'. Failures are reported and skipped (best-effort batch).
        """
        results = []
        supported_formats = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff')

        for filename in os.listdir(image_folder):
            if not filename.lower().endswith(supported_formats):
                continue
            image_path = os.path.join(image_folder, filename)
            try:
                result = self.extract_text(image_path)
                result['filename'] = filename
                results.append(result)
                print(f"✅ Processed: {filename}")
            except Exception as e:
                # Best-effort batch: report the failure and keep going.
                print(f"❌ Error processing {filename}: {e}")

        return results

    def export_results(self, results: Dict, format: str = 'json') -> str:
        """Export results to 'ocr_results.json' or 'extracted_text.txt'.

        Args:
            results: A result dict as produced by extract_text().
            format: 'json' (full results) or 'txt' (extracted text only).

        Returns:
            The filename written.

        Raises:
            ValueError: For an unsupported format.
        """
        fmt = format.lower()
        if fmt == 'json':
            # default=str handles non-JSON-serializable numpy values that
            # may appear in 'raw_results'.
            output = json.dumps(results, indent=2, ensure_ascii=False, default=str)
            filename = 'ocr_results.json'
        elif fmt == 'txt':
            output = results['full_text']
            filename = 'extracted_text.txt'
        else:
            raise ValueError("Supported formats: 'json', 'txt'")

        with open(filename, 'w', encoding='utf-8') as f:
            f.write(output)

        print(f"📄 Results exported to: {filename}")
        return filename

RELATED ARTICLES

LEAVE A REPLY

Please enter your comment!
Please enter your name here

Most Popular

Recent Comments