class AdvancedOCRAgent:
    """
    Advanced OCR AI agent built on EasyOCR and OpenCV.

    Provides image preprocessing, multi-language text extraction,
    confidence filtering, result visualization, simple pattern-based
    text analysis, batch processing, and export to JSON/TXT.
    """

    def __init__(self, languages: Optional[List[str]] = None, gpu: bool = True):
        """Initialize the OCR agent.

        Args:
            languages: EasyOCR language codes (defaults to ``['en']``).
                A ``None`` sentinel is used instead of a mutable default list.
            gpu: Whether EasyOCR should try to use the GPU.
        """
        if languages is None:
            languages = ['en']
        print("🤖 Initializing Advanced OCR Agent...")
        self.languages = languages
        self.reader = easyocr.Reader(languages, gpu=gpu)
        # Detections below this confidence are excluded from filtered output.
        self.confidence_threshold = 0.5
        print(f"✅ OCR Agent ready! Languages: {languages}")

    def upload_image(self) -> Optional[str]:
        """Upload an image file via the Colab interface.

        Returns:
            The uploaded filename, or ``None`` if nothing was uploaded.
        """
        print("📁 Upload your image file:")
        uploaded = files.upload()
        if uploaded:
            filename = list(uploaded.keys())[0]
            print(f"✅ Uploaded: {filename}")
            return filename
        return None

    def preprocess_image(self, image: np.ndarray, enhance: bool = True) -> np.ndarray:
        """Preprocess an image to improve OCR accuracy.

        Converts to grayscale, optionally applies CLAHE contrast
        enhancement, denoising and sharpening, then adaptive thresholding.

        Args:
            image: Input image (BGR or already grayscale).
            enhance: Apply CLAHE + denoise + sharpen before thresholding.

        Returns:
            A binarized single-channel image.
        """
        # Accept either color (3-channel) or grayscale input.
        if len(image.shape) == 3:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            gray = image.copy()

        if enhance:
            # Contrast-limited adaptive histogram equalization.
            clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
            gray = clahe.apply(gray)
            gray = cv2.fastNlMeansDenoising(gray)
            # Simple sharpening kernel to crisp up character edges.
            kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
            gray = cv2.filter2D(gray, -1, kernel)

        binary = cv2.adaptiveThreshold(
            gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
        )
        return binary

    def extract_text(self, image_path: str, preprocess: bool = True) -> Dict:
        """Extract text from an image file.

        Args:
            image_path: Path to the image on disk.
            preprocess: Run :meth:`preprocess_image` before OCR.

        Returns:
            Dict with raw results, confidence-filtered results, joined
            full text, confidence statistics, and word/line counts.

        Raises:
            ValueError: If the image cannot be loaded.
        """
        print(f"🔍 Processing image: {image_path}")
        image = cv2.imread(image_path)
        if image is None:
            raise ValueError(f"Could not load image: {image_path}")

        processed_image = self.preprocess_image(image) if preprocess else image

        results = self.reader.readtext(processed_image)

        extracted_data = {
            'raw_results': results,
            'filtered_results': [],
            'full_text': '',
            'confidence_stats': {},
            'word_count': 0,
            'line_count': 0,
        }

        high_confidence_text = []
        confidences = []
        for (bbox, text, confidence) in results:
            # Keep only detections at or above the confidence threshold.
            if confidence >= self.confidence_threshold:
                extracted_data['filtered_results'].append({
                    'text': text,
                    'confidence': confidence,
                    'bbox': bbox,
                })
                high_confidence_text.append(text)
                confidences.append(confidence)

        extracted_data['full_text'] = ' '.join(high_confidence_text)
        extracted_data['word_count'] = len(extracted_data['full_text'].split())
        extracted_data['line_count'] = len(high_confidence_text)

        if confidences:
            # Key names must match what visualize_results() reads.
            extracted_data['confidence_stats'] = {
                'mean': np.mean(confidences),
                'min': np.min(confidences),
                'max': np.max(confidences),
                'std': np.std(confidences),
            }
        return extracted_data

    def visualize_results(self, image_path: str, results: Dict, show_bbox: bool = True):
        """Visualize OCR results: bounding boxes, preprocessed image,
        confidence histogram, and summary statistics.

        Args:
            image_path: Path of the image that was processed.
            results: Output dict from :meth:`extract_text`.
            show_bbox: Draw per-detection bounding boxes and confidences.
        """
        image = cv2.imread(image_path)
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        plt.figure(figsize=(15, 10))

        if show_bbox:
            plt.subplot(2, 2, 1)
            img_with_boxes = image_rgb.copy()
            for item in results['filtered_results']:
                bbox = np.array(item['bbox']).astype(int)
                cv2.polylines(img_with_boxes, [bbox], True, (255, 0, 0), 2)
                # Annotate each box with its confidence score.
                x, y = bbox[0]
                cv2.putText(img_with_boxes, f"{item['confidence']:.2f}",
                            (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1)
            plt.imshow(img_with_boxes)
            plt.title("OCR Results with Bounding Boxes")
            plt.axis('off')

        plt.subplot(2, 2, 2)
        processed = self.preprocess_image(image)
        plt.imshow(processed, cmap='gray')
        plt.title("Preprocessed Image")
        plt.axis('off')

        plt.subplot(2, 2, 3)
        confidences = [item['confidence'] for item in results['filtered_results']]
        if confidences:
            plt.hist(confidences, bins=20, alpha=0.7, color='blue')
            plt.xlabel('Confidence Score')
            plt.ylabel('Frequency')
            plt.title('Confidence Score Distribution')
            plt.axvline(self.confidence_threshold, color='red', linestyle='--',
                        label=f'Threshold: {self.confidence_threshold}')
            plt.legend()

        plt.subplot(2, 2, 4)
        stats = results['confidence_stats']
        if stats:
            labels = ['Mean', 'Min', 'Max']
            values = [stats['mean'], stats['min'], stats['max']]
            plt.bar(labels, values, color=['green', 'red', 'blue'])
            plt.ylabel('Confidence Score')
            plt.title('Confidence Statistics')
            plt.ylim(0, 1)

        plt.tight_layout()
        plt.show()

    def smart_text_analysis(self, text: str) -> Dict:
        """Perform lightweight pattern-based analysis of extracted text.

        Detects emails, phone numbers, URLs and dates via regex, then
        guesses a coarse text type and script/language family.

        Args:
            text: The extracted text to analyze.

        Returns:
            Dict with ``language_detection``, ``text_type``, ``key_info``
            and the non-empty ``patterns`` found.
        """
        analysis = {
            'language_detection': 'unknown',
            'text_type': 'unknown',
            'key_info': {},
            'patterns': [],
        }

        email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b'
        phone_pattern = r'(\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}'
        url_pattern = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
        date_pattern = r'\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b'

        patterns = {
            'emails': re.findall(email_pattern, text, re.IGNORECASE),
            'phones': re.findall(phone_pattern, text),
            'urls': re.findall(url_pattern, text, re.IGNORECASE),
            'dates': re.findall(date_pattern, text),
        }
        # Keep only pattern categories that actually matched.
        analysis['patterns'] = {k: v for k, v in patterns.items() if v}

        if any(patterns.values()):
            if patterns.get('emails') or patterns.get('phones'):
                analysis['text_type'] = 'contact_info'
            elif patterns.get('urls'):
                analysis['text_type'] = 'web_content'
            elif patterns.get('dates'):
                analysis['text_type'] = 'document_with_dates'

        # Coarse script detection by character ranges; first match wins.
        if re.search(r'[а-яё]', text.lower()):
            analysis['language_detection'] = 'russian'
        elif re.search(r'[àáâãäåæçèéêëìíîïñòóôõöøùúûüý]', text.lower()):
            analysis['language_detection'] = 'romance_language'
        elif re.search(r'[一-龯]', text):
            analysis['language_detection'] = 'chinese'
        elif re.search(r'[ひらがなカタカナ]', text):
            analysis['language_detection'] = 'japanese'
        elif re.search(r'[a-zA-Z]', text):
            analysis['language_detection'] = 'latin_based'

        return analysis

    def process_batch(self, image_folder: str) -> List[Dict]:
        """Process every supported image in a folder.

        Args:
            image_folder: Directory containing image files.

        Returns:
            List of per-image result dicts (each tagged with ``filename``);
            images that fail to process are logged and skipped.
        """
        results = []
        supported_formats = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff')

        for filename in os.listdir(image_folder):
            if filename.lower().endswith(supported_formats):
                image_path = os.path.join(image_folder, filename)
                try:
                    result = self.extract_text(image_path)
                    result['filename'] = filename
                    results.append(result)
                    print(f"✅ Processed: {filename}")
                except Exception as e:
                    # Best-effort batch: report the failure and continue.
                    print(f"❌ Error processing {filename}: {str(e)}")
        return results

    def export_results(self, results: Dict, format: str = 'json') -> str:
        """Export extraction results to disk.

        Args:
            results: Output dict from :meth:`extract_text`.
            format: ``'json'`` (full results) or ``'txt'`` (plain text only).

        Returns:
            The filename written.

        Raises:
            ValueError: If ``format`` is not 'json' or 'txt'.
        """
        if format.lower() == 'json':
            output = json.dumps(results, indent=2, ensure_ascii=False)
            filename = 'ocr_results.json'
        elif format.lower() == 'txt':
            output = results['full_text']
            filename = 'extracted_text.txt'
        else:
            raise ValueError("Supported formats: 'json', 'txt'")

        with open(filename, 'w', encoding='utf-8') as f:
            f.write(output)

        print(f"📄 Results exported to: {filename}")
        return filename
How to Build a Multilingual OCR AI Agent in Python with EasyOCR and OpenCV
RELATED ARTICLES