pytesseract 識別率低提升方法

2022-01-12 06:19:07 字數 1541 閱讀 8580

from PIL import Image
from PIL import ImageEnhance

import pytesseract

# Pre-process an image with Pillow before handing it to Tesseract.
# Module and class names are case-sensitive: the package is ``PIL`` and the
# enhancer classes are ``Color``, ``Brightness``, ``Contrast``, ``Sharpness``.

img = Image.open('sanyecao.jpg')
img = img.convert('RGB')  # mode 'L' (grayscale) is also worth trying here

# Each ImageEnhance class wraps an image; ``enhance(factor)`` returns a NEW
# image. A factor of 1.0 returns a copy of the original image.
desaturated = ImageEnhance.Color(img).enhance(0)  # 0 -> black-and-white image
brightened = ImageEnhance.Brightness(desaturated).enhance(2)
contrasted = ImageEnhance.Contrast(brightened).enhance(8)
img = ImageEnhance.Sharpness(contrasted).enhance(20)

# Run OCR on the enhanced image; ``text`` holds the recognised string.
text = pytesseract.image_to_string(img)

影像預處理的方式有很多種,這裡不一一列舉,可以自行嘗試。下面用 OpenCV 簡單寫一個膨脹(dilate)處理的例子:

import cv2
import numpy as np

# Dilate a grayscale copy of the image and save the result to disk.

filename = "sanyecao.jpg"

img = cv2.imread(filename, 0)  # flag 0: load as single-channel grayscale
if img is None:
    # cv2.imread does not raise on a missing/unreadable file; it returns None.
    raise FileNotFoundError(f"cannot read image: {filename}")

print(np.shape(img))

# NOTE(review): a 1x1 structuring element leaves the image unchanged under
# dilation; use a larger kernel, e.g. (3, 3), to get a visible effect.
kernel = np.ones((1, 1), np.uint8)
dilate = cv2.dilate(img, kernel, iterations=1)

cv2.imwrite('new_dilate.jpg', dilate)

#還有些常用的方法

cv2.Canny

cv2.erode

cv2.rectangle

import cv2

# Compare two edge-detection approaches on the same grayscale image:
# Canny, and a morphological gradient of a thresholded image.

original_img = cv2.imread("qingwen.png", 0)  # flag 0: load as grayscale
if original_img is None:
    # cv2.imread does not raise on a missing/unreadable file; it returns None.
    raise FileNotFoundError("cannot read image: qingwen.png")

# Canny edge detection: blur first to suppress noise, then detect edges with
# hysteresis thresholds 50 (low) and 150 (high).
img1 = cv2.GaussianBlur(original_img, (3, 3), 0)
canny = cv2.Canny(img1, 50, 150)

# Morphological edge detection.
# Binarise at threshold 210 (the image is grayscale, so this applies to the
# single intensity channel); the threshold affects the gradient result.
_, thr_img = cv2.threshold(original_img, 210, 255, cv2.THRESH_BINARY)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))  # 5x5 rectangular structuring element
gradient = cv2.morphologyEx(thr_img, cv2.MORPH_GRADIENT, kernel)  # morphological gradient

# NOTE: imshow windows are only rendered once cv2.waitKey() is called
# afterwards; make sure such a call follows these lines.
cv2.imshow("original_img", original_img)
cv2.imshow("gradient", gradient)
cv2.imshow('canny', canny)