# Install both Python packages the snippets below import (pdf2image was missing).
# pdf2image additionally needs the Poppler utilities, and pytesseract needs the
# Tesseract executable with the chi_sim language data; neither comes from pip.
pip install pytesseract pdf2image
python
import pytesseract
from PIL import Image
from pdf2image import convert_from_path
python
# Path of the PDF to run OCR on (placeholder name; replace with a real file).
pdf_path = 'your_pdf_file.pdf'
# Render the PDF into images with pdf2image; the result is iterated image by
# image in the OCR step.
images = convert_from_path(pdf_path)
python
# Run Tesseract OCR on every rendered image with the 'chi_sim' language model
# and concatenate the recognized text in page order. str.join is used instead
# of += in a loop so the string is assembled in a single pass.
text = ''.join(
    pytesseract.image_to_string(image, lang='chi_sim') for image in images
)
python
# Write the accumulated OCR text to standard output.
print(text)
python
import pytesseract
from PIL import Image
from pdf2image import convert_from_path
# Complete example: render a PDF to images, OCR each one, print the text.

# Path of the PDF to run OCR on (placeholder name; replace with a real file).
pdf_path = 'your_pdf_file.pdf'

# Render the PDF into images with pdf2image.
images = convert_from_path(pdf_path)

# OCR every image with the 'chi_sim' language model and concatenate the
# results in order. str.join is used instead of += in a loop so the string is
# assembled in a single pass.
text = ''.join(
    pytesseract.image_to_string(image, lang='chi_sim') for image in images
)

# Write the recognized text to standard output.
print(text)
python
# Point pytesseract at the Tesseract executable explicitly; the path shown is a
# Windows-style install location and is only needed when the executable is not
# found on PATH.
# NOTE(review): this assignment appears after the OCR calls above; it presumably
# has to run before any image_to_string call to take effect — confirm placement.
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'