在Jupyter Notebook中运行,结果出错
# coding: utf-8
import easyocr
import os
import time
# Sort key: the value this script calls the "x" coordinate of an OCR entry.
def takeX(elem):
    """Return index 1 of the first corner point of ``elem``'s bounding box.

    ``elem`` is one OCR result entry whose first item is a sequence of
    corner points; only the first corner is inspected.

    NOTE(review): EasyOCR presumably stores points as ``[x, y]``, which would
    make this the vertical pixel position despite the "x" in the name --
    confirm against the EasyOCR output format.
    """
    first_corner = elem[0][0]
    return first_corner[1]
# Sort key: the value this script calls the "y" coordinate of an OCR entry.
def takeY(elem):
    """Return index 0 of the first corner point of ``elem``'s bounding box.

    ``elem`` is one OCR result entry whose first item is a sequence of
    corner points; only the first corner is inspected.

    NOTE(review): EasyOCR presumably stores points as ``[x, y]``, which would
    make this the horizontal pixel position despite the "y" in the name --
    confirm against the EasyOCR output format.
    """
    first_corner = elem[0][0]
    return first_corner[0]
# Decide whether two coordinates are close enough to be treated as equal.
def takeMinus(elem1, elem2, tolerance=3):
    """Return True when ``elem1`` and ``elem2`` differ by less than ``tolerance``.

    This is an absolute-difference test, not a ratio: the result is
    ``abs(elem1 - elem2) < tolerance``.

    Args:
        elem1: First coordinate value.
        elem2: Second coordinate value.
        tolerance: Exclusive upper bound on the absolute difference.
            Defaults to 3, the threshold the script has always used.

    Returns:
        True if the two values are within ``tolerance`` of each other,
        otherwise False.
    """
    return abs(elem1 - elem2) < tolerance
# Order a group of entries (sharing the same "x" value) by their "y" value.
def sortByY(ilist):
    """Sort ``ilist`` in place using ``takeY`` as the key and return it.

    The same list object that was passed in is returned, so callers may use
    either the return value or the original reference.
    """
    ilist[:] = sorted(ilist, key=takeY)
    return ilist
# Directory containing the screenshots to recognise.
path1 = 'C:\\Users\\Administrator\\Desktop\\123\\'
# Directory that receives the generated text files.
path2 = r'C:/Users/Administrator/Desktop/'
# Full path of every entry found in the screenshot directory.
imgs = [path1 + i for i in os.listdir(path1)]
# Write the list of image paths to the index file, one path per line.
with open(path2 + '123图片目录.txt', 'w+', encoding='utf-8') as f:
    for img in imgs:
        f.write(img + '\n')

# Build the reader once and reuse it for every image: constructing a Reader
# loads the models (and downloads them on first use), so doing it inside the
# loop repeats that cost per image.
# The GPU is used by default; pass gpu=False to run on the CPU.
# NOTE: if the automatic model download times out, download the model files
# manually into the EasyOCR model directory (see the Reader parameters
# model_storage_directory / download_enabled) and construct the Reader again.
reader = easyocr.Reader(['ch_sim', 'en'])

# Recognise the images one by one and append each image's text to a single
# output file. The file is opened once, so later images do not overwrite the
# text of earlier ones.
with open(path2 + '123转换.txt', 'w', encoding='utf8') as f:
    for img in imgs:
        # `img` is already a full path (path1 + file name), so it is passed
        # as-is. detail=1 is required because the ordering logic below needs
        # the bounding box of each entry, not just the text. A larger
        # batch_size uses more memory but recognises faster.
        result = reader.readtext(img, decoder='greedy', batch_size=20, detail=1)
        # Convert each result tuple to a list so the entries are easy to
        # reorder; the corner coordinates inside are adjusted in place below.
        mylist = [list(x) for x in result]
        # Snap nearly-equal coordinates to a common value so that entries on
        # the same visual line compare equal: first the coordinate read by
        # takeX (index 1), then the one read by takeY (index 0).
        for axis in (1, 0):
            for i in range(len(mylist)):
                for j in range(i + 1, len(mylist)):
                    if takeMinus(mylist[i][0][0][axis], mylist[j][0][0][axis]):
                        mylist[j][0][0][axis] = mylist[i][0][0][axis]
        # Order by the "x" value first and by the "y" value within equal "x".
        # A stable sort on the (x, y) pair gives the same order as sorting by
        # x and then sorting each equal-x group by y, and also copes with an
        # image in which no text was detected (empty list).
        resultlist = sorted(mylist, key=lambda elem: (takeX(elem), takeY(elem)))
        for y in resultlist:
            # y[1] is the recognised text of the entry.
            print(y[1])
            f.write(y[1] + '\n')
        # Blank line separating the text of consecutive images.
        f.write('\n')
Downloading detection model, please wait. This may take several minutes depending upon your network connection.
---------------------------------------------------------------------------
TimeoutError Traceback (most recent call last)
<ipython-input-4-facc949cf360> in <module>
42 for img in imgs:
43 #decoder为引擎,detail为是否显示位置信息,batch_size设置越大,占用内存越高,识别速度越快
---> 44 reader = easyocr.Reader(['ch_sim','en']) #默认使用GPU,#False为不使用GPU
45 result = reader.readtext(path2 + img,decoder='greedy',batch_size=20,detail=0)
46 mylist = []
C:\ProgramData\Anaconda3\lib\site-packages\easyocr\easyocr.py in __init__(self, lang_list, gpu, model_storage_directory, user_network_directory, recog_network, download_enabled, detector, recognizer, verbose, quantize, cudnn_benchmark)
88 LOGGER.warning('Downloading detection model, please wait. '
89 'This may take several minutes depending upon your network connection.')
---> 90 download_and_unzip(detection_models[detector_model]['url'], detection_models[detector_model]['filename'], self.model_storage_directory, verbose)
91 assert calculate_md5(detector_path) == detection_models[detector_model]['md5sum'], corrupt_msg
92 LOGGER.info('Download complete')
C:\ProgramData\Anaconda3\lib\site-packages\easyocr\utils.py in download_and_unzip(url, filename, model_storage_directory, verbose)
584 zip_path = os.path.join(model_storage_directory, 'temp.zip')
585 reporthook = printProgressBar(prefix='Progress:', suffix='Complete', length=50) if verbose else None
--> 586 urlretrieve(url, zip_path, reporthook=reporthook)
587 with ZipFile(zip_path, 'r') as zipObj:
588 zipObj.extract(filename, model_storage_directory)
C:\ProgramData\Anaconda3\lib\urllib\request.py in urlretrieve(url, filename, reporthook, data)
245 url_type, path = splittype(url)
246
--> 247 with contextlib.closing(urlopen(url, data)) as fp:
248 headers = fp.info()
249
C:\ProgramData\Anaconda3\lib\urllib\request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
220 else:
221 opener = _opener
--> 222 return opener.open(url, data, timeout)
223
224 def install_opener(opener):
C:\ProgramData\Anaconda3\lib\urllib\request.py in open(self, fullurl, data, timeout)
523 req = meth(req)
524
--> 525 response = self._open(req, data)
526
527 # post-process response
C:\ProgramData\Anaconda3\lib\urllib\request.py in _open(self, req, data)
541 protocol = req.type
542 result = self._call_chain(self.handle_open, protocol, protocol +
--> 543 '_open', req)
544 if result:
545 return result
C:\ProgramData\Anaconda3\lib\urllib\request.py in _call_chain(self, chain, kind, meth_name, *args)
501 for handler in handlers:
502 func = getattr(handler, meth_name)
--> 503 result = func(*args)
504 if result is not None:
505 return result
C:\ProgramData\Anaconda3\lib\urllib\request.py in https_open(self, req)
1358 def https_open(self, req):
1359 return self.do_open(http.client.HTTPSConnection, req,
-> 1360 context=self._context, check_hostname=self._check_hostname)
1361
1362 https_request = AbstractHTTPHandler.do_request_
C:\ProgramData\Anaconda3\lib\urllib\request.py in do_open(self, http_class, req, **http_conn_args)
1318 except OSError as err: # timeout error
1319 raise URLError(err)
-> 1320 r = h.getresponse()
1321 except:
1322 h.close()
C:\ProgramData\Anaconda3\lib\http\client.py in getresponse(self)
1319 try:
1320 try:
-> 1321 response.begin()
1322 except ConnectionError:
1323 self.close()
C:\ProgramData\Anaconda3\lib\http\client.py in begin(self)
294 # read until we get a non-100 response
295 while True:
--> 296 version, status, reason = self._read_status()
297 if status != CONTINUE:
298 break
C:\ProgramData\Anaconda3\lib\http\client.py in _read_status(self)
255
256 def _read_status(self):
--> 257 line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
258 if len(line) > _MAXLINE:
259 raise LineTooLong("status line")
C:\ProgramData\Anaconda3\lib\socket.py in readinto(self, b)
587 while True:
588 try:
--> 589 return self._sock.recv_into(b)
590 except timeout:
591 self._timeout_occurred = True
C:\ProgramData\Anaconda3\lib\ssl.py in recv_into(self, buffer, nbytes, flags)
1050 "non-zero flags not allowed in calls to recv_into() on %s" %
1051 self.__class__)
-> 1052 return self.read(nbytes, buffer)
1053 else:
1054 return super().recv_into(buffer, nbytes, flags)
C:\ProgramData\Anaconda3\lib\ssl.py in read(self, len, buffer)
909 try:
910 if buffer is not None:
--> 911 return self._sslobj.read(len, buffer)
912 else:
913 return self._sslobj.read(len)
TimeoutError: [WinError 10060] 由于连接方在一段时间后没有正确答复或连接的主机没有反应,连接尝试失败。
需求:批量扫描核酸检测的手机截图,并转换成txt文档。之前写的单张图片识别可以正常运行,但识别结果出现了上下串行的问题,而且一张一张识别太麻烦了。
因此想批量解决这个问题:我在网上搜索了解决easyOCR识别结果上下串行的代码,并改为逐张识别、逐张写入txt文档。
批量识别,上下不串行