python 验证码处理,,一、灰度处理,就是把


一、灰度处理与二值化

灰度处理,就是把彩色的验证码图片转为灰色的图片。

二值化,是将图片处理为只有黑白两色的图片,利于后面的图像处理和识别

 1   # 自适应阀值二值化  2   def _get_dynamic_binary_image(filedir, img_name): 3       filename =   ‘./out_img/‘ + img_name.split(‘.‘)[0] + ‘-binary.jpg‘ 4       img_name = filedir + ‘/‘ + img_name 5       print(‘.....‘ + img_name) 6       im =dz.imread(img_name) 7       im = dz.cvtColor(im,dz.COLOR_BGR2GRAY) #灰值化 8       # 二值化 9       th1 = dz.adaptiveThreshold(im, 255, dz.ADAPTIVE_THRESH_GAUSSIAN_C, dz.THRESH_BINARY, 21, 1)10      11      dz.imwrite(filename,th1)12      return th1

二、去除边框

 1 # 去除边框 2 def clear_border(img,img_name): 3   filename = ‘./out_img/‘ + img_name.split(‘.‘)[0] + ‘-clearBorder.jpg‘ 4   h, w = img.shape[:2] 5   for y in range(0, w): 6     for x in range(0, h): 7       if y < 2 or y > w - 2: 8         img[x, y] = 255 9       if x < 2 or x > h -2:10         img[x, y] = 25511 12   cv2.imwrite(filename,img)13   return img

使用 OpenCV 时要注意:图像矩阵的下标顺序与通常的 (x, y) 坐标相反——第一个下标是行(高度方向),第二个下标是列(宽度方向),即 img[行, 列],shape 为 (高, 宽)。

三、降噪

降噪是验证码处理中比较重要的一个步骤,我这里使用了点降噪和线降噪。需要注意的是,这种方法只能去除较细的干扰线。

 1 # 干扰线降噪 2 def interference_line(img, img_name): 3     filename =  ‘./out_img/‘ + img_name.split(‘.‘)[0] + ‘-interferenceline.jpg‘ 4     h, w = img.shape[:2] 5     # !!opencv矩阵点是反的 6     # img[1,2] 1:图片的高度,2:图片的宽度 7     for y in range(1, w - 1): 8         for x in range(1, h - 1): 9             count = 010             if img[x, y - 1] > 245:11                 count = count + 112             if img[x, y + 1] > 245:13                 count = count + 114             if img[x - 1, y] > 245:15                 count = count + 116             if img[x + 1, y] > 245:17                 count = count + 118             if count > 2:19                  img[x, y] = 25520 cv2.imwrite(filename,img)21 return img
  1 # 点降噪  2 def interference_point(img,img_name, x = 0, y = 0):  3     """  4     9邻域框,以当前点为中心的田字框,黑点个数  5     :param x:  6     :param y:  7     :return:  8     """  9     filename =  ‘./out_img/‘ + img_name.split(‘.‘)[0] + ‘-interferencePoint.jpg‘ 10     # todo 判断图片的长宽度下限 11     cur_pixel = img[x,y]# 当前像素点的值 12     height,width = img.shape[:2] 13  14     for y in range(0, width - 1): 15       for x in range(0, height - 1): 16         if y == 0:  # 第一行 17             if x == 0:  # 左上顶点,4邻域 18                 # 中心点旁边3个点 19                 sum = int(cur_pixel)  20                       + int(img[x, y + 1])  21                       + int(img[x + 1, y])  22                       + int(img[x + 1, y + 1]) 23                 if sum <= 2 * 245: 24                   img[x, y] = 0 25             elif x == height - 1:  # 右上顶点 26                 sum = int(cur_pixel)  27                       + int(img[x, y + 1])  28                       + int(img[x - 1, y])  29                       + int(img[x - 1, y + 1]) 30                 if sum <= 2 * 245: 31                   img[x, y] = 0 32             else:  # 最上非顶点,6邻域 33                 sum = int(img[x - 1, y])  34                       + int(img[x - 1, y + 1])  35                       + int(cur_pixel)  36                       + int(img[x, y + 1])  37                       + int(img[x + 1, y])  38                       + int(img[x + 1, y + 1]) 39                 if sum <= 3 * 245: 40                   img[x, y] = 0 41         elif y == width - 1:  # 最下面一行 42             if x == 0:  # 左下顶点 43                 # 中心点旁边3个点 44                 sum = int(cur_pixel)  45                       + int(img[x + 1, y])  46                       + int(img[x + 1, y - 1])  47                       + int(img[x, y - 1]) 48                 if sum <= 2 * 245: 49                   img[x, y] = 0 50             elif x == height - 1:  # 右下顶点 51                 sum = int(cur_pixel)  52                       + int(img[x, y - 1])  53                       + 
int(img[x - 1, y])  54                       + int(img[x - 1, y - 1]) 55  56                 if sum <= 2 * 245: 57                   img[x, y] = 0 58             else:  # 最下非顶点,6邻域 59                 sum = int(cur_pixel)  60                       + int(img[x - 1, y])  61                       + int(img[x + 1, y])  62                       + int(img[x, y - 1])  63                       + int(img[x - 1, y - 1])  64                       + int(img[x + 1, y - 1]) 65                 if sum <= 3 * 245: 66                   img[x, y] = 0 67         else:  # y不在边界 68             if x == 0:  # 左边非顶点 69                 sum = int(img[x, y - 1])  70                       + int(cur_pixel)  71                       + int(img[x, y + 1])  72                       + int(img[x + 1, y - 1])  73                       + int(img[x + 1, y])  74                       + int(img[x + 1, y + 1]) 75  76                 if sum <= 3 * 245: 77                   img[x, y] = 0 78             elif x == height - 1:  # 右边非顶点 79                 sum = int(img[x, y - 1])  80                       + int(cur_pixel)  81                       + int(img[x, y + 1])  82                       + int(img[x - 1, y - 1])  83                       + int(img[x - 1, y])  84                       + int(img[x - 1, y + 1]) 85  86                 if sum <= 3 * 245: 87                   img[x, y] = 0 88             else:  # 具备9领域条件的 89                 sum = int(img[x - 1, y - 1])  90                       + int(img[x - 1, y])  91                       + int(img[x - 1, y + 1])  92                       + int(img[x, y - 1])  93                       + int(cur_pixel)  94                       + int(img[x, y + 1])  95                       + int(img[x + 1, y - 1])  96                       + int(img[x + 1, y])  97                       + int(img[x + 1, y + 1]) 98                 if sum <= 4 * 245: 99                   img[x, y] = 0100     cv2.imwrite(filename,img)101     return img

五、字符切割

 1 def cfs(im,x_fd,y_fd): 2   ‘‘‘用队列和集合记录遍历过的像素坐标代替单纯递归以解决cfs访问过深问题 3   ‘‘‘ 4  5   # print(‘**********‘) 6  7   xaxis=[] 8   yaxis=[] 9   visited =set()10   q = Queue()11   q.put((x_fd, y_fd))12   visited.add((x_fd, y_fd))13   offsets=[(1, 0), (0, 1), (-1, 0), (0, -1)]#四邻域14 15   while not q.empty():16       x,y=q.get()17 18       for xoffset,yoffset in offsets:19           x_neighbor,y_neighbor = x+xoffset,y+yoffset20 21           if (x_neighbor,y_neighbor) in (visited):22               continue  # 已经访问过了23 24           visited.add((x_neighbor, y_neighbor))25 26           try:27               if im[x_neighbor, y_neighbor] == 0:28                   xaxis.append(x_neighbor)29                   yaxis.append(y_neighbor)30                   q.put((x_neighbor,y_neighbor))31 32           except IndexError:33               pass34   # print(xaxis)35   if (len(xaxis) == 0 | len(yaxis) == 0):36     xmax = x_fd + 137     xmin = x_fd38     ymax = y_fd + 139     ymin = y_fd40 41   else:42     xmax = max(xaxis)43     xmin = min(xaxis)44     ymax = max(yaxis)45     ymin = min(yaxis)46     #ymin,ymax=sort(yaxis)47 48   return ymax,ymin,xmax,xmin49 50 def detectFgPix(im,xmax):51   ‘‘‘搜索区块起点52   ‘‘‘53 54   h,w = im.shape[:2]55   for y_fd in range(xmax+1,w):56       for x_fd in range(h):57           if im[x_fd,y_fd] == 0:58               return x_fd,y_fd59 60 def CFS(im):61   ‘‘‘切割字符位置62   ‘‘‘63 64   zoneL=[]#各区块长度L列表65   zoneWB=[]#各区块的X轴[起始,终点]列表66   zoneHB=[]#各区块的Y轴[起始,终点]列表67 68   xmax=0#上一区块结束黑点横坐标,这里是初始化69   for i in range(10):70 71       try:72           x_fd,y_fd = detectFgPix(im,xmax)73           # print(y_fd,x_fd)74           xmax,xmin,ymax,ymin=cfs(im,x_fd,y_fd)75           L = xmax - xmin76           H = ymax - ymin77           zoneL.append(L)78           zoneWB.append([xmin,xmax])79           zoneHB.append([ymin,ymax])80 81       except TypeError:82           return zoneL,zoneWB,zoneHB83 84   return zoneL,zoneWB,zoneHB

切割粘连字符代码

 1       # 切割的位置 2       im_position = CFS(im) 3  4       maxL = max(im_position[0]) 5       minL = min(im_position[0]) 6  7       # 如果有粘连字符,如果一个字符的长度过长就认为是粘连字符,并从中间进行切割 8       if(maxL > minL + minL * 0.7): 9         maxL_index = im_position[0].index(maxL)10         minL_index = im_position[0].index(minL)11         # 设置字符的宽度12         im_position[0][maxL_index] = maxL // 213         im_position[0].insert(maxL_index + 1, maxL // 2)14         # 设置字符X轴[起始,终点]位置15         im_position[1][maxL_index][1] = im_position[1][maxL_index][0] + maxL // 216         im_position[1].insert(maxL_index + 1, [im_position[1][maxL_index][1] + 1, im_position[1][maxL_index][1] + 1 + maxL // 2])17         # 设置字符的Y轴[起始,终点]位置18         im_position[2].insert(maxL_index + 1, im_position[2][maxL_index])19 20       # 切割字符,要想切得好就得配置参数,通常 1 or 2 就可以21       cutting_img(im,im_position,img_name,1,1

切割字符并保存图片的代码

 1 def cutting_img(im,im_position,img,xoffset = 1,yoffset = 1): 2   filename =  ‘./out_img/‘ + img.split(‘.‘)[0] 3   # 识别出的字符个数 4   im_number = len(im_position[1]) 5   # 切割字符 6   for i in range(im_number): 7     im_start_X = im_position[1][i][0] - xoffset 8     im_end_X = im_position[1][i][1] + xoffset 9     im_start_Y = im_position[2][i][0] - yoffset10     im_end_Y = im_position[2][i][1] + yoffset11     cropped = im[im_start_Y:im_end_Y, im_start_X:im_end_X]12     cv2.imwrite(filename + ‘-cutting-‘ + str(i) + ‘.jpg‘,cropped)

六、识别:

识别用的是 pytesseract 库,主要需要注意识别一行字符和识别单个字符时的参数设置,以及识别中英文时的参数设置。识别本身的代码很简单,只有一行;我这里大部分代码是在筛选(filter)切割出来的图片文件。

 1       # 识别验证码 2       cutting_img_num = 0 3       for file in os.listdir(‘./out_img‘): 4         str_img = ‘‘ 5         if fnmatch(file, ‘%s-cutting-*.jpg‘ % img_name.split(‘.‘)[0]): 6           cutting_img_num += 1 7       for i in range(cutting_img_num): 8         try: 9           file = ‘./out_img/%s-cutting-%s.jpg‘ % (img_name.split(‘.‘)[0], i)10           # 识别字符11           str_img = str_img + image_to_string(Image.open(file),lang = ‘eng‘, config=‘-psm 10‘) #单个字符是10,一行文本是712         except Exception as err:13           pass14       print(‘切图:%s‘ % cutting_img_num)15       print(‘识别为:%s‘ % str_img

python 验证码处理

评论关闭