需求描述:
对表格图片,识别出表格里的横、纵坐标列表,并剔除异常点
解决方法:
- 通过opencv的getStructuringElement识别出横、竖线
- 通过bitwise_and取得交点并去除表格线
- 获取x和y的所有可能点,按照相邻点不超过阈值来筛选每一行、列最大的y和x
- 对于个别异常点通过卡图片临近点阈值、面积过滤、自定义异常筛选剔除
- 自定义异常筛选主要是通过对相邻坐标数据进行统计,少于指定阈值认为是异常点
import cv2
import pandas as pd
import numpy as np
def outset(df, *, max_gap=10, max_group_size=40, dump_path=r"D:/df.csv"):
    """Return the coordinates that belong to suspiciously small clusters.

    The values in ``df['point']`` are walked in their given order and split
    into runs: a run ends after a value whose difference to the next value
    is larger than ``max_gap`` in absolute terms (the last value always ends
    a run). Every value that sits in a run of at most ``max_group_size``
    entries is reported as an outlier.

    Args:
        df: DataFrame with a ``point`` column. It is not modified.
        max_gap: Largest absolute difference between neighbouring values
            that still keeps them in the same run.
        max_group_size: Runs with this many entries or fewer are treated
            as outliers.
        dump_path: Where to write the intermediate table (columns
            ``point``, ``diff``, ``flag``, ``group0``) as CSV for debugging.
            Pass ``None`` to skip the dump. A failing write is reported and
            otherwise ignored.

    Returns:
        Sorted list of the distinct outlier values (plain ``int``); an empty
        list when ``df`` has no rows.
    """
    if len(df) == 0:
        return []

    raw_points = df['point']
    # Difference to the *next* row; the last row has no successor, so its
    # NaN becomes 0 (as do differences involving missing points).
    diffs = raw_points.diff(periods=-1).fillna(0)
    points = raw_points.fillna(0).astype(int).to_numpy()

    # np.array(...) makes a private, writable copy of the boolean mask.
    flags = np.array(diffs.abs() > max_gap, dtype=int)
    flags[-1] = 1  # the final row always closes its run

    # A row's group number is 1 + the number of run-ending rows before it.
    group_ids = np.concatenate(([0], np.cumsum(flags[:-1]))) + 1

    if dump_path is not None:
        debug_table = pd.DataFrame({
            'point': points,
            'diff': diffs.astype(int).to_numpy(),
            'flag': flags,
            'group0': group_ids,
        })
        try:
            debug_table.to_csv(dump_path)
        except OSError as err:
            # The dump is a debugging aid only; never let it abort the run.
            print("outset: could not write debug CSV {!r}: {}".format(dump_path, err))

    group_sizes = np.bincount(group_ids)
    in_small_group = group_sizes[group_ids] <= max_group_size
    outliers = sorted(set(points[in_small_group].tolist()))

    print("异常X坐标********")
    print(outliers)
    print("异常X坐标********")
    return outliers
def _cluster_maxima(sorted_values, min_gap):
    """Return the last (largest) value of every run in an ascending array.

    A run ends at a value whose successor is at least ``min_gap`` larger;
    the final value always ends the last run. Empty input yields ``[]``.
    """
    if len(sorted_values) == 0:
        return []
    run_ends = np.diff(sorted_values) >= min_gap
    return list(sorted_values[:-1][run_ends]) + [sorted_values[-1]]


def seg_pic(img, debug_dir='D:/bitwiseAnd/'):
    """Locate the grid-line x and y coordinates of a table image.

    Horizontal and vertical lines are isolated with morphological opening,
    their intersections are collected, and the intersection coordinates are
    clustered into one x per column line and one y per row line. Candidates
    close to the image border, in sparsely populated x clusters (see
    ``outset``) or matching small blobs (contour area in [57, 100)) are
    dropped.

    Args:
        img: Path of the image file, using ``/`` as separator.
        debug_dir: Prefix under which the intersection mask is saved using
            the image's file name. Pass ``None`` to skip saving.

    Returns:
        Tuple ``(image, xs, ys)``: the colour image as loaded by
        ``cv2.imread``, the list of accepted x coordinates and the list of
        accepted y coordinates, both ascending.

    Raises:
        OSError: If the image cannot be read.
    """
    image = cv2.imread(img, 1)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError("cannot read image: {}".format(img))
    height, width = image.shape[0:2]
    print(height, width)

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    binary = cv2.adaptiveThreshold(~gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY, 35, -5)
    rows, cols = binary.shape

    # Opening with a wide, 1-pixel-high kernel keeps only horizontal lines.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (max(1, cols // 40), 1))
    horizontal = cv2.dilate(cv2.erode(binary, kernel, iterations=1), kernel, iterations=1)
    # Opening with a tall, 1-pixel-wide kernel keeps only vertical lines.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, max(1, rows // 20)))
    vertical = cv2.dilate(cv2.erode(binary, kernel, iterations=1), kernel, iterations=1)

    # Pixels present in both line masks are the grid intersections.
    bitwiseAnd = cv2.bitwise_and(horizontal, vertical)
    if debug_dir is not None:
        cv2.imwrite(debug_dir + img.split("/")[-1], bitwiseAnd)

    ys, xs = np.where(bitwiseAnd > 0)

    # X candidates: ignore a 10-pixel margin on the left and right, then
    # take the largest x of every run (neighbours more than 20 apart split).
    myxs = np.sort(xs)
    myxs = myxs[(myxs > 10) & (myxs < width - 10)]
    mylistx = _cluster_maxima(myxs, 21)

    # Intersections within 20 pixels of the top/bottom edge: drop those ys
    # and drop the x of every such intersection from the x candidates.
    near_top = ys <= 20
    near_bottom = ~near_top & (ys >= height - 20)
    myys = np.sort(ys)
    if near_top.any():
        myys = myys[myys > 20]
    if near_bottom.any():
        myys = myys[myys < height - 20]
    border_xs = set(xs[near_top | near_bottom].tolist())
    mylistx = [x for x in mylistx if x not in border_xs]

    # Y candidates: largest y of every run (neighbours 23 or more apart split).
    mylisty = _cluster_maxima(myys, 23)

    # Statistical filter: x values living in sparsely populated clusters.
    del_x = set(outset(pd.DataFrame({'point': myxs}))) if len(myxs) else set()
    mylistx = [x for x in mylistx if x not in del_x]
    # Every y that has an intersection on an outlier x is rejected as well.
    # Built as a new list: removing while iterating would skip neighbours.
    del_y = {y for x, y in zip(xs.tolist(), ys.tolist()) if x in del_x}
    mylisty = [y for y in mylisty if y not in del_y]

    # Area filter: blobs whose contour area lies in [57, 100) are treated as
    # abnormal; the x of their rotated bounding box corner 1 is discarded.
    contours, _hierarchy = cv2.findContours(np.uint8(bitwiseAnd), cv2.RETR_TREE,
                                            cv2.CHAIN_APPROX_SIMPLE)
    # NOTE(review): sort_contours is not defined or imported in the visible
    # part of this file - confirm where it comes from and that it returns
    # an iterable of contours.
    cnts = sort_contours(contours, method="top-to-bottom")
    for contour in cnts:
        area = cv2.contourArea(contour)
        if not 57 <= area < 100:
            continue
        box = cv2.boxPoints(cv2.minAreaRect(contour))
        abnormalx = int(box[1][0])
        if abnormalx in mylistx:
            print("异常区域:\t", abnormalx, area)
            mylistx.remove(abnormalx)

    return image, mylistx, mylisty
if __name__ == '__main__':
    # Manual smoke run: segment one sample table image and show the result
    # tuple (image array, x coordinates, y coordinates).
    img_path = r"D:/testslope_corr/_21.png"
    segmentation = seg_pic(img_path)
    print(segmentation)