import cv2
import re
import io
from matplotlib import pyplot as plt
# Parse voice match final data
def parse_voice_match_final(file_path):
    """Parse the voice-match result file into knowledge-point time ranges.

    Every line that contains ``range: <start>-<end>; kp_id: <id>`` yields one
    record; lines without that pattern are ignored.

    Args:
        file_path: Path to a UTF-8 encoded text file.

    Returns:
        A list of ``(start_time, end_time, kp_id)`` tuples in file order,
        where both times are ints and ``kp_id`` is a str.
    """
    # Compile once instead of re-resolving the pattern for every line.
    range_pattern = re.compile(r'range: (\d+)-(\d+); kp_id: (\w+)')
    knowledge_points = []
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            match = range_pattern.search(line)
            if match:
                start_time, end_time, kp_id = match.groups()
                knowledge_points.append((int(start_time), int(end_time), kp_id))
    return knowledge_points
import cv2
import pandas as pd
import re
# Parse final_match_test.txt
def parse_final_match_test(file_path):
    """Parse timestamped region records tagged with a knowledge-point id.

    A line containing ``Timestamp`` sets the current timestamp (the integer
    after the first ``': '``). A later line containing
    ``(x1, y1, x2, y2): Knowledge_point_id: <id>`` produces one record that
    carries the most recently seen timestamp.

    Args:
        file_path: Path to a UTF-8 encoded text file.

    Returns:
        A list of ``(timestamp, (x1, y1, x2, y2), kp_id)`` tuples in file
        order. ``timestamp`` is ``None`` for records that appear before any
        ``Timestamp`` line.
    """
    # Compile once instead of re-resolving the pattern for every line.
    region_pattern = re.compile(
        r'\((\d+), (\d+), (\d+), (\d+)\): Knowledge_point_id: (\w+)'
    )
    ocr_data = []
    timestamp = None
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            if 'Timestamp' in line:
                timestamp = int(line.split(': ')[1])
            elif 'Knowledge_point_id:' in line:
                match = region_pattern.search(line)
                if match:
                    *coords, kp_id = match.groups()
                    ocr_data.append((timestamp, tuple(map(int, coords)), kp_id))
    return ocr_data
# Match knowledge points with OCR/detection regions based on timestamps
def match_knowledge_points(voice_data, ocr_data):
    """Pair voice time ranges with regions that share their knowledge point.

    A region is paired with a voice range when both carry the same
    knowledge-point id and the region's timestamp lies inside the range
    (both bounds inclusive).

    Args:
        voice_data: Iterable of ``(start_time, end_time, kp_id)`` tuples.
        ocr_data: Iterable of ``(timestamp, region, kp_id)`` tuples. It is
            traversed once per voice entry, so it must be re-iterable
            (e.g. a list).

    Returns:
        A list of ``(start_time, end_time, region, kp_id)`` tuples, ordered
        by voice entry and then by region entry.
    """
    # The id comparison comes first so timestamps are only compared for
    # entries whose ids already agree.
    return [
        (start_time, end_time, region, kp_id)
        for start_time, end_time, kp_id in voice_data
        for timestamp, region, ocr_kp_id in ocr_data
        if kp_id == ocr_kp_id and start_time <= timestamp <= end_time
    ]
# Mark regions on the video
def mark_video(input_video_path, output_video_path, matches):
    """Write a copy of a video with matched regions drawn on each frame.

    For every frame, a timestamp in milliseconds is derived from the frame
    index and the FPS reported by the capture. If the timestamp lies inside
    the ``[start_time, end_time]`` range of a match, that match's region is
    drawn as a green rectangle labelled with its knowledge-point id. When
    several matches cover the same timestamp, the last one in ``matches``
    wins.

    Args:
        input_video_path: Path of the video to read.
        output_video_path: Path of the annotated video to write ('mp4v'
            codec, same frame size and FPS as the input).
        matches: Iterable of ``(start_time, end_time, region, kp_id)``
            tuples, where ``region`` is ``(x1, y1, x2, y2)``. It is
            traversed once per frame, so it must be re-iterable.

    Returns:
        None.
    """
    cap = cv2.VideoCapture(input_video_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

    frame_count = 0
    current_region = None
    current_kp_id = None
    region_end_time = None
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # End of stream or read failure: there is no frame to annotate.
                break

            timestamp = int(frame_count / fps * 1000)  # Convert frame count to milliseconds
            for start_time, end_time, region, kp_id in matches:
                if start_time <= timestamp <= end_time:
                    current_region = region
                    current_kp_id = kp_id
                    region_end_time = end_time

            # Drop the overlay once its time range has passed, so it does not
            # stay on screen for the rest of the video.
            if region_end_time is not None and timestamp > region_end_time:
                current_region = None
                current_kp_id = None
                region_end_time = None

            if current_region:
                cv2.rectangle(frame, (current_region[0], current_region[1]), (current_region[2], current_region[3]), (0, 255, 0), 2)
                cv2.putText(frame, current_kp_id, (current_region[0], current_region[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

            out.write(frame)
            frame_count += 1
    finally:
        # Release both handles even if annotation fails part-way through.
        cap.release()
        out.release()
if __name__ == "__main__":
    # Paths to the files
    voice_match_final_path = 'voice_match_final.txt'
    final_match_test_path = 'final_match_test(1).txt'
    input_video_path = 'video_data/5.mp4'  # Path to the input video
    output_video_path = 'video_data/5_match.mp4'  # Path to save the output video with annotations

    # Parse the files
    voice_data = parse_voice_match_final(voice_match_final_path)
    ocr_data = parse_final_match_test(final_match_test_path)

    # Match the knowledge points with OCR/detection regions
    matches = match_knowledge_points(voice_data, ocr_data)

    # Print matches for debugging
    for match in matches:
        print(f"Start Time: {match[0]}, End Time: {match[1]}, Region: {match[2]}, KP_ID: {match[3]}")

    # Mark the video with matched regions (currently disabled)
    # mark_video(input_video_path, output_video_path, matches)
import cv2
import pandas as pd
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.preprocessing import StandardScaler,MinMaxScaler
# Use the SimHei font for matplotlib's sans-serif family.
plt.rcParams['font.sans-serif'] = ['SimHei']

# Read the eye-tracking text file by hand and split it into key/value rows.
eyedata_path = 'eye_output_16.txt'  # path to the text file
data = []
with open(eyedata_path, 'r') as file:
    for line in file:
        line = line.strip()
        if ':' in line:  # only lines containing a colon carry a "key: value" pair
            key, value = line.split(':', 1)
            data.append([key.strip(), value.strip()])
        # else:
        #     print(f"Skipping malformed line: {line}")  # log malformed lines
data = pd.DataFrame(data, columns=['Type', 'Value'])

# Clean the data: pull each record type out of the Type/Value table.
timestamps = data['Value'][data['Type'] == 'Timestamp'].astype(float).reset_index(drop=True)
videos = data['Value'][data['Type'] == 'Video'].reset_index(drop=True)
positions = data['Value'][data['Type'] == 'Relative Position'].str.extract(r'\[(.*?)\]')[0]  # gaze position
positions = positions.str.split(expand=True).astype(float).reset_index(drop=True)
positions[0] = round(positions[0])
positions[1] = round(-positions[1])  # the second coordinate is negated before rounding

# Keep only the first two columns.
data = positions.iloc[:, [0, 1]]
# Make sure the values are numeric; anything unparsable becomes NaN.
data = data.apply(pd.to_numeric, errors='coerce')
# print(type(data))
x_values = data[0].tolist()
y_values = data[1].tolist()
eye_pos = np.vstack([x_values, y_values]).T  # DataFrame columns converted to an (n, 2) array
eye_timestamps = np.array(timestamps.tolist())
# np.save('eye_positions.npy', eye_pos)
# np.save('eye_timestamps.npy', eye_timestamps)

# NOTE(review): the arrays computed above are overwritten here by previously
# saved .npy files, so the parsing result is discarded and the script fails
# if those files are missing. Confirm whether this caching is intended.
eye_pos = np.load('eye_positions.npy')
eye_timestamps = np.load('eye_timestamps.npy')
# print(eye_pos,eye_timestamps)
text_path = 'final_match_test.txt'
import re
# Function to parse the text file and extract data
def parse_text_file(file_path):
    """Parse timestamped OCR and detection regions from a text file.

    Three kinds of lines are recognised, each only when the pattern starts
    at the very beginning of the line:

    * ``Timestamp: <int>`` sets the timestamp for the records that follow.
    * ``OCR <n>: (x1, y1, x2, y2) (Knowledge_point_id: KP<n>) <text>``
      yields ``(timestamp, <text>, (x1, y1, x2, y2))``.
    * ``Detection <n> (Knowledge_point_id: KP<a>, KP<b>...): (x1, y1, x2, y2)``
      yields ``(timestamp, 'Detection with KP<a>, KP<b>...', (x1, y1, x2, y2))``.

    Args:
        file_path: Path to a UTF-8 encoded text file.

    Returns:
        A list of ``(timestamp, label, (x1, y1, x2, y2))`` tuples in file
        order. ``timestamp`` is ``None`` for records that appear before any
        ``Timestamp`` line.
    """
    # Regular expressions to match timestamps, OCR, and detection positions
    timestamp_pattern = re.compile(r'Timestamp: (\d+)')
    ocr_pattern = re.compile(r'OCR \d+: \((\d+), (\d+), (\d+), (\d+)\) \(Knowledge_point_id: KP\d+\) (.+)')
    # The original pattern only matched the misspelling "Knowledge_pdoint_id",
    # so correctly spelled detection lines were silently dropped. The optional
    # "d" accepts both spellings.
    detection_pattern = re.compile(r'Detection \d+ \(Knowledge_pd?oint_id: (KP\d+(?:, KP\d+)*)\): \((\d+), (\d+), (\d+), (\d+)\)')

    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    parsed_data = []
    current_timestamp = None
    for line in content.split('\n'):
        # The three patterns start with different literals, so at most one
        # of them can match a given line.
        timestamp_match = timestamp_pattern.match(line)
        if timestamp_match:
            current_timestamp = int(timestamp_match.group(1))
            continue

        ocr_match = ocr_pattern.match(line)
        if ocr_match:
            x1, y1, x2, y2, ocr_text = ocr_match.groups()
            parsed_data.append((current_timestamp, ocr_text, (int(x1), int(y1), int(x2), int(y2))))
            continue

        detection_match = detection_pattern.match(line)
        if detection_match:
            knowledge_points, x1, y1, x2, y2 = detection_match.groups()
            parsed_data.append((current_timestamp, f'Detection with {knowledge_points}', (int(x1), int(y1), int(x2), int(y2))))
    return parsed_data
# Parse the file and collect the timestamp and rectangle of every region.
parsed_data = parse_text_file(text_path)
text_timestamps = []
text_pos = []
for entry in parsed_data:
    # Each entry is (timestamp, label, (x1, y1, x2, y2)). The loop had no
    # body, which left both arrays empty.
    # print(entry)
    text_timestamps.append(entry[0])
    text_pos.append(entry[2])
text_timestamps = np.array(text_timestamps)
text_pos = np.array(text_pos)
def check_gaze_in_regions(gaze_timestamps, gaze_positions, parsed_data):
    """Return the gaze samples that land inside a region shown at that time.

    Regions are grouped by their timestamp. A gaze sample is tested against
    the regions of the latest region timestamp that is less than or equal to
    the sample's own timestamp. Samples earlier than the first region
    timestamp, or later than the last one, are skipped because no bounded
    time window covers them.

    NOTE(review): the previous body was not runnable (empty ``if`` body,
    undefined ``timestamp``, out-of-range ``idx + 1``, two-argument
    ``list.append``). The windowing rule above is a reconstruction of its
    apparent intent; confirm it against the expected analysis.

    Args:
        gaze_timestamps: Sequence of gaze sample timestamps.
        gaze_positions: Sequence of ``(x, y)`` gaze positions, aligned with
            ``gaze_timestamps``.
        parsed_data: Iterable of ``(timestamp, label, (x1, y1, x2, y2))``
            tuples. Entries whose timestamp is ``None`` are ignored.

    Returns:
        A list of ``(region_timestamp, gaze_position)`` tuples, one per
        (gaze sample, containing region) pair, in gaze-sample order.
    """
    from bisect import bisect_right

    # Group rectangles by the timestamp they were recorded at.
    regions_by_time = {}
    for region_time, _label, rect in parsed_data:
        if region_time is None:
            continue
        regions_by_time.setdefault(region_time, []).append(rect)

    results = []
    if not regions_by_time:
        return results

    frame_times = sorted(regions_by_time)
    last_time = frame_times[-1]
    for gaze_time, gaze_position in zip(gaze_timestamps, gaze_positions):
        if gaze_time > last_time:
            continue
        # Index of the latest region timestamp that is <= gaze_time.
        pos = bisect_right(frame_times, gaze_time) - 1
        if pos < 0:
            continue
        timestamp = frame_times[pos]
        gaze_x, gaze_y = gaze_position
        for x1, y1, x2, y2 in regions_by_time[timestamp]:
            if x1 <= gaze_x <= x2 and y1 <= gaze_y <= y2:
                results.append((timestamp, gaze_position))
    return results
results = check_gaze_in_regions(eye_timestamps, eye_pos, parsed_data)
# Print or process results
for result in results: