Remote Education and Intelligent Assessment System in a Virtual Reality Environment (Part 12)

Next, we match three streams against each other: the real-time gaze positions, the speech-text knowledge points, and the per-frame knowledge-point regions.

First, we match the speech-text knowledge points against the per-frame knowledge-point regions. We already know which knowledge point each of the teacher's sentences corresponds to, so for each time span we look up the region of the PPT slide that carries that knowledge point; this gives us the region the student is supposed to be looking at. My teammates will build the visualization on top of this (video generation and report generation).

Second, we check whether the gaze point falls inside that region; the proportion of gaze samples that successfully match is a measure of how attentive the student was in class.
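As a minimal sketch of that metric (the names here are illustrative, not taken from the project code): once each gaze sample has been classified as inside or outside the expected region, attentiveness is simply the hit rate.

def attention_ratio(num_matched, num_total):
    # Fraction of gaze samples that landed inside the expected knowledge-point region
    return num_matched / num_total if num_total else 0.0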

# -*- coding: utf-8 -*-
"""
@Time : 2024/6/22 14:45
@Auth : Zhao Yishuo
@File :pre_match.py
@IDE :PyCharm
"""
import cv2
import re

# Parse voice match final data
def parse_voice_match_final(file_path):
    knowledge_points = []
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            match = re.search(r'range: (\d+)-(\d+); kp_id: (\w+)', line)
            if match:
                start_time = int(match.group(1))
                end_time = int(match.group(2))
                kp_id = match.group(3)
                knowledge_points.append((start_time, end_time, kp_id))
    return knowledge_points
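# Illustrative input lines for the parser above (assumed from the regex;
# the real voice_match_final.txt layout may differ):
#   range: 0-5000; kp_id: KP1
#   range: 5000-12000; kp_id: KP2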



# Parse final_match_test.txt
def parse_final_match_test(file_path):
    ocr_data = []
    with open(file_path, 'r', encoding='utf-8') as file:
        timestamp = None
        for line in file:
            if 'Timestamp' in line:
                timestamp = int(line.split(': ')[1])
            elif 'Knowledge_point_id:' in line:
                match = re.search(r'\((\d+), (\d+), (\d+), (\d+)\): Knowledge_point_id: (\w+)', line)
                if match:
                    x1, y1, x2, y2 = map(int, match.groups()[:4])
                    kp_id = match.group(5)
                    ocr_data.append((timestamp, (x1, y1, x2, y2), kp_id))
    return ocr_data
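# Illustrative input block for this parser (assumed from the regex; the
# real final_match_test file may differ):
#   Timestamp: 1200
#   OCR 1: (100, 50, 400, 120): Knowledge_point_id: KP1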

# Match knowledge points with OCR/detection regions based on timestamps
def match_knowledge_points(voice_data, ocr_data):
    matches = []
    for (start_time, end_time, kp_id) in voice_data:
        for (timestamp, region, ocr_kp_id) in ocr_data:
            if kp_id == ocr_kp_id and start_time <= timestamp <= end_time:
                matches.append((start_time, end_time, region, kp_id))
    return matches
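# A quick sanity check with made-up data (illustrative only):
#   voice = [(0, 5000, 'KP1')]
#   frames = [(1200, (100, 50, 400, 120), 'KP1'), (9000, (0, 0, 10, 10), 'KP1')]
#   match_knowledge_points(voice, frames)
#   -> [(0, 5000, (100, 50, 400, 120), 'KP1')]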

# Mark regions on the video
def mark_video(input_video_path, output_video_path, matches):
    cap = cv2.VideoCapture(input_video_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

    frame_count = 0

    current_region = None
    current_kp_id = None
    region_end_time = None

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        timestamp = int(frame_count / fps * 1000)  # Convert frame count to milliseconds

        # Find the knowledge-point region active at this timestamp
        for match in matches:
            start_time, end_time, region, kp_id = match
            if start_time <= timestamp <= end_time:
                current_region = region
                current_kp_id = kp_id
                region_end_time = end_time
                break

        # Drop the highlight once its time window has passed
        if current_region is not None and timestamp > region_end_time:
            current_region = None
            current_kp_id = None

        if current_region is not None:
            cv2.rectangle(frame, (current_region[0], current_region[1]),
                          (current_region[2], current_region[3]), (0, 255, 0), 2)
            cv2.putText(frame, current_kp_id, (current_region[0], current_region[1] - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        out.write(frame)
        frame_count += 1

    cap.release()
    out.release()

if __name__ == "__main__":
    # Paths to the files
    voice_match_final_path = 'voice_match_final.txt'
    final_match_test_path = 'final_match_test(1).txt'
    input_video_path = 'video_data/5.mp4'  # Path to the input video
    output_video_path = 'video_data/5_match.mp4'  # Path to save the output video with annotations

    # Parse the files
    voice_data = parse_voice_match_final(voice_match_final_path)
    ocr_data = parse_final_match_test(final_match_test_path)

    # Match the knowledge points with OCR/detection regions
    matches = match_knowledge_points(voice_data, ocr_data)

    # Print matches for debugging
    for match in matches:
        print(f"Start Time: {match[0]}, End Time: {match[1]}, Region: {match[2]}, KP_ID: {match[3]}")

    # Mark the video with matched regions
    # mark_video(input_video_path, output_video_path, matches)

 

# -*- coding: utf-8 -*-
"""
@Time : 2024/6/16 14:52
@Auth : Zhao Yishuo
@File :match.py
@IDE :PyCharm
"""
import re

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

plt.rcParams['font.sans-serif'] = ['SimHei']

# Read and parse the raw gaze-tracking text file by hand
eyedata_path = 'eye_output_16.txt'  # path to the gaze log
data = []
with open(eyedata_path, 'r') as file:
    for line in file:
        line = line.strip()
        if ':' in line:  # only keep well-formed "key: value" lines
            key, value = line.split(':', 1)
            data.append([key.strip(), value.strip()])
        # else:
            # print(f"Skipping malformed line: {line}")  # log malformed lines

data = pd.DataFrame(data, columns=['Type', 'Value'])
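# The loop above assumes a log of repeating "key: value" lines, e.g. (shape
# inferred from the parsing code; the real eye_output_16.txt may differ):
#   Timestamp: 12.5
#   Video: 5.mp4
#   Relative Position: [812.3 -455.7]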

# Clean the data
timestamps = data['Value'][data['Type'] == 'Timestamp'].astype(float).reset_index(drop=True)
videos = data['Value'][data['Type'] == 'Video'].reset_index(drop=True)
positions = data['Value'][data['Type'] == 'Relative Position'].str.extract(r'\[(.*?)\]')[0]  # gaze positions
positions = positions.str.split(expand=True).astype(float).reset_index(drop=True)
positions[0] = positions[0].round()
positions[1] = (-positions[1]).round()  # flip the y axis (the log's y appears to increase upward)

# Keep the first two columns (x and y)
data = positions.iloc[:, [0, 1]]

# Make sure both columns are numeric
data = data.apply(pd.to_numeric, errors='coerce')
x_values = data[0].tolist()
y_values = data[1].tolist()

eye_pos = np.vstack([x_values, y_values]).T  # DataFrame converted to an (N, 2) NumPy array

eye_timestamps = np.array(timestamps.tolist())

# Cached copies from an earlier run; loading them replaces the arrays parsed above
# np.save('eye_positions.npy', eye_pos)
# np.save('eye_timestamps.npy', eye_timestamps)
eye_pos = np.load('eye_positions.npy')
eye_timestamps = np.load('eye_timestamps.npy')

# print(eye_pos,eye_timestamps)
text_path = 'final_match_test.txt'


# Function to parse the text file and extract data
def parse_text_file(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    # Regular expressions to match timestamps, OCR, and detection positions
    timestamp_pattern = re.compile(r'Timestamp: (\d+)')
    ocr_pattern = re.compile(r'OCR \d+: \((\d+), (\d+), (\d+), (\d+)\) \(Knowledge_point_id: KP\d+\) (.+)')
    detection_pattern = re.compile(r'Detection \d+ \(Knowledge_point_id: (KP\d+(?:, KP\d+)*)\): \((\d+), (\d+), (\d+), (\d+)\)')

    # Lists to store parsed data
    parsed_data = []

    # Current timestamp
    current_timestamp = None

    # Split content by lines
    lines = content.split('\n')
    for line in lines:
        # Check for a timestamp
        timestamp_match = timestamp_pattern.match(line)
        if timestamp_match:
            current_timestamp = int(timestamp_match.group(1))

        # Check for OCR match
        ocr_match = ocr_pattern.match(line)
        if ocr_match:
            x1, y1, x2, y2, ocr_text = ocr_match.groups()
            parsed_data.append((current_timestamp, ocr_text, (int(x1), int(y1), int(x2), int(y2))))

        # Check for detection match
        detection_match = detection_pattern.match(line)
        if detection_match:
            knowledge_points, x1, y1, x2, y2 = detection_match.groups()
            parsed_data.append((current_timestamp, f'Detection with {knowledge_points}', (int(x1), int(y1), int(x2), int(y2))))

    return parsed_data
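# Illustrative lines for the three patterns above (assumed from the regexes;
# the real file may differ):
#   Timestamp: 1200
#   OCR 1: (100, 50, 400, 120) (Knowledge_point_id: KP1) Bayes' theorem
#   Detection 1 (Knowledge_point_id: KP2, KP3): (420, 200, 900, 540)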


# Parse the file and print the extracted data
parsed_data = parse_text_file(text_path)
text_timestamps = []
text_pos = []
for entry in parsed_data:
    text_timestamps.append(np.float32(entry[0]) / 1000)  # ms -> s, to match the gaze log's time base
    text_pos.append(np.array(entry[-1], dtype=np.float32))
text_timestamps = np.array(text_timestamps)
text_pos = np.array(text_pos)


def check_gaze_in_regions(gaze_timestamps, gaze_positions, region_timestamps, region_rects):
    """For each gaze sample, look up the most recent annotated frame and
    record the sample when it falls inside one of that frame's rectangles."""
    results = []
    num_regions = len(region_timestamps)
    if num_regions == 0:
        return results
    last = 0  # index of the latest region annotated at or before the current gaze time

    for gaze_time, (gaze_x, gaze_y) in zip(gaze_timestamps, gaze_positions):
        # Advance to the most recent annotation that is not later than this sample
        while last + 1 < num_regions and region_timestamps[last + 1] <= gaze_time:
            last += 1
        # All regions of one frame share a timestamp; walk back to the first of them
        frame_time = region_timestamps[last]
        first = last
        while first > 0 and region_timestamps[first - 1] == frame_time:
            first -= 1
        # Count a hit when the gaze point lies inside any region of that frame
        for idx in range(first, last + 1):
            x1, y1, x2, y2 = region_rects[idx]
            if x1 <= gaze_x <= x2 and y1 <= gaze_y <= y2:
                results.append((gaze_time, (gaze_x, gaze_y)))
                break
    return results

results = check_gaze_in_regions(eye_timestamps, eye_pos, text_timestamps, text_pos)

# Print or process results
for result in results:
    print(result)
