Code:
import numpy as np
import torch
from tqdm import trange
from sentence_transformers.util import batch_to_device

def inference(df, model, tokenizer, device):
    batch_size = 64
    max_length = 512
    sentences = list(df['query_text'].values)  # list of query texts
    all_embeddings = []
    # Sort the sentences by length in descending order to optimize memory usage and
    # processing speed: batches of similar-length sentences need less padding.
    length_sorted_idx = np.argsort([-len(sen) for sen in sentences])
    sentences_sorted = [sentences[idx] for idx in length_sorted_idx]
    for start_index in trange(0, len(sentences), batch_size, desc="Batches", disable=False):
        sentences_batch = sentences_sorted[start_index: start_index + batch_size]
        features = tokenizer(sentences_batch, max_length=max_length, padding=True, truncation=True,
                             return_tensors="pt")
        features = batch_to_device(features, device)
        with torch.no_grad():  # run the model without computing gradients to save memory and compute
            # Passing arguments with the double-star ** operator is a common idiom: it unpacks
            # the dict's key-value pairs and passes them as keyword arguments to the call.
            outputs = model.model(**features)
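            # --- The original snippet ends above. What follows is a hedged sketch of a
            # --- typical completion, assuming model.model is a Hugging Face encoder whose
            # --- output exposes last_hidden_state, and assuming CLS-token (first position)
            # --- pooling; neither detail is confirmed by the source.
            embeddings = outputs.last_hidden_state[:, 0]
        all_embeddings.extend(embeddings.cpu().numpy())
    # Restore the original input order by inverting the length-based sort, so the
    # returned embeddings line up row-for-row with df['query_text'].
    all_embeddings = [all_embeddings[idx] for idx in np.argsort(length_sorted_idx)]
    return np.asarray(all_embeddings)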