def evaluate_summaries(row):
    """Score the simple and the complex summary of one article.

    For each of the two summaries, ``evaluation_prompt`` is filled in with the
    article text and that summary, sent as a single user message to the
    ``gpt-4o`` model, and the reply is parsed into ``ScoreCard``.

    Args:
        row: Subscriptable record providing the ``"content"``,
            ``"simple_summary"`` and ``"complex_summary"`` entries.

    Returns:
        A ``(simple, complex)`` tuple holding the ``parsed`` attribute of the
        first choice of each response, in that order.
    """
    # NOTE(review): `client` is presumably an OpenAI SDK client and `ScoreCard`
    # a structured-output schema; both are defined outside this block - confirm.
    article = row["content"]

    # Build both prompts up front so a missing column fails before any request.
    prompts = [
        evaluation_prompt.format(original_article=article, summary=row[column])
        for column in ("simple_summary", "complex_summary")
    ]

    scorecards = []
    for prompt in prompts:
        response = client.beta.chat.completions.parse(
            model="gpt-4o",
            messages=[{"role": "user", "content": prompt}],
            response_format=ScoreCard,
        )
        scorecards.append(response.choices[0].message.parsed)

    return tuple(scorecards)
# Prepare two empty columns that will receive the per-row evaluation results.
df['simple_evaluation'] = None
df['complex_evaluation'] = None

# Evaluate the summaries of every row concurrently on a thread pool. Each
# future is keyed to the dataframe index label of the row it was submitted
# for, so results can be written back as they complete, in any order.
with ThreadPoolExecutor() as executor:
    futures = {
        executor.submit(evaluate_summaries, row): index
        for index, row in df.iterrows()
    }

    # tqdm wraps the completion iterator to show a progress bar.
    progress = tqdm(
        as_completed(futures),
        total=len(futures),
        desc="Evaluating Summaries",
    )
    for future in progress:
        index = futures[future]
        # result() re-raises any exception raised inside the worker call.
        simple_evaluation, complex_evaluation = future.result()
        df.at[index, 'simple_evaluation'] = simple_evaluation
        df.at[index, 'complex_evaluation'] = complex_evaluation

df.head()