Serialization
Overview
Strands Evals provides JSON serialization for experiments and reports, enabling you to save, load, version, and share evaluation work.
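As a quick orientation, the sketch below round-trips an experiment through a file using only the to_file and from_file calls covered in the sections that follow; it assumes `experiment` is an existing Experiment instance.

from strands_evals import Experiment

# Assumes `experiment` is an Experiment you have already built
experiment.to_file("baseline.json")                # serialize to JSON on disk
restored = Experiment.from_file("baseline.json")   # reconstruct it later or elsewhere
assert len(restored.cases) == len(experiment.cases)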
Saving Experiments
from strands_evals import Experiment
# Save to file
experiment.to_file("my_experiment.json")
experiment.to_file("my_experiment")  # .json added automatically
# Relative path
experiment.to_file("experiments/baseline.json")
# Absolute path
experiment.to_file("/path/to/experiments/baseline.json")

Loading Experiments
# Load from file
experiment = Experiment.from_file("my_experiment.json")
print(f"Loaded {len(experiment.cases)} cases")print(f"Evaluators: {[e.get_type_name() for e in experiment.evaluators]}")Custom Evaluators
Pass custom evaluator classes when loading:
from strands_evals.evaluators import Evaluator
class CustomEvaluator(Evaluator):
    def evaluate(self, evaluation_case):
        # Custom logic; also import EvaluationOutput alongside Evaluator
        return EvaluationOutput(score=1.0, test_pass=True, reason="...")
# Save with custom evaluator
experiment = Experiment(
    cases=cases,
    evaluators=[CustomEvaluator()]
)
experiment.to_file("custom.json")
# Load with custom evaluator class
loaded = Experiment.from_file(
    "custom.json",
    custom_evaluators=[CustomEvaluator]
)

Dictionary Conversion
# To dictionary
experiment_dict = experiment.to_dict()
# From dictionary
experiment = Experiment.from_dict(experiment_dict)
# With custom evaluators
experiment = Experiment.from_dict(
    experiment_dict,
    custom_evaluators=[CustomEvaluator]
)

Saving Reports
import json
# Run evaluation
reports = experiment.run_evaluations(task_function)
# Save reports
for i, report in enumerate(reports):
    report_data = {
        "evaluator": experiment.evaluators[i].get_type_name(),
        "overall_score": report.overall_score,
        "scores": report.scores,
        "test_passes": report.test_passes,
        "reasons": report.reasons
    }
with open(f"report_{i}.json", "w") as f: json.dump(report_data, f, indent=2)Versioning Strategies
Versioning Strategies

Timestamp Versioning
from datetime import datetime
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
experiment.to_file(f"experiment_{timestamp}.json")
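With timestamped names, the most recent snapshot can be located by sorting filenames, since the YYYYMMDD_HHMMSS format sorts lexicographically. A minimal sketch using only the standard library and the naming pattern above:

from pathlib import Path

# The last file in sorted order is the newest snapshot
latest = sorted(Path(".").glob("experiment_*.json"))[-1]
experiment = Experiment.from_file(str(latest))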
Semantic Versioning

experiment.to_file("experiment_v1.json")
experiment.to_file("experiment_v2.json")

Organizing Files
Directory Structure
experiments/
├── baseline/
│   ├── experiment.json
│   └── reports/
├── iteration_1/
│   ├── experiment.json
│   └── reports/
└── final/
    ├── experiment.json
    └── reports/

Organized Saving
from pathlib import Path
base_dir = Path("experiments/iteration_1")base_dir.mkdir(parents=True, exist_ok=True)
# Save experiment
experiment.to_file(base_dir / "experiment.json")
# Save reports
reports_dir = base_dir / "reports"
reports_dir.mkdir(exist_ok=True)

Saving Experiments with Reports
from pathlib import Path
import json
def save_with_reports(experiment, reports, base_name):
    base_path = Path(f"evaluations/{base_name}")
    base_path.mkdir(parents=True, exist_ok=True)
    # Save experiment
    experiment.to_file(base_path / "experiment.json")
    # Save reports
    for i, report in enumerate(reports):
        evaluator_name = experiment.evaluators[i].get_type_name()
        report_data = {
            "evaluator": evaluator_name,
            "overall_score": report.overall_score,
            "pass_rate": sum(report.test_passes) / len(report.test_passes),
            "scores": report.scores
        }
        with open(base_path / f"report_{evaluator_name}.json", "w") as f:
            json.dump(report_data, f, indent=2)
# Usage
reports = experiment.run_evaluations(task_function)
save_with_reports(experiment, reports, "baseline_20250115")

Error Handling
from pathlib import Path
def safe_load(path, custom_evaluators=None):
    try:
        file_path = Path(path)
        if not file_path.exists():
            raise FileNotFoundError(f"File not found: {path}")
        if file_path.suffix != ".json":
            raise ValueError(f"Expected .json file, got: {file_path.suffix}")
        experiment = Experiment.from_file(path, custom_evaluators=custom_evaluators)
        print(f"✓ Loaded {len(experiment.cases)} cases")
        return experiment
    except Exception as e:
        print(f"✗ Failed to load: {e}")
        return None
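For example, the helper can be called with the custom evaluator classes defined earlier; the filename here reuses the one from the custom evaluator section and is illustrative.

experiment = safe_load("custom.json", custom_evaluators=[CustomEvaluator])
if experiment is not None:
    reports = experiment.run_evaluations(task_function)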
Best Practices

1. Use Consistent Naming
# Good
experiment.to_file("customer_service_baseline_v1.json")
# Less helpful
experiment.to_file("test.json")

2. Validate After Loading
experiment = Experiment.from_file("experiment.json")
assert len(experiment.cases) > 0, "No cases loaded"
assert len(experiment.evaluators) > 0, "No evaluators loaded"

3. Include Metadata
experiment_data = experiment.to_dict()
experiment_data["metadata"] = {
    "created_date": datetime.now().isoformat(),
    "description": "Baseline evaluation",
    "version": "1.0"
}
with open("experiment.json", "w") as f: json.dump(experiment_data, f, indent=2)Related Documentation
Related Documentation

- Experiment Management: Organize experiments
- Experiment Generator: Generate experiments
- Quickstart Guide: Get started with Strands Evals