[
  {
    "run_path": "benchmark_output/runs/v0.1.0/helpdesk_call_summarization:model=anthropic_claude-3-5-haiku-20241022",
    "run_spec": {
      "name": "helpdesk_call_summarization:model=anthropic_claude-3-5-haiku-20241022",
      "scenario_spec": {
        "class_name": "helm.benchmark.scenarios.helpdesk_call_summarization_scenario.HelpdeskCallSummarizationScenario",
        "args": {}
      },
      "adapter_spec": {
        "method": "generation",
        "global_prefix": "",
        "global_suffix": "",
        "instructions": "The following is a call transcript of a call between a compnay's employee and the company's IT helpdesk. Summarize the call transcript in under 100 words.",
        "input_prefix": "### Call Transcript\n",
        "input_suffix": "",
        "reference_prefix": "A. ",
        "reference_suffix": "\n",
        "chain_of_thought_prefix": "",
        "chain_of_thought_suffix": "\n",
        "output_prefix": "",
        "output_suffix": "",
        "instance_prefix": "\n",
        "substitutions": [],
        "max_train_instances": 0,
        "max_eval_instances": 100000,
        "num_outputs": 1,
        "num_train_trials": 1,
        "num_trials": 1,
        "sample_train": true,
        "model_deployment": "anthropic/claude-3-5-haiku-20241022",
        "model": "anthropic/claude-3-5-haiku-20241022",
        "temperature": 0.0,
        "max_tokens": 512,
        "stop_sequences": [],
        "multi_label": false
      },
      "metric_specs": [
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.BasicGenerationMetric",
          "args": {
            "names": []
          }
        },
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.BasicReferenceMetric",
          "args": {}
        },
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.InstancesPerSplitMetric",
          "args": {}
        },
        {
          "class_name": "helm.benchmark.metrics.annotation_metrics.AnnotationLikertScaleMetric",
          "args": {
            "annotator_name": "helpdesk_call_center_summarization",
            "key": "score",
            "min_score": 1,
            "max_score": 10
          }
        }
      ],
      "data_augmenter_spec": {
        "perturbation_specs": [],
        "should_augment_train_instances": false,
        "should_include_original_train": false,
        "should_skip_unchanged_train": false,
        "should_augment_eval_instances": false,
        "should_include_original_eval": false,
        "should_skip_unchanged_eval": false,
        "seeds_per_instance": 1
      },
      "groups": [
        "helpdesk_call_summarization"
      ],
      "annotators": [
        {
          "class_name": "helm.benchmark.annotation.helpdesk_call_summarization_annotator.HelpdeskCallSummarizationAnnotator",
          "args": {}
        }
      ]
    },
    "stats": [
      {
        "name": {
          "name": "num_references",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 1372.1666666666667,
        "sum_squared": 1882841.3611111112,
        "min": 1372.1666666666667,
        "max": 1372.1666666666667,
        "mean": 1372.1666666666667,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 86.29012345679013,
        "sum_squared": 7445.9854061880815,
        "min": 86.29012345679013,
        "max": 86.29012345679013,
        "mean": 86.29012345679013,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 86.29012345679013,
        "sum_squared": 7445.9854061880815,
        "min": 86.29012345679013,
        "max": 86.29012345679013,
        "mean": 86.29012345679013,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "training_co2_cost",
          "split": "test"
        },
        "count": 0,
        "sum": 0.0,
        "sum_squared": 0.0
      },
      {
        "name": {
          "name": "training_energy_cost",
          "split": "test"
        },
        "count": 0,
        "sum": 0.0,
        "sum_squared": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test"
        },
        "count": 1,
        "sum": 2.9493638438942993,
        "sum_squared": 8.698747083670957,
        "min": 2.9493638438942993,
        "max": 2.9493638438942993,
        "mean": 2.9493638438942993,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 86.29012345679013,
        "sum_squared": 7445.9854061880815,
        "min": 86.29012345679013,
        "max": 86.29012345679013,
        "mean": 86.29012345679013,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test"
        },
        "count": 1,
        "sum": 460.10493827160496,
        "sum_squared": 211696.5542219174,
        "min": 460.10493827160496,
        "max": 460.10493827160496,
        "mean": 460.10493827160496,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "perplexity",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "bits_per_byte",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob_per_byte",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_references",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_references",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1372.1666666666667,
        "sum_squared": 1882841.3611111112,
        "min": 1372.1666666666667,
        "max": 1372.1666666666667,
        "mean": 1372.1666666666667,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1372.1666666666667,
        "sum_squared": 1882841.3611111112,
        "min": 1372.1666666666667,
        "max": 1372.1666666666667,
        "mean": 1372.1666666666667,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 86.29012345679013,
        "sum_squared": 7445.9854061880815,
        "min": 86.29012345679013,
        "max": 86.29012345679013,
        "mean": 86.29012345679013,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 86.29012345679013,
        "sum_squared": 7445.9854061880815,
        "min": 86.29012345679013,
        "max": 86.29012345679013,
        "mean": 86.29012345679013,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 86.29012345679013,
        "sum_squared": 7445.9854061880815,
        "min": 86.29012345679013,
        "max": 86.29012345679013,
        "mean": 86.29012345679013,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 86.29012345679013,
        "sum_squared": 7445.9854061880815,
        "min": 86.29012345679013,
        "max": 86.29012345679013,
        "mean": 86.29012345679013,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 2.9493638438942993,
        "sum_squared": 8.698747083670957,
        "min": 2.9493638438942993,
        "max": 2.9493638438942993,
        "mean": 2.9493638438942993,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 2.9493638438942993,
        "sum_squared": 8.698747083670957,
        "min": 2.9493638438942993,
        "max": 2.9493638438942993,
        "mean": 2.9493638438942993,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 86.29012345679013,
        "sum_squared": 7445.9854061880815,
        "min": 86.29012345679013,
        "max": 86.29012345679013,
        "mean": 86.29012345679013,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 86.29012345679013,
        "sum_squared": 7445.9854061880815,
        "min": 86.29012345679013,
        "max": 86.29012345679013,
        "mean": 86.29012345679013,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 460.10493827160496,
        "sum_squared": 211696.5542219174,
        "min": 460.10493827160496,
        "max": 460.10493827160496,
        "mean": 460.10493827160496,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 460.10493827160496,
        "sum_squared": 211696.5542219174,
        "min": 460.10493827160496,
        "max": 460.10493827160496,
        "mean": 460.10493827160496,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_instances",
          "split": "test"
        },
        "count": 1,
        "sum": 162.0,
        "sum_squared": 26244.0,
        "min": 162.0,
        "max": 162.0,
        "mean": 162.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test"
        },
        "count": 1,
        "sum": 0.7969821673525349,
        "sum_squared": 0.635180575077944,
        "min": 0.7969821673525349,
        "max": 0.7969821673525349,
        "mean": 0.7969821673525349,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.7969821673525349,
        "sum_squared": 0.635180575077944,
        "min": 0.7969821673525349,
        "max": 0.7969821673525349,
        "mean": 0.7969821673525349,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.7969821673525349,
        "sum_squared": 0.635180575077944,
        "min": 0.7969821673525349,
        "max": 0.7969821673525349,
        "mean": 0.7969821673525349,
        "variance": 0.0,
        "stddev": 0.0
      }
    ]
  },
  {
    "run_path": "benchmark_output/runs/v0.1.0/helpdesk_call_summarization:model=anthropic_claude-3-5-sonnet-20240620",
    "run_spec": {
      "name": "helpdesk_call_summarization:model=anthropic_claude-3-5-sonnet-20240620",
      "scenario_spec": {
        "class_name": "helm.benchmark.scenarios.helpdesk_call_summarization_scenario.HelpdeskCallSummarizationScenario",
        "args": {}
      },
      "adapter_spec": {
        "method": "generation",
        "global_prefix": "",
        "global_suffix": "",
        "instructions": "The following is a call transcript of a call between a compnay's employee and the company's IT helpdesk. Summarize the call transcript in under 100 words.",
        "input_prefix": "### Call Transcript\n",
        "input_suffix": "",
        "reference_prefix": "A. ",
        "reference_suffix": "\n",
        "chain_of_thought_prefix": "",
        "chain_of_thought_suffix": "\n",
        "output_prefix": "",
        "output_suffix": "",
        "instance_prefix": "\n",
        "substitutions": [],
        "max_train_instances": 0,
        "max_eval_instances": 100000,
        "num_outputs": 1,
        "num_train_trials": 1,
        "num_trials": 1,
        "sample_train": true,
        "model_deployment": "anthropic/claude-3-5-sonnet-20240620",
        "model": "anthropic/claude-3-5-sonnet-20240620",
        "temperature": 0.0,
        "max_tokens": 512,
        "stop_sequences": [],
        "multi_label": false
      },
      "metric_specs": [
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.BasicGenerationMetric",
          "args": {
            "names": []
          }
        },
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.BasicReferenceMetric",
          "args": {}
        },
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.InstancesPerSplitMetric",
          "args": {}
        },
        {
          "class_name": "helm.benchmark.metrics.annotation_metrics.AnnotationLikertScaleMetric",
          "args": {
            "annotator_name": "helpdesk_call_center_summarization",
            "key": "score",
            "min_score": 1,
            "max_score": 10
          }
        }
      ],
      "data_augmenter_spec": {
        "perturbation_specs": [],
        "should_augment_train_instances": false,
        "should_include_original_train": false,
        "should_skip_unchanged_train": false,
        "should_augment_eval_instances": false,
        "should_include_original_eval": false,
        "should_skip_unchanged_eval": false,
        "seeds_per_instance": 1
      },
      "groups": [
        "helpdesk_call_summarization"
      ],
      "annotators": [
        {
          "class_name": "helm.benchmark.annotation.helpdesk_call_summarization_annotator.HelpdeskCallSummarizationAnnotator",
          "args": {}
        }
      ]
    },
    "stats": [
      {
        "name": {
          "name": "num_references",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 1372.1666666666667,
        "sum_squared": 1882841.3611111112,
        "min": 1372.1666666666667,
        "max": 1372.1666666666667,
        "mean": 1372.1666666666667,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 130.02469135802468,
        "sum_squared": 16906.420362749577,
        "min": 130.02469135802468,
        "max": 130.02469135802468,
        "mean": 130.02469135802468,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 130.02469135802468,
        "sum_squared": 16906.420362749577,
        "min": 130.02469135802468,
        "max": 130.02469135802468,
        "mean": 130.02469135802468,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "training_co2_cost",
          "split": "test"
        },
        "count": 0,
        "sum": 0.0,
        "sum_squared": 0.0
      },
      {
        "name": {
          "name": "training_energy_cost",
          "split": "test"
        },
        "count": 0,
        "sum": 0.0,
        "sum_squared": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test"
        },
        "count": 1,
        "sum": 3.4916146360797646,
        "sum_squared": 12.191372766886428,
        "min": 3.4916146360797646,
        "max": 3.4916146360797646,
        "mean": 3.4916146360797646,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 130.02469135802468,
        "sum_squared": 16906.420362749577,
        "min": 130.02469135802468,
        "max": 130.02469135802468,
        "mean": 130.02469135802468,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test"
        },
        "count": 1,
        "sum": 683.5370370370371,
        "sum_squared": 467222.8810013718,
        "min": 683.5370370370371,
        "max": 683.5370370370371,
        "mean": 683.5370370370371,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "perplexity",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "bits_per_byte",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob_per_byte",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_references",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_references",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1372.1666666666667,
        "sum_squared": 1882841.3611111112,
        "min": 1372.1666666666667,
        "max": 1372.1666666666667,
        "mean": 1372.1666666666667,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1372.1666666666667,
        "sum_squared": 1882841.3611111112,
        "min": 1372.1666666666667,
        "max": 1372.1666666666667,
        "mean": 1372.1666666666667,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 130.02469135802468,
        "sum_squared": 16906.420362749577,
        "min": 130.02469135802468,
        "max": 130.02469135802468,
        "mean": 130.02469135802468,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 130.02469135802468,
        "sum_squared": 16906.420362749577,
        "min": 130.02469135802468,
        "max": 130.02469135802468,
        "mean": 130.02469135802468,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 130.02469135802468,
        "sum_squared": 16906.420362749577,
        "min": 130.02469135802468,
        "max": 130.02469135802468,
        "mean": 130.02469135802468,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 130.02469135802468,
        "sum_squared": 16906.420362749577,
        "min": 130.02469135802468,
        "max": 130.02469135802468,
        "mean": 130.02469135802468,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 3.4916146360797646,
        "sum_squared": 12.191372766886428,
        "min": 3.4916146360797646,
        "max": 3.4916146360797646,
        "mean": 3.4916146360797646,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 3.4916146360797646,
        "sum_squared": 12.191372766886428,
        "min": 3.4916146360797646,
        "max": 3.4916146360797646,
        "mean": 3.4916146360797646,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 130.02469135802468,
        "sum_squared": 16906.420362749577,
        "min": 130.02469135802468,
        "max": 130.02469135802468,
        "mean": 130.02469135802468,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 130.02469135802468,
        "sum_squared": 16906.420362749577,
        "min": 130.02469135802468,
        "max": 130.02469135802468,
        "mean": 130.02469135802468,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 683.5370370370371,
        "sum_squared": 467222.8810013718,
        "min": 683.5370370370371,
        "max": 683.5370370370371,
        "mean": 683.5370370370371,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 683.5370370370371,
        "sum_squared": 467222.8810013718,
        "min": 683.5370370370371,
        "max": 683.5370370370371,
        "mean": 683.5370370370371,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_instances",
          "split": "test"
        },
        "count": 1,
        "sum": 162.0,
        "sum_squared": 26244.0,
        "min": 162.0,
        "max": 162.0,
        "mean": 162.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test"
        },
        "count": 1,
        "sum": 0.8072702331961568,
        "sum_squared": 0.6516852294045773,
        "min": 0.8072702331961568,
        "max": 0.8072702331961568,
        "mean": 0.8072702331961568,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.8072702331961568,
        "sum_squared": 0.6516852294045773,
        "min": 0.8072702331961568,
        "max": 0.8072702331961568,
        "mean": 0.8072702331961568,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.8072702331961568,
        "sum_squared": 0.6516852294045773,
        "min": 0.8072702331961568,
        "max": 0.8072702331961568,
        "mean": 0.8072702331961568,
        "variance": 0.0,
        "stddev": 0.0
      }
    ]
  },
  {
    "run_path": "benchmark_output/runs/v0.1.0/helpdesk_call_summarization:model=deepseek-ai_deepseek-v3",
    "run_spec": {
      "name": "helpdesk_call_summarization:model=deepseek-ai_deepseek-v3",
      "scenario_spec": {
        "class_name": "helm.benchmark.scenarios.helpdesk_call_summarization_scenario.HelpdeskCallSummarizationScenario",
        "args": {}
      },
      "adapter_spec": {
        "method": "generation",
        "global_prefix": "",
        "global_suffix": "",
        "instructions": "The following is a call transcript of a call between a compnay's employee and the company's IT helpdesk. Summarize the call transcript in under 100 words.",
        "input_prefix": "### Call Transcript\n",
        "input_suffix": "",
        "reference_prefix": "A. ",
        "reference_suffix": "\n",
        "chain_of_thought_prefix": "",
        "chain_of_thought_suffix": "\n",
        "output_prefix": "",
        "output_suffix": "",
        "instance_prefix": "\n",
        "substitutions": [],
        "max_train_instances": 0,
        "max_eval_instances": 100000,
        "num_outputs": 1,
        "num_train_trials": 1,
        "num_trials": 1,
        "sample_train": true,
        "model_deployment": "together/deepseek-v3",
        "model": "deepseek-ai/deepseek-v3",
        "temperature": 0.0,
        "max_tokens": 512,
        "stop_sequences": [],
        "multi_label": false
      },
      "metric_specs": [
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.BasicGenerationMetric",
          "args": {
            "names": []
          }
        },
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.BasicReferenceMetric",
          "args": {}
        },
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.InstancesPerSplitMetric",
          "args": {}
        },
        {
          "class_name": "helm.benchmark.metrics.annotation_metrics.AnnotationLikertScaleMetric",
          "args": {
            "annotator_name": "helpdesk_call_center_summarization",
            "key": "score",
            "min_score": 1,
            "max_score": 10
          }
        }
      ],
      "data_augmenter_spec": {
        "perturbation_specs": [],
        "should_augment_train_instances": false,
        "should_include_original_train": false,
        "should_skip_unchanged_train": false,
        "should_augment_eval_instances": false,
        "should_include_original_eval": false,
        "should_skip_unchanged_eval": false,
        "seeds_per_instance": 1
      },
      "groups": [
        "helpdesk_call_summarization"
      ],
      "annotators": [
        {
          "class_name": "helm.benchmark.annotation.helpdesk_call_summarization_annotator.HelpdeskCallSummarizationAnnotator",
          "args": {}
        }
      ]
    },
    "stats": [
      {
        "name": {
          "name": "num_references",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 1336.8456790123457,
        "sum_squared": 1787156.3694939797,
        "min": 1336.8456790123457,
        "max": 1336.8456790123457,
        "mean": 1336.8456790123457,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "training_co2_cost",
          "split": "test"
        },
        "count": 0,
        "sum": 0.0,
        "sum_squared": 0.0
      },
      {
        "name": {
          "name": "training_energy_cost",
          "split": "test"
        },
        "count": 0,
        "sum": 0.0,
        "sum_squared": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test"
        },
        "count": 1,
        "sum": 15.269175598650802,
        "sum_squared": 233.14772346243308,
        "min": 15.269175598650802,
        "max": 15.269175598650802,
        "mean": 15.269175598650802,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_references",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_references",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1336.8456790123457,
        "sum_squared": 1787156.3694939797,
        "min": 1336.8456790123457,
        "max": 1336.8456790123457,
        "mean": 1336.8456790123457,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1336.8456790123457,
        "sum_squared": 1787156.3694939797,
        "min": 1336.8456790123457,
        "max": 1336.8456790123457,
        "mean": 1336.8456790123457,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 15.269175598650802,
        "sum_squared": 233.14772346243308,
        "min": 15.269175598650802,
        "max": 15.269175598650802,
        "mean": 15.269175598650802,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 15.269175598650802,
        "sum_squared": 233.14772346243308,
        "min": 15.269175598650802,
        "max": 15.269175598650802,
        "mean": 15.269175598650802,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_instances",
          "split": "test"
        },
        "count": 1,
        "sum": 162.0,
        "sum_squared": 26244.0,
        "min": 162.0,
        "max": 162.0,
        "mean": 162.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test"
        },
        "count": 1,
        "sum": 0.8161865569272949,
        "sum_squared": 0.6661604957088324,
        "min": 0.8161865569272949,
        "max": 0.8161865569272949,
        "mean": 0.8161865569272949,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.8161865569272949,
        "sum_squared": 0.6661604957088324,
        "min": 0.8161865569272949,
        "max": 0.8161865569272949,
        "mean": 0.8161865569272949,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.8161865569272949,
        "sum_squared": 0.6661604957088324,
        "min": 0.8161865569272949,
        "max": 0.8161865569272949,
        "mean": 0.8161865569272949,
        "variance": 0.0,
        "stddev": 0.0
      }
    ]
  },
  {
    "run_path": "benchmark_output/runs/v0.1.0/helpdesk_call_summarization:model=google_gemini-1.5-flash-002",
    "run_spec": {
      "name": "helpdesk_call_summarization:model=google_gemini-1.5-flash-002",
      "scenario_spec": {
        "class_name": "helm.benchmark.scenarios.helpdesk_call_summarization_scenario.HelpdeskCallSummarizationScenario",
        "args": {}
      },
      "adapter_spec": {
        "method": "generation",
        "global_prefix": "",
        "global_suffix": "",
        "instructions": "The following is a call transcript of a call between a compnay's employee and the company's IT helpdesk. Summarize the call transcript in under 100 words.",
        "input_prefix": "### Call Transcript\n",
        "input_suffix": "",
        "reference_prefix": "A. ",
        "reference_suffix": "\n",
        "chain_of_thought_prefix": "",
        "chain_of_thought_suffix": "\n",
        "output_prefix": "",
        "output_suffix": "",
        "instance_prefix": "\n",
        "substitutions": [],
        "max_train_instances": 0,
        "max_eval_instances": 100000,
        "num_outputs": 1,
        "num_train_trials": 1,
        "num_trials": 1,
        "sample_train": true,
        "model_deployment": "google/gemini-1.5-flash-002",
        "model": "google/gemini-1.5-flash-002",
        "temperature": 0.0,
        "max_tokens": 512,
        "stop_sequences": [],
        "multi_label": false
      },
      "metric_specs": [
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.BasicGenerationMetric",
          "args": {
            "names": []
          }
        },
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.BasicReferenceMetric",
          "args": {}
        },
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.InstancesPerSplitMetric",
          "args": {}
        },
        {
          "class_name": "helm.benchmark.metrics.annotation_metrics.AnnotationLikertScaleMetric",
          "args": {
            "annotator_name": "helpdesk_call_center_summarization",
            "key": "score",
            "min_score": 1,
            "max_score": 10
          }
        }
      ],
      "data_augmenter_spec": {
        "perturbation_specs": [],
        "should_augment_train_instances": false,
        "should_include_original_train": false,
        "should_skip_unchanged_train": false,
        "should_augment_eval_instances": false,
        "should_include_original_eval": false,
        "should_skip_unchanged_eval": false,
        "seeds_per_instance": 1
      },
      "groups": [
        "helpdesk_call_summarization"
      ],
      "annotators": [
        {
          "class_name": "helm.benchmark.annotation.helpdesk_call_summarization_annotator.HelpdeskCallSummarizationAnnotator",
          "args": {}
        }
      ]
    },
    "stats": [
      {
        "name": {
          "name": "num_references",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 1394.8086419753085,
        "sum_squared": 1945491.1477290045,
        "min": 1394.8086419753085,
        "max": 1394.8086419753085,
        "mean": 1394.8086419753085,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "training_co2_cost",
          "split": "test"
        },
        "count": 0,
        "sum": 0.0,
        "sum_squared": 0.0
      },
      {
        "name": {
          "name": "training_energy_cost",
          "split": "test"
        },
        "count": 0,
        "sum": 0.0,
        "sum_squared": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test"
        },
        "count": 1,
        "sum": 0.9047453506493274,
        "sum_squared": 0.8185641495215743,
        "min": 0.9047453506493274,
        "max": 0.9047453506493274,
        "mean": 0.9047453506493274,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_references",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_references",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1394.8086419753085,
        "sum_squared": 1945491.1477290045,
        "min": 1394.8086419753085,
        "max": 1394.8086419753085,
        "mean": 1394.8086419753085,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1394.8086419753085,
        "sum_squared": 1945491.1477290045,
        "min": 1394.8086419753085,
        "max": 1394.8086419753085,
        "mean": 1394.8086419753085,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.9047453506493274,
        "sum_squared": 0.8185641495215743,
        "min": 0.9047453506493274,
        "max": 0.9047453506493274,
        "mean": 0.9047453506493274,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.9047453506493274,
        "sum_squared": 0.8185641495215743,
        "min": 0.9047453506493274,
        "max": 0.9047453506493274,
        "mean": 0.9047453506493274,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_instances",
          "split": "test"
        },
        "count": 1,
        "sum": 162.0,
        "sum_squared": 26244.0,
        "min": 162.0,
        "max": 162.0,
        "mean": 162.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test"
        },
        "count": 1,
        "sum": 0.7764060356652923,
        "sum_squared": 0.6028063322174951,
        "min": 0.7764060356652923,
        "max": 0.7764060356652923,
        "mean": 0.7764060356652923,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.7764060356652923,
        "sum_squared": 0.6028063322174951,
        "min": 0.7764060356652923,
        "max": 0.7764060356652923,
        "mean": 0.7764060356652923,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.7764060356652923,
        "sum_squared": 0.6028063322174951,
        "min": 0.7764060356652923,
        "max": 0.7764060356652923,
        "mean": 0.7764060356652923,
        "variance": 0.0,
        "stddev": 0.0
      }
    ]
  },
  {
    "run_path": "benchmark_output/runs/v0.1.0/helpdesk_call_summarization:model=google_gemini-1.5-pro-002",
    "run_spec": {
      "name": "helpdesk_call_summarization:model=google_gemini-1.5-pro-002",
      "scenario_spec": {
        "class_name": "helm.benchmark.scenarios.helpdesk_call_summarization_scenario.HelpdeskCallSummarizationScenario",
        "args": {}
      },
      "adapter_spec": {
        "method": "generation",
        "global_prefix": "",
        "global_suffix": "",
        "instructions": "The following is a call transcript of a call between a compnay's employee and the company's IT helpdesk. Summarize the call transcript in under 100 words.",
        "input_prefix": "### Call Transcript\n",
        "input_suffix": "",
        "reference_prefix": "A. ",
        "reference_suffix": "\n",
        "chain_of_thought_prefix": "",
        "chain_of_thought_suffix": "\n",
        "output_prefix": "",
        "output_suffix": "",
        "instance_prefix": "\n",
        "substitutions": [],
        "max_train_instances": 0,
        "max_eval_instances": 100000,
        "num_outputs": 1,
        "num_train_trials": 1,
        "num_trials": 1,
        "sample_train": true,
        "model_deployment": "google/gemini-1.5-pro-002",
        "model": "google/gemini-1.5-pro-002",
        "temperature": 0.0,
        "max_tokens": 512,
        "stop_sequences": [],
        "multi_label": false
      },
      "metric_specs": [
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.BasicGenerationMetric",
          "args": {
            "names": []
          }
        },
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.BasicReferenceMetric",
          "args": {}
        },
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.InstancesPerSplitMetric",
          "args": {}
        },
        {
          "class_name": "helm.benchmark.metrics.annotation_metrics.AnnotationLikertScaleMetric",
          "args": {
            "annotator_name": "helpdesk_call_center_summarization",
            "key": "score",
            "min_score": 1,
            "max_score": 10
          }
        }
      ],
      "data_augmenter_spec": {
        "perturbation_specs": [],
        "should_augment_train_instances": false,
        "should_include_original_train": false,
        "should_skip_unchanged_train": false,
        "should_augment_eval_instances": false,
        "should_include_original_eval": false,
        "should_skip_unchanged_eval": false,
        "seeds_per_instance": 1
      },
      "groups": [
        "helpdesk_call_summarization"
      ],
      "annotators": [
        {
          "class_name": "helm.benchmark.annotation.helpdesk_call_summarization_annotator.HelpdeskCallSummarizationAnnotator",
          "args": {}
        }
      ]
    },
    "stats": [
      {
        "name": {
          "name": "num_references",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 1394.8086419753085,
        "sum_squared": 1945491.1477290045,
        "min": 1394.8086419753085,
        "max": 1394.8086419753085,
        "mean": 1394.8086419753085,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "training_co2_cost",
          "split": "test"
        },
        "count": 0,
        "sum": 0.0,
        "sum_squared": 0.0
      },
      {
        "name": {
          "name": "training_energy_cost",
          "split": "test"
        },
        "count": 0,
        "sum": 0.0,
        "sum_squared": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test"
        },
        "count": 1,
        "sum": 2.7368182208802967,
        "sum_squared": 7.490173974142392,
        "min": 2.7368182208802967,
        "max": 2.7368182208802967,
        "mean": 2.7368182208802967,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_references",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_references",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1394.8086419753085,
        "sum_squared": 1945491.1477290045,
        "min": 1394.8086419753085,
        "max": 1394.8086419753085,
        "mean": 1394.8086419753085,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1394.8086419753085,
        "sum_squared": 1945491.1477290045,
        "min": 1394.8086419753085,
        "max": 1394.8086419753085,
        "mean": 1394.8086419753085,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 2.7368182208802967,
        "sum_squared": 7.490173974142392,
        "min": 2.7368182208802967,
        "max": 2.7368182208802967,
        "mean": 2.7368182208802967,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 2.7368182208802967,
        "sum_squared": 7.490173974142392,
        "min": 2.7368182208802967,
        "max": 2.7368182208802967,
        "mean": 2.7368182208802967,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_instances",
          "split": "test"
        },
        "count": 1,
        "sum": 162.0,
        "sum_squared": 26244.0,
        "min": 162.0,
        "max": 162.0,
        "mean": 162.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test"
        },
        "count": 1,
        "sum": 0.785322359396431,
        "sum_squared": 0.6167312081679772,
        "min": 0.785322359396431,
        "max": 0.785322359396431,
        "mean": 0.785322359396431,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.785322359396431,
        "sum_squared": 0.6167312081679772,
        "min": 0.785322359396431,
        "max": 0.785322359396431,
        "mean": 0.785322359396431,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.785322359396431,
        "sum_squared": 0.6167312081679772,
        "min": 0.785322359396431,
        "max": 0.785322359396431,
        "mean": 0.785322359396431,
        "variance": 0.0,
        "stddev": 0.0
      }
    ]
  },
  {
    "run_path": "benchmark_output/runs/v0.1.0/helpdesk_call_summarization:model=meta_llama-3.1-405b-instruct-turbo",
    "run_spec": {
      "name": "helpdesk_call_summarization:model=meta_llama-3.1-405b-instruct-turbo",
      "scenario_spec": {
        "class_name": "helm.benchmark.scenarios.helpdesk_call_summarization_scenario.HelpdeskCallSummarizationScenario",
        "args": {}
      },
      "adapter_spec": {
        "method": "generation",
        "global_prefix": "",
        "global_suffix": "",
        "instructions": "The following is a call transcript of a call between a compnay's employee and the company's IT helpdesk. Summarize the call transcript in under 100 words.",
        "input_prefix": "### Call Transcript\n",
        "input_suffix": "",
        "reference_prefix": "A. ",
        "reference_suffix": "\n",
        "chain_of_thought_prefix": "",
        "chain_of_thought_suffix": "\n",
        "output_prefix": "",
        "output_suffix": "",
        "instance_prefix": "\n",
        "substitutions": [],
        "max_train_instances": 0,
        "max_eval_instances": 100000,
        "num_outputs": 1,
        "num_train_trials": 1,
        "num_trials": 1,
        "sample_train": true,
        "model_deployment": "together/llama-3.1-405b-instruct-turbo",
        "model": "meta/llama-3.1-405b-instruct-turbo",
        "temperature": 0.0,
        "max_tokens": 512,
        "stop_sequences": [],
        "multi_label": false
      },
      "metric_specs": [
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.BasicGenerationMetric",
          "args": {
            "names": []
          }
        },
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.BasicReferenceMetric",
          "args": {}
        },
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.InstancesPerSplitMetric",
          "args": {}
        },
        {
          "class_name": "helm.benchmark.metrics.annotation_metrics.AnnotationLikertScaleMetric",
          "args": {
            "annotator_name": "helpdesk_call_center_summarization",
            "key": "score",
            "min_score": 1,
            "max_score": 10
          }
        }
      ],
      "data_augmenter_spec": {
        "perturbation_specs": [],
        "should_augment_train_instances": false,
        "should_include_original_train": false,
        "should_skip_unchanged_train": false,
        "should_augment_eval_instances": false,
        "should_include_original_eval": false,
        "should_skip_unchanged_eval": false,
        "seeds_per_instance": 1
      },
      "groups": [
        "helpdesk_call_summarization"
      ],
      "annotators": [
        {
          "class_name": "helm.benchmark.annotation.helpdesk_call_summarization_annotator.HelpdeskCallSummarizationAnnotator",
          "args": {}
        }
      ]
    },
    "stats": [
      {
        "name": {
          "name": "num_references",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 1325.1604938271605,
        "sum_squared": 1756050.3344002438,
        "min": 1325.1604938271605,
        "max": 1325.1604938271605,
        "mean": 1325.1604938271605,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 116.8395061728395,
        "sum_squared": 13651.470202713,
        "min": 116.8395061728395,
        "max": 116.8395061728395,
        "mean": 116.8395061728395,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 116.8395061728395,
        "sum_squared": 13651.470202713,
        "min": 116.8395061728395,
        "max": 116.8395061728395,
        "mean": 116.8395061728395,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "training_co2_cost",
          "split": "test"
        },
        "count": 0,
        "sum": 0.0,
        "sum_squared": 0.0
      },
      {
        "name": {
          "name": "training_energy_cost",
          "split": "test"
        },
        "count": 0,
        "sum": 0.0,
        "sum_squared": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test"
        },
        "count": 1,
        "sum": 3.4933589741035744,
        "sum_squared": 12.203556921949978,
        "min": 3.4933589741035744,
        "max": 3.4933589741035744,
        "mean": 3.4933589741035744,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test"
        },
        "count": 1,
        "sum": -17.894988236128505,
        "sum_squared": 320.2306039711776,
        "min": -17.894988236128505,
        "max": -17.894988236128505,
        "mean": -17.894988236128505,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 116.8395061728395,
        "sum_squared": 13651.470202713,
        "min": 116.8395061728395,
        "max": 116.8395061728395,
        "mean": 116.8395061728395,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test"
        },
        "count": 1,
        "sum": 620.4753086419753,
        "sum_squared": 384989.6086343545,
        "min": 620.4753086419753,
        "max": 620.4753086419753,
        "mean": 620.4753086419753,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "perplexity",
          "split": "test"
        },
        "count": 1,
        "sum": 1.1655099442793402,
        "sum_squared": 1.3584134302140307,
        "min": 1.1655099442793402,
        "max": 1.1655099442793402,
        "mean": 1.1655099442793402,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "bits_per_byte",
          "split": "test"
        },
        "count": 1,
        "sum": 0.041608441827498704,
        "sum_squared": 0.0017312624313123438,
        "min": 0.041608441827498704,
        "max": 0.041608441827498704,
        "mean": 0.041608441827498704,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob_per_byte",
          "split": "test"
        },
        "count": 1,
        "sum": -0.028840774140223225,
        "sum_squared": 0.0008317902530073687,
        "min": -0.028840774140223225,
        "max": -0.028840774140223225,
        "mean": -0.028840774140223225,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_references",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_references",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1325.1604938271605,
        "sum_squared": 1756050.3344002438,
        "min": 1325.1604938271605,
        "max": 1325.1604938271605,
        "mean": 1325.1604938271605,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1325.1604938271605,
        "sum_squared": 1756050.3344002438,
        "min": 1325.1604938271605,
        "max": 1325.1604938271605,
        "mean": 1325.1604938271605,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 116.8395061728395,
        "sum_squared": 13651.470202713,
        "min": 116.8395061728395,
        "max": 116.8395061728395,
        "mean": 116.8395061728395,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 116.8395061728395,
        "sum_squared": 13651.470202713,
        "min": 116.8395061728395,
        "max": 116.8395061728395,
        "mean": 116.8395061728395,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 116.8395061728395,
        "sum_squared": 13651.470202713,
        "min": 116.8395061728395,
        "max": 116.8395061728395,
        "mean": 116.8395061728395,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 116.8395061728395,
        "sum_squared": 13651.470202713,
        "min": 116.8395061728395,
        "max": 116.8395061728395,
        "mean": 116.8395061728395,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 3.4933589741035744,
        "sum_squared": 12.203556921949978,
        "min": 3.4933589741035744,
        "max": 3.4933589741035744,
        "mean": 3.4933589741035744,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 3.4933589741035744,
        "sum_squared": 12.203556921949978,
        "min": 3.4933589741035744,
        "max": 3.4933589741035744,
        "mean": 3.4933589741035744,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": -17.894988236128505,
        "sum_squared": 320.2306039711776,
        "min": -17.894988236128505,
        "max": -17.894988236128505,
        "mean": -17.894988236128505,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": -17.894988236128505,
        "sum_squared": 320.2306039711776,
        "min": -17.894988236128505,
        "max": -17.894988236128505,
        "mean": -17.894988236128505,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 116.8395061728395,
        "sum_squared": 13651.470202713,
        "min": 116.8395061728395,
        "max": 116.8395061728395,
        "mean": 116.8395061728395,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 116.8395061728395,
        "sum_squared": 13651.470202713,
        "min": 116.8395061728395,
        "max": 116.8395061728395,
        "mean": 116.8395061728395,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 620.4753086419753,
        "sum_squared": 384989.6086343545,
        "min": 620.4753086419753,
        "max": 620.4753086419753,
        "mean": 620.4753086419753,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 620.4753086419753,
        "sum_squared": 384989.6086343545,
        "min": 620.4753086419753,
        "max": 620.4753086419753,
        "mean": 620.4753086419753,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_instances",
          "split": "test"
        },
        "count": 1,
        "sum": 162.0,
        "sum_squared": 26244.0,
        "min": 162.0,
        "max": 162.0,
        "mean": 162.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test"
        },
        "count": 1,
        "sum": 0.7976680384087768,
        "sum_squared": 0.6362742994989058,
        "min": 0.7976680384087768,
        "max": 0.7976680384087768,
        "mean": 0.7976680384087768,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.7976680384087768,
        "sum_squared": 0.6362742994989058,
        "min": 0.7976680384087768,
        "max": 0.7976680384087768,
        "mean": 0.7976680384087768,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.7976680384087768,
        "sum_squared": 0.6362742994989058,
        "min": 0.7976680384087768,
        "max": 0.7976680384087768,
        "mean": 0.7976680384087768,
        "variance": 0.0,
        "stddev": 0.0
      }
    ]
  },
  {
    "run_path": "benchmark_output/runs/v0.1.0/helpdesk_call_summarization:model=meta_llama-3.1-70b-instruct-turbo",
    "run_spec": {
      "name": "helpdesk_call_summarization:model=meta_llama-3.1-70b-instruct-turbo",
      "scenario_spec": {
        "class_name": "helm.benchmark.scenarios.helpdesk_call_summarization_scenario.HelpdeskCallSummarizationScenario",
        "args": {}
      },
      "adapter_spec": {
        "method": "generation",
        "global_prefix": "",
        "global_suffix": "",
        "instructions": "The following is a call transcript of a call between a compnay's employee and the company's IT helpdesk. Summarize the call transcript in under 100 words.",
        "input_prefix": "### Call Transcript\n",
        "input_suffix": "",
        "reference_prefix": "A. ",
        "reference_suffix": "\n",
        "chain_of_thought_prefix": "",
        "chain_of_thought_suffix": "\n",
        "output_prefix": "",
        "output_suffix": "",
        "instance_prefix": "\n",
        "substitutions": [],
        "max_train_instances": 0,
        "max_eval_instances": 100000,
        "num_outputs": 1,
        "num_train_trials": 1,
        "num_trials": 1,
        "sample_train": true,
        "model_deployment": "together/llama-3.1-70b-instruct-turbo",
        "model": "meta/llama-3.1-70b-instruct-turbo",
        "temperature": 0.0,
        "max_tokens": 512,
        "stop_sequences": [],
        "multi_label": false
      },
      "metric_specs": [
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.BasicGenerationMetric",
          "args": {
            "names": []
          }
        },
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.BasicReferenceMetric",
          "args": {}
        },
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.InstancesPerSplitMetric",
          "args": {}
        },
        {
          "class_name": "helm.benchmark.metrics.annotation_metrics.AnnotationLikertScaleMetric",
          "args": {
            "annotator_name": "helpdesk_call_center_summarization",
            "key": "score",
            "min_score": 1,
            "max_score": 10
          }
        }
      ],
      "data_augmenter_spec": {
        "perturbation_specs": [],
        "should_augment_train_instances": false,
        "should_include_original_train": false,
        "should_skip_unchanged_train": false,
        "should_augment_eval_instances": false,
        "should_include_original_eval": false,
        "should_skip_unchanged_eval": false,
        "seeds_per_instance": 1
      },
      "groups": [
        "helpdesk_call_summarization"
      ],
      "annotators": [
        {
          "class_name": "helm.benchmark.annotation.helpdesk_call_summarization_annotator.HelpdeskCallSummarizationAnnotator",
          "args": {}
        }
      ]
    },
    "stats": [
      {
        "name": {
          "name": "num_references",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 1325.1604938271605,
        "sum_squared": 1756050.3344002438,
        "min": 1325.1604938271605,
        "max": 1325.1604938271605,
        "mean": 1325.1604938271605,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 103.87654320987654,
        "sum_squared": 10790.33622923335,
        "min": 103.87654320987654,
        "max": 103.87654320987654,
        "mean": 103.87654320987654,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 103.87654320987654,
        "sum_squared": 10790.33622923335,
        "min": 103.87654320987654,
        "max": 103.87654320987654,
        "mean": 103.87654320987654,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "training_co2_cost",
          "split": "test"
        },
        "count": 0,
        "sum": 0.0,
        "sum_squared": 0.0
      },
      {
        "name": {
          "name": "training_energy_cost",
          "split": "test"
        },
        "count": 0,
        "sum": 0.0,
        "sum_squared": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0229934556984608,
        "sum_squared": 1.0465156104018787,
        "min": 1.0229934556984608,
        "max": 1.0229934556984608,
        "mean": 1.0229934556984608,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test"
        },
        "count": 1,
        "sum": -24.021595388222757,
        "sum_squared": 577.0370449954848,
        "min": -24.021595388222757,
        "max": -24.021595388222757,
        "mean": -24.021595388222757,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 103.87654320987654,
        "sum_squared": 10790.33622923335,
        "min": 103.87654320987654,
        "max": 103.87654320987654,
        "mean": 103.87654320987654,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test"
        },
        "count": 1,
        "sum": 553.5987654320987,
        "sum_squared": 306471.59308794385,
        "min": 553.5987654320987,
        "max": 553.5987654320987,
        "mean": 553.5987654320987,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "perplexity",
          "split": "test"
        },
        "count": 1,
        "sum": 1.260175999917213,
        "sum_squared": 1.5880435507673478,
        "min": 1.260175999917213,
        "max": 1.260175999917213,
        "mean": 1.260175999917213,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "bits_per_byte",
          "split": "test"
        },
        "count": 1,
        "sum": 0.06260100040826563,
        "sum_squared": 0.003918885252115674,
        "min": 0.06260100040826563,
        "max": 0.06260100040826563,
        "mean": 0.06260100040826563,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob_per_byte",
          "split": "test"
        },
        "count": 1,
        "sum": -0.04339170693322131,
        "sum_squared": 0.0018828402305785662,
        "min": -0.04339170693322131,
        "max": -0.04339170693322131,
        "mean": -0.04339170693322131,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_references",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_references",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1325.1604938271605,
        "sum_squared": 1756050.3344002438,
        "min": 1325.1604938271605,
        "max": 1325.1604938271605,
        "mean": 1325.1604938271605,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1325.1604938271605,
        "sum_squared": 1756050.3344002438,
        "min": 1325.1604938271605,
        "max": 1325.1604938271605,
        "mean": 1325.1604938271605,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 103.87654320987654,
        "sum_squared": 10790.33622923335,
        "min": 103.87654320987654,
        "max": 103.87654320987654,
        "mean": 103.87654320987654,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 103.87654320987654,
        "sum_squared": 10790.33622923335,
        "min": 103.87654320987654,
        "max": 103.87654320987654,
        "mean": 103.87654320987654,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 103.87654320987654,
        "sum_squared": 10790.33622923335,
        "min": 103.87654320987654,
        "max": 103.87654320987654,
        "mean": 103.87654320987654,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 103.87654320987654,
        "sum_squared": 10790.33622923335,
        "min": 103.87654320987654,
        "max": 103.87654320987654,
        "mean": 103.87654320987654,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0229934556984608,
        "sum_squared": 1.0465156104018787,
        "min": 1.0229934556984608,
        "max": 1.0229934556984608,
        "mean": 1.0229934556984608,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0229934556984608,
        "sum_squared": 1.0465156104018787,
        "min": 1.0229934556984608,
        "max": 1.0229934556984608,
        "mean": 1.0229934556984608,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": -24.021595388222757,
        "sum_squared": 577.0370449954848,
        "min": -24.021595388222757,
        "max": -24.021595388222757,
        "mean": -24.021595388222757,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": -24.021595388222757,
        "sum_squared": 577.0370449954848,
        "min": -24.021595388222757,
        "max": -24.021595388222757,
        "mean": -24.021595388222757,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 103.87654320987654,
        "sum_squared": 10790.33622923335,
        "min": 103.87654320987654,
        "max": 103.87654320987654,
        "mean": 103.87654320987654,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 103.87654320987654,
        "sum_squared": 10790.33622923335,
        "min": 103.87654320987654,
        "max": 103.87654320987654,
        "mean": 103.87654320987654,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 553.5987654320987,
        "sum_squared": 306471.59308794385,
        "min": 553.5987654320987,
        "max": 553.5987654320987,
        "mean": 553.5987654320987,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 553.5987654320987,
        "sum_squared": 306471.59308794385,
        "min": 553.5987654320987,
        "max": 553.5987654320987,
        "mean": 553.5987654320987,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_instances",
          "split": "test"
        },
        "count": 1,
        "sum": 162.0,
        "sum_squared": 26244.0,
        "min": 162.0,
        "max": 162.0,
        "mean": 162.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test"
        },
        "count": 1,
        "sum": 0.7894375857338795,
        "sum_squared": 0.6232117017693363,
        "min": 0.7894375857338795,
        "max": 0.7894375857338795,
        "mean": 0.7894375857338795,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.7894375857338795,
        "sum_squared": 0.6232117017693363,
        "min": 0.7894375857338795,
        "max": 0.7894375857338795,
        "mean": 0.7894375857338795,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.7894375857338795,
        "sum_squared": 0.6232117017693363,
        "min": 0.7894375857338795,
        "max": 0.7894375857338795,
        "mean": 0.7894375857338795,
        "variance": 0.0,
        "stddev": 0.0
      }
    ]
  },
  {
    "run_path": "benchmark_output/runs/v0.1.0/helpdesk_call_summarization:model=meta_llama-3.1-8b-instruct-turbo",
    "run_spec": {
      "name": "helpdesk_call_summarization:model=meta_llama-3.1-8b-instruct-turbo",
      "scenario_spec": {
        "class_name": "helm.benchmark.scenarios.helpdesk_call_summarization_scenario.HelpdeskCallSummarizationScenario",
        "args": {}
      },
      "adapter_spec": {
        "method": "generation",
        "global_prefix": "",
        "global_suffix": "",
        "instructions": "The following is a call transcript of a call between a compnay's employee and the company's IT helpdesk. Summarize the call transcript in under 100 words.",
        "input_prefix": "### Call Transcript\n",
        "input_suffix": "",
        "reference_prefix": "A. ",
        "reference_suffix": "\n",
        "chain_of_thought_prefix": "",
        "chain_of_thought_suffix": "\n",
        "output_prefix": "",
        "output_suffix": "",
        "instance_prefix": "\n",
        "substitutions": [],
        "max_train_instances": 0,
        "max_eval_instances": 100000,
        "num_outputs": 1,
        "num_train_trials": 1,
        "num_trials": 1,
        "sample_train": true,
        "model_deployment": "together/llama-3.1-8b-instruct-turbo",
        "model": "meta/llama-3.1-8b-instruct-turbo",
        "temperature": 0.0,
        "max_tokens": 512,
        "stop_sequences": [],
        "multi_label": false
      },
      "metric_specs": [
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.BasicGenerationMetric",
          "args": {
            "names": []
          }
        },
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.BasicReferenceMetric",
          "args": {}
        },
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.InstancesPerSplitMetric",
          "args": {}
        },
        {
          "class_name": "helm.benchmark.metrics.annotation_metrics.AnnotationLikertScaleMetric",
          "args": {
            "annotator_name": "helpdesk_call_center_summarization",
            "key": "score",
            "min_score": 1,
            "max_score": 10
          }
        }
      ],
      "data_augmenter_spec": {
        "perturbation_specs": [],
        "should_augment_train_instances": false,
        "should_include_original_train": false,
        "should_skip_unchanged_train": false,
        "should_augment_eval_instances": false,
        "should_include_original_eval": false,
        "should_skip_unchanged_eval": false,
        "seeds_per_instance": 1
      },
      "groups": [
        "helpdesk_call_summarization"
      ],
      "annotators": [
        {
          "class_name": "helm.benchmark.annotation.helpdesk_call_summarization_annotator.HelpdeskCallSummarizationAnnotator",
          "args": {}
        }
      ]
    },
    "stats": [
      {
        "name": {
          "name": "num_references",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 1325.1604938271605,
        "sum_squared": 1756050.3344002438,
        "min": 1325.1604938271605,
        "max": 1325.1604938271605,
        "mean": 1325.1604938271605,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 119.98148148148148,
        "sum_squared": 14395.555898491084,
        "min": 119.98148148148148,
        "max": 119.98148148148148,
        "mean": 119.98148148148148,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 119.98148148148148,
        "sum_squared": 14395.555898491084,
        "min": 119.98148148148148,
        "max": 119.98148148148148,
        "mean": 119.98148148148148,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "training_co2_cost",
          "split": "test"
        },
        "count": 0,
        "sum": 0.0,
        "sum_squared": 0.0
      },
      {
        "name": {
          "name": "training_energy_cost",
          "split": "test"
        },
        "count": 0,
        "sum": 0.0,
        "sum_squared": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test"
        },
        "count": 1,
        "sum": 0.6208152520803758,
        "sum_squared": 0.3854115772156205,
        "min": 0.6208152520803758,
        "max": 0.6208152520803758,
        "mean": 0.6208152520803758,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test"
        },
        "count": 1,
        "sum": -35.72712776274848,
        "sum_squared": 1276.4276581757533,
        "min": -35.72712776274848,
        "max": -35.72712776274848,
        "mean": -35.72712776274848,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 119.98148148148148,
        "sum_squared": 14395.555898491084,
        "min": 119.98148148148148,
        "max": 119.98148148148148,
        "mean": 119.98148148148148,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test"
        },
        "count": 1,
        "sum": 621.4567901234568,
        "sum_squared": 386208.54199055023,
        "min": 621.4567901234568,
        "max": 621.4567901234568,
        "mean": 621.4567901234568,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "perplexity",
          "split": "test"
        },
        "count": 1,
        "sum": 1.3468546931185368,
        "sum_squared": 1.814017564375428,
        "min": 1.3468546931185368,
        "max": 1.3468546931185368,
        "mean": 1.3468546931185368,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "bits_per_byte",
          "split": "test"
        },
        "count": 1,
        "sum": 0.08293955568219664,
        "sum_squared": 0.006878969896760198,
        "min": 0.08293955568219664,
        "max": 0.08293955568219664,
        "mean": 0.08293955568219664,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob_per_byte",
          "split": "test"
        },
        "count": 1,
        "sum": -0.057489319178009195,
        "sum_squared": 0.0033050218195510156,
        "min": -0.057489319178009195,
        "max": -0.057489319178009195,
        "mean": -0.057489319178009195,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_references",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_references",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1325.1604938271605,
        "sum_squared": 1756050.3344002438,
        "min": 1325.1604938271605,
        "max": 1325.1604938271605,
        "mean": 1325.1604938271605,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1325.1604938271605,
        "sum_squared": 1756050.3344002438,
        "min": 1325.1604938271605,
        "max": 1325.1604938271605,
        "mean": 1325.1604938271605,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 119.98148148148148,
        "sum_squared": 14395.555898491084,
        "min": 119.98148148148148,
        "max": 119.98148148148148,
        "mean": 119.98148148148148,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 119.98148148148148,
        "sum_squared": 14395.555898491084,
        "min": 119.98148148148148,
        "max": 119.98148148148148,
        "mean": 119.98148148148148,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 119.98148148148148,
        "sum_squared": 14395.555898491084,
        "min": 119.98148148148148,
        "max": 119.98148148148148,
        "mean": 119.98148148148148,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 119.98148148148148,
        "sum_squared": 14395.555898491084,
        "min": 119.98148148148148,
        "max": 119.98148148148148,
        "mean": 119.98148148148148,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.6208152520803758,
        "sum_squared": 0.3854115772156205,
        "min": 0.6208152520803758,
        "max": 0.6208152520803758,
        "mean": 0.6208152520803758,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.6208152520803758,
        "sum_squared": 0.3854115772156205,
        "min": 0.6208152520803758,
        "max": 0.6208152520803758,
        "mean": 0.6208152520803758,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": -35.72712776274848,
        "sum_squared": 1276.4276581757533,
        "min": -35.72712776274848,
        "max": -35.72712776274848,
        "mean": -35.72712776274848,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": -35.72712776274848,
        "sum_squared": 1276.4276581757533,
        "min": -35.72712776274848,
        "max": -35.72712776274848,
        "mean": -35.72712776274848,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 119.98148148148148,
        "sum_squared": 14395.555898491084,
        "min": 119.98148148148148,
        "max": 119.98148148148148,
        "mean": 119.98148148148148,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 119.98148148148148,
        "sum_squared": 14395.555898491084,
        "min": 119.98148148148148,
        "max": 119.98148148148148,
        "mean": 119.98148148148148,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 621.4567901234568,
        "sum_squared": 386208.54199055023,
        "min": 621.4567901234568,
        "max": 621.4567901234568,
        "mean": 621.4567901234568,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 621.4567901234568,
        "sum_squared": 386208.54199055023,
        "min": 621.4567901234568,
        "max": 621.4567901234568,
        "mean": 621.4567901234568,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_instances",
          "split": "test"
        },
        "count": 1,
        "sum": 162.0,
        "sum_squared": 26244.0,
        "min": 162.0,
        "max": 162.0,
        "mean": 162.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test"
        },
        "count": 1,
        "sum": 0.7661179698216716,
        "sum_squared": 0.5869367436836797,
        "min": 0.7661179698216716,
        "max": 0.7661179698216716,
        "mean": 0.7661179698216716,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.7661179698216716,
        "sum_squared": 0.5869367436836797,
        "min": 0.7661179698216716,
        "max": 0.7661179698216716,
        "mean": 0.7661179698216716,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.7661179698216716,
        "sum_squared": 0.5869367436836797,
        "min": 0.7661179698216716,
        "max": 0.7661179698216716,
        "mean": 0.7661179698216716,
        "variance": 0.0,
        "stddev": 0.0
      }
    ]
  },
  {
    "run_path": "benchmark_output/runs/v0.1.0/helpdesk_call_summarization:model=mistralai_mistral-7b-instruct-v0.3",
    "run_spec": {
      "name": "helpdesk_call_summarization:model=mistralai_mistral-7b-instruct-v0.3",
      "scenario_spec": {
        "class_name": "helm.benchmark.scenarios.helpdesk_call_summarization_scenario.HelpdeskCallSummarizationScenario",
        "args": {}
      },
      "adapter_spec": {
        "method": "generation",
        "global_prefix": "",
        "global_suffix": "",
        "instructions": "The following is a call transcript of a call between a compnay's employee and the company's IT helpdesk. Summarize the call transcript in under 100 words.",
        "input_prefix": "### Call Transcript\n",
        "input_suffix": "",
        "reference_prefix": "A. ",
        "reference_suffix": "\n",
        "chain_of_thought_prefix": "",
        "chain_of_thought_suffix": "\n",
        "output_prefix": "",
        "output_suffix": "",
        "instance_prefix": "\n",
        "substitutions": [],
        "max_train_instances": 0,
        "max_eval_instances": 100000,
        "num_outputs": 1,
        "num_train_trials": 1,
        "num_trials": 1,
        "sample_train": true,
        "model_deployment": "together/mistral-7b-instruct-v0.3",
        "model": "mistralai/mistral-7b-instruct-v0.3",
        "temperature": 0.0,
        "max_tokens": 512,
        "stop_sequences": [],
        "multi_label": false
      },
      "metric_specs": [
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.BasicGenerationMetric",
          "args": {
            "names": []
          }
        },
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.BasicReferenceMetric",
          "args": {}
        },
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.InstancesPerSplitMetric",
          "args": {}
        },
        {
          "class_name": "helm.benchmark.metrics.annotation_metrics.AnnotationLikertScaleMetric",
          "args": {
            "annotator_name": "helpdesk_call_center_summarization",
            "key": "score",
            "min_score": 1,
            "max_score": 10
          }
        }
      ],
      "data_augmenter_spec": {
        "perturbation_specs": [],
        "should_augment_train_instances": false,
        "should_include_original_train": false,
        "should_skip_unchanged_train": false,
        "should_augment_eval_instances": false,
        "should_include_original_eval": false,
        "should_skip_unchanged_eval": false,
        "seeds_per_instance": 1
      },
      "groups": [
        "helpdesk_call_summarization"
      ],
      "annotators": [
        {
          "class_name": "helm.benchmark.annotation.helpdesk_call_summarization_annotator.HelpdeskCallSummarizationAnnotator",
          "args": {}
        }
      ]
    },
    "stats": [
      {
        "name": {
          "name": "num_references",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 1491.9197530864199,
        "sum_squared": 2225824.549649444,
        "min": 1491.9197530864199,
        "max": 1491.9197530864199,
        "mean": 1491.9197530864199,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 130.34567901234567,
        "sum_squared": 16989.996037189452,
        "min": 130.34567901234567,
        "max": 130.34567901234567,
        "mean": 130.34567901234567,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 130.34567901234567,
        "sum_squared": 16989.996037189452,
        "min": 130.34567901234567,
        "max": 130.34567901234567,
        "mean": 130.34567901234567,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "training_co2_cost",
          "split": "test"
        },
        "count": 0,
        "sum": 0.0,
        "sum_squared": 0.0
      },
      {
        "name": {
          "name": "training_energy_cost",
          "split": "test"
        },
        "count": 0,
        "sum": 0.0,
        "sum_squared": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test"
        },
        "count": 1,
        "sum": 0.952246833730627,
        "sum_squared": 0.9067740323500044,
        "min": 0.952246833730627,
        "max": 0.952246833730627,
        "mean": 0.952246833730627,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test"
        },
        "count": 1,
        "sum": -28.110070563464035,
        "sum_squared": 790.1760670829273,
        "min": -28.110070563464035,
        "max": -28.110070563464035,
        "mean": -28.110070563464035,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 130.34567901234567,
        "sum_squared": 16989.996037189452,
        "min": 130.34567901234567,
        "max": 130.34567901234567,
        "mean": 130.34567901234567,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test"
        },
        "count": 1,
        "sum": 625.2407407407408,
        "sum_squared": 390925.9838820302,
        "min": 625.2407407407408,
        "max": 625.2407407407408,
        "mean": 625.2407407407408,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "perplexity",
          "split": "test"
        },
        "count": 1,
        "sum": 1.240677824363209,
        "sum_squared": 1.539281463866626,
        "min": 1.240677824363209,
        "max": 1.240677824363209,
        "mean": 1.240677824363209,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "bits_per_byte",
          "split": "test"
        },
        "count": 1,
        "sum": 0.06486183122504556,
        "sum_squared": 0.004207057149866295,
        "min": 0.06486183122504556,
        "max": 0.06486183122504556,
        "mean": 0.06486183122504556,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob_per_byte",
          "split": "test"
        },
        "count": 1,
        "sum": -0.04495879543959535,
        "sum_squared": 0.0020212932873793794,
        "min": -0.04495879543959535,
        "max": -0.04495879543959535,
        "mean": -0.04495879543959535,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_references",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_references",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1491.9197530864199,
        "sum_squared": 2225824.549649444,
        "min": 1491.9197530864199,
        "max": 1491.9197530864199,
        "mean": 1491.9197530864199,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1491.9197530864199,
        "sum_squared": 2225824.549649444,
        "min": 1491.9197530864199,
        "max": 1491.9197530864199,
        "mean": 1491.9197530864199,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 130.34567901234567,
        "sum_squared": 16989.996037189452,
        "min": 130.34567901234567,
        "max": 130.34567901234567,
        "mean": 130.34567901234567,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 130.34567901234567,
        "sum_squared": 16989.996037189452,
        "min": 130.34567901234567,
        "max": 130.34567901234567,
        "mean": 130.34567901234567,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 130.34567901234567,
        "sum_squared": 16989.996037189452,
        "min": 130.34567901234567,
        "max": 130.34567901234567,
        "mean": 130.34567901234567,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 130.34567901234567,
        "sum_squared": 16989.996037189452,
        "min": 130.34567901234567,
        "max": 130.34567901234567,
        "mean": 130.34567901234567,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.952246833730627,
        "sum_squared": 0.9067740323500044,
        "min": 0.952246833730627,
        "max": 0.952246833730627,
        "mean": 0.952246833730627,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.952246833730627,
        "sum_squared": 0.9067740323500044,
        "min": 0.952246833730627,
        "max": 0.952246833730627,
        "mean": 0.952246833730627,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": -28.110070563464035,
        "sum_squared": 790.1760670829273,
        "min": -28.110070563464035,
        "max": -28.110070563464035,
        "mean": -28.110070563464035,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": -28.110070563464035,
        "sum_squared": 790.1760670829273,
        "min": -28.110070563464035,
        "max": -28.110070563464035,
        "mean": -28.110070563464035,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 130.34567901234567,
        "sum_squared": 16989.996037189452,
        "min": 130.34567901234567,
        "max": 130.34567901234567,
        "mean": 130.34567901234567,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 130.34567901234567,
        "sum_squared": 16989.996037189452,
        "min": 130.34567901234567,
        "max": 130.34567901234567,
        "mean": 130.34567901234567,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 625.2407407407408,
        "sum_squared": 390925.9838820302,
        "min": 625.2407407407408,
        "max": 625.2407407407408,
        "mean": 625.2407407407408,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 625.2407407407408,
        "sum_squared": 390925.9838820302,
        "min": 625.2407407407408,
        "max": 625.2407407407408,
        "mean": 625.2407407407408,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_instances",
          "split": "test"
        },
        "count": 1,
        "sum": 162.0,
        "sum_squared": 26244.0,
        "min": 162.0,
        "max": 162.0,
        "mean": 162.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test"
        },
        "count": 1,
        "sum": 0.7585733882030161,
        "sum_squared": 0.5754335852898038,
        "min": 0.7585733882030161,
        "max": 0.7585733882030161,
        "mean": 0.7585733882030161,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.7585733882030161,
        "sum_squared": 0.5754335852898038,
        "min": 0.7585733882030161,
        "max": 0.7585733882030161,
        "mean": 0.7585733882030161,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.7585733882030161,
        "sum_squared": 0.5754335852898038,
        "min": 0.7585733882030161,
        "max": 0.7585733882030161,
        "mean": 0.7585733882030161,
        "variance": 0.0,
        "stddev": 0.0
      }
    ]
  },
  {
    "run_path": "benchmark_output/runs/v0.1.0/helpdesk_call_summarization:model=mistralai_mixtral-8x22b-instruct-v0.1",
    "run_spec": {
      "name": "helpdesk_call_summarization:model=mistralai_mixtral-8x22b-instruct-v0.1",
      "scenario_spec": {
        "class_name": "helm.benchmark.scenarios.helpdesk_call_summarization_scenario.HelpdeskCallSummarizationScenario",
        "args": {}
      },
      "adapter_spec": {
        "method": "generation",
        "global_prefix": "",
        "global_suffix": "",
        "instructions": "The following is a call transcript of a call between a compnay's employee and the company's IT helpdesk. Summarize the call transcript in under 100 words.",
        "input_prefix": "### Call Transcript\n",
        "input_suffix": "",
        "reference_prefix": "A. ",
        "reference_suffix": "\n",
        "chain_of_thought_prefix": "",
        "chain_of_thought_suffix": "\n",
        "output_prefix": "",
        "output_suffix": "",
        "instance_prefix": "\n",
        "substitutions": [],
        "max_train_instances": 0,
        "max_eval_instances": 100000,
        "num_outputs": 1,
        "num_train_trials": 1,
        "num_trials": 1,
        "sample_train": true,
        "model_deployment": "together/mixtral-8x22b-instruct-v0.1",
        "model": "mistralai/mixtral-8x22b-instruct-v0.1",
        "temperature": 0.0,
        "max_tokens": 512,
        "stop_sequences": [],
        "multi_label": false
      },
      "metric_specs": [
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.BasicGenerationMetric",
          "args": {
            "names": []
          }
        },
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.BasicReferenceMetric",
          "args": {}
        },
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.InstancesPerSplitMetric",
          "args": {}
        },
        {
          "class_name": "helm.benchmark.metrics.annotation_metrics.AnnotationLikertScaleMetric",
          "args": {
            "annotator_name": "helpdesk_call_center_summarization",
            "key": "score",
            "min_score": 1,
            "max_score": 10
          }
        }
      ],
      "data_augmenter_spec": {
        "perturbation_specs": [],
        "should_augment_train_instances": false,
        "should_include_original_train": false,
        "should_skip_unchanged_train": false,
        "should_augment_eval_instances": false,
        "should_include_original_eval": false,
        "should_skip_unchanged_eval": false,
        "seeds_per_instance": 1
      },
      "groups": [
        "helpdesk_call_summarization"
      ],
      "annotators": [
        {
          "class_name": "helm.benchmark.annotation.helpdesk_call_summarization_annotator.HelpdeskCallSummarizationAnnotator",
          "args": {}
        }
      ]
    },
    "stats": [
      {
        "name": {
          "name": "num_references",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 1491.9197530864199,
        "sum_squared": 2225824.549649444,
        "min": 1491.9197530864199,
        "max": 1491.9197530864199,
        "mean": 1491.9197530864199,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 93.72839506172839,
        "sum_squared": 8785.01204084743,
        "min": 93.72839506172839,
        "max": 93.72839506172839,
        "mean": 93.72839506172839,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 93.72839506172839,
        "sum_squared": 8785.01204084743,
        "min": 93.72839506172839,
        "max": 93.72839506172839,
        "mean": 93.72839506172839,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "training_co2_cost",
          "split": "test"
        },
        "count": 0,
        "sum": 0.0,
        "sum_squared": 0.0
      },
      {
        "name": {
          "name": "training_energy_cost",
          "split": "test"
        },
        "count": 0,
        "sum": 0.0,
        "sum_squared": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test"
        },
        "count": 1,
        "sum": 3.4505287408828735,
        "sum_squared": 11.906148591658749,
        "min": 3.4505287408828735,
        "max": 3.4505287408828735,
        "mean": 3.4505287408828735,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test"
        },
        "count": 1,
        "sum": -21.759122981938592,
        "sum_squared": 473.4594329431282,
        "min": -21.759122981938592,
        "max": -21.759122981938592,
        "mean": -21.759122981938592,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 93.72839506172839,
        "sum_squared": 8785.01204084743,
        "min": 93.72839506172839,
        "max": 93.72839506172839,
        "mean": 93.72839506172839,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test"
        },
        "count": 1,
        "sum": 441.15432098765433,
        "sum_squared": 194617.13492607835,
        "min": 441.15432098765433,
        "max": 441.15432098765433,
        "mean": 441.15432098765433,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "perplexity",
          "split": "test"
        },
        "count": 1,
        "sum": 1.261309934633061,
        "sum_squared": 1.5909027512040566,
        "min": 1.261309934633061,
        "max": 1.261309934633061,
        "mean": 1.261309934633061,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "bits_per_byte",
          "split": "test"
        },
        "count": 1,
        "sum": 0.07115827121415497,
        "sum_squared": 0.005063499562187235,
        "min": 0.07115827121415497,
        "max": 0.07115827121415497,
        "mean": 0.07115827121415497,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob_per_byte",
          "split": "test"
        },
        "count": 1,
        "sum": -0.049323155065611426,
        "sum_squared": 0.0024327736256263503,
        "min": -0.049323155065611426,
        "max": -0.049323155065611426,
        "mean": -0.049323155065611426,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_references",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_references",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1491.9197530864199,
        "sum_squared": 2225824.549649444,
        "min": 1491.9197530864199,
        "max": 1491.9197530864199,
        "mean": 1491.9197530864199,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1491.9197530864199,
        "sum_squared": 2225824.549649444,
        "min": 1491.9197530864199,
        "max": 1491.9197530864199,
        "mean": 1491.9197530864199,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 93.72839506172839,
        "sum_squared": 8785.01204084743,
        "min": 93.72839506172839,
        "max": 93.72839506172839,
        "mean": 93.72839506172839,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 93.72839506172839,
        "sum_squared": 8785.01204084743,
        "min": 93.72839506172839,
        "max": 93.72839506172839,
        "mean": 93.72839506172839,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 93.72839506172839,
        "sum_squared": 8785.01204084743,
        "min": 93.72839506172839,
        "max": 93.72839506172839,
        "mean": 93.72839506172839,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 93.72839506172839,
        "sum_squared": 8785.01204084743,
        "min": 93.72839506172839,
        "max": 93.72839506172839,
        "mean": 93.72839506172839,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 3.4505287408828735,
        "sum_squared": 11.906148591658749,
        "min": 3.4505287408828735,
        "max": 3.4505287408828735,
        "mean": 3.4505287408828735,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 3.4505287408828735,
        "sum_squared": 11.906148591658749,
        "min": 3.4505287408828735,
        "max": 3.4505287408828735,
        "mean": 3.4505287408828735,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": -21.759122981938592,
        "sum_squared": 473.4594329431282,
        "min": -21.759122981938592,
        "max": -21.759122981938592,
        "mean": -21.759122981938592,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": -21.759122981938592,
        "sum_squared": 473.4594329431282,
        "min": -21.759122981938592,
        "max": -21.759122981938592,
        "mean": -21.759122981938592,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 93.72839506172839,
        "sum_squared": 8785.01204084743,
        "min": 93.72839506172839,
        "max": 93.72839506172839,
        "mean": 93.72839506172839,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 93.72839506172839,
        "sum_squared": 8785.01204084743,
        "min": 93.72839506172839,
        "max": 93.72839506172839,
        "mean": 93.72839506172839,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 441.15432098765433,
        "sum_squared": 194617.13492607835,
        "min": 441.15432098765433,
        "max": 441.15432098765433,
        "mean": 441.15432098765433,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 441.15432098765433,
        "sum_squared": 194617.13492607835,
        "min": 441.15432098765433,
        "max": 441.15432098765433,
        "mean": 441.15432098765433,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_instances",
          "split": "test"
        },
        "count": 1,
        "sum": 162.0,
        "sum_squared": 26244.0,
        "min": 162.0,
        "max": 162.0,
        "mean": 162.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test"
        },
        "count": 1,
        "sum": 0.7908093278463623,
        "sum_squared": 0.6253793930088153,
        "min": 0.7908093278463623,
        "max": 0.7908093278463623,
        "mean": 0.7908093278463623,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.7908093278463623,
        "sum_squared": 0.6253793930088153,
        "min": 0.7908093278463623,
        "max": 0.7908093278463623,
        "mean": 0.7908093278463623,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.7908093278463623,
        "sum_squared": 0.6253793930088153,
        "min": 0.7908093278463623,
        "max": 0.7908093278463623,
        "mean": 0.7908093278463623,
        "variance": 0.0,
        "stddev": 0.0
      }
    ]
  },
  {
    "run_path": "benchmark_output/runs/v0.1.0/helpdesk_call_summarization:model=mistralai_mixtral-8x7b-instruct-v0.1",
    "run_spec": {
      "name": "helpdesk_call_summarization:model=mistralai_mixtral-8x7b-instruct-v0.1",
      "scenario_spec": {
        "class_name": "helm.benchmark.scenarios.helpdesk_call_summarization_scenario.HelpdeskCallSummarizationScenario",
        "args": {}
      },
      "adapter_spec": {
        "method": "generation",
        "global_prefix": "",
        "global_suffix": "",
        "instructions": "The following is a call transcript of a call between a compnay's employee and the company's IT helpdesk. Summarize the call transcript in under 100 words.",
        "input_prefix": "### Call Transcript\n",
        "input_suffix": "",
        "reference_prefix": "A. ",
        "reference_suffix": "\n",
        "chain_of_thought_prefix": "",
        "chain_of_thought_suffix": "\n",
        "output_prefix": "",
        "output_suffix": "",
        "instance_prefix": "\n",
        "substitutions": [],
        "max_train_instances": 0,
        "max_eval_instances": 100000,
        "num_outputs": 1,
        "num_train_trials": 1,
        "num_trials": 1,
        "sample_train": true,
        "model_deployment": "together/mixtral-8x7b-instruct-v0.1",
        "model": "mistralai/mixtral-8x7b-instruct-v0.1",
        "temperature": 0.0,
        "max_tokens": 512,
        "stop_sequences": [],
        "multi_label": false
      },
      "metric_specs": [
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.BasicGenerationMetric",
          "args": {
            "names": []
          }
        },
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.BasicReferenceMetric",
          "args": {}
        },
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.InstancesPerSplitMetric",
          "args": {}
        },
        {
          "class_name": "helm.benchmark.metrics.annotation_metrics.AnnotationLikertScaleMetric",
          "args": {
            "annotator_name": "helpdesk_call_center_summarization",
            "key": "score",
            "min_score": 1,
            "max_score": 10
          }
        }
      ],
      "data_augmenter_spec": {
        "perturbation_specs": [],
        "should_augment_train_instances": false,
        "should_include_original_train": false,
        "should_skip_unchanged_train": false,
        "should_augment_eval_instances": false,
        "should_include_original_eval": false,
        "should_skip_unchanged_eval": false,
        "seeds_per_instance": 1
      },
      "groups": [
        "helpdesk_call_summarization"
      ],
      "annotators": [
        {
          "class_name": "helm.benchmark.annotation.helpdesk_call_summarization_annotator.HelpdeskCallSummarizationAnnotator",
          "args": {}
        }
      ]
    },
    "stats": [
      {
        "name": {
          "name": "num_references",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 1491.9197530864199,
        "sum_squared": 2225824.549649444,
        "min": 1491.9197530864199,
        "max": 1491.9197530864199,
        "mean": 1491.9197530864199,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 110.48148148148148,
        "sum_squared": 12206.157750342936,
        "min": 110.48148148148148,
        "max": 110.48148148148148,
        "mean": 110.48148148148148,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 110.48148148148148,
        "sum_squared": 12206.157750342936,
        "min": 110.48148148148148,
        "max": 110.48148148148148,
        "mean": 110.48148148148148,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "training_co2_cost",
          "split": "test"
        },
        "count": 0,
        "sum": 0.0,
        "sum_squared": 0.0
      },
      {
        "name": {
          "name": "training_energy_cost",
          "split": "test"
        },
        "count": 0,
        "sum": 0.0,
        "sum_squared": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test"
        },
        "count": 1,
        "sum": 1.2561425000061224,
        "sum_squared": 1.5778939803216312,
        "min": 1.2561425000061224,
        "max": 1.2561425000061224,
        "mean": 1.2561425000061224,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test"
        },
        "count": 1,
        "sum": -10.782968182736136,
        "sum_squared": 116.27240282989985,
        "min": -10.782968182736136,
        "max": -10.782968182736136,
        "mean": -10.782968182736136,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 110.48148148148148,
        "sum_squared": 12206.157750342936,
        "min": 110.48148148148148,
        "max": 110.48148148148148,
        "mean": 110.48148148148148,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test"
        },
        "count": 1,
        "sum": 513.7654320987655,
        "sum_squared": 263954.9192196312,
        "min": 513.7654320987655,
        "max": 513.7654320987655,
        "mean": 513.7654320987655,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "perplexity",
          "split": "test"
        },
        "count": 1,
        "sum": 1.1025214446278306,
        "sum_squared": 1.2155535358642386,
        "min": 1.1025214446278306,
        "max": 1.1025214446278306,
        "mean": 1.1025214446278306,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "bits_per_byte",
          "split": "test"
        },
        "count": 1,
        "sum": 0.030279450020114118,
        "sum_squared": 0.0009168450935205889,
        "min": 0.030279450020114118,
        "max": 0.030279450020114118,
        "mean": 0.030279450020114118,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob_per_byte",
          "split": "test"
        },
        "count": 1,
        "sum": -0.02098811541034788,
        "sum_squared": 0.00044050098847808213,
        "min": -0.02098811541034788,
        "max": -0.02098811541034788,
        "mean": -0.02098811541034788,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_references",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_references",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1491.9197530864199,
        "sum_squared": 2225824.549649444,
        "min": 1491.9197530864199,
        "max": 1491.9197530864199,
        "mean": 1491.9197530864199,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1491.9197530864199,
        "sum_squared": 2225824.549649444,
        "min": 1491.9197530864199,
        "max": 1491.9197530864199,
        "mean": 1491.9197530864199,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 110.48148148148148,
        "sum_squared": 12206.157750342936,
        "min": 110.48148148148148,
        "max": 110.48148148148148,
        "mean": 110.48148148148148,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 110.48148148148148,
        "sum_squared": 12206.157750342936,
        "min": 110.48148148148148,
        "max": 110.48148148148148,
        "mean": 110.48148148148148,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 110.48148148148148,
        "sum_squared": 12206.157750342936,
        "min": 110.48148148148148,
        "max": 110.48148148148148,
        "mean": 110.48148148148148,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 110.48148148148148,
        "sum_squared": 12206.157750342936,
        "min": 110.48148148148148,
        "max": 110.48148148148148,
        "mean": 110.48148148148148,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.2561425000061224,
        "sum_squared": 1.5778939803216312,
        "min": 1.2561425000061224,
        "max": 1.2561425000061224,
        "mean": 1.2561425000061224,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.2561425000061224,
        "sum_squared": 1.5778939803216312,
        "min": 1.2561425000061224,
        "max": 1.2561425000061224,
        "mean": 1.2561425000061224,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": -10.782968182736136,
        "sum_squared": 116.27240282989985,
        "min": -10.782968182736136,
        "max": -10.782968182736136,
        "mean": -10.782968182736136,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": -10.782968182736136,
        "sum_squared": 116.27240282989985,
        "min": -10.782968182736136,
        "max": -10.782968182736136,
        "mean": -10.782968182736136,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 110.48148148148148,
        "sum_squared": 12206.157750342936,
        "min": 110.48148148148148,
        "max": 110.48148148148148,
        "mean": 110.48148148148148,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 110.48148148148148,
        "sum_squared": 12206.157750342936,
        "min": 110.48148148148148,
        "max": 110.48148148148148,
        "mean": 110.48148148148148,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 513.7654320987655,
        "sum_squared": 263954.9192196312,
        "min": 513.7654320987655,
        "max": 513.7654320987655,
        "mean": 513.7654320987655,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 513.7654320987655,
        "sum_squared": 263954.9192196312,
        "min": 513.7654320987655,
        "max": 513.7654320987655,
        "mean": 513.7654320987655,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_instances",
          "split": "test"
        },
        "count": 1,
        "sum": 162.0,
        "sum_squared": 26244.0,
        "min": 162.0,
        "max": 162.0,
        "mean": 162.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test"
        },
        "count": 1,
        "sum": 0.758573388203016,
        "sum_squared": 0.5754335852898036,
        "min": 0.758573388203016,
        "max": 0.758573388203016,
        "mean": 0.758573388203016,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.758573388203016,
        "sum_squared": 0.5754335852898036,
        "min": 0.758573388203016,
        "max": 0.758573388203016,
        "mean": 0.758573388203016,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.758573388203016,
        "sum_squared": 0.5754335852898036,
        "min": 0.758573388203016,
        "max": 0.758573388203016,
        "mean": 0.758573388203016,
        "variance": 0.0,
        "stddev": 0.0
      }
    ]
  },
  {
    "run_path": "benchmark_output/runs/v0.1.0/helpdesk_call_summarization:model=openai_gpt-4o-2024-08-06",
    "run_spec": {
      "name": "helpdesk_call_summarization:model=openai_gpt-4o-2024-08-06",
      "scenario_spec": {
        "class_name": "helm.benchmark.scenarios.helpdesk_call_summarization_scenario.HelpdeskCallSummarizationScenario",
        "args": {}
      },
      "adapter_spec": {
        "method": "generation",
        "global_prefix": "",
        "global_suffix": "",
        "instructions": "The following is a call transcript of a call between a compnay's employee and the company's IT helpdesk. Summarize the call transcript in under 100 words.",
        "input_prefix": "### Call Transcript\n",
        "input_suffix": "",
        "reference_prefix": "A. ",
        "reference_suffix": "\n",
        "chain_of_thought_prefix": "",
        "chain_of_thought_suffix": "\n",
        "output_prefix": "",
        "output_suffix": "",
        "instance_prefix": "\n",
        "substitutions": [],
        "max_train_instances": 0,
        "max_eval_instances": 100000,
        "num_outputs": 1,
        "num_train_trials": 1,
        "num_trials": 1,
        "sample_train": true,
        "model_deployment": "openai/gpt-4o-2024-08-06",
        "model": "openai/gpt-4o-2024-08-06",
        "temperature": 0.0,
        "max_tokens": 512,
        "stop_sequences": [],
        "multi_label": false
      },
      "metric_specs": [
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.BasicGenerationMetric",
          "args": {
            "names": []
          }
        },
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.BasicReferenceMetric",
          "args": {}
        },
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.InstancesPerSplitMetric",
          "args": {}
        },
        {
          "class_name": "helm.benchmark.metrics.annotation_metrics.AnnotationLikertScaleMetric",
          "args": {
            "annotator_name": "helpdesk_call_center_summarization",
            "key": "score",
            "min_score": 1,
            "max_score": 10
          }
        }
      ],
      "data_augmenter_spec": {
        "perturbation_specs": [],
        "should_augment_train_instances": false,
        "should_include_original_train": false,
        "should_skip_unchanged_train": false,
        "should_augment_eval_instances": false,
        "should_include_original_eval": false,
        "should_skip_unchanged_eval": false,
        "seeds_per_instance": 1
      },
      "groups": [
        "helpdesk_call_summarization"
      ],
      "annotators": [
        {
          "class_name": "helm.benchmark.annotation.helpdesk_call_summarization_annotator.HelpdeskCallSummarizationAnnotator",
          "args": {}
        }
      ]
    },
    "stats": [
      {
        "name": {
          "name": "num_references",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 1298.9444444444443,
        "sum_squared": 1687256.669753086,
        "min": 1298.9444444444443,
        "max": 1298.9444444444443,
        "mean": 1298.9444444444443,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 99.65432098765432,
        "sum_squared": 9930.983691510439,
        "min": 99.65432098765432,
        "max": 99.65432098765432,
        "mean": 99.65432098765432,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 99.65432098765432,
        "sum_squared": 9930.983691510439,
        "min": 99.65432098765432,
        "max": 99.65432098765432,
        "mean": 99.65432098765432,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "training_co2_cost",
          "split": "test"
        },
        "count": 0,
        "sum": 0.0,
        "sum_squared": 0.0
      },
      {
        "name": {
          "name": "training_energy_cost",
          "split": "test"
        },
        "count": 0,
        "sum": 0.0,
        "sum_squared": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test"
        },
        "count": 1,
        "sum": 6.5025614706086525,
        "sum_squared": 42.28330567904416,
        "min": 6.5025614706086525,
        "max": 6.5025614706086525,
        "mean": 6.5025614706086525,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 99.65432098765432,
        "sum_squared": 9930.983691510439,
        "min": 99.65432098765432,
        "max": 99.65432098765432,
        "mean": 99.65432098765432,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test"
        },
        "count": 1,
        "sum": 550.4197530864197,
        "sum_squared": 302961.9045877153,
        "min": 550.4197530864197,
        "max": 550.4197530864197,
        "mean": 550.4197530864197,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "perplexity",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "bits_per_byte",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob_per_byte",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_references",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_references",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1298.9444444444443,
        "sum_squared": 1687256.669753086,
        "min": 1298.9444444444443,
        "max": 1298.9444444444443,
        "mean": 1298.9444444444443,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1298.9444444444443,
        "sum_squared": 1687256.669753086,
        "min": 1298.9444444444443,
        "max": 1298.9444444444443,
        "mean": 1298.9444444444443,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 99.65432098765432,
        "sum_squared": 9930.983691510439,
        "min": 99.65432098765432,
        "max": 99.65432098765432,
        "mean": 99.65432098765432,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 99.65432098765432,
        "sum_squared": 9930.983691510439,
        "min": 99.65432098765432,
        "max": 99.65432098765432,
        "mean": 99.65432098765432,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 99.65432098765432,
        "sum_squared": 9930.983691510439,
        "min": 99.65432098765432,
        "max": 99.65432098765432,
        "mean": 99.65432098765432,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 99.65432098765432,
        "sum_squared": 9930.983691510439,
        "min": 99.65432098765432,
        "max": 99.65432098765432,
        "mean": 99.65432098765432,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 6.5025614706086525,
        "sum_squared": 42.28330567904416,
        "min": 6.5025614706086525,
        "max": 6.5025614706086525,
        "mean": 6.5025614706086525,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 6.5025614706086525,
        "sum_squared": 42.28330567904416,
        "min": 6.5025614706086525,
        "max": 6.5025614706086525,
        "mean": 6.5025614706086525,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 99.65432098765432,
        "sum_squared": 9930.983691510439,
        "min": 99.65432098765432,
        "max": 99.65432098765432,
        "mean": 99.65432098765432,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 99.65432098765432,
        "sum_squared": 9930.983691510439,
        "min": 99.65432098765432,
        "max": 99.65432098765432,
        "mean": 99.65432098765432,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 550.4197530864197,
        "sum_squared": 302961.9045877153,
        "min": 550.4197530864197,
        "max": 550.4197530864197,
        "mean": 550.4197530864197,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 550.4197530864197,
        "sum_squared": 302961.9045877153,
        "min": 550.4197530864197,
        "max": 550.4197530864197,
        "mean": 550.4197530864197,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_instances",
          "split": "test"
        },
        "count": 1,
        "sum": 162.0,
        "sum_squared": 26244.0,
        "min": 162.0,
        "max": 162.0,
        "mean": 162.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test"
        },
        "count": 1,
        "sum": 0.8134430727023293,
        "sum_squared": 0.661689632527407,
        "min": 0.8134430727023293,
        "max": 0.8134430727023293,
        "mean": 0.8134430727023293,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.8134430727023293,
        "sum_squared": 0.661689632527407,
        "min": 0.8134430727023293,
        "max": 0.8134430727023293,
        "mean": 0.8134430727023293,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.8134430727023293,
        "sum_squared": 0.661689632527407,
        "min": 0.8134430727023293,
        "max": 0.8134430727023293,
        "mean": 0.8134430727023293,
        "variance": 0.0,
        "stddev": 0.0
      }
    ]
  },
  {
    "run_path": "benchmark_output/runs/v0.1.0/helpdesk_call_summarization:model=openai_gpt-4o-mini-2024-07-18",
    "run_spec": {
      "name": "helpdesk_call_summarization:model=openai_gpt-4o-mini-2024-07-18",
      "scenario_spec": {
        "class_name": "helm.benchmark.scenarios.helpdesk_call_summarization_scenario.HelpdeskCallSummarizationScenario",
        "args": {}
      },
      "adapter_spec": {
        "method": "generation",
        "global_prefix": "",
        "global_suffix": "",
        "instructions": "The following is a call transcript of a call between a compnay's employee and the company's IT helpdesk. Summarize the call transcript in under 100 words.",
        "input_prefix": "### Call Transcript\n",
        "input_suffix": "",
        "reference_prefix": "A. ",
        "reference_suffix": "\n",
        "chain_of_thought_prefix": "",
        "chain_of_thought_suffix": "\n",
        "output_prefix": "",
        "output_suffix": "",
        "instance_prefix": "\n",
        "substitutions": [],
        "max_train_instances": 0,
        "max_eval_instances": 100000,
        "num_outputs": 1,
        "num_train_trials": 1,
        "num_trials": 1,
        "sample_train": true,
        "model_deployment": "openai/gpt-4o-mini-2024-07-18",
        "model": "openai/gpt-4o-mini-2024-07-18",
        "temperature": 0.0,
        "max_tokens": 512,
        "stop_sequences": [],
        "multi_label": false
      },
      "metric_specs": [
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.BasicGenerationMetric",
          "args": {
            "names": []
          }
        },
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.BasicReferenceMetric",
          "args": {}
        },
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.InstancesPerSplitMetric",
          "args": {}
        },
        {
          "class_name": "helm.benchmark.metrics.annotation_metrics.AnnotationLikertScaleMetric",
          "args": {
            "annotator_name": "helpdesk_call_center_summarization",
            "key": "score",
            "min_score": 1,
            "max_score": 10
          }
        }
      ],
      "data_augmenter_spec": {
        "perturbation_specs": [],
        "should_augment_train_instances": false,
        "should_include_original_train": false,
        "should_skip_unchanged_train": false,
        "should_augment_eval_instances": false,
        "should_include_original_eval": false,
        "should_skip_unchanged_eval": false,
        "seeds_per_instance": 1
      },
      "groups": [
        "helpdesk_call_summarization"
      ],
      "annotators": [
        {
          "class_name": "helm.benchmark.annotation.helpdesk_call_summarization_annotator.HelpdeskCallSummarizationAnnotator",
          "args": {}
        }
      ]
    },
    "stats": [
      {
        "name": {
          "name": "num_references",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 1298.9444444444443,
        "sum_squared": 1687256.669753086,
        "min": 1298.9444444444443,
        "max": 1298.9444444444443,
        "mean": 1298.9444444444443,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 95.28395061728395,
        "sum_squared": 9079.031245237007,
        "min": 95.28395061728395,
        "max": 95.28395061728395,
        "mean": 95.28395061728395,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 95.28395061728395,
        "sum_squared": 9079.031245237007,
        "min": 95.28395061728395,
        "max": 95.28395061728395,
        "mean": 95.28395061728395,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "training_co2_cost",
          "split": "test"
        },
        "count": 0,
        "sum": 0.0,
        "sum_squared": 0.0
      },
      {
        "name": {
          "name": "training_energy_cost",
          "split": "test"
        },
        "count": 0,
        "sum": 0.0,
        "sum_squared": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test"
        },
        "count": 1,
        "sum": 2.032809728457604,
        "sum_squared": 4.132315392111877,
        "min": 2.032809728457604,
        "max": 2.032809728457604,
        "mean": 2.032809728457604,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 95.28395061728395,
        "sum_squared": 9079.031245237007,
        "min": 95.28395061728395,
        "max": 95.28395061728395,
        "mean": 95.28395061728395,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test"
        },
        "count": 1,
        "sum": 539.2037037037037,
        "sum_squared": 290740.6340877915,
        "min": 539.2037037037037,
        "max": 539.2037037037037,
        "mean": 539.2037037037037,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "perplexity",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "bits_per_byte",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob_per_byte",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_references",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_references",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1298.9444444444443,
        "sum_squared": 1687256.669753086,
        "min": 1298.9444444444443,
        "max": 1298.9444444444443,
        "mean": 1298.9444444444443,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1298.9444444444443,
        "sum_squared": 1687256.669753086,
        "min": 1298.9444444444443,
        "max": 1298.9444444444443,
        "mean": 1298.9444444444443,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 95.28395061728395,
        "sum_squared": 9079.031245237007,
        "min": 95.28395061728395,
        "max": 95.28395061728395,
        "mean": 95.28395061728395,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 95.28395061728395,
        "sum_squared": 9079.031245237007,
        "min": 95.28395061728395,
        "max": 95.28395061728395,
        "mean": 95.28395061728395,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 95.28395061728395,
        "sum_squared": 9079.031245237007,
        "min": 95.28395061728395,
        "max": 95.28395061728395,
        "mean": 95.28395061728395,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 95.28395061728395,
        "sum_squared": 9079.031245237007,
        "min": 95.28395061728395,
        "max": 95.28395061728395,
        "mean": 95.28395061728395,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 2.032809728457604,
        "sum_squared": 4.132315392111877,
        "min": 2.032809728457604,
        "max": 2.032809728457604,
        "mean": 2.032809728457604,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 2.032809728457604,
        "sum_squared": 4.132315392111877,
        "min": 2.032809728457604,
        "max": 2.032809728457604,
        "mean": 2.032809728457604,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 95.28395061728395,
        "sum_squared": 9079.031245237007,
        "min": 95.28395061728395,
        "max": 95.28395061728395,
        "mean": 95.28395061728395,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 95.28395061728395,
        "sum_squared": 9079.031245237007,
        "min": 95.28395061728395,
        "max": 95.28395061728395,
        "mean": 95.28395061728395,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 539.2037037037037,
        "sum_squared": 290740.6340877915,
        "min": 539.2037037037037,
        "max": 539.2037037037037,
        "mean": 539.2037037037037,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 539.2037037037037,
        "sum_squared": 290740.6340877915,
        "min": 539.2037037037037,
        "max": 539.2037037037037,
        "mean": 539.2037037037037,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_instances",
          "split": "test"
        },
        "count": 1,
        "sum": 162.0,
        "sum_squared": 26244.0,
        "min": 162.0,
        "max": 162.0,
        "mean": 162.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test"
        },
        "count": 1,
        "sum": 0.7921810699588453,
        "sum_squared": 0.627550847601141,
        "min": 0.7921810699588453,
        "max": 0.7921810699588453,
        "mean": 0.7921810699588453,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.7921810699588453,
        "sum_squared": 0.627550847601141,
        "min": 0.7921810699588453,
        "max": 0.7921810699588453,
        "mean": 0.7921810699588453,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.7921810699588453,
        "sum_squared": 0.627550847601141,
        "min": 0.7921810699588453,
        "max": 0.7921810699588453,
        "mean": 0.7921810699588453,
        "variance": 0.0,
        "stddev": 0.0
      }
    ]
  },
  {
    "run_path": "benchmark_output/runs/v0.1.0/helpdesk_call_summarization:model=qwen_qwen2.5-72b-instruct-turbo",
    "run_spec": {
      "name": "helpdesk_call_summarization:model=qwen_qwen2.5-72b-instruct-turbo",
      "scenario_spec": {
        "class_name": "helm.benchmark.scenarios.helpdesk_call_summarization_scenario.HelpdeskCallSummarizationScenario",
        "args": {}
      },
      "adapter_spec": {
        "method": "generation",
        "global_prefix": "",
        "global_suffix": "",
        "instructions": "The following is a call transcript of a call between a compnay's employee and the company's IT helpdesk. Summarize the call transcript in under 100 words.",
        "input_prefix": "### Call Transcript\n",
        "input_suffix": "",
        "reference_prefix": "A. ",
        "reference_suffix": "\n",
        "chain_of_thought_prefix": "",
        "chain_of_thought_suffix": "\n",
        "output_prefix": "",
        "output_suffix": "",
        "instance_prefix": "\n",
        "substitutions": [],
        "max_train_instances": 0,
        "max_eval_instances": 100000,
        "num_outputs": 1,
        "num_train_trials": 1,
        "num_trials": 1,
        "sample_train": true,
        "model_deployment": "together/qwen2.5-72b-instruct-turbo",
        "model": "qwen/qwen2.5-72b-instruct-turbo",
        "temperature": 0.0,
        "max_tokens": 512,
        "stop_sequences": [],
        "multi_label": false
      },
      "metric_specs": [
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.BasicGenerationMetric",
          "args": {
            "names": []
          }
        },
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.BasicReferenceMetric",
          "args": {}
        },
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.InstancesPerSplitMetric",
          "args": {}
        },
        {
          "class_name": "helm.benchmark.metrics.annotation_metrics.AnnotationLikertScaleMetric",
          "args": {
            "annotator_name": "helpdesk_call_center_summarization",
            "key": "score",
            "min_score": 1,
            "max_score": 10
          }
        }
      ],
      "data_augmenter_spec": {
        "perturbation_specs": [],
        "should_augment_train_instances": false,
        "should_include_original_train": false,
        "should_skip_unchanged_train": false,
        "should_augment_eval_instances": false,
        "should_include_original_eval": false,
        "should_skip_unchanged_eval": false,
        "seeds_per_instance": 1
      },
      "groups": [
        "helpdesk_call_summarization"
      ],
      "annotators": [
        {
          "class_name": "helm.benchmark.annotation.helpdesk_call_summarization_annotator.HelpdeskCallSummarizationAnnotator",
          "args": {}
        }
      ]
    },
    "stats": [
      {
        "name": {
          "name": "num_references",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 1332.7777777777778,
        "sum_squared": 1776296.6049382717,
        "min": 1332.7777777777778,
        "max": 1332.7777777777778,
        "mean": 1332.7777777777778,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 88.59876543209876,
        "sum_squared": 7849.741236092058,
        "min": 88.59876543209876,
        "max": 88.59876543209876,
        "mean": 88.59876543209876,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 88.59876543209876,
        "sum_squared": 7849.741236092058,
        "min": 88.59876543209876,
        "max": 88.59876543209876,
        "mean": 88.59876543209876,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "training_co2_cost",
          "split": "test"
        },
        "count": 0,
        "sum": 0.0,
        "sum_squared": 0.0
      },
      {
        "name": {
          "name": "training_energy_cost",
          "split": "test"
        },
        "count": 0,
        "sum": 0.0,
        "sum_squared": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test"
        },
        "count": 1,
        "sum": 1.812830861703849,
        "sum_squared": 3.28635573314592,
        "min": 1.812830861703849,
        "max": 1.812830861703849,
        "mean": 1.812830861703849,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test"
        },
        "count": 1,
        "sum": -14.090352056650909,
        "sum_squared": 198.5380210803665,
        "min": -14.090352056650909,
        "max": -14.090352056650909,
        "mean": -14.090352056650909,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 88.59876543209876,
        "sum_squared": 7849.741236092058,
        "min": 88.59876543209876,
        "max": 88.59876543209876,
        "mean": 88.59876543209876,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test"
        },
        "count": 1,
        "sum": 467.11728395061726,
        "sum_squared": 218198.5569654016,
        "min": 467.11728395061726,
        "max": 467.11728395061726,
        "mean": 467.11728395061726,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "perplexity",
          "split": "test"
        },
        "count": 1,
        "sum": 1.1723796063944947,
        "sum_squared": 1.3744739414897102,
        "min": 1.1723796063944947,
        "max": 1.1723796063944947,
        "mean": 1.1723796063944947,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "bits_per_byte",
          "split": "test"
        },
        "count": 1,
        "sum": 0.043518152153536924,
        "sum_squared": 0.0018938295668583904,
        "min": 0.043518152153536924,
        "max": 0.043518152153536924,
        "mean": 0.043518152153536924,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob_per_byte",
          "split": "test"
        },
        "count": 1,
        "sum": -0.030164484468402832,
        "sum_squared": 0.0009098961232445157,
        "min": -0.030164484468402832,
        "max": -0.030164484468402832,
        "mean": -0.030164484468402832,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_references",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_references",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1332.7777777777778,
        "sum_squared": 1776296.6049382717,
        "min": 1332.7777777777778,
        "max": 1332.7777777777778,
        "mean": 1332.7777777777778,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1332.7777777777778,
        "sum_squared": 1776296.6049382717,
        "min": 1332.7777777777778,
        "max": 1332.7777777777778,
        "mean": 1332.7777777777778,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 88.59876543209876,
        "sum_squared": 7849.741236092058,
        "min": 88.59876543209876,
        "max": 88.59876543209876,
        "mean": 88.59876543209876,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 88.59876543209876,
        "sum_squared": 7849.741236092058,
        "min": 88.59876543209876,
        "max": 88.59876543209876,
        "mean": 88.59876543209876,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 88.59876543209876,
        "sum_squared": 7849.741236092058,
        "min": 88.59876543209876,
        "max": 88.59876543209876,
        "mean": 88.59876543209876,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 88.59876543209876,
        "sum_squared": 7849.741236092058,
        "min": 88.59876543209876,
        "max": 88.59876543209876,
        "mean": 88.59876543209876,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.812830861703849,
        "sum_squared": 3.28635573314592,
        "min": 1.812830861703849,
        "max": 1.812830861703849,
        "mean": 1.812830861703849,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.812830861703849,
        "sum_squared": 3.28635573314592,
        "min": 1.812830861703849,
        "max": 1.812830861703849,
        "mean": 1.812830861703849,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": -14.090352056650909,
        "sum_squared": 198.5380210803665,
        "min": -14.090352056650909,
        "max": -14.090352056650909,
        "mean": -14.090352056650909,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": -14.090352056650909,
        "sum_squared": 198.5380210803665,
        "min": -14.090352056650909,
        "max": -14.090352056650909,
        "mean": -14.090352056650909,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 88.59876543209876,
        "sum_squared": 7849.741236092058,
        "min": 88.59876543209876,
        "max": 88.59876543209876,
        "mean": 88.59876543209876,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 88.59876543209876,
        "sum_squared": 7849.741236092058,
        "min": 88.59876543209876,
        "max": 88.59876543209876,
        "mean": 88.59876543209876,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 467.11728395061726,
        "sum_squared": 218198.5569654016,
        "min": 467.11728395061726,
        "max": 467.11728395061726,
        "mean": 467.11728395061726,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 467.11728395061726,
        "sum_squared": 218198.5569654016,
        "min": 467.11728395061726,
        "max": 467.11728395061726,
        "mean": 467.11728395061726,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_instances",
          "split": "test"
        },
        "count": 1,
        "sum": 162.0,
        "sum_squared": 26244.0,
        "min": 162.0,
        "max": 162.0,
        "mean": 162.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test"
        },
        "count": 1,
        "sum": 0.7969821673525352,
        "sum_squared": 0.6351805750779445,
        "min": 0.7969821673525352,
        "max": 0.7969821673525352,
        "mean": 0.7969821673525352,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.7969821673525352,
        "sum_squared": 0.6351805750779445,
        "min": 0.7969821673525352,
        "max": 0.7969821673525352,
        "mean": 0.7969821673525352,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.7969821673525352,
        "sum_squared": 0.6351805750779445,
        "min": 0.7969821673525352,
        "max": 0.7969821673525352,
        "mean": 0.7969821673525352,
        "variance": 0.0,
        "stddev": 0.0
      }
    ]
  },
  {
    "run_path": "benchmark_output/runs/v0.1.0/helpdesk_call_summarization:model=qwen_qwen2.5-7b-instruct-turbo",
    "run_spec": {
      "name": "helpdesk_call_summarization:model=qwen_qwen2.5-7b-instruct-turbo",
      "scenario_spec": {
        "class_name": "helm.benchmark.scenarios.helpdesk_call_summarization_scenario.HelpdeskCallSummarizationScenario",
        "args": {}
      },
      "adapter_spec": {
        "method": "generation",
        "global_prefix": "",
        "global_suffix": "",
        "instructions": "The following is a call transcript of a call between a compnay's employee and the company's IT helpdesk. Summarize the call transcript in under 100 words.",
        "input_prefix": "### Call Transcript\n",
        "input_suffix": "",
        "reference_prefix": "A. ",
        "reference_suffix": "\n",
        "chain_of_thought_prefix": "",
        "chain_of_thought_suffix": "\n",
        "output_prefix": "",
        "output_suffix": "",
        "instance_prefix": "\n",
        "substitutions": [],
        "max_train_instances": 0,
        "max_eval_instances": 100000,
        "num_outputs": 1,
        "num_train_trials": 1,
        "num_trials": 1,
        "sample_train": true,
        "model_deployment": "together/qwen2.5-7b-instruct-turbo",
        "model": "qwen/qwen2.5-7b-instruct-turbo",
        "temperature": 0.0,
        "max_tokens": 512,
        "stop_sequences": [],
        "multi_label": false
      },
      "metric_specs": [
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.BasicGenerationMetric",
          "args": {
            "names": []
          }
        },
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.BasicReferenceMetric",
          "args": {}
        },
        {
          "class_name": "helm.benchmark.metrics.basic_metrics.InstancesPerSplitMetric",
          "args": {}
        },
        {
          "class_name": "helm.benchmark.metrics.annotation_metrics.AnnotationLikertScaleMetric",
          "args": {
            "annotator_name": "helpdesk_call_center_summarization",
            "key": "score",
            "min_score": 1,
            "max_score": 10
          }
        }
      ],
      "data_augmenter_spec": {
        "perturbation_specs": [],
        "should_augment_train_instances": false,
        "should_include_original_train": false,
        "should_skip_unchanged_train": false,
        "should_augment_eval_instances": false,
        "should_include_original_eval": false,
        "should_skip_unchanged_eval": false,
        "seeds_per_instance": 1
      },
      "groups": [
        "helpdesk_call_summarization"
      ],
      "annotators": [
        {
          "class_name": "helm.benchmark.annotation.helpdesk_call_summarization_annotator.HelpdeskCallSummarizationAnnotator",
          "args": {}
        }
      ]
    },
    "stats": [
      {
        "name": {
          "name": "num_references",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 1332.7777777777778,
        "sum_squared": 1776296.6049382717,
        "min": 1332.7777777777778,
        "max": 1332.7777777777778,
        "mean": 1332.7777777777778,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 68.5246913580247,
        "sum_squared": 4695.633325712544,
        "min": 68.5246913580247,
        "max": 68.5246913580247,
        "mean": 68.5246913580247,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 68.5246913580247,
        "sum_squared": 4695.633325712544,
        "min": 68.5246913580247,
        "max": 68.5246913580247,
        "mean": 68.5246913580247,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "training_co2_cost",
          "split": "test"
        },
        "count": 0,
        "sum": 0.0,
        "sum_squared": 0.0
      },
      {
        "name": {
          "name": "training_energy_cost",
          "split": "test"
        },
        "count": 0,
        "sum": 0.0,
        "sum_squared": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test"
        },
        "count": 1,
        "sum": 0.772343741522895,
        "sum_squared": 0.5965148550695845,
        "min": 0.772343741522895,
        "max": 0.772343741522895,
        "mean": 0.772343741522895,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test"
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test"
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test"
        },
        "count": 1,
        "sum": -19.53333554061788,
        "sum_squared": 381.55119734236564,
        "min": -19.53333554061788,
        "max": -19.53333554061788,
        "mean": -19.53333554061788,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test"
        },
        "count": 1,
        "sum": 68.5246913580247,
        "sum_squared": 4695.633325712544,
        "min": 68.5246913580247,
        "max": 68.5246913580247,
        "mean": 68.5246913580247,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test"
        },
        "count": 1,
        "sum": 360.88271604938274,
        "sum_squared": 130236.33474317941,
        "min": 360.88271604938274,
        "max": 360.88271604938274,
        "mean": 360.88271604938274,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "perplexity",
          "split": "test"
        },
        "count": 1,
        "sum": 1.3298357423655804,
        "sum_squared": 1.7684631016730143,
        "min": 1.3298357423655804,
        "max": 1.3298357423655804,
        "mean": 1.3298357423655804,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "bits_per_byte",
          "split": "test"
        },
        "count": 1,
        "sum": 0.07808810193230022,
        "sum_squared": 0.006097751663389309,
        "min": 0.07808810193230022,
        "max": 0.07808810193230022,
        "mean": 0.07808810193230022,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob_per_byte",
          "split": "test"
        },
        "count": 1,
        "sum": -0.054126547689651514,
        "sum_squared": 0.0029296831648001195,
        "min": -0.054126547689651514,
        "max": -0.054126547689651514,
        "mean": -0.054126547689651514,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_references",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_references",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_trials",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1332.7777777777778,
        "sum_squared": 1776296.6049382717,
        "min": 1332.7777777777778,
        "max": 1332.7777777777778,
        "mean": 1332.7777777777778,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_prompt_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1332.7777777777778,
        "sum_squared": 1776296.6049382717,
        "min": 1332.7777777777778,
        "max": 1332.7777777777778,
        "mean": 1332.7777777777778,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 68.5246913580247,
        "sum_squared": 4695.633325712544,
        "min": 68.5246913580247,
        "max": 68.5246913580247,
        "mean": 68.5246913580247,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_completion_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 68.5246913580247,
        "sum_squared": 4695.633325712544,
        "min": 68.5246913580247,
        "max": 68.5246913580247,
        "mean": 68.5246913580247,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 68.5246913580247,
        "sum_squared": 4695.633325712544,
        "min": 68.5246913580247,
        "max": 68.5246913580247,
        "mean": 68.5246913580247,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_output_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 68.5246913580247,
        "sum_squared": 4695.633325712544,
        "min": 68.5246913580247,
        "max": 68.5246913580247,
        "mean": 68.5246913580247,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.772343741522895,
        "sum_squared": 0.5965148550695845,
        "min": 0.772343741522895,
        "max": 0.772343741522895,
        "mean": 0.772343741522895,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "inference_runtime",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.772343741522895,
        "sum_squared": 0.5965148550695845,
        "min": 0.772343741522895,
        "max": 0.772343741522895,
        "mean": 0.772343741522895,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "batch_size",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_length",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_stop",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_endoftext",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "finish_reason_unknown",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 1.0,
        "sum_squared": 1.0,
        "min": 1.0,
        "max": 1.0,
        "mean": 1.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_train_instances",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "prompt_truncated",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.0,
        "sum_squared": 0.0,
        "min": 0.0,
        "max": 0.0,
        "mean": 0.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": -19.53333554061788,
        "sum_squared": 381.55119734236564,
        "min": -19.53333554061788,
        "max": -19.53333554061788,
        "mean": -19.53333554061788,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "logprob",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": -19.53333554061788,
        "sum_squared": 381.55119734236564,
        "min": -19.53333554061788,
        "max": -19.53333554061788,
        "mean": -19.53333554061788,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 68.5246913580247,
        "sum_squared": 4695.633325712544,
        "min": 68.5246913580247,
        "max": 68.5246913580247,
        "mean": 68.5246913580247,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_perplexity_tokens",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 68.5246913580247,
        "sum_squared": 4695.633325712544,
        "min": 68.5246913580247,
        "max": 68.5246913580247,
        "mean": 68.5246913580247,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 360.88271604938274,
        "sum_squared": 130236.33474317941,
        "min": 360.88271604938274,
        "max": 360.88271604938274,
        "mean": 360.88271604938274,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_bytes",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 360.88271604938274,
        "sum_squared": 130236.33474317941,
        "min": 360.88271604938274,
        "max": 360.88271604938274,
        "mean": 360.88271604938274,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "num_instances",
          "split": "test"
        },
        "count": 1,
        "sum": 162.0,
        "sum_squared": 26244.0,
        "min": 162.0,
        "max": 162.0,
        "mean": 162.0,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test"
        },
        "count": 1,
        "sum": 0.7469135802469118,
        "sum_squared": 0.55787989635726,
        "min": 0.7469135802469118,
        "max": 0.7469135802469118,
        "mean": 0.7469135802469118,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test",
          "perturbation": {
            "name": "robustness",
            "robustness": true,
            "fairness": false,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.7469135802469118,
        "sum_squared": 0.55787989635726,
        "min": 0.7469135802469118,
        "max": 0.7469135802469118,
        "mean": 0.7469135802469118,
        "variance": 0.0,
        "stddev": 0.0
      },
      {
        "name": {
          "name": "annotation_helpdesk_call_center_summarization_score",
          "split": "test",
          "perturbation": {
            "name": "fairness",
            "robustness": false,
            "fairness": true,
            "computed_on": "worst"
          }
        },
        "count": 1,
        "sum": 0.7469135802469118,
        "sum_squared": 0.55787989635726,
        "min": 0.7469135802469118,
        "max": 0.7469135802469118,
        "mean": 0.7469135802469118,
        "variance": 0.0,
        "stddev": 0.0
      }
    ]
  }
]