[
  {
    "name": "helpdesk_call_summarization:model=anthropic_claude-3-5-haiku-20241022",
    "scenario_spec": {
      "class_name": "helm.benchmark.scenarios.helpdesk_call_summarization_scenario.HelpdeskCallSummarizationScenario",
      "args": {}
    },
    "adapter_spec": {
      "method": "generation",
      "global_prefix": "",
      "global_suffix": "",
      "instructions": "The following is a call transcript of a call between a company's employee and the company's IT helpdesk. Summarize the call transcript in under 200 words.",
      "input_prefix": "### Call Transcript\n",
      "input_suffix": "",
      "reference_prefix": "A. ",
      "reference_suffix": "\n",
      "chain_of_thought_prefix": "",
      "chain_of_thought_suffix": "\n",
      "output_prefix": "",
      "output_suffix": "",
      "instance_prefix": "\n",
      "substitutions": [],
      "max_train_instances": 0,
      "max_eval_instances": 10000,
      "num_outputs": 1,
      "num_train_trials": 1,
      "num_trials": 1,
      "sample_train": true,
      "model_deployment": "anthropic/claude-3-5-haiku-20241022",
      "model": "anthropic/claude-3-5-haiku-20241022",
      "temperature": 0.0,
      "max_tokens": 512,
      "stop_sequences": [],
      "multi_label": false
    },
    "metric_specs": [
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.BasicGenerationMetric",
        "args": {
          "names": []
        }
      },
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.BasicReferenceMetric",
        "args": {}
      },
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.InstancesPerSplitMetric",
        "args": {}
      },
      {
        "class_name": "helm.benchmark.metrics.helpdesk_call_summarization_metrics.HelpdeskCallSummarizationMetric",
        "args": {}
      }
    ],
    "data_augmenter_spec": {
      "perturbation_specs": [],
      "should_augment_train_instances": false,
      "should_include_original_train": false,
      "should_skip_unchanged_train": false,
      "should_augment_eval_instances": false,
      "should_include_original_eval": false,
      "should_skip_unchanged_eval": false,
      "seeds_per_instance": 1
    },
    "groups": [
      "helpdesk_call_summarization"
    ],
    "annotators": [
      {
        "class_name": "helm.benchmark.annotation.helpdesk_call_summarization_annotator.HelpdeskCallSummarizationAnnotator",
        "args": {}
      }
    ]
  },
  {
    "name": "helpdesk_call_summarization:model=anthropic_claude-3-5-sonnet-20240620",
    "scenario_spec": {
      "class_name": "helm.benchmark.scenarios.helpdesk_call_summarization_scenario.HelpdeskCallSummarizationScenario",
      "args": {}
    },
    "adapter_spec": {
      "method": "generation",
      "global_prefix": "",
      "global_suffix": "",
      "instructions": "The following is a call transcript of a call between a company's employee and the company's IT helpdesk. Summarize the call transcript in under 200 words.",
      "input_prefix": "### Call Transcript\n",
      "input_suffix": "",
      "reference_prefix": "A. ",
      "reference_suffix": "\n",
      "chain_of_thought_prefix": "",
      "chain_of_thought_suffix": "\n",
      "output_prefix": "",
      "output_suffix": "",
      "instance_prefix": "\n",
      "substitutions": [],
      "max_train_instances": 0,
      "max_eval_instances": 10000,
      "num_outputs": 1,
      "num_train_trials": 1,
      "num_trials": 1,
      "sample_train": true,
      "model_deployment": "anthropic/claude-3-5-sonnet-20240620",
      "model": "anthropic/claude-3-5-sonnet-20240620",
      "temperature": 0.0,
      "max_tokens": 512,
      "stop_sequences": [],
      "multi_label": false
    },
    "metric_specs": [
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.BasicGenerationMetric",
        "args": {
          "names": []
        }
      },
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.BasicReferenceMetric",
        "args": {}
      },
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.InstancesPerSplitMetric",
        "args": {}
      },
      {
        "class_name": "helm.benchmark.metrics.helpdesk_call_summarization_metrics.HelpdeskCallSummarizationMetric",
        "args": {}
      }
    ],
    "data_augmenter_spec": {
      "perturbation_specs": [],
      "should_augment_train_instances": false,
      "should_include_original_train": false,
      "should_skip_unchanged_train": false,
      "should_augment_eval_instances": false,
      "should_include_original_eval": false,
      "should_skip_unchanged_eval": false,
      "seeds_per_instance": 1
    },
    "groups": [
      "helpdesk_call_summarization"
    ],
    "annotators": [
      {
        "class_name": "helm.benchmark.annotation.helpdesk_call_summarization_annotator.HelpdeskCallSummarizationAnnotator",
        "args": {}
      }
    ]
  },
  {
    "name": "helpdesk_call_summarization:model=anthropic_claude-3-7-sonnet-20250219",
    "scenario_spec": {
      "class_name": "helm.benchmark.scenarios.helpdesk_call_summarization_scenario.HelpdeskCallSummarizationScenario",
      "args": {}
    },
    "adapter_spec": {
      "method": "generation",
      "global_prefix": "",
      "global_suffix": "",
      "instructions": "The following is a call transcript of a call between a company's employee and the company's IT helpdesk. Summarize the call transcript in under 200 words.",
      "input_prefix": "### Call Transcript\n",
      "input_suffix": "",
      "reference_prefix": "A. ",
      "reference_suffix": "\n",
      "chain_of_thought_prefix": "",
      "chain_of_thought_suffix": "\n",
      "output_prefix": "",
      "output_suffix": "",
      "instance_prefix": "\n",
      "substitutions": [],
      "max_train_instances": 0,
      "max_eval_instances": 10000,
      "num_outputs": 1,
      "num_train_trials": 1,
      "num_trials": 1,
      "sample_train": true,
      "model_deployment": "anthropic/claude-3-7-sonnet-20250219",
      "model": "anthropic/claude-3-7-sonnet-20250219",
      "temperature": 0.0,
      "max_tokens": 512,
      "stop_sequences": [],
      "multi_label": false
    },
    "metric_specs": [
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.BasicGenerationMetric",
        "args": {
          "names": []
        }
      },
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.BasicReferenceMetric",
        "args": {}
      },
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.InstancesPerSplitMetric",
        "args": {}
      },
      {
        "class_name": "helm.benchmark.metrics.helpdesk_call_summarization_metrics.HelpdeskCallSummarizationMetric",
        "args": {}
      }
    ],
    "data_augmenter_spec": {
      "perturbation_specs": [],
      "should_augment_train_instances": false,
      "should_include_original_train": false,
      "should_skip_unchanged_train": false,
      "should_augment_eval_instances": false,
      "should_include_original_eval": false,
      "should_skip_unchanged_eval": false,
      "seeds_per_instance": 1
    },
    "groups": [
      "helpdesk_call_summarization"
    ],
    "annotators": [
      {
        "class_name": "helm.benchmark.annotation.helpdesk_call_summarization_annotator.HelpdeskCallSummarizationAnnotator",
        "args": {}
      }
    ]
  },
  {
    "name": "helpdesk_call_summarization:model=deepseek-ai_deepseek-v3",
    "scenario_spec": {
      "class_name": "helm.benchmark.scenarios.helpdesk_call_summarization_scenario.HelpdeskCallSummarizationScenario",
      "args": {}
    },
    "adapter_spec": {
      "method": "generation",
      "global_prefix": "",
      "global_suffix": "",
      "instructions": "The following is a call transcript of a call between a company's employee and the company's IT helpdesk. Summarize the call transcript in under 200 words.",
      "input_prefix": "### Call Transcript\n",
      "input_suffix": "",
      "reference_prefix": "A. ",
      "reference_suffix": "\n",
      "chain_of_thought_prefix": "",
      "chain_of_thought_suffix": "\n",
      "output_prefix": "",
      "output_suffix": "",
      "instance_prefix": "\n",
      "substitutions": [],
      "max_train_instances": 0,
      "max_eval_instances": 10000,
      "num_outputs": 1,
      "num_train_trials": 1,
      "num_trials": 1,
      "sample_train": true,
      "model_deployment": "together/deepseek-v3",
      "model": "deepseek-ai/deepseek-v3",
      "temperature": 0.0,
      "max_tokens": 512,
      "stop_sequences": [],
      "multi_label": false
    },
    "metric_specs": [
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.BasicGenerationMetric",
        "args": {
          "names": []
        }
      },
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.BasicReferenceMetric",
        "args": {}
      },
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.InstancesPerSplitMetric",
        "args": {}
      },
      {
        "class_name": "helm.benchmark.metrics.helpdesk_call_summarization_metrics.HelpdeskCallSummarizationMetric",
        "args": {}
      }
    ],
    "data_augmenter_spec": {
      "perturbation_specs": [],
      "should_augment_train_instances": false,
      "should_include_original_train": false,
      "should_skip_unchanged_train": false,
      "should_augment_eval_instances": false,
      "should_include_original_eval": false,
      "should_skip_unchanged_eval": false,
      "seeds_per_instance": 1
    },
    "groups": [
      "helpdesk_call_summarization"
    ],
    "annotators": [
      {
        "class_name": "helm.benchmark.annotation.helpdesk_call_summarization_annotator.HelpdeskCallSummarizationAnnotator",
        "args": {}
      }
    ]
  },
  {
    "name": "helpdesk_call_summarization:model=google_gemini-1.5-flash-002",
    "scenario_spec": {
      "class_name": "helm.benchmark.scenarios.helpdesk_call_summarization_scenario.HelpdeskCallSummarizationScenario",
      "args": {}
    },
    "adapter_spec": {
      "method": "generation",
      "global_prefix": "",
      "global_suffix": "",
      "instructions": "The following is a call transcript of a call between a company's employee and the company's IT helpdesk. Summarize the call transcript in under 200 words.",
      "input_prefix": "### Call Transcript\n",
      "input_suffix": "",
      "reference_prefix": "A. ",
      "reference_suffix": "\n",
      "chain_of_thought_prefix": "",
      "chain_of_thought_suffix": "\n",
      "output_prefix": "",
      "output_suffix": "",
      "instance_prefix": "\n",
      "substitutions": [],
      "max_train_instances": 0,
      "max_eval_instances": 10000,
      "num_outputs": 1,
      "num_train_trials": 1,
      "num_trials": 1,
      "sample_train": true,
      "model_deployment": "google/gemini-1.5-flash-002",
      "model": "google/gemini-1.5-flash-002",
      "temperature": 0.0,
      "max_tokens": 512,
      "stop_sequences": [],
      "multi_label": false
    },
    "metric_specs": [
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.BasicGenerationMetric",
        "args": {
          "names": []
        }
      },
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.BasicReferenceMetric",
        "args": {}
      },
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.InstancesPerSplitMetric",
        "args": {}
      },
      {
        "class_name": "helm.benchmark.metrics.helpdesk_call_summarization_metrics.HelpdeskCallSummarizationMetric",
        "args": {}
      }
    ],
    "data_augmenter_spec": {
      "perturbation_specs": [],
      "should_augment_train_instances": false,
      "should_include_original_train": false,
      "should_skip_unchanged_train": false,
      "should_augment_eval_instances": false,
      "should_include_original_eval": false,
      "should_skip_unchanged_eval": false,
      "seeds_per_instance": 1
    },
    "groups": [
      "helpdesk_call_summarization"
    ],
    "annotators": [
      {
        "class_name": "helm.benchmark.annotation.helpdesk_call_summarization_annotator.HelpdeskCallSummarizationAnnotator",
        "args": {}
      }
    ]
  },
  {
    "name": "helpdesk_call_summarization:model=google_gemini-1.5-pro-002",
    "scenario_spec": {
      "class_name": "helm.benchmark.scenarios.helpdesk_call_summarization_scenario.HelpdeskCallSummarizationScenario",
      "args": {}
    },
    "adapter_spec": {
      "method": "generation",
      "global_prefix": "",
      "global_suffix": "",
      "instructions": "The following is a call transcript of a call between a company's employee and the company's IT helpdesk. Summarize the call transcript in under 200 words.",
      "input_prefix": "### Call Transcript\n",
      "input_suffix": "",
      "reference_prefix": "A. ",
      "reference_suffix": "\n",
      "chain_of_thought_prefix": "",
      "chain_of_thought_suffix": "\n",
      "output_prefix": "",
      "output_suffix": "",
      "instance_prefix": "\n",
      "substitutions": [],
      "max_train_instances": 0,
      "max_eval_instances": 10000,
      "num_outputs": 1,
      "num_train_trials": 1,
      "num_trials": 1,
      "sample_train": true,
      "model_deployment": "google/gemini-1.5-pro-002",
      "model": "google/gemini-1.5-pro-002",
      "temperature": 0.0,
      "max_tokens": 512,
      "stop_sequences": [],
      "multi_label": false
    },
    "metric_specs": [
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.BasicGenerationMetric",
        "args": {
          "names": []
        }
      },
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.BasicReferenceMetric",
        "args": {}
      },
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.InstancesPerSplitMetric",
        "args": {}
      },
      {
        "class_name": "helm.benchmark.metrics.helpdesk_call_summarization_metrics.HelpdeskCallSummarizationMetric",
        "args": {}
      }
    ],
    "data_augmenter_spec": {
      "perturbation_specs": [],
      "should_augment_train_instances": false,
      "should_include_original_train": false,
      "should_skip_unchanged_train": false,
      "should_augment_eval_instances": false,
      "should_include_original_eval": false,
      "should_skip_unchanged_eval": false,
      "seeds_per_instance": 1
    },
    "groups": [
      "helpdesk_call_summarization"
    ],
    "annotators": [
      {
        "class_name": "helm.benchmark.annotation.helpdesk_call_summarization_annotator.HelpdeskCallSummarizationAnnotator",
        "args": {}
      }
    ]
  },
  {
    "name": "helpdesk_call_summarization:model=google_gemini-2.0-flash-001",
    "scenario_spec": {
      "class_name": "helm.benchmark.scenarios.helpdesk_call_summarization_scenario.HelpdeskCallSummarizationScenario",
      "args": {}
    },
    "adapter_spec": {
      "method": "generation",
      "global_prefix": "",
      "global_suffix": "",
      "instructions": "The following is a call transcript of a call between a company's employee and the company's IT helpdesk. Summarize the call transcript in under 200 words.",
      "input_prefix": "### Call Transcript\n",
      "input_suffix": "",
      "reference_prefix": "A. ",
      "reference_suffix": "\n",
      "chain_of_thought_prefix": "",
      "chain_of_thought_suffix": "\n",
      "output_prefix": "",
      "output_suffix": "",
      "instance_prefix": "\n",
      "substitutions": [],
      "max_train_instances": 0,
      "max_eval_instances": 10000,
      "num_outputs": 1,
      "num_train_trials": 1,
      "num_trials": 1,
      "sample_train": true,
      "model_deployment": "google/gemini-2.0-flash-001",
      "model": "google/gemini-2.0-flash-001",
      "temperature": 0.0,
      "max_tokens": 512,
      "stop_sequences": [],
      "multi_label": false
    },
    "metric_specs": [
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.BasicGenerationMetric",
        "args": {
          "names": []
        }
      },
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.BasicReferenceMetric",
        "args": {}
      },
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.InstancesPerSplitMetric",
        "args": {}
      },
      {
        "class_name": "helm.benchmark.metrics.helpdesk_call_summarization_metrics.HelpdeskCallSummarizationMetric",
        "args": {}
      }
    ],
    "data_augmenter_spec": {
      "perturbation_specs": [],
      "should_augment_train_instances": false,
      "should_include_original_train": false,
      "should_skip_unchanged_train": false,
      "should_augment_eval_instances": false,
      "should_include_original_eval": false,
      "should_skip_unchanged_eval": false,
      "seeds_per_instance": 1
    },
    "groups": [
      "helpdesk_call_summarization"
    ],
    "annotators": [
      {
        "class_name": "helm.benchmark.annotation.helpdesk_call_summarization_annotator.HelpdeskCallSummarizationAnnotator",
        "args": {}
      }
    ]
  },
  {
    "name": "helpdesk_call_summarization:model=meta_llama-3.1-405b-instruct-turbo",
    "scenario_spec": {
      "class_name": "helm.benchmark.scenarios.helpdesk_call_summarization_scenario.HelpdeskCallSummarizationScenario",
      "args": {}
    },
    "adapter_spec": {
      "method": "generation",
      "global_prefix": "",
      "global_suffix": "",
      "instructions": "The following is a call transcript of a call between a company's employee and the company's IT helpdesk. Summarize the call transcript in under 200 words.",
      "input_prefix": "### Call Transcript\n",
      "input_suffix": "",
      "reference_prefix": "A. ",
      "reference_suffix": "\n",
      "chain_of_thought_prefix": "",
      "chain_of_thought_suffix": "\n",
      "output_prefix": "",
      "output_suffix": "",
      "instance_prefix": "\n",
      "substitutions": [],
      "max_train_instances": 0,
      "max_eval_instances": 10000,
      "num_outputs": 1,
      "num_train_trials": 1,
      "num_trials": 1,
      "sample_train": true,
      "model_deployment": "together/llama-3.1-405b-instruct-turbo",
      "model": "meta/llama-3.1-405b-instruct-turbo",
      "temperature": 0.0,
      "max_tokens": 512,
      "stop_sequences": [],
      "multi_label": false
    },
    "metric_specs": [
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.BasicGenerationMetric",
        "args": {
          "names": []
        }
      },
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.BasicReferenceMetric",
        "args": {}
      },
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.InstancesPerSplitMetric",
        "args": {}
      },
      {
        "class_name": "helm.benchmark.metrics.helpdesk_call_summarization_metrics.HelpdeskCallSummarizationMetric",
        "args": {}
      }
    ],
    "data_augmenter_spec": {
      "perturbation_specs": [],
      "should_augment_train_instances": false,
      "should_include_original_train": false,
      "should_skip_unchanged_train": false,
      "should_augment_eval_instances": false,
      "should_include_original_eval": false,
      "should_skip_unchanged_eval": false,
      "seeds_per_instance": 1
    },
    "groups": [
      "helpdesk_call_summarization"
    ],
    "annotators": [
      {
        "class_name": "helm.benchmark.annotation.helpdesk_call_summarization_annotator.HelpdeskCallSummarizationAnnotator",
        "args": {}
      }
    ]
  },
  {
    "name": "helpdesk_call_summarization:model=meta_llama-3.1-70b-instruct-turbo",
    "scenario_spec": {
      "class_name": "helm.benchmark.scenarios.helpdesk_call_summarization_scenario.HelpdeskCallSummarizationScenario",
      "args": {}
    },
    "adapter_spec": {
      "method": "generation",
      "global_prefix": "",
      "global_suffix": "",
      "instructions": "The following is a call transcript of a call between a company's employee and the company's IT helpdesk. Summarize the call transcript in under 200 words.",
      "input_prefix": "### Call Transcript\n",
      "input_suffix": "",
      "reference_prefix": "A. ",
      "reference_suffix": "\n",
      "chain_of_thought_prefix": "",
      "chain_of_thought_suffix": "\n",
      "output_prefix": "",
      "output_suffix": "",
      "instance_prefix": "\n",
      "substitutions": [],
      "max_train_instances": 0,
      "max_eval_instances": 10000,
      "num_outputs": 1,
      "num_train_trials": 1,
      "num_trials": 1,
      "sample_train": true,
      "model_deployment": "together/llama-3.1-70b-instruct-turbo",
      "model": "meta/llama-3.1-70b-instruct-turbo",
      "temperature": 0.0,
      "max_tokens": 512,
      "stop_sequences": [],
      "multi_label": false
    },
    "metric_specs": [
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.BasicGenerationMetric",
        "args": {
          "names": []
        }
      },
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.BasicReferenceMetric",
        "args": {}
      },
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.InstancesPerSplitMetric",
        "args": {}
      },
      {
        "class_name": "helm.benchmark.metrics.helpdesk_call_summarization_metrics.HelpdeskCallSummarizationMetric",
        "args": {}
      }
    ],
    "data_augmenter_spec": {
      "perturbation_specs": [],
      "should_augment_train_instances": false,
      "should_include_original_train": false,
      "should_skip_unchanged_train": false,
      "should_augment_eval_instances": false,
      "should_include_original_eval": false,
      "should_skip_unchanged_eval": false,
      "seeds_per_instance": 1
    },
    "groups": [
      "helpdesk_call_summarization"
    ],
    "annotators": [
      {
        "class_name": "helm.benchmark.annotation.helpdesk_call_summarization_annotator.HelpdeskCallSummarizationAnnotator",
        "args": {}
      }
    ]
  },
  {
    "name": "helpdesk_call_summarization:model=meta_llama-3.1-8b-instruct-turbo",
    "scenario_spec": {
      "class_name": "helm.benchmark.scenarios.helpdesk_call_summarization_scenario.HelpdeskCallSummarizationScenario",
      "args": {}
    },
    "adapter_spec": {
      "method": "generation",
      "global_prefix": "",
      "global_suffix": "",
      "instructions": "The following is a call transcript of a call between a company's employee and the company's IT helpdesk. Summarize the call transcript in under 200 words.",
      "input_prefix": "### Call Transcript\n",
      "input_suffix": "",
      "reference_prefix": "A. ",
      "reference_suffix": "\n",
      "chain_of_thought_prefix": "",
      "chain_of_thought_suffix": "\n",
      "output_prefix": "",
      "output_suffix": "",
      "instance_prefix": "\n",
      "substitutions": [],
      "max_train_instances": 0,
      "max_eval_instances": 10000,
      "num_outputs": 1,
      "num_train_trials": 1,
      "num_trials": 1,
      "sample_train": true,
      "model_deployment": "together/llama-3.1-8b-instruct-turbo",
      "model": "meta/llama-3.1-8b-instruct-turbo",
      "temperature": 0.0,
      "max_tokens": 512,
      "stop_sequences": [],
      "multi_label": false
    },
    "metric_specs": [
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.BasicGenerationMetric",
        "args": {
          "names": []
        }
      },
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.BasicReferenceMetric",
        "args": {}
      },
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.InstancesPerSplitMetric",
        "args": {}
      },
      {
        "class_name": "helm.benchmark.metrics.helpdesk_call_summarization_metrics.HelpdeskCallSummarizationMetric",
        "args": {}
      }
    ],
    "data_augmenter_spec": {
      "perturbation_specs": [],
      "should_augment_train_instances": false,
      "should_include_original_train": false,
      "should_skip_unchanged_train": false,
      "should_augment_eval_instances": false,
      "should_include_original_eval": false,
      "should_skip_unchanged_eval": false,
      "seeds_per_instance": 1
    },
    "groups": [
      "helpdesk_call_summarization"
    ],
    "annotators": [
      {
        "class_name": "helm.benchmark.annotation.helpdesk_call_summarization_annotator.HelpdeskCallSummarizationAnnotator",
        "args": {}
      }
    ]
  },
  {
    "name": "helpdesk_call_summarization:model=mistralai_mistral-7b-instruct-v0.3",
    "scenario_spec": {
      "class_name": "helm.benchmark.scenarios.helpdesk_call_summarization_scenario.HelpdeskCallSummarizationScenario",
      "args": {}
    },
    "adapter_spec": {
      "method": "generation",
      "global_prefix": "",
      "global_suffix": "",
      "instructions": "The following is a call transcript of a call between a company's employee and the company's IT helpdesk. Summarize the call transcript in under 200 words.",
      "input_prefix": "### Call Transcript\n",
      "input_suffix": "",
      "reference_prefix": "A. ",
      "reference_suffix": "\n",
      "chain_of_thought_prefix": "",
      "chain_of_thought_suffix": "\n",
      "output_prefix": "",
      "output_suffix": "",
      "instance_prefix": "\n",
      "substitutions": [],
      "max_train_instances": 0,
      "max_eval_instances": 10000,
      "num_outputs": 1,
      "num_train_trials": 1,
      "num_trials": 1,
      "sample_train": true,
      "model_deployment": "together/mistral-7b-instruct-v0.3",
      "model": "mistralai/mistral-7b-instruct-v0.3",
      "temperature": 0.0,
      "max_tokens": 512,
      "stop_sequences": [],
      "multi_label": false
    },
    "metric_specs": [
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.BasicGenerationMetric",
        "args": {
          "names": []
        }
      },
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.BasicReferenceMetric",
        "args": {}
      },
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.InstancesPerSplitMetric",
        "args": {}
      },
      {
        "class_name": "helm.benchmark.metrics.helpdesk_call_summarization_metrics.HelpdeskCallSummarizationMetric",
        "args": {}
      }
    ],
    "data_augmenter_spec": {
      "perturbation_specs": [],
      "should_augment_train_instances": false,
      "should_include_original_train": false,
      "should_skip_unchanged_train": false,
      "should_augment_eval_instances": false,
      "should_include_original_eval": false,
      "should_skip_unchanged_eval": false,
      "seeds_per_instance": 1
    },
    "groups": [
      "helpdesk_call_summarization"
    ],
    "annotators": [
      {
        "class_name": "helm.benchmark.annotation.helpdesk_call_summarization_annotator.HelpdeskCallSummarizationAnnotator",
        "args": {}
      }
    ]
  },
  {
    "name": "helpdesk_call_summarization:model=mistralai_mixtral-8x22b-instruct-v0.1",
    "scenario_spec": {
      "class_name": "helm.benchmark.scenarios.helpdesk_call_summarization_scenario.HelpdeskCallSummarizationScenario",
      "args": {}
    },
    "adapter_spec": {
      "method": "generation",
      "global_prefix": "",
      "global_suffix": "",
      "instructions": "The following is a call transcript of a call between a company's employee and the company's IT helpdesk. Summarize the call transcript in under 200 words.",
      "input_prefix": "### Call Transcript\n",
      "input_suffix": "",
      "reference_prefix": "A. ",
      "reference_suffix": "\n",
      "chain_of_thought_prefix": "",
      "chain_of_thought_suffix": "\n",
      "output_prefix": "",
      "output_suffix": "",
      "instance_prefix": "\n",
      "substitutions": [],
      "max_train_instances": 0,
      "max_eval_instances": 10000,
      "num_outputs": 1,
      "num_train_trials": 1,
      "num_trials": 1,
      "sample_train": true,
      "model_deployment": "together/mixtral-8x22b-instruct-v0.1",
      "model": "mistralai/mixtral-8x22b-instruct-v0.1",
      "temperature": 0.0,
      "max_tokens": 512,
      "stop_sequences": [],
      "multi_label": false
    },
    "metric_specs": [
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.BasicGenerationMetric",
        "args": {
          "names": []
        }
      },
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.BasicReferenceMetric",
        "args": {}
      },
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.InstancesPerSplitMetric",
        "args": {}
      },
      {
        "class_name": "helm.benchmark.metrics.helpdesk_call_summarization_metrics.HelpdeskCallSummarizationMetric",
        "args": {}
      }
    ],
    "data_augmenter_spec": {
      "perturbation_specs": [],
      "should_augment_train_instances": false,
      "should_include_original_train": false,
      "should_skip_unchanged_train": false,
      "should_augment_eval_instances": false,
      "should_include_original_eval": false,
      "should_skip_unchanged_eval": false,
      "seeds_per_instance": 1
    },
    "groups": [
      "helpdesk_call_summarization"
    ],
    "annotators": [
      {
        "class_name": "helm.benchmark.annotation.helpdesk_call_summarization_annotator.HelpdeskCallSummarizationAnnotator",
        "args": {}
      }
    ]
  },
  {
    "name": "helpdesk_call_summarization:model=mistralai_mixtral-8x7b-instruct-v0.1",
    "scenario_spec": {
      "class_name": "helm.benchmark.scenarios.helpdesk_call_summarization_scenario.HelpdeskCallSummarizationScenario",
      "args": {}
    },
    "adapter_spec": {
      "method": "generation",
      "global_prefix": "",
      "global_suffix": "",
      "instructions": "The following is a call transcript of a call between a company's employee and the company's IT helpdesk. Summarize the call transcript in under 200 words.",
      "input_prefix": "### Call Transcript\n",
      "input_suffix": "",
      "reference_prefix": "A. ",
      "reference_suffix": "\n",
      "chain_of_thought_prefix": "",
      "chain_of_thought_suffix": "\n",
      "output_prefix": "",
      "output_suffix": "",
      "instance_prefix": "\n",
      "substitutions": [],
      "max_train_instances": 0,
      "max_eval_instances": 10000,
      "num_outputs": 1,
      "num_train_trials": 1,
      "num_trials": 1,
      "sample_train": true,
      "model_deployment": "together/mixtral-8x7b-instruct-v0.1",
      "model": "mistralai/mixtral-8x7b-instruct-v0.1",
      "temperature": 0.0,
      "max_tokens": 512,
      "stop_sequences": [],
      "multi_label": false
    },
    "metric_specs": [
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.BasicGenerationMetric",
        "args": {
          "names": []
        }
      },
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.BasicReferenceMetric",
        "args": {}
      },
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.InstancesPerSplitMetric",
        "args": {}
      },
      {
        "class_name": "helm.benchmark.metrics.helpdesk_call_summarization_metrics.HelpdeskCallSummarizationMetric",
        "args": {}
      }
    ],
    "data_augmenter_spec": {
      "perturbation_specs": [],
      "should_augment_train_instances": false,
      "should_include_original_train": false,
      "should_skip_unchanged_train": false,
      "should_augment_eval_instances": false,
      "should_include_original_eval": false,
      "should_skip_unchanged_eval": false,
      "seeds_per_instance": 1
    },
    "groups": [
      "helpdesk_call_summarization"
    ],
    "annotators": [
      {
        "class_name": "helm.benchmark.annotation.helpdesk_call_summarization_annotator.HelpdeskCallSummarizationAnnotator",
        "args": {}
      }
    ]
  },
  {
    "name": "helpdesk_call_summarization:model=openai_gpt-4o-2024-11-20",
    "scenario_spec": {
      "class_name": "helm.benchmark.scenarios.helpdesk_call_summarization_scenario.HelpdeskCallSummarizationScenario",
      "args": {}
    },
    "adapter_spec": {
      "method": "generation",
      "global_prefix": "",
      "global_suffix": "",
      "instructions": "The following is a call transcript of a call between a company's employee and the company's IT helpdesk. Summarize the call transcript in under 200 words.",
      "input_prefix": "### Call Transcript\n",
      "input_suffix": "",
      "reference_prefix": "A. ",
      "reference_suffix": "\n",
      "chain_of_thought_prefix": "",
      "chain_of_thought_suffix": "\n",
      "output_prefix": "",
      "output_suffix": "",
      "instance_prefix": "\n",
      "substitutions": [],
      "max_train_instances": 0,
      "max_eval_instances": 10000,
      "num_outputs": 1,
      "num_train_trials": 1,
      "num_trials": 1,
      "sample_train": true,
      "model_deployment": "openai/gpt-4o-2024-11-20",
      "model": "openai/gpt-4o-2024-11-20",
      "temperature": 0.0,
      "max_tokens": 512,
      "stop_sequences": [],
      "multi_label": false
    },
    "metric_specs": [
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.BasicGenerationMetric",
        "args": {
          "names": []
        }
      },
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.BasicReferenceMetric",
        "args": {}
      },
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.InstancesPerSplitMetric",
        "args": {}
      },
      {
        "class_name": "helm.benchmark.metrics.helpdesk_call_summarization_metrics.HelpdeskCallSummarizationMetric",
        "args": {}
      }
    ],
    "data_augmenter_spec": {
      "perturbation_specs": [],
      "should_augment_train_instances": false,
      "should_include_original_train": false,
      "should_skip_unchanged_train": false,
      "should_augment_eval_instances": false,
      "should_include_original_eval": false,
      "should_skip_unchanged_eval": false,
      "seeds_per_instance": 1
    },
    "groups": [
      "helpdesk_call_summarization"
    ],
    "annotators": [
      {
        "class_name": "helm.benchmark.annotation.helpdesk_call_summarization_annotator.HelpdeskCallSummarizationAnnotator",
        "args": {}
      }
    ]
  },
  {
    "name": "helpdesk_call_summarization:model=openai_gpt-4o-mini-2024-07-18",
    "scenario_spec": {
      "class_name": "helm.benchmark.scenarios.helpdesk_call_summarization_scenario.HelpdeskCallSummarizationScenario",
      "args": {}
    },
    "adapter_spec": {
      "method": "generation",
      "global_prefix": "",
      "global_suffix": "",
      "instructions": "The following is a call transcript of a call between a company's employee and the company's IT helpdesk. Summarize the call transcript in under 200 words.",
      "input_prefix": "### Call Transcript\n",
      "input_suffix": "",
      "reference_prefix": "A. ",
      "reference_suffix": "\n",
      "chain_of_thought_prefix": "",
      "chain_of_thought_suffix": "\n",
      "output_prefix": "",
      "output_suffix": "",
      "instance_prefix": "\n",
      "substitutions": [],
      "max_train_instances": 0,
      "max_eval_instances": 10000,
      "num_outputs": 1,
      "num_train_trials": 1,
      "num_trials": 1,
      "sample_train": true,
      "model_deployment": "openai/gpt-4o-mini-2024-07-18",
      "model": "openai/gpt-4o-mini-2024-07-18",
      "temperature": 0.0,
      "max_tokens": 512,
      "stop_sequences": [],
      "multi_label": false
    },
    "metric_specs": [
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.BasicGenerationMetric",
        "args": {
          "names": []
        }
      },
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.BasicReferenceMetric",
        "args": {}
      },
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.InstancesPerSplitMetric",
        "args": {}
      },
      {
        "class_name": "helm.benchmark.metrics.helpdesk_call_summarization_metrics.HelpdeskCallSummarizationMetric",
        "args": {}
      }
    ],
    "data_augmenter_spec": {
      "perturbation_specs": [],
      "should_augment_train_instances": false,
      "should_include_original_train": false,
      "should_skip_unchanged_train": false,
      "should_augment_eval_instances": false,
      "should_include_original_eval": false,
      "should_skip_unchanged_eval": false,
      "seeds_per_instance": 1
    },
    "groups": [
      "helpdesk_call_summarization"
    ],
    "annotators": [
      {
        "class_name": "helm.benchmark.annotation.helpdesk_call_summarization_annotator.HelpdeskCallSummarizationAnnotator",
        "args": {}
      }
    ]
  },
  {
    "name": "helpdesk_call_summarization:model=qwen_qwen2.5-72b-instruct-turbo",
    "scenario_spec": {
      "class_name": "helm.benchmark.scenarios.helpdesk_call_summarization_scenario.HelpdeskCallSummarizationScenario",
      "args": {}
    },
    "adapter_spec": {
      "method": "generation",
      "global_prefix": "",
      "global_suffix": "",
      "instructions": "The following is a call transcript of a call between a company's employee and the company's IT helpdesk. Summarize the call transcript in under 200 words.",
      "input_prefix": "### Call Transcript\n",
      "input_suffix": "",
      "reference_prefix": "A. ",
      "reference_suffix": "\n",
      "chain_of_thought_prefix": "",
      "chain_of_thought_suffix": "\n",
      "output_prefix": "",
      "output_suffix": "",
      "instance_prefix": "\n",
      "substitutions": [],
      "max_train_instances": 0,
      "max_eval_instances": 10000,
      "num_outputs": 1,
      "num_train_trials": 1,
      "num_trials": 1,
      "sample_train": true,
      "model_deployment": "together/qwen2.5-72b-instruct-turbo",
      "model": "qwen/qwen2.5-72b-instruct-turbo",
      "temperature": 0.0,
      "max_tokens": 512,
      "stop_sequences": [],
      "multi_label": false
    },
    "metric_specs": [
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.BasicGenerationMetric",
        "args": {
          "names": []
        }
      },
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.BasicReferenceMetric",
        "args": {}
      },
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.InstancesPerSplitMetric",
        "args": {}
      },
      {
        "class_name": "helm.benchmark.metrics.helpdesk_call_summarization_metrics.HelpdeskCallSummarizationMetric",
        "args": {}
      }
    ],
    "data_augmenter_spec": {
      "perturbation_specs": [],
      "should_augment_train_instances": false,
      "should_include_original_train": false,
      "should_skip_unchanged_train": false,
      "should_augment_eval_instances": false,
      "should_include_original_eval": false,
      "should_skip_unchanged_eval": false,
      "seeds_per_instance": 1
    },
    "groups": [
      "helpdesk_call_summarization"
    ],
    "annotators": [
      {
        "class_name": "helm.benchmark.annotation.helpdesk_call_summarization_annotator.HelpdeskCallSummarizationAnnotator",
        "args": {}
      }
    ]
  },
  {
    "name": "helpdesk_call_summarization:model=qwen_qwen2.5-7b-instruct-turbo",
    "scenario_spec": {
      "class_name": "helm.benchmark.scenarios.helpdesk_call_summarization_scenario.HelpdeskCallSummarizationScenario",
      "args": {}
    },
    "adapter_spec": {
      "method": "generation",
      "global_prefix": "",
      "global_suffix": "",
      "instructions": "The following is a call transcript of a call between a company's employee and the company's IT helpdesk. Summarize the call transcript in under 200 words.",
      "input_prefix": "### Call Transcript\n",
      "input_suffix": "",
      "reference_prefix": "A. ",
      "reference_suffix": "\n",
      "chain_of_thought_prefix": "",
      "chain_of_thought_suffix": "\n",
      "output_prefix": "",
      "output_suffix": "",
      "instance_prefix": "\n",
      "substitutions": [],
      "max_train_instances": 0,
      "max_eval_instances": 10000,
      "num_outputs": 1,
      "num_train_trials": 1,
      "num_trials": 1,
      "sample_train": true,
      "model_deployment": "together/qwen2.5-7b-instruct-turbo",
      "model": "qwen/qwen2.5-7b-instruct-turbo",
      "temperature": 0.0,
      "max_tokens": 512,
      "stop_sequences": [],
      "multi_label": false
    },
    "metric_specs": [
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.BasicGenerationMetric",
        "args": {
          "names": []
        }
      },
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.BasicReferenceMetric",
        "args": {}
      },
      {
        "class_name": "helm.benchmark.metrics.basic_metrics.InstancesPerSplitMetric",
        "args": {}
      },
      {
        "class_name": "helm.benchmark.metrics.helpdesk_call_summarization_metrics.HelpdeskCallSummarizationMetric",
        "args": {}
      }
    ],
    "data_augmenter_spec": {
      "perturbation_specs": [],
      "should_augment_train_instances": false,
      "should_include_original_train": false,
      "should_skip_unchanged_train": false,
      "should_augment_eval_instances": false,
      "should_include_original_eval": false,
      "should_skip_unchanged_eval": false,
      "seeds_per_instance": 1
    },
    "groups": [
      "helpdesk_call_summarization"
    ],
    "annotators": [
      {
        "class_name": "helm.benchmark.annotation.helpdesk_call_summarization_annotator.HelpdeskCallSummarizationAnnotator",
        "args": {}
      }
    ]
  }
]