[
  {
    "title": "subset: summarization",
    "header": [
      {
        "value": "Model",
        "markdown": false,
        "metadata": {}
      },
      {
        "value": "Score",
        "description": "summarization\n\nScore: Score",
        "markdown": false,
        "lower_is_better": false,
        "metadata": {
          "metric": "Score",
          "run_group": "Summarization"
        }
      },
      {
        "value": "Faithfulness",
        "description": "summarization\n\nFaithfulness: Whether all the information expressed by the summary can be inferred from the source transcript.",
        "markdown": false,
        "lower_is_better": false,
        "metadata": {
          "metric": "Faithfulness",
          "run_group": "Summarization"
        }
      },
      {
        "value": "Relevance",
        "description": "summarization\n\nRelevance: Whether the summary includes only important information from the source.",
        "markdown": false,
        "lower_is_better": false,
        "metadata": {
          "metric": "Relevance",
          "run_group": "Summarization"
        }
      },
      {
        "value": "Coherence",
        "description": "summarization\n\nCoherence: Whether the summary organizes the relevant information into a well-structured summary.",
        "markdown": false,
        "lower_is_better": false,
        "metadata": {
          "metric": "Coherence",
          "run_group": "Summarization"
        }
      },
      {
        "value": "# eval",
        "description": "summarization\n\n# eval: Number of evaluation instances.",
        "markdown": false,
        "metadata": {
          "metric": "# eval",
          "run_group": "Summarization"
        }
      },
      {
        "value": "# train",
        "description": "summarization\n\n# train: Number of training instances (e.g., in-context examples).",
        "markdown": false,
        "metadata": {
          "metric": "# train",
          "run_group": "Summarization"
        }
      },
      {
        "value": "truncated",
        "description": "summarization\n\ntruncated: Fraction of instances where the prompt itself was truncated (implies that there were no in-context examples).",
        "markdown": false,
        "metadata": {
          "metric": "truncated",
          "run_group": "Summarization"
        }
      },
      {
        "value": "# prompt tokens",
        "description": "summarization\n\n# prompt tokens: Number of tokens in the prompt.",
        "markdown": false,
        "metadata": {
          "metric": "# prompt tokens",
          "run_group": "Summarization"
        }
      },
      {
        "value": "# output tokens",
        "description": "summarization\n\n# output tokens: Actual number of output tokens.",
        "markdown": false,
        "metadata": {
          "metric": "# output tokens",
          "run_group": "Summarization"
        }
      }
    ],
    "rows": [
      [
        {
          "value": "DeepSeek v3",
          "description": "",
          "href": "?group=call_center_summarization&subgroup=subset%3A%20summarization&runSpecs=%5B%22call_center_summarization%3Amodel%3Ddeepseek-ai_deepseek-v3%22%5D",
          "markdown": false,
          "run_spec_names": [
            "call_center_summarization:model=deepseek-ai_deepseek-v3"
          ]
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        },
        {
          "value": 1.0,
          "description": "min=1, mean=1, max=1, sum=1 (1)",
          "style": {
            "font-weight": "bold"
          },
          "markdown": false
        },
        {
          "value": 1.0,
          "description": "min=1, mean=1, max=1, sum=1 (1)",
          "style": {
            "font-weight": "bold"
          },
          "markdown": false
        },
        {
          "value": 1.0,
          "description": "min=1, mean=1, max=1, sum=1 (1)",
          "style": {
            "font-weight": "bold"
          },
          "markdown": false
        },
        {
          "value": 240.0,
          "description": "min=240, mean=240, max=240, sum=240 (1)",
          "style": {},
          "markdown": false
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false
        },
        {
          "value": 757.2083333333334,
          "description": "min=757.208, mean=757.208, max=757.208, sum=757.208 (1)",
          "style": {},
          "markdown": false
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false
        }
      ],
      [
        {
          "value": "Llama 3.1 Instruct Turbo (405B)",
          "description": "",
          "href": "?group=call_center_summarization&subgroup=subset%3A%20summarization&runSpecs=%5B%22call_center_summarization%3Amodel%3Dmeta_llama-3.1-405b-instruct-turbo%22%5D",
          "markdown": false,
          "run_spec_names": [
            "call_center_summarization:model=meta_llama-3.1-405b-instruct-turbo"
          ]
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        },
        {
          "value": 1.0,
          "description": "min=1, mean=1, max=1, sum=1 (1)",
          "style": {
            "font-weight": "bold"
          },
          "markdown": false
        },
        {
          "value": 1.0,
          "description": "min=1, mean=1, max=1, sum=1 (1)",
          "style": {
            "font-weight": "bold"
          },
          "markdown": false
        },
        {
          "value": 1.0,
          "description": "min=1, mean=1, max=1, sum=1 (1)",
          "style": {
            "font-weight": "bold"
          },
          "markdown": false
        },
        {
          "value": 240.0,
          "description": "min=240, mean=240, max=240, sum=240 (1)",
          "style": {},
          "markdown": false
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false
        },
        {
          "value": 731.5833333333334,
          "description": "min=731.583, mean=731.583, max=731.583, sum=731.583 (1)",
          "style": {},
          "markdown": false
        },
        {
          "value": 182.80416666666667,
          "description": "min=182.804, mean=182.804, max=182.804, sum=182.804 (1)",
          "style": {},
          "markdown": false
        }
      ],
      [
        {
          "value": "Llama 3.1 Instruct Turbo (70B)",
          "description": "",
          "href": "?group=call_center_summarization&subgroup=subset%3A%20summarization&runSpecs=%5B%22call_center_summarization%3Amodel%3Dmeta_llama-3.1-70b-instruct-turbo%22%5D",
          "markdown": false,
          "run_spec_names": [
            "call_center_summarization:model=meta_llama-3.1-70b-instruct-turbo"
          ]
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        },
        {
          "value": 1.0,
          "description": "min=1, mean=1, max=1, sum=1 (1)",
          "style": {
            "font-weight": "bold"
          },
          "markdown": false
        },
        {
          "value": 1.0,
          "description": "min=1, mean=1, max=1, sum=1 (1)",
          "style": {
            "font-weight": "bold"
          },
          "markdown": false
        },
        {
          "value": 1.0,
          "description": "min=1, mean=1, max=1, sum=1 (1)",
          "style": {
            "font-weight": "bold"
          },
          "markdown": false
        },
        {
          "value": 240.0,
          "description": "min=240, mean=240, max=240, sum=240 (1)",
          "style": {},
          "markdown": false
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false
        },
        {
          "value": 731.5833333333334,
          "description": "min=731.583, mean=731.583, max=731.583, sum=731.583 (1)",
          "style": {},
          "markdown": false
        },
        {
          "value": 156.4375,
          "description": "min=156.438, mean=156.438, max=156.438, sum=156.438 (1)",
          "style": {},
          "markdown": false
        }
      ],
      [
        {
          "value": "Llama 3.1 Instruct Turbo (8B)",
          "description": "",
          "href": "?group=call_center_summarization&subgroup=subset%3A%20summarization&runSpecs=%5B%22call_center_summarization%3Amodel%3Dmeta_llama-3.1-8b-instruct-turbo%22%5D",
          "markdown": false,
          "run_spec_names": [
            "call_center_summarization:model=meta_llama-3.1-8b-instruct-turbo"
          ]
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        },
        {
          "value": 1.0,
          "description": "min=1, mean=1, max=1, sum=1 (1)",
          "style": {
            "font-weight": "bold"
          },
          "markdown": false
        },
        {
          "value": 1.0,
          "description": "min=1, mean=1, max=1, sum=1 (1)",
          "style": {
            "font-weight": "bold"
          },
          "markdown": false
        },
        {
          "value": 1.0,
          "description": "min=1, mean=1, max=1, sum=1 (1)",
          "style": {
            "font-weight": "bold"
          },
          "markdown": false
        },
        {
          "value": 240.0,
          "description": "min=240, mean=240, max=240, sum=240 (1)",
          "style": {},
          "markdown": false
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false
        },
        {
          "value": 731.5833333333334,
          "description": "min=731.583, mean=731.583, max=731.583, sum=731.583 (1)",
          "style": {},
          "markdown": false
        },
        {
          "value": 189.07083333333333,
          "description": "min=189.071, mean=189.071, max=189.071, sum=189.071 (1)",
          "style": {},
          "markdown": false
        }
      ],
      [
        {
          "value": "Claude 3.5 Haiku (20241022)",
          "description": "",
          "href": "?group=call_center_summarization&subgroup=subset%3A%20summarization&runSpecs=%5B%22call_center_summarization%3Amodel%3Danthropic_claude-3-5-haiku-20241022%22%5D",
          "markdown": false,
          "run_spec_names": [
            "call_center_summarization:model=anthropic_claude-3-5-haiku-20241022"
          ]
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        },
        {
          "value": 1.0,
          "description": "min=1, mean=1, max=1, sum=1 (1)",
          "style": {
            "font-weight": "bold"
          },
          "markdown": false
        },
        {
          "value": 1.0,
          "description": "min=1, mean=1, max=1, sum=1 (1)",
          "style": {
            "font-weight": "bold"
          },
          "markdown": false
        },
        {
          "value": 1.0,
          "description": "min=1, mean=1, max=1, sum=1 (1)",
          "style": {
            "font-weight": "bold"
          },
          "markdown": false
        },
        {
          "value": 240.0,
          "description": "min=240, mean=240, max=240, sum=240 (1)",
          "style": {},
          "markdown": false
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false
        },
        {
          "value": 801.1083333333333,
          "description": "min=801.108, mean=801.108, max=801.108, sum=801.108 (1)",
          "style": {},
          "markdown": false
        },
        {
          "value": 177.17916666666667,
          "description": "min=177.179, mean=177.179, max=177.179, sum=177.179 (1)",
          "style": {},
          "markdown": false
        }
      ],
      [
        {
          "value": "Claude 3.5 Sonnet (20240620)",
          "description": "",
          "href": "?group=call_center_summarization&subgroup=subset%3A%20summarization&runSpecs=%5B%22call_center_summarization%3Amodel%3Danthropic_claude-3-5-sonnet-20240620%22%5D",
          "markdown": false,
          "run_spec_names": [
            "call_center_summarization:model=anthropic_claude-3-5-sonnet-20240620"
          ]
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        },
        {
          "value": 1.0,
          "description": "min=1, mean=1, max=1, sum=1 (1)",
          "style": {
            "font-weight": "bold"
          },
          "markdown": false
        },
        {
          "value": 1.0,
          "description": "min=1, mean=1, max=1, sum=1 (1)",
          "style": {
            "font-weight": "bold"
          },
          "markdown": false
        },
        {
          "value": 1.0,
          "description": "min=1, mean=1, max=1, sum=1 (1)",
          "style": {
            "font-weight": "bold"
          },
          "markdown": false
        },
        {
          "value": 240.0,
          "description": "min=240, mean=240, max=240, sum=240 (1)",
          "style": {},
          "markdown": false
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false
        },
        {
          "value": 801.1083333333333,
          "description": "min=801.108, mean=801.108, max=801.108, sum=801.108 (1)",
          "style": {},
          "markdown": false
        },
        {
          "value": 151.10833333333332,
          "description": "min=151.108, mean=151.108, max=151.108, sum=151.108 (1)",
          "style": {},
          "markdown": false
        }
      ],
      [
        {
          "value": "Gemini 1.5 Pro (002)",
          "description": "",
          "href": "?group=call_center_summarization&subgroup=subset%3A%20summarization&runSpecs=%5B%22call_center_summarization%3Amodel%3Dgoogle_gemini-1.5-pro-002%22%5D",
          "markdown": false,
          "run_spec_names": [
            "call_center_summarization:model=google_gemini-1.5-pro-002"
          ]
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        },
        {
          "value": 1.0,
          "description": "min=1, mean=1, max=1, sum=1 (1)",
          "style": {
            "font-weight": "bold"
          },
          "markdown": false
        },
        {
          "value": 1.0,
          "description": "min=1, mean=1, max=1, sum=1 (1)",
          "style": {
            "font-weight": "bold"
          },
          "markdown": false
        },
        {
          "value": 1.0,
          "description": "min=1, mean=1, max=1, sum=1 (1)",
          "style": {
            "font-weight": "bold"
          },
          "markdown": false
        },
        {
          "value": 240.0,
          "description": "min=240, mean=240, max=240, sum=240 (1)",
          "style": {},
          "markdown": false
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false
        },
        {
          "value": 799.7791666666667,
          "description": "min=799.779, mean=799.779, max=799.779, sum=799.779 (1)",
          "style": {},
          "markdown": false
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false
        }
      ],
      [
        {
          "value": "Gemini 1.5 Flash (002)",
          "description": "",
          "href": "?group=call_center_summarization&subgroup=subset%3A%20summarization&runSpecs=%5B%22call_center_summarization%3Amodel%3Dgoogle_gemini-1.5-flash-002%22%5D",
          "markdown": false,
          "run_spec_names": [
            "call_center_summarization:model=google_gemini-1.5-flash-002"
          ]
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        },
        {
          "value": 0.9989583333333333,
          "description": "min=0.999, mean=0.999, max=0.999, sum=0.999 (1)",
          "style": {},
          "markdown": false
        },
        {
          "value": 1.0,
          "description": "min=1, mean=1, max=1, sum=1 (1)",
          "style": {
            "font-weight": "bold"
          },
          "markdown": false
        },
        {
          "value": 0.9989583333333333,
          "description": "min=0.999, mean=0.999, max=0.999, sum=0.999 (1)",
          "style": {},
          "markdown": false
        },
        {
          "value": 240.0,
          "description": "min=240, mean=240, max=240, sum=240 (1)",
          "style": {},
          "markdown": false
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false
        },
        {
          "value": 799.7791666666667,
          "description": "min=799.779, mean=799.779, max=799.779, sum=799.779 (1)",
          "style": {},
          "markdown": false
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false
        }
      ]
    ],
    "links": [
      {
        "text": "LaTeX",
        "href": "benchmark_output/releases/v0.1.0/groups/latex/call_center_summarization_call_center_summarization_subset:summarization.tex"
      },
      {
        "text": "JSON",
        "href": "benchmark_output/releases/v0.1.0/groups/json/call_center_summarization_call_center_summarization_subset:summarization.json"
      }
    ],
    "name": "call_center_summarization_subset:summarization"
  }
]