[
  {
    "title": "Summarization",
    "header": [
      {
        "value": "Model",
        "markdown": false,
        "metadata": {}
      },
      {
        "value": "Helpdesk Call summarization - Score",
        "description": "Helpdesk Call summarization\n\nScore: Score",
        "markdown": false,
        "lower_is_better": false,
        "metadata": {
          "metric": "Score",
          "run_group": "Helpdesk Call summarization"
        }
      },
      {
        "value": "Helpdesk Call summarization - Faithfulness",
        "description": "Helpdesk Call summarization\n\nFaithfulness: Whether all the information expressed by the summary can be inferred from the source transcript.",
        "markdown": false,
        "lower_is_better": false,
        "metadata": {
          "metric": "Faithfulness",
          "run_group": "Helpdesk Call summarization"
        }
      },
      {
        "value": "Helpdesk Call summarization - Relevance",
        "description": "Helpdesk Call summarization\n\nRelevance: Whether the summary includes only important information from the source.",
        "markdown": false,
        "lower_is_better": false,
        "metadata": {
          "metric": "Relevance",
          "run_group": "Helpdesk Call summarization"
        }
      },
      {
        "value": "Helpdesk Call summarization - Coherence",
        "description": "Helpdesk Call summarization\n\nCoherence: Whether the summary organizes the relevant information into a well-structured summary.",
        "markdown": false,
        "lower_is_better": false,
        "metadata": {
          "metric": "Coherence",
          "run_group": "Helpdesk Call summarization"
        }
      }
    ],
    "rows": [
      [
        {
          "value": "DeepSeek v3",
          "description": "",
          "markdown": false
        },
        {
          "value": 0.8161865569272949,
          "description": "min=0.816, mean=0.816, max=0.816, sum=0.816 (1)",
          "style": {
            "font-weight": "bold"
          },
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=deepseek-ai_deepseek-v3"
          ]
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        }
      ],
      [
        {
          "value": "Llama 3.1 Instruct Turbo (405B)",
          "description": "",
          "markdown": false
        },
        {
          "value": 0.7976680384087768,
          "description": "min=0.798, mean=0.798, max=0.798, sum=0.798 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=meta_llama-3.1-405b-instruct-turbo"
          ]
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        }
      ],
      [
        {
          "value": "Llama 3.1 Instruct Turbo (70B)",
          "description": "",
          "markdown": false
        },
        {
          "value": 0.7894375857338795,
          "description": "min=0.789, mean=0.789, max=0.789, sum=0.789 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=meta_llama-3.1-70b-instruct-turbo"
          ]
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        }
      ],
      [
        {
          "value": "Llama 3.1 Instruct Turbo (8B)",
          "description": "",
          "markdown": false
        },
        {
          "value": 0.7661179698216716,
          "description": "min=0.766, mean=0.766, max=0.766, sum=0.766 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=meta_llama-3.1-8b-instruct-turbo"
          ]
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        }
      ],
      [
        {
          "value": "Mistral Instruct v0.3 (7B)",
          "description": "",
          "markdown": false
        },
        {
          "value": 0.7585733882030161,
          "description": "min=0.759, mean=0.759, max=0.759, sum=0.759 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=mistralai_mistral-7b-instruct-v0.3"
          ]
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        }
      ],
      [
        {
          "value": "Mixtral Instruct (8x22B)",
          "description": "",
          "markdown": false
        },
        {
          "value": 0.7908093278463623,
          "description": "min=0.791, mean=0.791, max=0.791, sum=0.791 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=mistralai_mixtral-8x22b-instruct-v0.1"
          ]
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        }
      ],
      [
        {
          "value": "Mixtral Instruct (8x7B)",
          "description": "",
          "markdown": false
        },
        {
          "value": 0.758573388203016,
          "description": "min=0.759, mean=0.759, max=0.759, sum=0.759 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=mistralai_mixtral-8x7b-instruct-v0.1"
          ]
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        }
      ],
      [
        {
          "value": "Qwen2.5 Instruct Turbo (72B)",
          "description": "",
          "markdown": false
        },
        {
          "value": 0.7969821673525352,
          "description": "min=0.797, mean=0.797, max=0.797, sum=0.797 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=qwen_qwen2.5-72b-instruct-turbo"
          ]
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        }
      ],
      [
        {
          "value": "Qwen2.5 Instruct Turbo (7B)",
          "description": "",
          "markdown": false
        },
        {
          "value": 0.7469135802469118,
          "description": "min=0.747, mean=0.747, max=0.747, sum=0.747 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=qwen_qwen2.5-7b-instruct-turbo"
          ]
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        }
      ],
      [
        {
          "value": "Claude 3.5 Haiku (20241022)",
          "description": "",
          "markdown": false
        },
        {
          "value": 0.7969821673525349,
          "description": "min=0.797, mean=0.797, max=0.797, sum=0.797 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=anthropic_claude-3-5-haiku-20241022"
          ]
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        }
      ],
      [
        {
          "value": "Claude 3.5 Sonnet (20240620)",
          "description": "",
          "markdown": false
        },
        {
          "value": 0.8072702331961568,
          "description": "min=0.807, mean=0.807, max=0.807, sum=0.807 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=anthropic_claude-3-5-sonnet-20240620"
          ]
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        }
      ],
      [
        {
          "value": "Gemini 1.5 Pro (002)",
          "description": "",
          "markdown": false
        },
        {
          "value": 0.785322359396431,
          "description": "min=0.785, mean=0.785, max=0.785, sum=0.785 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=google_gemini-1.5-pro-002"
          ]
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        }
      ],
      [
        {
          "value": "Gemini 1.5 Flash (002)",
          "description": "",
          "markdown": false
        },
        {
          "value": 0.7764060356652923,
          "description": "min=0.776, mean=0.776, max=0.776, sum=0.776 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=google_gemini-1.5-flash-002"
          ]
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        }
      ],
      [
        {
          "value": "GPT-4o (2024-08-06)",
          "description": "",
          "markdown": false
        },
        {
          "value": 0.8134430727023293,
          "description": "min=0.813, mean=0.813, max=0.813, sum=0.813 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=openai_gpt-4o-2024-08-06"
          ]
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        }
      ],
      [
        {
          "value": "GPT-4o mini (2024-07-18)",
          "description": "",
          "markdown": false
        },
        {
          "value": 0.7921810699588453,
          "description": "min=0.792, mean=0.792, max=0.792, sum=0.792 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=openai_gpt-4o-mini-2024-07-18"
          ]
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        },
        {
          "description": "1 matching runs, but no matching metrics",
          "markdown": false
        }
      ]
    ],
    "links": [
      {
        "text": "LaTeX",
        "href": "benchmark_output/releases/v0.1.0/groups/latex/call_center_scenarios_summarization_metrics.tex"
      },
      {
        "text": "JSON",
        "href": "benchmark_output/releases/v0.1.0/groups/json/call_center_scenarios_summarization_metrics.json"
      }
    ],
    "name": "summarization_metrics"
  },
  {
    "title": "Efficiency",
    "header": [
      {
        "value": "Model",
        "markdown": false,
        "metadata": {}
      },
      {
        "value": "Mean win rate",
        "description": "How many models this model outperforms on average (over columns).",
        "markdown": false,
        "lower_is_better": false,
        "metadata": {}
      }
    ],
    "rows": [
      [
        {
          "value": "DeepSeek v3",
          "description": "",
          "markdown": false
        },
        {
          "markdown": false
        }
      ],
      [
        {
          "value": "Llama 3.1 Instruct Turbo (405B)",
          "description": "",
          "markdown": false
        },
        {
          "markdown": false
        }
      ],
      [
        {
          "value": "Llama 3.1 Instruct Turbo (70B)",
          "description": "",
          "markdown": false
        },
        {
          "markdown": false
        }
      ],
      [
        {
          "value": "Llama 3.1 Instruct Turbo (8B)",
          "description": "",
          "markdown": false
        },
        {
          "markdown": false
        }
      ],
      [
        {
          "value": "Mistral Instruct v0.3 (7B)",
          "description": "",
          "markdown": false
        },
        {
          "markdown": false
        }
      ],
      [
        {
          "value": "Mixtral Instruct (8x22B)",
          "description": "",
          "markdown": false
        },
        {
          "markdown": false
        }
      ],
      [
        {
          "value": "Mixtral Instruct (8x7B)",
          "description": "",
          "markdown": false
        },
        {
          "markdown": false
        }
      ],
      [
        {
          "value": "Qwen2.5 Instruct Turbo (72B)",
          "description": "",
          "markdown": false
        },
        {
          "markdown": false
        }
      ],
      [
        {
          "value": "Qwen2.5 Instruct Turbo (7B)",
          "description": "",
          "markdown": false
        },
        {
          "markdown": false
        }
      ],
      [
        {
          "value": "Claude 3.5 Haiku (20241022)",
          "description": "",
          "markdown": false
        },
        {
          "markdown": false
        }
      ],
      [
        {
          "value": "Claude 3.5 Sonnet (20240620)",
          "description": "",
          "markdown": false
        },
        {
          "markdown": false
        }
      ],
      [
        {
          "value": "Gemini 1.5 Pro (002)",
          "description": "",
          "markdown": false
        },
        {
          "markdown": false
        }
      ],
      [
        {
          "value": "Gemini 1.5 Flash (002)",
          "description": "",
          "markdown": false
        },
        {
          "markdown": false
        }
      ],
      [
        {
          "value": "GPT-4o (2024-08-06)",
          "description": "",
          "markdown": false
        },
        {
          "markdown": false
        }
      ],
      [
        {
          "value": "GPT-4o mini (2024-07-18)",
          "description": "",
          "markdown": false
        },
        {
          "markdown": false
        }
      ]
    ],
    "links": [
      {
        "text": "LaTeX",
        "href": "benchmark_output/releases/v0.1.0/groups/latex/call_center_scenarios_efficiency.tex"
      },
      {
        "text": "JSON",
        "href": "benchmark_output/releases/v0.1.0/groups/json/call_center_scenarios_efficiency.json"
      }
    ],
    "name": "efficiency"
  },
  {
    "title": "General information",
    "header": [
      {
        "value": "Model",
        "markdown": false,
        "metadata": {}
      },
      {
        "value": "Helpdesk Call summarization - # eval",
        "description": "Helpdesk Call summarization\n\n# eval: Number of evaluation instances.",
        "markdown": false,
        "metadata": {
          "metric": "# eval",
          "run_group": "Helpdesk Call summarization"
        }
      },
      {
        "value": "Helpdesk Call summarization - # train",
        "description": "Helpdesk Call summarization\n\n# train: Number of training instances (e.g., in-context examples).",
        "markdown": false,
        "metadata": {
          "metric": "# train",
          "run_group": "Helpdesk Call summarization"
        }
      },
      {
        "value": "Helpdesk Call summarization - truncated",
        "description": "Helpdesk Call summarization\n\ntruncated: Fraction of instances where the prompt itself was truncated (implies that there were no in-context examples).",
        "markdown": false,
        "metadata": {
          "metric": "truncated",
          "run_group": "Helpdesk Call summarization"
        }
      },
      {
        "value": "Helpdesk Call summarization - # prompt tokens",
        "description": "Helpdesk Call summarization\n\n# prompt tokens: Number of tokens in the prompt.",
        "markdown": false,
        "metadata": {
          "metric": "# prompt tokens",
          "run_group": "Helpdesk Call summarization"
        }
      },
      {
        "value": "Helpdesk Call summarization - # output tokens",
        "description": "Helpdesk Call summarization\n\n# output tokens: Actual number of output tokens.",
        "markdown": false,
        "metadata": {
          "metric": "# output tokens",
          "run_group": "Helpdesk Call summarization"
        }
      }
    ],
    "rows": [
      [
        {
          "value": "DeepSeek v3",
          "description": "",
          "markdown": false
        },
        {
          "value": 162.0,
          "description": "min=162, mean=162, max=162, sum=162 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=deepseek-ai_deepseek-v3"
          ]
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=deepseek-ai_deepseek-v3"
          ]
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=deepseek-ai_deepseek-v3"
          ]
        },
        {
          "value": 1336.8456790123457,
          "description": "min=1336.846, mean=1336.846, max=1336.846, sum=1336.846 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=deepseek-ai_deepseek-v3"
          ]
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=deepseek-ai_deepseek-v3"
          ]
        }
      ],
      [
        {
          "value": "Llama 3.1 Instruct Turbo (405B)",
          "description": "",
          "markdown": false
        },
        {
          "value": 162.0,
          "description": "min=162, mean=162, max=162, sum=162 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=meta_llama-3.1-405b-instruct-turbo"
          ]
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=meta_llama-3.1-405b-instruct-turbo"
          ]
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=meta_llama-3.1-405b-instruct-turbo"
          ]
        },
        {
          "value": 1325.1604938271605,
          "description": "min=1325.16, mean=1325.16, max=1325.16, sum=1325.16 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=meta_llama-3.1-405b-instruct-turbo"
          ]
        },
        {
          "value": 116.8395061728395,
          "description": "min=116.84, mean=116.84, max=116.84, sum=116.84 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=meta_llama-3.1-405b-instruct-turbo"
          ]
        }
      ],
      [
        {
          "value": "Llama 3.1 Instruct Turbo (70B)",
          "description": "",
          "markdown": false
        },
        {
          "value": 162.0,
          "description": "min=162, mean=162, max=162, sum=162 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=meta_llama-3.1-70b-instruct-turbo"
          ]
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=meta_llama-3.1-70b-instruct-turbo"
          ]
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=meta_llama-3.1-70b-instruct-turbo"
          ]
        },
        {
          "value": 1325.1604938271605,
          "description": "min=1325.16, mean=1325.16, max=1325.16, sum=1325.16 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=meta_llama-3.1-70b-instruct-turbo"
          ]
        },
        {
          "value": 103.87654320987654,
          "description": "min=103.877, mean=103.877, max=103.877, sum=103.877 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=meta_llama-3.1-70b-instruct-turbo"
          ]
        }
      ],
      [
        {
          "value": "Llama 3.1 Instruct Turbo (8B)",
          "description": "",
          "markdown": false
        },
        {
          "value": 162.0,
          "description": "min=162, mean=162, max=162, sum=162 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=meta_llama-3.1-8b-instruct-turbo"
          ]
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=meta_llama-3.1-8b-instruct-turbo"
          ]
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=meta_llama-3.1-8b-instruct-turbo"
          ]
        },
        {
          "value": 1325.1604938271605,
          "description": "min=1325.16, mean=1325.16, max=1325.16, sum=1325.16 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=meta_llama-3.1-8b-instruct-turbo"
          ]
        },
        {
          "value": 119.98148148148148,
          "description": "min=119.981, mean=119.981, max=119.981, sum=119.981 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=meta_llama-3.1-8b-instruct-turbo"
          ]
        }
      ],
      [
        {
          "value": "Mistral Instruct v0.3 (7B)",
          "description": "",
          "markdown": false
        },
        {
          "value": 162.0,
          "description": "min=162, mean=162, max=162, sum=162 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=mistralai_mistral-7b-instruct-v0.3"
          ]
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=mistralai_mistral-7b-instruct-v0.3"
          ]
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=mistralai_mistral-7b-instruct-v0.3"
          ]
        },
        {
          "value": 1491.9197530864199,
          "description": "min=1491.92, mean=1491.92, max=1491.92, sum=1491.92 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=mistralai_mistral-7b-instruct-v0.3"
          ]
        },
        {
          "value": 130.34567901234567,
          "description": "min=130.346, mean=130.346, max=130.346, sum=130.346 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=mistralai_mistral-7b-instruct-v0.3"
          ]
        }
      ],
      [
        {
          "value": "Mixtral Instruct (8x22B)",
          "description": "",
          "markdown": false
        },
        {
          "value": 162.0,
          "description": "min=162, mean=162, max=162, sum=162 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=mistralai_mixtral-8x22b-instruct-v0.1"
          ]
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=mistralai_mixtral-8x22b-instruct-v0.1"
          ]
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=mistralai_mixtral-8x22b-instruct-v0.1"
          ]
        },
        {
          "value": 1491.9197530864199,
          "description": "min=1491.92, mean=1491.92, max=1491.92, sum=1491.92 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=mistralai_mixtral-8x22b-instruct-v0.1"
          ]
        },
        {
          "value": 93.72839506172839,
          "description": "min=93.728, mean=93.728, max=93.728, sum=93.728 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=mistralai_mixtral-8x22b-instruct-v0.1"
          ]
        }
      ],
      [
        {
          "value": "Mixtral Instruct (8x7B)",
          "description": "",
          "markdown": false
        },
        {
          "value": 162.0,
          "description": "min=162, mean=162, max=162, sum=162 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=mistralai_mixtral-8x7b-instruct-v0.1"
          ]
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=mistralai_mixtral-8x7b-instruct-v0.1"
          ]
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=mistralai_mixtral-8x7b-instruct-v0.1"
          ]
        },
        {
          "value": 1491.9197530864199,
          "description": "min=1491.92, mean=1491.92, max=1491.92, sum=1491.92 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=mistralai_mixtral-8x7b-instruct-v0.1"
          ]
        },
        {
          "value": 110.48148148148148,
          "description": "min=110.481, mean=110.481, max=110.481, sum=110.481 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=mistralai_mixtral-8x7b-instruct-v0.1"
          ]
        }
      ],
      [
        {
          "value": "Qwen2.5 Instruct Turbo (72B)",
          "description": "",
          "markdown": false
        },
        {
          "value": 162.0,
          "description": "min=162, mean=162, max=162, sum=162 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=qwen_qwen2.5-72b-instruct-turbo"
          ]
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=qwen_qwen2.5-72b-instruct-turbo"
          ]
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=qwen_qwen2.5-72b-instruct-turbo"
          ]
        },
        {
          "value": 1332.7777777777778,
          "description": "min=1332.778, mean=1332.778, max=1332.778, sum=1332.778 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=qwen_qwen2.5-72b-instruct-turbo"
          ]
        },
        {
          "value": 88.59876543209876,
          "description": "min=88.599, mean=88.599, max=88.599, sum=88.599 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=qwen_qwen2.5-72b-instruct-turbo"
          ]
        }
      ],
      [
        {
          "value": "Qwen2.5 Instruct Turbo (7B)",
          "description": "",
          "markdown": false
        },
        {
          "value": 162.0,
          "description": "min=162, mean=162, max=162, sum=162 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=qwen_qwen2.5-7b-instruct-turbo"
          ]
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=qwen_qwen2.5-7b-instruct-turbo"
          ]
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=qwen_qwen2.5-7b-instruct-turbo"
          ]
        },
        {
          "value": 1332.7777777777778,
          "description": "min=1332.778, mean=1332.778, max=1332.778, sum=1332.778 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=qwen_qwen2.5-7b-instruct-turbo"
          ]
        },
        {
          "value": 68.5246913580247,
          "description": "min=68.525, mean=68.525, max=68.525, sum=68.525 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=qwen_qwen2.5-7b-instruct-turbo"
          ]
        }
      ],
      [
        {
          "value": "Claude 3.5 Haiku (20241022)",
          "description": "",
          "markdown": false
        },
        {
          "value": 162.0,
          "description": "min=162, mean=162, max=162, sum=162 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=anthropic_claude-3-5-haiku-20241022"
          ]
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=anthropic_claude-3-5-haiku-20241022"
          ]
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=anthropic_claude-3-5-haiku-20241022"
          ]
        },
        {
          "value": 1372.1666666666667,
          "description": "min=1372.167, mean=1372.167, max=1372.167, sum=1372.167 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=anthropic_claude-3-5-haiku-20241022"
          ]
        },
        {
          "value": 86.29012345679013,
          "description": "min=86.29, mean=86.29, max=86.29, sum=86.29 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=anthropic_claude-3-5-haiku-20241022"
          ]
        }
      ],
      [
        {
          "value": "Claude 3.5 Sonnet (20240620)",
          "description": "",
          "markdown": false
        },
        {
          "value": 162.0,
          "description": "min=162, mean=162, max=162, sum=162 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=anthropic_claude-3-5-sonnet-20240620"
          ]
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=anthropic_claude-3-5-sonnet-20240620"
          ]
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=anthropic_claude-3-5-sonnet-20240620"
          ]
        },
        {
          "value": 1372.1666666666667,
          "description": "min=1372.167, mean=1372.167, max=1372.167, sum=1372.167 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=anthropic_claude-3-5-sonnet-20240620"
          ]
        },
        {
          "value": 130.02469135802468,
          "description": "min=130.025, mean=130.025, max=130.025, sum=130.025 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=anthropic_claude-3-5-sonnet-20240620"
          ]
        }
      ],
      [
        {
          "value": "Gemini 1.5 Pro (002)",
          "description": "",
          "markdown": false
        },
        {
          "value": 162.0,
          "description": "min=162, mean=162, max=162, sum=162 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=google_gemini-1.5-pro-002"
          ]
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=google_gemini-1.5-pro-002"
          ]
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=google_gemini-1.5-pro-002"
          ]
        },
        {
          "value": 1394.8086419753085,
          "description": "min=1394.809, mean=1394.809, max=1394.809, sum=1394.809 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=google_gemini-1.5-pro-002"
          ]
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=google_gemini-1.5-pro-002"
          ]
        }
      ],
      [
        {
          "value": "Gemini 1.5 Flash (002)",
          "description": "",
          "markdown": false
        },
        {
          "value": 162.0,
          "description": "min=162, mean=162, max=162, sum=162 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=google_gemini-1.5-flash-002"
          ]
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=google_gemini-1.5-flash-002"
          ]
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=google_gemini-1.5-flash-002"
          ]
        },
        {
          "value": 1394.8086419753085,
          "description": "min=1394.809, mean=1394.809, max=1394.809, sum=1394.809 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=google_gemini-1.5-flash-002"
          ]
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=google_gemini-1.5-flash-002"
          ]
        }
      ],
      [
        {
          "value": "GPT-4o (2024-08-06)",
          "description": "",
          "markdown": false
        },
        {
          "value": 162.0,
          "description": "min=162, mean=162, max=162, sum=162 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=openai_gpt-4o-2024-08-06"
          ]
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=openai_gpt-4o-2024-08-06"
          ]
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=openai_gpt-4o-2024-08-06"
          ]
        },
        {
          "value": 1298.9444444444443,
          "description": "min=1298.944, mean=1298.944, max=1298.944, sum=1298.944 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=openai_gpt-4o-2024-08-06"
          ]
        },
        {
          "value": 99.65432098765432,
          "description": "min=99.654, mean=99.654, max=99.654, sum=99.654 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=openai_gpt-4o-2024-08-06"
          ]
        }
      ],
      [
        {
          "value": "GPT-4o mini (2024-07-18)",
          "description": "",
          "markdown": false
        },
        {
          "value": 162.0,
          "description": "min=162, mean=162, max=162, sum=162 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=openai_gpt-4o-mini-2024-07-18"
          ]
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=openai_gpt-4o-mini-2024-07-18"
          ]
        },
        {
          "value": 0.0,
          "description": "min=0, mean=0, max=0, sum=0 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=openai_gpt-4o-mini-2024-07-18"
          ]
        },
        {
          "value": 1298.9444444444443,
          "description": "min=1298.944, mean=1298.944, max=1298.944, sum=1298.944 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=openai_gpt-4o-mini-2024-07-18"
          ]
        },
        {
          "value": 95.28395061728395,
          "description": "min=95.284, mean=95.284, max=95.284, sum=95.284 (1)",
          "style": {},
          "markdown": false,
          "run_spec_names": [
            "helpdesk_call_summarization:model=openai_gpt-4o-mini-2024-07-18"
          ]
        }
      ]
    ],
    "links": [
      {
        "text": "LaTeX",
        "href": "benchmark_output/releases/v0.1.0/groups/latex/call_center_scenarios_general_information.tex"
      },
      {
        "text": "JSON",
        "href": "benchmark_output/releases/v0.1.0/groups/json/call_center_scenarios_general_information.json"
      }
    ],
    "name": "general_information"
  }
]