{
  "title": "Summarization",
  "header": [
    {
      "value": "Model",
      "markdown": false,
      "metadata": {}
    },
    {
      "value": "Helpdesk Call summarization - Score",
      "description": "Helpdesk Call summarization\n\nScore: Score",
      "markdown": false,
      "lower_is_better": false,
      "metadata": {
        "metric": "Score",
        "run_group": "Helpdesk Call summarization"
      }
    }
  ],
  "rows": [
    [
      {
        "value": "DeepSeek v3",
        "description": "",
        "markdown": false
      },
      {
        "value": 0.8404206675811593,
        "description": "min=0.84, mean=0.84, max=0.84, sum=0.84 (1)",
        "style": {},
        "markdown": false,
        "run_spec_names": [
          "helpdesk_call_summarization:model=deepseek-ai_deepseek-v3"
        ]
      }
    ],
    [
      {
        "value": "Llama 3.1 Instruct Turbo (405B)",
        "description": "",
        "markdown": false
      },
      {
        "value": 0.8401920438957455,
        "description": "min=0.84, mean=0.84, max=0.84, sum=0.84 (1)",
        "style": {},
        "markdown": false,
        "run_spec_names": [
          "helpdesk_call_summarization:model=meta_llama-3.1-405b-instruct-turbo"
        ]
      }
    ],
    [
      {
        "value": "Llama 3.1 Instruct Turbo (70B)",
        "description": "",
        "markdown": false
      },
      {
        "value": 0.824874256973021,
        "description": "min=0.825, mean=0.825, max=0.825, sum=0.825 (1)",
        "style": {},
        "markdown": false,
        "run_spec_names": [
          "helpdesk_call_summarization:model=meta_llama-3.1-70b-instruct-turbo"
        ]
      }
    ],
    [
      {
        "value": "Llama 3.1 Instruct Turbo (8B)",
        "description": "",
        "markdown": false
      },
      {
        "value": 0.7750342935528121,
        "description": "min=0.775, mean=0.775, max=0.775, sum=0.775 (1)",
        "style": {},
        "markdown": false,
        "run_spec_names": [
          "helpdesk_call_summarization:model=meta_llama-3.1-8b-instruct-turbo"
        ]
      }
    ],
    [
      {
        "value": "Mistral Instruct v0.3 (7B)",
        "description": "",
        "markdown": false
      },
      {
        "value": 0.7597165066300872,
        "description": "min=0.76, mean=0.76, max=0.76, sum=0.76 (1)",
        "style": {},
        "markdown": false,
        "run_spec_names": [
          "helpdesk_call_summarization:model=mistralai_mistral-7b-instruct-v0.3"
        ]
      }
    ],
    [
      {
        "value": "Mixtral Instruct (8x22B)",
        "description": "",
        "markdown": false
      },
      {
        "value": 0.8182441700960201,
        "description": "min=0.818, mean=0.818, max=0.818, sum=0.818 (1)",
        "style": {},
        "markdown": false,
        "run_spec_names": [
          "helpdesk_call_summarization:model=mistralai_mixtral-8x22b-instruct-v0.1"
        ]
      }
    ],
    [
      {
        "value": "Mixtral Instruct (8x7B)",
        "description": "",
        "markdown": false
      },
      {
        "value": 0.7624599908550526,
        "description": "min=0.762, mean=0.762, max=0.762, sum=0.762 (1)",
        "style": {},
        "markdown": false,
        "run_spec_names": [
          "helpdesk_call_summarization:model=mistralai_mixtral-8x7b-instruct-v0.1"
        ]
      }
    ],
    [
      {
        "value": "Qwen2.5 Instruct Turbo (72B)",
        "description": "",
        "markdown": false
      },
      {
        "value": 0.8481938728852292,
        "description": "min=0.848, mean=0.848, max=0.848, sum=0.848 (1)",
        "style": {},
        "markdown": false,
        "run_spec_names": [
          "helpdesk_call_summarization:model=qwen_qwen2.5-72b-instruct-turbo"
        ]
      }
    ],
    [
      {
        "value": "Qwen2.5 Instruct Turbo (7B)",
        "description": "",
        "markdown": false
      },
      {
        "value": 0.7583447645176039,
        "description": "min=0.758, mean=0.758, max=0.758, sum=0.758 (1)",
        "style": {},
        "markdown": false,
        "run_spec_names": [
          "helpdesk_call_summarization:model=qwen_qwen2.5-7b-instruct-turbo"
        ]
      }
    ],
    [
      {
        "value": "Claude 3.5 Haiku (20241022)",
        "description": "",
        "markdown": false
      },
      {
        "value": 0.8566529492455397,
        "description": "min=0.857, mean=0.857, max=0.857, sum=0.857 (1)",
        "style": {},
        "markdown": false,
        "run_spec_names": [
          "helpdesk_call_summarization:model=anthropic_claude-3-5-haiku-20241022"
        ]
      }
    ],
    [
      {
        "value": "Claude 3.5 Sonnet (20240620)",
        "description": "",
        "markdown": false
      },
      {
        "value": 0.8431641518061251,
        "description": "min=0.843, mean=0.843, max=0.843, sum=0.843 (1)",
        "style": {},
        "markdown": false,
        "run_spec_names": [
          "helpdesk_call_summarization:model=anthropic_claude-3-5-sonnet-20240620"
        ]
      }
    ],
    [
      {
        "value": "Claude 3.7 Sonnet (20250219)",
        "description": "",
        "markdown": false
      },
      {
        "value": 0.8687700045724716,
        "description": "min=0.869, mean=0.869, max=0.869, sum=0.869 (1)",
        "style": {
          "font-weight": "bold"
        },
        "markdown": false,
        "run_spec_names": [
          "helpdesk_call_summarization:model=anthropic_claude-3-7-sonnet-20250219"
        ]
      }
    ],
    [
      {
        "value": "Gemini 1.5 Pro (002)",
        "description": "",
        "markdown": false
      },
      {
        "value": 0.8516232281664361,
        "description": "min=0.852, mean=0.852, max=0.852, sum=0.852 (1)",
        "style": {},
        "markdown": false,
        "run_spec_names": [
          "helpdesk_call_summarization:model=google_gemini-1.5-pro-002"
        ]
      }
    ],
    [
      {
        "value": "Gemini 1.5 Flash (002)",
        "description": "",
        "markdown": false
      },
      {
        "value": 0.8449931412894355,
        "description": "min=0.845, mean=0.845, max=0.845, sum=0.845 (1)",
        "style": {},
        "markdown": false,
        "run_spec_names": [
          "helpdesk_call_summarization:model=google_gemini-1.5-flash-002"
        ]
      }
    ],
    [
      {
        "value": "Gemini 2.0 Flash",
        "description": "",
        "markdown": false
      },
      {
        "value": 0.8513946044810221,
        "description": "min=0.851, mean=0.851, max=0.851, sum=0.851 (1)",
        "style": {},
        "markdown": false,
        "run_spec_names": [
          "helpdesk_call_summarization:model=google_gemini-2.0-flash-001"
        ]
      }
    ],
    [
      {
        "value": "GPT-4o (2024-11-20)",
        "description": "",
        "markdown": false
      },
      {
        "value": 0.866941015089161,
        "description": "min=0.867, mean=0.867, max=0.867, sum=0.867 (1)",
        "style": {},
        "markdown": false,
        "run_spec_names": [
          "helpdesk_call_summarization:model=openai_gpt-4o-2024-11-20"
        ]
      }
    ],
    [
      {
        "value": "GPT-4o mini (2024-07-18)",
        "description": "",
        "markdown": false
      },
      {
        "value": 0.8463648834019184,
        "description": "min=0.846, mean=0.846, max=0.846, sum=0.846 (1)",
        "style": {},
        "markdown": false,
        "run_spec_names": [
          "helpdesk_call_summarization:model=openai_gpt-4o-mini-2024-07-18"
        ]
      }
    ]
  ],
  "links": [
    {
      "text": "LaTeX",
      "href": "benchmark_output/releases/v1.0.0/groups/latex/call_center_scenarios_summarization_metrics.tex"
    },
    {
      "text": "JSON",
      "href": "benchmark_output/releases/v1.0.0/groups/json/call_center_scenarios_summarization_metrics.json"
    }
  ],
  "name": "summarization_metrics"
}