{
  "title": "Summarization",
  "header": [
    {
      "value": "Model/adapter",
      "markdown": false,
      "metadata": {}
    },
    {
      "value": "Summarization - Faithfulness",
      "description": "summarization\n\nFaithfulness: Whether all the information expressed by the summary can be inferred from the source transcript.",
      "markdown": false,
      "lower_is_better": false,
      "metadata": {
        "metric": "Faithfulness",
        "run_group": "Summarization"
      }
    },
    {
      "value": "Summarization - Relevance",
      "description": "summarization\n\nRelevance: Whether the summary includes only important information from the source.",
      "markdown": false,
      "lower_is_better": false,
      "metadata": {
        "metric": "Relevance",
        "run_group": "Summarization"
      }
    },
    {
      "value": "Summarization - Coherence",
      "description": "summarization\n\nCoherence: Whether the summary organizes the relevant information into a well-structured summary.",
      "markdown": false,
      "lower_is_better": false,
      "metadata": {
        "metric": "Coherence",
        "run_group": "Summarization"
      }
    },
    {
      "value": "Summarization (Real) - Faithfulness",
      "description": "Summarization with real call transcripts\n\nFaithfulness: Whether all the information expressed by the summary can be inferred from the source transcript.",
      "markdown": false,
      "lower_is_better": false,
      "metadata": {
        "metric": "Faithfulness",
        "run_group": "Summarization (Real)"
      }
    },
    {
      "value": "Summarization (Real) - Relevance",
      "description": "Summarization with real call transcripts\n\nRelevance: Whether the summary includes only important information from the source.",
      "markdown": false,
      "lower_is_better": false,
      "metadata": {
        "metric": "Relevance",
        "run_group": "Summarization (Real)"
      }
    },
    {
      "value": "Summarization (Real) - Coherence",
      "description": "Summarization with real call transcripts\n\nCoherence: Whether the summary organizes the relevant information into a well-structured summary.",
      "markdown": false,
      "lower_is_better": false,
      "metadata": {
        "metric": "Coherence",
        "run_group": "Summarization (Real)"
      }
    }
  ],
  "rows": [
    [
      {
        "value": "Llama 3 Instruct (70B)",
        "description": "",
        "markdown": false
      },
      {
        "value": 1.0,
        "description": "min=1, mean=1, max=1, sum=1 (1)",
        "style": {
          "font-weight": "bold"
        },
        "markdown": false,
        "run_spec_names": [
          "call_center_summarization:model=meta_llama-3-70b-chat"
        ]
      },
      {
        "value": 1.0,
        "description": "min=1, mean=1, max=1, sum=1 (1)",
        "style": {
          "font-weight": "bold"
        },
        "markdown": false,
        "run_spec_names": [
          "call_center_summarization:model=meta_llama-3-70b-chat"
        ]
      },
      {
        "value": 1.0,
        "description": "min=1, mean=1, max=1, sum=1 (1)",
        "style": {
          "font-weight": "bold"
        },
        "markdown": false,
        "run_spec_names": [
          "call_center_summarization:model=meta_llama-3-70b-chat"
        ]
      },
      {
        "value": 1.0,
        "description": "min=1, mean=1, max=1, sum=1 (1)",
        "style": {
          "font-weight": "bold"
        },
        "markdown": false,
        "run_spec_names": [
          "call_center_summarization:subset=real_call_transcripts,model=meta_llama-3-70b-chat"
        ]
      },
      {
        "value": 1.0,
        "description": "min=1, mean=1, max=1, sum=1 (1)",
        "style": {
          "font-weight": "bold"
        },
        "markdown": false,
        "run_spec_names": [
          "call_center_summarization:subset=real_call_transcripts,model=meta_llama-3-70b-chat"
        ]
      },
      {
        "value": 1.0,
        "description": "min=1, mean=1, max=1, sum=1 (1)",
        "style": {
          "font-weight": "bold"
        },
        "markdown": false,
        "run_spec_names": [
          "call_center_summarization:subset=real_call_transcripts,model=meta_llama-3-70b-chat"
        ]
      }
    ],
    [
      {
        "value": "Llama 3 Instruct (8B)",
        "description": "",
        "markdown": false
      },
      {
        "value": 1.0,
        "description": "min=1, mean=1, max=1, sum=1 (1)",
        "style": {
          "font-weight": "bold"
        },
        "markdown": false,
        "run_spec_names": [
          "call_center_summarization:model=meta_llama-3-8b-chat"
        ]
      },
      {
        "value": 1.0,
        "description": "min=1, mean=1, max=1, sum=1 (1)",
        "style": {
          "font-weight": "bold"
        },
        "markdown": false,
        "run_spec_names": [
          "call_center_summarization:model=meta_llama-3-8b-chat"
        ]
      },
      {
        "value": 1.0,
        "description": "min=1, mean=1, max=1, sum=1 (1)",
        "style": {
          "font-weight": "bold"
        },
        "markdown": false,
        "run_spec_names": [
          "call_center_summarization:model=meta_llama-3-8b-chat"
        ]
      },
      {
        "value": 1.0,
        "description": "min=1, mean=1, max=1, sum=1 (1)",
        "style": {
          "font-weight": "bold"
        },
        "markdown": false,
        "run_spec_names": [
          "call_center_summarization:subset=real_call_transcripts,model=meta_llama-3-8b-chat"
        ]
      },
      {
        "value": 1.0,
        "description": "min=1, mean=1, max=1, sum=1 (1)",
        "style": {
          "font-weight": "bold"
        },
        "markdown": false,
        "run_spec_names": [
          "call_center_summarization:subset=real_call_transcripts,model=meta_llama-3-8b-chat"
        ]
      },
      {
        "value": 1.0,
        "description": "min=1, mean=1, max=1, sum=1 (1)",
        "style": {
          "font-weight": "bold"
        },
        "markdown": false,
        "run_spec_names": [
          "call_center_summarization:subset=real_call_transcripts,model=meta_llama-3-8b-chat"
        ]
      }
    ],
    [
      {
        "value": "Claude 3.5 Sonnet (20240620)",
        "description": "",
        "markdown": false
      },
      {
        "value": 1.0,
        "description": "min=1, mean=1, max=1, sum=1 (1)",
        "style": {
          "font-weight": "bold"
        },
        "markdown": false,
        "run_spec_names": [
          "call_center_summarization:model=anthropic_claude-3-5-sonnet-20240620"
        ]
      },
      {
        "value": 1.0,
        "description": "min=1, mean=1, max=1, sum=1 (1)",
        "style": {
          "font-weight": "bold"
        },
        "markdown": false,
        "run_spec_names": [
          "call_center_summarization:model=anthropic_claude-3-5-sonnet-20240620"
        ]
      },
      {
        "value": 1.0,
        "description": "min=1, mean=1, max=1, sum=1 (1)",
        "style": {
          "font-weight": "bold"
        },
        "markdown": false,
        "run_spec_names": [
          "call_center_summarization:model=anthropic_claude-3-5-sonnet-20240620"
        ]
      },
      {
        "value": 1.0,
        "description": "min=1, mean=1, max=1, sum=1 (1)",
        "style": {
          "font-weight": "bold"
        },
        "markdown": false,
        "run_spec_names": [
          "call_center_summarization:subset=real_call_transcripts,model=anthropic_claude-3-5-sonnet-20240620"
        ]
      },
      {
        "value": 1.0,
        "description": "min=1, mean=1, max=1, sum=1 (1)",
        "style": {
          "font-weight": "bold"
        },
        "markdown": false,
        "run_spec_names": [
          "call_center_summarization:subset=real_call_transcripts,model=anthropic_claude-3-5-sonnet-20240620"
        ]
      },
      {
        "value": 1.0,
        "description": "min=1, mean=1, max=1, sum=1 (1)",
        "style": {
          "font-weight": "bold"
        },
        "markdown": false,
        "run_spec_names": [
          "call_center_summarization:subset=real_call_transcripts,model=anthropic_claude-3-5-sonnet-20240620"
        ]
      }
    ],
    [
      {
        "value": "GPT-4o (2024-05-13)",
        "description": "",
        "markdown": false
      },
      {
        "value": 1.0,
        "description": "min=1, mean=1, max=1, sum=1 (1)",
        "style": {
          "font-weight": "bold"
        },
        "markdown": false,
        "run_spec_names": [
          "call_center_summarization:model=openai_gpt-4o-2024-05-13"
        ]
      },
      {
        "value": 1.0,
        "description": "min=1, mean=1, max=1, sum=1 (1)",
        "style": {
          "font-weight": "bold"
        },
        "markdown": false,
        "run_spec_names": [
          "call_center_summarization:model=openai_gpt-4o-2024-05-13"
        ]
      },
      {
        "value": 1.0,
        "description": "min=1, mean=1, max=1, sum=1 (1)",
        "style": {
          "font-weight": "bold"
        },
        "markdown": false,
        "run_spec_names": [
          "call_center_summarization:model=openai_gpt-4o-2024-05-13"
        ]
      },
      {
        "value": 1.0,
        "description": "min=1, mean=1, max=1, sum=1 (1)",
        "style": {
          "font-weight": "bold"
        },
        "markdown": false,
        "run_spec_names": [
          "call_center_summarization:subset=real_call_transcripts,model=openai_gpt-4o-2024-05-13"
        ]
      },
      {
        "value": 1.0,
        "description": "min=1, mean=1, max=1, sum=1 (1)",
        "style": {
          "font-weight": "bold"
        },
        "markdown": false,
        "run_spec_names": [
          "call_center_summarization:subset=real_call_transcripts,model=openai_gpt-4o-2024-05-13"
        ]
      },
      {
        "value": 0.95,
        "description": "min=0.95, mean=0.95, max=0.95, sum=0.95 (1)",
        "style": {},
        "markdown": false,
        "run_spec_names": [
          "call_center_summarization:subset=real_call_transcripts,model=openai_gpt-4o-2024-05-13"
        ]
      }
    ],
    [
      {
        "value": "GPT-4o mini (2024-07-18)",
        "description": "",
        "markdown": false
      },
      {
        "value": 1.0,
        "description": "min=1, mean=1, max=1, sum=1 (1)",
        "style": {
          "font-weight": "bold"
        },
        "markdown": false,
        "run_spec_names": [
          "call_center_summarization:model=openai_gpt-4o-mini-2024-07-18"
        ]
      },
      {
        "value": 1.0,
        "description": "min=1, mean=1, max=1, sum=1 (1)",
        "style": {
          "font-weight": "bold"
        },
        "markdown": false,
        "run_spec_names": [
          "call_center_summarization:model=openai_gpt-4o-mini-2024-07-18"
        ]
      },
      {
        "value": 1.0,
        "description": "min=1, mean=1, max=1, sum=1 (1)",
        "style": {
          "font-weight": "bold"
        },
        "markdown": false,
        "run_spec_names": [
          "call_center_summarization:model=openai_gpt-4o-mini-2024-07-18"
        ]
      },
      {
        "value": 1.0,
        "description": "min=1, mean=1, max=1, sum=1 (1)",
        "style": {
          "font-weight": "bold"
        },
        "markdown": false,
        "run_spec_names": [
          "call_center_summarization:subset=real_call_transcripts,model=openai_gpt-4o-mini-2024-07-18"
        ]
      },
      {
        "value": 1.0,
        "description": "min=1, mean=1, max=1, sum=1 (1)",
        "style": {
          "font-weight": "bold"
        },
        "markdown": false,
        "run_spec_names": [
          "call_center_summarization:subset=real_call_transcripts,model=openai_gpt-4o-mini-2024-07-18"
        ]
      },
      {
        "value": 1.0,
        "description": "min=1, mean=1, max=1, sum=1 (1)",
        "style": {
          "font-weight": "bold"
        },
        "markdown": false,
        "run_spec_names": [
          "call_center_summarization:subset=real_call_transcripts,model=openai_gpt-4o-mini-2024-07-18"
        ]
      }
    ]
  ],
  "links": [
    {
      "text": "LaTeX",
      "href": "benchmark_output/releases/v0.1.0/groups/latex/call_center_scenarios_summarization_metrics.tex"
    },
    {
      "text": "JSON",
      "href": "benchmark_output/releases/v0.1.0/groups/json/call_center_scenarios_summarization_metrics.json"
    }
  ],
  "name": "summarization_metrics"
}