{
  "title": "Accuracy",
  "header": [
    {
      "value": "Model",
      "markdown": false,
      "metadata": {}
    },
    {
      "value": "CzechBankQA - SQL Error Rate",
      "description": "The CzechBankQA scenario.\n\nSQL Error Rate: Fraction of generated queries that result in a SQL execution error",
      "markdown": false,
      "lower_is_better": true,
      "metadata": {
        "metric": "SQL Error Rate",
        "run_group": "CzechBankQA"
      }
    }
  ],
  "rows": [
    [
      {
        "value": "Llama 3.1 Instruct Turbo (405B)",
        "description": "",
        "markdown": false
      },
      {
        "value": 0.0196078431372549,
        "description": "min=0.02, mean=0.02, max=0.02, sum=0.02 (1)",
        "style": {},
        "markdown": false,
        "run_spec_names": [
          "czech_bank_qa:model=meta_llama-3.1-405b-instruct-turbo"
        ]
      }
    ],
    [
      {
        "value": "Llama 3.1 Instruct Turbo (70B)",
        "description": "",
        "markdown": false
      },
      {
        "value": 0.13725490196078433,
        "description": "min=0.137, mean=0.137, max=0.137, sum=0.137 (1)",
        "style": {},
        "markdown": false,
        "run_spec_names": [
          "czech_bank_qa:model=meta_llama-3.1-70b-instruct-turbo"
        ]
      }
    ],
    [
      {
        "value": "Llama 3.1 Instruct Turbo (8B)",
        "description": "",
        "markdown": false
      },
      {
        "value": 0.09803921568627451,
        "description": "min=0.098, mean=0.098, max=0.098, sum=0.098 (1)",
        "style": {},
        "markdown": false,
        "run_spec_names": [
          "czech_bank_qa:model=meta_llama-3.1-8b-instruct-turbo"
        ]
      }
    ],
    [
      {
        "value": "Mistral Instruct v0.3 (7B)",
        "description": "",
        "markdown": false
      },
      {
        "value": 0.47058823529411764,
        "description": "min=0.471, mean=0.471, max=0.471, sum=0.471 (1)",
        "style": {},
        "markdown": false,
        "run_spec_names": [
          "czech_bank_qa:model=mistralai_mistral-7b-instruct-v0.3"
        ]
      }
    ],
    [
      {
        "value": "Mixtral Instruct (8x22B)",
        "description": "",
        "markdown": false
      },
      {
        "value": 0.5980392156862745,
        "description": "min=0.598, mean=0.598, max=0.598, sum=0.598 (1)",
        "style": {},
        "markdown": false,
        "run_spec_names": [
          "czech_bank_qa:model=mistralai_mixtral-8x22b-instruct-v0.1"
        ]
      }
    ],
    [
      {
        "value": "Mixtral Instruct (8x7B)",
        "description": "",
        "markdown": false
      },
      {
        "value": 0.46078431372549017,
        "description": "min=0.461, mean=0.461, max=0.461, sum=0.461 (1)",
        "style": {},
        "markdown": false,
        "run_spec_names": [
          "czech_bank_qa:model=mistralai_mixtral-8x7b-instruct-v0.1"
        ]
      }
    ],
    [
      {
        "value": "Qwen2 Instruct (72B)",
        "description": "",
        "markdown": false
      },
      {
        "value": 0.16666666666666666,
        "description": "min=0.167, mean=0.167, max=0.167, sum=0.167 (1)",
        "style": {},
        "markdown": false,
        "run_spec_names": [
          "czech_bank_qa:model=qwen_qwen2-72b-instruct"
        ]
      }
    ],
    [
      {
        "value": "Claude 3.5 Haiku (20241022)",
        "description": "",
        "markdown": false
      },
      {
        "value": 0.029411764705882353,
        "description": "min=0.029, mean=0.029, max=0.029, sum=0.029 (1)",
        "style": {},
        "markdown": false,
        "run_spec_names": [
          "czech_bank_qa:model=anthropic_claude-3-5-haiku-20241022"
        ]
      }
    ],
    [
      {
        "value": "Claude 3.5 Sonnet (20240620)",
        "description": "",
        "markdown": false
      },
      {
        "value": 0.00980392156862745,
        "description": "min=0.01, mean=0.01, max=0.01, sum=0.01 (1)",
        "style": {
          "font-weight": "bold"
        },
        "markdown": false,
        "run_spec_names": [
          "czech_bank_qa:model=anthropic_claude-3-5-sonnet-20240620"
        ]
      }
    ],
    [
      {
        "value": "Gemini 1.5 Pro (002)",
        "description": "",
        "markdown": false
      },
      {
        "value": 0.029411764705882353,
        "description": "min=0.029, mean=0.029, max=0.029, sum=0.029 (1)",
        "style": {},
        "markdown": false,
        "run_spec_names": [
          "czech_bank_qa:model=google_gemini-1.5-pro-002"
        ]
      }
    ],
    [
      {
        "value": "Gemini 1.5 Flash (002)",
        "description": "",
        "markdown": false
      },
      {
        "value": 0.16666666666666666,
        "description": "min=0.167, mean=0.167, max=0.167, sum=0.167 (1)",
        "style": {},
        "markdown": false,
        "run_spec_names": [
          "czech_bank_qa:model=google_gemini-1.5-flash-002"
        ]
      }
    ],
    [
      {
        "value": "GPT-4o (2024-08-06)",
        "description": "",
        "markdown": false
      },
      {
        "value": 0.0196078431372549,
        "description": "min=0.02, mean=0.02, max=0.02, sum=0.02 (1)",
        "style": {},
        "markdown": false,
        "run_spec_names": [
          "czech_bank_qa:model=openai_gpt-4o-2024-08-06"
        ]
      }
    ],
    [
      {
        "value": "GPT-4o mini (2024-07-18)",
        "description": "",
        "markdown": false
      },
      {
        "value": 0.058823529411764705,
        "description": "min=0.059, mean=0.059, max=0.059, sum=0.059 (1)",
        "style": {},
        "markdown": false,
        "run_spec_names": [
          "czech_bank_qa:model=openai_gpt-4o-mini-2024-07-18"
        ]
      }
    ]
  ],
  "links": [
    {
      "text": "LaTeX",
      "href": "benchmark_output/releases/v0.4.0/groups/latex/financial_scenarios_accuracy.tex"
    },
    {
      "text": "JSON",
      "href": "benchmark_output/releases/v0.4.0/groups/json/financial_scenarios_accuracy.json"
    }
  ],
  "name": "accuracy"
}