{
  "title": "",
  "header": [
    {
      "value": "Model",
      "markdown": false,
      "metadata": {}
    },
    {
      "value": "Execution Accuracy",
      "description": "BIRD-SQL (Dev)\n\nExecution Accuracy: Execution Accuracy",
      "markdown": false,
      "lower_is_better": false,
      "metadata": {
        "metric": "Execution Accuracy",
        "run_group": "BIRD-SQL (Dev)"
      }
    },
    {
      "value": "# eval",
      "description": "BIRD-SQL (Dev)\n\n# eval: Number of evaluation instances.",
      "markdown": false,
      "metadata": {
        "metric": "# eval",
        "run_group": "BIRD-SQL (Dev)"
      }
    },
    {
      "value": "# train",
      "description": "BIRD-SQL (Dev)\n\n# train: Number of training instances (e.g., in-context examples).",
      "markdown": false,
      "metadata": {
        "metric": "# train",
        "run_group": "BIRD-SQL (Dev)"
      }
    },
    {
      "value": "truncated",
      "description": "BIRD-SQL (Dev)\n\ntruncated: Fraction of instances where the prompt itself was truncated (implies that there were no in-context examples).",
      "markdown": false,
      "metadata": {
        "metric": "truncated",
        "run_group": "BIRD-SQL (Dev)"
      }
    },
    {
      "value": "# prompt tokens",
      "description": "BIRD-SQL (Dev)\n\n# prompt tokens: Number of tokens in the prompt.",
      "markdown": false,
      "metadata": {
        "metric": "# prompt tokens",
        "run_group": "BIRD-SQL (Dev)"
      }
    },
    {
      "value": "# output tokens",
      "description": "BIRD-SQL (Dev)\n\n# output tokens: Actual number of output tokens.",
      "markdown": false,
      "metadata": {
        "metric": "# output tokens",
        "run_group": "BIRD-SQL (Dev)"
      }
    }
  ],
  "rows": [
    [
      {
        "value": "Llama 3.1 Instruct Turbo (405B)",
        "description": "",
        "href": "?group=bird_sql&subgroup=&runSpecs=%5B%22bird_sql%3Amodel%3Dmeta_llama-3.1-405b-instruct-turbo%22%5D",
        "markdown": false,
        "run_spec_names": [
          "bird_sql:model=meta_llama-3.1-405b-instruct-turbo"
        ]
      },
      {
        "value": 0.61,
        "description": "min=0.61, mean=0.61, max=0.61, sum=0.61 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 100.0,
        "description": "min=100, mean=100, max=100, sum=100 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 0.0,
        "description": "min=0, mean=0, max=0, sum=0 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 0.0,
        "description": "min=0, mean=0, max=0, sum=0 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 1024.99,
        "description": "min=1024.99, mean=1024.99, max=1024.99, sum=1024.99 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 213.94,
        "description": "min=213.94, mean=213.94, max=213.94, sum=213.94 (1)",
        "style": {},
        "markdown": false
      }
    ],
    [
      {
        "value": "Llama 3.1 Instruct Turbo (70B)",
        "description": "",
        "href": "?group=bird_sql&subgroup=&runSpecs=%5B%22bird_sql%3Amodel%3Dmeta_llama-3.1-70b-instruct-turbo%22%5D",
        "markdown": false,
        "run_spec_names": [
          "bird_sql:model=meta_llama-3.1-70b-instruct-turbo"
        ]
      },
      {
        "value": 0.55,
        "description": "min=0.55, mean=0.55, max=0.55, sum=0.55 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 100.0,
        "description": "min=100, mean=100, max=100, sum=100 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 0.0,
        "description": "min=0, mean=0, max=0, sum=0 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 0.0,
        "description": "min=0, mean=0, max=0, sum=0 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 1024.99,
        "description": "min=1024.99, mean=1024.99, max=1024.99, sum=1024.99 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 222.9,
        "description": "min=222.9, mean=222.9, max=222.9, sum=222.9 (1)",
        "style": {},
        "markdown": false
      }
    ],
    [
      {
        "value": "Llama 3.1 Instruct Turbo (8B)",
        "description": "",
        "href": "?group=bird_sql&subgroup=&runSpecs=%5B%22bird_sql%3Amodel%3Dmeta_llama-3.1-8b-instruct-turbo%22%5D",
        "markdown": false,
        "run_spec_names": [
          "bird_sql:model=meta_llama-3.1-8b-instruct-turbo"
        ]
      },
      {
        "value": 0.37,
        "description": "min=0.37, mean=0.37, max=0.37, sum=0.37 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 100.0,
        "description": "min=100, mean=100, max=100, sum=100 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 0.0,
        "description": "min=0, mean=0, max=0, sum=0 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 0.0,
        "description": "min=0, mean=0, max=0, sum=0 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 1024.99,
        "description": "min=1024.99, mean=1024.99, max=1024.99, sum=1024.99 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 224.91,
        "description": "min=224.91, mean=224.91, max=224.91, sum=224.91 (1)",
        "style": {},
        "markdown": false
      }
    ],
    [
      {
        "value": "Claude 3.5 Haiku (20241022)",
        "description": "",
        "href": "?group=bird_sql&subgroup=&runSpecs=%5B%22bird_sql%3Amodel%3Danthropic_claude-3-5-haiku-20241022%22%5D",
        "markdown": false,
        "run_spec_names": [
          "bird_sql:model=anthropic_claude-3-5-haiku-20241022"
        ]
      },
      {
        "value": 0.48,
        "description": "min=0.48, mean=0.48, max=0.48, sum=0.48 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 100.0,
        "description": "min=100, mean=100, max=100, sum=100 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 0.0,
        "description": "min=0, mean=0, max=0, sum=0 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 0.0,
        "description": "min=0, mean=0, max=0, sum=0 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 1102.77,
        "description": "min=1102.77, mean=1102.77, max=1102.77, sum=1102.77 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 197.73,
        "description": "min=197.73, mean=197.73, max=197.73, sum=197.73 (1)",
        "style": {},
        "markdown": false
      }
    ],
    [
      {
        "value": "Claude 3.5 Sonnet (20240620)",
        "description": "",
        "href": "?group=bird_sql&subgroup=&runSpecs=%5B%22bird_sql%3Amodel%3Danthropic_claude-3-5-sonnet-20240620%22%5D",
        "markdown": false,
        "run_spec_names": [
          "bird_sql:model=anthropic_claude-3-5-sonnet-20240620"
        ]
      },
      {
        "value": 0.47,
        "description": "min=0.47, mean=0.47, max=0.47, sum=0.47 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 100.0,
        "description": "min=100, mean=100, max=100, sum=100 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 0.0,
        "description": "min=0, mean=0, max=0, sum=0 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 0.0,
        "description": "min=0, mean=0, max=0, sum=0 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 1102.77,
        "description": "min=1102.77, mean=1102.77, max=1102.77, sum=1102.77 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 232.02,
        "description": "min=232.02, mean=232.02, max=232.02, sum=232.02 (1)",
        "style": {},
        "markdown": false
      }
    ],
    [
      {
        "value": "Gemini 1.5 Pro (002)",
        "description": "",
        "href": "?group=bird_sql&subgroup=&runSpecs=%5B%22bird_sql%3Amodel%3Dgoogle_gemini-1.5-pro-002%22%5D",
        "markdown": false,
        "run_spec_names": [
          "bird_sql:model=google_gemini-1.5-pro-002"
        ]
      },
      {
        "value": 0.61,
        "description": "min=0.61, mean=0.61, max=0.61, sum=0.61 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 100.0,
        "description": "min=100, mean=100, max=100, sum=100 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 0.0,
        "description": "min=0, mean=0, max=0, sum=0 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 0.0,
        "description": "min=0, mean=0, max=0, sum=0 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 1181.01,
        "description": "min=1181.01, mean=1181.01, max=1181.01, sum=1181.01 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 0.0,
        "description": "min=0, mean=0, max=0, sum=0 (1)",
        "style": {},
        "markdown": false
      }
    ],
    [
      {
        "value": "Gemini 1.5 Flash (002)",
        "description": "",
        "href": "?group=bird_sql&subgroup=&runSpecs=%5B%22bird_sql%3Amodel%3Dgoogle_gemini-1.5-flash-002%22%5D",
        "markdown": false,
        "run_spec_names": [
          "bird_sql:model=google_gemini-1.5-flash-002"
        ]
      },
      {
        "value": 0.63,
        "description": "min=0.63, mean=0.63, max=0.63, sum=0.63 (1)",
        "style": {
          "font-weight": "bold"
        },
        "markdown": false
      },
      {
        "value": 100.0,
        "description": "min=100, mean=100, max=100, sum=100 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 0.0,
        "description": "min=0, mean=0, max=0, sum=0 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 0.0,
        "description": "min=0, mean=0, max=0, sum=0 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 1181.01,
        "description": "min=1181.01, mean=1181.01, max=1181.01, sum=1181.01 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 0.0,
        "description": "min=0, mean=0, max=0, sum=0 (1)",
        "style": {},
        "markdown": false
      }
    ],
    [
      {
        "value": "GPT-4o (2024-08-06)",
        "description": "",
        "href": "?group=bird_sql&subgroup=&runSpecs=%5B%22bird_sql%3Amodel%3Dopenai_gpt-4o-2024-08-06%22%5D",
        "markdown": false,
        "run_spec_names": [
          "bird_sql:model=openai_gpt-4o-2024-08-06"
        ]
      },
      {
        "value": 0.62,
        "description": "min=0.62, mean=0.62, max=0.62, sum=0.62 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 100.0,
        "description": "min=100, mean=100, max=100, sum=100 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 0.0,
        "description": "min=0, mean=0, max=0, sum=0 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 0.0,
        "description": "min=0, mean=0, max=0, sum=0 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 1030.82,
        "description": "min=1030.82, mean=1030.82, max=1030.82, sum=1030.82 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 240.35,
        "description": "min=240.35, mean=240.35, max=240.35, sum=240.35 (1)",
        "style": {},
        "markdown": false
      }
    ],
    [
      {
        "value": "GPT-4o mini (2024-07-18)",
        "description": "",
        "href": "?group=bird_sql&subgroup=&runSpecs=%5B%22bird_sql%3Amodel%3Dopenai_gpt-4o-mini-2024-07-18%22%5D",
        "markdown": false,
        "run_spec_names": [
          "bird_sql:model=openai_gpt-4o-mini-2024-07-18"
        ]
      },
      {
        "value": 0.54,
        "description": "min=0.54, mean=0.54, max=0.54, sum=0.54 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 100.0,
        "description": "min=100, mean=100, max=100, sum=100 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 0.0,
        "description": "min=0, mean=0, max=0, sum=0 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 0.0,
        "description": "min=0, mean=0, max=0, sum=0 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 1030.82,
        "description": "min=1030.82, mean=1030.82, max=1030.82, sum=1030.82 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 227.43,
        "description": "min=227.43, mean=227.43, max=227.43, sum=227.43 (1)",
        "style": {},
        "markdown": false
      }
    ]
  ],
  "links": [
    {
      "text": "LaTeX",
      "href": "benchmark_output/releases/v0.5.0/groups/latex/bird_sql_bird_sql_.tex"
    },
    {
      "text": "JSON",
      "href": "benchmark_output/releases/v0.5.0/groups/json/bird_sql_bird_sql_.json"
    }
  ],
  "name": "bird_sql_"
}