{
  "title": "",
  "header": [
    {
      "value": "Model",
      "markdown": false,
      "metadata": {}
    },
    {
      "value": "Execution Accuracy",
      "description": "Spider 1.0 (Test)\n\nExecution Accuracy: Execution Accuracy",
      "markdown": false,
      "lower_is_better": false,
      "metadata": {
        "metric": "Execution Accuracy",
        "run_group": "Spider 1.0 (Test)"
      }
    },
    {
      "value": "# eval",
      "description": "Spider 1.0 (Test)\n\n# eval: Number of evaluation instances.",
      "markdown": false,
      "metadata": {
        "metric": "# eval",
        "run_group": "Spider 1.0 (Test)"
      }
    },
    {
      "value": "# train",
      "description": "Spider 1.0 (Test)\n\n# train: Number of training instances (e.g., in-context examples).",
      "markdown": false,
      "metadata": {
        "metric": "# train",
        "run_group": "Spider 1.0 (Test)"
      }
    },
    {
      "value": "truncated",
      "description": "Spider 1.0 (Test)\n\ntruncated: Fraction of instances where the prompt itself was truncated (implies that there were no in-context examples).",
      "markdown": false,
      "metadata": {
        "metric": "truncated",
        "run_group": "Spider 1.0 (Test)"
      }
    },
    {
      "value": "# prompt tokens",
      "description": "Spider 1.0 (Test)\n\n# prompt tokens: Number of tokens in the prompt.",
      "markdown": false,
      "metadata": {
        "metric": "# prompt tokens",
        "run_group": "Spider 1.0 (Test)"
      }
    },
    {
      "value": "# output tokens",
      "description": "Spider 1.0 (Test)\n\n# output tokens: Actual number of output tokens.",
      "markdown": false,
      "metadata": {
        "metric": "# output tokens",
        "run_group": "Spider 1.0 (Test)"
      }
    }
  ],
  "rows": [
    [
      {
        "value": "Llama 3.1 Instruct Turbo (405B)",
        "description": "",
        "href": "?group=spider&subgroup=&runSpecs=%5B%22spider%3Amodel%3Dmeta_llama-3.1-405b-instruct-turbo%22%5D",
        "markdown": false,
        "run_spec_names": [
          "spider:model=meta_llama-3.1-405b-instruct-turbo"
        ]
      },
      {
        "value": 0.77,
        "description": "min=0.77, mean=0.77, max=0.77, sum=0.77 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 100.0,
        "description": "min=100, mean=100, max=100, sum=100 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 0.0,
        "description": "min=0, mean=0, max=0, sum=0 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 0.0,
        "description": "min=0, mean=0, max=0, sum=0 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 365.01,
        "description": "min=365.01, mean=365.01, max=365.01, sum=365.01 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 168.06,
        "description": "min=168.06, mean=168.06, max=168.06, sum=168.06 (1)",
        "style": {},
        "markdown": false
      }
    ],
    [
      {
        "value": "Llama 3.1 Instruct Turbo (70B)",
        "description": "",
        "href": "?group=spider&subgroup=&runSpecs=%5B%22spider%3Amodel%3Dmeta_llama-3.1-70b-instruct-turbo%22%5D",
        "markdown": false,
        "run_spec_names": [
          "spider:model=meta_llama-3.1-70b-instruct-turbo"
        ]
      },
      {
        "value": 0.71,
        "description": "min=0.71, mean=0.71, max=0.71, sum=0.71 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 100.0,
        "description": "min=100, mean=100, max=100, sum=100 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 0.0,
        "description": "min=0, mean=0, max=0, sum=0 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 0.0,
        "description": "min=0, mean=0, max=0, sum=0 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 365.01,
        "description": "min=365.01, mean=365.01, max=365.01, sum=365.01 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 165.82,
        "description": "min=165.82, mean=165.82, max=165.82, sum=165.82 (1)",
        "style": {},
        "markdown": false
      }
    ],
    [
      {
        "value": "Llama 3.1 Instruct Turbo (8B)",
        "description": "",
        "href": "?group=spider&subgroup=&runSpecs=%5B%22spider%3Amodel%3Dmeta_llama-3.1-8b-instruct-turbo%22%5D",
        "markdown": false,
        "run_spec_names": [
          "spider:model=meta_llama-3.1-8b-instruct-turbo"
        ]
      },
      {
        "value": 0.61,
        "description": "min=0.61, mean=0.61, max=0.61, sum=0.61 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 100.0,
        "description": "min=100, mean=100, max=100, sum=100 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 0.0,
        "description": "min=0, mean=0, max=0, sum=0 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 0.0,
        "description": "min=0, mean=0, max=0, sum=0 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 365.01,
        "description": "min=365.01, mean=365.01, max=365.01, sum=365.01 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 169.2,
        "description": "min=169.2, mean=169.2, max=169.2, sum=169.2 (1)",
        "style": {},
        "markdown": false
      }
    ],
    [
      {
        "value": "Claude 3.5 Haiku (20241022)",
        "description": "",
        "href": "?group=spider&subgroup=&runSpecs=%5B%22spider%3Amodel%3Danthropic_claude-3-5-haiku-20241022%22%5D",
        "markdown": false,
        "run_spec_names": [
          "spider:model=anthropic_claude-3-5-haiku-20241022"
        ]
      },
      {
        "value": 0.63,
        "description": "min=0.63, mean=0.63, max=0.63, sum=0.63 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 100.0,
        "description": "min=100, mean=100, max=100, sum=100 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 0.0,
        "description": "min=0, mean=0, max=0, sum=0 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 0.0,
        "description": "min=0, mean=0, max=0, sum=0 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 454.04,
        "description": "min=454.04, mean=454.04, max=454.04, sum=454.04 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 143.6,
        "description": "min=143.6, mean=143.6, max=143.6, sum=143.6 (1)",
        "style": {},
        "markdown": false
      }
    ],
    [
      {
        "value": "Claude 3.5 Sonnet (20240620)",
        "description": "",
        "href": "?group=spider&subgroup=&runSpecs=%5B%22spider%3Amodel%3Danthropic_claude-3-5-sonnet-20240620%22%5D",
        "markdown": false,
        "run_spec_names": [
          "spider:model=anthropic_claude-3-5-sonnet-20240620"
        ]
      },
      {
        "value": 0.66,
        "description": "min=0.66, mean=0.66, max=0.66, sum=0.66 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 100.0,
        "description": "min=100, mean=100, max=100, sum=100 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 0.0,
        "description": "min=0, mean=0, max=0, sum=0 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 0.0,
        "description": "min=0, mean=0, max=0, sum=0 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 454.04,
        "description": "min=454.04, mean=454.04, max=454.04, sum=454.04 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 184.57,
        "description": "min=184.57, mean=184.57, max=184.57, sum=184.57 (1)",
        "style": {},
        "markdown": false
      }
    ],
    [
      {
        "value": "Gemini 1.5 Pro (002)",
        "description": "",
        "href": "?group=spider&subgroup=&runSpecs=%5B%22spider%3Amodel%3Dgoogle_gemini-1.5-pro-002%22%5D",
        "markdown": false,
        "run_spec_names": [
          "spider:model=google_gemini-1.5-pro-002"
        ]
      },
      {
        "value": 0.8,
        "description": "min=0.8, mean=0.8, max=0.8, sum=0.8 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 100.0,
        "description": "min=100, mean=100, max=100, sum=100 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 0.0,
        "description": "min=0, mean=0, max=0, sum=0 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 0.0,
        "description": "min=0, mean=0, max=0, sum=0 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 443.55,
        "description": "min=443.55, mean=443.55, max=443.55, sum=443.55 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 0.0,
        "description": "min=0, mean=0, max=0, sum=0 (1)",
        "style": {},
        "markdown": false
      }
    ],
    [
      {
        "value": "Gemini 1.5 Flash (002)",
        "description": "",
        "href": "?group=spider&subgroup=&runSpecs=%5B%22spider%3Amodel%3Dgoogle_gemini-1.5-flash-002%22%5D",
        "markdown": false,
        "run_spec_names": [
          "spider:model=google_gemini-1.5-flash-002"
        ]
      },
      {
        "value": 0.79,
        "description": "min=0.79, mean=0.79, max=0.79, sum=0.79 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 100.0,
        "description": "min=100, mean=100, max=100, sum=100 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 0.0,
        "description": "min=0, mean=0, max=0, sum=0 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 0.0,
        "description": "min=0, mean=0, max=0, sum=0 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 443.55,
        "description": "min=443.55, mean=443.55, max=443.55, sum=443.55 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 0.0,
        "description": "min=0, mean=0, max=0, sum=0 (1)",
        "style": {},
        "markdown": false
      }
    ],
    [
      {
        "value": "GPT-4o (2024-08-06)",
        "description": "",
        "href": "?group=spider&subgroup=&runSpecs=%5B%22spider%3Amodel%3Dopenai_gpt-4o-2024-08-06%22%5D",
        "markdown": false,
        "run_spec_names": [
          "spider:model=openai_gpt-4o-2024-08-06"
        ]
      },
      {
        "value": 0.81,
        "description": "min=0.81, mean=0.81, max=0.81, sum=0.81 (1)",
        "style": {
          "font-weight": "bold"
        },
        "markdown": false
      },
      {
        "value": 100.0,
        "description": "min=100, mean=100, max=100, sum=100 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 0.0,
        "description": "min=0, mean=0, max=0, sum=0 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 0.0,
        "description": "min=0, mean=0, max=0, sum=0 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 367.53,
        "description": "min=367.53, mean=367.53, max=367.53, sum=367.53 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 183.2,
        "description": "min=183.2, mean=183.2, max=183.2, sum=183.2 (1)",
        "style": {},
        "markdown": false
      }
    ],
    [
      {
        "value": "GPT-4o mini (2024-07-18)",
        "description": "",
        "href": "?group=spider&subgroup=&runSpecs=%5B%22spider%3Amodel%3Dopenai_gpt-4o-mini-2024-07-18%22%5D",
        "markdown": false,
        "run_spec_names": [
          "spider:model=openai_gpt-4o-mini-2024-07-18"
        ]
      },
      {
        "value": 0.72,
        "description": "min=0.72, mean=0.72, max=0.72, sum=0.72 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 100.0,
        "description": "min=100, mean=100, max=100, sum=100 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 0.0,
        "description": "min=0, mean=0, max=0, sum=0 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 0.0,
        "description": "min=0, mean=0, max=0, sum=0 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 367.53,
        "description": "min=367.53, mean=367.53, max=367.53, sum=367.53 (1)",
        "style": {},
        "markdown": false
      },
      {
        "value": 176.02,
        "description": "min=176.02, mean=176.02, max=176.02, sum=176.02 (1)",
        "style": {},
        "markdown": false
      }
    ]
  ],
  "links": [
    {
      "text": "LaTeX",
      "href": "benchmark_output/releases/v0.5.0/groups/latex/spider_spider_.tex"
    },
    {
      "text": "JSON",
      "href": "benchmark_output/releases/v0.5.0/groups/json/spider_spider_.json"
    }
  ],
  "name": "spider_"
}