{
  "mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=01-ai_yi-6b,eval_split=test,additional_instructions=yifan,groups=mmlu_abstract_algebra": "mmlu_yifan",
  "mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=anthropic_claude-2.1,eval_split=test,additional_instructions=yifan,groups=mmlu_abstract_algebra": "mmlu_yifan",
  "mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=anthropic_claude-3-opus-20240229,eval_split=test,additional_instructions=yifan,groups=mmlu_abstract_algebra": "mmlu_yifan",
  "mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=anthropic_claude-3-sonnet-20240229,eval_split=test,additional_instructions=yifan,groups=mmlu_abstract_algebra": "mmlu_yifan",
  "mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=anthropic_claude-instant-1.2,eval_split=test,additional_instructions=yifan,groups=mmlu_abstract_algebra": "mmlu_yifan",
  "mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=google_gemini-pro,eval_split=test,additional_instructions=yifan,groups=mmlu_abstract_algebra": "mmlu_yifan",
  "mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=google_gemma-7b,eval_split=test,additional_instructions=yifan,groups=mmlu_abstract_algebra": "mmlu_yifan",
  "mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=google_gemma-7b-it,eval_split=test,additional_instructions=yifan,groups=mmlu_abstract_algebra": "mmlu_yifan",
  "mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=google_text-bison@001,eval_split=test,additional_instructions=yifan,groups=mmlu_abstract_algebra": "mmlu_yifan",
  "mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=google_text-unicorn@001,eval_split=test,additional_instructions=yifan,groups=mmlu_abstract_algebra": "mmlu_yifan",
  "mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=meta_llama-2-7b,eval_split=test,additional_instructions=yifan,groups=mmlu_abstract_algebra": "mmlu_yifan",
  "mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=microsoft_phi-2,eval_split=test,additional_instructions=yifan,groups=mmlu_abstract_algebra": "mmlu_yifan",
  "mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=mistralai_mixtral-8x7b-32kseqlen,eval_split=test,additional_instructions=yifan,groups=mmlu_abstract_algebra": "mmlu_yifan",
  "mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=openai_gpt-3.5-turbo-0613,eval_split=test,additional_instructions=yifan,groups=mmlu_abstract_algebra": "mmlu_yifan",
  "mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=openai_gpt-4-1106-preview,eval_split=test,additional_instructions=yifan,groups=mmlu_abstract_algebra": "mmlu_yifan",
  "mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=qwen_qwen1.5-7b,eval_split=test,additional_instructions=yifan,groups=mmlu_abstract_algebra": "mmlu_yifan",
  "mmlu:subject=college_chemistry,method=multiple_choice_joint,model=01-ai_yi-6b,eval_split=test,additional_instructions=yifan,groups=mmlu_college_chemistry": "mmlu_yifan",
  "mmlu:subject=college_chemistry,method=multiple_choice_joint,model=anthropic_claude-2.1,eval_split=test,additional_instructions=yifan,groups=mmlu_college_chemistry": "mmlu_yifan",
  "mmlu:subject=college_chemistry,method=multiple_choice_joint,model=anthropic_claude-3-opus-20240229,eval_split=test,additional_instructions=yifan,groups=mmlu_college_chemistry": "mmlu_yifan",
  "mmlu:subject=college_chemistry,method=multiple_choice_joint,model=anthropic_claude-3-sonnet-20240229,eval_split=test,additional_instructions=yifan,groups=mmlu_college_chemistry": "mmlu_yifan",
  "mmlu:subject=college_chemistry,method=multiple_choice_joint,model=anthropic_claude-instant-1.2,eval_split=test,additional_instructions=yifan,groups=mmlu_college_chemistry": "mmlu_yifan",
  "mmlu:subject=college_chemistry,method=multiple_choice_joint,model=google_gemini-pro,eval_split=test,additional_instructions=yifan,groups=mmlu_college_chemistry": "mmlu_yifan",
  "mmlu:subject=college_chemistry,method=multiple_choice_joint,model=google_gemma-7b,eval_split=test,additional_instructions=yifan,groups=mmlu_college_chemistry": "mmlu_yifan",
  "mmlu:subject=college_chemistry,method=multiple_choice_joint,model=google_gemma-7b-it,eval_split=test,additional_instructions=yifan,groups=mmlu_college_chemistry": "mmlu_yifan",
  "mmlu:subject=college_chemistry,method=multiple_choice_joint,model=google_text-bison@001,eval_split=test,additional_instructions=yifan,groups=mmlu_college_chemistry": "mmlu_yifan",
  "mmlu:subject=college_chemistry,method=multiple_choice_joint,model=google_text-unicorn@001,eval_split=test,additional_instructions=yifan,groups=mmlu_college_chemistry": "mmlu_yifan",
  "mmlu:subject=college_chemistry,method=multiple_choice_joint,model=meta_llama-2-7b,eval_split=test,additional_instructions=yifan,groups=mmlu_college_chemistry": "mmlu_yifan",
  "mmlu:subject=college_chemistry,method=multiple_choice_joint,model=microsoft_phi-2,eval_split=test,additional_instructions=yifan,groups=mmlu_college_chemistry": "mmlu_yifan",
  "mmlu:subject=college_chemistry,method=multiple_choice_joint,model=mistralai_mixtral-8x7b-32kseqlen,eval_split=test,additional_instructions=yifan,groups=mmlu_college_chemistry": "mmlu_yifan",
  "mmlu:subject=college_chemistry,method=multiple_choice_joint,model=openai_gpt-3.5-turbo-0613,eval_split=test,additional_instructions=yifan,groups=mmlu_college_chemistry": "mmlu_yifan",
  "mmlu:subject=college_chemistry,method=multiple_choice_joint,model=openai_gpt-4-1106-preview,eval_split=test,additional_instructions=yifan,groups=mmlu_college_chemistry": "mmlu_yifan",
  "mmlu:subject=college_chemistry,method=multiple_choice_joint,model=qwen_qwen1.5-7b,eval_split=test,additional_instructions=yifan,groups=mmlu_college_chemistry": "mmlu_yifan",
  "mmlu:subject=computer_security,method=multiple_choice_joint,model=01-ai_yi-6b,eval_split=test,additional_instructions=yifan,groups=mmlu_computer_security": "mmlu_yifan",
  "mmlu:subject=computer_security,method=multiple_choice_joint,model=anthropic_claude-2.1,eval_split=test,additional_instructions=yifan,groups=mmlu_computer_security": "mmlu_yifan",
  "mmlu:subject=computer_security,method=multiple_choice_joint,model=anthropic_claude-3-opus-20240229,eval_split=test,additional_instructions=yifan,groups=mmlu_computer_security": "mmlu_yifan",
  "mmlu:subject=computer_security,method=multiple_choice_joint,model=anthropic_claude-3-sonnet-20240229,eval_split=test,additional_instructions=yifan,groups=mmlu_computer_security": "mmlu_yifan",
  "mmlu:subject=computer_security,method=multiple_choice_joint,model=anthropic_claude-instant-1.2,eval_split=test,additional_instructions=yifan,groups=mmlu_computer_security": "mmlu_yifan",
  "mmlu:subject=computer_security,method=multiple_choice_joint,model=google_gemini-pro,eval_split=test,additional_instructions=yifan,groups=mmlu_computer_security": "mmlu_yifan",
  "mmlu:subject=computer_security,method=multiple_choice_joint,model=google_gemma-7b,eval_split=test,additional_instructions=yifan,groups=mmlu_computer_security": "mmlu_yifan",
  "mmlu:subject=computer_security,method=multiple_choice_joint,model=google_gemma-7b-it,eval_split=test,additional_instructions=yifan,groups=mmlu_computer_security": "mmlu_yifan",
  "mmlu:subject=computer_security,method=multiple_choice_joint,model=google_text-bison@001,eval_split=test,additional_instructions=yifan,groups=mmlu_computer_security": "mmlu_yifan",
  "mmlu:subject=computer_security,method=multiple_choice_joint,model=google_text-unicorn@001,eval_split=test,additional_instructions=yifan,groups=mmlu_computer_security": "mmlu_yifan",
  "mmlu:subject=computer_security,method=multiple_choice_joint,model=meta_llama-2-7b,eval_split=test,additional_instructions=yifan,groups=mmlu_computer_security": "mmlu_yifan",
  "mmlu:subject=computer_security,method=multiple_choice_joint,model=microsoft_phi-2,eval_split=test,additional_instructions=yifan,groups=mmlu_computer_security": "mmlu_yifan",
  "mmlu:subject=computer_security,method=multiple_choice_joint,model=mistralai_mixtral-8x7b-32kseqlen,eval_split=test,additional_instructions=yifan,groups=mmlu_computer_security": "mmlu_yifan",
  "mmlu:subject=computer_security,method=multiple_choice_joint,model=openai_gpt-3.5-turbo-0613,eval_split=test,additional_instructions=yifan,groups=mmlu_computer_security": "mmlu_yifan",
  "mmlu:subject=computer_security,method=multiple_choice_joint,model=openai_gpt-4-1106-preview,eval_split=test,additional_instructions=yifan,groups=mmlu_computer_security": "mmlu_yifan",
  "mmlu:subject=computer_security,method=multiple_choice_joint,model=qwen_qwen1.5-7b,eval_split=test,additional_instructions=yifan,groups=mmlu_computer_security": "mmlu_yifan",
  "mmlu:subject=econometrics,method=multiple_choice_joint,model=01-ai_yi-6b,eval_split=test,additional_instructions=yifan,groups=mmlu_econometrics": "mmlu_yifan",
  "mmlu:subject=econometrics,method=multiple_choice_joint,model=anthropic_claude-2.1,eval_split=test,additional_instructions=yifan,groups=mmlu_econometrics": "mmlu_yifan",
  "mmlu:subject=econometrics,method=multiple_choice_joint,model=anthropic_claude-3-opus-20240229,eval_split=test,additional_instructions=yifan,groups=mmlu_econometrics": "mmlu_yifan",
  "mmlu:subject=econometrics,method=multiple_choice_joint,model=anthropic_claude-3-sonnet-20240229,eval_split=test,additional_instructions=yifan,groups=mmlu_econometrics": "mmlu_yifan",
  "mmlu:subject=econometrics,method=multiple_choice_joint,model=anthropic_claude-instant-1.2,eval_split=test,additional_instructions=yifan,groups=mmlu_econometrics": "mmlu_yifan",
  "mmlu:subject=econometrics,method=multiple_choice_joint,model=google_gemini-pro,eval_split=test,additional_instructions=yifan,groups=mmlu_econometrics": "mmlu_yifan",
  "mmlu:subject=econometrics,method=multiple_choice_joint,model=google_gemma-7b,eval_split=test,additional_instructions=yifan,groups=mmlu_econometrics": "mmlu_yifan",
  "mmlu:subject=econometrics,method=multiple_choice_joint,model=google_gemma-7b-it,eval_split=test,additional_instructions=yifan,groups=mmlu_econometrics": "mmlu_yifan",
  "mmlu:subject=econometrics,method=multiple_choice_joint,model=google_text-bison@001,eval_split=test,additional_instructions=yifan,groups=mmlu_econometrics": "mmlu_yifan",
  "mmlu:subject=econometrics,method=multiple_choice_joint,model=google_text-unicorn@001,eval_split=test,additional_instructions=yifan,groups=mmlu_econometrics": "mmlu_yifan",
  "mmlu:subject=econometrics,method=multiple_choice_joint,model=meta_llama-2-7b,eval_split=test,additional_instructions=yifan,groups=mmlu_econometrics": "mmlu_yifan",
  "mmlu:subject=econometrics,method=multiple_choice_joint,model=microsoft_phi-2,eval_split=test,additional_instructions=yifan,groups=mmlu_econometrics": "mmlu_yifan",
  "mmlu:subject=econometrics,method=multiple_choice_joint,model=mistralai_mixtral-8x7b-32kseqlen,eval_split=test,additional_instructions=yifan,groups=mmlu_econometrics": "mmlu_yifan",
  "mmlu:subject=econometrics,method=multiple_choice_joint,model=openai_gpt-3.5-turbo-0613,eval_split=test,additional_instructions=yifan,groups=mmlu_econometrics": "mmlu_yifan",
  "mmlu:subject=econometrics,method=multiple_choice_joint,model=openai_gpt-4-1106-preview,eval_split=test,additional_instructions=yifan,groups=mmlu_econometrics": "mmlu_yifan",
  "mmlu:subject=econometrics,method=multiple_choice_joint,model=qwen_qwen1.5-7b,eval_split=test,additional_instructions=yifan,groups=mmlu_econometrics": "mmlu_yifan",
  "mmlu:subject=us_foreign_policy,method=multiple_choice_joint,model=01-ai_yi-6b,eval_split=test,additional_instructions=yifan,groups=mmlu_us_foreign_policy": "mmlu_yifan",
  "mmlu:subject=us_foreign_policy,method=multiple_choice_joint,model=anthropic_claude-2.1,eval_split=test,additional_instructions=yifan,groups=mmlu_us_foreign_policy": "mmlu_yifan",
  "mmlu:subject=us_foreign_policy,method=multiple_choice_joint,model=anthropic_claude-3-opus-20240229,eval_split=test,additional_instructions=yifan,groups=mmlu_us_foreign_policy": "mmlu_yifan",
  "mmlu:subject=us_foreign_policy,method=multiple_choice_joint,model=anthropic_claude-3-sonnet-20240229,eval_split=test,additional_instructions=yifan,groups=mmlu_us_foreign_policy": "mmlu_yifan",
  "mmlu:subject=us_foreign_policy,method=multiple_choice_joint,model=anthropic_claude-instant-1.2,eval_split=test,additional_instructions=yifan,groups=mmlu_us_foreign_policy": "mmlu_yifan",
  "mmlu:subject=us_foreign_policy,method=multiple_choice_joint,model=google_gemini-pro,eval_split=test,additional_instructions=yifan,groups=mmlu_us_foreign_policy": "mmlu_yifan",
  "mmlu:subject=us_foreign_policy,method=multiple_choice_joint,model=google_gemma-7b,eval_split=test,additional_instructions=yifan,groups=mmlu_us_foreign_policy": "mmlu_yifan",
  "mmlu:subject=us_foreign_policy,method=multiple_choice_joint,model=google_gemma-7b-it,eval_split=test,additional_instructions=yifan,groups=mmlu_us_foreign_policy": "mmlu_yifan",
  "mmlu:subject=us_foreign_policy,method=multiple_choice_joint,model=google_text-bison@001,eval_split=test,additional_instructions=yifan,groups=mmlu_us_foreign_policy": "mmlu_yifan",
  "mmlu:subject=us_foreign_policy,method=multiple_choice_joint,model=google_text-unicorn@001,eval_split=test,additional_instructions=yifan,groups=mmlu_us_foreign_policy": "mmlu_yifan",
  "mmlu:subject=us_foreign_policy,method=multiple_choice_joint,model=meta_llama-2-7b,eval_split=test,additional_instructions=yifan,groups=mmlu_us_foreign_policy": "mmlu_yifan",
  "mmlu:subject=us_foreign_policy,method=multiple_choice_joint,model=microsoft_phi-2,eval_split=test,additional_instructions=yifan,groups=mmlu_us_foreign_policy": "mmlu_yifan",
  "mmlu:subject=us_foreign_policy,method=multiple_choice_joint,model=mistralai_mixtral-8x7b-32kseqlen,eval_split=test,additional_instructions=yifan,groups=mmlu_us_foreign_policy": "mmlu_yifan",
  "mmlu:subject=us_foreign_policy,method=multiple_choice_joint,model=openai_gpt-3.5-turbo-0613,eval_split=test,additional_instructions=yifan,groups=mmlu_us_foreign_policy": "mmlu_yifan",
  "mmlu:subject=us_foreign_policy,method=multiple_choice_joint,model=openai_gpt-4-1106-preview,eval_split=test,additional_instructions=yifan,groups=mmlu_us_foreign_policy": "mmlu_yifan",
  "mmlu:subject=us_foreign_policy,method=multiple_choice_joint,model=qwen_qwen1.5-7b,eval_split=test,additional_instructions=yifan,groups=mmlu_us_foreign_policy": "mmlu_yifan",
  "narrative_qa:model=01-ai_yi-6b,additional_instructions=narrative_qa": "mmlu_yifan",
  "narrative_qa:model=anthropic_claude-2.1,additional_instructions=narrative_qa": "mmlu_yifan",
  "narrative_qa:model=anthropic_claude-instant-1.2,additional_instructions=narrative_qa": "mmlu_yifan",
  "narrative_qa:model=google_gemma-7b,additional_instructions=narrative_qa": "mmlu_yifan",
  "narrative_qa:model=google_gemma-7b-it,additional_instructions=narrative_qa": "mmlu_yifan",
  "narrative_qa:model=google_text-bison@001,additional_instructions=narrative_qa": "mmlu_yifan",
  "narrative_qa:model=google_text-unicorn@001,additional_instructions=narrative_qa": "mmlu_yifan",
  "narrative_qa:model=meta_llama-2-7b,additional_instructions=narrative_qa": "mmlu_yifan",
  "narrative_qa:model=microsoft_phi-2,additional_instructions=narrative_qa": "mmlu_yifan",
  "narrative_qa:model=mistralai_mixtral-8x7b-32kseqlen,additional_instructions=narrative_qa": "mmlu_yifan",
  "narrative_qa:model=openai_gpt-3.5-turbo-0613,additional_instructions=narrative_qa": "mmlu_yifan",
  "narrative_qa:model=openai_gpt-4-1106-preview,additional_instructions=narrative_qa": "mmlu_yifan",
  "narrative_qa:model=qwen_qwen1.5-7b,additional_instructions=narrative_qa": "mmlu_yifan",
  "natural_qa:mode=closedbook,model=01-ai_yi-6b,additional_instructions=natural_qa_closedbook": "mmlu_yifan",
  "natural_qa:mode=closedbook,model=anthropic_claude-2.1,additional_instructions=natural_qa_closedbook": "mmlu_yifan",
  "natural_qa:mode=closedbook,model=anthropic_claude-instant-1.2,additional_instructions=natural_qa_closedbook": "mmlu_yifan",
  "natural_qa:mode=closedbook,model=google_gemma-7b,additional_instructions=natural_qa_closedbook": "mmlu_yifan",
  "natural_qa:mode=closedbook,model=google_gemma-7b-it,additional_instructions=natural_qa_closedbook": "mmlu_yifan",
  "natural_qa:mode=closedbook,model=google_text-bison@001,additional_instructions=natural_qa_closedbook": "mmlu_yifan",
  "natural_qa:mode=closedbook,model=google_text-unicorn@001,additional_instructions=natural_qa_closedbook": "mmlu_yifan",
  "natural_qa:mode=closedbook,model=meta_llama-2-7b,additional_instructions=natural_qa_closedbook": "mmlu_yifan",
  "natural_qa:mode=closedbook,model=microsoft_phi-2,additional_instructions=natural_qa_closedbook": "mmlu_yifan",
  "natural_qa:mode=closedbook,model=mistralai_mixtral-8x7b-32kseqlen,additional_instructions=natural_qa_closedbook": "mmlu_yifan",
  "natural_qa:mode=closedbook,model=openai_gpt-3.5-turbo-0613,additional_instructions=natural_qa_closedbook": "mmlu_yifan",
  "natural_qa:mode=closedbook,model=openai_gpt-4-1106-preview,additional_instructions=natural_qa_closedbook": "mmlu_yifan",
  "natural_qa:mode=closedbook,model=qwen_qwen1.5-7b,additional_instructions=natural_qa_closedbook": "mmlu_yifan",
  "natural_qa:mode=openbook_longans,model=01-ai_yi-6b,additional_instructions=natural_qa_openbook": "mmlu_yifan",
  "natural_qa:mode=openbook_longans,model=google_gemma-7b,additional_instructions=natural_qa_openbook": "mmlu_yifan",
  "natural_qa:mode=openbook_longans,model=google_gemma-7b-it,additional_instructions=natural_qa_openbook": "mmlu_yifan",
  "natural_qa:mode=openbook_longans,model=google_text-bison@001,additional_instructions=natural_qa_openbook": "mmlu_yifan",
  "natural_qa:mode=openbook_longans,model=google_text-unicorn@001,additional_instructions=natural_qa_openbook": "mmlu_yifan",
  "natural_qa:mode=openbook_longans,model=meta_llama-2-7b,additional_instructions=natural_qa_openbook": "mmlu_yifan",
  "natural_qa:mode=openbook_longans,model=microsoft_phi-2,additional_instructions=natural_qa_openbook": "mmlu_yifan",
  "natural_qa:mode=openbook_longans,model=mistralai_mixtral-8x7b-32kseqlen,additional_instructions=natural_qa_openbook": "mmlu_yifan",
  "natural_qa:mode=openbook_longans,model=openai_gpt-3.5-turbo-0613,additional_instructions=natural_qa_openbook": "mmlu_yifan",
  "natural_qa:mode=openbook_longans,model=openai_gpt-4-1106-preview,additional_instructions=natural_qa_openbook": "mmlu_yifan",
  "natural_qa:mode=openbook_longans,model=qwen_qwen1.5-7b,additional_instructions=natural_qa_openbook": "mmlu_yifan"
}