Index of /helm/benchmark_output/runs/v1.7.0

[ICO]NameLast modifiedSizeDescription

[DIR]Parent Directory  -  
[DIR]wmt_14:language_pair=fr-en,model=ai21_jamba-instruct/25-Jul-2024 12:28 -  
[DIR]gsm:model=google_gemma-2-27b/28-Jul-2024 13:34 -  
[DIR]commonsense:dataset=openbookqa,method=multiple_choice_joint,model=mistralai_open-mistral-nemo-2407/30-Jul-2024 13:59 -  
[DIR]gsm:model=mistralai_open-mistral-nemo-2407,stop=none/30-Jul-2024 13:59 -  
[DIR]legalbench:subset=abercrombie,model=mistralai_open-mistral-nemo-2407/30-Jul-2024 13:59 -  
[DIR]legalbench:subset=corporate_lobbying,model=mistralai_open-mistral-nemo-2407/30-Jul-2024 13:59 -  
[DIR]legalbench:subset=function_of_decision_section,model=mistralai_open-mistral-nemo-2407/30-Jul-2024 13:59 -  
[DIR]commonsense:dataset=openbookqa,method=multiple_choice_joint,model=mistralai_mistral-large-2407/30-Jul-2024 14:00 -  
[DIR]gsm:model=mistralai_mistral-large-2407,stop=none/30-Jul-2024 14:00 -  
[DIR]legalbench:subset=abercrombie,model=mistralai_mistral-large-2407/30-Jul-2024 14:00 -  
[DIR]legalbench:subset=corporate_lobbying,model=mistralai_mistral-large-2407/30-Jul-2024 14:00 -  
[DIR]legalbench:subset=function_of_decision_section,model=mistralai_mistral-large-2407/30-Jul-2024 14:00 -  
[DIR]legalbench:subset=international_citizenship_questions,model=mistralai_mistral-large-2407/30-Jul-2024 14:00 -  
[DIR]legalbench:subset=international_citizenship_questions,model=mistralai_open-mistral-nemo-2407/30-Jul-2024 14:00 -  
[DIR]legalbench:subset=proa,model=mistralai_mistral-large-2407/30-Jul-2024 14:00 -  
[DIR]legalbench:subset=proa,model=mistralai_open-mistral-nemo-2407/30-Jul-2024 14:00 -  
[DIR]math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=mistralai_mistral-large-2407/30-Jul-2024 14:00 -  
[DIR]math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=mistralai_open-mistral-nemo-2407/30-Jul-2024 14:00 -  
[DIR]math:subject=counting_and_probability,level=1,use_official_examples=False,use_chain_of_thought=True,model=mistralai_mistral-large-2407/30-Jul-2024 14:00 -  
[DIR]math:subject=counting_and_probability,level=1,use_official_examples=False,use_chain_of_thought=True,model=mistralai_open-mistral-nemo-2407/30-Jul-2024 14:00 -  
[DIR]math:subject=geometry,level=1,use_official_examples=False,use_chain_of_thought=True,model=mistralai_mistral-large-2407/30-Jul-2024 14:00 -  
[DIR]math:subject=geometry,level=1,use_official_examples=False,use_chain_of_thought=True,model=mistralai_open-mistral-nemo-2407/30-Jul-2024 14:00 -  
[DIR]math:subject=intermediate_algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=mistralai_mistral-large-2407/30-Jul-2024 14:00 -  
[DIR]math:subject=intermediate_algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=mistralai_open-mistral-nemo-2407/30-Jul-2024 14:00 -  
[DIR]math:subject=number_theory,level=1,use_official_examples=False,use_chain_of_thought=True,model=mistralai_mistral-large-2407/30-Jul-2024 14:00 -  
[DIR]math:subject=number_theory,level=1,use_official_examples=False,use_chain_of_thought=True,model=mistralai_open-mistral-nemo-2407/30-Jul-2024 14:00 -  
[DIR]math:subject=prealgebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=mistralai_mistral-large-2407/30-Jul-2024 14:00 -  
[DIR]math:subject=prealgebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=mistralai_open-mistral-nemo-2407/30-Jul-2024 14:00 -  
[DIR]math:subject=precalculus,level=1,use_official_examples=False,use_chain_of_thought=True,model=mistralai_mistral-large-2407/30-Jul-2024 14:00 -  
[DIR]math:subject=precalculus,level=1,use_official_examples=False,use_chain_of_thought=True,model=mistralai_open-mistral-nemo-2407/30-Jul-2024 14:00 -  
[DIR]med_qa:model=mistralai_mistral-large-2407/30-Jul-2024 14:00 -  
[DIR]med_qa:model=mistralai_open-mistral-nemo-2407/30-Jul-2024 14:00 -  
[DIR]mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=mistralai_mistral-large-2407/30-Jul-2024 14:00 -  
[DIR]mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=mistralai_open-mistral-nemo-2407/30-Jul-2024 14:00 -  
[DIR]mmlu:subject=college_chemistry,method=multiple_choice_joint,model=mistralai_mistral-large-2407/30-Jul-2024 14:00 -  
[DIR]mmlu:subject=college_chemistry,method=multiple_choice_joint,model=mistralai_open-mistral-nemo-2407/30-Jul-2024 14:00 -  
[DIR]mmlu:subject=computer_security,method=multiple_choice_joint,model=mistralai_mistral-large-2407/30-Jul-2024 14:00 -  
[DIR]mmlu:subject=computer_security,method=multiple_choice_joint,model=mistralai_open-mistral-nemo-2407/30-Jul-2024 14:00 -  
[DIR]mmlu:subject=econometrics,method=multiple_choice_joint,model=mistralai_mistral-large-2407/30-Jul-2024 14:00 -  
[DIR]mmlu:subject=econometrics,method=multiple_choice_joint,model=mistralai_open-mistral-nemo-2407/30-Jul-2024 14:00 -  
[DIR]mmlu:subject=us_foreign_policy,method=multiple_choice_joint,model=mistralai_mistral-large-2407/30-Jul-2024 14:00 -  
[DIR]mmlu:subject=us_foreign_policy,method=multiple_choice_joint,model=mistralai_open-mistral-nemo-2407/30-Jul-2024 14:00 -  
[DIR]narrative_qa:model=mistralai_mistral-large-2407/30-Jul-2024 14:00 -  
[DIR]narrative_qa:model=mistralai_open-mistral-nemo-2407/30-Jul-2024 14:00 -  
[DIR]natural_qa:mode=closedbook,model=mistralai_mistral-large-2407/30-Jul-2024 14:00 -  
[DIR]natural_qa:mode=closedbook,model=mistralai_open-mistral-nemo-2407/30-Jul-2024 14:00 -  
[DIR]natural_qa:mode=openbook_longans,model=mistralai_mistral-large-2407/30-Jul-2024 14:00 -  
[DIR]natural_qa:mode=openbook_longans,model=mistralai_open-mistral-nemo-2407/30-Jul-2024 14:00 -  
[DIR]wmt_14:language_pair=cs-en,model=mistralai_mistral-large-2407/30-Jul-2024 14:00 -  
[DIR]wmt_14:language_pair=cs-en,model=mistralai_open-mistral-nemo-2407/30-Jul-2024 14:00 -  
[DIR]wmt_14:language_pair=de-en,model=mistralai_mistral-large-2407/30-Jul-2024 14:00 -  
[DIR]wmt_14:language_pair=de-en,model=mistralai_open-mistral-nemo-2407/30-Jul-2024 14:00 -  
[DIR]wmt_14:language_pair=fr-en,model=mistralai_mistral-large-2407/30-Jul-2024 14:00 -  
[DIR]wmt_14:language_pair=fr-en,model=mistralai_open-mistral-nemo-2407/30-Jul-2024 14:00 -  
[DIR]wmt_14:language_pair=hi-en,model=mistralai_mistral-large-2407/30-Jul-2024 14:00 -  
[DIR]wmt_14:language_pair=hi-en,model=mistralai_open-mistral-nemo-2407/30-Jul-2024 14:00 -  
[DIR]wmt_14:language_pair=ru-en,model=mistralai_mistral-large-2407/30-Jul-2024 14:00 -  
[DIR]wmt_14:language_pair=ru-en,model=mistralai_open-mistral-nemo-2407/30-Jul-2024 14:00 -  
[DIR]commonsense:dataset=openbookqa,method=multiple_choice_joint,model=openai_gpt-4o-mini-2024-07-18/30-Jul-2024 14:00 -  
[DIR]gsm:model=openai_gpt-4o-mini-2024-07-18,stop=none/30-Jul-2024 14:00 -  
[DIR]legalbench:subset=abercrombie,model=openai_gpt-4o-mini-2024-07-18/30-Jul-2024 14:00 -  
[DIR]legalbench:subset=corporate_lobbying,model=openai_gpt-4o-mini-2024-07-18/30-Jul-2024 14:00 -  
[DIR]legalbench:subset=function_of_decision_section,model=openai_gpt-4o-mini-2024-07-18/30-Jul-2024 14:00 -  
[DIR]legalbench:subset=international_citizenship_questions,model=openai_gpt-4o-mini-2024-07-18/30-Jul-2024 14:00 -  
[DIR]legalbench:subset=proa,model=openai_gpt-4o-mini-2024-07-18/30-Jul-2024 14:00 -  
[DIR]math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=openai_gpt-4o-mini-2024-07-18/30-Jul-2024 14:00 -  
[DIR]math:subject=counting_and_probability,level=1,use_official_examples=False,use_chain_of_thought=True,model=openai_gpt-4o-mini-2024-07-18/30-Jul-2024 14:00 -  
[DIR]math:subject=geometry,level=1,use_official_examples=False,use_chain_of_thought=True,model=openai_gpt-4o-mini-2024-07-18/30-Jul-2024 14:00 -  
[DIR]math:subject=intermediate_algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=openai_gpt-4o-mini-2024-07-18/30-Jul-2024 14:00 -  
[DIR]math:subject=number_theory,level=1,use_official_examples=False,use_chain_of_thought=True,model=openai_gpt-4o-mini-2024-07-18/30-Jul-2024 14:00 -  
[DIR]math:subject=prealgebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=openai_gpt-4o-mini-2024-07-18/30-Jul-2024 14:00 -  
[DIR]math:subject=precalculus,level=1,use_official_examples=False,use_chain_of_thought=True,model=openai_gpt-4o-mini-2024-07-18/30-Jul-2024 14:00 -  
[DIR]med_qa:model=openai_gpt-4o-mini-2024-07-18/30-Jul-2024 14:00 -  
[DIR]mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=openai_gpt-4o-mini-2024-07-18/30-Jul-2024 14:00 -  
[DIR]mmlu:subject=college_chemistry,method=multiple_choice_joint,model=openai_gpt-4o-mini-2024-07-18/30-Jul-2024 14:00 -  
[DIR]mmlu:subject=computer_security,method=multiple_choice_joint,model=openai_gpt-4o-mini-2024-07-18/30-Jul-2024 14:00 -  
[DIR]mmlu:subject=econometrics,method=multiple_choice_joint,model=openai_gpt-4o-mini-2024-07-18/30-Jul-2024 14:00 -  
[DIR]mmlu:subject=us_foreign_policy,method=multiple_choice_joint,model=openai_gpt-4o-mini-2024-07-18/30-Jul-2024 14:00 -  
[DIR]narrative_qa:model=openai_gpt-4o-mini-2024-07-18/30-Jul-2024 14:00 -  
[DIR]natural_qa:mode=closedbook,model=openai_gpt-4o-mini-2024-07-18/30-Jul-2024 14:00 -  
[DIR]natural_qa:mode=openbook_longans,model=openai_gpt-4o-mini-2024-07-18/30-Jul-2024 14:00 -  
[DIR]wmt_14:language_pair=cs-en,model=openai_gpt-4o-mini-2024-07-18/30-Jul-2024 14:00 -  
[DIR]wmt_14:language_pair=de-en,model=openai_gpt-4o-mini-2024-07-18/30-Jul-2024 14:00 -  
[DIR]wmt_14:language_pair=fr-en,model=openai_gpt-4o-mini-2024-07-18/30-Jul-2024 14:00 -  
[DIR]wmt_14:language_pair=hi-en,model=openai_gpt-4o-mini-2024-07-18/30-Jul-2024 14:00 -  
[DIR]wmt_14:language_pair=ru-en,model=openai_gpt-4o-mini-2024-07-18/30-Jul-2024 14:00 -  
[DIR]commonsense:dataset=openbookqa,method=multiple_choice_joint,model=microsoft_phi-3-medium-4k-instruct/30-Jul-2024 14:06 -  
[DIR]commonsense:dataset=openbookqa,method=multiple_choice_joint,model=microsoft_phi-3-small-8k-instruct/30-Jul-2024 14:06 -  
[DIR]commonsense:dataset=openbookqa,method=multiple_choice_joint,model=google_gemma-2-9b/30-Jul-2024 14:06 -  
[DIR]commonsense:dataset=openbookqa,method=multiple_choice_joint,model=google_gemma-2-27b/30-Jul-2024 14:06 -  
[DIR]commonsense:dataset=openbookqa,method=multiple_choice_joint,model=ai21_jamba-instruct/30-Jul-2024 14:06 -  
[DIR]gsm:model=ai21_jamba-instruct,stop=none/30-Jul-2024 14:06 -  
[DIR]legalbench:subset=abercrombie,model=google_gemma-2-27b/30-Jul-2024 14:06 -  
[DIR]legalbench:subset=abercrombie,model=ai21_jamba-instruct/30-Jul-2024 14:06 -  
[DIR]legalbench:subset=abercrombie,model=google_gemma-2-9b/30-Jul-2024 14:06 -  
[DIR]legalbench:subset=abercrombie,model=microsoft_phi-3-medium-4k-instruct/30-Jul-2024 14:06 -  
[DIR]gsm:model=google_gemma-2-9b/30-Jul-2024 14:06 -  
[DIR]legalbench:subset=abercrombie,model=microsoft_phi-3-small-8k-instruct/30-Jul-2024 14:06 -  
[DIR]gsm:model=microsoft_phi-3-medium-4k-instruct,stop=none/30-Jul-2024 14:06 -  
[DIR]gsm:model=microsoft_phi-3-small-8k-instruct,stop=none/30-Jul-2024 14:06 -  
[DIR]legalbench:subset=function_of_decision_section,model=google_gemma-2-27b/30-Jul-2024 14:06 -  
[DIR]legalbench:subset=function_of_decision_section,model=ai21_jamba-instruct/30-Jul-2024 14:06 -  
[DIR]legalbench:subset=corporate_lobbying,model=microsoft_phi-3-medium-4k-instruct/30-Jul-2024 14:06 -  
[DIR]legalbench:subset=corporate_lobbying,model=google_gemma-2-9b/30-Jul-2024 14:06 -  
[DIR]legalbench:subset=corporate_lobbying,model=google_gemma-2-27b/30-Jul-2024 14:06 -  
[DIR]legalbench:subset=corporate_lobbying,model=microsoft_phi-3-small-8k-instruct/30-Jul-2024 14:06 -  
[DIR]legalbench:subset=corporate_lobbying,model=ai21_jamba-instruct/30-Jul-2024 14:06 -  
[DIR]legalbench:subset=function_of_decision_section,model=microsoft_phi-3-medium-4k-instruct/30-Jul-2024 14:06 -  
[DIR]legalbench:subset=function_of_decision_section,model=google_gemma-2-9b/30-Jul-2024 14:06 -  
[DIR]legalbench:subset=function_of_decision_section,model=microsoft_phi-3-small-8k-instruct/30-Jul-2024 14:06 -  
[DIR]legalbench:subset=proa,model=google_gemma-2-27b/30-Jul-2024 14:06 -  
[DIR]legalbench:subset=proa,model=ai21_jamba-instruct/30-Jul-2024 14:06 -  
[DIR]legalbench:subset=proa,model=google_gemma-2-9b/30-Jul-2024 14:06 -  
[DIR]legalbench:subset=international_citizenship_questions,model=ai21_jamba-instruct/30-Jul-2024 14:06 -  
[DIR]legalbench:subset=proa,model=microsoft_phi-3-medium-4k-instruct/30-Jul-2024 14:06 -  
[DIR]legalbench:subset=international_citizenship_questions,model=google_gemma-2-27b/30-Jul-2024 14:06 -  
[DIR]legalbench:subset=international_citizenship_questions,model=google_gemma-2-9b/30-Jul-2024 14:06 -  
[DIR]legalbench:subset=proa,model=microsoft_phi-3-small-8k-instruct/30-Jul-2024 14:06 -  
[DIR]legalbench:subset=international_citizenship_questions,model=microsoft_phi-3-medium-4k-instruct/30-Jul-2024 14:06 -  
[DIR]legalbench:subset=international_citizenship_questions,model=microsoft_phi-3-small-8k-instruct/30-Jul-2024 14:06 -  
[DIR]math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=ai21_jamba-instruct/30-Jul-2024 14:06 -  
[DIR]math:subject=counting_and_probability,level=1,use_official_examples=False,use_chain_of_thought=True,model=ai21_jamba-instruct/30-Jul-2024 14:06 -  
[DIR]math:subject=counting_and_probability,level=1,use_official_examples=False,use_chain_of_thought=True,model=google_gemma-2-27b/30-Jul-2024 14:06 -  
[DIR]math:subject=counting_and_probability,level=1,use_official_examples=False,use_chain_of_thought=True,model=google_gemma-2-9b/30-Jul-2024 14:06 -  
[DIR]math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=google_gemma-2-27b/30-Jul-2024 14:06 -  
[DIR]math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=microsoft_phi-3-medium-4k-instruct/30-Jul-2024 14:06 -  
[DIR]math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=google_gemma-2-9b/30-Jul-2024 14:06 -  
[DIR]math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=microsoft_phi-3-small-8k-instruct/30-Jul-2024 14:06 -  
[DIR]math:subject=counting_and_probability,level=1,use_official_examples=False,use_chain_of_thought=True,model=microsoft_phi-3-small-8k-instruct/30-Jul-2024 14:06 -  
[DIR]math:subject=counting_and_probability,level=1,use_official_examples=False,use_chain_of_thought=True,model=microsoft_phi-3-medium-4k-instruct/30-Jul-2024 14:06 -  
[DIR]math:subject=geometry,level=1,use_official_examples=False,use_chain_of_thought=True,model=ai21_jamba-instruct/30-Jul-2024 14:06 -  
[DIR]math:subject=geometry,level=1,use_official_examples=False,use_chain_of_thought=True,model=google_gemma-2-9b/30-Jul-2024 14:06 -  
[DIR]math:subject=geometry,level=1,use_official_examples=False,use_chain_of_thought=True,model=microsoft_phi-3-small-8k-instruct/30-Jul-2024 14:06 -  
[DIR]math:subject=geometry,level=1,use_official_examples=False,use_chain_of_thought=True,model=microsoft_phi-3-medium-4k-instruct/30-Jul-2024 14:06 -  
[DIR]math:subject=geometry,level=1,use_official_examples=False,use_chain_of_thought=True,model=google_gemma-2-27b/30-Jul-2024 14:06 -  
[DIR]math:subject=intermediate_algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=ai21_jamba-instruct/30-Jul-2024 14:06 -  
[DIR]math:subject=intermediate_algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=google_gemma-2-27b/30-Jul-2024 14:06 -  
[DIR]math:subject=intermediate_algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=google_gemma-2-9b/30-Jul-2024 14:06 -  
[DIR]math:subject=number_theory,level=1,use_official_examples=False,use_chain_of_thought=True,model=ai21_jamba-instruct/30-Jul-2024 14:06 -  
[DIR]math:subject=intermediate_algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=microsoft_phi-3-small-8k-instruct/30-Jul-2024 14:06 -  
[DIR]math:subject=intermediate_algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=microsoft_phi-3-medium-4k-instruct/30-Jul-2024 14:06 -  
[DIR]math:subject=number_theory,level=1,use_official_examples=False,use_chain_of_thought=True,model=google_gemma-2-27b/30-Jul-2024 14:06 -  
[DIR]math:subject=number_theory,level=1,use_official_examples=False,use_chain_of_thought=True,model=google_gemma-2-9b/30-Jul-2024 14:06 -  
[DIR]math:subject=number_theory,level=1,use_official_examples=False,use_chain_of_thought=True,model=microsoft_phi-3-small-8k-instruct/30-Jul-2024 14:06 -  
[DIR]math:subject=number_theory,level=1,use_official_examples=False,use_chain_of_thought=True,model=microsoft_phi-3-medium-4k-instruct/30-Jul-2024 14:06 -  
[DIR]math:subject=prealgebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=google_gemma-2-27b/30-Jul-2024 14:06 -  
[DIR]math:subject=prealgebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=ai21_jamba-instruct/30-Jul-2024 14:06 -  
[DIR]math:subject=prealgebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=microsoft_phi-3-small-8k-instruct/30-Jul-2024 14:06 -  
[DIR]math:subject=prealgebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=google_gemma-2-9b/30-Jul-2024 14:06 -  
[DIR]math:subject=prealgebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=microsoft_phi-3-medium-4k-instruct/30-Jul-2024 14:06 -  
[DIR]math:subject=precalculus,level=1,use_official_examples=False,use_chain_of_thought=True,model=ai21_jamba-instruct/30-Jul-2024 14:06 -  
[DIR]math:subject=precalculus,level=1,use_official_examples=False,use_chain_of_thought=True,model=google_gemma-2-27b/30-Jul-2024 14:06 -  
[DIR]math:subject=precalculus,level=1,use_official_examples=False,use_chain_of_thought=True,model=google_gemma-2-9b/30-Jul-2024 14:06 -  
[DIR]math:subject=precalculus,level=1,use_official_examples=False,use_chain_of_thought=True,model=microsoft_phi-3-small-8k-instruct/30-Jul-2024 14:06 -  
[DIR]math:subject=precalculus,level=1,use_official_examples=False,use_chain_of_thought=True,model=microsoft_phi-3-medium-4k-instruct/30-Jul-2024 14:06 -  
[DIR]mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=ai21_jamba-instruct/30-Jul-2024 14:06 -  
[DIR]mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=google_gemma-2-27b/30-Jul-2024 14:06 -  
[DIR]mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=google_gemma-2-9b/30-Jul-2024 14:06 -  
[DIR]mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=microsoft_phi-3-medium-4k-instruct/30-Jul-2024 14:06 -  
[DIR]mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=microsoft_phi-3-small-8k-instruct/30-Jul-2024 14:06 -  
[DIR]mmlu:subject=college_chemistry,method=multiple_choice_joint,model=ai21_jamba-instruct/30-Jul-2024 14:06 -  
[DIR]mmlu:subject=college_chemistry,method=multiple_choice_joint,model=google_gemma-2-9b/30-Jul-2024 14:06 -  
[DIR]med_qa:model=ai21_jamba-instruct/30-Jul-2024 14:06 -  
[DIR]mmlu:subject=college_chemistry,method=multiple_choice_joint,model=google_gemma-2-27b/30-Jul-2024 14:06 -  
[DIR]mmlu:subject=college_chemistry,method=multiple_choice_joint,model=microsoft_phi-3-medium-4k-instruct/30-Jul-2024 14:06 -  
[DIR]med_qa:model=google_gemma-2-9b/30-Jul-2024 14:06 -  
[DIR]med_qa:model=microsoft_phi-3-small-8k-instruct/30-Jul-2024 14:06 -  
[DIR]med_qa:model=google_gemma-2-27b/30-Jul-2024 14:06 -  
[DIR]mmlu:subject=college_chemistry,method=multiple_choice_joint,model=microsoft_phi-3-small-8k-instruct/30-Jul-2024 14:06 -  
[DIR]med_qa:model=microsoft_phi-3-medium-4k-instruct/30-Jul-2024 14:06 -  
[DIR]mmlu:subject=computer_security,method=multiple_choice_joint,model=google_gemma-2-27b/30-Jul-2024 14:06 -  
[DIR]mmlu:subject=computer_security,method=multiple_choice_joint,model=ai21_jamba-instruct/30-Jul-2024 14:06 -  
[DIR]mmlu:subject=computer_security,method=multiple_choice_joint,model=microsoft_phi-3-medium-4k-instruct/30-Jul-2024 14:06 -  
[DIR]mmlu:subject=computer_security,method=multiple_choice_joint,model=google_gemma-2-9b/30-Jul-2024 14:06 -  
[DIR]mmlu:subject=computer_security,method=multiple_choice_joint,model=microsoft_phi-3-small-8k-instruct/30-Jul-2024 14:06 -  
[DIR]mmlu:subject=econometrics,method=multiple_choice_joint,model=google_gemma-2-9b/30-Jul-2024 14:06 -  
[DIR]mmlu:subject=econometrics,method=multiple_choice_joint,model=ai21_jamba-instruct/30-Jul-2024 14:06 -  
[DIR]mmlu:subject=econometrics,method=multiple_choice_joint,model=google_gemma-2-27b/30-Jul-2024 14:06 -  
[DIR]mmlu:subject=econometrics,method=multiple_choice_joint,model=microsoft_phi-3-medium-4k-instruct/30-Jul-2024 14:06 -  
[DIR]mmlu:subject=econometrics,method=multiple_choice_joint,model=microsoft_phi-3-small-8k-instruct/30-Jul-2024 14:06 -  
[DIR]mmlu:subject=us_foreign_policy,method=multiple_choice_joint,model=google_gemma-2-27b/30-Jul-2024 14:07 -  
[DIR]mmlu:subject=us_foreign_policy,method=multiple_choice_joint,model=ai21_jamba-instruct/30-Jul-2024 14:07 -  
[DIR]mmlu:subject=us_foreign_policy,method=multiple_choice_joint,model=microsoft_phi-3-medium-4k-instruct/30-Jul-2024 14:07 -  
[DIR]mmlu:subject=us_foreign_policy,method=multiple_choice_joint,model=google_gemma-2-9b/30-Jul-2024 14:07 -  
[DIR]mmlu:subject=us_foreign_policy,method=multiple_choice_joint,model=microsoft_phi-3-small-8k-instruct/30-Jul-2024 14:07 -  
[DIR]narrative_qa:model=google_gemma-2-27b-it/30-Jul-2024 14:07 -  
[DIR]narrative_qa:model=ai21_jamba-instruct/30-Jul-2024 14:07 -  
[DIR]narrative_qa:model=google_gemma-2-27b/30-Jul-2024 14:07 -  
[DIR]narrative_qa:model=microsoft_phi-3-small-8k-instruct/30-Jul-2024 14:07 -  
[DIR]narrative_qa:model=google_gemma-2-9b/30-Jul-2024 14:07 -  
[DIR]narrative_qa:model=microsoft_phi-3-medium-4k-instruct/30-Jul-2024 14:07 -  
[DIR]narrative_qa:model=google_gemma-2-9b-it/30-Jul-2024 14:07 -  
[DIR]natural_qa:mode=closedbook,model=ai21_jamba-instruct/30-Jul-2024 14:07 -  
[DIR]natural_qa:mode=closedbook,model=microsoft_phi-3-medium-4k-instruct/30-Jul-2024 14:07 -  
[DIR]natural_qa:mode=closedbook,model=google_gemma-2-9b/30-Jul-2024 14:07 -  
[DIR]natural_qa:mode=closedbook,model=google_gemma-2-27b/30-Jul-2024 14:07 -  
[DIR]natural_qa:mode=openbook_longans,model=google_gemma-2-27b/30-Jul-2024 14:07 -  
[DIR]natural_qa:mode=openbook_longans,model=ai21_jamba-instruct/30-Jul-2024 14:07 -  
[DIR]natural_qa:mode=openbook_longans,model=google_gemma-2-9b/30-Jul-2024 14:07 -  
[DIR]natural_qa:mode=closedbook,model=microsoft_phi-3-small-8k-instruct/30-Jul-2024 14:07 -  
[DIR]natural_qa:mode=openbook_longans,model=microsoft_phi-3-medium-4k-instruct/30-Jul-2024 14:07 -  
[DIR]wmt_14:language_pair=cs-en,model=ai21_jamba-instruct/30-Jul-2024 14:07 -  
[DIR]natural_qa:mode=openbook_longans,model=microsoft_phi-3-small-8k-instruct/30-Jul-2024 14:07 -  
[DIR]wmt_14:language_pair=de-en,model=ai21_jamba-instruct/30-Jul-2024 14:07 -  
[DIR]wmt_14:language_pair=cs-en,model=google_gemma-2-9b/30-Jul-2024 14:07 -  
[DIR]wmt_14:language_pair=cs-en,model=microsoft_phi-3-medium-4k-instruct/30-Jul-2024 14:07 -  
[DIR]wmt_14:language_pair=cs-en,model=google_gemma-2-27b/30-Jul-2024 14:07 -  
[DIR]wmt_14:language_pair=cs-en,model=microsoft_phi-3-small-8k-instruct/30-Jul-2024 14:07 -  
[DIR]wmt_14:language_pair=de-en,model=google_gemma-2-27b/30-Jul-2024 14:07 -  
[DIR]wmt_14:language_pair=de-en,model=google_gemma-2-9b/30-Jul-2024 14:07 -  
[DIR]wmt_14:language_pair=de-en,model=microsoft_phi-3-medium-4k-instruct/30-Jul-2024 14:07 -  
[DIR]wmt_14:language_pair=de-en,model=microsoft_phi-3-small-8k-instruct/30-Jul-2024 14:07 -  
[DIR]wmt_14:language_pair=fr-en,model=microsoft_phi-3-small-8k-instruct/30-Jul-2024 14:07 -  
[DIR]wmt_14:language_pair=fr-en,model=google_gemma-2-27b/30-Jul-2024 14:07 -  
[DIR]wmt_14:language_pair=fr-en,model=google_gemma-2-9b/30-Jul-2024 14:07 -  
[DIR]wmt_14:language_pair=fr-en,model=microsoft_phi-3-medium-4k-instruct/30-Jul-2024 14:07 -  
[DIR]wmt_14:language_pair=hi-en,model=ai21_jamba-instruct/30-Jul-2024 14:07 -  
[DIR]wmt_14:language_pair=hi-en,model=google_gemma-2-27b/30-Jul-2024 14:07 -  
[DIR]wmt_14:language_pair=hi-en,model=google_gemma-2-9b/30-Jul-2024 14:07 -  
[DIR]wmt_14:language_pair=hi-en,model=microsoft_phi-3-medium-4k-instruct/30-Jul-2024 14:07 -  
[DIR]wmt_14:language_pair=hi-en,model=microsoft_phi-3-small-8k-instruct/30-Jul-2024 14:07 -  
[DIR]wmt_14:language_pair=ru-en,model=ai21_jamba-instruct/30-Jul-2024 14:07 -  
[DIR]wmt_14:language_pair=ru-en,model=google_gemma-2-9b/30-Jul-2024 14:07 -  
[DIR]wmt_14:language_pair=ru-en,model=google_gemma-2-27b/30-Jul-2024 14:07 -  
[DIR]wmt_14:language_pair=ru-en,model=microsoft_phi-3-small-8k-instruct/30-Jul-2024 14:07 -  
[DIR]wmt_14:language_pair=ru-en,model=microsoft_phi-3-medium-4k-instruct/30-Jul-2024 14:07 -  
[DIR]legalbench:subset=abercrombie,model=google_gemma-2-9b-it/30-Jul-2024 16:30 -  
[DIR]legalbench:subset=abercrombie,model=google_gemma-2-27b-it/30-Jul-2024 16:30 -  
[DIR]commonsense:dataset=openbookqa,method=multiple_choice_joint,model=google_gemma-2-27b-it/30-Jul-2024 16:30 -  
[DIR]commonsense:dataset=openbookqa,method=multiple_choice_joint,model=google_gemma-2-9b-it/30-Jul-2024 16:30 -  
[DIR]gsm:model=google_gemma-2-27b-it,stop=none/30-Jul-2024 16:30 -  
[DIR]legalbench:subset=corporate_lobbying,model=google_gemma-2-9b-it/30-Jul-2024 16:30 -  
[DIR]gsm:model=google_gemma-2-9b-it,stop=none/30-Jul-2024 16:30 -  
[DIR]math:subject=counting_and_probability,level=1,use_official_examples=False,use_chain_of_thought=True,model=google_gemma-2-27b-it/30-Jul-2024 16:30 -  
[DIR]math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=google_gemma-2-9b-it/30-Jul-2024 16:30 -  
[DIR]math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=google_gemma-2-27b-it/30-Jul-2024 16:30 -  
[DIR]math:subject=counting_and_probability,level=1,use_official_examples=False,use_chain_of_thought=True,model=google_gemma-2-9b-it/30-Jul-2024 16:30 -  
[DIR]math:subject=geometry,level=1,use_official_examples=False,use_chain_of_thought=True,model=google_gemma-2-27b-it/30-Jul-2024 16:30 -  
[DIR]math:subject=geometry,level=1,use_official_examples=False,use_chain_of_thought=True,model=google_gemma-2-9b-it/30-Jul-2024 16:30 -  
[DIR]math:subject=intermediate_algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=google_gemma-2-27b-it/30-Jul-2024 16:30 -  
[DIR]math:subject=intermediate_algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=google_gemma-2-9b-it/30-Jul-2024 16:30 -  
[DIR]math:subject=number_theory,level=1,use_official_examples=False,use_chain_of_thought=True,model=google_gemma-2-27b-it/30-Jul-2024 16:30 -  
[DIR]math:subject=number_theory,level=1,use_official_examples=False,use_chain_of_thought=True,model=google_gemma-2-9b-it/30-Jul-2024 16:30 -  
[DIR]math:subject=prealgebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=google_gemma-2-9b-it/30-Jul-2024 16:30 -  
[DIR]math:subject=prealgebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=google_gemma-2-27b-it/30-Jul-2024 16:30 -  
[DIR]math:subject=precalculus,level=1,use_official_examples=False,use_chain_of_thought=True,model=google_gemma-2-9b-it/30-Jul-2024 16:30 -  
[DIR]math:subject=precalculus,level=1,use_official_examples=False,use_chain_of_thought=True,model=google_gemma-2-27b-it/30-Jul-2024 16:30 -  
[DIR]mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=google_gemma-2-27b-it/30-Jul-2024 16:30 -  
[DIR]mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=google_gemma-2-9b-it/30-Jul-2024 16:30 -  
[DIR]mmlu:subject=college_chemistry,method=multiple_choice_joint,model=google_gemma-2-9b-it/30-Jul-2024 16:30 -  
[DIR]mmlu:subject=college_chemistry,method=multiple_choice_joint,model=google_gemma-2-27b-it/30-Jul-2024 16:30 -  
[DIR]mmlu:subject=computer_security,method=multiple_choice_joint,model=google_gemma-2-27b-it/30-Jul-2024 16:30 -  
[DIR]mmlu:subject=computer_security,method=multiple_choice_joint,model=google_gemma-2-9b-it/30-Jul-2024 16:30 -  
[DIR]mmlu:subject=econometrics,method=multiple_choice_joint,model=google_gemma-2-9b-it/30-Jul-2024 16:30 -  
[DIR]mmlu:subject=econometrics,method=multiple_choice_joint,model=google_gemma-2-27b-it/30-Jul-2024 16:30 -  
[DIR]mmlu:subject=us_foreign_policy,method=multiple_choice_joint,model=google_gemma-2-27b-it/30-Jul-2024 16:30 -  
[DIR]mmlu:subject=us_foreign_policy,method=multiple_choice_joint,model=google_gemma-2-9b-it/30-Jul-2024 16:30 -  
[DIR]natural_qa:mode=closedbook,model=google_gemma-2-27b-it/30-Jul-2024 16:30 -  
[DIR]natural_qa:mode=closedbook,model=google_gemma-2-9b-it/30-Jul-2024 16:30 -  
[DIR]natural_qa:mode=openbook_longans,model=google_gemma-2-9b-it/30-Jul-2024 16:30 -  
[DIR]natural_qa:mode=openbook_longans,model=google_gemma-2-27b-it/30-Jul-2024 16:30 -  
[DIR]legalbench:subset=function_of_decision_section,model=google_gemma-2-9b-it/31-Jul-2024 09:24 -  
[DIR]legalbench:subset=function_of_decision_section,model=google_gemma-2-27b-it/31-Jul-2024 09:24 -  
[DIR]legalbench:subset=proa,model=google_gemma-2-9b-it/31-Jul-2024 09:24 -  
[DIR]legalbench:subset=proa,model=google_gemma-2-27b-it/31-Jul-2024 09:24 -  
[DIR]legalbench:subset=international_citizenship_questions,model=google_gemma-2-9b-it/31-Jul-2024 09:24 -  
[DIR]legalbench:subset=international_citizenship_questions,model=google_gemma-2-27b-it/31-Jul-2024 09:24 -  
[DIR]legalbench:subset=corporate_lobbying,model=google_gemma-2-27b-it/31-Jul-2024 09:24 -  
[DIR]gsm:model=microsoft_phi-3-medium-4k-instruct/31-Jul-2024 09:24 -  
[DIR]med_qa:model=google_gemma-2-27b-it/31-Jul-2024 09:24 -  
[DIR]med_qa:model=google_gemma-2-9b-it/31-Jul-2024 09:24 -  
[DIR]wmt_14:language_pair=de-en,model=google_gemma-2-27b-it/31-Jul-2024 09:24 -  
[DIR]wmt_14:language_pair=fr-en,model=google_gemma-2-27b-it/31-Jul-2024 09:24 -  
[DIR]wmt_14:language_pair=hi-en,model=google_gemma-2-27b-it/31-Jul-2024 09:24 -  
[DIR]wmt_14:language_pair=fr-en,model=google_gemma-2-9b-it/31-Jul-2024 09:24 -  
[DIR]wmt_14:language_pair=cs-en,model=google_gemma-2-27b-it/31-Jul-2024 09:24 -  
[DIR]wmt_14:language_pair=cs-en,model=google_gemma-2-9b-it/31-Jul-2024 09:24 -  
[DIR]wmt_14:language_pair=de-en,model=google_gemma-2-9b-it/31-Jul-2024 09:24 -  
[DIR]wmt_14:language_pair=hi-en,model=google_gemma-2-9b-it/31-Jul-2024 09:24 -  
[DIR]wmt_14:language_pair=ru-en,model=google_gemma-2-27b-it/31-Jul-2024 09:24 -  
[DIR]wmt_14:language_pair=ru-en,model=google_gemma-2-9b-it/31-Jul-2024 09:24 -  
[DIR]gsm:model=google_gemma-2-27b-it/31-Jul-2024 09:57 -  
[DIR]eval_cache/06-Aug-2024 19:24 -  
[DIR]legalbench:subset=abercrombie,model=meta_llama-3.1-70b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]legalbench:subset=abercrombie,model=meta_llama-3.1-405b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]commonsense:dataset=openbookqa,method=multiple_choice_joint,model=meta_llama-3.1-70b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]commonsense:dataset=openbookqa,method=multiple_choice_joint,model=meta_llama-3.1-405b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]commonsense:dataset=openbookqa,method=multiple_choice_joint,model=meta_llama-3.1-8b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]legalbench:subset=abercrombie,model=meta_llama-3.1-8b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]gsm:model=meta_llama-3.1-70b-instruct-turbo,stop=none/07-Aug-2024 10:31 -  
[DIR]legalbench:subset=function_of_decision_section,model=meta_llama-3.1-70b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]legalbench:subset=corporate_lobbying,model=meta_llama-3.1-405b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]gsm:model=meta_llama-3.1-8b-instruct-turbo,stop=none/07-Aug-2024 10:31 -  
[DIR]legalbench:subset=corporate_lobbying,model=meta_llama-3.1-8b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]gsm:model=meta_llama-3.1-405b-instruct-turbo,stop=none/07-Aug-2024 10:31 -  
[DIR]legalbench:subset=corporate_lobbying,model=meta_llama-3.1-70b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]legalbench:subset=function_of_decision_section,model=meta_llama-3.1-405b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]legalbench:subset=function_of_decision_section,model=meta_llama-3.1-8b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]legalbench:subset=proa,model=meta_llama-3.1-70b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]legalbench:subset=proa,model=meta_llama-3.1-8b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]legalbench:subset=proa,model=meta_llama-3.1-405b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=meta_llama-3.1-405b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]math:subject=counting_and_probability,level=1,use_official_examples=False,use_chain_of_thought=True,model=meta_llama-3.1-405b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]math:subject=counting_and_probability,level=1,use_official_examples=False,use_chain_of_thought=True,model=meta_llama-3.1-70b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=meta_llama-3.1-8b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=meta_llama-3.1-70b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]legalbench:subset=international_citizenship_questions,model=meta_llama-3.1-70b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]legalbench:subset=international_citizenship_questions,model=meta_llama-3.1-405b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]legalbench:subset=international_citizenship_questions,model=meta_llama-3.1-8b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]math:subject=counting_and_probability,level=1,use_official_examples=False,use_chain_of_thought=True,model=meta_llama-3.1-8b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]math:subject=geometry,level=1,use_official_examples=False,use_chain_of_thought=True,model=meta_llama-3.1-70b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]math:subject=geometry,level=1,use_official_examples=False,use_chain_of_thought=True,model=meta_llama-3.1-405b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]math:subject=geometry,level=1,use_official_examples=False,use_chain_of_thought=True,model=meta_llama-3.1-8b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]math:subject=intermediate_algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=meta_llama-3.1-405b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]math:subject=number_theory,level=1,use_official_examples=False,use_chain_of_thought=True,model=meta_llama-3.1-70b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]math:subject=intermediate_algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=meta_llama-3.1-8b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]math:subject=intermediate_algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=meta_llama-3.1-70b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]math:subject=number_theory,level=1,use_official_examples=False,use_chain_of_thought=True,model=meta_llama-3.1-405b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]math:subject=number_theory,level=1,use_official_examples=False,use_chain_of_thought=True,model=meta_llama-3.1-8b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]math:subject=prealgebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=meta_llama-3.1-70b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]math:subject=prealgebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=meta_llama-3.1-405b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]math:subject=prealgebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=meta_llama-3.1-8b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]math:subject=precalculus,level=1,use_official_examples=False,use_chain_of_thought=True,model=meta_llama-3.1-8b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]math:subject=precalculus,level=1,use_official_examples=False,use_chain_of_thought=True,model=meta_llama-3.1-405b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]math:subject=precalculus,level=1,use_official_examples=False,use_chain_of_thought=True,model=meta_llama-3.1-70b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=meta_llama-3.1-70b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=meta_llama-3.1-405b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=meta_llama-3.1-8b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]mmlu:subject=college_chemistry,method=multiple_choice_joint,model=meta_llama-3.1-70b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]mmlu:subject=college_chemistry,method=multiple_choice_joint,model=meta_llama-3.1-405b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]mmlu:subject=college_chemistry,method=multiple_choice_joint,model=meta_llama-3.1-8b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]mmlu:subject=computer_security,method=multiple_choice_joint,model=meta_llama-3.1-405b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]med_qa:model=meta_llama-3.1-405b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]med_qa:model=meta_llama-3.1-8b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]med_qa:model=meta_llama-3.1-70b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]mmlu:subject=computer_security,method=multiple_choice_joint,model=meta_llama-3.1-8b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]mmlu:subject=computer_security,method=multiple_choice_joint,model=meta_llama-3.1-70b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]mmlu:subject=econometrics,method=multiple_choice_joint,model=meta_llama-3.1-70b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]mmlu:subject=econometrics,method=multiple_choice_joint,model=meta_llama-3.1-405b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]mmlu:subject=econometrics,method=multiple_choice_joint,model=meta_llama-3.1-8b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]mmlu:subject=us_foreign_policy,method=multiple_choice_joint,model=meta_llama-3.1-70b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]mmlu:subject=us_foreign_policy,method=multiple_choice_joint,model=meta_llama-3.1-405b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]mmlu:subject=us_foreign_policy,method=multiple_choice_joint,model=meta_llama-3.1-8b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]natural_qa:mode=closedbook,model=meta_llama-3.1-70b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]narrative_qa:model=meta_llama-3.1-405b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]narrative_qa:model=meta_llama-3.1-70b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]narrative_qa:model=meta_llama-3.1-8b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]natural_qa:mode=openbook_longans,model=meta_llama-3.1-70b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]natural_qa:mode=closedbook,model=meta_llama-3.1-405b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]natural_qa:mode=closedbook,model=meta_llama-3.1-8b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]natural_qa:mode=openbook_longans,model=meta_llama-3.1-405b-instruct-turbo/07-Aug-2024 10:31 -  
[DIR]natural_qa:mode=openbook_longans,model=meta_llama-3.1-8b-instruct-turbo/07-Aug-2024 10:32 -  
[DIR]wmt_14:language_pair=de-en,model=meta_llama-3.1-405b-instruct-turbo/07-Aug-2024 16:21 -  
[DIR]wmt_14:language_pair=de-en,model=meta_llama-3.1-8b-instruct-turbo/07-Aug-2024 16:21 -  
[DIR]wmt_14:language_pair=cs-en,model=meta_llama-3.1-8b-instruct-turbo/07-Aug-2024 16:21 -  
[DIR]wmt_14:language_pair=cs-en,model=meta_llama-3.1-70b-instruct-turbo/07-Aug-2024 16:21 -  
[DIR]wmt_14:language_pair=cs-en,model=meta_llama-3.1-405b-instruct-turbo/07-Aug-2024 16:21 -  
[DIR]wmt_14:language_pair=fr-en,model=meta_llama-3.1-70b-instruct-turbo/07-Aug-2024 16:21 -  
[DIR]wmt_14:language_pair=fr-en,model=meta_llama-3.1-405b-instruct-turbo/07-Aug-2024 16:21 -  
[DIR]wmt_14:language_pair=de-en,model=meta_llama-3.1-70b-instruct-turbo/07-Aug-2024 16:21 -  
[DIR]wmt_14:language_pair=hi-en,model=meta_llama-3.1-70b-instruct-turbo/07-Aug-2024 16:21 -  
[DIR]wmt_14:language_pair=ru-en,model=meta_llama-3.1-8b-instruct-turbo/07-Aug-2024 16:21 -  
[DIR]wmt_14:language_pair=ru-en,model=meta_llama-3.1-70b-instruct-turbo/07-Aug-2024 16:21 -  
[DIR]wmt_14:language_pair=hi-en,model=meta_llama-3.1-8b-instruct-turbo/07-Aug-2024 16:21 -  
[DIR]wmt_14:language_pair=fr-en,model=meta_llama-3.1-8b-instruct-turbo/07-Aug-2024 16:21 -  
[DIR]wmt_14:language_pair=ru-en,model=meta_llama-3.1-405b-instruct-turbo/07-Aug-2024 16:21 -  
[DIR]wmt_14:language_pair=hi-en,model=meta_llama-3.1-405b-instruct-turbo/07-Aug-2024 16:21 -  

Apache/2.2.15 (CentOS) Server at nlp.stanford.edu Port 443