Index of /helm/benchmark_output/runs/v1.8.0

[ICO]NameLast modifiedSizeDescription

[DIR]Parent Directory  -  
[DIR]wmt_14:language_pair=hi-en,model=writer_palmyra-x-004,stop=none/05-Sep-2024 09:49 -  
[DIR]wmt_14:language_pair=ru-en,model=writer_palmyra-x-004,stop=none/05-Sep-2024 09:49 -  
[DIR]wmt_14:language_pair=fr-en,model=writer_palmyra-x-004,stop=none/05-Sep-2024 09:49 -  
[DIR]wmt_14:language_pair=de-en,model=writer_palmyra-x-004,stop=none/05-Sep-2024 09:49 -  
[DIR]natural_qa:mode=closedbook,model=writer_palmyra-x-004,stop=none/05-Sep-2024 09:49 -  
[DIR]narrative_qa:model=writer_palmyra-x-004,stop=none/05-Sep-2024 09:49 -  
[DIR]natural_qa:mode=openbook_longans,model=writer_palmyra-x-004,stop=none/05-Sep-2024 09:49 -  
[DIR]wmt_14:language_pair=cs-en,model=writer_palmyra-x-004,stop=none/05-Sep-2024 09:49 -  
[DIR]med_qa:model=writer_palmyra-x-004/05-Sep-2024 09:49 -  
[DIR]mmlu:subject=us_foreign_policy,method=multiple_choice_joint,model=writer_palmyra-x-004/05-Sep-2024 09:49 -  
[DIR]mmlu:subject=econometrics,method=multiple_choice_joint,model=writer_palmyra-x-004/05-Sep-2024 09:49 -  
[DIR]mmlu:subject=computer_security,method=multiple_choice_joint,model=writer_palmyra-x-004/05-Sep-2024 09:49 -  
[DIR]mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=writer_palmyra-x-004/05-Sep-2024 09:49 -  
[DIR]mmlu:subject=college_chemistry,method=multiple_choice_joint,model=writer_palmyra-x-004/05-Sep-2024 09:49 -  
[DIR]math:subject=precalculus,level=1,use_official_examples=False,use_chain_of_thought=True,model=writer_palmyra-x-004,stop=none/05-Sep-2024 09:49 -  
[DIR]math:subject=prealgebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=writer_palmyra-x-004,stop=none/05-Sep-2024 09:49 -  
[DIR]math:subject=number_theory,level=1,use_official_examples=False,use_chain_of_thought=True,model=writer_palmyra-x-004,stop=none/05-Sep-2024 09:49 -  
[DIR]gsm:model=writer_palmyra-x-004,stop=none/05-Sep-2024 09:49 -  
[DIR]math:subject=intermediate_algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=writer_palmyra-x-004,stop=none/05-Sep-2024 09:49 -  
[DIR]math:subject=geometry,level=1,use_official_examples=False,use_chain_of_thought=True,model=writer_palmyra-x-004,stop=none/05-Sep-2024 09:49 -  
[DIR]math:subject=counting_and_probability,level=1,use_official_examples=False,use_chain_of_thought=True,model=writer_palmyra-x-004,stop=none/05-Sep-2024 09:49 -  
[DIR]legalbench:subset=corporate_lobbying,model=writer_palmyra-x-004,stop=none/05-Sep-2024 09:49 -  
[DIR]legalbench:subset=international_citizenship_questions,model=writer_palmyra-x-004,stop=none/05-Sep-2024 09:49 -  
[DIR]commonsense:dataset=openbookqa,method=multiple_choice_joint,model=writer_palmyra-x-004/05-Sep-2024 09:49 -  
[DIR]legalbench:subset=function_of_decision_section,model=writer_palmyra-x-004,stop=none/05-Sep-2024 09:49 -  
[DIR]math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=writer_palmyra-x-004,stop=none/05-Sep-2024 09:49 -  
[DIR]legalbench:subset=proa,model=writer_palmyra-x-004,stop=none/05-Sep-2024 09:49 -  
[DIR]legalbench:subset=abercrombie,model=writer_palmyra-x-004,stop=none/05-Sep-2024 09:49 -  
[DIR]wmt_14:language_pair=ru-en,model=nvidia_nemotron-4-340b-instruct/02-Sep-2024 19:21 -  
[DIR]wmt_14:language_pair=hi-en,model=nvidia_nemotron-4-340b-instruct/02-Sep-2024 19:21 -  
[DIR]wmt_14:language_pair=fr-en,model=nvidia_nemotron-4-340b-instruct/02-Sep-2024 19:21 -  
[DIR]wmt_14:language_pair=de-en,model=nvidia_nemotron-4-340b-instruct/02-Sep-2024 19:21 -  
[DIR]wmt_14:language_pair=cs-en,model=nvidia_nemotron-4-340b-instruct/02-Sep-2024 19:21 -  
[DIR]legalbench:subset=international_citizenship_questions,model=nvidia_nemotron-4-340b-instruct/02-Sep-2024 19:21 -  
[DIR]legalbench:subset=corporate_lobbying,model=nvidia_nemotron-4-340b-instruct/02-Sep-2024 19:21 -  
[DIR]legalbench:subset=function_of_decision_section,model=nvidia_nemotron-4-340b-instruct/02-Sep-2024 19:21 -  
[DIR]legalbench:subset=proa,model=nvidia_nemotron-4-340b-instruct/02-Sep-2024 19:21 -  
[DIR]legalbench:subset=abercrombie,model=nvidia_nemotron-4-340b-instruct/02-Sep-2024 19:21 -  
[DIR]math:subject=precalculus,level=1,use_official_examples=False,use_chain_of_thought=True,model=nvidia_nemotron-4-340b-instruct/01-Sep-2024 22:21 -  
[DIR]math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=nvidia_nemotron-4-340b-instruct/01-Sep-2024 22:21 -  
[DIR]math:subject=prealgebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=nvidia_nemotron-4-340b-instruct/01-Sep-2024 22:21 -  
[DIR]math:subject=geometry,level=1,use_official_examples=False,use_chain_of_thought=True,model=nvidia_nemotron-4-340b-instruct/01-Sep-2024 22:21 -  
[DIR]math:subject=counting_and_probability,level=1,use_official_examples=False,use_chain_of_thought=True,model=nvidia_nemotron-4-340b-instruct/01-Sep-2024 22:21 -  
[DIR]math:subject=intermediate_algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=nvidia_nemotron-4-340b-instruct/01-Sep-2024 22:04 -  
[DIR]math:subject=number_theory,level=1,use_official_examples=False,use_chain_of_thought=True,model=nvidia_nemotron-4-340b-instruct/01-Sep-2024 22:04 -  
[DIR]eval_cache/01-Sep-2024 21:45 -  
[DIR]wmt_14:language_pair=hi-en,model=ai21_jamba-1.5-mini/01-Sep-2024 20:40 -  
[DIR]wmt_14:language_pair=hi-en,model=ai21_jamba-1.5-large/01-Sep-2024 20:40 -  
[DIR]wmt_14:language_pair=fr-en,model=ai21_jamba-1.5-mini/01-Sep-2024 20:40 -  
[DIR]wmt_14:language_pair=ru-en,model=ai21_jamba-1.5-mini/01-Sep-2024 20:40 -  
[DIR]wmt_14:language_pair=ru-en,model=ai21_jamba-1.5-large/01-Sep-2024 20:40 -  
[DIR]wmt_14:language_pair=fr-en,model=ai21_jamba-1.5-large/01-Sep-2024 20:40 -  
[DIR]wmt_14:language_pair=cs-en,model=ai21_jamba-1.5-mini/01-Sep-2024 17:21 -  
[DIR]wmt_14:language_pair=de-en,model=ai21_jamba-1.5-large/01-Sep-2024 17:21 -  
[DIR]wmt_14:language_pair=de-en,model=ai21_jamba-1.5-mini/01-Sep-2024 17:21 -  
[DIR]natural_qa:mode=openbook_longans,model=ai21_jamba-1.5-mini/01-Sep-2024 17:21 -  
[DIR]wmt_14:language_pair=cs-en,model=ai21_jamba-1.5-large/01-Sep-2024 17:21 -  
[DIR]natural_qa:mode=openbook_longans,model=ai21_jamba-1.5-large/01-Sep-2024 17:21 -  
[DIR]natural_qa:mode=closedbook,model=ai21_jamba-1.5-mini/01-Sep-2024 17:21 -  
[DIR]med_qa:model=ai21_jamba-1.5-large/01-Sep-2024 17:21 -  
[DIR]natural_qa:mode=closedbook,model=ai21_jamba-1.5-large/01-Sep-2024 17:21 -  
[DIR]med_qa:model=ai21_jamba-1.5-mini/01-Sep-2024 17:21 -  
[DIR]narrative_qa:model=ai21_jamba-1.5-large/01-Sep-2024 17:21 -  
[DIR]mmlu:subject=econometrics,method=multiple_choice_joint,model=ai21_jamba-1.5-large/01-Sep-2024 17:20 -  
[DIR]mmlu:subject=econometrics,method=multiple_choice_joint,model=ai21_jamba-1.5-mini/01-Sep-2024 17:20 -  
[DIR]mmlu:subject=us_foreign_policy,method=multiple_choice_joint,model=ai21_jamba-1.5-large/01-Sep-2024 17:20 -  
[DIR]mmlu:subject=us_foreign_policy,method=multiple_choice_joint,model=ai21_jamba-1.5-mini/01-Sep-2024 17:20 -  
[DIR]mmlu:subject=computer_security,method=multiple_choice_joint,model=ai21_jamba-1.5-large/01-Sep-2024 17:20 -  
[DIR]mmlu:subject=computer_security,method=multiple_choice_joint,model=ai21_jamba-1.5-mini/01-Sep-2024 17:20 -  
[DIR]mmlu:subject=college_chemistry,method=multiple_choice_joint,model=ai21_jamba-1.5-large/01-Sep-2024 17:20 -  
[DIR]mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=ai21_jamba-1.5-mini/01-Sep-2024 17:20 -  
[DIR]mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=ai21_jamba-1.5-large/01-Sep-2024 17:20 -  
[DIR]mmlu:subject=college_chemistry,method=multiple_choice_joint,model=ai21_jamba-1.5-mini/01-Sep-2024 17:20 -  
[DIR]math:subject=precalculus,level=1,use_official_examples=False,use_chain_of_thought=True,model=ai21_jamba-1.5-mini/01-Sep-2024 17:20 -  
[DIR]math:subject=precalculus,level=1,use_official_examples=False,use_chain_of_thought=True,model=ai21_jamba-1.5-large/01-Sep-2024 17:20 -  
[DIR]math:subject=prealgebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=ai21_jamba-1.5-mini/01-Sep-2024 17:20 -  
[DIR]math:subject=prealgebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=ai21_jamba-1.5-large/01-Sep-2024 17:20 -  
[DIR]math:subject=number_theory,level=1,use_official_examples=False,use_chain_of_thought=True,model=ai21_jamba-1.5-mini/01-Sep-2024 17:20 -  
[DIR]math:subject=number_theory,level=1,use_official_examples=False,use_chain_of_thought=True,model=ai21_jamba-1.5-large/01-Sep-2024 17:20 -  
[DIR]legalbench:subset=international_citizenship_questions,model=ai21_jamba-1.5-mini/01-Sep-2024 17:20 -  
[DIR]math:subject=intermediate_algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=ai21_jamba-1.5-large/01-Sep-2024 17:20 -  
[DIR]math:subject=intermediate_algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=ai21_jamba-1.5-mini/01-Sep-2024 17:20 -  
[DIR]legalbench:subset=international_citizenship_questions,model=ai21_jamba-1.5-large/01-Sep-2024 17:20 -  
[DIR]math:subject=geometry,level=1,use_official_examples=False,use_chain_of_thought=True,model=ai21_jamba-1.5-large/01-Sep-2024 17:20 -  
[DIR]math:subject=geometry,level=1,use_official_examples=False,use_chain_of_thought=True,model=ai21_jamba-1.5-mini/01-Sep-2024 17:20 -  
[DIR]math:subject=counting_and_probability,level=1,use_official_examples=False,use_chain_of_thought=True,model=ai21_jamba-1.5-mini/01-Sep-2024 17:20 -  
[DIR]math:subject=counting_and_probability,level=1,use_official_examples=False,use_chain_of_thought=True,model=ai21_jamba-1.5-large/01-Sep-2024 17:20 -  
[DIR]math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=ai21_jamba-1.5-mini/01-Sep-2024 17:20 -  
[DIR]math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=ai21_jamba-1.5-large/01-Sep-2024 17:20 -  
[DIR]legalbench:subset=proa,model=ai21_jamba-1.5-mini/01-Sep-2024 17:20 -  
[DIR]legalbench:subset=proa,model=ai21_jamba-1.5-large/01-Sep-2024 17:20 -  
[DIR]gsm:model=ai21_jamba-1.5-mini,stop=none/01-Sep-2024 17:20 -  
[DIR]gsm:model=ai21_jamba-1.5-large,stop=none/01-Sep-2024 17:20 -  
[DIR]legalbench:subset=corporate_lobbying,model=ai21_jamba-1.5-large/01-Sep-2024 17:20 -  
[DIR]commonsense:dataset=openbookqa,method=multiple_choice_joint,model=ai21_jamba-1.5-mini/01-Sep-2024 17:20 -  
[DIR]commonsense:dataset=openbookqa,method=multiple_choice_joint,model=ai21_jamba-1.5-large/01-Sep-2024 17:20 -  
[DIR]legalbench:subset=corporate_lobbying,model=ai21_jamba-1.5-mini/01-Sep-2024 17:20 -  
[DIR]legalbench:subset=function_of_decision_section,model=ai21_jamba-1.5-mini/01-Sep-2024 17:20 -  
[DIR]legalbench:subset=function_of_decision_section,model=ai21_jamba-1.5-large/01-Sep-2024 17:20 -  
[DIR]legalbench:subset=abercrombie,model=ai21_jamba-1.5-large/01-Sep-2024 17:20 -  
[DIR]legalbench:subset=abercrombie,model=ai21_jamba-1.5-mini/01-Sep-2024 17:20 -  
[DIR]natural_qa:mode=openbook_longans,model=nvidia_nemotron-4-340b-instruct/01-Sep-2024 17:19 -  
[DIR]natural_qa:mode=closedbook,model=nvidia_nemotron-4-340b-instruct/01-Sep-2024 17:19 -  
[DIR]narrative_qa:model=nvidia_nemotron-4-340b-instruct/01-Sep-2024 17:19 -  
[DIR]mmlu:subject=us_foreign_policy,method=multiple_choice_joint,model=nvidia_nemotron-4-340b-instruct/01-Sep-2024 17:19 -  
[DIR]mmlu:subject=econometrics,method=multiple_choice_joint,model=nvidia_nemotron-4-340b-instruct/01-Sep-2024 17:19 -  
[DIR]mmlu:subject=computer_security,method=multiple_choice_joint,model=nvidia_nemotron-4-340b-instruct/01-Sep-2024 17:19 -  
[DIR]mmlu:subject=college_chemistry,method=multiple_choice_joint,model=nvidia_nemotron-4-340b-instruct/01-Sep-2024 17:19 -  
[DIR]mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=nvidia_nemotron-4-340b-instruct/01-Sep-2024 17:19 -  
[DIR]med_qa:model=nvidia_nemotron-4-340b-instruct/01-Sep-2024 17:19 -  
[DIR]gsm:model=nvidia_nemotron-4-340b-instruct,stop=none/01-Sep-2024 17:18 -  
[DIR]commonsense:dataset=openbookqa,method=multiple_choice_joint,model=nvidia_nemotron-4-340b-instruct/01-Sep-2024 17:18 -  
[DIR]narrative_qa:model=ai21_jamba-1.5-mini/01-Sep-2024 15:05 -  

Apache/2.2.15 (CentOS) Server at nlp.stanford.edu Port 443