Index of /helm/benchmark_output/releases/v1.1.0-canary/groups/latex

	Name	Last modified	Size

	Parent Directory		-
$[ ]$	core_scenarios_general_information.tex	14-Feb-2024 14:13	19K
$[ ]$	core_scenarios_efficiency.tex	14-Feb-2024 14:13	9.4K
$[ ]$	targeted_evaluations_general_information.tex	09-Jan-2024 17:38	9.3K
$[ ]$	question_answering_general_information.tex	09-Jan-2024 17:38	9.3K
$[ ]$	core_scenarios_accuracy.tex	14-Feb-2024 14:13	8.3K
$[ ]$	targeted_evaluations_efficiency_detailed.tex	09-Jan-2024 17:38	5.7K
$[ ]$	knowledge_general_information.tex	09-Jan-2024 17:38	5.3K
$[ ]$	legalbench_legalbench.tex	14-Feb-2024 14:13	5.0K
$[ ]$	math_chain_of_thought_math_chain_of_thought.tex	14-Feb-2024 14:13	5.0K
$[ ]$	reasoning_general_information.tex	09-Jan-2024 17:38	4.8K
$[ ]$	legalbench_legalbench_subset:corporate_lobbying.tex	14-Feb-2024 14:13	4.7K
$[ ]$	narrative_qa_narrative_qa_.tex	14-Feb-2024 14:13	4.6K
$[ ]$	math_chain_of_thought_math_chain_of_thought_subject:geometry,level:1,use_official_examples:False,use_chain_of_thought:True.tex	14-Feb-2024 14:13	4.5K
$[ ]$	math_chain_of_thought_math_chain_of_thought_subject:counting_and_probability,level:1,use_official_examples:False,use_chain_of_thought:True.tex	14-Feb-2024 14:13	4.5K
$[ ]$	question_answering_robustness.tex	09-Jan-2024 17:38	4.5K
$[ ]$	core_scenarios_robustness.tex	09-Jan-2024 17:38	4.4K
$[ ]$	wmt_14_wmt_14_source_language:hi,target_language:en.tex	14-Feb-2024 14:13	4.4K
$[ ]$	question_answering_fairness.tex	09-Jan-2024 17:38	4.4K
$[ ]$	core_scenarios_fairness.tex	09-Jan-2024 17:38	4.4K
$[ ]$	math_chain_of_thought_math_chain_of_thought_subject:intermediate_algebra,level:1,use_official_examples:False,use_chain_of_thought:True.tex	14-Feb-2024 14:13	4.4K
$[ ]$	question_answering_accuracy.tex	09-Jan-2024 17:38	4.4K
$[ ]$	wmt_14_wmt_14.tex	14-Feb-2024 14:13	4.4K
$[ ]$	math_chain_of_thought_math_chain_of_thought_subject:prealgebra,level:1,use_official_examples:False,use_chain_of_thought:True.tex	14-Feb-2024 14:13	4.3K
$[ ]$	math_chain_of_thought_math_chain_of_thought_subject:precalculus,level:1,use_official_examples:False,use_chain_of_thought:True.tex	14-Feb-2024 14:13	4.3K
$[ ]$	wmt_14_wmt_14_source_language:de,target_language:en.tex	14-Feb-2024 14:13	4.3K
$[ ]$	wmt_14_wmt_14_source_language:cs,target_language:en.tex	14-Feb-2024 14:13	4.3K
$[ ]$	wmt_14_wmt_14_source_language:ru,target_language:en.tex	14-Feb-2024 14:13	4.3K
$[ ]$	wmt_14_wmt_14_source_language:fr,target_language:en.tex	14-Feb-2024 14:13	4.3K
$[ ]$	math_chain_of_thought_math_chain_of_thought_subject:algebra,level:1,use_official_examples:False,use_chain_of_thought:True.tex	14-Feb-2024 14:13	4.3K
$[ ]$	legalbench_legalbench_subset:function_of_decision_section.tex	14-Feb-2024 14:13	4.2K
$[ ]$	math_chain_of_thought_math_chain_of_thought_subject:number_theory,level:1,use_official_examples:False,use_chain_of_thought:True.tex	14-Feb-2024 14:13	4.1K
$[ ]$	natural_qa_openbook_longans_natural_qa_openbook_longans_mode:openbook_longans.tex	14-Feb-2024 14:13	4.1K
$[ ]$	legalbench_legalbench_subset:abercrombie.tex	14-Feb-2024 14:13	4.1K
$[ ]$	targeted_evaluations_accuracy.tex	09-Jan-2024 17:38	4.0K
$[ ]$	mmlu_mmlu_subject:econometrics.tex	14-Feb-2024 14:13	4.0K
$[ ]$	calibration_calibration_detailed.tex	09-Jan-2024 17:38	4.0K
$[ ]$	med_qa_med_qa_.tex	14-Feb-2024 14:13	4.0K
$[ ]$	mmlu_mmlu.tex	14-Feb-2024 14:13	4.0K
$[ ]$	natural_qa_closedbook_natural_qa_closedbook_mode:closedbook.tex	14-Feb-2024 14:13	3.8K
$[ ]$	legalbench_legalbench_subset:proa.tex	14-Feb-2024 14:13	3.8K
$[ ]$	legalbench_legalbench_subset:international_citizenship_questions.tex	14-Feb-2024 14:13	3.3K
$[ ]$	mmlu_mmlu_subject:us_foreign_policy.tex	14-Feb-2024 14:13	3.3K
$[ ]$	mmlu_mmlu_subject:computer_security.tex	14-Feb-2024 14:13	3.3K
$[ ]$	mmlu_mmlu_subject:college_chemistry.tex	14-Feb-2024 14:13	3.3K
$[ ]$	mmlu_mmlu_subject:abstract_algebra.tex	14-Feb-2024 14:13	3.3K
$[ ]$	gsm_gsm_.tex	14-Feb-2024 14:13	3.3K
$[ ]$	openbookqa_openbookqa_.tex	14-Feb-2024 14:13	3.2K
$[ ]$	targeted_evaluations_robustness.tex	09-Jan-2024 17:38	3.2K
$[ ]$	targeted_evaluations_fairness.tex	09-Jan-2024 17:38	3.1K
$[ ]$	knowledge_robustness.tex	09-Jan-2024 17:38	3.1K
$[ ]$	knowledge_fairness.tex	09-Jan-2024 17:38	3.1K
$[ ]$	knowledge_accuracy.tex	09-Jan-2024 17:38	3.1K
$[ ]$	question_answering_calibration.tex	09-Jan-2024 17:38	2.5K
$[ ]$	core_scenarios_calibration.tex	09-Jan-2024 17:38	2.5K
$[ ]$	reasoning_accuracy.tex	09-Jan-2024 17:38	2.3K
$[ ]$	calibration_accuracy.tex	09-Jan-2024 17:38	2.0K
$[ ]$	targeted_evaluations_calibration.tex	09-Jan-2024 17:38	2.0K
$[ ]$	knowledge_calibration.tex	09-Jan-2024 17:38	2.0K
$[ ]$	question_answering_efficiency.tex	09-Jan-2024 17:38	1.7K
$[ ]$	knowledge_efficiency.tex	09-Jan-2024 17:38	1.3K
$[ ]$	reasoning_efficiency.tex	09-Jan-2024 17:38	1.2K
$[ ]$	targeted_evaluations_disinformation_metrics.tex	09-Jan-2024 17:38	934
$[ ]$	targeted_evaluations_copyright_metrics.tex	09-Jan-2024 17:38	924
$[ ]$	core_scenarios_summarization_metrics.tex	09-Jan-2024 17:38	920
$[ ]$	targeted_evaluations_apps_metrics.tex	09-Jan-2024 17:38	914
$[ ]$	targeted_evaluations_bbq_metrics.tex	09-Jan-2024 17:38	912
$[ ]$	targeted_evaluations_toxicity.tex	09-Jan-2024 17:38	906
$[ ]$	question_answering_toxicity.tex	09-Jan-2024 17:38	902
$[ ]$	targeted_evaluations_bias.tex	09-Jan-2024 17:38	898
$[ ]$	question_answering_bias.tex	09-Jan-2024 17:38	894
$[ ]$	core_scenarios_toxicity.tex	09-Jan-2024 17:38	894
$[ ]$	reasoning_apps_metrics.tex	09-Jan-2024 17:38	892
$[ ]$	core_scenarios_bias.tex	09-Jan-2024 17:38	886
$[ ]$	knowledge_toxicity.tex	09-Jan-2024 17:38	884
$[ ]$	knowledge_bias.tex	09-Jan-2024 17:38	876

Apache/2.2.15 (CentOS) Server at nlp.stanford.edu Port 443