Index of /helm/benchmark_output/runs/instruction_following

[ICO]NameLast modifiedSizeDescription

[DIR]Parent Directory  -  
[DIR]anthropic_hh_rlhf:subset=hh,model=anthropic_claude-v1.3,evaluator=claude/13-Feb-2024 16:16 -  
[DIR]anthropic_hh_rlhf:subset=hh,model=anthropic_claude-v1.3,evaluator=gpt4/13-Feb-2024 16:16 -  
[DIR]anthropic_hh_rlhf:subset=hh,model=anthropic_claude-v1.3,evaluator=mturk/13-Feb-2024 16:16 -  
[DIR]anthropic_hh_rlhf:subset=hh,model=anthropic_claude-v1.3,evaluator=scale/13-Feb-2024 16:16 -  
[DIR]anthropic_hh_rlhf:subset=hh,model=cohere_command-xlarge-beta,evaluator=claude/13-Feb-2024 16:16 -  
[DIR]anthropic_hh_rlhf:subset=hh,model=cohere_command-xlarge-beta,evaluator=gpt4/13-Feb-2024 16:16 -  
[DIR]anthropic_hh_rlhf:subset=hh,model=cohere_command-xlarge-beta,evaluator=mturk/13-Feb-2024 16:16 -  
[DIR]anthropic_hh_rlhf:subset=hh,model=cohere_command-xlarge-beta,evaluator=scale/13-Feb-2024 16:16 -  
[DIR]anthropic_hh_rlhf:subset=hh,model=openai_gpt-3.5-turbo-0613,evaluator=claude/13-Feb-2024 16:16 -  
[DIR]anthropic_hh_rlhf:subset=hh,model=openai_gpt-3.5-turbo-0613,evaluator=gpt4/13-Feb-2024 16:15 -  
[DIR]anthropic_hh_rlhf:subset=hh,model=openai_gpt-3.5-turbo-0613,evaluator=mturk/13-Feb-2024 16:16 -  
[DIR]anthropic_hh_rlhf:subset=hh,model=openai_gpt-3.5-turbo-0613,evaluator=scale/13-Feb-2024 16:16 -  
[DIR]anthropic_hh_rlhf:subset=hh,model=openai_gpt-4-0314,evaluator=claude/13-Feb-2024 16:16 -  
[DIR]anthropic_hh_rlhf:subset=hh,model=openai_gpt-4-0314,evaluator=gpt4/13-Feb-2024 16:16 -  
[DIR]anthropic_hh_rlhf:subset=hh,model=openai_gpt-4-0314,evaluator=mturk/13-Feb-2024 16:16 -  
[DIR]anthropic_hh_rlhf:subset=hh,model=openai_gpt-4-0314,evaluator=scale/13-Feb-2024 16:16 -  
[DIR]anthropic_hh_rlhf:subset=red_team,model=anthropic_claude-v1.3,evaluator=claude/13-Feb-2024 16:16 -  
[DIR]anthropic_hh_rlhf:subset=red_team,model=anthropic_claude-v1.3,evaluator=gpt4/13-Feb-2024 16:16 -  
[DIR]anthropic_hh_rlhf:subset=red_team,model=anthropic_claude-v1.3,evaluator=mturk/13-Feb-2024 16:15 -  
[DIR]anthropic_hh_rlhf:subset=red_team,model=anthropic_claude-v1.3,evaluator=scale/13-Feb-2024 16:16 -  
[DIR]anthropic_hh_rlhf:subset=red_team,model=cohere_command-xlarge-beta,evaluator=claude/13-Feb-2024 16:16 -  
[DIR]anthropic_hh_rlhf:subset=red_team,model=cohere_command-xlarge-beta,evaluator=gpt4/13-Feb-2024 16:16 -  
[DIR]anthropic_hh_rlhf:subset=red_team,model=cohere_command-xlarge-beta,evaluator=mturk/13-Feb-2024 16:16 -  
[DIR]anthropic_hh_rlhf:subset=red_team,model=cohere_command-xlarge-beta,evaluator=scale/13-Feb-2024 16:16 -  
[DIR]anthropic_hh_rlhf:subset=red_team,model=openai_gpt-3.5-turbo-0613,evaluator=claude/13-Feb-2024 16:16 -  
[DIR]anthropic_hh_rlhf:subset=red_team,model=openai_gpt-3.5-turbo-0613,evaluator=gpt4/13-Feb-2024 16:16 -  
[DIR]anthropic_hh_rlhf:subset=red_team,model=openai_gpt-3.5-turbo-0613,evaluator=mturk/13-Feb-2024 16:16 -  
[DIR]anthropic_hh_rlhf:subset=red_team,model=openai_gpt-3.5-turbo-0613,evaluator=scale/13-Feb-2024 16:15 -  
[DIR]anthropic_hh_rlhf:subset=red_team,model=openai_gpt-4-0314,evaluator=claude/13-Feb-2024 16:16 -  
[DIR]anthropic_hh_rlhf:subset=red_team,model=openai_gpt-4-0314,evaluator=gpt4/13-Feb-2024 16:16 -  
[DIR]anthropic_hh_rlhf:subset=red_team,model=openai_gpt-4-0314,evaluator=mturk/13-Feb-2024 16:16 -  
[DIR]anthropic_hh_rlhf:subset=red_team,model=openai_gpt-4-0314,evaluator=scale/13-Feb-2024 16:16 -  
[DIR]grammar:path=src_helm_benchmark_scenarios_best_chatgpt_prompts.yaml,tags=,model=anthropic_claude-v1.3,evaluator=claude/13-Feb-2024 16:16 -  
[DIR]grammar:path=src_helm_benchmark_scenarios_best_chatgpt_prompts.yaml,tags=,model=anthropic_claude-v1.3,evaluator=gpt4/13-Feb-2024 16:15 -  
[DIR]grammar:path=src_helm_benchmark_scenarios_best_chatgpt_prompts.yaml,tags=,model=anthropic_claude-v1.3,evaluator=mturk/13-Feb-2024 16:16 -  
[DIR]grammar:path=src_helm_benchmark_scenarios_best_chatgpt_prompts.yaml,tags=,model=anthropic_claude-v1.3,evaluator=scale/13-Feb-2024 16:16 -  
[DIR]grammar:path=src_helm_benchmark_scenarios_best_chatgpt_prompts.yaml,tags=,model=cohere_command-xlarge-beta,evaluator=claude/13-Feb-2024 16:16 -  
[DIR]grammar:path=src_helm_benchmark_scenarios_best_chatgpt_prompts.yaml,tags=,model=cohere_command-xlarge-beta,evaluator=gpt4/13-Feb-2024 16:16 -  
[DIR]grammar:path=src_helm_benchmark_scenarios_best_chatgpt_prompts.yaml,tags=,model=cohere_command-xlarge-beta,evaluator=mturk/13-Feb-2024 16:15 -  
[DIR]grammar:path=src_helm_benchmark_scenarios_best_chatgpt_prompts.yaml,tags=,model=cohere_command-xlarge-beta,evaluator=scale/13-Feb-2024 16:16 -  
[DIR]grammar:path=src_helm_benchmark_scenarios_best_chatgpt_prompts.yaml,tags=,model=openai_gpt-3.5-turbo-0613,evaluator=claude/13-Feb-2024 16:16 -  
[DIR]grammar:path=src_helm_benchmark_scenarios_best_chatgpt_prompts.yaml,tags=,model=openai_gpt-3.5-turbo-0613,evaluator=gpt4/13-Feb-2024 16:16 -  
[DIR]grammar:path=src_helm_benchmark_scenarios_best_chatgpt_prompts.yaml,tags=,model=openai_gpt-3.5-turbo-0613,evaluator=mturk/13-Feb-2024 16:16 -  
[DIR]grammar:path=src_helm_benchmark_scenarios_best_chatgpt_prompts.yaml,tags=,model=openai_gpt-3.5-turbo-0613,evaluator=scale/13-Feb-2024 16:16 -  
[DIR]grammar:path=src_helm_benchmark_scenarios_best_chatgpt_prompts.yaml,tags=,model=openai_gpt-4-0314,evaluator=claude/13-Feb-2024 16:16 -  
[DIR]grammar:path=src_helm_benchmark_scenarios_best_chatgpt_prompts.yaml,tags=,model=openai_gpt-4-0314,evaluator=gpt4/13-Feb-2024 16:16 -  
[DIR]grammar:path=src_helm_benchmark_scenarios_best_chatgpt_prompts.yaml,tags=,model=openai_gpt-4-0314,evaluator=mturk/13-Feb-2024 16:16 -  
[DIR]grammar:path=src_helm_benchmark_scenarios_best_chatgpt_prompts.yaml,tags=,model=openai_gpt-4-0314,evaluator=scale/13-Feb-2024 16:16 -  
[DIR]koala:model=anthropic_claude-v1.3,evaluator=claude/13-Feb-2024 16:16 -  
[DIR]koala:model=anthropic_claude-v1.3,evaluator=gpt4/13-Feb-2024 16:16 -  
[DIR]koala:model=anthropic_claude-v1.3,evaluator=mturk/13-Feb-2024 16:16 -  
[DIR]koala:model=anthropic_claude-v1.3,evaluator=scale/13-Feb-2024 16:16 -  
[DIR]koala:model=cohere_command-xlarge-beta,evaluator=claude/13-Feb-2024 16:16 -  
[DIR]koala:model=cohere_command-xlarge-beta,evaluator=gpt4/13-Feb-2024 16:16 -  
[DIR]koala:model=cohere_command-xlarge-beta,evaluator=mturk/13-Feb-2024 16:16 -  
[DIR]koala:model=cohere_command-xlarge-beta,evaluator=scale/13-Feb-2024 16:16 -  
[DIR]koala:model=openai_gpt-3.5-turbo-0613,evaluator=claude/13-Feb-2024 16:16 -  
[DIR]koala:model=openai_gpt-3.5-turbo-0613,evaluator=gpt4/13-Feb-2024 16:15 -  
[DIR]koala:model=openai_gpt-3.5-turbo-0613,evaluator=mturk/13-Feb-2024 16:16 -  
[DIR]koala:model=openai_gpt-3.5-turbo-0613,evaluator=scale/13-Feb-2024 16:16 -  
[DIR]koala:model=openai_gpt-4-0314,evaluator=claude/13-Feb-2024 16:16 -  
[DIR]koala:model=openai_gpt-4-0314,evaluator=gpt4/13-Feb-2024 16:16 -  
[DIR]koala:model=openai_gpt-4-0314,evaluator=mturk/13-Feb-2024 16:15 -  
[DIR]koala:model=openai_gpt-4-0314,evaluator=scale/13-Feb-2024 16:16 -  
[DIR]open_assistant:language=en,model=anthropic_claude-v1.3,evaluator=claude/13-Feb-2024 16:16 -  
[DIR]open_assistant:language=en,model=anthropic_claude-v1.3,evaluator=gpt4/13-Feb-2024 16:15 -  
[DIR]open_assistant:language=en,model=anthropic_claude-v1.3,evaluator=mturk/13-Feb-2024 16:16 -  
[DIR]open_assistant:language=en,model=anthropic_claude-v1.3,evaluator=scale/13-Feb-2024 16:16 -  
[DIR]open_assistant:language=en,model=cohere_command-xlarge-beta,evaluator=claude/13-Feb-2024 16:15 -  
[DIR]open_assistant:language=en,model=cohere_command-xlarge-beta,evaluator=gpt4/13-Feb-2024 16:16 -  
[DIR]open_assistant:language=en,model=cohere_command-xlarge-beta,evaluator=mturk/13-Feb-2024 16:16 -  
[DIR]open_assistant:language=en,model=cohere_command-xlarge-beta,evaluator=scale/13-Feb-2024 16:15 -  
[DIR]open_assistant:language=en,model=openai_gpt-3.5-turbo-0613,evaluator=claude/13-Feb-2024 16:16 -  
[DIR]open_assistant:language=en,model=openai_gpt-3.5-turbo-0613,evaluator=gpt4/13-Feb-2024 16:16 -  
[DIR]open_assistant:language=en,model=openai_gpt-3.5-turbo-0613,evaluator=mturk/13-Feb-2024 16:16 -  
[DIR]open_assistant:language=en,model=openai_gpt-3.5-turbo-0613,evaluator=scale/13-Feb-2024 16:16 -  
[DIR]open_assistant:language=en,model=openai_gpt-4-0314,evaluator=claude/13-Feb-2024 16:16 -  
[DIR]open_assistant:language=en,model=openai_gpt-4-0314,evaluator=gpt4/13-Feb-2024 16:16 -  
[DIR]open_assistant:language=en,model=openai_gpt-4-0314,evaluator=mturk/13-Feb-2024 16:16 -  
[DIR]open_assistant:language=en,model=openai_gpt-4-0314,evaluator=scale/13-Feb-2024 16:16 -  
[DIR]self_instruct:model=anthropic_claude-v1.3,evaluator=claude/13-Feb-2024 16:16 -  
[DIR]self_instruct:model=anthropic_claude-v1.3,evaluator=gpt4/13-Feb-2024 16:16 -  
[DIR]self_instruct:model=anthropic_claude-v1.3,evaluator=mturk/13-Feb-2024 16:16 -  
[DIR]self_instruct:model=anthropic_claude-v1.3,evaluator=scale/13-Feb-2024 16:16 -  
[DIR]self_instruct:model=cohere_command-xlarge-beta,evaluator=claude/13-Feb-2024 16:16 -  
[DIR]self_instruct:model=cohere_command-xlarge-beta,evaluator=gpt4/13-Feb-2024 16:15 -  
[DIR]self_instruct:model=cohere_command-xlarge-beta,evaluator=mturk/13-Feb-2024 16:16 -  
[DIR]self_instruct:model=cohere_command-xlarge-beta,evaluator=scale/13-Feb-2024 16:16 -  
[DIR]self_instruct:model=openai_gpt-3.5-turbo-0613,evaluator=claude/13-Feb-2024 16:15 -  
[DIR]self_instruct:model=openai_gpt-3.5-turbo-0613,evaluator=gpt4/13-Feb-2024 16:16 -  
[DIR]self_instruct:model=openai_gpt-3.5-turbo-0613,evaluator=mturk/13-Feb-2024 16:16 -  
[DIR]self_instruct:model=openai_gpt-3.5-turbo-0613,evaluator=scale/13-Feb-2024 16:16 -  
[DIR]self_instruct:model=openai_gpt-4-0314,evaluator=claude/13-Feb-2024 16:16 -  
[DIR]self_instruct:model=openai_gpt-4-0314,evaluator=gpt4/13-Feb-2024 16:16 -  
[DIR]self_instruct:model=openai_gpt-4-0314,evaluator=mturk/13-Feb-2024 16:16 -  
[DIR]self_instruct:model=openai_gpt-4-0314,evaluator=scale/13-Feb-2024 16:15 -  
[DIR]vicuna:category=all,model=anthropic_claude-v1.3,evaluator=claude/13-Feb-2024 16:15 -  
[DIR]vicuna:category=all,model=anthropic_claude-v1.3,evaluator=gpt4/13-Feb-2024 16:16 -  
[DIR]vicuna:category=all,model=anthropic_claude-v1.3,evaluator=mturk/13-Feb-2024 16:16 -  
[DIR]vicuna:category=all,model=anthropic_claude-v1.3,evaluator=scale/13-Feb-2024 16:16 -  
[DIR]vicuna:category=all,model=cohere_command-xlarge-beta,evaluator=claude/13-Feb-2024 16:16 -  
[DIR]vicuna:category=all,model=cohere_command-xlarge-beta,evaluator=gpt4/13-Feb-2024 16:16 -  
[DIR]vicuna:category=all,model=cohere_command-xlarge-beta,evaluator=mturk/13-Feb-2024 16:16 -  
[DIR]vicuna:category=all,model=cohere_command-xlarge-beta,evaluator=scale/13-Feb-2024 16:15 -  
[DIR]vicuna:category=all,model=openai_gpt-3.5-turbo-0613,evaluator=claude/13-Feb-2024 16:16 -  
[DIR]vicuna:category=all,model=openai_gpt-3.5-turbo-0613,evaluator=gpt4/13-Feb-2024 16:16 -  
[DIR]vicuna:category=all,model=openai_gpt-3.5-turbo-0613,evaluator=mturk/13-Feb-2024 16:16 -  
[DIR]vicuna:category=all,model=openai_gpt-3.5-turbo-0613,evaluator=scale/13-Feb-2024 16:16 -  
[DIR]vicuna:category=all,model=openai_gpt-4-0314,evaluator=claude/13-Feb-2024 16:16 -  
[DIR]vicuna:category=all,model=openai_gpt-4-0314,evaluator=gpt4/13-Feb-2024 16:16 -  
[DIR]vicuna:category=all,model=openai_gpt-4-0314,evaluator=mturk/13-Feb-2024 16:16 -  
[DIR]vicuna:category=all,model=openai_gpt-4-0314,evaluator=scale/13-Feb-2024 16:16 -  

Apache/2.2.15 (CentOS) Server at nlp.stanford.edu Port 443