Skip to content

Commit 3c65429

Browse files
author
Damien Sileo
committed
new tasks
1 parent 2b0ae93 commit 3c65429

3 files changed

Lines changed: 393 additions & 365 deletions

File tree

src/tasksource/.ipynb_checkpoints/tasks-checkpoint.py

Lines changed: 17 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -772,7 +772,7 @@ def _preprocess_chatgpt_detection(ex):
772772
dataset_name="ColumbiaNLP/FLUTE")
773773

774774
strategy_qa = Classification('question',labels='answer',
775-
dataset_name="metaeval/strategy-qa",splits=['train',None,None])
775+
dataset_name="tasksource/strategy-qa",splits=['train',None,None])
776776

777777
summarize_from_feedback = MultipleChoice(get.info.post,
778778
choices_list=lambda x: [x['summaries'][0]['text'],x['summaries'][1]['text']],
@@ -951,6 +951,7 @@ def _udep_post_process(ds):
951951
#for CFG in "cognitive-bias", "fake-news", "gender-bias", "hate-speech", "linguistic-bias", "political-bias", "racial-bias", "text-level-bias":
952952
# print(f"mbib__{CFG.replace('-','_')} = Classification('text',labels=name('label',['not {CFG}','{CFG}']), dataset_name='mediabiasgroup/mbib-base', config_name='{CFG}')")
953953

954+
"""
954955
mbib_cognitive_bias = Classification('text',labels=name('label',['not cognitive-bias','cognitive-bias']), dataset_name='mediabiasgroup/mbib-base', config_name='cognitive-bias')
955956
mbib_fake_news = Classification('text',labels=name('label',['not fake-news','fake-news']), dataset_name='mediabiasgroup/mbib-base', config_name='fake-news')
956957
mbib_gender_bias = Classification('text',labels=name('label',['not gender-bias','gender-bias']), dataset_name='mediabiasgroup/mbib-base', config_name='gender-bias')
@@ -959,6 +960,7 @@ def _udep_post_process(ds):
959960
mbib_political_bias = Classification('text',labels=name('label',['not political-bias','political-bias']), dataset_name='mediabiasgroup/mbib-base', config_name='political-bias')
960961
mbib_racial_bias = Classification('text',labels=name('label',['not racial-bias','racial-bias']), dataset_name='mediabiasgroup/mbib-base', config_name='racial-bias')
961962
mbib_text_level_bias = Classification('text',labels=name('label',['not text-level-bias','text-level-bias']), dataset_name='mediabiasgroup/mbib-base', config_name='text-level-bias')
963+
"""
962964

963965
robustLR = Classification("context","statement","label", dataset_name="tasksource/robustLR")
964966

@@ -1021,7 +1023,7 @@ def _icl_rand(x):
10211023
icl = Classification("inputs", lambda x: x['symbols'][_icl_rand(x)],
10221024
labels=lambda x: str(x['symbols'][_icl_rand(x)]==x['targets']),
10231025
dataset_name="tasksource/icl-symbol-tuning-instruct",
1024-
pre_process=lambda ds:ds.filter(lambda x:len(x['inputs'])<200*4), # 200 tokens of 4 char
1026+
pre_process=lambda ds:ds.filter(lambda x:len(x['inputs'])<500*4), # 500 tokens of 4 char
10251027
)
10261028

10271029
space_nli = Classification("premises","hypothesis","label",dataset_name="tasksource/SpaceNLI")
@@ -1234,4 +1236,16 @@ def _nlgraph_binarize(x):
12341236
lex_glue___ecthr_a = Classification(sentence1="text", labels="labels",dataset_name="coastalcph/lex_glue",config_name="ecthr_a") # too long
12351237
lex_glue___ecthr_b = Classification(sentence1="text", labels="labels") # too long
12361238

1237-
ultrafeedback = MultipleChoice("question", choices=['response_j','reponse_k'],labels=constant(0), dataset_name="pushpdeep/UltraFeedback-paired")
1239+
ultrafeedback = MultipleChoice("question", choices=['response_j','response_k'],labels=constant(0), dataset_name="pushpdeep/UltraFeedback-paired")
1240+
1241+
essay_scoring = Classification("full_text",labels="score",dataset_name='tasksource/AES2-essay-scoring')
1242+
1243+
argument_feedback = Classification("discourse_text",labels="discourse_effectiveness", dataset_name="tasksource/argument-feedback")
1244+
1245+
eg = lambda x: Classification("full_text", labels=lambda y:int(y[x]), dataset_name="tasksource/english-grading")
1246+
grading__cohesion = eg('cohesion')
1247+
grading__syntax = eg('syntax')
1248+
grading__vocabulary = eg('vocabulary')
1249+
grading__phraseology = eg('phraseology')
1250+
grading__grammar = eg('grammar')
1251+
grading__conventions = eg('conventions')

src/tasksource/tasks.py

Lines changed: 17 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -772,7 +772,7 @@ def _preprocess_chatgpt_detection(ex):
772772
dataset_name="ColumbiaNLP/FLUTE")
773773

774774
strategy_qa = Classification('question',labels='answer',
775-
dataset_name="metaeval/strategy-qa",splits=['train',None,None])
775+
dataset_name="tasksource/strategy-qa",splits=['train',None,None])
776776

777777
summarize_from_feedback = MultipleChoice(get.info.post,
778778
choices_list=lambda x: [x['summaries'][0]['text'],x['summaries'][1]['text']],
@@ -951,6 +951,7 @@ def _udep_post_process(ds):
951951
#for CFG in "cognitive-bias", "fake-news", "gender-bias", "hate-speech", "linguistic-bias", "political-bias", "racial-bias", "text-level-bias":
952952
# print(f"mbib__{CFG.replace('-','_')} = Classification('text',labels=name('label',['not {CFG}','{CFG}']), dataset_name='mediabiasgroup/mbib-base', config_name='{CFG}')")
953953

954+
"""
954955
mbib_cognitive_bias = Classification('text',labels=name('label',['not cognitive-bias','cognitive-bias']), dataset_name='mediabiasgroup/mbib-base', config_name='cognitive-bias')
955956
mbib_fake_news = Classification('text',labels=name('label',['not fake-news','fake-news']), dataset_name='mediabiasgroup/mbib-base', config_name='fake-news')
956957
mbib_gender_bias = Classification('text',labels=name('label',['not gender-bias','gender-bias']), dataset_name='mediabiasgroup/mbib-base', config_name='gender-bias')
@@ -959,6 +960,7 @@ def _udep_post_process(ds):
959960
mbib_political_bias = Classification('text',labels=name('label',['not political-bias','political-bias']), dataset_name='mediabiasgroup/mbib-base', config_name='political-bias')
960961
mbib_racial_bias = Classification('text',labels=name('label',['not racial-bias','racial-bias']), dataset_name='mediabiasgroup/mbib-base', config_name='racial-bias')
961962
mbib_text_level_bias = Classification('text',labels=name('label',['not text-level-bias','text-level-bias']), dataset_name='mediabiasgroup/mbib-base', config_name='text-level-bias')
963+
"""
962964

963965
robustLR = Classification("context","statement","label", dataset_name="tasksource/robustLR")
964966

@@ -1021,7 +1023,7 @@ def _icl_rand(x):
10211023
icl = Classification("inputs", lambda x: x['symbols'][_icl_rand(x)],
10221024
labels=lambda x: str(x['symbols'][_icl_rand(x)]==x['targets']),
10231025
dataset_name="tasksource/icl-symbol-tuning-instruct",
1024-
pre_process=lambda ds:ds.filter(lambda x:len(x['inputs'])<200*4), # 200 tokens of 4 char
1026+
pre_process=lambda ds:ds.filter(lambda x:len(x['inputs'])<500*4), # 500 tokens of 4 char
10251027
)
10261028

10271029
space_nli = Classification("premises","hypothesis","label",dataset_name="tasksource/SpaceNLI")
@@ -1234,4 +1236,16 @@ def _nlgraph_binarize(x):
12341236
lex_glue___ecthr_a = Classification(sentence1="text", labels="labels",dataset_name="coastalcph/lex_glue",config_name="ecthr_a") # too long
12351237
lex_glue___ecthr_b = Classification(sentence1="text", labels="labels") # too long
12361238

1237-
ultrafeedback = MultipleChoice("question", choices=['response_j','reponse_k'],labels=constant(0), dataset_name="pushpdeep/UltraFeedback-paired")
1239+
ultrafeedback = MultipleChoice("question", choices=['response_j','response_k'],labels=constant(0), dataset_name="pushpdeep/UltraFeedback-paired")
1240+
1241+
essay_scoring = Classification("full_text",labels="score",dataset_name='tasksource/AES2-essay-scoring')
1242+
1243+
argument_feedback = Classification("discourse_text",labels="discourse_effectiveness", dataset_name="tasksource/argument-feedback")
1244+
1245+
eg = lambda x: Classification("full_text", labels=lambda y:int(y[x]), dataset_name="tasksource/english-grading")
1246+
grading__cohesion = eg('cohesion')
1247+
grading__syntax = eg('syntax')
1248+
grading__vocabulary = eg('vocabulary')
1249+
grading__phraseology = eg('phraseology')
1250+
grading__grammar = eg('grammar')
1251+
grading__conventions = eg('conventions')

0 commit comments

Comments (0)