The ultimate aim of language technology is to interact with humans. However, most such systems are trained without direct signals of human preference, with supervised target strings serving as a (sometimes crude) proxy. This work focuses on using reinforcement learning to interact with humans and align to their preferences.
References 2023
Is Reinforcement Learning (Not) for Natural Language Processing: Benchmarks, Baselines, and Building Blocks for Natural Language Policy Optimization
Rajkumar Ramamurthy*, Prithviraj Ammanabrolu* , Kianté Brantley, Jack Hessel, Rafet Sifa, Christian Bauckhage, Hannaneh Hajishirzi, and Yejin Choi
In International Conference on Learning Representations (ICLR) , 2023
% ICLR 2023 paper. Equal-contribution markers are kept out of the name fields
% (an asterisk inside a surname breaks sorting and label generation) and are
% recorded in the note field instead. Accents use LaTeX escapes, as elsewhere
% in this file.
@inproceedings{Ramamurthy2022IsRL,
  author    = {Ramamurthy, Rajkumar and Ammanabrolu, Prithviraj and Brantley, Kiant{\'e} and Hessel, Jack and Sifa, Rafet and Bauckhage, Christian and Hajishirzi, Hannaneh and Choi, Yejin},
  title     = {Is Reinforcement Learning (Not) for Natural Language Processing: Benchmarks, Baselines, and Building Blocks for Natural Language Policy Optimization},
  booktitle = {International Conference on Learning Representations ({ICLR})},
  year      = {2023},
  url       = {https://arxiv.org/abs/2210.01241},
  note      = {Rajkumar Ramamurthy and Prithviraj Ammanabrolu contributed equally},
}
Inference-Time Policy Adapters (IPA): Tailoring Extreme-Scale LMs without Fine-tuning
Ximing Lu, Faeze Brahman, Peter West, Jaehun Jang, Khyathi Chandu, Abhilasha Ravichander, Lianhui Qin, Prithviraj Ammanabrolu , Liwei Jiang, Sahana Ramnath, Nouha Dziri, Jillian Fisher, Bill Yuchen Lin, Skyler Hallinan, Xiang Ren, Sean Welleck, and Yejin Choi
arXiv preprint arXiv:2305.15065 , 2023
% arXiv preprint. The acronyms in the title are brace-protected so that
% sentence-casing styles do not lowercase them. The arXiv identifier is also
% exposed through eprint/archiveprefix; the journal field is kept so styles
% that ignore eprint fields still print the preprint id.
@article{lu2023inference,
  author        = {Lu, Ximing and Brahman, Faeze and West, Peter and Jang, Jaehun and Chandu, Khyathi and Ravichander, Abhilasha and Qin, Lianhui and Ammanabrolu, Prithviraj and Jiang, Liwei and Ramnath, Sahana and Dziri, Nouha and Fisher, Jillian and Lin, Bill Yuchen and Hallinan, Skyler and Ren, Xiang and Welleck, Sean and Choi, Yejin},
  title         = {Inference-Time Policy Adapters ({IPA}): Tailoring Extreme-Scale {LMs} without Fine-tuning},
  journal       = {arXiv preprint arXiv:2305.15065},
  year          = {2023},
  eprint        = {2305.15065},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2305.15065},
}
Fine-Grained Human Feedback Gives Better Rewards for Language Model Training
Zeqiu Wu, Yushi Hu, Weijia Shi, Nouha Dziri, Alane Suhr, Prithviraj Ammanabrolu , Noah A. Smith, Mari Ostendorf, and Hannaneh Hajishirzi
In Thirty-seventh Conference on Neural Information Processing Systems (NeurIPS) , 2023
% NeurIPS 2023 paper on fine-grained human feedback for language model training.
@inproceedings{wu2023finegrained,
  author    = {Wu, Zeqiu and Hu, Yushi and Shi, Weijia and Dziri, Nouha and Suhr, Alane and Ammanabrolu, Prithviraj and Smith, Noah A. and Ostendorf, Mari and Hajishirzi, Hannaneh},
  title     = {Fine-Grained Human Feedback Gives Better Rewards for Language Model Training},
  booktitle = {Thirty-seventh Conference on Neural Information Processing Systems (NeurIPS)},
  year      = {2023},
  url       = {https://arxiv.org/abs/2306.01693},
}
2021
How to Motivate Your Dragon: Teaching Goal-Driven Agents to Speak and Act in Fantasy Worlds
Prithviraj Ammanabrolu , Jack Urbanek, Margaret Li, Arthur Szlam, Tim Rocktäschel, and Jason Weston
In Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies , Jun 2021
% NAACL-HLT 2021 paper; the month uses the predefined bare macro so styles can
% localise it.
@inproceedings{ammanabrolu2021motivate,
  author    = {Ammanabrolu, Prithviraj and Urbanek, Jack and Li, Margaret and Szlam, Arthur and Rockt{\"a}schel, Tim and Weston, Jason},
  title     = {How to Motivate Your Dragon: Teaching Goal-Driven Agents to Speak and Act in Fantasy Worlds},
  booktitle = {Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  publisher = {Association for Computational Linguistics},
  address   = {Online},
  pages     = {807--833},
  month     = jun,
  year      = {2021},
  doi       = {10.18653/v1/2021.naacl-main.64},
  url       = {https://aclanthology.org/2021.naacl-main.64},
}
2020
Interactive fiction games: A colossal adventure
Matthew Hausknecht, Prithviraj Ammanabrolu , Marc-Alexandre Côté, and Xingdi Yuan
In Proceedings of the AAAI Conference on Artificial Intelligence , Jun 2020
% AAAI 2020 paper. The title is stored in Title Case (styles can downcase a
% title but cannot restore capitals that were lost on export), and the venue
% acronym is brace-protected.
@inproceedings{hausknecht2020interactive,
  author    = {Hausknecht, Matthew and Ammanabrolu, Prithviraj and C{\^o}t{\'e}, Marc-Alexandre and Yuan, Xingdi},
  title     = {Interactive Fiction Games: A Colossal Adventure},
  booktitle = {Proceedings of the {AAAI} Conference on Artificial Intelligence},
  volume    = {34},
  number    = {05},
  pages     = {7903--7910},
  year      = {2020},
  url       = {https://arxiv.org/abs/1909.05398},
}
2017
Improvisational storytelling agents
Lara J Martin, Prithviraj Ammanabrolu , Xinyu Wang, Shruti Singh, Brent Harrison, Murtaza Dhuliawala, Pradyumna Tambwekar, Animesh Mehta, Richa Arora, Nathan Dass, and others
In Workshop on Machine Learning for Creativity and Design (NeurIPS 2017) , Jun 2017
% NeurIPS 2017 workshop paper. The title is stored in Title Case so styles can
% choose the casing; the author list is truncated with the literal
% "and others" token, which styles render as "et al.".
% NOTE(review): pages = {4} looks like a page count rather than a page
% number or range; left unchanged, confirm against the PDF.
@inproceedings{martin2017improvisational,
  author    = {Martin, Lara J. and Ammanabrolu, Prithviraj and Wang, Xinyu and Singh, Shruti and Harrison, Brent and Dhuliawala, Murtaza and Tambwekar, Pradyumna and Mehta, Animesh and Arora, Richa and Dass, Nathan and others},
  title     = {Improvisational Storytelling Agents},
  booktitle = {Workshop on Machine Learning for Creativity and Design ({NeurIPS} 2017)},
  pages     = {4},
  year      = {2017},
  url       = {https://nips2017creativity.github.io/doc/Improvisational_Agents.pdf},
}