%%%%%%%%%%%%%%%%%%%%%

@inproceedings{de-marneffe-etal-2009-simple,
    title = {Not a Simple Yes or No: Uncertainty in Indirect Answers},
    author = {de Marneffe, Marie-Catherine and Grimm, Scott and Potts, Christopher},
    booktitle = {Proceedings of the {SIGDIAL} 2009 Conference},
    month = sep,
    year = {2009},
    address = {London, UK},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/W09-3920},
    pages = {136--143}
}

@inproceedings{mihaylov-etal-2018-suit,
    title = {Can a Suit of Armor Conduct Electricity? A New Dataset for Open Book Question Answering},
    author = {Mihaylov, Todor and Clark, Peter and Khot, Tushar and Sabharwal, Ashish},
    booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing},
    month = oct # {-} # nov,
    year = {2018},
    address = {Brussels, Belgium},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/D18-1260},
    doi = {10.18653/v1/D18-1260},
    pages = {2381--2391},
    abstract = {We present a new kind of question answering dataset, OpenBookQA, modeled after open book exams for assessing human understanding of a subject. The open book that comes with our questions is a set of 1326 elementary level science facts. Roughly 6000 questions probe an understanding of these facts and their application to novel situations. This requires combining an open book fact (e.g., metals conduct electricity) with broad common knowledge (e.g., a suit of armor is made of metal) obtained from other sources. While existing QA datasets over documents or knowledge bases, being generally self-contained, focus on linguistic understanding, OpenBookQA probes a deeper understanding of both the topic{---}in the context of common knowledge{---}and the language it is expressed in. Human performance on OpenBookQA is close to 92{\%}, but many state-of-the-art pre-trained QA methods perform surprisingly poorly, worse than several simple neural baselines we develop. Our oracle experiments designed to circumvent the knowledge retrieval bottleneck demonstrate the value of both the open book and additional facts. We leave it as a challenge to solve the retrieval problem in this multi-hop setting and to close the large gap to human performance.}
}

@inproceedings{min-etal-2020-ambigqa,
    title = {{A}mbig{QA}: Answering Ambiguous Open-domain Questions},
    author = {Min, Sewon and Michael, Julian and Hajishirzi, Hannaneh and Zettlemoyer, Luke},
    booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
    month = nov,
    year = {2020},
    address = {Online},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2020.emnlp-main.466},
    doi = {10.18653/v1/2020.emnlp-main.466},
    pages = {5783--5797},
    abstract = {Ambiguity is inherent to open-domain question answering; especially when exploring new topics, it can be difficult to ask questions that have a single, unambiguous answer. In this paper, we introduce AmbigQA, a new open-domain question answering task which involves finding every plausible answer, and then rewriting the question for each one to resolve the ambiguity. To study this task, we construct AmbigNQ, a dataset covering 14,042 questions from NQ-open, an existing open-domain QA benchmark. We find that over half of the questions in NQ-open are ambiguous, with diverse sources of ambiguity such as event and entity references. We also present strong baseline models for AmbigQA which we show benefit from weakly supervised learning that incorporates NQ-open, strongly suggesting our new task and data will support significant future research effort. Our data and baselines are available at https://nlp.cs.washington.edu/ambigqa.}
}

@inproceedings{louis-etal-2020-id,
    title = {{``}{I}{'}d rather just go to bed{''}: Understanding Indirect Answers},
    author = {Louis, Annie and Roth, Dan and Radlinski, Filip},
    booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
    month = nov,
    year = {2020},
    address = {Online},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2020.emnlp-main.601},
    doi = {10.18653/v1/2020.emnlp-main.601},
    pages = {7411--7425},
    abstract = {We revisit a pragmatic inference problem in dialog: Understanding indirect responses to questions. Humans can interpret {`}I{'}m starving.{'} in response to {`}Hungry?{'}, even without direct cue words such as {`}yes{'} and {`}no{'}. In dialog systems, allowing natural responses rather than closed vocabularies would be similarly beneficial. However, today{'}s systems are only as sensitive to these pragmatic moves as their language model allows. We create and release the first large-scale English language corpus {`}Circa{'} with 34,268 (polar question, indirect answer) pairs to enable progress on this task. The data was collected via elaborate crowdsourcing, and contains utterances with yes/no meaning, as well as uncertain, middle-ground, and conditional responses. We also present BERT-based neural models to predict such categories for a question-answer pair. We find that while transfer learning from entailment works reasonably, performance is not yet sufficient for robust dialog. Our models reach 82-88{\%} accuracy for a 4-class distinction, and 74-85{\%} for 6 classes.}
}

@inproceedings{rajpurkar-etal-2018-know,
    title = {Know What You Don{'}t Know: Unanswerable Questions for {SQ}u{AD}},
    author = {Rajpurkar, Pranav and Jia, Robin and Liang, Percy},
    booktitle = {Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
    month = jul,
    year = {2018},
    address = {Melbourne, Australia},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/P18-2124},
    doi = {10.18653/v1/P18-2124},
    pages = {784--789},
    abstract = {Extractive reading comprehension systems can often locate the correct answer to a question in a context document, but they also tend to make unreliable guesses on questions for which the correct answer is not stated in the context. Existing datasets either focus exclusively on answerable questions, or use automatically generated unanswerable questions that are easy to identify. To address these weaknesses, we present SQuADRUn, a new dataset that combines the existing Stanford Question Answering Dataset (SQuAD) with over 50,000 unanswerable questions written adversarially by crowdworkers to look similar to answerable ones. To do well on SQuADRUn, systems must not only answer questions when possible, but also determine when no answer is supported by the paragraph and abstain from answering. SQuADRUn is a challenging natural language understanding task for existing models: a strong neural system that gets 86{\%} F1 on SQuAD achieves only 66{\%} F1 on SQuADRUn. We release SQuADRUn to the community as the successor to SQuAD.}
}

@inproceedings{xu-etal-2019-asking,
    title = {Asking Clarification Questions in Knowledge-Based Question Answering},
    author = {Xu, Jingjing and Wang, Yuechen and Tang, Duyu and Duan, Nan and Yang, Pengcheng and Zeng, Qi and Zhou, Ming and Sun, Xu},
    booktitle = {Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)},
    month = nov,
    year = {2019},
    address = {Hong Kong, China},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/D19-1172},
    doi = {10.18653/v1/D19-1172},
    pages = {1618--1629},
    abstract = {The ability to ask clarification questions is essential for knowledge-based question answering (KBQA) systems, especially for handling ambiguous phenomena. Despite its importance, clarification has not been well explored in current KBQA systems. Further progress requires supervised resources for training and evaluation, and powerful models for clarification-related text understanding and generation. In this paper, we construct a new clarification dataset, CLAQUA, with nearly 40K open-domain examples. The dataset supports three serial tasks: given a question, identify whether clarification is needed; if yes, generate a clarification question; then predict answers base on external user feedback. We provide representative baselines for these tasks and further introduce a coarse-to-fine model for clarification question generation. Experiments show that the proposed model achieves better performance than strong baselines. The further analysis demonstrates that our dataset brings new challenges and there still remain several unsolved problems, like reasonable automatic evaluation metrics for clarification question generation and powerful models for handling entity sparsity.}
}

@article{anjali2014ambiguities,
    title = {Ambiguities in Natural Language Processing},
    author = {Anjali, M. K. and Babu, Anto P.},
    journal = {International Journal of Innovative Research in Computer and Communication Engineering},
    volume = {2},
    number = {5},
    pages = {392--394},
    year = {2014}
}

@book{jurafsky2009speech,
    title = {Speech and Language Processing: An Introduction to Natural Language Processing, Computational Linguistics, and Speech Recognition},
    author = {Jurafsky, Daniel and Martin, James H.},
    isbn = {9780131873216},
    lccn = {2008010335},
    series = {Prentice Hall Series in Artificial Intelligence},
    url = {https://books.google.com.ua/books?id=fZmj5UNK8AQC},
    year = {2009},
    publisher = {Pearson Prentice Hall}
}