@inproceedings{8c975e7341a14e26bf6f6356530ecdd8,
  title     = {{PromDA}: Prompt-based Data Augmentation for Low-resource {NLU} Tasks},
  abstract  = {This paper focuses on the Data Augmentation for low-resource Natural Language Understanding (NLU) tasks. We propose Prompt-based Data Augmentation model (PromDA) which only trains small-scale Soft Prompt (i.e., a set of trainable vectors) in the frozen Pre-trained Language Models (PLMs). This avoids human effort in collecting unlabeled in-domain data and maintains the quality of generated synthetic data. In addition, PromDA generates synthetic data via two different views and filters out the low-quality data using NLU models. Experiments on four benchmarks show that synthetic data produced by PromDA successfully boost up the performance of NLU models which consistently outperform several competitive baseline models, including a state-of-the-art semi-supervised model using unlabeled in-domain data. The synthetic data from PromDA are also complementary with unlabeled in-domain data. The NLU models can be further improved when they are combined for training.},
  author    = {Wang, Yufei and Xu, Can and Sun, Qingfeng and Hu, Huang and Tao, Chongyang and Geng, Xiubo and Jiang, Daxin},
  note      = {Version archived for private and non-commercial use with the permission of the author/s and according to publisher conditions. For further rights please contact the publisher.; 60th Annual Meeting of the Association for Computational Linguistics, ACL 2022 ; Conference date: 22-05-2022 Through 27-05-2022},
  year      = {2022},
  doi       = {10.18653/v1/2022.acl-long.292},
  language  = {English},
  series    = {Proceedings of the Annual Meeting of the Association for Computational Linguistics},
  publisher = {Association for Computational Linguistics (ACL)},
  pages     = {4242--4255},
  editor    = {Muresan, Smaranda and Nakov, Preslav and Villavicencio, Aline},
  booktitle = {Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  address   = {United States},
}