@comment{ICONIP 2024 conference paper, published in the Communications in Computer and Information Science series. Personal names use the Last, First form and the DOI is stored bare, without LaTeX escaping.}
@inproceedings{d8a2c72bfa2f4dfaa3cc22a338034b1b,
  author    = {Cao, Yuanjiang and Liu, Yao and Wang, Ruoyu and Sheng, Quan Z. and Yao, Lina},
  title     = {Improving efficiency of unsupervised skill discovery by model resetting curriculum},
  booktitle = {Neural Information Processing},
  editor    = {Mahmud, Mufti and Doborjeh, Maryam and Wong, Kevin and Leung, Andrew Chi Sing and Doborjeh, Zohreh and Tanveer, M.},
  series    = {Communications in Computer and Information Science},
  pages     = {1--14},
  publisher = {Springer, Springer Nature},
  address   = {Singapore},
  year      = {2025},
  doi       = {10.1007/978-981-96-6954-7_1},
  isbn      = {9789819669530},
  language  = {English},
  keywords  = {Unsupervised Skill Discovery, Sample Efficiency, Reinforcement Learning},
  note      = {31st International Conference on Neural Information Processing, ICONIP 2024 ; Conference date: 02-12-2024 Through 06-12-2024},
  abstract  = {Unsupervised skill discovery is a fundamental task for an agent to acquire optimal behaviours independently without relying on external rewards or supervision for specific tasks. Previous research has been conducted that aims to distil skills from information theory-guided exploration without supervision. However, the training stage of unsupervised skill discovery still requires a large number of samples. One approach to decrease the number of samples is to inject plasticity by resetting the neural network after a constant number of iterations. Counter-intuitively, we find that naive model resetting could compromise the model{\textquoteright}s efficacy and damage sample efficiency in unsupervised skill discovery tasks. To address this problem, we propose a new concept, Reward Difference Rate, and leverage it to construct three categories of learning curves during the unsupervised skill discovery training process. The reward Difference Rate is able to identify the failure cases of naive resetting. Based on the identification, we propose substituting the naive resetting model with a novel Model Resetting Curriculum scheme. We conduct experiments on a Mujoco-based environment compared with advanced baselines, targeting two continuous skill domains, Ant and Humanoid. The experiment result demonstrates the effectiveness of our proposed method in cutting training costs in terms of the number of environment interactions.},
}