@inproceedings{92ff0b3221e248ca8df2b3c9b2d765ba,
title = "Revealing distribution discrepancy by sampling transfer in unlabeled data",
abstract = "Class labels of test samples are increasingly unavailable, which makes measuring the discrepancy between training and test distributions both necessary and challenging. This distribution discrepancy complicates the assessment of whether the hypothesis selected by an algorithm on training samples remains applicable to test samples. We present Importance Divergence (I-Div), a novel approach that addresses test label unavailability by evaluating the distribution discrepancy using only training samples. I-Div transfers the sampling patterns of the test distribution to the training distribution by estimating density and likelihood ratios. Specifically, the density ratio, informed by the selected hypothesis, is obtained by minimizing the Kullback-Leibler divergence between the actual and estimated input distributions. Simultaneously, the likelihood ratio is adjusted according to the density ratio by reducing the generalization error of the distribution discrepancy as transformed through the two ratios. Experiments on a wide range of complex data scenarios and tasks show that I-Div accurately quantifies the distribution discrepancy.",
author = "Zhilin Zhao and Longbing Cao and Xuhui Fan and Wei-Shi Zheng",
year = "2024",
language = "English",
isbn = "9798331314385",
series = "Advances in Neural Information Processing Systems",
publisher = "Curran Associates, Inc.",
pages = "1--28",
editor = "A. Globerson and L. Mackey and D. Belgrave and A. Fan and U. Paquet and J. Tomczak and C. Zhang",
booktitle = "NeurIPS 2024",
note = "38th Conference on Neural Information Processing Systems (NeurIPS 2024); Conference date: 10-12-2024 through 15-12-2024",
}