% Conference paper from WAIM 2016 (see the note field); published in the proceedings series given below.
% NOTE(review): month/day look like export placeholders (note gives 03-06-2016 to 05-06-2016) -- confirm before relying on them.
@inproceedings{92840755c94b4e64a58c1604d4975d69,
  author    = {Chunhua Li and Pengpeng Zhao and Sheng, {Victor S.} and Zhixu Li and Guanfeng Liu and Jian Wu and Zhiming Cui},
  title     = {A hybrid machine-crowdsourcing approach for web table matching and cleaning},
  booktitle = {Web-Age Information Management},
  editor    = {Bin Cui and Nan Zhang and Jianliang Xu and Xiang Lian and Dexi Liu},
  series    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  pages     = {132--144},
  publisher = {Springer-VDI-Verlag GmbH \& Co. KG},
  address   = {Germany},
  year      = {2016},
  month     = jan,
  day       = {1},
  doi       = {10.1007/978-3-319-39958-4_11},
  isbn      = {9783319399577},
  language  = {English},
  keywords  = {Crowdsourcing, Data cleaning, Table matching},
  abstract  = {Table matching and data cleaning are two crucial activities in integrating data from different web tables, which have traditionally been considered as separate activities. We show that data cleaning can effectively help us discover table matches, and vice versa. In this paper, we study a hybrid machine-crowdsourcing approach to handle the two activities together with a well-developed knowledge base. Understanding the semantics of tables is fundamental to both matching and cleaning. We select the most valuable columns to crowdsourcing validation and infer others by consolidating crowdsourcing results and machine-generated results. When resolving inconsistency between data and semantics, relative trust is taken into account to validate data or semantics via crowd. Our experimental results show the effectiveness of the proposed approach for matching and cleaning web tables using real-life datasets.},
  note      = {17th International Conference on Web-Age Information Management, WAIM 2016 ; Conference date: 03-06-2016 Through 05-06-2016},
}