% Conference paper in a Springer LNCS proceedings volume (SpaCCS 2016).
% Cleaned up from a repository export: names are in "Last, First" form, the
% proper noun in the title is brace-protected, and series/publisher hold only
% their canonical names. The LNCS volume number is not recorded here.
% NOTE(review): address is set to Springer's city for ISBN prefix 978-3-319
% (the export said "United States") -- confirm against the printed volume.
@inproceedings{3ce9067d3300459b92af82864e33af85,
  author    = {Zhang, Xuyun and Qi, Lianyong and He, Qiang and Dou, Wanchun},
  title     = {Scalable Iterative Implementation of {Mondrian} for Big Data Multidimensional Anonymisation},
  booktitle = {Security, Privacy and Anonymity in Computation, Communication and Storage},
  editor    = {Wang, Guojun and Ray, Indrakshi and Alcaraz Calero, Jose M. and Thampi, Sabu M.},
  series    = {Lecture Notes in Computer Science},
  pages     = {311--320},
  publisher = {Springer},
  address   = {Cham},
  year      = {2016},
  doi       = {10.1007/978-3-319-49145-5_31},
  isbn      = {978-3-319-49144-8},
  language  = {English},
  keywords  = {Big data, Cloud computing, Data anonymisation, MapReduce, Privacy preservation},
  abstract  = {Scalable data processing platforms built on cloud computing are becoming increasingly attractive as infrastructure for supporting big data mining and analytics applications. But privacy concerns are one of the major obstacles to make use of public cloud platforms. Practically, data generalisation is a widely adopted anonymisation technique for data privacy preservation in data publishing or sharing scenarios. Multidimensional anonymisation, a global-recoding generalisation scheme, has been a recent focus due to its capability of balancing data obfuscation and data usability. Existing approaches handled the scalability problem of multidimensional anonymisation for data sets much larger than main memory by storing data on disk at runtime, which incurs an impractical serial I/O cost. In this paper, we propose a scalable iterative multidimensional anonymisation approach for big data sets based on MapReduce, a state-of-the-art large-scale data processing paradigm. Our basic and intuitive idea is to partition a large data set recursively into smaller data partitions using MapReduce until all partitions can fit in memory of each computing node. A tree indexing structure is proposed to achieve recursive computation on MapReduce for data partitioning in multidimensional anonymisation. Experimental results on real-life data sets demonstrate that the proposed approach can significantly improve the scalability and time-efficiency of multidimensional anonymisation over existing approaches, and therefore is applicable to big data applications.},
  note      = {9th International Conference on Security, Privacy and Anonymity in Computation, Communication and Storage (SpaCCS 2016), 16--18 November 2016},
}