% Conference paper in an LNCS proceedings volume (vol. 2903). Editor names were
% reconstructed from a garbled export; the publisher address is the publisher's
% city, not the conference venue (conference details are kept in the note field).
@inproceedings{558c7ed2f57a431ba4ebb2d0b96457a6,
  author    = {Dale, Robert and Paris, Cecile and Tilbrook, Marc},
  title     = {Information Extraction via Path Merging},
  booktitle = {{AI} 2003: Advances in Artificial Intelligence - 16th Australian Conference on {AI}, Proceedings},
  editor    = {Gedeon, Thomas D. and Fung, Lance Chun Che},
  series    = {Lecture Notes in Computer Science},
  volume    = 2903,
  pages     = {150--160},
  publisher = {Springer},
  address   = {Berlin, Heidelberg},
  year      = 2003,
  month     = dec,
  doi       = {10.1007/978-3-540-24581-0_13},
  isbn      = {9783540206460},
  language  = {English},
  keywords  = {Natural language generation, Natural language understanding},
  abstract  = {In this paper, we describe a new approach to information extraction that neatly integrates top-down hypothesis driven information with bottom-up data driven information. The aim of the kelp project is to combine a variety of natural language processing techniques so that we can extract useful elements of information from a collection of documents and then re-present this information in a manner that is tailored to the needs of a specific user. Our focus here is on how we can build richly structured data objects by extracting information from web pages; as an example, we describe our methods in the context of extracting information from web pages that describe laptop computers. Our approach, which we call path-merging, involves using relatively simple techniques for identifying what are normally referred to as named entities, then allowing more sophisticated and intelligent techniques to combine these elements of information: effectively, we view the text as providing a collection of jigsaw-piece-like elements of information which then have to be combined to produce a representation of the useful content of the document. A principal goal of this work is the separation of different components of the information extraction task so as to increase portability.},
  note      = {16th Australian Conference on Artificial Intelligence, AI - 2003 ; Conference date: 03-12-2003 Through 05-12-2003},
}