% Barga and Digiampietri (2008): layered provenance model for workflow execution, with automatic capture and relational storage.
@ARTICLE{Barga2008,
  author   = {Barga, Roger S. and Digiampietri, Luciano A.},
  title    = {Automatic Capture and Efficient Storage of {eScience} Experiment Provenance},
  journal  = {Concurrency and Computation: Practice and Experience},
  year     = {2008},
  volume   = {20},
  number   = {5},
  pages    = {419--429},
  doi      = {10.1002/cpe.1235},
  abstract = {Workflow is playing an increasingly important role in conducting e-Science experiments, but most commercial systems lack the necessary support for the collection and management of provenance data. We argue that eScience provenance data should be automatically generated by the workflow enactment engine and managed over time by an underlying storage service. In this paper, we introduce a layered model for workflow execution provenance, which allows navigation from an abstract model of the experiment to instance data collected during a specific experiment run. We outline modest extensions to a commercial workflow engine so it will automatically capture this provenance data at runtime. We then present an approach to store this provenance data in a relational database engine. Finally, we identify important properties of provenance data captured by our model that can significantly reduce the amount of storage required, and demonstrate we can reduce the size of provenance data captured from an actual experiment to 0.4\% of the original size, with modest performance overhead.},
}