SLIDE Publication List

Authors: Type:

2015

  • M. C. Rousset and F. Ulliana, “Extracting Bounded-level Modules from Deductive RDF Triplestores,” in AAAI’15: 29th Conference on Artificial Intelligence, Austin, Texas, United States, 2015.
    [BibTeX] [Download PDF]
    @inproceedings{rousset:lirmm-01086951,
      title        = {Extracting Bounded-level Modules from Deductive {RDF} Triplestores},
      author       = {Rousset, Marie Christine and Ulliana, Federico},
      booktitle    = {{AAAI}'15: 29th Conference on Artificial Intelligence},
      address      = {Austin, Texas, United States},
      organization = {{AAAI}},
      year         = {2015},
      month        = jan,
      url          = {http://hal-lirmm.ccsd.cnrs.fr/lirmm-01086951},
      hal_id       = {lirmm-01086951},
      hal_version  = {v1},
    }

  • M. Al-Bakri, M. Atencia, S. Lalande, and M. C. Rousset, “Inferring Same-as Facts from Linked Data: An Iterative Import-by-Query Approach,” in Proceedings of the Twenty-Ninth AAAI Conference on Artificial Intelligence (AAAI 2015), Austin, Texas, United States, 2015.
    [BibTeX] [Download PDF]
    @inproceedings{albakri:hal-01113463,
      title       = {Inferring Same-as Facts from {Linked Data}: An Iterative Import-by-Query Approach},
      author      = {Al-Bakri, Mustafa and Atencia, Manuel and Lalande, Steffen and Rousset, Marie Christine},
      booktitle   = {Proceedings of the Twenty-Ninth {AAAI} Conference on Artificial Intelligence ({AAAI} 2015)},
      address     = {Austin, Texas, United States},
      year        = {2015},
      month       = jan,
      url         = {https://hal.inria.fr/hal-01113463},
      hal_id      = {hal-01113463},
      hal_version = {v1},
    }

  • M. Servajean, R. Akbarinia, E. Pacitti, and S. Amer-Yahia, “Profile Diversity for Query Processing using User Recommendations,” Information Systems, vol. 48, pp. 44-63, 2015. doi:10.1016/j.is.2014.09.001
    [BibTeX] [Download PDF]
    @article{servajean:lirmm-01079523,
      title       = {Profile Diversity for Query Processing using User Recommendations},
      author      = {Servajean, Maximilien and Akbarinia, Reza and Pacitti, Esther and Amer-Yahia, Sihem},
      journal     = {Information Systems},
      publisher   = {Elsevier},
      volume      = {48},
      pages       = {44--63},
      year        = {2015},
      month       = mar,
      doi         = {10.1016/j.is.2014.09.001},
      keywords    = {profile diversity ; search and recommendation ; top-k ; optimization},
      url         = {http://hal-lirmm.ccsd.cnrs.fr/lirmm-01079523},
      hal_id      = {lirmm-01079523},
      hal_version = {v1},
    }

2014

  • F. Ulliana, J. C. Léon, O. Palombi, M. C. Rousset, and F. Faure, “Combining 3D Models and Functions through Ontologies to Describe Man-made Products and Virtual Humans: Toward a Common Framework,” Computer-Aided Design & Applications, p. 77, 2014.
    [BibTeX] [Download PDF]
    @article{ulliana:hal-01070581,
      title       = {Combining {3D} Models and Functions through Ontologies to Describe Man-made Products and Virtual Humans: Toward a Common Framework},
      author      = {Ulliana, Federico and L{\'e}on, Jean Claude and Palombi, Olivier and Rousset, Marie Christine and Faure, Fran{\c c}ois},
      journal     = {Computer-Aided Design \& Applications},
      publisher   = {CAD Solutions LLC and Taylor \& Francis Online},
      pages       = {77},
      year        = {2014},
      month       = oct,
      url         = {https://hal.archives-ouvertes.fr/hal-01070581},
      hal_id      = {hal-01070581},
      hal_version = {v1},
    }

  • F. Jouanot, C. Labbé, E. Michael, M. C. Rousset, M. Tauber, and F. Ulliana, “Semantic Filtering of Scientific Articles guided by a Domain Ontology,” in WCECS 2014 – World Congress on Engineering and Computer Science 2014, San Francisco, United States, 2014. doi:10.978-988-19253-7-4
    [BibTeX] [Download PDF]
    @inproceedings{jouanot:hal-01088092,
      title        = {Semantic Filtering of Scientific Articles guided by a Domain Ontology},
      author       = {Jouanot, Fabrice and Labb{\'e}, Cyril and Michael, Elena and Rousset, Marie Christine and Tauber, Ma{\"i}t{\'e} and Ulliana, Federico},
      booktitle    = {{WCECS} 2014 -- World Congress on Engineering and Computer Science 2014},
      address      = {San Francisco, United States},
      organization = {{International Association of Engineers}},
      editor       = {Ao, S. I. and Douglas, Craig and Grundfest, Warren S. and Burgstone, Jon},
      year         = {2014},
      month        = oct,
      isbn         = {978-988-19253-7-4},
      keywords     = {concept extraction from text ; information integration ; ontology-based data access ; semantic search},
      url          = {https://hal.archives-ouvertes.fr/hal-01088092},
      hal_id       = {hal-01088092},
      hal_version  = {v1},
    }

  • S. Kleisarchaki, V. Christophides, S. Amer-Yahia, and A. Douzal-Chouakria, “Online Detection of Topic Change in Social Posts,” in BigData Innovators Gathering, Seoul, South Korea, 2014, pp. 1-4.
    [BibTeX] [Abstract] [Download PDF]

    Gaining deep insights of the social Web content is a challenging Big Data analytics problem, especially when dealing with social posts of high volume and arrival rate consisting of high variable topics. Detecting and tracking the topics that the users discuss in popular microblogging applications like Twitter and studying the evolution of each topic reveals crowd interests and intelligence. The evolution summarizes the changes that are occurring on the topics over a given time horizon inside the evolving data stream. For instance, some topics may disappear at some point in time due to lack of users’ interest, while others are retained over time adopting either a stable or an evolving behaviour. The analysis and storage of such dynamic and massive content with spatio-thematic properties poses new challenges for research.

    @inproceedings{kleisarchaki:hal-00946969,
      title     = {Online Detection of Topic Change in Social Posts},
      author    = {Kleisarchaki, Sofia and Christophides, Vassilis and Amer-Yahia, Sihem and Douzal-Chouakria, Ahlame},
      booktitle = {{BigData} Innovators Gathering},
      address   = {Seoul, South Korea},
      pages     = {1--4},
      year      = {2014},
      abstract  = {Gaining deep insights of the social Web content is a challenging Big Data analytics problem, especially when dealing with social posts of high volume and arrival rate consisting of high variable topics. Detecting and tracking the topics that the users discuss in popular microblogging applications like Twitter and studying the evolution of each topic reveals crowd interests and intelligence. The evolution summarizes the changes that are occurring on the topics over a given time horizon inside the evolving data stream. For instance, some topics may disappear at some point in time due to lack of users' interest, while others are retained over time adopting either a stable or an evolving behaviour. The analysis and storage of such dynamic and massive content with spatio-thematic properties poses new challenges for research.},
      keywords  = {topic change detection; breakpoint detection; scalability},
      x-international-audience = {yes},
      url       = {http://hal.archives-ouvertes.fr/hal-00946969},
    }

  • O. Palombi, F. Ulliana, V. Favier, J. Léon, and M. C. Rousset, “My Corporis Fabrica: an ontology-based tool for reasoning and querying on complex anatomical models,” Journal of biomedical semantics, vol. 5, iss. 1, p. 20, 2014.
    [BibTeX] [Abstract] [Download PDF]

    Background: Multiple models of anatomy have been developed independently and for different purposes. In particular, 3D graphical models are specially useful for visualizing the different organs composing the human body, while ontologies such as FMA (Foundational Model of Anatomy) are symbolic models that provide a unified formal description of anatomy. Despite its comprehensive content concerning the anatomical structures, the lack of formal descriptions of anatomical functions in FMA limits its usage in many applications. In addition, the absence of connection between 3D models and anatomical ontologies makes it difficult and time-consuming to set up and access to the anatomical content of complex 3D objects. Results: First, we provide a new ontology of anatomy called My Corporis Fabrica (MyCF), which conforms to FMA but extends it by making explicit how anatomical structures are composed, how they contribute to functions, and also how they can be related to 3D complex objects. Second, we have equipped MyCF with automatic reasoning capabilities that enable model checking and complex queries answering. We illustrate the added-value of such a declarative approach for interactive simulation and visualization as well as for teaching applications. Conclusions: The novel vision of ontologies that we have developed in this paper enables a declarative assembly of different models to obtain composed models guaranteed to be anatomically valid while capturing the complexity of human anatomy. The main interest of this approach is its declarativity that makes possible for domain experts to enrich the knowledge base at any moment through simple editors without having to change the algorithmic machinery. This provides MyCF software environment a flexibility to process and add semantics on purpose for various applications that incorporate not only symbolic information but also 3D geometric models representing anatomical entities as well as other symbolic information like the anatomical functions.

    @article{palombi:hal-00998563,
      title       = {{My Corporis Fabrica}: an ontology-based tool for reasoning and querying on complex anatomical models},
      author      = {Palombi, Olivier and Ulliana, Federico and Favier, Valentin and L{\'e}on, Jean-Claude and Rousset, Marie Christine},
      journal     = {Journal of Biomedical Semantics},
      volume      = {5},
      number      = {1},
      pages       = {20},
      year        = {2014},
      abstract    = {Background: Multiple models of anatomy have been developed independently and for different purposes. In particular, 3D graphical models are specially useful for visualizing the different organs composing the human body, while ontologies such as FMA (Foundational Model of Anatomy) are symbolic models that provide a unified formal description of anatomy. Despite its comprehensive content concerning the anatomical structures, the lack of formal descriptions of anatomical functions in FMA limits its usage in many applications. In addition, the absence of connection between 3D models and anatomical ontologies makes it difficult and time-consuming to set up and access to the anatomical content of complex 3D objects. Results: First, we provide a new ontology of anatomy called My Corporis Fabrica (MyCF), which conforms to FMA but extends it by making explicit how anatomical structures are composed, how they contribute to functions, and also how they can be related to 3D complex objects. Second, we have equipped MyCF with automatic reasoning capabilities that enable model checking and complex queries answering. We illustrate the added-value of such a declarative approach for interactive simulation and visualization as well as for teaching applications. Conclusions: The novel vision of ontologies that we have developed in this paper enables a declarative assembly of different models to obtain composed models guaranteed to be anatomically valid while capturing the complexity of human anatomy. The main interest of this approach is its declarativity that makes possible for domain experts to enrich the knowledge base at any moment through simple editors without having to change the algorithmic machinery. This provides MyCF software environment a flexibility to process and add semantics on purpose for various applications that incorporate not only symbolic information but also 3D geometric models representing anatomical entities as well as other symbolic information like the anatomical functions.},
      affiliation = {Department of Anatomy , LJK (CNRS-UJF-INPG-UPMF) , LIG (CNRS-UJF-INPG-UPMF)},
      audience    = {internationale},
      url         = {http://hal.inria.fr/hal-00998563},
      hal_id      = {hal-00998563},
      pdf         = {http://hal.inria.fr/hal-00998563/PDF/2041-1480-5-20.pdf},
    }

  • N. Bidoit, P. Bosc, L. Cholvy, O. Pivert, and M. C. Rousset, “Bases de données et intelligence artificielle,” in Panorama actuel de l’intelligence artificielle: ses bases méthodologiques, ses développements, P. Marquis, O. Papini, and H. Prade, Eds., Cépaduès, 2014.
    [BibTeX] [Download PDF]
    @incollection{bidoit:hal-01063411,
      title       = {Bases de donn{\'e}es et intelligence artificielle},
      author      = {Bidoit, Nicole and Bosc, Patrick and Cholvy, Laurence and Pivert, Olivier and Rousset, Marie Christine},
      booktitle   = {Panorama actuel de l'intelligence artificielle: ses bases m{\'e}thodologiques, ses d{\'e}veloppements},
      publisher   = {C{\'e}padu{\`e}s},
      editor      = {Marquis, Pierre and Papini, Odile and Prade, Henri},
      year        = {2014},
      language    = {Fran{\c c}ais},
      audience    = {nationale},
      affiliation = {PILGRIM - IRISA , Onera - The French Aerospace Lab - Toulouse , HADAS - LIG Laboratoire d'Informatique de Grenoble},
      url         = {http://hal.inria.fr/hal-01063411},
      hal_id      = {hal-01063411},
    }

  • S. Amer-Yahia, N. Ibrahim, C. Kamdem Kengne, F. Ulliana, and M. C. Rousset, “SOCLE: towards a framework for data preparation in social applications,” Ingénierie des systèmes d’information, vol. 19, iss. 3, pp. 49-72, 2014. doi:10.3166/isi.19.3.49-72
    [BibTeX] [Download PDF]
    @article{DBLP:journals/isi/Amer-YahiaIKUR14,
      author    = {Amer-Yahia, Sihem and Ibrahim, Noha and Kamdem Kengne, Christiane and Ulliana, Federico and Rousset, Marie Christine},
      title     = {{SOCLE:} Towards a framework for data preparation in social applications},
      journal   = {Ing{\'{e}}nierie des Syst{\`{e}}mes d'Information},
      volume    = {19},
      number    = {3},
      pages     = {49--72},
      year      = {2014},
      doi       = {10.3166/isi.19.3.49-72},
      url       = {http://dx.doi.org/10.3166/isi.19.3.49-72},
      timestamp = {Tue, 30 Sep 2014 18:56:34 +0200},
      biburl    = {http://dblp.uni-trier.de/rec/bib/journals/isi/Amer-YahiaIKUR14},
      bibsource = {dblp computer science bibliography, http://dblp.org},
    }

  • I. Saidi, S. Amer-Yahia, and S. Nait Bahloul, “Diversité dans la recherche d’entités,” Ingénierie des systèmes d’information, vol. 19, iss. 3, pp. 107-136, 2014. doi:10.3166/isi.19.3.107-136
    [BibTeX] [Download PDF]
    @article{DBLP:journals/isi/SaidiAB14,
      author    = {Saidi, Im{\`{e}}ne and Amer-Yahia, Sihem and Nait Bahloul, Safia},
      title     = {Diversit{\'{e}} dans la recherche d'entit{\'{e}}s},
      journal   = {Ing{\'{e}}nierie des Syst{\`{e}}mes d'Information},
      volume    = {19},
      number    = {3},
      pages     = {107--136},
      year      = {2014},
      doi       = {10.3166/isi.19.3.107-136},
      url       = {http://dx.doi.org/10.3166/isi.19.3.107-136},
      timestamp = {Tue, 30 Sep 2014 18:56:38 +0200},
      biburl    = {http://dblp.uni-trier.de/rec/bib/journals/isi/SaidiAB14},
      bibsource = {dblp computer science bibliography, http://dblp.org},
    }

  • M. Das, S. Thirumuruganathan, S. Amer-Yahia, G. Das, and C. Yu, “An expressive framework and efficient algorithms for the analysis of collaborative tagging,” VLDB j., vol. 23, iss. 2, pp. 201-226, 2014. doi:10.1007/s00778-013-0341-y
    [BibTeX] [Download PDF]
    @article{DBLP:journals/vldb/DasTADY14,
      author    = {Das, Mahashweta and Thirumuruganathan, Saravanan and Amer-Yahia, Sihem and Das, Gautam and Yu, Cong},
      title     = {An expressive framework and efficient algorithms for the analysis of collaborative tagging},
      journal   = {{VLDB} J.},
      volume    = {23},
      number    = {2},
      pages     = {201--226},
      year      = {2014},
      doi       = {10.1007/s00778-013-0341-y},
      url       = {http://dx.doi.org/10.1007/s00778-013-0341-y},
      timestamp = {Tue, 30 Sep 2014 18:56:44 +0200},
      biburl    = {http://dblp.uni-trier.de/rec/bib/journals/vldb/DasTADY14},
      bibsource = {dblp computer science bibliography, http://dblp.org},
    }

  • M. Servajean, E. Pacitti, M. Liroz-Gistau, S. Amer-Yahia, and A. El-Abbadi, “Exploiting diversification in gossip-based recommendation,” in Data management in cloud, grid and P2P systems – 7th international conference, globe 2014, munich, germany, september 2-3, 2014. proceedings, 2014, pp. 25-36. doi:10.1007/978-3-319-10067-8_3
    [BibTeX] [Download PDF]
    @inproceedings{DBLP:conf/globe/ServajeanPLAA14,
      author    = {Servajean, Maximilien and Pacitti, Esther and Liroz-Gistau, Miguel and Amer-Yahia, Sihem and El-Abbadi, Amr},
      title     = {Exploiting Diversification in Gossip-Based Recommendation},
      booktitle = {Data Management in Cloud, Grid and {P2P} Systems - 7th International Conference, Globe 2014, Munich, Germany, September 2-3, 2014. Proceedings},
      pages     = {25--36},
      year      = {2014},
      crossref  = {DBLP:conf/globe/2014},
      doi       = {10.1007/978-3-319-10067-8_3},
      url       = {http://dx.doi.org/10.1007/978-3-319-10067-8_3},
      timestamp = {Tue, 30 Sep 2014 18:56:56 +0200},
      biburl    = {http://dblp.uni-trier.de/rec/bib/conf/globe/ServajeanPLAA14},
      bibsource = {dblp computer science bibliography, http://dblp.org},
    }

  • S. Basu Roy, S. Thirumuruganathan, S. Amer-Yahia, G. Das, and C. Yu, “Exploiting group recommendation functions for flexible preferences,” in IEEE 30th international conference on data engineering, chicago, ICDE 2014, il, usa, march 31 – april 4, 2014, 2014, pp. 412-423. doi:10.1109/ICDE.2014.6816669
    [BibTeX] [Download PDF]
    @inproceedings{DBLP:conf/icde/RoyTADY14,
      author    = {Basu Roy, Senjuti and Thirumuruganathan, Saravanan and Amer-Yahia, Sihem and Das, Gautam and Yu, Cong},
      title     = {Exploiting group recommendation functions for flexible preferences},
      booktitle = {{IEEE} 30th International Conference on Data Engineering, Chicago, {ICDE} 2014, IL, USA, March 31 - April 4, 2014},
      pages     = {412--423},
      year      = {2014},
      crossref  = {DBLP:conf/icde/2014},
      doi       = {10.1109/ICDE.2014.6816669},
      url       = {http://dx.doi.org/10.1109/ICDE.2014.6816669},
      timestamp = {Tue, 30 Sep 2014 18:57:03 +0200},
      biburl    = {http://dblp.uni-trier.de/rec/bib/conf/icde/RoyTADY14},
      bibsource = {dblp computer science bibliography, http://dblp.org},
    }

  • S. Amer-Yahia, V. Christophides, A. Kementsietsidis, M. N. Garofalakis, S. Idreos, and V. Leroy, “Proc. 17th international conference on extending database technology (edbt), athens, greece, march 24-28, 2014.” 2014.
    [BibTeX] [Download PDF]
    @proceedings{DBLP:conf/edbt/2014,
      editor    = {Amer-Yahia, Sihem and Christophides, Vassilis and Kementsietsidis, Anastasios and Garofalakis, Minos N. and Idreos, Stratos and Leroy, Vincent},
      title     = {Proc. 17th International Conference on Extending Database Technology ({EDBT}), Athens, Greece, March 24-28, 2014},
      booktitle = {Proc. 17th International Conference on Extending Database Technology ({EDBT}), Athens, Greece, March 24-28, 2014},
      publisher = {OpenProceedings.org},
      year      = {2014},
      url       = {http://openproceedings.org/edbticdt2014/EDBT_toc.html},
      timestamp = {Tue, 30 Sep 2014 18:57:14 +0200},
      biburl    = {http://dblp.uni-trier.de/rec/bib/conf/edbt/2014},
      bibsource = {dblp computer science bibliography, http://dblp.org},
    }

  • S. K. Candan, S. Amer-Yahia, N. Schweikardt, V. Christophides, and V. Leroy, “Proceedings of the workshops of the EDBT/ICDT 2014 joint conference (EDBT/ICDT 2014), athens, greece, march 28, 2014.” 2014.
    [BibTeX] [Download PDF]
    @proceedings{DBLP:conf/edbt/2014w,
      editor    = {Candan, K. Sel{\c{c}}uk and Amer-Yahia, Sihem and Schweikardt, Nicole and Christophides, Vassilis and Leroy, Vincent},
      title     = {Proceedings of the Workshops of the {EDBT/ICDT} 2014 Joint Conference ({EDBT/ICDT} 2014), Athens, Greece, March 28, 2014},
      booktitle = {Proceedings of the Workshops of the {EDBT/ICDT} 2014 Joint Conference ({EDBT/ICDT} 2014), Athens, Greece, March 28, 2014},
      series    = {{CEUR} Workshop Proceedings},
      volume    = {1133},
      publisher = {CEUR-WS.org},
      year      = {2014},
      url       = {http://ceur-ws.org/Vol-1133},
      timestamp = {Tue, 30 Sep 2014 18:57:21 +0200},
      biburl    = {http://dblp.uni-trier.de/rec/bib/conf/edbt/2014w},
      bibsource = {dblp computer science bibliography, http://dblp.org},
    }

2013

  • B. Negrevergne, A. Termier, M. C. Rousset, and J. F. Mehaut, “ParaMiner: a Generic Pattern Mining Algorithm for Multi-Core Architectures,” Data Mining and Knowledge Discovery, vol. 28, iss. 3, pp. 593-633, 2013. doi:http://dx.doi.org/10.1007/s10618-013-0313-2
    [BibTeX] [Abstract] [Download PDF]

    In this paper, we present ParaMiner which is a generic and parallel algorithm for closed pattern mining. ParaMiner is built on the principles of pattern enumeration in strongly accessible set systems. Its efficiency is due to a novel dataset reduction technique (that we call EL-reduction), combined with novel technique for performing dataset reduction in a parallel execution on a multi-core architecture. We illustrate ParaMiner’s genericity by using this algorithm to solve three different pattern mining problems: the frequent itemset mining problem, the mining frequent connected relational graphs problem and the mining gradual itemsets problem. In this paper, we prove the soundness and the completeness of ParaMiner. Furthermore, our experiments show that despite being a generic algorithm, ParaMiner can compete with specialized state of the art algorithms designed for the pattern mining problems mentioned above. Besides, for the particular problem of gradual itemset mining, ParaMiner outperforms the state of the art algorithm by two orders of magnitude.

    @article{negrevergne:hal-00923535,
      title     = {{ParaMiner}: a Generic Pattern Mining Algorithm for Multi-Core Architectures},
      author    = {Negrevergne, Benjamin and Termier, Alexandre and Rousset, Marie Christine and Mehaut, Jean Fran{\c c}ois},
      journal   = {Data Mining and Knowledge Discovery},
      volume    = {28},
      number    = {3},
      pages     = {593--633},
      year      = {2013},
      month     = apr,
      doi       = {10.1007/s10618-013-0313-2},
      note      = {http://link.springer.com/article/10.1007/s10618-013-0313-2},
      keywords  = {Data mining; Closed pattern mining; Parallel pattern mining; Multi-core architectures},
      abstract  = {In this paper, we present {ParaMiner} which is a generic and parallel algorithm for closed pattern mining. {ParaMiner} is built on the principles of pattern enumeration in strongly accessible set systems. Its efficiency is due to a novel dataset reduction technique (that we call {EL}-reduction), combined with novel technique for performing dataset reduction in a parallel execution on a multi-core architecture. We illustrate {ParaMiner}'s genericity by using this algorithm to solve three different pattern mining problems: the frequent itemset mining problem, the mining frequent connected relational graphs problem and the mining gradual itemsets problem. In this paper, we prove the soundness and the completeness of {ParaMiner}. Furthermore, our experiments show that despite being a generic algorithm, {ParaMiner} can compete with specialized state of the art algorithms designed for the pattern mining problems mentioned above. Besides, for the particular problem of gradual itemset mining, {ParaMiner} outperforms the state of the art algorithm by two orders of magnitude.},
      x-international-audience = {yes},
      url       = {http://hal.archives-ouvertes.fr/hal-00923535},
    }

  • F. Goasdoué and M. C. Rousset, “Robust Module-based Data Management,” IEEE Transactions on Knowledge and Data Engineering, vol. 25, iss. 3, pp. 648-661, 2013. doi:http://dx.doi.org/10.1109/TKDE.2011.255
    [BibTeX] [Abstract] [Download PDF]

    The current trend for building an ontology-based data management system (DMS) is to capitalize on efforts made to design a preexisting well-established DMS (a reference system). The method amounts to extracting from the reference DMS a piece of schema relevant to the new application needs — a module —, possibly personalizing it with extra-constraints wrt the application under construction, and then managing a dataset using the resulting schema. In this paper, we extend the existing definitions of modules and we introduce novel properties of robustness that provide means for checking easily that a robust module-based DMS evolves safely wrt both the schema and the data of the reference DMS. We carry out our investigations in the setting of description logics which underlie modern ontology languages, like RDFS, OWL, and OWL2 from W3C. Notably, we focus on the DL-lite_A dialect of the DL-lite family, which encompasses the foundations of the QL profile of OWL2 (ie DL-lite_R): the W3C recommendation for efficiently managing large datasets.

    @article{goasdoue:hal-00671004,
      title     = {Robust Module-based Data Management},
      author    = {Goasdou{\'e}, Fran{\c c}ois and Rousset, Marie Christine},
      journal   = {{IEEE} Transactions on Knowledge and Data Engineering},
      publisher = {IEEE},
      volume    = {25},
      number    = {3},
      pages     = {648--661},
      year      = {2013},
      month     = mar,
      doi       = {10.1109/TKDE.2011.255},
      abstract  = {The current trend for building an ontology-based data management system ({DMS}) is to capitalize on efforts made to design a preexisting well-established {DMS} (a reference system). The method amounts to extracting from the reference {DMS} a piece of schema relevant to the new application needs -- a module --, possibly personalizing it with extra-constraints wrt the application under construction, and then managing a dataset using the resulting schema. In this paper, we extend the existing definitions of modules and we introduce novel properties of robustness that provide means for checking easily that a robust module-based {DMS} evolves safely wrt both the schema and the data of the reference {DMS}. We carry out our investigations in the setting of description logics which underlie modern ontology languages, like {RDFS}, {OWL}, and {OWL}2 from {W3C}. Notably, we focus on the {DL}-lite\_{A} dialect of the {DL}-lite family, which encompasses the foundations of the {QL} profile of {OWL}2 (ie {DL}-lite\_{R}): the {W3C} recommendation for efficiently managing large datasets.},
      x-international-audience = {yes},
      url       = {http://hal.inria.fr/hal-00671004},
      pdf       = {http://hal.inria.fr/hal-00671004/PDF/tkde-2012.pdf},
    }

  • M. Al Bakri, M. Atencia Arcas, and M. C. Rousset, “Trust in networks of ontologies and alignments,” Knowledge and Information Systems, pp. 1-27, 2013.
    [BibTeX] [Abstract] [Download PDF]

    In this paper, we introduce a mechanism of trust adapted to semantic peer-to-peer networks in which every peer is free to organize its local resources as instances of classes of its own ontology. Peers use their ontologies to query other peers, and alignments between peers’ ontologies make it possible to reformulate queries from one local peer’s vocabulary to another. Alignments are typically the result of manual or (semi)automatic ontology matching. However, resulting alignments may be unsound and/or incomplete, and therefore, query reformulation based on alignments may lead to unsatisfactory answers. Trust can assist peers to select the peers in the network that are better suited to answer their queries. In our model, the trust that a peer has toward another peer depends on a specific query, and it represents the probability that the latter peer will provide a satisfactory answer to the query. In order to compute trust, we perform Bayesian inference that exploits ontologies, alignments and user feedback. We have implemented our method and conducted an evaluation. Experimental results show that trust values converge as more queries are sent and answers received. Furthermore, when query answering is guided by trust, the quality of peers’ answers, measured with precision and recall, is improved.

    @article{albakri:hal-01002655,
      title     = {Trust in networks of ontologies and alignments},
      author    = {Al Bakri, Mustafa and Atencia Arcas, Manuel and Rousset, Marie Christine},
      journal   = {Knowledge and Information Systems},
      publisher = {Springer London},
      pages     = {1--27},
      year      = {2013},
      abstract  = {In this paper, we introduce a mechanism of trust adapted to semantic peer-to-peer networks in which every peer is free to organize its local resources as instances of classes of its own ontology. Peers use their ontologies to query other peers, and alignments between peers' ontologies make it possible to reformulate queries from one local peer's vocabulary to another. Alignments are typically the result of manual or (semi)automatic ontology matching. However, resulting alignments may be unsound and/or incomplete, and therefore, query reformulation based on alignments may lead to unsatisfactory answers. Trust can assist peers to select the peers in the network that are better suited to answer their queries. In our model, the trust that a peer has toward another peer depends on a specific query, and it represents the probability that the latter peer will provide a satisfactory answer to the query. In order to compute trust, we perform Bayesian inference that exploits ontologies, alignments and user feedback. We have implemented our method and conducted an evaluation. Experimental results show that trust values converge as more queries are sent and answers received. Furthermore, when query answering is guided by trust, the quality of peers' answers, measured with precision and recall, is improved.},
      keywords  = {Ontology; Populated ontology; Alignment; Trust; Provenance},
      x-international-audience = {yes},
      url       = {http://hal.inria.fr/hal-01002655},
    }

  • C. Kamdem Kengne, L. C. Fopa, A. Termier, N. Ibrahim, M. C. Rousset, T. Washio, and M. Santana, “Rewriting Large Multimedia Application Execution Traces with few Event Sequences,” in Proceedings of the 19th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD), Chicago, IL, États-Unis, 2013, pp. 1348-1356. doi:http://dx.doi.org/10.1145/2487575.2488211
    [BibTeX] [Abstract] [Download PDF]

    The analysis of multimedia application traces can reveal important information to enhance program execution comprehension. However typical size of traces can be in gigabytes, which hinders their effective exploitation by application developers. In this paper, we study the problem of finding a set of sequences of events that allows a reduced-size rewriting of the original trace. These sequences of events, that we call blocks, can simplify the exploration of large execution traces by allowing application developers to see an abstraction instead of low-level events. The problem of computing such set of blocks is NP-hard and naive approaches lead to prohibitive running times that prevent analysing real world traces. We propose a novel algorithm that directly mines the set of blocks. Our experiments show that our algorithm can analyse real traces of up to two hours of video. We also show experimentally the quality of the set of blocks proposed, and the interest of the rewriting to understand actual trace data.

    @inproceedings{kamdemkengne:hal-00923538,
      title     = {Rewriting Large Multimedia Application Execution Traces with few Event Sequences},
      author    = {Kamdem Kengne, Christiane and Fopa, Leon Constantin and Termier, Alexandre and Ibrahim, Noha and Rousset, Marie Christine and Washio, Takashi and Santana, Miguel},
      booktitle = {Proceedings of the 19th {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining ({KDD})},
      address   = {Chicago, IL, United States},
      publisher = {ACM},
      editor    = {Dhillon, Inderjit S. and others},
      pages     = {1348--1356},
      year      = {2013},
      month     = aug,
      doi       = {10.1145/2487575.2488211},
      note      = {Session Industrial and government emerging (Security \& Privacy)},
      abstract  = {The analysis of multimedia application traces can reveal important information to enhance program execution comprehension. However typical size of traces can be in gigabytes, which hinders their effective exploitation by application developers. In this paper, we study the problem of finding a set of sequences of events that allows a reduced-size rewriting of the original trace. These sequences of events, that we call blocks, can simplify the exploration of large execution traces by allowing application developers to see an abstraction instead of low-level events. The problem of computing such set of blocks is {NP}-hard and naive approaches lead to prohibitive running times that prevent analysing real world traces. We propose a novel algorithm that directly mines the set of blocks. Our experiments show that our algorithm can analyse real traces of up to two hours of video. We also show experimentally the quality of the set of blocks proposed, and the interest of the rewriting to understand actual trace data.},
      x-international-audience = {yes},
      url       = {http://hal.archives-ouvertes.fr/hal-00923538},
    }

  • M. Tsytsarau, S. Amer-Yahia, and T. Palpanas, “Efficient Sentiment Correlation for Large-scale Demographics,” in Proceedings of the 2013 ACM SIGMOD International Conference on Management of Data, New York, NY, États-Unis, 2013, pp. 253-264. doi:http://dx.doi.org/10.1145/2463676.2465317
    [BibTeX] [Abstract] [Download PDF]

    {A}nalyzing sentiments of demographic groups is becoming important for the {S}ocial {W}eb, where millions of users provide opinions on a wide variety of content. {W}hile several approaches exist for mining sentiments from product reviews or micro-blogs, little attention has been devoted to aggregating and comparing extracted sentiments for different demographic groups over time, such as ‘{S}tudents in {I}taly’ or ‘{T}eenagers in {E}urope’. {T}his problem demands efficient and scalable methods for sentiment aggregation and correlation, which account for the evolution of sentiment values, sentiment bias, and other factors associated with the special characteristics of web data. {W}e propose a scalable approach for sentiment indexing and aggregation that works on multiple time granularities and uses incrementally updateable data structures for online operation. {F}urthermore, we describe efficient methods for computing meaningful sentiment correlations, which exploit pruning based on demographics and use top-k correlations compression techniques. {W}e present an extensive experimental evaluation with both synthetic and real datasets, demonstrating the effectiveness of our pruning techniques and the efficiency of our solution.

    @InProceedings{tsytsarau:hal-00923543,
    address = {New York, NY, {\'E}tats-Unis},
    title = {{E}fficient {S}entiment {C}orrelation for {L}arge-scale {D}emographics},
    month = jun,
    abstract = {{A}nalyzing sentiments of demographic groups is becoming important for the {S}ocial {W}eb, where millions of users provide opinions on a wide variety of content. {W}hile several approaches exist for mining sentiments from product reviews or micro-blogs, little attention has been devoted to aggregating and comparing extracted sentiments for different demographic groups over time, such as '{S}tudents in {I}taly' or '{T}eenagers in {E}urope'. {T}his problem demands efficient and scalable methods for sentiment aggregation and correlation, which account for the evolution of sentiment values, sentiment bias, and other factors associated with the special characteristics of web data. {W}e propose a scalable approach for sentiment indexing and aggregation that works on multiple time granularities and uses incrementally updateable data structures for online operation. {F}urthermore, we describe efficient methods for computing meaningful sentiment correlations, which exploit pruning based on demographics and use top-k correlations compression techniques. {W}e present an extensive experimental evaluation with both synthetic and real datasets, demonstrating the effectiveness of our pruning techniques and the efficiency of our solution.},
    publisher = {ACM},
    pages = {253--264},
    booktitle = {{P}roceedings of the 2013 {ACM} {SIGMOD} {I}nternational {C}onference on {M}anagement of {D}ata},
    year = {2013},
    note = {Research session: social media},
    x-international-audience = {yes},
    keywords = {sentiment analysis; sentiment demographics; correlation},
    author = {Tsytsarau, Mikalai and Amer-Yahia, Sihem and Palpanas, Themis},
    editor = {Ross, Kenneth and Srivastava, Divesh and Papadias, Dimitris},
    url = {http://hal.archives-ouvertes.fr/hal-00923543},
    doi = {10.1145/2463676.2465317},
    }

  • S. Abbar, S. Amer-Yahia, P. Indyk, S. Mahabadi, and K. Varadarajan, “Diverse Near Neighbor Problem,” in Proceedings of the 29th Annual Symposium on Computational Geometry (SoCG), Rio de Janeiro, Brésil, 2013, pp. 207-214. doi:http://dx.doi.org/10.1145/2462356.2462401
    [BibTeX] [Abstract] [Download PDF]

    {M}otivated by the recent research on diversity-aware search, we investigate the k-diverse near neighbor reporting problem. {T}he problem is defined as follows: given a query point q, report the maximum diversity set {S} of k points in the ball of radius r around q. {T}he diversity of a set {S} is measured by the minimum distance between any pair of points in {S} (the higher, the better). {W}e present two approximation algorithms for the case where the points live in a d-dimensional {H}amming space. {O}ur algorithms guarantee query times that are sub-linear in n and only polynomial in the diversity parameter k, as well as the dimension d. {F}or low values of k, our algorithms achieve sub-linear query times even if the number of points within distance r from a query q is linear in n. {T}o the best of our knowledge, these are the first known algorithms of this type that offer provable guarantees.

    @InProceedings{abbar:hal-00923544,
    address = {Rio de Janeiro, Br{\'e}sil},
    title = {{D}iverse {N}ear {N}eighbor {P}roblem},
    month = jun,
    abstract = {{M}otivated by the recent research on diversity-aware search, we investigate the k-diverse near neighbor reporting problem. {T}he problem is defined as follows: given a query point q, report the maximum diversity set {S} of k points in the ball of radius r around q. {T}he diversity of a set {S} is measured by the minimum distance between any pair of points in {S} (the higher, the better). {W}e present two approximation algorithms for the case where the points live in a d-dimensional {H}amming space. {O}ur algorithms guarantee query times that are sub-linear in n and only polynomial in the diversity parameter k, as well as the dimension d. {F}or low values of k, our algorithms achieve sub-linear query times even if the number of points within distance r from a query q is linear in n. {T}o the best of our knowledge, these are the first known algorithms of this type that offer provable guarantees.},
    publisher = {ACM},
    pages = {207--214},
    booktitle = {{P}roceedings of the 29th {A}nnual {S}ymposium on {C}omputational {G}eometry ({S}o{CG})},
    year = {2013},
    note = {Full Research Paper},
    x-international-audience = {yes},
    keywords = {{N}ear {N}eighbor, {D}iversity, {C}ore-set, {S}ub-linear},
    author = {Abbar, Sofiane and Amer-Yahia, Sihem and Indyk, Piotr and Mahabadi, Sepideh and Varadarajan, Kasturi},
    editor = {da Fonseca, Guilherme D. and Lewiner, Thomas and Pe{\~n}aranda, Luis and Chan, Timothy and Klein, Rolf},
    url = {http://hal.archives-ouvertes.fr/hal-00923544},
    doi = {10.1145/2462356.2462401},
    }

  • J. Stoyanovich, S. Amer-Yahia, S. B. Davidson, M. Jacob, and T. Milo, “Understanding Local Structure in Ranked Datasets,” in Proceedings of the 6th Biennial Conference on Innovative Data Systems Research (CIDR), Asilomar, CA, États-Unis, 2013, p. 4 pages.
    [BibTeX] [Abstract] [Download PDF]

    {R}anked data is ubiquitous in real-world applications. {R}ankingsarise naturally when users express preferences about products and services, when voters cast ballots in elections, when funding proposals are evaluated based on their merits and university departments based on their reputation, or when genes are ordered based on their expression levels under various experimental conditions. {W}e observe that ranked data exhibits interesting local structure, representing agreement of subsets of rankers, over subsets of items. {B}eing able to model, identify and describe such structure is important, because it enables novel kinds of analysis with the potential of making ground-breaking impact, but is challenging to do effectively and efficiently. {W}e argue for the use of fundamental data management principles such as declarativeness and incremental evaluation, in combination with state-of-the-art machine learning and data mining techniques, for addressing the effectiveness and efficiency challenges. {W}e describe the key ingredients of a solution, and propose a roadmap towards a framework that will enable robust and efficient analysis of large ranked datasets.

    @InProceedings{stoyanovich:hal-00923541,
    address = {Asilomar, CA, {\'E}tats-Unis},
    title = {{U}nderstanding {L}ocal {S}tructure in {R}anked {D}atasets},
    month = jan,
    abstract = {{R}anked data is ubiquitous in real-world applications. {R}ankingsarise naturally when users express preferences about products and services, when voters cast ballots in elections, when funding proposals are evaluated based on their merits and university departments based on their reputation, or when genes are ordered based on their expression levels under various experimental conditions. {W}e observe that ranked data exhibits interesting local structure, representing agreement of subsets of rankers, over subsets of items. {B}eing able to model, identify and describe such structure is important, because it enables novel kinds of analysis with the potential of making ground-breaking impact, but is challenging to do effectively and efficiently. {W}e argue for the use of fundamental data management principles such as declarativeness and incremental evaluation, in combination with state-of-the-art machine learning and data mining techniques, for addressing the effectiveness and efficiency challenges. {W}e describe the key ingredients of a solution, and propose a roadmap towards a framework that will enable robust and efficient analysis of large ranked datasets.},
    pages = {4 pages},
    booktitle = {{P}roceedings of the 6th {B}iennial {C}onference on {I}nnovative {D}ata {S}ystems {R}esearch ({CIDR})},
    year = {2013},
    note = {Session: Outrageous Ideas and Vision I (Query Processing I) - http://www.cidrdb.org/cidr2013 - Online Proceedings Google Research Award "Identifying Ranked Agreement Among Raters"; European Research Council (FP7/2007-2013) / ERC grant MoDaS, agreement 291071; Israel Ministry of Science; US-Israel Bi-national Science foundation},
    x-international-audience = {yes},
    author = {Stoyanovich, Julia and Amer-Yahia, Sihem and Davidson, Susan B. and Jacob, Marie and Milo, Tova},
    url = {http://hal.archives-ouvertes.fr/hal-00923541},
    }

  • S. Abbar, S. Amer-Yahia, P. Indyk, and S. Mahabadi, “Real-time recommendation of diverse related articles,” in WWW, 2013, pp. 1-12.
    [BibTeX] [Abstract]

    {N}ews articles typically drive a lot of traffic in the form of comments posted by users on a news site. {S}uch user- generated content tends to carry additional information such as entities and sentiment. {I}n general, when articles are recommended to users, only popularity (e.g., most shared and most commented), recency, and sometimes (manual) editors’ picks (based on daily hot topics), are considered. {W}e formalize a novel recommendation problem where the goal is to find the closest most diverse articles to the one the user is currently browsing. {O}ur diversity measure incorporates entities and sentiment extracted from comments. {G}iven the real- time nature of our recommendations, we explore the applicability of nearest neighbor algorithms to solve the problem. {O}ur user study on real opinion articles from aljazeera.net and reuters.com validates the use of entities and sentiment extracted from articles and their comments to achieve news diversity when compared to content-based diversity. {F}inally, our performance experiments show the real-time feasibility of our solution.

    @InProceedings{AbbarAIM13,
    title = {{R}eal-time recommendation of diverse related articles},
    abstract = {{N}ews articles typically drive a lot of traffic in the form of comments posted by users on a news site. {S}uch user- generated content tends to carry additional information such as entities and sentiment. {I}n general, when articles are recommended to users, only popularity (e.g., most shared and most commented), recency, and sometimes (manual) editors' picks (based on daily hot topics), are considered. {W}e formalize a novel recommendation problem where the goal is to find the closest most diverse articles to the one the user is currently browsing. {O}ur diversity measure incorporates entities and sentiment extracted from comments. {G}iven the real- time nature of our recommendations, we explore the applicability of nearest neighbor algorithms to solve the problem. {O}ur user study on real opinion articles from aljazeera.net and reuters.com validates the use of entities and sentiment extracted from articles and their comments to achieve news diversity when compared to content-based diversity. {F}inally, our performance experiments show the real-time feasibility of our solution.},
    pages = {1--12},
    booktitle = {{WWW}},
    year = {2013},
    author = {Abbar, Sofiane and Amer-Yahia, Sihem and Indyk, Piotr and Mahabadi, Sepideh},
    }

  • X. Bai, A. Jégou, F. P. Junqueira, and V. Leroy, “DynaSoRe: Efficient In-Memory Store for Social Applications,” in Proceedings of the ACM/IFIP/USENIX 14th International Middleware Conference, Beijing, Chine, 2013, pp. 425-444. doi:http://dx.doi.org/10.1007/978-3-642-45065-5_22
    [BibTeX] [Abstract] [Download PDF]

    {S}ocial network applications are inherently interactive, creating a requirement for processing user requests fast. {T}o enable fast responses to user requests, social network applications typically rely on large banks of cache servers to hold and serve most of their content from the cache. {I}n this work, we present {D}yna{S}o{R}e: a memory cache system for social network applications that optimizes data locality while placing user views across the system. {D}yna{S}o{R}e storage servers monitor access traffic and bring data frequently accessed together closer in the system to reduce the processing load across cache servers and network devices. {O}ur simulation results considering realistic data center topologies show that {D}yna{S}o{R}e is able to adapt to traffic changes, increase data locality, and balance the load across the system. {T}he traffic handled by the top tier of the network connecting servers drops by 94\% compared to a static assignment of views to cache servers while requiring only 30\% additional memory capacity compared to the whole volume of cached data.

    @InProceedings{bai:hal-00932468,
    address = {Beijing, Chine},
    title = {{D}yna{S}o{R}e: {E}fficient {I}n-{M}emory {S}tore for {S}ocial {A}pplications},
    month = dec,
    abstract = {{S}ocial network applications are inherently interactive, creating a requirement for processing user requests fast. {T}o enable fast responses to user requests, social network applications typically rely on large banks of cache servers to hold and serve most of their content from the cache. {I}n this work, we present {D}yna{S}o{R}e: a memory cache system for social network applications that optimizes data locality while placing user views across the system. {D}yna{S}o{R}e storage servers monitor access traffic and bring data frequently accessed together closer in the system to reduce the processing load across cache servers and network devices. {O}ur simulation results considering realistic data center topologies show that {D}yna{S}o{R}e is able to adapt to traffic changes, increase data locality, and balance the load across the system. {T}he traffic handled by the top tier of the network connecting servers drops by 94\% compared to a static assignment of views to cache servers while requiring only 30\% additional memory capacity compared to the whole volume of cached data.},
    pages = {425--444},
    booktitle = {{P}roceedings of the {ACM}/{IFIP}/{USENIX} 14th {I}nternational {M}iddleware {C}onference},
    year = {2013},
    x-international-audience = {yes},
    author = {Bai, Xiao and J{\'e}gou, Arnaud and Junqueira, Flavio P. and Leroy, Vincent},
    url = {http://hal.archives-ouvertes.fr/hal-00932468},
    doi = {10.1007/978-3-642-45065-5_22},
    pdf = {http://hal.archives-ouvertes.fr/hal-00932468/PDF/camera-ready.pdf},
    }

  • A. Gionis, F. P. Junqueira, V. Leroy, M. Serafini, and I. Weber, “Piggybacking on Social Networks,” in Proceedings of the VLDB Endowment (PVLDB), Riva del Garda, Trento, Italie, 2013, pp. 409-420.
    [BibTeX] [Abstract] [Download PDF]

    {T}he popularity of social-networking sites has increased rapidly over the last decade. {O}ne of the most fundamental functionalities of social-networking sites is to present users with streams of events shared by their friends. {A}t a systems level, materialized per-user views are a common way to assemble and deliver such event streams on-line and with low latency. {A}ccess to the data stores, which keep the user views, is a major bottleneck of social-networking systems. {W}e propose improving the throughput of a system by using social piggybacking: process the requests of two friends by querying and updating the view of a third common friend. {B}y using one such hub view, the system can serve requests of the first friend without querying or updating the view of the second. {W}e show that, given a social graph, social piggybacking can minimize the overall number of requests, but computing the optimal set of hubs is an {NP}-hard problem. {W}e propose an {O}(log n) approximation algorithm and a heuristic to solve the problem, and evaluate them using the full {T}witter and {F}lickr social graphs, which have up to billions of edges. {C}ompared to existing approaches, using social piggy-backing results in similar throughput in systems with few servers, but enables substantial throughput improvements as the size of the system grows, reaching up to a 2-factor increase. {W}e also evaluate our algorithms on a real social-networking system prototype and we show that the actual increase in throughput corresponds nicely to the gain anticipated by our cost function.

    @InProceedings{gionis:hal-00923545,
    number = {6},
    address = {Riva del Garda, Trento, Italie},
    title = {{P}iggybacking on {S}ocial {N}etworks},
    month = apr,
    abstract = {{T}he popularity of social-networking sites has increased rapidly over the last decade. {O}ne of the most fundamental functionalities of social-networking sites is to present users with streams of events shared by their friends. {A}t a systems level, materialized per-user views are a common way to assemble and deliver such event streams on-line and with low latency. {A}ccess to the data stores, which keep the user views, is a major bottleneck of social-networking systems. {W}e propose improving the throughput of a system by using social piggybacking: process the requests of two friends by querying and updating the view of a third common friend. {B}y using one such hub view, the system can serve requests of the first friend without querying or updating the view of the second. {W}e show that, given a social graph, social piggybacking can minimize the overall number of requests, but computing the optimal set of hubs is an {NP}-hard problem. {W}e propose an {O}(log n) approximation algorithm and a heuristic to solve the problem, and evaluate them using the full {T}witter and {F}lickr social graphs, which have up to billions of edges. {C}ompared to existing approaches, using social piggy-backing results in similar throughput in systems with few servers, but enables substantial throughput improvements as the size of the system grows, reaching up to a 2-factor increase. {W}e also evaluate our algorithms on a real social-networking system prototype and we show that the actual increase in throughput corresponds nicely to the gain anticipated by our cost function.},
    volume = {6},
    pages = {409--420},
    booktitle = {{P}roceedings of the {VLDB} {E}ndowment ({PVLDB})},
    year = {2013},
    note = {Research session: Social and Crowd - http://www.vldb.org/pvldb/vol6.html - ISSN: 2150-8097},
    x-international-audience = {yes},
    author = {Gionis, Aristides and Junqueira, Flavio P. and Leroy, Vincent and Serafini, Marco and Weber, Ingmar},
    editor = {B{\"o}hlen, Michael and Koch, Christoph},
    url = {http://hal.archives-ouvertes.fr/hal-00923545},
    pdf = {http://hal.archives-ouvertes.fr/hal-00923545/PDF/p409-serafini.pdf},
    }

  • B. O. Tehrani, S. Amer-Yahia, A. Termier, A. Bertaux, E. Gaussier, and M. C. Rousset, “Towards a Framework for Semantic Exploration of Frequent Patterns,” in Proceedings of the 3rd International Workshop on Information Management for Mobile Applications (IMMoA), Riva del Garda, Trento, Italie, 2013, pp. 7-14.
    [BibTeX] [Abstract] [Download PDF]

    {M}ining frequent patterns is an essential task in discovering hidden correlations in datasets. {A}lthough frequent patterns unveil valuable information, there are some challenges which limits their usability. {F}irst, the number of possible patterns is often very large which hinders their eff ective exploration. {S}econd, patterns with many items are hard to read and the analyst may be unable to understand their meaning. {I}n addition, the only available information about patterns is their support, a very coarse piece of information. {I}n this paper, we are particularly interested in mining datasets that reflect usage patterns of users moving in space and time and for whom demographics attributes are available (age, occupation, etc). {S}uch characteristics are typical of data collected from smart phones, whose analysis has critical business applications nowadays. {W}e propose pattern exploration primitives, abstraction and refinement, that use hand-crafted taxonomies on time, space and user demographics. {W}e show on two real datasets, {N}okia and {M}ovie{L}ens, how the use of such taxonomies reduces the size of the pattern space and how demographics enable their semantic exploration. {T}his work opens new perspectives in the semantic exploration of frequent patterns that reflect the behavior of di fferent user communities.

    @InProceedings{tehrani:hal-00881195,
    address = {Riva del Garda, Trento, Italie},
    title = {{T}owards a {F}ramework for {S}emantic {E}xploration of {F}requent {P}atterns},
    month = may,
    x-short-communication = {yes},
    abstract = {{M}ining frequent patterns is an essential task in discovering hidden correlations in datasets. {A}lthough frequent patterns unveil valuable information, there are some challenges which limits their usability. {F}irst, the number of possible patterns is often very large which hinders their eff ective exploration. {S}econd, patterns with many items are hard to read and the analyst may be unable to understand their meaning. {I}n addition, the only available information about patterns is their support, a very coarse piece of information. {I}n this paper, we are particularly interested in mining datasets that reflect usage patterns of users moving in space and time and for whom demographics attributes are available (age, occupation, etc). {S}uch characteristics are typical of data collected from smart phones, whose analysis has critical business applications nowadays. {W}e propose pattern exploration primitives, abstraction and refinement, that use hand-crafted taxonomies on time, space and user demographics. {W}e show on two real datasets, {N}okia and {M}ovie{L}ens, how the use of such taxonomies reduces the size of the pattern space and how demographics enable their semantic exploration. {T}his work opens new perspectives in the semantic exploration of frequent patterns that reflect the behavior of di fferent user communities.},
    volume = {1075},
    publisher = {CEUR-WS},
    pages = {7--14},
    booktitle = {{P}roceedings of the 3rd {I}nternational {W}orkshop on {I}nformation {M}anagement for {M}obile {A}pplications ({IMM}o{A})},
    year = {2013},
    note = {http://ceur-ws.org/Vol-1075/ - ISSN: 1613-0073},
    x-international-audience = {yes},
    author = {Tehrani, Behrooz Omidvar and Amer-Yahia, Sihem and Termier, Alexandre and Bertaux, Aur{\'e}lie and Gaussier, Eric and Rousset, Marie Christine},
    editor = {Delot, Thierry and Geisler, Sandra and Ilarri, Sergio and Quix, Christoph},
    url = {http://hal.archives-ouvertes.fr/hal-00881195},
    pdf = {http://hal.archives-ouvertes.fr/hal-00881195/PDF/Towards_a_Framework_for_Semantic_Exploration_of_Frequent_Patterns.pdf},
    }

  • C. Kamdem Kengne, N. Ibrahim, M. C. Rousset, and M. Tchuenté, “Distance-based Trace Diagnosis for Multimedia Applications: Help me TED!,” in Proceedings of the 7th IEEE International Conference on Semantic Computing, Irvine, CA, États-Unis, 2013, pp. 306-309. doi:http://dx.doi.org/10.1109/ICSC.2013.59
    [BibTeX] [Abstract] [Download PDF]

    {E}xecution traces have become essential resources that many developers analyze to debug their applications. {I}deally, a developer wants to quickly detect whether there are anomalies on his application or not. {H}owever, in practice, the size of multimedia applications trace can reach gigabytes, which makes their exploitation very complex. {U}sually, developers use visualization tools before stating a hypothesis. {I}n this paper, we argue that this solution is not satisfactory and propose to automatically provide a diagnosis by comparing execution traces. {W}e use distance-based models and conduct a user case to show how {TED}, our automatic trace diagnosis tool, provides semantic added-value information to the developer. {P}erformance evaluation over real world data shows that our approach is scalable.

    @InProceedings{kamdemkengne:hal-00923537,
    address = {Irvine, CA, {\'E}tats-Unis},
    title = {{D}istance-based {T}race {D}iagnosis for {M}ultimedia {A}pplications: {H}elp me {TED}!},
    x-short-communication = {yes},
    abstract = {{E}xecution traces have become essential resources that many developers analyze to debug their applications. {I}deally, a developer wants to quickly detect whether there are anomalies on his application or not. {H}owever, in practice, the size of multimedia applications trace can reach gigabytes, which makes their exploitation very complex. {U}sually, developers use visualization tools before stating a hypothesis. {I}n this paper, we argue that this solution is not satisfactory and propose to automatically provide a diagnosis by comparing execution traces. {W}e use distance-based models and conduct a user case to show how {TED}, our automatic trace diagnosis tool, provides semantic added-value information to the developer. {P}erformance evaluation over real world data shows that our approach is scalable.},
    publisher = {IEEE},
    pages = {306--309},
    booktitle = {{P}roceedings of the 7th {IEEE} {I}nternational {C}onference on {S}emantic {C}omputing},
    year = {2013},
    note = {Session: Semantic Multimedia (short paper)},
    x-international-audience = {yes},
    keywords = {{A}udio/{V}ideo decoding; {D}iagnosis; {D}istance; {E}xecution traces; {M}ultimedia applications},
    author = {Kamdem Kengne, Christiane and Ibrahim, Noha and Rousset, Marie Christine and Tchuent{\'e}, Maurice},
    editor = {Evans, David A. and van der Schaar, Mihaela and Sheu, Phillip and Abbott, Jeffrey},
    url = {http://hal.archives-ouvertes.fr/hal-00923537},
    doi = {10.1109/ICSC.2013.59},
    }

  • S. Basu Roy, I. Lykourentzou, S. Thirumuruganathan, S. Amer-Yahia, and G. Das, “Crowds, not Drones: Modeling Human Factors in Interactive Crowdsourcing,” in DBCrowd 2013 – VLDB Workshop on Databases and Crowdsourcing, Riva del Garda, Trento, Italie, 2013, pp. 39-42.
    [BibTeX] [Abstract] [Download PDF]

    {I}n this vision paper, we propose {S}mart{C}rowd, an intelligent and adaptive crowdsourcing framework. {C}ontrary to existing crowdsourcing systems, where the process of hiring workers (crowd), learning their skills, and evaluating the accuracy of tasks they perform are fragmented, siloed, and often ad-hoc, {S}mart{C}rowd foresees a paradigm shift in that process, considering unpredictability of human nature, namely human factors. {S}mart{C}rowd offers opportunities in making crowdsourcing intelligent through iterative interaction with the workers, and adaptively learning and improving the underlying processes. {B}oth existing (majority of which do not require longer engagement from volatile and mostly non-recurrent workers) and next generation crowdsourcing applications (which require longer engagement from the crowd) stand to benefit from {S}mart{C}rowd. {W}e outline the opportunities in {S}mart{C}rowd, and discuss the challenges and directions, that can potentially revolutionize the existing crowdsourcing landscape.

    @InProceedings{basuroy:hal-00923542,
    address = {Riva del Garda, Trento, Italie},
    title = {{C}rowds, not {D}rones: {M}odeling {H}uman {F}actors in {I}nteractive {C}rowdsourcing},
    month = aug,
    x-short-communication = {yes},
    series = {CEUR Workshop Proceedings},
    abstract = {{I}n this vision paper, we propose {S}mart{C}rowd, an intelligent and adaptive crowdsourcing framework. {C}ontrary to existing crowdsourcing systems, where the process of hiring workers (crowd), learning their skills, and evaluating the accuracy of tasks they perform are fragmented, siloed, and often ad-hoc, {S}mart{C}rowd foresees a paradigm shift in that process, considering unpredictability of human nature, namely human factors. {S}mart{C}rowd offers opportunities in making crowdsourcing intelligent through iterative interaction with the workers, and adaptively learning and improving the underlying processes. {B}oth existing (majority of which do not require longer engagement from volatile and mostly non-recurrent workers) and next generation crowdsourcing applications (which require longer engagement from the crowd) stand to benefit from {S}mart{C}rowd. {W}e outline the opportunities in {S}mart{C}rowd, and discuss the challenges and directions, that can potentially revolutionize the existing crowdsourcing landscape.},
    publisher = {CEUR-WS},
    pages = {39--42},
    booktitle = {{DBC}rowd 2013 - {VLDB} {W}orkshop on {D}atabases and {C}rowdsourcing},
    year = {2013},
    x-international-audience = {yes},
    author = {Basu Roy, Senjuti and Lykourentzou, Ioanna and Thirumuruganathan, Saravanan and Amer-Yahia, Sihem and Das, Gautam},
    editor = {Cheng, Reynold and Das Sarma, Anish and Maniu, Silviu and Senellart, Pierre},
    url = {http://hal.inria.fr/hal-00923542},
    pdf = {http://hal.inria.fr/hal-00923542/PDF/vision1.pdf},
    }

  • S. Amer-Yahia, F. Bonchi, C. Castillo, E. Feuerstein, I. Méndez-Díaz, and P. Zabala, “Complexity and Algorithms for Composite Retrieval,” in Proceedings of the 22nd International Conference on World Wide Web (Companion Volume), Rio de Janeiro, Brésil, 2013, pp. 79-80.
    [BibTeX] [Abstract] [Download PDF]

    no abstract

    @InProceedings{ameryahia:hal-00923546,
    address = {Rio de Janeiro, Br{\'e}sil},
    title = {{C}omplexity and {A}lgorithms for {C}omposite {R}etrieval},
    month = may,
    x-short-communication = {yes},
    abstract = {no abstract},
    publisher = {IW3C2},
    pages = {79--80},
    booktitle = {{P}roceedings of the 22nd {I}nternational {C}onference on {W}orld {W}ide {W}eb ({C}ompanion {V}olume)},
    year = {2013},
    note = {Poster session: bridging structured and unstructured data - http://www2013.org - ISBN: 978-1-4503-2038-2},
    x-international-audience = {yes},
    author = {Amer-Yahia, Sihem and Bonchi, Francesco and Castillo, Carlos and Feuerstein, Esteban and M{\'e}ndez-D{\'\i}az, Isabel and Zabala, Paula},
    editor = {Schwabe, Daniel and Almeida, Virgilio and Glaser, Hartmut and Baeza-Yates, Ricardo and Moon, Sue},
    url = {http://hal.archives-ouvertes.fr/hal-00923546},
    }

  • M. Servajean, E. Pacitti, S. Amer-Yahia, and P. Neveu, “Profile Diversity in Search and Recommendation,” in SRS 2013: 4th International Workshop on Social Recommender Systems (in conjunction WWW 2013), Rio de Janeiro, Brésil, 2013, pp. 973-980.
    [BibTeX] [Abstract] [Download PDF]

    {W}e investigate profile diversity, a novel idea in searching scientific documents. {C}ombining keyword relevance with popularity in a scoring function has been the subject of different forms of social relevance. {C}ontent diversity has been thoroughly studied in search and advertising, database queries, and recommendations. {W}e believe our work is the first to investigate profile diversity to address the problem of returning highly popular but too-focused documents. {W}e show how to adapt {F}agin’s threshold-based algorithms to return the most relevant and most popular documents that satisfy content and profile diversities and run preliminary experiments on two benchmarks to validate our scoring function.

    @InProceedings{servajean:lirmm-00806676,
    address = {Rio de Janeiro, Br{\'e}sil},
    title = {{P}rofile {D}iversity in {S}earch and {R}ecommendation},
    month = may,
    isbn = {978-1-4503-2038-2},
    x-short-communication = {yes},
    abstract = {{W}e investigate profile diversity, a novel idea in searching scientific documents. {C}ombining keyword relevance with popularity in a scoring function has been the subject of different forms of social relevance. {C}ontent diversity has been thoroughly studied in search and advertising, database queries, and recommendations. {W}e believe our work is the first to investigate profile diversity to address the problem of returning highly popular but too-focused documents. {W}e show how to adapt {F}agin's threshold-based algorithms to return the most relevant and most popular documents that satisfy content and profile diversities and run preliminary experiments on two benchmarks to validate our scoring function.},
    publisher = {IW3C2},
    pages = {973--980},
    booktitle = {{SRS} 2013: 4th {I}nternational {W}orkshop on {S}ocial {R}ecommender {S}ystems (in conjunction {WWW} 2013)},
    year = {2013},
    note = {Paper Session: User Models - WWW'13 Companion (dl.acm.org/citation.cfm?id=2488094) Work conducted within the Institut de Biologie Computationnelle and partially funded by the labex NUMEV and the CNRS project Mastodons},
    x-international-audience = {yes},
    keywords = {{R}ecommendation, diversity, top-k},
    author = {Servajean, Maximilien and Pacitti, Esther and Amer-Yahia, Sihem and Neveu, Pascal},
    editor = {Guy, Ido and Chen, Li and Zhou, Michelle X.},
    url = {http://hal-lirmm.ccsd.cnrs.fr/lirmm-00806676},
    pdf = {http://hal-lirmm.ccsd.cnrs.fr/lirmm-00806676/PDF/Servajean-SRS2013.pdf},
    }

  • M. Servajean, E. Pacitti, S. Amer-Yahia, and P. Neveu, “Profile Diversity for Phenotyping Data Search and Recommendation,” in BDA 2013 – 29e Journées Bases de Données Avancées, Nantes, France, 2013, p. 20.
    [BibTeX] [Abstract] [Download PDF]

    {D}ans ce travail, nous étudions la diversité de profils. {I}l s’agit d’une approche nouvelle dans la recherche de documents scientifiques. {D}e nombreux travaux ont combinés la pertinence des mots clés avec la popularité des documents au sein d’une fonction de score " sociale ". {D}iversifier le contenu des documents retournés a également été traité de manière approfondie et la recherche, la publicité, les requêtes en base de données et la recommandation. {N}ous pensons que notre travail est le premier à traiter de la diversité de profils afin de traiter le problème des listes de résultats hautement populaires mais trop ciblées. {N}ous montrerons comment nous adaptons l’algorithme de {F}agin sur les algorithmes à seuil pour retourner les documents les plus pertinents, les plus populaires mais aussi les plus divers que ce soit en terme de contenus ou de profils. {N}ous avons également un ensemble de simulations sur deux benchmarks afin de valider notre fonction de score.

    @InProceedings{servajean:lirmm-00879575,
    address = {Nantes, France},
    title = {{P}rofile {D}iversity for {P}henotyping {D}ata {S}earch and {R}ecommendation},
    month = oct,
    abstract = {{D}ans ce travail, nous {\'e}tudions la diversit{\'e} de profils. {I}l s'agit d'une approche nouvelle dans la recherche de documents scientifiques. {D}e nombreux travaux ont combin{\'e}s la pertinence des mots cl{\'e}s avec la popularit{\'e} des documents au sein d'une fonction de score " sociale ". {D}iversifier le contenu des documents retourn{\'e}s a {\'e}galement {\'e}t{\'e} trait{\'e} de mani{\`e}re approfondie et la recherche, la publicit{\'e}, les requ{\^e}tes en base de donn{\'e}es et la recommandation. {N}ous pensons que notre travail est le premier {\`a} traiter de la diversit{\'e} de profils afin de traiter le probl{\`e}me des listes de r{\'e}sultats hautement populaires mais trop cibl{\'e}es. {N}ous montrerons comment nous adaptons l'algorithme de {F}agin sur les algorithmes {\`a} seuil pour retourner les documents les plus pertinents, les plus populaires mais aussi les plus divers que ce soit en terme de contenus ou de profils. {N}ous avons {\'e}galement un ensemble de simulations sur deux benchmarks afin de valider notre fonction de score.},
    pages = {20},
    booktitle = {{BDA} 2013 - 29e {J}ourn{\'e}es {B}ases de {D}onn{\'e}es {A}vanc{\'e}es},
    year = {2013},
    note = {Session: Applications innovantes Work conducted within the Institut de Biologie Computationnelle and partially funded by the labex NUMEV and the CNRS project Mastodons},
    x-international-audience = {no},
    keywords = {{R}ecommendation, diversity, top-k},
    author = {Servajean, Maximilien and Pacitti, Esther and Amer-Yahia, Sihem and Neveu, Pascal},
    url = {http://hal-lirmm.ccsd.cnrs.fr/lirmm-00879575},
    pdf = {http://hal-lirmm.ccsd.cnrs.fr/lirmm-00879575/PDF/bda_latin.pdf},
    }

  • S. Amer-Yahia, “Seminar in Défis du Big Social Data Management,” in Les Fondamentales du CNRS, France, France, 2013.
    [BibTeX] [Abstract] [Download PDF]

    not available

    @inproceedings{ameryahia:hal-01002699,
      author                      = {Amer-Yahia, Sihem},
      title                       = {{S}eminar in {D}{\'e}fis du {B}ig {S}ocial {D}ata {M}anagement},
      booktitle                   = {{L}es {F}ondamentales du {CNRS}},
      address                     = {France, France},
      year                        = {2013},
      abstract                    = {not available},
      x-scientific-popularization = {yes},
      x-international-audience    = {no},
      url                         = {http://hal.inria.fr/hal-01002699},
    }

  • C. Kamdem Kengne, N. Ibrahim, M. C. Rousset, and M. Tchuenté, “Distance-based Trace Diagnosis for Multimedia Applications: Help me TED!,” , 2013.
    [BibTeX] [Abstract] [Download PDF]

    {E}xecution traces have become essential resources that many developers analyze to debug their applications. {I}deally, a developer wants to quickly detect whether there are anomalies on his application or not. {H}owever, in practice, size of multimedia applications trace can reach gigabytes, which makes their exploitation very complex. {U}sually, developers use visualization tools before stating a hypothesis. {I}n this paper, we argue that this solution is not satisfactory and propose to automatically provide a diagnosis by comparing execution traces. {W}e use distance based models and conduct a user case to show how {TED}, our automatic trace diagnosis tool, provides semantic added-value information to the developer. {P}erformance evaluation over real world data shows that our approach is scalable.

    @TechReport{kamdemkengne:hal-00923547,
    title = {{D}istance-based {T}race {D}iagnosis for {M}ultimedia {A}pplications: {H}elp me {TED}!},
    abstract = {{E}xecution traces have become essential resources that many developers analyze to debug their applications. {I}deally, a developer wants to quickly detect whether there are anomalies on his application or not. {H}owever, in practice, size of multimedia applications trace can reach gigabytes, which makes their exploitation very complex. {U}sually, developers use visualization tools before stating a hypothesis. {I}n this paper, we argue that this solution is not satisfactory and propose to automatically provide a diagnosis by comparing execution traces. {W}e use distance based models and conduct a user case to show how {TED}, our automatic trace diagnosis tool, provides semantic added-value information to the developer. {P}erformance evaluation over real world data shows that our approach is scalable.},
    institution = {{Laboratoire d'Informatique de Grenoble (LIG)}},
    number = {RR-LIG-045},
    year = {2013},
    note = {9 pages},
    author = {Kamdem Kengne, Christiane and Ibrahim, Noha and Rousset, Marie Christine and Tchuent{\'e}, Maurice},
    url = {http://hal.archives-ouvertes.fr/hal-00923547},
    pdf = {http://hal.archives-ouvertes.fr/hal-00923547/PDF/RR-LIG-045_orig.pdf},
    }

  • M. Servajean, E. Pacitti, S. Amer-Yahia, A. El Abbadi, and P. Neveu, “Profile Diversity for P2P Search and Recommendation,” , 2013.
    [BibTeX] [Abstract] [Download PDF]

    {W}e investigate profile diversity for {P}2{P} search and recommendation of scientific documents. {I}n scientific domains, endorsements from different communities are important indicators of the broad focus of scientific documents and should be accounted for in search and recommendation. {T}o do so, we introduce profile diversity, a novel idea in searching scientific documents. {T}raditional content diversity has been thoroughly studied in centralized search and advertising, database queries, and recommendations and addresses the question of returning relevant but too-similar documents. {W}e argue that content diversity alone does not suffice for finding documents endorsed by different scientific communities and that profile diversity is needed to alleviate returning popular but too-focused documents. {M}oreover, {P}2{P} profile diversity increases recall and reduces the search space compared with a centralized approach. {W}e believe this paper is the first to investigate {P}2{P} profile diversity in search and recommendation and to study its various facets: architecture, scoring function, and algorithms. {O}ur experiments on the {TREC}09 benchmark validate our proposal.

    @TechReport{servajean:lirmm-00829308,
    title = {{P}rofile {D}iversity for {P}2{P} {S}earch and {R}ecommendation},
    month = jun,
    abstract = {{W}e investigate profile diversity for {P}2{P} search and recommendation of scientific documents. {I}n scientific domains, endorsements from different communities are important indicators of the broad focus of scientific documents and should be accounted for in search and recommendation. {T}o do so, we introduce profile diversity, a novel idea in searching scientific documents. {T}raditional content diversity has been thoroughly studied in centralized search and advertising, database queries, and recommendations and addresses the question of returning relevant but too-similar documents. {W}e argue that content diversity alone does not suffice for finding documents endorsed by different scientific communities and that profile diversity is needed to alleviate returning popular but too-focused documents. {M}oreover, {P}2{P} profile diversity increases recall and reduces the search space compared with a centralized approach. {W}e believe this paper is the first to investigate {P}2{P} profile diversity in search and recommendation and to study its various facets: architecture, scoring function, and algorithms. {O}ur experiments on the {TREC}09 benchmark validate our proposal.},
    institution = {{LIRMM}},
    year = {2013},
    author = {Servajean, Maximilien and Pacitti, Esther and Amer-Yahia, Sihem and El Abbadi, Amr and Neveu, Pascal},
    url = {http://hal-lirmm.ccsd.cnrs.fr/lirmm-00829308},
    }

  • M. Servajean, E. Pacitti, S. Amer-Yahia, and P. Neveu, “Profile Diversity in Search and Recommendation,” , 2013.
    [BibTeX] [Abstract] [Download PDF]

    {W}e investigate profile diversity, a novel idea in searching scientific documents. {C}ombining keyword relevance with popularity in a scoring function has been the subject of different forms of social relevance. {C}ontent diversity has been thoroughly studied in search and advertising, database queries, and recommendations. {W}e believe our work is the first to investigate profile diversity to address the problem of returning highly popular but too-focused documents. {W}e show how to adapt {F}agin’s threshold-based algorithms to return the most relevant and most popular documents that satisfy content and profile diversities and run preliminary experiments on two benchmarks to validate our scoring function.

    @TechReport{servajean:lirmm-00794814,
    title = {{P}rofile {D}iversity in {S}earch and {R}ecommendation},
    month = feb,
    abstract = {{W}e investigate profile diversity, a novel idea in searching scientific documents. {C}ombining keyword relevance with popularity in a scoring function has been the subject of different forms of social relevance. {C}ontent diversity has been thoroughly studied in search and advertising, database queries, and recommendations. {W}e believe our work is the first to investigate profile diversity to address the problem of returning highly popular but too-focused documents. {W}e show how to adapt {F}agin's threshold-based algorithms to return the most relevant and most popular documents that satisfy content and profile diversities and run preliminary experiments on two benchmarks to validate our scoring function.},
    institution = {{LIRMM}},
    year = {2013},
    keywords = {{R}ecommendation, diversity, top-k},
    author = {Servajean, Maximilien and Pacitti, Esther and Amer-Yahia, Sihem and Neveu, Pascal},
    url = {http://hal-lirmm.ccsd.cnrs.fr/lirmm-00794814},
    }

2012

  • S. Thirumuruganathan, M. Das, S. Desai, S. Amer-Yahia, G. Das, and C. Yu, “MapRat: Meaningful Explanation, Interactive Exploration and Geo-Visualization of Collaborative Ratings,” Proceedings of the VLDB Endowment (PVLDB), vol. 5, iss. 12, pp. 1986-1989, 2012.
    [BibTeX] [Abstract] [Download PDF]

    {C}ollaborative rating sites such as {IMDB} and {Y}elp have become rich resources that users consult to form judgments about and choose from among competing items. {M}ost of these sites either provide a plethora of information for users to interpret all by themselves or a simple overall aggregate information. {S}uch aggregates (e.g., average rating over all users who have rated an item, aggregates along pre-defined dimensions, etc.) can not help a user quickly decide the desirability of an item. {I}n this paper, we build a system {M}ap{R}at that allows a user to explore multiple carefully chosen aggregate analytic details over a set of user demographics that meaningfully explain the ratings associated with item(s) of interest. {M}ap{R}at allows a user to systematically explore, visualize and understand user rating patterns of input item(s) so as to make an informed decision quickly. {I}n the demo, participants are invited to explore collaborative movie ratings for popular movies.

    @Article{thirumuruganathan:hal-00922884,
    number = {12},
    title = {{M}ap{R}at: {M}eaningful {E}xplanation, {I}nteractive {E}xploration and {G}eo-{V}isualization of {C}ollaborative {R}atings},
    month = aug,
    journal = {{P}roceedings of the {VLDB} {E}ndowment ({PVLDB})},
    abstract = {{C}ollaborative rating sites such as {IMDB} and {Y}elp have become rich resources that users consult to form judgments about and choose from among competing items. {M}ost of these sites either provide a plethora of information for users to interpret all by themselves or a simple overall aggregate information. {S}uch aggregates (e.g., average rating over all users who have rated an item, aggregates along pre-defined dimensions, etc.) can not help a user quickly decide the desirability of an item. {I}n this paper, we build a system {M}ap{R}at that allows a user to explore multiple carefully chosen aggregate analytic details over a set of user demographics that meaningfully explain the ratings associated with item(s) of interest. {M}ap{R}at allows a user to systematically explore, visualize and understand user rating patterns of input item(s) so as to make an informed decision quickly. {I}n the demo, participants are invited to explore collaborative movie ratings for popular movies.},
    volume = {5},
    pages = {1986--1989},
    year = {2012},
    note = {ISSN: 2150-8097 - www.vldb2012.org - Demonstration session: Information Retrieval, Web, and Mobility at VLDB 2012 (Very Large Data Bases Conference, Istanbul, Turkey, 2012) NSF grants (0812601, 0915834, 1018865); NHARP grant from the Texas Higher Education Coordinating Board; grants from Microsoft Research and Nokia Research},
    x-international-audience = {yes},
    author = {Thirumuruganathan, Saravanan and Das, Mahashweta and Desai, Shrikant and Amer-Yahia, Sihem and Das, Gautam and Yu, Cong},
    url = {http://hal.archives-ouvertes.fr/hal-00922884},
    pdf = {http://hal.archives-ouvertes.fr/hal-00922884/PDF/p1986_saravananthirumuruganathan_vldb2012.pdf},
    }

  • M. Das, S. Thirumuruganathan, S. Amer-Yahia, G. Das, and C. Yu, “Who Tags What? An Analysis Framework,” Proceedings of the VLDB Endowment (PVLDB), vol. 5, iss. 11, pp. 1567-1578, 2012.
    [BibTeX] [Abstract] [Download PDF]

    {T}he rise of {W}eb 2.0 is signaled by sites such as {F}lickr, del.icio.us, and {Y}ou{T}ube, and social tagging is essential to their success. {A} typical tagging action involves three components, user, item (e.g., photos in {F}lickr), and tags (i.e., words or phrases). {A}nalyzing how tags are assigned by certain users to certain items has important implications in helping users search for desired information. {I}n this paper, we explore common analysis tasks and propose a dual mining framework for social tagging behavior mining. {T}his framework is centered around two opposing measures, similarity and diversity, being applied to one or more tagging components, and therefore enables a wide range of analysis scenarios such as characterizing similar users tagging diverse items with similar tags, or diverse users tagging similar items with diverse tags, etc. {B}y adopting different concrete measures for similarity and diversity in the framework, we show that a wide range of concrete analysis problems can be defined and they are {NP}-{C}omplete in general. {W}e design efficient algorithms for solving many of those problems and demonstrate, through comprehensive experiments over real data, that our algorithms significantly out-perform the exact brute-force approach without compromising analysis result quality.

    @Article{das:hal-00922883,
    number = {11},
    title = {{W}ho {T}ags {W}hat? {A}n {A}nalysis {F}ramework},
    month = jul,
    journal = {{P}roceedings of the {VLDB} {E}ndowment ({PVLDB})},
    abstract = {{T}he rise of {W}eb 2.0 is signaled by sites such as {F}lickr, del.icio.us, and {Y}ou{T}ube, and social tagging is essential to their success. {A} typical tagging action involves three components, user, item (e.g., photos in {F}lickr), and tags (i.e., words or phrases). {A}nalyzing how tags are assigned by certain users to certain items has important implications in helping users search for desired information. {I}n this paper, we explore common analysis tasks and propose a dual mining framework for social tagging behavior mining. {T}his framework is centered around two opposing measures, similarity and diversity, being applied to one or more tagging components, and therefore enables a wide range of analysis scenarios such as characterizing similar users tagging diverse items with similar tags, or diverse users tagging similar items with diverse tags, etc. {B}y adopting different concrete measures for similarity and diversity in the framework, we show that a wide range of concrete analysis problems can be defined and they are {NP}-{C}omplete in general. {W}e design efficient algorithms for solving many of those problems and demonstrate, through comprehensive experiments over real data, that our algorithms significantly out-perform the exact brute-force approach without compromising analysis result quality.},
    volume = {5},
    pages = {1567--1578},
    year = {2012},
    note = {ISSN: 2150-8097 - arXiv:1208.0285 - www.vldb2012.org - Research paper at VLDB 2012 (Very Large Data Bases Conference, Istanbul, Turkey, 2012) NSF grants (0812601, 0915834, 1018865); NHARP grant from the Texas Higher Education Coordinating Board; grants from Microsoft Research and Nokia Research},
    x-international-audience = {yes},
    author = {Das, Mahashweta and Thirumuruganathan, Saravanan and Amer-Yahia, Sihem and Das, Gautam and Yu, Cong},
    url = {http://hal.archives-ouvertes.fr/hal-00922883},
    pdf = {http://hal.archives-ouvertes.fr/hal-00922883/PDF/p1567_mahashwetadas_vldb2012.pdf},
    }

  • M. Atencia Arcas, M. Al Bakri, and M. C. Rousset, “TrustMe, I Got What You Mean! – A Trust-Based Semantic P2P Bookmarking System,” in 18th International Conference on Knowledge Engineering and Knowledge Management (EKAW 2012), Galway City, Ireland, 2012, pp. 442-445.
    [BibTeX] [Abstract] [Download PDF]

    {V}irtual online communities (social networks, wikis. . . ) are becoming the major usage of the web. {T}he freedom they give to publish and access information is attracting many web users. {H}owever, this freedom is filling up the web with varied information and viewpoints. {T}his raises important issues that concern privacy and trust. {D}ue to their decentralised nature peer-to-peer ({P}2{P}) systems provide a partial solution for the privacy problem: each user (peer) can keep control on her own data by storing it locally and by deciding the access they want to give to other peers. {W}e focus on semantic {P}2{P} systems in which peers annotate their resources (documents, videos, photos, services) using ontologies.

    @InProceedings{atenciaarcas:hal-01002690,
    address = {Galway City, Irlande},
    title = {{T}rust{M}e, {I} {G}ot {W}hat {Y}ou {M}ean! - {A} {T}rust-{B}ased {S}emantic {P}2{P} {B}ookmarking {S}ystem},
    abstract = {{V}irtual online communities (social networks, wikis. . . ) are becoming the major usage of the web. {T}he freedom they give to publish and access information is attracting many web users. {H}owever, this freedom is filling up the web with varied information and viewpoints. {T}his raises important issues that concern privacy and trust. {D}ue to their decentralised nature peer-to-peer ({P}2{P}) systems provide a partial solution for the privacy problem: each user (peer) can keep control on her own data by storing it locally and by deciding the access they want to give to other peers. {W}e focus on semantic {P}2{P} systems in which peers annotate their resources (documents, videos, photos, services) using ontologies.},
    pages = {442--445},
    booktitle = {18th {I}nternational {C}onference on {K}nowledge {E}ngineering and {K}nowledge {M}anagement ({EKAW} 2012)},
    year = {2012},
    x-international-audience = {yes},
    author = {Atencia Arcas, Manuel and Al Bakri, Mustafa and Rousset, Marie Christine},
    url = {http://hal.inria.fr/hal-01002690},
    }

  • S. Amer-Yahia, S. Anjum, A. Ghenai, A. Siddique, S. Abbar, S. Madden, A. Marcus, and M. El-Haddad, “MAQSA: A System for Social Analytics on News,” in Proceedings of the 2012 ACM SIGMOD International Conference on Management of Data, Scottsdale, AZ, États-Unis, 2012, pp. 653-656. doi:http://dx.doi.org/10.1145/2213836.2213924
    [BibTeX] [Abstract] [Download PDF]

    {W}e present {MAQSA}, a system for social analytics on news. {MAQSA} provides an interactive topic-centric dashboard that summarizes news articles and social activity (e.g., comments and tweets) around them. {MAQSA} helps editors and publishers in newsrooms understand user engagement and audience sentiment evolution on various topics of interest. {I}t also helps news consumers explore public reaction on articles relevant to a topic and refine their exploration via related entities, topics, articles and tweets. {G}iven a topic, e.g., "{G}ulf {O}il {S}pill," or "{T}he {A}rab {S}pring", {MAQSA} combines three key dimensions: time, geographic location, and topic to generate a detailed activity dashboard around relevant articles. {T}he dashboard contains an annotated comment timeline and a social graph of comments. {I}t utilizes commenters’ locations to build maps of comment sentiment and topics by region of the world. {F}inally, to facilitate exploration, {MAQSA} provides listings of related entities, articles, and tweets. {I}t algorithmically processes large collections of articles and tweets, and enables the dynamic specification of topics and dates for exploration. {I}n this demo, participants will be invited to explore the social dynamics around articles on oil spills, the {L}ibyan revolution, and the {A}rab {S}pring. {I}n addition, participants will be able to define and explore their own topics dynamically.

    @InProceedings{ameryahia:hal-00922894,
    address = {Scottsdale, AZ, {\'E}tats-Unis},
    title = {{MAQSA}: {A} {S}ystem for {S}ocial {A}nalytics on {N}ews},
    month = may,
    abstract = {{W}e present {MAQSA}, a system for social analytics on news. {MAQSA} provides an interactive topic-centric dashboard that summarizes news articles and social activity (e.g., comments and tweets) around them. {MAQSA} helps editors and publishers in newsrooms understand user engagement and audience sentiment evolution on various topics of interest. {I}t also helps news consumers explore public reaction on articles relevant to a topic and refine their exploration via related entities, topics, articles and tweets. {G}iven a topic, e.g., "{G}ulf {O}il {S}pill," or "{T}he {A}rab {S}pring", {MAQSA} combines three key dimensions: time, geographic location, and topic to generate a detailed activity dashboard around relevant articles. {T}he dashboard contains an annotated comment timeline and a social graph of comments. {I}t utilizes commenters' locations to build maps of comment sentiment and topics by region of the world. {F}inally, to facilitate exploration, {MAQSA} provides listings of related entities, articles, and tweets. {I}t algorithmically processes large collections of articles and tweets, and enables the dynamic specification of topics and dates for exploration. {I}n this demo, participants will be invited to explore the social dynamics around articles on oil spills, the {L}ibyan revolution, and the {A}rab {S}pring. {I}n addition, participants will be able to define and explore their own topics dynamically.},
    publisher = {ACM},
    pages = {653--656},
    booktitle = {{P}roceedings of the 2012 {ACM} {SIGMOD} {I}nternational {C}onference on {M}anagement of {D}ata},
    year = {2012},
    note = {Demonstration session: social- or user-centered},
    x-international-audience = {yes},
    keywords = {{S}ocial {A}nalytics, {S}entiment {A}nalysis, {T}opic {E}xtraction, {D}ata {V}isualization},
    author = {Amer-Yahia, Sihem and Anjum, Samreen and Ghenai, Amira and Siddique, Aysha and Abbar, Sofiane and Madden, Sam and Marcus, Adam and El-Haddad, Mohammed},
    editor = {Candan, K. Sel{\c{c}}uk and Chen, Yi and Snodgrass, Richard T. and Gravano, Luis and Fuxman, Ariel},
    url = {http://hal.archives-ouvertes.fr/hal-00922894},
    doi = {10.1145/2213836.2213924},
    }

  • C. Kamdem Kengne, L. C. Fopa, N. Ibrahim, A. Termier, M. C. Rousset, and T. Washio, “Enhancing the Analysis of Large Multimedia Applications Execution Traces with FrameMiner,” in Proceedings of the 12th IEEE International Conference on Data Mining Workshops (ICDM Workshops), Brussels, Belgique, 2012, pp. 595-602. doi:http://dx.doi.org/10.1109/ICDMW.2012.95
    [BibTeX] [Abstract] [Download PDF]

    {T}he analysis of multimedia application traces can reveal important information to enhance program comprehension. {H}owever traces can be very large, which hinders their effective exploitation. {I}n this paper, we study the problem of finding a \textit{k-golden} set of blocks that best characterize data. {S}equential pattern mining can help to automatically discover the blocks, and we called \textit{k-golden set}, a set of $k$ blocks that maximally covers the trace. {T}hese kind of blocks can simplify the exploration of large traces by allowing programmers to see an abstraction instead of low-level events. {W}e propose an approach for mining golden blocks and finding coverage of frames. {T}he experiments carried out on video and audio application decoding show very promising results.

    @InProceedings{kamdemkengne:hal-00922889,
    address = {Brussels, Belgique},
    title = {{E}nhancing the {A}nalysis of {L}arge {M}ultimedia {A}pplications {E}xecution {T}races with {F}rame{M}iner},
    month = dec,
    x-short-communication = {yes},
    abstract = {{T}he analysis of multimedia application traces can reveal important information to enhance program comprehension. {H}owever traces can be very large, which hinders their effective exploitation. {I}n this paper, we study the problem of finding a \textit{k-golden} set of blocks that best characterize data. {S}equential pattern mining can help to automatically discover the blocks, and we called \textit{k-golden set}, a set of $k$ blocks that maximally covers the trace. {T}hese kind of blocks can simplify the exploration of large traces by allowing programmers to see an abstraction instead of low-level events. {W}e propose an approach for mining golden blocks and finding coverage of frames. {T}he experiments carried out on video and audio application decoding show very promising results.},
    publisher = {IEEE Computer Society},
    pages = {595--602},
    booktitle = {{P}roceedings of the 12th {IEEE} {I}nternational {C}onference on {D}ata {M}ining {W}orkshops ({ICDM} {W}orkshops)},
    year = {2012},
    note = {ISBN: 978-1-4673-5164-5 - http://icdm2012.ua.ac.be/},
    x-international-audience = {yes},
    author = {Kamdem Kengne, Christiane and Fopa, Leon Constantin and Ibrahim, Noha and Termier, Alexandre and Rousset, Marie Christine and Washio, Takashi},
    editor = {Vreeken, Jilles and others},
    url = {http://hal.archives-ouvertes.fr/hal-00922889},
    doi = {10.1109/ICDMW.2012.95},
    }

  • S. Abiteboul, I. Manolescu, P. Rigaux, M. C. Rousset, and P. Senellart, Web Data Management, Cambridge University Press, 2012.
    [BibTeX] [Abstract] [Download PDF]

    {I}nternet and the {W}eb have revolutionized access to information. {T}oday, one finds primarily on the {W}eb, {HTML} (the standard for the {W}eb) but also documents in pdf, doc, plain text as well as images, music and videos. {T}he public {W}eb is composed of billions of pages on millions of servers. {I}t is a fantastic means of sharing information. {I}t is very simple to use for humans. {O}n the negative side, it is very inappropriate for accesses by software applications. {T}his motivated the introduction of a semistructured data model, namely {XML}, that is well suited both for humans and machines. {G}oals of this site {T}he present site aims to provide a consistent material to describe the structure of information found on the {W}eb, and to explain how this information can be efficiently represented, described and accessed. {T}he primary audience are students interested in data management issues, and teachers who want to set-up a course on {W}eb {D}ata {M}odeling and large-scale data management in distributed and heterogeneous environments.

    @book{abiteboul:hal-00677720,
      author                   = {Abiteboul, Serge and Manolescu, Ioana and Rigaux, Philippe and Rousset, Marie Christine and Senellart, Pierre},
      title                    = {{W}eb {D}ata {M}anagement},
      publisher                = {Cambridge University Press},
      year                     = {2012},
      month                    = feb,
      isbn                     = {978-1-107-01243-1},
      pages                    = {450},
      abstract                 = {{I}nternet and the {W}eb have revolutionized access to information. {T}oday, one finds primarily on the {W}eb, {HTML} (the standard for the {W}eb) but also documents in pdf, doc, plain text as well as images, music and videos. {T}he public {W}eb is composed of billions of pages on millions of servers. {I}t is a fantastic means of sharing information. {I}t is very simple to use for humans. {O}n the negative side, it is very inappropriate for accesses by software applications. {T}his motivated the introduction of a semistructured data model, namely {XML}, that is well suited both for humans and machines. {G}oals of this site {T}he present site aims to provide a consistent material to describe the structure of information found on the {W}eb, and to explain how this information can be efficiently represented, described and accessed. {T}he primary audience are students interested in data management issues, and teachers who want to set-up a course on {W}eb {D}ata {M}odeling and large-scale data management in distributed and heterogeneous environments.},
      note                     = {Access to the full text: http://webdam.inria.fr/Jorge/ PS:CUP-2012 PS:CUP-2012 Webdam project - ERC grant (EC-FP7, agreement 226513)},
      x-international-audience = {yes},
      url                      = {http://hal-institut-mines-telecom.archives-ouvertes.fr/hal-00677720},
    }

  • S. Amer-Yahia, “User and Topic Analytics of the Social Web of News,” in Keynote talk in 5th International Conference on Information Systems and Economic Intelligence (SIIE), Djerba, Tunisia, 2012.
    [BibTeX] [Abstract] [Download PDF]

    {T}he proliferation of social media is undoubtedly changing the way people produce and consume news online. {E}ditors and publishers in newsrooms need to understand user engagement and audience sentiment evolution on various news topics. {N}ews consumers want to explore public reaction on articles relevant to a topic and refine their exploration via related entities, topics, articles and tweets. {I} will present {SPAUE} and {MAQSA}, two systems for social analytics on news. {SPAUE} analyzes traffic while {MAQSA} is content-centric. {T}he core data model in {SPAUE} is a graph formed by the collective behavior of users represented as a set of actions such as browsing, posting an opinion and sharing news stories. {SPAUE} implements path traversal primitives that aggregate topics and actions for a given time period and along four dimensions: traffic source, visits, visitors, and geographic location. {MAQSA} provides an interactive topic-centric dashboard that summarizes social activity around news articles. {T}he dashboard contains an annotated comment timeline, a social graph of comments, and maps of comment sentiment and topics. {B}oth {SPAUE} and {MAQSA} rely on scalable algorithms that enable an interactive specification of topics, actions, and dates and dynamically process large collections of relevant articles.

    @InProceedings{ameryahia:hal-01002704,
    x-invited-conference = {yes},
    address = {Djerba, Tunisia},
    title = {{U}ser and {T}opic {A}nalytics of the {S}ocial {W}eb of {N}ews},
    abstract = {{T}he proliferation of social media is undoubtedly changing the way people produce and consume news online. {E}ditors and publishers in newsrooms need to understand user engagement and audience sentiment evolution on various news topics. {N}ews consumers want to explore public reaction on articles relevant to a topic and refine their exploration via related entities, topics, articles and tweets. {I} will present {SPAUE} and {MAQSA}, two systems for social analytics on news. {SPAUE} analyzes traffic while {MAQSA} is content-centric. {T}he core data model in {SPAUE} is a graph formed by the collective behavior of users represented as a set of actions such as browsing, posting an opinion and sharing news stories. {SPAUE} implements path traversal primitives that aggregate topics and actions for a given time period and along four dimensions: traffic source, visits, visitors, and geographic location. {MAQSA} provides an interactive topic-centric dashboard that summarizes social activity around news articles. {T}he dashboard contains an annotated comment timeline, a social graph of comments, and maps of comment sentiment and topics. {B}oth {SPAUE} and {MAQSA} rely on scalable algorithms that enable an interactive specification of topics, actions, and dates and dynamically process large collections of relevant articles.},
    booktitle = {{K}eynote talk in 5th {I}nternational {C}onference on {I}nformation {S}ystems and {E}conomic {I}ntelligence ({SIIE})},
    year = {2012},
    x-international-audience = {yes},
    author = {Amer-Yahia, Sihem},
    url = {http://hal.inria.fr/hal-01002704},
    }

  • S. Amer-Yahia, “Crowd-Sourcing Literature Review in SUNFLOWER,” in Invited talk in 1st International Workshop on Crowdsourcing Web Search (CrowdSearch) in conjunction with WWW, Lyon, France, 2012.
    [BibTeX] [Abstract] [Download PDF]

    {R}eviewing literature is painful albeit intellectually rewarding. {I} will describe {SUNFLOWER}, a system we developed in collaboration with {B}loomsbury, a publisher of scientific material, member of {Q}atar {F}oundation. {SUNFLOWER} combines the power of search and collaborative editing to review and summarize a large number of articles around a topic. {I}t works in three steps. {T}he first step performs a composite retrieval of related articles where k bundles of complementary articles are built using article metadata such as authors and citations. {T}he second step associates a summary to each bundle by extracting key phrases from their constituent articles. {T}he last step involves humans that collaboratively edit extracted summaries into a coherent literature review on the topic. {W}e report our experiments on scientific articles from qscience.com

    @InProceedings{ameryahia:hal-01002703,
    x-invited-conference = {yes},
    address = {Lyon, France},
    title = {{C}rowd-{S}ourcing {L}iterature {R}eview in {SUNFLOWER}},
    abstract = {{R}eviewing literature is painful albeit intellectually rewarding. {I} will describe {SUNFLOWER}, a system we developed in collaboration with {B}loomsbury, a publisher of scientific material, member of {Q}atar {F}oundation. {SUNFLOWER} combines the power of search and collaborative editing to review and summarize a large number of articles around a topic. {I}t works in three steps. {T}he first step performs a composite retrieval of related articles where k bundles of complementary articles are built using article metadata such as authors and citations. {T}he second step associates a summary to each bundle by extracting key phrases from their constituent articles. {T}he last step involves humans that collaboratively edit extracted summaries into a coherent literature review on the topic. {W}e report our experiments on scientific articles from qscience.com},
    booktitle = {{I}nvited talk in 1st {I}nternational {W}orkshop on {C}rowdsourcing {W}eb {S}earch ({C}rowd{S}earch) in conjunction with {WWW}},
    year = {2012},
    x-international-audience = {yes},
    author = {Amer-Yahia, Sihem},
    url = {http://hal.inria.fr/hal-01002703},
    }

  • S. Amer-Yahia, “User Activity Analytics on the Social Web of News,” in Keynote talk in 18th International Conference on Management of Data, COMAD, Pune, India, 2012.
    [BibTeX] [Abstract] [Download PDF]

    {T}he proliferation of social media is undoubtedly changing the way people produce and consume news online. {E}ditors and publishers in newsrooms need to understand user engagement and audience sentiment evolution on various news topics. {N}ews consumers want to explore public reaction on articles relevant to a topic and refine their exploration via related entities, topics, articles and tweets. {I} will present {MAQSA}, a system for social analytics on news. {MAQSA} provides an interactive topic-centric dashboard that summarizes social activity around news articles. {T}he dashboard contains an annotated comment timeline, a social graph of comments, and maps of comment sentiment and topics. {T}he analysis of both content and user engagement in social media in {MAQSA} enables the exploration of new ways of immersing users in a news consumption experience.

    @InProceedings{ameryahia:hal-01002702,
    x-invited-conference = {yes},
    address = {Pune, India},
    title = {{U}ser {A}ctivity {A}nalytics on the {S}ocial {W}eb of {N}ews},
    abstract = {{T}he proliferation of social media is undoubtedly changing the way people produce and consume news online. {E}ditors and publishers in newsrooms need to understand user engagement and audience sentiment evolution on various news topics. {N}ews consumers want to explore public reaction on articles relevant to a topic and refine their exploration via related entities, topics, articles and tweets. {I} will present {MAQSA}, a system for social analytics on news. {MAQSA} provides an interactive topic-centric dashboard that summarizes social activity around news articles. {T}he dashboard contains an annotated comment timeline, a social graph of comments, and maps of comment sentiment and topics. {T}he analysis of both content and user engagement in social media in {MAQSA} enables the exploration of new ways of immersing users in a news consumption experience.},
    booktitle = {{K}eynote talk in 18th {I}nternational {C}onference on {M}anagement of {D}ata, {COMAD}},
    year = {2012},
    x-international-audience = {yes},
    author = {Amer-Yahia, Sihem},
    url = {http://hal.inria.fr/hal-01002702},
    }

  • M. C. Rousset, “Reasoning on Web Data Semantics,” in Seminar in College de France, Paris, France, 2012.
    [BibTeX] [Abstract] [Download PDF]

    {N}ot available

    @InProceedings{rousset:hal-01002700,
    address = {Paris, France},
    title = {{R}easoning on {W}eb {D}ata {S}emantics},
    abstract = {{N}ot available},
    x-scientific-popularization = {yes},
    booktitle = {{S}eminar in {C}ollege de {F}rance},
    year = {2012},
    x-international-audience = {no},
    author = {Rousset, Marie Christine},
    url = {http://hal.inria.fr/hal-01002700},
    }

2011

  • R. Tournaire, J. Petit, M. C. Rousset, and A. Termier, “Discovery of Probabilistic Mappings between Taxonomies: Principles and Experiments,” Journal on Data Semantics, vol. 6720, pp. 66-101, 2011. doi:http://dx.doi.org/10.1007/978-3-642-22630-4_3
    [BibTeX] [Abstract] [Download PDF]

    {I}n this paper, we investigate a principled approach for defining and discovering probabilistic mappings between two taxonomies. {F}irst, we compare two ways of modeling probabilistic mappings which are compatible with the logical constraints declared in each taxonomy. {T}hen we describe a generate and test algorithm which minimizes the number of calls to the probability estimator for determining those mappings whose probability exceeds a certain threshold. {F}inally, we provide an experimental analysis of this approach.

    @Article{tournaire:hal-00932491,
    title = {{D}iscovery of {P}robabilistic {M}appings between {T}axonomies: {P}rinciples and {E}xperiments},
    journal = {{J}ournal on {D}ata {S}emantics},
    abstract = {{I}n this paper, we investigate a principled approach for defining and discovering probabilistic mappings between two taxonomies. {F}irst, we compare two ways of modeling probabilistic mappings which are compatible with the logical constraints declared in each taxonomy. {T}hen we describe a generate and test algorithm which minimizes the number of calls to the probability estimator for determining those mappings whose probability exceeds a certain threshold. {F}inally, we provide an experimental analysis of this approach.},
    volume = {6720},
    pages = {66--101},
    year = {2011},
    x-international-audience = {yes},
    author = {Tournaire, Remi and Petit, Jean-Marc and Rousset, Marie Christine and Termier, Alexandre},
    url = {http://hal.archives-ouvertes.fr/hal-00932491},
    doi = {10.1007/978-3-642-22630-4_3},
    pdf = {http://hal.archives-ouvertes.fr/hal-00932491/PDF/JoDS_Tournaire_last_submission.pdf},
    }

  • N. Ibrahim, F. Le Mouël, and S. Frénot, “Semantic Service Substitution in Pervasive Environments,” International Journal of Services, Economics and Management (IJSEM), 2011.
    [BibTeX] [Abstract] [Download PDF]

    {A} computing infrastructure where “everything is a service” offers many new system and application possibilities. {A}mong the main challenges, however, is the issue of service substitution for the application execution in such heterogeneous environments. {A}n application would like to continue to execute even when a service disappears, or it would like to benefit from the environment by using better services with better {Q}o{S} when possible. {I}n this article, we define a generic service model and describe the equivalence relations between services considering the functionalities they propose and their non functional {Q}o{S} properties. {W}e define semantic equivalence relations between services and equivalence degree between non functional {Q}o{S} properties. {U}sing these relations we propose semantic substitution mechanisms upon the appearance and disappearance of services that fits the application needs. {W}e developed a prototype as a proof of concept and evaluated its efficiency over a real use case.

    @Article{ibrahim:inria-00438223,
    title = {{S}emantic {S}ervice {S}ubstitution in {P}ervasive {E}nvironments},
    journal = {{I}nternational {J}ournal of {S}ervices, {E}conomics and {M}anagement ({IJSEM})},
    abstract = {{A} computing infrastructure where ``everything is a service'' offers many new system and application possibilities. {A}mong the main challenges, however, is the issue of service substitution for the application execution in such heterogeneous environments. {A}n application would like to continue to execute even when a service disappears, or it would like to benefit from the environment by using better services with better {Q}o{S} when possible. {I}n this article, we define a generic service model and describe the equivalence relations between services considering the functionalities they propose and their non functional {Q}o{S} properties. {W}e define semantic equivalence relations between services and equivalence degree between non functional {Q}o{S} properties. {U}sing these relations we propose semantic substitution mechanisms upon the appearance and disappearance of services that fits the application needs. {W}e developed a prototype as a proof of concept and evaluated its efficiency over a real use case.},
    publisher = {Inderscience},
    year = {2011},
    note = {"Service-Oriented Engineering" special issue},
    x-international-audience = {yes},
    author = {Ibrahim, Noha and Le Mou{\"e}l, Fr{\'e}d{\'e}ric and Fr{\'e}not, St{\'e}phane},
    url = {http://hal.inria.fr/inria-00438223},
    }

  • M. Atencia, J. Euzenat, G. Pirro, and M. C. Rousset, “Alignment-Based Trust for Resource Finding in Semantic P2P Networks,” in Proc. 10th International Semantic Web Conference (ISWC), Bonn (DE), 2011, pp. 51-66. doi:http://dx.doi.org/10.1007/978-3-642-25073-6_4
    [BibTeX] [Abstract] [Download PDF]

    {I}n a semantic {P}2{P} network, peers use separate ontologies and rely on alignments between their ontologies for translating queries. {N}onetheless, alignments may be limited -unsound or incomplete- and generate flawed translations, leading to unsatisfactory answers. {I}n this paper we present a trust mechanism that can assist peers to select those in the network that are better suited to answer their queries. {T}he trust that a peer has towards another peer depends on a specific query and represents the probability that the latter peer will provide a satisfactory answer. {I}n order to compute trust, we exploit both alignments and peers’ direct experience, and perform {B}ayesian inference. {W}e have implemented our technique and conducted an evaluation. {E}xperimental results showed that trust values converge as more queries are sent and answers received. {F}urthermore, the use of trust improves both precision and recall.

    @InProceedings{atencia:hal-00781008,
    address = {Bonn (DE)},
    title = {{A}lignment-{B}ased {T}rust for {R}esource {F}inding in {S}emantic {P}2{P} {N}etworks},
    series = {Lecture Notes in Computer Science},
    abstract = {{I}n a semantic {P}2{P} network, peers use separate ontologies and rely on alignments between their ontologies for translating queries. {N}onetheless, alignments may be limited -unsound or incomplete- and generate flawed translations, leading to unsatisfactory answers. {I}n this paper we present a trust mechanism that can assist peers to select those in the network that are better suited to answer their queries. {T}he trust that a peer has towards another peer depends on a specific query and represents the probability that the latter peer will provide a satisfactory answer. {I}n order to compute trust, we exploit both alignments and peers' direct experience, and perform {B}ayesian inference. {W}e have implemented our technique and conducted an evaluation. {E}xperimental results showed that trust values converge as more queries are sent and answers received. {F}urthermore, the use of trust improves both precision and recall.},
    volume = {7031},
    pages = {51--66},
    booktitle = {{P}roc. 10th {I}nternational {S}emantic {W}eb {C}onference ({ISWC})},
    year = {2011},
    x-international-audience = {yes},
    author = {Atencia, Manuel and Euzenat, J{\'e}r{\^o}me and Pirro, Giuseppe and Rousset, Marie Christine},
    url = {http://hal.inria.fr/hal-00781008},
    doi = {10.1007/978-3-642-25073-6_4},
    pdf = {http://hal.inria.fr/hal-00781008/PDF/atencia2011a.pdf},
    }

  • S. Abiteboul, M. Bienvenu, A. Galland, and M. C. Rousset, “Distributed Datalog Revisited,” in Datalog 2.0 Workshop, Oxford, United Kingdom, 2011.
    [BibTeX] [Abstract] [Download PDF]

    {T}he emergence of {W}eb 2.0 and social network applications has enabled more and more users to share sensitive information over the {W}eb. {T}he information they manipulate has many facets: personal data (e.g., pictures, movies, music, contacts, emails), social data (e.g., annotations, recommendations, contacts), localization information (e.g., bookmarks), access information (e.g., login, keys), web services (e.g., legacy data, search engines), access rights, ontologies, beliefs, time and provenance information, etc. {T}he tasks they perform are very diverse: search, query, update, authentication, data extraction, etc. {W}e believe that all this should be viewed in the holistic context of the management of a distributed knowledge base. {F}urthermore, we believe that datalog (and its extensions) forms the sound formal basis for representing such information and supporting these tasks. {I}n this paper, we revisit datalog with this goal in mind.

    @InProceedings{abiteboul:inria-00540814,
    address = {Oxford, United Kingdom},
    title = {{D}istributed {D}atalog {R}evisited},
    x-short-communication = {yes},
    abstract = {{T}he emergence of {W}eb 2.0 and social network applications has enabled more and more users to share sensitive information over the {W}eb. {T}he information they manipulate has many facets: personal data (e.g., pictures, movies, music, contacts, emails), social data (e.g., annotations, recommendations, contacts), localization information (e.g., bookmarks), access information (e.g., login, keys), web services (e.g., legacy data, search engines), access rights, ontologies, beliefs, time and provenance information, etc. {T}he tasks they perform are very diverse: search, query, update, authentication, data extraction, etc. {W}e believe that all this should be viewed in the holistic context of the management of a distributed knowledge base. {F}urthermore, we believe that datalog (and its extensions) forms the sound formal basis for representing such information and supporting these tasks. {I}n this paper, we revisit datalog with this goal in mind.},
    booktitle = {{D}atalog 2.0 {W}orkshop},
    year = {2011},
    x-international-audience = {yes},
    author = {Abiteboul, Serge and Bienvenu, Meghyn and Galland, Alban and Rousset, Marie Christine},
    url = {http://hal.inria.fr/inria-00540814},
    pdf = {http://hal.inria.fr/inria-00540814/PDF/datalog20-serge.pdf},
    }

  • S. Amer-Yahia, “I am structured: Cluster Me, Don’t Just Rank me,” in Invited paper in 2nd International Workshop on Business intelligencE and the WEB (BEWEB) in conjunction with EDBT, Berlin, Germany, 2011.
    [BibTeX] [Abstract] [Download PDF]

    {A} large number of online applications are built over high dimensional data. {T}hat is the case for shopping where products have several features (e.g., price and color), dating where personal profiles are described using several dimensions (e.g., physical features and political views), and entertainment (e.g., movie genre and director, restaurant ambiance and location). {I}n addition, in some applications, items may be accompanied with qualitative data such as movie and restaurant reviews. {T}he typical way users find items in those applications is by entering a keyword query and receiving a ranked list of relevant results. {I}deally, just like in {W}eb search, users would want to spend little time before finding a satisfactory item. {I}n practice, due the query output size, the high dimensionality of items, and in some cases, the presence of qualitative data, users tend to spend a lot of time trying to understand correlations between item features and item quality. {I}n this talk, {I} will argue that the 10-blue links experience we are used to in {W}eb search, keywords as input – ranked list as output, is inappropriate when querying and ranking high dimensional data. {I} will describe two applications: exploring qualitative data and ranked querying of structured data. {E}xploring qualitative data is a common activity on collaborative rating sites such as {IMD}b, {CN}et and {Y}elp. {T}he amount of information available on those sites is often daunting. {F}or example, on {Y}elp, a not-so-popular restaurant {J}oe’s {S}hanghai received nearly a thousand ratings, and more popular restaurants routinely exceed that number. {S}imilarly, the movie “{T}he {S}ocial {N}etwork” received more than 42000 ratings on {IMD}b after being released for just two months! {I}n practice, a user either spends a lot of time examining items and reviews before making an informed decision. 
{R}anked querying of structured data is typical in applications such as online dating or real estate search. {I}n online dating, a user looking for a partner between 20 and 40 years old, and who sorts the matches by income from higher to lower, will see a large number of matches in their late 30s who hold an {MBA} degree and work in the financial industry, before seeing any matches in different age groups and walks of life. {S}imilarly, in online real estate, a user looking for 1- or 2-bedroom apartments sorted by price will see a large number of cheap 1-bedrooms in undesirable neighborhoods before seeing any apartment with different features. {T}op results in ranked lists tend to be homogeneous, thereby hindering data exploration. {I}n both applications, an alternative to ranking is to cluster results on their attributes and describe the clusters (e.g.,“{W}oody {A}llen {C}omedies liked by {M}ales over 35”, cheap 2 bedrooms with 2 baths). {H}owever, not all clusters will be of interest to users given varying item quality and varying reviewers information. {W}hen exploring qualitative data, different users are interested in the opinion of different reviewerpopulations. {W}hen querying and ranking structured data,different item features correlate differently with item quality. {I} will discuss two approaches in this talk. {P}ersona-driven search for which we have preliminary ideas in restaurant search, aims to improve the exploration of qualitative data. {R}ank-aware clustering, aims to unveil hidden correlations between item features and item quality. {I}n that context, {I} will report our results of a large-scale user study and a performance evaluation over datasets from a leading dating site.

    @InProceedings{ameryahia:hal-01002705,
    address = {Berlin, Germany},
    title = {{I} am structured: {C}luster {M}e, {D}on't {J}ust {R}ank me},
    x-short-communication = {yes},
    abstract = {{A} large number of online applications are built over high dimensional data. {T}hat is the case for shopping where products have several features (e.g., price and color), dating where personal profiles are described using several dimensions (e.g., physical features and political views), and entertainment (e.g., movie genre and director, restaurant ambiance and location). {I}n addition, in some applications, items may be accompanied with qualitative data such as movie and restaurant reviews. {T}he typical way users find items in those applications is by entering a keyword query and receiving a ranked list of relevant results. {I}deally, just like in {W}eb search, users would want to spend little time before finding a satisfactory item. {I}n practice, due the query output size, the high dimensionality of items, and in some cases, the presence of qualitative data, users tend to spend a lot of time trying to understand correlations between item features and item quality. {I}n this talk, {I} will argue that the 10-blue links experience we are used to in {W}eb search, keywords as input - ranked list as output, is inappropriate when querying and ranking high dimensional data. {I} will describe two applications: exploring qualitative data and ranked querying of structured data. {E}xploring qualitative data is a common activity on collaborative rating sites such as {IMD}b, {CN}et and {Y}elp. {T}he amount of information available on those sites is often daunting. {F}or example, on {Y}elp, a not-so-popular restaurant {J}oe’s {S}hanghai received nearly a thousand ratings, and more popular restaurants routinely exceed that number. {S}imilarly, the movie “{T}he {S}ocial {N}etwork” received more than 42000 ratings on {IMD}b after being released for just two months! {I}n practice, a user either spends a lot of time examining items and reviews before making an informed decision. 
{R}anked querying of structured data is typical in applications such as online dating or real estate search. {I}n online dating, a user looking for a partner between 20 and 40 years old, and who sorts the matches by income from higher to lower, will see a large number of matches in their late 30s who hold an {MBA} degree and work in the financial industry, before seeing any matches in different age groups and walks of life. {S}imilarly, in online real estate, a user looking for 1- or 2-bedroom apartments sorted by price will see a large number of cheap 1-bedrooms in undesirable neighborhoods before seeing any apartment with different features. {T}op results in ranked lists tend to be homogeneous, thereby hindering data exploration. {I}n both applications, an alternative to ranking is to cluster results on their attributes and describe the clusters (e.g.,“{W}oody {A}llen {C}omedies liked by {M}ales over 35”, cheap 2 bedrooms with 2 baths). {H}owever, not all clusters will be of interest to users given varying item quality and varying reviewers information. {W}hen exploring qualitative data, different users are interested in the opinion of different reviewerpopulations. {W}hen querying and ranking structured data,different item features correlate differently with item quality. {I} will discuss two approaches in this talk. {P}ersona-driven search for which we have preliminary ideas in restaurant search, aims to improve the exploration of qualitative data. {R}ank-aware clustering, aims to unveil hidden correlations between item features and item quality. {I}n that context, {I} will report our results of a large-scale user study and a performance evaluation over datasets from a leading dating site.},
    booktitle = {{I}nvited paper in 2nd {I}nternational {W}orkshop on {B}usiness intelligenc{E} and the {WEB} ({BEWEB}) in conjunction with {EDBT}},
    year = {2011},
    x-international-audience = {yes},
    author = {Amer-Yahia, Sihem},
    url = {http://hal.inria.fr/hal-01002705},
    }

  • B. Negrevergne, A. Termier, M. C. Rousset, and J. F. Mehaut, “ParaMiner: a Generic Parallel Pattern Mining Algorithm,” {INRIA}, Rapport de recherche RR-LIG-012, , 2011.
    [BibTeX] [Download PDF]
    @techreport{negrevergne:hal-00788812,
    author = {Negrevergne, Benjamin and Termier, Alexandre and Rousset, Marie Christine and Mehaut, Jean Fran{\c c}ois},
    title = {{P}ara{M}iner: a {G}eneric {P}arallel {P}attern {M}ining {A}lgorithm},
    institution = {{INRIA}},
    type = {Rapport de recherche},
    number = {RR-LIG-012},
    year = {2011},
    url = {http://hal.inria.fr/hal-00788812},
    }

  • M. Atencia Arcas, J. Euzenat, and M. C. Rousset, “Exploiting ontologies and alignments for trust in semantic P2P networks,” {INRIA}, RR-LIG-018, , 2011.
    [BibTeX] [Abstract] [Download PDF]

    {I}n a semantic {P}2{P} network, peers use separate ontologies and rely on alignments between their ontologies for translating queries. {H}owever, alignments may be limited –unsound or incomplete– and generate flawed translations, and thereby produce unsatisfactory answers. {I}n this paper we propose a trust mechanism that can assist peers to select those in the network that are better suited to answer their queries. {T}he trust that a peer has towards another peer is subject to a specific query and approximates the probability that the latter peer will provide a satisfactory answer. {I}n order to compute trust, we exploit the information provided by peers’ ontologies and alignments, along with the information that comes from peers’ experience. {T}rust values are refined over time as more queries are sent and answers received, and we prove that these approximations converge.

    @techreport{atenciaarcas:hal-00921676,
    author = {Atencia Arcas, Manuel and Euzenat, J{\'e}r{\^o}me and Rousset, Marie Christine},
    title = {{E}xploiting ontologies and alignments for trust in semantic {P}2{P} networks},
    institution = {{INRIA}},
    number = {RR-LIG-018},
    month = jun,
    year = {2011},
    note = {10 pages - ISSN 2105-0422 - Les rapports de recherche du LIG},
    abstract = {{I}n a semantic {P}2{P} network, peers use separate ontologies and rely on alignments between their ontologies for translating queries. {H}owever, alignments may be limited --unsound or incomplete-- and generate flawed translations, and thereby produce unsatisfactory answers. {I}n this paper we propose a trust mechanism that can assist peers to select those in the network that are better suited to answer their queries. {T}he trust that a peer has towards another peer is subject to a specific query and approximates the probability that the latter peer will provide a satisfactory answer. {I}n order to compute trust, we exploit the information provided by peers' ontologies and alignments, along with the information that comes from peers' experience. {T}rust values are refined over time as more queries are sent and answers received, and we prove that these approximations converge.},
    url = {http://hal.archives-ouvertes.fr/hal-00921676},
    pdf = {http://hal.archives-ouvertes.fr/hal-00921676/PDF/RR-LIG-018.pdf},
    }

2010

  • R. Tournaire, J. Petit, M. C. Rousset, and A. Termier, “Combining Logic and Probabilities for Discovering Mappings Between Taxonomies,” in Proceedings of the 4th International Conference on Knowledge Science, Engineering and Management, Belfast, Ireland, 2010, pp. 530-542.
    [BibTeX] [Abstract] [Download PDF]

    {I}n this paper, we investigate a principled approach for defining and discovering probabilistic mappings between two taxonomies. {F}irst, we compare two ways of modeling probabilistic mappings which are compatible with the logical constraints declared in each taxonomy. {T}hen we describe a generate and test algorithm which minimizes the number of calls to the probability estimator for determining those mappings whose probability exceeds a certain threshold. {F}inally, we provide an experimental analysis of this approach.

    @InProceedings{tournaire:hal-00953265,
    address = {Belfast, Ireland},
    title = {{C}ombining {L}ogic and {P}robabilities for {D}iscovering {M}appings {B}etween {T}axonomies},
    series = {KSEM'10},
    abstract = {{I}n this paper, we investigate a principled approach for defining and discovering probabilistic mappings between two taxonomies. {F}irst, we compare two ways of modeling probabilistic mappings which are compatible with the logical constraints declared in each taxonomy. {T}hen we describe a generate and test algorithm which minimizes the number of calls to the probability estimator for determining those mappings whose probability exceeds a certain threshold. {F}inally, we provide an experimental analysis of this approach.},
    publisher = {Springer-Verlag},
    pages = {530--542},
    booktitle = {{P}roceedings of the 4th {I}nternational {C}onference on {K}nowledge {S}cience, {E}ngineering and {M}anagement},
    year = {2010},
    x-international-audience = {yes},
    author = {Tournaire, Remi and Petit, Jean-Marc and Rousset, Marie Christine and Termier, Alexandre},
    url = {http://hal.inria.fr/hal-00953265},
    }

  • S. Frénot, N. Ibrahim, F. Le Mouël, A. Ben Hamida, J. Ponge, M. Chantrel, and D. Beras, “ROCS: a Remotely Provisioned OSGi Framework for Ambient Systems,” in Network Operations and Management Symposium, Osaka, Japon, 2010, pp. 503-510.
    [BibTeX] [Abstract] [Download PDF]

    {O}ne of the challenges of ambient systems lies in providing all the available services of the environment to the ambient devices, even if they do not physically host those services. {A}lthough this challenge has come to find a solution through cloud computing, there are still few devices and operating systems that enable applications execution by only uploading the required components into the runtime environment. {T}he {ROCS} ({R}emote {OSG}i {C}aching {S}ervice) framework is a novel proposal which relies on a heavy-weighted standard {J}ava/{OSG}i stack. {I}t is distributed between class servers and ambient devices to provide full functionalities to resource-constrained environments. {T}he {ROCS} framework provides improvements in two areas. {F}irst, it defines a minimal bootstrap environment that runs a standard {J}ava/{OSG}i stack. {S}econdly, it provides an architecture for loading any necessary missing class from remote servers into memory at runtime. {O}ur first results show similar performances when classes are either remotely downloaded into the main memory from a local network or from a flash drive. {T}hese results suggest a way to design minimalistic middleware that dynamically obtain their applications from the network as a first step towards cloud-aware operating systems.

    @InProceedings{frenot:inria-00436041,
    address = {Osaka, Japon},
    title = {{ROCS}: a {R}emotely {P}rovisioned {OSG}i {F}ramework for {A}mbient {S}ystems},
    month = apr,
    abstract = {{O}ne of the challenges of ambient systems lies in providing all the available services of the environment to the ambient devices, even if they do not physically host those services. {A}lthough this challenge has come to find a solution through cloud computing, there are still few devices and operating systems that enable applications execution by only uploading the required components into the runtime environment. {T}he {ROCS} ({R}emote {OSG}i {C}aching {S}ervice) framework is a novel proposal which relies on a heavy-weighted standard {J}ava/{OSG}i stack. {I}t is distributed between class servers and ambient devices to provide full functionalities to resource-constrained environments. {T}he {ROCS} framework provides improvements in two areas. {F}irst, it defines a minimal bootstrap environment that runs a standard {J}ava/{OSG}i stack. {S}econdly, it provides an architecture for loading any necessary missing class from remote servers into memory at runtime. {O}ur first results show similar performances when classes are either remotely downloaded into the main memory from a local network or from a flash drive. {T}hese results suggest a way to design minimalistic middleware that dynamically obtain their applications from the network as a first step towards cloud-aware operating systems.},
    pages = {503--510},
    booktitle = {{N}etwork {O}perations and {M}anagement {S}ymposium},
    year = {2010},
    organization = {{IEEE}/{IFIP}},
    x-international-audience = {yes},
    keywords = {{OSG}i, {C}loud {C}omputing, {J}ava},
    author = {Fr{\'e}not, St{\'e}phane and Ibrahim, Noha and Le Mou{\"e}l, Fr{\'e}d{\'e}ric and Ben Hamida, Amira and Ponge, Julien and Chantrel, Mathieu and Beras, Denis},
    url = {http://hal.inria.fr/inria-00436041},
    pdf = {http://hal.inria.fr/inria-00436041/PDF/PID1073261.pdf},
    }

  • N. Ibrahim, S. Frénot, and F. Le Mouël, “User-Excentric Service Composition in Pervasive Environments,” in The 24th IEEE International Conference on Advanced Information Networking and Applications (AINA), Perth, Australie, 2010, p. 8.
    [BibTeX] [Abstract] [Download PDF]

    {I}n pervasive environments, services are fastly developing and are being deployed everywhere. {I}n this article, we introduce a {S}ervicebook, a new social network of services, where services create and join group of service profile providing to users better access to all the services in their vicinity. {W}e propose a novel technique to realize this {S}ervicebook, the user-excentric service composition. {T}his user-excentric composition relies on two service relations: the compatible relation and the composition relation. {W}e developed and evaluated an {OSG}i-prototype as a proof-of-concept.

    @inproceedings{ibrahim:inria-00442598,
      author    = {Ibrahim, Noha and Fr{\'e}not, St{\'e}phane and Le Mou{\"e}l, Fr{\'e}d{\'e}ric},
      title     = {{U}ser-{E}xcentric {S}ervice {C}omposition in {P}ervasive {E}nvironments},
      booktitle = {{T}he 24th {IEEE} {I}nternational {C}onference on {A}dvanced {I}nformation {N}etworking and {A}pplications ({AINA})},
      address   = {Perth, Australie},
      pages     = {8},
      year      = {2010},
      keywords  = {{SOA}; social network of services; user-excentric composition; semantic matching},
      abstract  = {{I}n pervasive environments, services are fastly developing and are being deployed everywhere. {I}n this article, we introduce a {S}ervicebook, a new social network of services, where services create and join group of service profile providing to users better access to all the services in their vicinity. {W}e propose a novel technique to realize this {S}ervicebook, the user-excentric service composition. {T}his user-excentric composition relies on two service relations: the compatible relation and the composition relation. {W}e developed and evaluated an {OSG}i-prototype as a proof-of-concept.},
      url       = {http://hal.inria.fr/inria-00442598},
      pdf       = {http://hal.inria.fr/inria-00442598/PDF/AINA201026_01_10.pdf},
      x-international-audience = {yes},
    }

  • A. Benyelloul, F. Jouanot, and M. C. Rousset, “Conquer, an RDFS-based model for context querying,” in 6emes Journées Francophones Mobilité et Ubiquité, Lyon, France, 2010.
    [BibTeX] [Abstract] [Download PDF]

    {W}e propose a declarative approach to modeling context data based on the {RDFS}[6] language which enables to declare in a flexible way classes and properties, and organize them in hierarchies. {W}e show that queries on context data can then be expressed at a high level of abstraction and remain applicable when the class hierarchy is extended.

    @InProceedings{benyelloul:hal-01002691,
    x-editorial-board = {no},
    address = {Lyon, France},
    title = {{C}onquer, an {RDFS}-based model for context querying},
    x-short-communication = {yes},
    x-proceedings = {no},
    abstract = {{W}e propose a declarative approach to modeling context data based on the {RDFS}[6] language which enables to declare in a flexible way classes and properties, and organize them in hierarchies. {W}e show that queries on context data can then be expressed at a high level of abstraction and remain applicable when the class hierarchy is extended.},
    booktitle = {6emes {J}ourn{\'e}es {F}rancophones {M}obilit{\'e} et {U}biquit{\'e}},
    year = {2010},
    x-international-audience = {no},
    author = {Benyelloul, Anis and Jouanot, Fabrice and Rousset, Marie Christine},
    url = {http://hal.inria.fr/hal-01002691},
    }

  • F. Saïs, N. Niraula, N. Pernelle, and M. C. Rousset, “LN2R — a knowledge based reference reconciliation system: OAEI 2010 Results,” in The Fifth International Workshop on Ontology Matching, Shanghai, Chine, 2010.
    [BibTeX] [Abstract] [Download PDF]

    {T}his paper presents the first participation of {LN}2{R} system in {IM}@{OAEI}2010, the {I}nstance {M}atching track of {O}ntology {A}lignment {E}valuation {I}nitiative 2010 {C}ampaign. {I}n particular, we participated in {OWL} data track by performing {LN}2{R} system on {P}erson-{R}estaurant data set. {W}e obtained very good results on person data sets and reasonable results on restaurant data set.

    @inproceedings{sais:inria-00527287,
      author    = {Sa{\"\i}s, Fatiha and Niraula, Nobal and Pernelle, Nathalie and Rousset, Marie Christine},
      title     = {{LN}2{R} -- a knowledge based reference reconciliation system: {OAEI} 2010 {R}esults},
      booktitle = {{T}he {F}ifth {I}nternational {W}orkshop on {O}ntology {M}atching},
      address   = {Shanghai, Chine},
      month     = nov,
      year      = {2010},
      abstract  = {{T}his paper presents the first participation of {LN}2{R} system in {IM}@{OAEI}2010, the {I}nstance {M}atching track of {O}ntology {A}lignment {E}valuation {I}nitiative 2010 {C}ampaign. {I}n particular, we participated in {OWL} data track by performing {LN}2{R} system on {P}erson-{R}estaurant data set. {W}e obtained very good results on person data sets and reasonable results on restaurant data set.},
      url       = {http://hal.inria.fr/inria-00527287},
      x-short-communication = {yes},
      x-international-audience = {yes},
    }

  • R. Tournaire, J. Petit, M. C. Rousset, and A. Termier, “ProbaMap: a scalable tool for discovering probabilistic mappings between taxonomies,” in First workshop on Automated Knowledge Base Construction (AKBC2010), Grenoble, France, 2010.
    [BibTeX] [Abstract] [Download PDF]

    {N}ot available

    @InProceedings{tournaire:hal-00953268,
    address = {Grenoble, France},
    title = {{P}roba{M}ap: a scalable tool for discovering probabilistic mappings between taxonomies},
    x-short-communication = {yes},
    booktitle = {{F}irst workshop on {A}utomated {K}nowledge {B}ase {C}onstruction ({AKBC}2010)},
    year = {2010},
    x-international-audience = {yes},
    author = {Tournaire, Remi and Petit, Jean-Marc and Rousset, Marie Christine and Termier, Alexandre},
    url = {http://hal.inria.fr/hal-00953268},
    }

  • R. Tournaire, J. Petit, M. C. Rousset, and A. Termier, “Combining Logic and Probabilities for Discovering Mappings between Taxonomies (short paper),” in 7th Extended Semantic Web Conference, Heraklion, Grèce, 2010.
    [BibTeX] [Abstract] [Download PDF]

    {I}n this paper, we investigate a principled approach for defining and discovering probabilistic mappings between two taxonomies. {F}irst, we compare two ways of modeling probabilistic mappings which are compatible with the logical constraints declared in each taxonomy. {T}hen we describe a generate and test algorithm which minimizes the number of calls to the probability estimator for determining those mappings whose probability exceeds a certain threshold. {F}inally, we provide an experimental analysis of this approach.

    @InProceedings{tournaire:hal-00953266,
    address = {Heraklion, Gr{\`e}ce},
    title = {{C}ombining {L}ogic and {P}robabilities for {D}iscovering {M}appings between {T}axonomies (short paper)},
    x-short-communication = {yes},
    abstract = {{I}n this paper, we investigate a principled approach for defining and discovering probabilistic mappings between two taxonomies. {F}irst, we compare two ways of modeling probabilistic mappings which are compatible with the logical constraints declared in each taxonomy. {T}hen we describe a generate and test algorithm which minimizes the number of calls to the probability estimator for determining those mappings whose probability exceeds a certain threshold. {F}inally, we provide an experimental analysis of this approach.},
    booktitle = {7th {E}xtended {S}emantic {W}eb {C}onference},
    year = {2010},
    x-international-audience = {yes},
    author = {Tournaire, Remi and Petit, Jean-Marc and Rousset, Marie Christine and Termier, Alexandre},
    url = {http://hal.inria.fr/hal-00953266},
    }

  • N. Ibrahim, F. Le Mouël, and S. Frénot, “Middleware Technologies for Ubiquitous Computing,” in Handbook of Research on Next Generation Networks and Ubiquitous Computing, S. Pierre, Ed., IGI Global, 2010, vol. Chapter 12, pp. 122-131. doi:http://dx.doi.org/10.4018/978-1-60566-250-3.ch012
    [BibTeX] [Abstract] [Download PDF]

    {M}iddleware handles many important functionalities for ubiquitous computing. {T}he authors distinguish various middleware technologies providing key elements for all applications’ requirements such as discovery, adaptation/composition, context management, and management of ubiquitous applications. {I}n this chapter, they propose a classification for some of the most employed ubiquitous middleware. {T}he classification was established upon the challenges raised by ubiquitous computing – effective use of smart spaces, invisibility, and localized scalability – and upon how the various ubiquitous middleware respond to them in terms of interoperability, discoverability, location transparency, adaptability, context awareness, scalability, security, and autonomous management. {T}his classification shows that if many middleware are mature enough and offer specific functionalities respecting the properties of ubiquity, a real lack is noticed in having an interoperable, autonomous and scalable middleware for the execution of ubiquitous applications. {T}he development of the service-oriented paradigm, the semantics, the {W}eb middleware, and the ambient intelligence shows the new trend the middleware research field is engaged in.

    @InCollection{ibrahim:inria-00395417,
    title = {{M}iddleware {T}echnologies for {U}biquitous {C}omputing},
    isbn = {9781605662503},
    abstract = {{M}iddleware handles many important functionalities for ubiquitous computing. {T}he authors distinguish various middleware technologies providing key elements for all applications' requirements such as discovery, adaptation/composition, context management, and management of ubiquitous applications. {I}n this chapter, they propose a classification for some of the most employed ubiquitous middleware. {T}he classification was established upon the challenges raised by ubiquitous computing - effective use of smart spaces, invisibility, and localized scalability - and upon how the various ubiquitous middleware respond to them in terms of interoperability, discoverability, location transparency, adaptability, context awareness, scalability, security, and autonomous management. {T}his classification shows that if many middleware are mature enough and offer specific functionalities respecting the properties of ubiquity, a real lack is noticed in having an interoperable, autonomous and scalable middleware for the execution of ubiquitous applications. {T}he development of the service-oriented paradigm, the semantics, the {W}eb middleware, and the ambient intelligence shows the new trend the middleware research field is engaged in.},
    volume = {Chapter 12},
    publisher = {IGI Global},
    pages = {122--131},
    booktitle = {{H}andbook of {R}esearch on {N}ext {G}eneration {N}etworks and {U}biquitous {C}omputing},
    year = {2010},
    x-international-audience = {yes},
    author = {Ibrahim, Noha and Le Mou{\"e}l, Fr{\'e}d{\'e}ric and Fr{\'e}not, St{\'e}phane},
    editor = {Pierre, Samuel},
    url = {http://hal.inria.fr/inria-00395417},
    doi = {10.4018/978-1-60566-250-3.ch012},
    }

  • F. Goasdoué and M. C. Rousset, “Modules sémantiques robustes pour une réutilisation saine en DL-lite,” in Bases de données avancées, Toulouse, France, 2010.
    [BibTeX] [Abstract] [Download PDF]

    {L}’extraction de modules à partir d’ontologies a été récemment étudié dans le cadre des logiques de description, qui sont au coeur des langages modernes d’ontologies. {D}ans cet article, nous définissons une nouvelle notion de modules sémantiques capturant à la fois les modules obtenus par extraction d’un sous-ensemble d’une {T}box ou par "forgetting" de concepts et de roles d’une {T}box. {N}ous définissons et étudions ensuite la réutilisation saine d’un module sémantique d’une {T}box globale afin de construire des {A}boxes locales et de les interroger soit indépendamment, soit de facon conjointe avec la {A}box globale. {A}fin que la {A}box locale (associée au module) et que la {A}box globale (associée à la {T}box initiale) puissent évoluer indépendamment, mais de manière cohérente, nous généralisons la notion d’extension conservative de requete et nous l’étendons au test de consistance. {E}nfin, nous fournissons des algorithmes et des résultats de complexité pour le calcul de modules sémantiques minimaux et robustes dans {DL}-lite{F} et {DL}-lite{R}. {C}es dialectes sont membres de la famille {DL}-lite qui a été spécialement définie pour l’interrogation efficace de grandes masses de données.

    @inproceedings{goasdoue:inria-00533086,
      author    = {Goasdou{\'e}, Fran{\c c}ois and Rousset, Marie Christine},
      title     = {{M}odules s{\'e}mantiques robustes pour une r{\'e}utilisation saine en {DL}-lite},
      booktitle = {{B}ases de donn{\'e}es avanc{\'e}es},
      address   = {Toulouse, France},
      month     = sep,
      year      = {2010},
      abstract  = {{L}'extraction de modules {\`a} partir d'ontologies a {\'e}t{\'e} r{\'e}cemment {\'e}tudi{\'e} dans le cadre des logiques de description, qui sont au coeur des langages modernes d'ontologies. {D}ans cet article, nous d{\'e}finissons une nouvelle notion de modules s{\'e}mantiques capturant {\`a} la fois les modules obtenus par extraction d'un sous-ensemble d'une {T}box ou par "forgetting" de concepts et de roles d'une {T}box. {N}ous d{\'e}finissons et {\'e}tudions ensuite la r{\'e}utilisation saine d'un module s{\'e}mantique d'une {T}box globale afin de construire des {A}boxes locales et de les interroger soit ind{\'e}pendamment, soit de facon conjointe avec la {A}box globale. {A}fin que la {A}box locale (associ{\'e}e au module) et que la {A}box globale (associ{\'e}e {\`a} la {T}box initiale) puissent {\'e}voluer ind{\'e}pendamment, mais de mani{\`e}re coh{\'e}rente, nous g{\'e}n{\'e}ralisons la notion d'extension conservative de requete et nous l'{\'e}tendons au test de consistance. {E}nfin, nous fournissons des algorithmes et des r{\'e}sultats de complexit{\'e} pour le calcul de modules s{\'e}mantiques minimaux et robustes dans {DL}-lite{F} et {DL}-lite{R}. {C}es dialectes sont membres de la famille {DL}-lite qui a {\'e}t{\'e} sp{\'e}cialement d{\'e}finie pour l'interrogation efficace de grandes masses de donn{\'e}es.},
      url       = {http://hal.inria.fr/inria-00533086},
      pdf       = {http://hal.inria.fr/inria-00533086/PDF/article.pdf},
      x-international-audience = {no},
    }

2009

  • F. Saïs, N. Pernelle, and M. C. Rousset, “Combining a Logical and a Numerical Method for Data Reconciliation,” Journal on Data Semantics, iss. 12, pp. 66-94, 2009.
    [BibTeX] [Abstract] [Download PDF]

    {T}he reference reconciliation problem consists in deciding whether different identifiers refer to the same data, i.e. correspond to the same real world entity. {I}n this article we present a reference reconciliation approach which combines a logical method for reference reconciliation called {L}2{R} and a numerical one called {N}2{R}. {T}his approach exploits the schema and data semantics, which is translated into a set of {H}orn {FOL} rules of reconciliation. {T}hese rules are used in {L}2{R} to infer exact decisions both of reconciliation and non-reconciliation. {I}n the second method {N}2{R}, the semantics of the schema is translated in an informed similarity measure which is used by a numerical computation of the similarity of reference pairs. {T}his similarity measure is expressed in a non linear equation system, which is solved by using an iterative method. {T}he experiments of the methods made on two different domains, show good results for both recall and precision. {T}hey can be used separately or in combination. {W}e have shown that their combination allows to improve runtime performance.

    @Article{sais:inria-00433007,
    number = {12},
    title = {{C}ombining a {L}ogical and a {N}umerical {M}ethod for {D}ata {R}econciliation},
    month = jun,
    journal = {{J}ournal on {D}ata {S}emantics},
    abstract = {{T}he reference reconciliation problem consists in deciding whether different identifiers refer to the same data, i.e. correspond to the same real world entity. {I}n this article we present a reference reconciliation approach which combines a logical method for reference reconciliation called {L}2{R} and a numerical one called {N}2{R}. {T}his approach exploits the schema and data semantics, which is translated into a set of {H}orn {FOL} rules of reconciliation. {T}hese rules are used in {L}2{R} to infer exact decisions both of reconciliation and non-reconciliation. {I}n the second method {N}2{R}, the semantics of the schema is translated in an informed similarity measure which is used by a numerical computation of the similarity of reference pairs. {T}his similarity measure is expressed in a non linear equation system, which is solved by using an iterative method. {T}he experiments of the methods made on two different domains, show good results for both recall and precision. {T}hey can be used separately or in combination. {W}e have shown that their combination allows to improve runtime performance.},
    publisher = {Springer Berlin / Heidelberg},
    pages = {66--94},
    year = {2009},
    x-international-audience = {yes},
    author = {Sa{\"\i}s, Fatiha and Pernelle, Nathalie and Rousset, Marie Christine},
    url = {http://hal.inria.fr/inria-00433007},
    pdf = {http://hal.inria.fr/inria-00433007/PDF/Sais-Pernelle-Rousset-JoDSXII-Camera-Ready.pdf},
    }

  • N. Ibrahim and F. Le Mouël, “A Survey on Service Composition Middleware in Pervasive Environments,” International Journal of Computer Science Issues (IJCSI), Special Issue on « Pervasive Computing Systems and Technologies », vol. 1, pp. 1-12, 2009.
    [BibTeX] [Abstract]

    {T}he development of pervasive computing has put the light on a challenging problem: how to dynamically compose services in heterogeneous and highly changing environments? {W}e propose a survey that defines the service composition as a sequence of four steps: the translation, the generation, the evaluation, and finally the execution. {W}ith this powerful and simple model we describe the major service composition middleware. {T}hen, a classification of these service composition middleware according to pervasive requirements – interoperability, discoverability, adaptability, context awareness, {Q}o{S} management, security, spontaneous management, and autonomous management – is given. {T}he classification highlights what has been done and what remains to do to develop the service composition in pervasive environments.

    @article{IbrahimIJCSI,
      author   = {Ibrahim, Noha and Le Mou{\"e}l, Fr{\'e}d{\'e}ric},
      title    = {{A} {S}urvey on {S}ervice {C}omposition {M}iddleware in {P}ervasive {E}nvironments},
      journal  = {{I}nternational {J}ournal of {C}omputer {S}cience {I}ssues ({IJCSI}), {S}pecial {I}ssue on « {P}ervasive {C}omputing {S}ystems and {T}echnologies »},
      volume   = {1},
      pages    = {1--12},
      month    = aug,
      year     = {2009},
      abstract = {{T}he development of pervasive computing has put the light on a challenging problem: how to dynamically compose services in heterogeneous and highly changing environments? {W}e propose a survey that defines the service composition as a sequence of four steps: the translation, the generation, the evaluation, and finally the execution. {W}ith this powerful and simple model we describe the major service composition middleware. {T}hen, a classification of these service composition middleware according to pervasive requirements - interoperability, discoverability, adaptability, context awareness, {Q}o{S} management, security, spontaneous management, and autonomous management - is given. {T}he classification highlights what has been done and what remains to do to develop the service composition in pervasive environments.},
      x-international-audience = {yes},
    }

  • N. Abdallah, F. Goasdoué, and M. C. Rousset, “DL-LiteR in the Light of Propositional Logic for Decentralized Data Management,” in IJCAI 2009:International Joint Conference on Artificial Intelligence, 2009.
    [BibTeX] [Abstract]

    {T}his paper provides a decentralized data model and associated algorithms for peer data management systems ({PDMS}) based on the {DL}-{L}ite{R} description logic. {O}ur approach relies on reducing query reformulation and consistency checking for {DL}-{L}ite{R} into reasoning in propositional logic. {T}his enables a straightforward deployment of {DL}-{L}ite{R} {PDMS}s on top of {S}ome{W}here, a scalable propositional peer-to-peer inference system. {W}e also show how to use the state-of-the-art {M}inicon algorithm for rewriting queries using views in {DL}-{L}ite{R} in the centralized and decentralized cases.

    @inproceedings{CIC:AGR09,
      author    = {Abdallah, Nada and Goasdou{\'e}, Fran{\c c}ois and Rousset, Marie Christine},
      title     = {{DL}-{L}ite{R} in the {L}ight of {P}ropositional {L}ogic for {D}ecentralized {D}ata {M}anagement},
      booktitle = {{IJCAI} 2009:{I}nternational {J}oint {C}onference on {A}rtificial {I}ntelligence},
      month     = jul,
      year      = {2009},
      abstract  = {{T}his paper provides a decentralized data model and associated algorithms for peer data management systems ({PDMS}) based on the {DL}-{L}ite{R} description logic. {O}ur approach relies on reducing query reformulation and consistency checking for {DL}-{L}ite{R} into reasoning in propositional logic. {T}his enables a straightforward deployment of {DL}-{L}ite{R} {PDMS}s on top of {S}ome{W}here, a scalable propositional peer-to-peer inference system. {W}e also show how to use the state-of-the-art {M}inicon algorithm for rewriting queries using views in {DL}-{L}ite{R} in the centralized and decentralized cases.},
    }

  • N. Ibrahim, “Orthogonal Classification of Middleware Technologies,” in The Third International Conference on Mobile Ubiquitous Computing, Systems, Services and Technologies (Ubicomm 2009), Sliema, Malta, 2009, pp. 46-51.
    [BibTeX] [Abstract]

    {T}his article describes middleware by adopting a horizontal and a vertical layer views. {M}iddleware are enabling technologies for application development and execution in ubiquitous environments. {I}n the horizontal view, we find most types of middleware developed so far, such as {MOM}, {ORB}, databases middleware and more recently {SOA}. {T}wo new concepts emerged in this category, the “middleware of sensors" and the “middleware of middlewares". {T}he vertical layer distinguishes two essential properties that characterize nowadays middleware and define what is called to be the ubiquitous middleware and the self-* middleware. {T}his article briefly explains all these key words and exposes the directions in which these new areas of research could proceed. {I}t than introduces the spontaneous middleware as a new property for ubiquity in the vertical layer and highlights what spontaneity brings to the different types of middleware.

    @InProceedings{CIC:ni09,
    address = {Sliema, Malta},
    title = {{O}rthogonal {C}lassification of {M}iddleware {T}echnologies},
    month = oct,
    abstract = {{T}his article describes middleware by adopting a horizontal and a vertical layer views. {M}iddleware are enabling technologies for application development and execution in ubiquitous environments. {I}n the horizontal view, we find most types of middleware developed so far, such as {MOM}, {ORB}, databases middleware and more recently {SOA}. {T}wo new concepts emerged in this category, the ``middleware of sensors'' and the ``middleware of middlewares''. {T}he vertical layer distinguishes two essential properties that characterize nowadays middleware and define what is called to be the ubiquitous middleware and the self-* middleware. {T}his article briefly explains all these key words and exposes the directions in which these new areas of research could proceed. {I}t than introduces the spontaneous middleware as a new property for ubiquity in the vertical layer and highlights what spontaneity brings to the different types of middleware.},
    publisher = {IEEE},
    pages = {46--51},
    booktitle = {{T}he {T}hird {I}nternational {C}onference on {M}obile {U}biquitous {C}omputing, {S}ystems, {S}ervices and {T}echnologies ({U}bicomm 2009)},
    year = {2009},
    x-international-audience = {yes},
    author = {Ibrahim, Noha},
    }

  • N. Ibrahim, F. Le Mouël, and S. Frénot, “MySIM: A Spontaneous Service Integration Middleware for Pervasive Environments,” in ACM International Conference on Pervasive Services (ICPS), London, England, 2009, pp. 1-10.
    [BibTeX] [Abstract]

    {A} computing infrastructure where \everything is a service" offers many new system and application possibilities. {A}mong the main challenges, however, is the issue of service integration for the application development in such heterogeneous environments. {S}ervice integration has been considered by major middleware as a user centric approach as it responds to user requests and needs. {I}n this article, we propose a novel way to integrate services considering only their availability, the functionalities they propose and their non functional {Q}o{S} properties rather than the users direct requests. {W}e define {M}y{SIM}, a spontaneous service integration middleware. {M}y{SIM} integrates services spontaneously on an event based mechanism and transparently for users and applications, extending by that the environment with functionalities. {W}e developed a prototype as a proof of concept and evaluated its effciency over a real use case.

    @InProceedings{ibrahimICPS,
    address = {London, England},
    title = {{M}y{SIM}: {A} {S}pontaneous {S}ervice {I}ntegration {M}iddleware for {P}ervasive {E}nvironments},
    month = jul,
    abstract = {{A} computing infrastructure where ``everything is a service'' offers many new system and application possibilities. {A}mong the main challenges, however, is the issue of service integration for the application development in such heterogeneous environments. {S}ervice integration has been considered by major middleware as a user centric approach as it responds to user requests and needs. {I}n this article, we propose a novel way to integrate services considering only their availability, the functionalities they propose and their non functional {Q}o{S} properties rather than the users direct requests. {W}e define {M}y{SIM}, a spontaneous service integration middleware. {M}y{SIM} integrates services spontaneously on an event based mechanism and transparently for users and applications, extending by that the environment with functionalities. {W}e developed a prototype as a proof of concept and evaluated its effciency over a real use case.},
    pages = {1--10},
    booktitle = {{ACM} {I}nternational {C}onference on {P}ervasive {S}ervices ({ICPS})},
    year = {2009},
    x-international-audience = {yes},
    author = {Ibrahim, Noha and Le Mou{\"e}l, Fr{\'e}d{\'e}ric and Fr{\'e}not, St{\'e}phane},
    }

  • C. Reynaud, N. Pernelle, M. C. Rousset, B. Safar, and F. Saïs, “Data Extraction, Transformation and Integration Guided by an Ontology,” in Data Warehousing Design and Advanced Engineering Applications: Methods for Complex Construction, Advances in Data Warehousing and Mining Book Series, L. Bellatreche, Ed., IGI Global, 2009.
    [BibTeX] [Abstract] [Download PDF]

    {T}his chapter deals with integration of {XML} heterogeneous information sources into a data warehouse with data defined in terms of a global abstract schema or ontology. {T}he authors present an approach supporting the acquisition of data from a set of external sources available for an application of interest including data extraction, data transformation and data integration or reconciliation. {T}he integration middleware that the authors propose extracts data from external {XML} sources which are relevant according to an {RDFS}+ ontology, transforms returned {XML} data into {RDF} facts conformed to the ontology and reconciles {RDF} data in order to resolve possible redundancies.

    @incollection{reynaud:inria-00432585,
      title     = {Data Extraction, Transformation and Integration Guided by an Ontology},
      abstract  = {This chapter deals with integration of {XML} heterogeneous information sources into a data warehouse with data defined in terms of a global abstract schema or ontology. The authors present an approach supporting the acquisition of data from a set of external sources available for an application of interest including data extraction, data transformation and data integration or reconciliation. The integration middleware that the authors propose extracts data from external {XML} sources which are relevant according to an {RDFS}+ ontology, transforms returned {XML} data into {RDF} facts conformed to the ontology and reconciles {RDF} data in order to resolve possible redundancies.},
      publisher = {IGI Global},
      booktitle = {Data Warehousing Design and Advanced Engineering Applications: Methods for Complex Construction, Advances in Data Warehousing and Mining Book Series},
      year      = {2009},
      author    = {Reynaud, Chantal and Pernelle, Nathalie and Rousset, Marie Christine and Safar, Brigitte and Sa{\"\i}s, Fatiha},
      editor    = {Bellatreche, Ladjel},
      url       = {http://hal.inria.fr/inria-00432585},
      pdf       = {http://hal.inria.fr/inria-00432585/PDF/IGI_Chapter.pdf},
    }

  • R. Tournaire, A. Termier, J.-M. Petit, and M. C. Rousset, “Discovery of Probabilistic Mappings between Taxonomies: Principles and Experiments,” in Journées Bases de Données Avancées (BDA 2009), Namur, Belgium, 2009, pp. 1-12.
    [BibTeX] [Abstract] [Download PDF]

    {I}n this paper, we investigate a principled approach for defining and discovering probabilistic mappings between two taxonomies. {F}irst, we compare two ways of modeling probabilistic mappings which are compatible with the logical constraints declared in each taxonomy. {T}hen we describe a generate and test algorithm which minimizes the number of calls to the probability estimator for determining those mappings whose probability exceeds a certain threshold. {F}inally, we provide an experimental analysis of this approach.

    @inproceedings{tournaire:hal-01002693,
      address   = {Namur, Belgium},
      title     = {Discovery of Probabilistic Mappings between Taxonomies: Principles and Experiments},
      abstract  = {In this paper, we investigate a principled approach for defining and discovering probabilistic mappings between two taxonomies. First, we compare two ways of modeling probabilistic mappings which are compatible with the logical constraints declared in each taxonomy. Then we describe a generate and test algorithm which minimizes the number of calls to the probability estimator for determining those mappings whose probability exceeds a certain threshold. Finally, we provide an experimental analysis of this approach.},
      pages     = {1--12},
      booktitle = {Journ{\'e}es Bases de Donn{\'e}es Avanc{\'e}es ({BDA} 2009)},
      year      = {2009},
      x-international-audience = {no},
      author    = {Tournaire, Remi and Termier, Alexandre and Petit, Jean-Marc and Rousset, Marie Christine},
      editor    = {Aman, B.},
      url       = {http://hal.inria.fr/hal-01002693},
    }

  • M. C. Rousset, “Semantic oriented data spaces,” in Invited tutorial at EDBT Summer School on Data and Resource Management in Ambient Computing, Giens, France, 2009.
    [BibTeX] [Abstract] [Download PDF]

    {A} data space system is a multi-model data management system that manages data sourced from a variety of local or external sources. {T}he management of data spaces raises new challenges for representing, indexing, sharing and querying possibly distributed collections of heterogeneous and ill-structured data. {S}emantic approaches are promising for providing the glue between heterogeneous data. {T}he goal of this course is to give an overview of the challenges and existing solutions for representing and exploiting the data semantics in data spaces. {W}e will distinguish centralized approaches based on the mediator model from decentralized approaches of {P}2{P} data sharing for online communities connected through overlay networks.

    @inproceedings{rousset:hal-00953253,
      x-invited-conference = {yes},
      address   = {Giens, France},
      title     = {Semantic oriented data spaces},
      abstract  = {A data space system is a multi-model data management system that manages data sourced from a variety of local or external sources. The management of data spaces raises new challenges for representing, indexing, sharing and querying possibly distributed collections of heterogeneous and ill-structured data. Semantic approaches are promising for providing the glue between heterogeneous data. The goal of this course is to give an overview of the challenges and existing solutions for representing and exploiting the data semantics in data spaces. We will distinguish centralized approaches based on the mediator model from decentralized approaches of {P2P} data sharing for online communities connected through overlay networks.},
      booktitle = {Invited tutorial at {EDBT} Summer School on Data and Resource Management in Ambient Computing},
      year      = {2009},
      x-international-audience = {yes},
      author    = {Rousset, Marie Christine},
      url       = {http://hal.inria.fr/hal-00953253},
    }