Our data schema not only describes the actual links between publications and research data, but also contains the process model for obtaining these links by executing algorithms through our services. A higher-level description of the schema is provided here.

 / schema / Execution[text/turtle]

@base <http://infolis.gesis.org/infolink/schema/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix dc: <http://purl.org/dc/elements/1.1/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix schema: <http://schema.org/> .
@prefix bibo: <http://purl.org/ontology/bibo/> .
@prefix infolis: <> .
@prefix dqm: <http://purl.org/dqm-vocabulary/v1/dqm#> .
@prefix omnom: <http://onto.dm2e.eu/schema/omnom/> .
@prefix doap: <http://usefulinc.com/ns/doap#> .
@prefix vann: <http://purl.org/vocab/vann/> .

# Class describing one concrete run of an algorithm; declared as a
# subclass of schema:Action.
# NOTE(review): the class is additionally typed as owl:Thing — confirm this is intended.
infolis:Execution
    a owl:Class, owl:Thing ;
    rdfs:subClassOf schema:Action ;
    dc:description "The concrete execution of an Algorithm." ;
    vann:example <../api#!/rest-ld-execution> ;
    dcterms:source <https://github.com/infolis/infoLink/blob/master/src/main/java/io/github/infolis/model/Execution.java> .

# Property on Execution; values are limited to the two enumerated
# language codes below.
infolis:abstractLanguage
    schema:domainIncludes infolis:Execution ;
    rdfs:range [
        rdf:type rdfs:Datatype ;
        owl:oneOf ( "en" "de" )
    ] ;
    dc:description "Language of the abstracts" .

# Property on Execution naming the Java class of the algorithm to run.
# The range is an enumerated datatype listing the accepted class names.
infolis:algorithm
    dc:description "Java class of the algorithm which is supposed to be executed within this execution." ;
    dcterms:source <https://github.com/infolis/infoLink/blob/master/src/main/java/io/github/infolis/model/Execution.java> ;
    schema:domainIncludes infolis:Execution ;
    rdfs:range [
        a rdfs:Datatype ;
        # TODO fix this for updated version
        # (This note used to be a string literal inside the list, which made
        # it an allowed algorithm value; it is now a real comment.)
        owl:oneOf ("io.github.infolis.algorithm.BestMatchLinker"
            "io.github.infolis.algorithm.BibliographyExtractor"
            "io.github.infolis.algorithm.DoiExtractor"
            "io.github.infolis.algorithm.DoiLinker"
            "io.github.infolis.algorithm.FederatedSearcher"
            "io.github.infolis.algorithm.FrequencyBasedBootstrapping"
            "io.github.infolis.algorithm.GoldLinker"
            "io.github.infolis.algorithm.Indexer"
            "io.github.infolis.algorithm.InfolisPatternSearcher"
            "io.github.infolis.algorithm.KeywordTagger"
            "io.github.infolis.algorithm.LearnPatternsAndCreateLinks"
            "io.github.infolis.algorithm.Learner"
            "io.github.infolis.algorithm.LinkImporter"
            "io.github.infolis.algorithm.LocalSearcher"
            "io.github.infolis.algorithm.LuceneSearcher"
            "io.github.infolis.algorithm.MetaDataExtractor"
            "io.github.infolis.algorithm.MultiMatchesLinker"
            "io.github.infolis.algorithm.OntologyLinker"
            "io.github.infolis.algorithm.ReferenceLinker"
            "io.github.infolis.algorithm.RegexSearcher"
            "io.github.infolis.algorithm.ReliabilityBasedBootstrapping"
            "io.github.infolis.algorithm.SearchPatternsAndCreateLinks"
            "io.github.infolis.algorithm.SearchDoisAndCreateLinks"
            "io.github.infolis.algorithm.SearchResultLinker"
            "io.github.infolis.algorithm.SpringerImporter"
            "io.github.infolis.algorithm.TagSearcher"
            "io.github.infolis.algorithm.TextAndMetaDataExtractor"
            "io.github.infolis.algorithm.TextExtractor"
            "io.github.infolis.algorithm.NameExtractor"
            "io.github.infolis.algorithm.Tokenizer"
            "io.github.infolis.algorithm.TokenizerOpenNLP"
            "io.github.infolis.algorithm.TokenizerStanford"
            "org.gesis.git.LODInfrastructure.dataProcessing.importer.AllbusBibImporter"
            "org.gesis.git.LODInfrastructure.dataProcessing.importer.DbkImporter"
            "org.gesis.git.LODInfrastructure.dataProcessing.importer.ZisImporter"
            "org.gesis.git.LODInfrastructure.dataProcessing.enrichment.DatasetSearcher"
        )
    ] .

# Boolean property on Execution controlling leading wildcards in the
# Lucene query parser.
infolis:allowLeadingWildcards
    schema:domainIncludes infolis:Execution ;
    rdfs:range xsd:boolean ;
    dc:description """Determines whether the Lucene query parser is allowed to
use leading wildcard characters.
Default: true""" .

# Property on Execution selecting the bootstrapping strategy; the range
# enumerates the five accepted strategy names.
infolis:bootstrapStrategy
    schema:domainIncludes infolis:Execution ;
    rdfs:range [
        rdf:type rdfs:Datatype ;
        owl:oneOf ( "mergeCurrent" "mergeNew" "mergeAll" "separate" "reliability" )
    ] ;
    dc:description "Strategy to use for bootstrapping. Can either be: mergeCurrent, mergeNew, mergeAll, separate, reliability." .

# Property on Execution holding the end timestamp (xsd:dateTime values).
# It is typed as rdf:Property; it was previously declared `a xsd:date`,
# which made the property itself an instance of a datatype.
# NOTE(review): owl:sameAs links this property to schema:endTime;
# owl:equivalentProperty is the usual construct for properties — confirm
# before changing, as consumers may rely on the current triple.
infolis:endTime
    a rdf:Property ;
    dc:description "Timestamp when execution ended." ;
    schema:domainIncludes infolis:Execution ;
    rdfs:range xsd:dateTime ;
    owl:sameAs schema:endTime .

# Property on Execution pointing at the infolis:Entity resources that
# serve as input to keyword tagging.
infolis:entitiesForKeywordTagging
    schema:domainIncludes infolis:Execution ;
    rdfs:range infolis:Entity ;
    dc:description """List of entities (URIs of the entities) for which key words should be
generated. Serves as input for the keyword  tagging algorithm.""" .

# String-valued property on Execution: the indexer's index directory.
infolis:indexDirectory
    schema:domainIncludes infolis:Execution ;
    rdfs:range xsd:string ;
    dc:description "index directory of the indexer." .

# String-valued property on Execution: tags selecting a set of InfolisFiles.
infolis:infolisFileTags
    schema:domainIncludes infolis:Execution ;
    rdfs:range xsd:string ;
    dc:description "List of tags to identify a specific set of InfolisFiles" .

# String-valued property on Execution: tags selecting a set of InfolisPatterns.
infolis:infolisPatternTags
    schema:domainIncludes infolis:Execution ;
    rdfs:range xsd:string ;
    dc:description "List of tags to identify a specific set of InfolisPatterns" .

# Property on Execution referencing the infolis:InfolisFile resources
# used as input.
infolis:inputFiles
    schema:domainIncludes infolis:Execution ;
    rdfs:range infolis:InfolisFile ;
    dc:description """List of URIs of input files.
Can be either PDF or text files.
They are for example used to search patterns within the Pattern Applier algorithm.""" .

# String-valued property on Execution: key words produced by the KeywordTagger.
infolis:keyWords
    schema:domainIncludes infolis:Execution ;
    rdfs:range xsd:string ;
    dc:description "Key words which are generated by the KeywordTagger." .

# Property on Execution: regex group number of the left context for RegexSearcher.
# NOTE(review): the range is xsd:float although the value is a group
# number — confirm whether an integer datatype was intended.
infolis:leftContextGroup
    schema:domainIncludes infolis:Execution ;
    rdfs:range xsd:float ;
    dc:description "Group numbers to use for RegexSearcher: group of left context." .

# Property on Execution referencing infolis:Entity resources returned by
# an internal search over the generated links.
infolis:linkedEntities
    schema:domainIncludes infolis:Execution ;
    rdfs:range infolis:Entity ;
    dc:description """Beside the search in external repositories, we can also
search in our own database. As use case, we get a URN for a publication
from a user and want to show all named entities that are linked to 
this publication. With an internal search using the generated links,
we can find these entities which are returned in this list.""" .

# Property on Execution referencing the infolis:EntityLink resources
# established between texts and discovered entities.
infolis:links
    schema:domainIncludes infolis:Execution ;
    rdfs:range infolis:EntityLink ;
    dc:description """As a final step, links between the texts and the discovered
named entities (research data) are established and saved in this list.""" .

# String-valued property on Execution: log messages of the run.
infolis:log
    schema:domainIncludes infolis:Execution ;
    rdfs:range xsd:string ;
    dc:description "Log messages of this execution." .

# Property on Execution: upper bound on clauses per Lucene BooleanQuery.
# The description's spelling of "Lucene" is corrected (was "Lucence").
# NOTE(review): the range is xsd:float although the description speaks of
# an integer maximum — confirm whether an integer datatype was intended.
infolis:maxClauseCount
    dc:description """The  maximum number of clauses permitted per BooleanQuery (Lucene search).
A boolean query represents a query that matches documents
matching boolean combinations of other queries.
Default: Integer max value""" ;
    schema:domainIncludes infolis:Execution ;
    rdfs:range xsd:float .

# Property on Execution: iteration cap for the bootstrapping process.
# NOTE(review): the range is xsd:float for an iteration count — confirm
# whether an integer datatype was intended.
infolis:maxIterations
    schema:domainIncludes infolis:Execution ;
    rdfs:range xsd:float ;
    dc:description """Maximum number of iterations during the bootstrapping process.
A high number of iterations can lead to a increased run time.
Default: 10""" .

# String-valued property on Execution: metadata files of the publications.
# The description's spelling of "abstract" is corrected (was "astract").
infolis:metaDataFiles
    dc:description """Meta data files of the publications in which information about
the title, author, abstract, subject, identifiers, URL, and language
can be found.""" ;
    schema:domainIncludes infolis:Execution ;
    rdfs:range xsd:string .

# String-valued property on Execution: the indexer's output directory.
infolis:outputDirectory
    schema:domainIncludes infolis:Execution ;
    rdfs:range xsd:string ;
    dc:description "Output directory of the indexer." .

# Property on Execution referencing the infolis:InfolisFile resources
# that hold algorithm output.
infolis:outputFiles
    schema:domainIncludes infolis:Execution ;
    rdfs:range infolis:InfolisFile ;
    dc:description "List of URIs of output files. Output files to save the output files (txt files) of algorithms." .

# Boolean property on Execution: TextExtractor flag deciding whether
# already-converted PDFs are converted again.
infolis:overwriteTextFiles
    schema:domainIncludes infolis:Execution ;
    rdfs:range xsd:boolean ;
    dc:description """Flag used by TextExtractor: if set to false, pdfs for which corresponding text 
files already exist in the specified text directory will not be converted again, instead 
the existing text files will be returned as InfolisFile instances. If set to true, all 
pdfs will be converted regardless of any existing files in the text directory. """ .

# Property on Execution referencing the patterns to apply on texts.
# The range is infolis:InfolisPattern; it was previously infolis:InfolisFile,
# which contradicts the description.
# NOTE(review): infolis:InfolisPattern is assumed to be declared elsewhere
# in the schema — verify the class IRI.
infolis:patterns
    dc:description """A list of patterns (internally expressed as regular expression)
that can be applied on texts, e.g. to find links to research data.""" ;
    schema:domainIncludes infolis:Execution ;
    rdfs:range infolis:InfolisPattern .

# Property on Execution: phrase slop handed to the Lucene query parser.
# NOTE(review): the range is xsd:float while the description counts edit
# operations — confirm whether an integer datatype was intended.
infolis:phraseSlop
    schema:domainIncludes infolis:Execution ;
    rdfs:range xsd:float ;
    dc:description """The slop for phrases used by the Lucene query parser.
It determines how similar two phrases must be to be matched.
If zero, then only exact phrase matches, if 10 up to 10 edit
operations may be carried out.
Default: 10""" .

# Float-valued property on Execution: numeric progress, described as 0-100.
infolis:progress
    schema:domainIncludes infolis:Execution ;
    rdfs:range xsd:float ;
    dc:description "Numerical progress of the execution in the interval 0-100." .

# Boolean property on Execution: switches PTB3 token transforms on.
infolis:ptb3Escaping
    schema:domainIncludes infolis:Execution ;
    rdfs:range xsd:boolean ;
    dc:description "Enable all traditional PTB3 token transforms (like parentheses becoming -LRB-, -RRB-)." .

# Property on Execution naming QueryService Java classes directly; the
# range enumerates the accepted class names.
# Typos in the description are corrected ("Instead of a using",
# "serivces", missing space before the parenthesis).
infolis:queryServiceClasses
    dc:description "Instead of using a list of QueryService URIs (queryServices), the Java classes of the QueryServices can be used. This way, the query services do not need to be posted." ;
    schema:domainIncludes infolis:Execution ;
    rdfs:range [
        a rdfs:Datatype ;
        owl:oneOf ("io.github.infolis.infolink.querying.DaraHTMLQueryService"
            "io.github.infolis.infolink.querying.DaraSolrQueryService"
            "io.github.infolis.infolink.querying.DataciteQueryService"
        )
    ] .

# Property on Execution referencing the infolis:QueryService resources to use.
infolis:queryServices
    schema:domainIncludes infolis:Execution ;
    rdfs:range infolis:QueryService ;
    dc:description "List of QueryService to use. A query service to a certain repository can be posted and later referred." .

# Property on Execution: regex group number of the reference term / DOI.
# NOTE(review): the range is xsd:float although the value is a group
# number — confirm whether an integer datatype was intended.
infolis:referenceGroup
    schema:domainIncludes infolis:Execution ;
    rdfs:range xsd:float ;
    dc:description """Group numbers to use for RegexSearcher: group of reference term.
number of the group which contains the doi. This number depends on the used doiRegex""" .

# Float-valued property on Execution: threshold deciding which patterns are relevant.
infolis:reliabilityThreshold
    schema:domainIncludes infolis:Execution ;
    rdfs:range xsd:float ;
    dc:description "Determines which patterns are the relevant ones." .

# Boolean property on Execution: whether the bibliography is stripped
# from text/plain documents.
infolis:removeBib
    schema:domainIncludes infolis:Execution ;
    rdfs:range xsd:boolean ;
    dc:description "Whether to remove the bibliography from text/plain document." .

# Property on Execution: regex group number of the right context for RegexSearcher.
# NOTE(review): the range is xsd:float although the value is a group
# number — confirm whether an integer datatype was intended.
infolis:rightContextGroup
    schema:domainIncludes infolis:Execution ;
    rdfs:range xsd:float ;
    dc:description "Group numbers to use for RegexSearcher: group of right context." .

# Property on Execution referencing an infolis:SearchQuery used by the algorithms.
# The description's spelling of "represents" is corrected (was "represtens").
infolis:searchQuery
    dc:description """Any kind of search query that can be used within the algorithms.
For example, it represents the search query which is used
to perform a search in different repositories to find
fitting research data.""" ;
    schema:domainIncludes infolis:Execution ;
    rdfs:range infolis:SearchQuery .

# Property on Execution selecting the SearchResultLinker implementation;
# the range enumerates the four accepted class names.
infolis:searchResultLinkerClass
    schema:domainIncludes infolis:Execution ;
    rdfs:range [
        rdf:type rdfs:Datatype ;
        owl:oneOf (
            "io.github.infolis.algorithm.BestMatchLinker"
            "io.github.infolis.algorithm.MultiMatchesLinker"
            "io.github.infolis.algorithm.OntologyLinker"
            "io.github.infolis.algorithm.GoldLinker"
        )
    ] ;
    dc:description """The SearchResultLinkerClass determines the SearchResultLinker to 
use. That class is responsible for deciding which SearchResults to 
select for creating links.""" .

# Property on Execution referencing the infolis:SearchResult resources
# returned by a repository search.
infolis:searchResults
    schema:domainIncludes infolis:Execution ;
    rdfs:range infolis:SearchResult ;
    dc:description """After a search in one or more repositories, a list 
of search results is returned. These results not only contain
the repository which was searched but also information like
the relevance score.""" .

# String-valued property on Execution: a term to search for in texts,
# e.g. a seed for the bootstrapping algorithms.
infolis:searchTerm
    schema:domainIncludes infolis:Execution ;
    rdfs:range xsd:string ;
    dc:description """A search term that can be used in different algorithms whenever
something a certain term needs to be searched in a text.
For example, the bootstrapping algorithms need a seed in the beginning to start
the whole process. The search term represents such a seed, e.g. the study name 'ALLBUS'.""" .

# String-valued property on Execution: initial seeds for learning.
infolis:seeds
    schema:domainIncludes infolis:Execution ;
    rdfs:range xsd:string ;
    dc:description "The initial seeds for learning." .

# Property on Execution: 1-based index of the first page to extract.
# NOTE(review): the range is xsd:float for a page index — confirm
# whether an integer datatype was intended.
infolis:startPage
    schema:domainIncludes infolis:Execution ;
    rdfs:range xsd:float ;
    dc:description """Index (starting at 1 rather than 0) of the first page to extract. 
Useful to ignore title pages if present.""" .

# Property on Execution holding the start timestamp (xsd:dateTime values).
# It is typed as rdf:Property; it was previously declared `a xsd:date`,
# which made the property itself an instance of a datatype.
# NOTE(review): owl:sameAs links this property to schema:startTime;
# owl:equivalentProperty is the usual construct for properties — confirm
# before changing, as consumers may rely on the current triple.
infolis:startTime
    a rdf:Property ;
    dc:description "Timestamp when execution started." ;
    schema:domainIncludes infolis:Execution ;
    rdfs:range xsd:dateTime ;
    owl:sameAs schema:startTime .

# Property on Execution holding its lifecycle state; the range enumerates
# the four accepted status strings.
# The example value is "FINISHED"; it was previously "SUCCESS", which is
# not a member of the enumeration.
infolis:status
    dc:description """Status of the execution

* PENDING
* STARTED
* FINISHED
* FAILED

Default (when starting): ExecutionStatus.PENDING""" ;
    vann:example "FINISHED" ;
    schema:domainIncludes infolis:Execution ;
    rdfs:range [
        a rdfs:Datatype ;
        owl:oneOf ("PENDING"
            "STARTED"
            "FAILED"
            "FINISHED"
        )
    ] .

# String-valued property on Execution: free-form tags for the execution
# and the entities it generates.
infolis:tags
    schema:domainIncludes infolis:Execution ;
    rdfs:range xsd:string ;
    dc:description """Free-form tags to assign to the execution and all generated entities. 
This makes all uploaded and generated data identifiable and searchable 
e.g. using infolisFileTags, infolisPatternTags and textualReferenceTags. """ .

# String-valued property on Execution: tags selecting a set of TextualReferences.
infolis:textualReferenceTags
    schema:domainIncludes infolis:Execution ;
    rdfs:range xsd:string ;
    dc:description "List of tags to identify a specific set of TextualReferences" .

# Property on Execution referencing infolis:TextualReference resources.
# The description's grammar is corrected ("can be find" -> "can be found").
infolis:textualReferences
    dc:description """A textual reference represents any kind of reference that
can be found in a text, e.g. a term like a study name has been found in a publication.
Besides the text and the term that has been found in the text,
it also contains the context, i.e. where the term has been detected.""" ;
    schema:domainIncludes infolis:Execution ;
    rdfs:range infolis:TextualReference .

# String-valued property on Execution: path/URI of the thesaurus used for
# key word tagging.
infolis:thesaurus
    schema:domainIncludes infolis:Execution ;
    rdfs:range xsd:string ;
    dc:description "Path/URI to the thesaurus which is used for the key word tagging." .

# Boolean property on Execution: whether text input is tokenized.
infolis:tokenize
    schema:domainIncludes infolis:Execution ;
    rdfs:range xsd:boolean ;
    dc:description """Whether to tokenize text input.

Bootstrapping requires tokenized input texts to perform well. It can
either be called on tokenized input texts or it can be called on
untokenized text or pdf files and perform tokenization itself. If
unspecified, defaults to false for TextExtractor. For Bootstrapping,
this field has to be set explicitly as this information is crucial for
good performance.""" .

# Boolean property on Execution: whether new line characters are tokenized.
infolis:tokenizeNLs
    schema:domainIncludes infolis:Execution ;
    rdfs:range xsd:boolean ;
    dc:description "Determines whether new line characters are to be tokenized." .

# Boolean property on Execution: require at least one upper-case
# character in a term.
infolis:upperCaseConstraint
    schema:domainIncludes infolis:Execution ;
    rdfs:range xsd:boolean ;
    dc:description """Indicates whether we require a term to contain at least one upper case
character.

The idea behind is that especially a study name is supposed to be a
named entity and thus should contain at least one upper-case character.
Default: false""" .

# Property on Execution: number of words used when creating patterns.
# NOTE(review): the range is xsd:float for a word count — confirm
# whether an integer datatype was intended.
infolis:windowsize
    schema:domainIncludes infolis:Execution ;
    rdfs:range xsd:float ;
    dc:description "Number of words used for creation of patterns." .