# LUBMft queries expressed in SPARQL, using templated fulltext search syntax.
#
# File layout: each query is introduced by its ID in square brackets, followed
# by a '#' description and then the query text:
#
# [query ID]
# query
#
# The %%FULLTEXT_SEARCH_PREFIX%% and %%FULLTEXT_SEARCH(...)%% placeholders are
# template macros; they are left untouched here.
#
# NOTE(review): the 'ub:' IRI used below is the standard LUBM univ-bench
# namespace. Confirm that it matches the namespace of the loaded LUBMft data.

# === BASIC IR QUERIES ===

[lubm-fulltext-1.1]
# All resources that match the keyword 'engineer', a relatively infrequent term.
# There are 40 results in LUBMft(1).
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX ub: <http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#>
%%FULLTEXT_SEARCH_PREFIX%%
SELECT ?X
WHERE {
  %%FULLTEXT_SEARCH(?X, "engineer")%%
}

[lubm-fulltext-1.2]
# All resources that match the keyword 'network', a very frequent term.
# There are 1,013 results in LUBMft(1).
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX ub: <http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#>
%%FULLTEXT_SEARCH_PREFIX%%
SELECT ?X
WHERE {
  %%FULLTEXT_SEARCH(?X, "network")%%
}

[lubm-fulltext-2.1]
# All resources that match the keyword 'engineer' in property 'ub:publicationText'.
# This query is more precise than lubm-fulltext-1.1, but the result set is identical.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX ub: <http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#>
%%FULLTEXT_SEARCH_PREFIX%%
SELECT ?X
WHERE {
  %%FULLTEXT_SEARCH(?X, ub:publicationText, "engineer")%%
}

[lubm-fulltext-2.2]
# All resources that match the keyword 'network' in property 'ub:publicationText'.
# This query is more precise than lubm-fulltext-1.2, but the result set is identical.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX ub: <http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#>
%%FULLTEXT_SEARCH_PREFIX%%
SELECT ?X
WHERE {
  %%FULLTEXT_SEARCH(?X, ub:publicationText, "network")%%
}

[lubm-fulltext-3]
# All resources that match the keywords 'engineer' or 'network' in property 'ub:publicationText'.
# This query tests for OR boolean queries.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX ub: <http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#>
%%FULLTEXT_SEARCH_PREFIX%%
SELECT ?X
WHERE {
  %%FULLTEXT_SEARCH(?X, ub:publicationText, "engineer network")%%
}

[lubm-fulltext-4]
# All resources that match the phrase 'network engineer' in property 'ub:publicationText'.
# There are three such results in LUBMft(1).
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX ub: <http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#>
%%FULLTEXT_SEARCH_PREFIX%%
SELECT ?X
WHERE {
  %%FULLTEXT_SEARCH(?X, ub:publicationText, "\"network engineer\"")%%
}

[lubm-fulltext-5.1]
# All resources that match the keyword 'smith' in property 'ub:surname', the most common surname.
# There are 95 results in LUBMft(1).
# This query is meant to be compared with lubm-fulltext-5.2.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX ub: <http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#>
%%FULLTEXT_SEARCH_PREFIX%%
SELECT ?X
WHERE {
  %%FULLTEXT_SEARCH(?X, ub:surname, "smith")%%
}

[lubm-fulltext-5.2]
# All resources that have the string literal "Smith" as their surname.
# This query is equivalent to lubm-fulltext-5.1, except it refers to the exact string Literal.
# This allows for comparison of keyword and Literal lookup performance.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX ub: <http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#>
SELECT ?X
WHERE {
  ?X ub:surname "Smith" .
}

# === SEMANTIC IR QUERIES ===

[lubm-fulltext-6]
# All ub:Publications that match the keyword 'engineer' in property 'ub:publicationText'.
# This query is even more precise than lubm-fulltext-2.1, but the result set is identical.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX ub: <http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#>
%%FULLTEXT_SEARCH_PREFIX%%
SELECT ?X
WHERE {
  %%FULLTEXT_SEARCH(?X, ub:publicationText, "engineer")%%
  ?X rdf:type ub:Publication .
}

[lubm-fulltext-7]
# All ub:Publications and their title, that match the keyword 'engineer' in property 'ub:publicationText'.
# Compared to lubm-fulltext-6, this query further extracts the titles of the publications
# from the triple store.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX ub: <http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#>
%%FULLTEXT_SEARCH_PREFIX%%
SELECT ?X ?title
WHERE {
  %%FULLTEXT_SEARCH(?X, ub:publicationText, "engineer")%%
  ?X ub:name ?title .
  ?X rdf:type ub:Publication .
}

[lubm-fulltext-8]
# All ub:Publications, their title, and the fullname of each authoring full professor,
# that match the keyword 'engineer' in property 'ub:publicationText'.
# Compared to lubm-fulltext-7, this query further performs some triple index lookups
# to extract the full professors' fullnames.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX ub: <http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#>
%%FULLTEXT_SEARCH_PREFIX%%
SELECT ?X ?title ?fullname
WHERE {
  %%FULLTEXT_SEARCH(?X, ub:publicationText, "engineer")%%
  ?X ub:name ?title .
  ?X ub:publicationAuthor ?Y .
  ?X rdf:type ub:Publication .
  ?Y ub:fullname ?fullname .
  ?Y rdf:type ub:FullProfessor .
}

[lubm-fulltext-9]
# All resources that match the keyword 'engineer' in ub:publicationText, and that are
# authored by someone matching 'smith' in the ub:fullname property.
# This combines two keyword queries in one structured query. Connecting the two result
# sets of the keyword searches requires lookups of the semantic path between these resources.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX ub: <http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#>
%%FULLTEXT_SEARCH_PREFIX%%
SELECT ?X ?Y
WHERE {
  %%FULLTEXT_SEARCH(?Y, ub:fullname, "smith")%%
  %%FULLTEXT_SEARCH(?X, ub:publicationText, "engineer")%%
  ?X ub:publicationAuthor ?Y .
}

[lubm-fulltext-10]
# All resources that match the keyword 'network' in ub:publicationText, and that are
# authored by a full professor who also authored a resource matching 'engineer' in
# ub:publicationText.
# This combines two keyword queries in one structured query. Connecting the two result
# sets of the keyword searches requires lookups of the semantic path between these resources.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX ub: <http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#>
%%FULLTEXT_SEARCH_PREFIX%%
SELECT ?X ?Y ?Z
WHERE {
  %%FULLTEXT_SEARCH(?X, ub:publicationText, "network")%%
  %%FULLTEXT_SEARCH(?Y, ub:publicationText, "engineer")%%
  ?X ub:publicationAuthor ?Z .
  ?Y ub:publicationAuthor ?Z .
  ?Z rdf:type ub:FullProfessor .
}

[lubm-fulltext-11]
# All distinct full professors matching 'smith' in ub:fullname that authored resources
# that match the keyword 'network' in ub:publicationText, and also resources matching
# 'engineer' in ub:publicationText.
# This further introduces one more keyword search into the query.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX ub: <http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#>
%%FULLTEXT_SEARCH_PREFIX%%
SELECT DISTINCT ?Z
WHERE {
  %%FULLTEXT_SEARCH(?X, ub:publicationText, "network")%%
  %%FULLTEXT_SEARCH(?Y, ub:publicationText, "engineer")%%
  %%FULLTEXT_SEARCH(?Z, ub:fullname, "smith")%%
  ?X ub:publicationAuthor ?Z .
  ?Y ub:publicationAuthor ?Z .
  ?Z rdf:type ub:FullProfessor .
}

# === ADVANCED IR QUERIES ===

[lubm-fulltext-12]
# All resources that match the keywords 'engineer' and 'network' in property 'ub:publicationText'.
# This query tests for AND boolean queries.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX ub: <http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#>
%%FULLTEXT_SEARCH_PREFIX%%
SELECT ?X
WHERE {
  %%FULLTEXT_SEARCH(?X, ub:publicationText, "+engineer +network")%%
}

[lubm-fulltext-13]
# All resources that match the keyword 'network' but not 'engineer' in property 'ub:publicationText'.
# This query tests for AND NOT boolean queries.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX ub: <http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#>
%%FULLTEXT_SEARCH_PREFIX%%
SELECT ?X
WHERE {
  %%FULLTEXT_SEARCH(?X, ub:publicationText, "-engineer +network")%%
}

[lubm-fulltext-14]
# All resources that contain the keywords 'network' and 'engineer' in property
# 'ub:publicationText', where both keywords must occur within a distance of at most 10 words.
# There are 14 such results in LUBMft(1).
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX ub: <http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#>
%%FULLTEXT_SEARCH_PREFIX%%
SELECT ?X
WHERE {
  %%FULLTEXT_SEARCH(?X, ub:publicationText, "\"network engineer\"~10")%%
}

[lubm-fulltext-15]
# All resources that match the wildcard query 'engineer*' in property 'ub:publicationText'.
# The query matches 'engineer', 'engineering', and 'engineers'.
# There are 939 results in LUBMft(1).
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX ub: <http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#>
%%FULLTEXT_SEARCH_PREFIX%%
SELECT ?X
WHERE {
  %%FULLTEXT_SEARCH(?X, ub:publicationText, "engineer*")%%
}

[lubm-fulltext-16]
# All resources that match the wildcard query 'engineer?' in property 'ub:publicationText'.
# The query matches 'engineers', but not 'engineer'.
# There are 32 results in LUBMft(1).
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX ub: <http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#>
%%FULLTEXT_SEARCH_PREFIX%%
SELECT ?X
WHERE {
  %%FULLTEXT_SEARCH(?X, ub:publicationText, "engineer?")%%
}

[lubm-fulltext-17]
# All resources that match a keyword that is 80% similar to 'engineer'.
# Where 'engineer' has 40 results, this query has 71 in LUBMft(1).
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX ub: <http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#>
%%FULLTEXT_SEARCH_PREFIX%%
SELECT ?X
WHERE {
  %%FULLTEXT_SEARCH(?X, ub:publicationText, "engineer~0.8")%%
}

[lubm-fulltext-18]
# All resources that match the keyword 'engineer' in property 'ub:publicationText'.
# This query further requests the score of each matching resource to be returned.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX ub: <http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#>
%%FULLTEXT_SEARCH_PREFIX%%
SELECT ?X ?score
WHERE {
  %%FULLTEXT_SEARCH(?X, ub:publicationText, "engineer", ?score)%%
}

[lubm-fulltext-19]
# All resources that match the keyword 'engineer' in property 'ub:publicationText'.
# This query further requests a snippet of the matching literal of each matching
# resource to be returned.
# NOTE(review): the macro call has the same four-argument shape as the score
# variant in lubm-fulltext-18; presumably the template engine tells them apart
# by the variable name (?snippet vs ?score) - confirm.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX ub: <http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#>
%%FULLTEXT_SEARCH_PREFIX%%
SELECT ?X ?snippet
WHERE {
  %%FULLTEXT_SEARCH(?X, ub:publicationText, "engineer", ?snippet)%%
}

[lubm-fulltext-20]
# All resources that match the keyword 'network' in property 'ub:publicationText',
# but only the 10 resources with the highest score.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX ub: <http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#>
%%FULLTEXT_SEARCH_PREFIX%%
SELECT ?X ?score
WHERE {
  %%FULLTEXT_SEARCH(?X, ub:publicationText, "network", ?score, 10)%%
}

[lubm-fulltext-21]
# All resources that match the keyword 'network' in property 'ub:publicationText',
# but only the resources with a score higher than 0.75.
# Though the keyword has 1013 matches in LUBMft(1), only 43 have a (Lucene) score
# higher than 0.75.
# NOTE(review): the fifth macro argument is a result limit (10) in
# lubm-fulltext-20 and a score threshold (0.75) here; presumably the template
# engine distinguishes integer from decimal - confirm.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX ub: <http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#>
%%FULLTEXT_SEARCH_PREFIX%%
SELECT ?X ?score
WHERE {
  %%FULLTEXT_SEARCH(?X, ub:publicationText, "network", ?score, 0.75)%%
}