/* ***************************************************************************
 *
 * Copyright (c) 2003 The Edutella Project
 *
 * Redistributions in source code form must reproduce the above copyright
 * and this condition. The contents of this file are subject to the
 * Sun Project JXTA License Version 1.1 (the "License"); you may not use
 * this file except in compliance with the License.
 * A copy of the License is available at http://www.jxta.org/jxta_license.html.
 *
 * *************************************************************************** */

/* This is a datalog grammar as specified in
 * http://edutella.jxta.org/spec/qel.html.
 *
 * The N3 fragment lexer parts are based on the Jena N3 grammar.
 * This file generates AntlrDatalogLexer, AntlrDatalogParser and
 * AntlrDatalogParserToken
 *
 */

header {
package net.jxta.edutella.provider.datalog;

import net.jxta.edutella.eqm.*;
import net.jxta.edutella.vocabulary.QEL;
import com.hp.hpl.jena.rdf.model.*;
import java.util.*;
}

class AntlrDatalogParser extends Parser;

options {
    // LL(4) grammar (lookahead of 4 tokens).
    // The most complicated cases occur with outer join markers.
    k = 4;
}

// Token types that are only ever assigned via $setType in the lexer rules
// THING and AT_WORD. The declaration order is kept as it was.
tokens {
    NAME;
    QNAME;
    PREFIXNAME;
    RULESEP;
    AT_PREFIX;
    AT_BASE;
    AT_LANG;
}

{
    /* Lexer associated with this parser; only stored, never read in this grammar. */
    AntlrDatalogLexer lexer = null;

    void setLexer(AntlrDatalogLexer l) {
        this.lexer = l;
    }

    /*
     * Helper method to create Resources from the Query model.
     * The resource with URI s is created in the model that backs the
     * query's own resource; an RDFException is rethrown as EQMException.
     */
    private Resource createResource(Query eduquery, String s) {
        try {
            return eduquery.getResource().getModel().createResource(s);
        } catch (RDFException e) {
            throw new EQMException(e);
        }
    }

    /*
     * Result holder of the queryLiteral rule: the literal itself plus a
     * flag telling whether it carried the outer join marker.
     * Static because it never touches the enclosing parser instance.
     */
    private static class ParsedQueryLiteral {
        public QueryLiteral queryLiteral;
        public boolean isOuterJoin = false;
    }
}

/*
 * Entry point for the parsing process. This method parses a query
 * and returns a new Query object.
* @param base URI the query resource is derived from (base + "#query");
 *             replaced by the URI of an @base declaration if the input
 *             contains one
 * @return the Query holding all parsed rules and the query expression
 */
query[String base] returns [Query eduquery]
{
    Map prefixes = new HashMap();
    // the "qel" prefix is always available without an explicit declaration
    prefixes.put("qel", QEL.getURI());
    Model m = ModelFactory.createDefaultModel();
    Resource queryRes = null;
    String declaredBase = null;
    eduquery = null;
}
    : (declaredBase = baseDeclaration)?
      (prefixDeclaration[m, prefixes])*
      {
        // The Query can only be created once the base URI is known,
        // i.e. after the optional @base declaration has been consumed.
        try {
            if (declaredBase != null) {
                base = declaredBase;
            }
            queryRes = m.createResource(base + "#query");
        } catch (RDFException e) {
            throw new EQMException(e);
        }
        eduquery = new Query(queryRes);
      }
      (
        { Rule edurule; }
        edurule=rule[eduquery, prefixes]
        { eduquery.addRule(edurule); }
      )*
      queryExpression[eduquery, prefixes]
    ;

/*
 * parses the query literals
 *
 * Shape: QUERYSEP, an optional list of result variables, MINUS, then one
 * or more comma separated query literals and an optional trailing SEP.
 * Literals carrying the outer join marker are added via
 * addOuterJoinLiteral, all others via addLiteral.
 */
queryExpression[Query eduquery, Map prefixes]
{
    List resultVariables;
    ParsedQueryLiteral pql;
}
    : QUERYSEP
      ( MINUS
      | ( resultVariables=arglist[eduquery, prefixes]
          { eduquery.setResultVariables(resultVariables); }
          MINUS
        )
      )
      ( pql = queryLiteral[eduquery, prefixes]
        {
            if ( pql.isOuterJoin ) {
                eduquery.addOuterJoinLiteral(pql.queryLiteral);
            } else {
                eduquery.addLiteral(pql.queryLiteral);
            }
        }
      )
      ( COMMA
        ( pql = queryLiteral[eduquery, prefixes]
          {
              if ( pql.isOuterJoin ) {
                  eduquery.addOuterJoinLiteral(pql.queryLiteral);
              } else {
                  eduquery.addLiteral(pql.queryLiteral);
              }
          }
        )
      )*
      (SEP)?
    ;

/*
 * parses a rule
 *
 * Shape: head literal, RULESEP, one or more comma separated body
 * literals, SEP. The outer join flag of the head literal is not
 * evaluated; for body literals it selects addOuterJoinLiteral over
 * addLiteral.
 */
rule[Query eduquery, Map prefixes] returns[Rule edurule]
{
    ParsedQueryLiteral pql;
    edurule=null;
}
    : pql=queryLiteral[eduquery, prefixes]
      { edurule = eduquery.createRule(pql.queryLiteral); }
      RULESEP
      ( pql=queryLiteral[eduquery, prefixes]
        {
            if ( pql.isOuterJoin ) {
                edurule.addOuterJoinLiteral(pql.queryLiteral);
            } else {
                edurule.addLiteral(pql.queryLiteral);
            }
        }
      )
      ( COMMA
        ( pql=queryLiteral[eduquery, prefixes]
          {
              if ( pql.isOuterJoin ) {
                  edurule.addOuterJoinLiteral(pql.queryLiteral);
              } else {
                  edurule.addLiteral(pql.queryLiteral);
              }
          }
        )
      )*
      SEP
    ;

/*
 * parses a query literal
 *
 * Shape: optional MINUS, a predicate symbol (plain NAME, qname or
 * uriref), an optional outer join marker and an argument list.
 *
 * Which kind of literal is created:
 *  - predicate is QEL.s: statement literal from the first three arguments
 *  - predicate is a builtin: builtin literal (the only kind that
 *    receives the negation flag)
 *  - predicate given as qname/uriref: statement literal with the
 *    predicate symbol as property and the first two arguments as
 *    subject and object
 *  - anything else: ordinary query literal
 *
 * NOTE(review): a leading MINUS in front of a non-builtin literal is
 * parsed but has no effect here - confirm this is intended.
 */
queryLiteral[Query eduquery, Map prefixes] returns [ParsedQueryLiteral pql]
{
    pql = new ParsedQueryLiteral();
    List litArgs;
    Resource pSym = null;
    boolean negated = false;
    boolean isPrefixStatement = false;
}
    : (MINUS {negated=true;})?
      ( p:NAME
        {
            // allow usage of statement literal without namespace declaration
            if ("s".equals(p.getText())) {
                pSym = QEL.s;
            } else {
                // plain names live in the namespace of the query resource
                pSym = createResource(eduquery,
                        eduquery.getResource().getNameSpace()+p.getText());
            }
        }
      | pSym = qname[eduquery, prefixes] {isPrefixStatement=true;}
      | pSym = uriref[eduquery] {isPrefixStatement=true;}
      )
      (OJMARKER { pql.isOuterJoin = true; } )?
      litArgs=arglist[eduquery, prefixes]
      {
        if (pSym.equals(QEL.s)) {
            // create StatementLiteral: s(subject, predicate, object)
            Resource subject = (Resource)litArgs.get(0);
            Resource predicate = (Resource)litArgs.get(1);
            RDFNode object = (RDFNode)litArgs.get(2);
            pql.queryLiteral = eduquery.createStatementLiteral(subject, predicate, object);
        } else if (QEL.isBuiltinPredicate(pSym)) {
            pql.queryLiteral = eduquery.createBuiltinLiteral(pSym,litArgs,negated);
        } else {
            if (isPrefixStatement) {
                // create StatementLiteral: predicate(subject, object)
                Resource subject = (Resource)litArgs.get(0);
                Resource predicate = pSym;
                RDFNode object = (RDFNode)litArgs.get(1);
                pql.queryLiteral = eduquery.createStatementLiteral(subject, predicate, object);
            } else {
                pql.queryLiteral = eduquery.createQueryLiteral(pSym,litArgs);
            }
        }
      }
    ;

/*
 * parses the arguments rsp.
parameters of a query literal
 *
 * Shape: LPAREN, one or more comma separated arguments, RPAREN.
 * Returns the arguments in source order as a List of RDFNode.
 */
arglist[Query eduquery, Map prefixes] returns[List args]
{
    args = new ArrayList();
    RDFNode arg;
}
    : LPAREN
      arg=argument[eduquery, prefixes] { args.add(arg); }
      ( COMMA arg=argument[eduquery, prefixes] { args.add(arg); } )*
      RPAREN
    ;

/*
 * parses one argument of a query literal
 *
 * An argument is an RDF literal, a qname, a uriref or a plain NAME.
 * A plain NAME is treated as a variable: it is looked up in the query by
 * its local name and registered first if the query does not know it yet.
 */
argument[Query eduquery, Map prefixes] returns[RDFNode arg]
{
    arg=null;
}
    : arg=literal[eduquery, prefixes]
    | arg=qname[eduquery, prefixes]
    | arg=uriref[eduquery]
    | v:NAME
      {
        String varName=v.getText();
        arg = eduquery.getVarForLocalName(varName);
        if ( arg == null ) {
            eduquery.addVarWithLocalName(varName);
            arg = eduquery.getVarForLocalName(varName);
        }
      }
    ;

/*
 * parses a prefix declaration (of the form @prefix shorthand: <uri>)
 *
 * The mapping shorthand -> uri is stored in the prefixes map. The model
 * parameter m is not used by this rule.
 */
prefixDeclaration[Model m, Map prefixes]
{
    String prefix = null;
}
    : AT_PREFIX
      p:PREFIXNAME { prefix = p.getText(); }
      r:URIREF { prefixes.put(prefix, r.getText()); }
      (SEP)?
    ;

/*
 * parses an uri base declaration for the query
 * (AT_BASE followed by a URIREF and an optional SEP)
 */
baseDeclaration returns[String base]
{
    base = null;
}
    : AT_BASE
      r:URIREF { base=r.getText(); }
      (SEP)?
    ;

/*
 * parses the shorthand part of a prefix declaration
 *
 * Yields "_default" when no name precedes the colon.
 * NOTE(review): no rule visible in this grammar references nsprefix.
 */
nsprefix returns[String prefix]
{
    prefix="_default";
}
    : (ns:NAME {prefix = ns.getText();})? COLON
    ;

/*
 * parses an RDF literal
 *
 * NOTE(review): the optional language tag and datatype are parsed, but the
 * literal is created from the string text alone, so both are discarded.
 */
literal[Query eduquery, Map prefixes] returns [Literal lit]
{
    lit=null;
    String lang = null;
    Resource dt = null;
}
    : s:STRING
      (lang=langModifier)?
      (dt=datatypeModifier[eduquery, prefixes])?
      {
        try {
            lit=eduquery.getResource().getModel().createLiteral(s.getText());
        } catch( RDFException e ) {
            throw new EQMException(e);
        }
      }
    ;

/*
 * parses a language declaration for an RDF literal
 * and returns the text of the AT_LANG token
 */
langModifier returns[String lang]
{
    lang = null;
}
    : l:AT_LANG {lang=l.getText();}
    ;

/*
 * parses an XML schema datatype declaration for an RDF literal
 * (DATATYPE marker followed by the datatype resource)
 */
datatypeModifier[Query eduquery, Map prefixes] returns[Resource res]
{
    res = null;
}
    : DATATYPE res=datatype[eduquery, prefixes]
    ;

/*
 * parses the datatype resource part.
It can be a qname (foo:bar)
 * or an uriref (<uri>)
 */
datatype[Query eduquery, Map prefixes] returns [Resource res]
{
    res = null;
}
    : res=qname[eduquery, prefixes]
    | res=uriref[eduquery]
    ;

/*
 * parses a qname (of the form foo:bar, where foo is a declared
 * prefix and bar the local name part of the uri)
 *
 * The text is split at the first colon; the prefix is resolved through
 * the prefixes map and the resulting namespace is prepended to the local
 * name. An undeclared prefix raises a SemanticException.
 */
qname[Query eduquery, Map prefixes] returns [Resource res]
{
    res = null;
}
    : qn: QNAME
      {
        String text = qn.getText();
        int colonIndex = text.indexOf(':');
        String prefix = text.substring(0, colonIndex);
        String lname = text.substring(colonIndex + 1);
        String ns = (String)prefixes.get(prefix);
        if ( ns == null ) {
            throw new SemanticException("namespace '" + prefix + "' has not been declared");
        }
        res=createResource(eduquery, ns+lname);
      }
    ;

/*
 * parses an uri. Uris are always enclosed in '<' '>'
 * (the lexer strips the angle brackets from the token text)
 */
uriref[Query eduquery] returns [Resource res]
{
    res = null;
}
    : r:URIREF { res=createResource(eduquery, r.getText()); }
    ;


class AntlrDatalogLexer extends Lexer;

options {
    k=3; // Because of """ and '''

    // UTF-8 expanded to Java chars
    // NB: antlr 2.7.1 \uFFFF is the EOF char
    //     Not fixed in antlr 2.7.2
    // N3 Comments cause OutOfMemoryError if FFFF used.
    // It does make for rather large tables
    charVocabulary= '\u0000'..'\uFFFE' ;
}

// THING is there to distinguish between
// - RULESEP (rule separator ':-')
// - QNAME (like 'qel:s', ':s')
// - PREFIXNAME (shorthand part in @prefix declaration, e.g. 'qel:')
// - NAME (plain name for variables and predicates like 'X', 'title')
//
// It's necessary to do it in the lexer because otherwise
// ambiguities occur
//
// Order of syntactic predicates matters here!
THING
    : (COLON MINUS) => COLON MINUS {$setType(RULESEP);}
    | (COLON LNAME )=> COLON LNAME {$setType(QNAME);}
    | (NSNAME COLON LNAME )=> NSNAME COLON LNAME {$setType(QNAME);}
    | (NSNAME COLON WS )=> NSNAME COLON! WS! {$setType(PREFIXNAME);}
    | (NSNAME) => NSNAME {$setType(NAME);}
    ;

// Namespace prefix name: include bNode ids.
protected // Prefix
NSNAME: (ALPHANUMERIC|'_') (ALPHANUMERIC|'_'|'-')* ;

// Local name part of a qname: one or more LOCALCHARs.
protected
LNAME : LOCALCHAR (LOCALCHAR)* ;

protected
LOCALCHAR: ALPHANUMERIC | MOST_URI_CHARS ;

// URI reference enclosed in '<' '>'; the brackets are dropped from the
// token text.
URIREF: '<'! (options{greedy=false;}: URICHAR)+ '>'! ;

// RFC 2396 + chars for limited IRI compatibility
// processing to check URIref syntax and character sets
protected
URICHAR:
    ALPHANUMERIC | MOST_URI_CHARS |
    '(' | ')' | ',' | '*' ;

// we need to stop at '(', ')', '*' and ',' for LNAMEs,
// therefore we can't allow them here.
// URIs containing these characters have to be
// written using the '<'full_uri'>' syntax
protected
MOST_URI_CHARS:
    // RFC 2396 unreserved
    '-' | '_' | '.' | '!' | '~' | "'" |
    // RFC 2396 reserved
    ';' | '/' | '?' | ':' | '&' | '=' | '+' | '$' | '@' |
    // unwise
    '{' | '}' | '|' | '\\' | '^' | '[' | ']' | '`' |
    // Delims: Escape and ref
    '%' | '#' | '"' ;

// Two cases of @word: directives (@prefix, @base) and language tags.
// Can't have a language of "prefix" or "base", because the directive
// alternatives are tried first.
AT_WORD
    : (AT "prefix") => AT "prefix" { $setType(AT_PREFIX) ; }
    | (AT "base") => AT "base" { $setType(AT_BASE) ; }
    | (AT (ALPHA)) => AT a:(ALPHA)+ ("-" (ALPHA)*)? { $setType(AT_LANG) ; }
    ;

protected
COLON : ':' ;

// Named characters / tokens
SEP      : '.' ;
AT       : '@' ;
LPAREN   : '(' ;
RPAREN   : ')' ;
COMMA    : ',' ;
DATATYPE : "^^";
MINUS    : '-' ;
QUERYSEP : "?";
OJMARKER : '*';

// strings enclosed in '' or ""
STRING: ( STRING1 | STRING2 ) ;

// Single line comment.
SL_COMMENT:
    "#"
    // Uses the fact that the first clause is greedy, eating all
    // non-newlines, thus the end condition is optional newline
    // and it works at the end of the file.
    (~('\n'|'\r'))* (NL)?
    {$setType(Token.SKIP); }
    ;

// Windows: \r\n
// Unix: \n
// Mac: \r
protected
NL1: "\r\n" { newline(); } ;

protected
NL2: "\n" { newline(); } ;

protected
NL3: "\r" { newline(); } ;

// Hard work! This makes NL's in ''' and """ strings work
protected
NL: (NL1) => NL1 | (NL2) => NL2 | (NL3) => NL3;

// Ignore whitespace. Not protected as SKIP is passed to parser.
WS: ( ' ' | '\t' | '\f' | NL ) { $setType(Token.SKIP); } ;

// Letters accepted in names; besides ASCII only the German umlauts and
// sharp s are listed.
protected
ALPHASMALL: 'a'..'z'|'ä'|'ü'|'ö'|'ß' ;

protected
ALPHACAPS: 'A'..'Z'|'Ä'|'Ö'|'Ü' ;

protected
ALPHA: ALPHACAPS|ALPHASMALL ;

protected
ALPHANUMERIC: (ALPHA|'0'..'9') ;

// Single-quoted string: either '''...''' (may contain newlines) or '...'.
// The delimiting quotes are removed from the token text.
protected
STRING1
    : (QUOTE3S)=>
      // Needs k=3: if k less a lexer is generated but fails : see antlr doc
      QUOTE3S!
      (options{greedy=false;}: (NL)=>NL | ESCAPE | ~('\\'))*
      QUOTE3S!
    | '\''!
      (options{greedy=false;}: ESCAPE | ~'\\')*
      '\''!
    ;

// Double-quoted string: either """...""" (may contain newlines) or "...".
// The delimiting quotes are removed from the token text.
protected
STRING2
    : (QUOTE3D)=>
      QUOTE3D!
      (options{greedy=false;}: (NL)=>NL | ESCAPE | ~('\\'))*
      QUOTE3D!
    | '"'!
      (options{greedy=false;}: ESCAPE | ~'\\')*
      '"'!
    ;

protected
QUOTE3S: "'''" ;        // 3 single quotes

protected
QUOTE3D: '"' '"' '"' ;  // 3 double quotes

// Backslash escape inside a string. A known escape character is replaced
// by the character it denotes; for any other character the backslash and
// the character are kept as they are.
protected
ESCAPE:
    '\\'!
    ( (ESC_CHAR) => ESC_CHAR
    | ch:. { $setText("\\"+ch) ; }
    )
    ;

// Character following the backslash of a recognised escape sequence.
protected
ESC_CHAR:
    ( 'n'  { $setText("\n") ; }
    | 'r'  { $setText("\r") ; }
    | 'b'  { $setText("\b") ; }
    | 't'  { $setText("\t") ; }
    | 'f'  { $setText("\f") ; }
    // vertical tab: Java has no \v escape, so the octal form is used
    // (this alternative used to yield a form feed)
    | 'v'  { $setText("\013") ; }
    | 'a'  { $setText("\007") ; }
    | '"'  { $setText("\"") ; }
    | '\\' { $setText("\\") ; }
    | '\'' { $setText("'") ; }
    )
    ;