1/* $Id$ 2 3 Part of SWI-Prolog 4 5 Author: Jan Wielemaker 6 E-mail: jan@swi.psy.uva.nl 7 WWW: http://www.swi-prolog.org 8 Copyright (C): 1985-2004, University of Amsterdam 9 10 This program is free software; you can redistribute it and/or 11 modify it under the terms of the GNU General Public License 12 as published by the Free Software Foundation; either version 2 13 of the License, or (at your option) any later version. 14 15 This program is distributed in the hope that it will be useful, 16 but WITHOUT ANY WARRANTY; without even the implied warranty of 17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 GNU General Public License for more details. 19 20 You should have received a copy of the GNU Lesser General Public 21 License along with this library; if not, write to the Free Software 22 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 23 24 As a special exception, if you link this library with other files, 25 compiled with a Free Software compiler, to produce an executable, this 26 library does not by itself cause the resulting executable to be covered 27 by the GNU General Public License. This exception does not however 28 invalidate any other reasons why the executable file might be covered by 29 the GNU General Public License. 30*/ 31 32:- module(serql, 33 [ serql_query/2, % +Query, -Result 34 serql_query/3, % +Query, -Result, +Options 35 serql_compile/3, % +Query, -Compiled, +Options 36 serql_run/2 % +Compiled, -Reply 37 ]). 38:- use_module(library(semweb/rdf_db)). 39:- use_module(library(semweb/rdf_optimise)). 40:- use_module(library(lists)). 41:- use_module(library(option)). 42:- use_module(library(debug)). 43:- use_module(library(settings)). 44:- use_module(rdfql_util). 45:- include(entailment(load)). 46 47:- meta_predicate 48 select_results( , , ). 49 50/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 51A Prolog path expression is a conjunction of rdf/3 statements. 
Parts may 52be wrapped in opt/1 to indicate they are optional and nodes may be of 53the form set(List) to indicate a conjunction of distinct values. 54- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
%!  serql_query(+Query, -Result) is nondet.
%!  serql_query(+Query, -Result, +Options) is nondet.
%
%   Compile Query with serql_compile/3 and execute it with
%   serql_run/2.  Result is a term row(Col1, Col2, ...) for SELECT
%   statements or a term rdf(S,P,O).  The predicate is
%   non-deterministic, returning the rows or RDF statements
%   one-by-one.  serql_query/2 passes the options [entailment(rdf)].

serql_query(Text, Answer) :-
    Defaults = [ entailment(rdf)
               ],
    serql_query(Text, Answer, Defaults).

serql_query(Text, Answer, Options) :-
    serql_compile(Text, Program, Options),
    serql_run(Program, Answer).
%!  serql_compile(+Text, -Compiled, +Options) is det.
%
%   Parse the SeRQL query Text and compile it into a term
%   serql_query(Goal, ReplyTemplate, Module) for serql_run/2.
%   Options handled here and in compile/5:
%
%     * entailment(Entailment)
%       Entailment regime, mapped to a module by entailment_module/2.
%       Defaults to `rdfs`.
%     * optimise(Bool)
%       Run rdf_optimise/2 on the goal.  The default is taken from
%       the setting cliopatria:optimise_query.
%     * type(Type)
%       Expected query type: select(VarNames) or construct.  Checked
%       by set_type/2.
%
%   For compatibility Options may also be a plain atom, which is
%   taken to be the entailment regime.

serql_compile(Text, Compiled, Options) :-
    atom(Options), Options \== [],
    !,                                  % compatibility
    serql_compile(Text, Compiled, [entailment(Options)]).
serql_compile(Text, serql_query(Goal, ReplyTempl, Module), Options) :-
    serql_parse(Text, Query),
    compile(Query, Goal, ReplyTempl, Module, Options).

%   compile(+ParsedQuery, -Goal, -ReplyTemplate, -Module, +Options)
%
%   Translate a parsed select/7 or construct/6 term.  The order of the
%   calls matters: where_constraints/2 attaches `where` attributes to
%   the query variables, serql_compile_path/3 consumes them and
%   remove_annotations/2 deletes them again before the goal is
%   optimised.

compile(select(Row0, VarNames, Path, Where, Distinct, Limit, Offset),
        select(Final, Solutions),
        Row,
        Module,
        Options) :-
    option(entailment(Entailment), Options, rdfs),
    entailment_module(Entailment, Module),
    mk_solutions(Distinct, Limit, Offset, Solutions),
    set_type(select(VarNames), Options),
    where_constraints(Where, Annotations),
    serql_compile_path(Path, select, Goal),
    remove_annotations(Annotations, where),
    projection_functions(Row0, Row, Select),
    (   setting(cliopatria:optimise_query, Def),
        option(optimise(Opt), Options, Def),
        Opt == true
    ->  rdf_optimise((Goal,Where,Select), Optimised)
    ;   Optimised = (Goal,Where,Select)
    ),
    serql_select_bind_null(Optimised, Final),
    debug(serql(compiled), '~@',
          [ portray_clause((q(Row) :- Final))
          ]).
compile(construct(RPath, Path, Where, Distinct, Limit, Offset),
        construct(Final, Solutions),
        RDF,
        Module,
        Options) :-
    option(entailment(Entailment), Options, rdfs),
    % Resolved once, as in the select clause; the original repeated
    % this call after statements/2 with both arguments already bound.
    entailment_module(Entailment, Module),
    mk_solutions(Distinct, Limit, Offset, Solutions),
    set_type(construct, Options),
    where_constraints(Where, Annotations),
    serql_compile_path(Path, construct, Goal),
    remove_annotations(Annotations, where),
    statements(RPath, Statements),
    (   setting(cliopatria:optimise_query, Def),
        option(optimise(Opt), Options, Def),
        Opt == true
    ->  rdf_optimise((Goal,Where), Optimised)
    ;   Optimised = (Goal,Where)
    ),
    Final = (Optimised, serql_member_statement(RDF, Statements)),
    debug(serql(compiled), '~@',
          [ portray_clause((q(RDF) :- Final))
          ]).
%!  mk_solutions(+Distinct, +Limit, +Offset, -Solutions) is det.
%
%   Build the solutions specification handed to select_results/3.
%   Solutions is solutions(unsorted, Limit, Offset), wrapped in
%   distinct/1 when Distinct is the atom `distinct`.

mk_solutions(Distinct, Limit, Offset, Solutions) :-
    Plain = solutions(unsorted, Limit, Offset),
    (   Distinct = distinct,
        Solutions = distinct(Plain)
    ->  true
    ;   Solutions = Plain
    ).
%!  set_type(+Type, +Options) is det.
%
%   If Options contains a term type(X), verify that X unifies with
%   the Type of the query being compiled.  On a mismatch a type_error
%   is raised whose expected type is query_type(Expected), where
%   Expected and the culprit are the functor names of X and Type.
%   Without a type/1 option the predicate simply succeeds.

set_type(Actual, Options) :-
    (   memberchk(type(Wanted), Options)
    ->  (   Wanted = Actual
        ->  true
        ;   functor(Wanted, Expected, _),
            functor(Actual, Found, _),
            throw(error(type_error(query_type(Expected), Found), _))
        )
    ;   true
    ).
%!  serql_run(+Compiled, -Reply) is nondet.
%
%   Execute a term serql_query(Parsed, Reply, Module) as produced by
%   serql_compile/3.  Both select and construct queries are run by
%   select_results/3 on the module-qualified goal.

serql_run(serql_query(Program, Answer, Module), Answer) :-
    serql_run(Program, Answer, Module).

serql_run(select(Body, Spec), Answer, Module) :-
    select_results(Spec, Answer, Module:Body).
serql_run(construct(Body, Spec), Answer, Module) :-
    select_results(Spec, Answer, Module:Body).
%!  select_results(+Spec, -Reply, :Goal) is nondet.
%
%   Unpack the solutions specification built by mk_solutions/4 and
%   delegate to select_results/6, passing `distinct` or `all` as the
%   first argument and Goal as the last.  The original text had lost
%   the final argument of both calls, leaving a syntax error and Goal
%   unused.
%   NOTE(review): select_results/6 is not defined in the visible part
%   of this file; presumably it is imported from rdfql_util -- confirm.

select_results(distinct(solutions(Order, Limit, Offset)), Reply, Goal) :-
    !,
    select_results(distinct, Offset, Limit, Order, Reply, Goal).
select_results(solutions(Order, Limit, Offset), Reply, Goal) :-
    select_results(all, Offset, Limit, Order, Reply, Goal).


                 /*******************************
                 *           COMPILER           *
                 *******************************/
%!  serql_compile_path(+Path, +Type, -Goal) is det.
%
%   Translate a parsed path expression into a Prolog goal.  Type is
%   `select` or `construct`.  Subjects or objects of the form
%   set(List) are expanded by make_set_subj_conj/6 and
%   make_set_obj_conj/6; an rdf/3 term used as subject or object is
%   expanded into its reification by reified/3; optional parts become
%   soft-cut goals.  In construct mode the Id of an optional part is
%   unified with `true` or `false` at runtime.  Plain rdf/3 terms are
%   combined with the goals derived from the `where` attributes of
%   their arguments.

serql_compile_path(rdf(S,P,O), Type, Conj) :-
    set(S, Subjects),
    !,
    make_set_subj_conj(Subjects, [], P, O, Type, Conj).
serql_compile_path(rdf(S,P,O), Type, Conj) :-
    set(O, Objects),
    !,
    make_set_obj_conj(Objects, [], S, P, Type, Conj).
serql_compile_path(rdf(S0,P,O), Type, Goal) :-
    reified(S0, S, Reify),
    !,
    serql_compile_path(rdf(S,P,O), Type, Plain),
    Goal = (Plain, Reify).
serql_compile_path(rdf(S,P,O0), Type, Goal) :-
    reified(O0, O, Reify),
    !,
    serql_compile_path(rdf(S,P,O), Type, Plain),
    Goal = (Plain, Reify).
serql_compile_path((Left0,Right0), Type, (Left,Right)) :-
    !,
    serql_compile_path(Left0, Type, Left),
    serql_compile_path(Right0, Type, Right).
serql_compile_path(optional(Id, Part0), construct,
                   (Part *-> Id=true ; Id=false)) :-
    !,
    serql_compile_path(Part0, construct, Part).
serql_compile_path(optional(_, Part0), select, (Part *-> true ; true)) :-
    !,
    serql_compile_path(Part0, select, Part).
serql_compile_path(rdf(S,P,O0), _, Goal) :-
    !,
    resource_annotations(S, SubjGoal),
    resource_annotations(P, PredGoal),
    object_annotations(O0, O, ObjGoal),
    clean_conj((SubjGoal, PredGoal, rdf(S,P,O), ObjGoal), Goal).
serql_compile_path(Other, _, Other).

%   reified(+Node, -StatementId, -Goal)
%
%   True when Node is an rdf(S,P,O) term.  Goal is the conjunction of
%   four rdf/3 calls describing the reified statement StatementId.
%   The first clause matches the integer 0 so that an unbound Node
%   is caught and rejected instead of being bound to rdf/3.

reified(0, _, _) :-                     % catch variables
    !,
    fail.
reified(rdf(S,P,O), StatementId,
        (   rdf(StatementId, TypeP, StatementC),
            rdf(StatementId, SubjectP, S),
            rdf(StatementId, PredicateP, P),
            rdf(StatementId, ObjectP, O)
        )) :-
    rdf_equal(TypeP, rdf:type),
    rdf_equal(SubjectP, rdf:subject),
    rdf_equal(PredicateP, rdf:predicate),
    rdf_equal(ObjectP, rdf:object),
    rdf_equal(StatementC, rdf:'Statement').



%   make_set_subj_conj(+Set, +Seen, +P, +O, +Type, -Goal)
%
%   Compile rdf(Member,P,O) for each member of a subject set and add
%   \== tests against the members in Seen, so that the members are
%   bound to distinct values.

make_set_subj_conj([], _, _, _, _, true).       % should not happen
make_set_subj_conj([Last], [], P, O, Type, Goal) :-
    !,
    serql_compile_path(rdf(Last, P, O), Type, Goal).
make_set_subj_conj([Last], Seen, P, O, Type, (Goal, Diffs)) :-
    !,
    serql_compile_path(rdf(Last, P, O), Type, Goal),
    make_diff(Seen, Last, Diffs).
make_set_subj_conj([First|Rest], Seen, P, O, Type, (Goal, Diffs, More)) :-
    !,
    serql_compile_path(rdf(First, P, O), Type, Goal),
    make_diff(Seen, First, Diffs),
    make_set_subj_conj(Rest, [First|Seen], P, O, Type, More).


%   make_set_obj_conj(+Set, +Seen, +S, +P, +Type, -Goal)
%
%   As make_set_subj_conj/6, but the set members are the objects of
%   the generated rdf(S,P,Member) statements.

make_set_obj_conj([], _, _, _, _, true).        % should not happen
make_set_obj_conj([Last], [], S, P, Type, Goal) :-
    !,
    serql_compile_path(rdf(S, P, Last), Type, Goal).
make_set_obj_conj([Last], Seen, S, P, Type, (Goal, Diffs)) :-
    !,
    serql_compile_path(rdf(S, P, Last), Type, Goal),
    make_diff(Seen, Last, Diffs).
make_set_obj_conj([First|Rest], Seen, S, P, Type, (Goal, Diffs, More)) :-
    !,
    serql_compile_path(rdf(S, P, First), Type, Goal),
    make_diff(Seen, First, Diffs),
    make_set_obj_conj(Rest, [First|Seen], S, P, Type, More).


%   make_diff(+Others, +To, -Goal)
%
%   Goal is a conjunction of Other \== To tests, one for each element
%   of Others, or `true` if Others is empty.

make_diff([], _, true).
make_diff([Only], To, (Only \== To)) :-
    !.
make_diff([Other|Others], To, (Other \== To, More)) :-
    make_diff(Others, To, More).
%!  statements(+Graph, -Statements) is det.
%
%   Extract a plain list of triples from a CONSTRUCT
%   path-expression.  Optional parts of the tree are represented as
%   optional(Bool, ListOfTriples).  Using CONSTRUCT * (i.e. when the
%   executed path is the result path) the goal generated by the
%   compiler will unify Bool with true or false.  See also
%   member_statement/2.

statements(Graph, Triples) :-
    phrase(statements(Graph), Triples).

statements(rdf(S,P,O)) -->
    { set(S, Subjects) },
    !,
    subj_statements(Subjects, P, O).
statements(rdf(S,P0,O)) -->
    !,
    {   nonvar(P0),
        map_builtin(P0, P)              % serql:direct* --> rdf(s) property
    ->  true
    ;   P = P0
    },
    [ rdf(S,P,O) ].
statements((Left,Right)) -->
    !,
    statements(Left),
    statements(Right).
statements(optional(Id, Part)) -->
    !,
    { phrase(statements(Part), OptionalStatements) },
    [ optional(Id, OptionalStatements) ].
statements(_) -->
    [].

% Expand the prefix:local notation of the map_builtin/2 facts below
% into global resources at load time.

term_expansion(map_builtin(B0, P0), map_builtin(B, P)) :-
    rdf_global_id(B0, B),
    rdf_global_id(P0, P).

map_builtin(serql:directSubClassOf,    rdfs:subClassOf).
map_builtin(serql:directSubPropertyOf, rdfs:subPropertyOf).
map_builtin(serql:directType,          rdf:type).


%   subj_statements(+Subjects, +P, +O)//
%
%   Emit the triples for each member of a subject set.  If O is a
%   set as well, a triple is emitted for each subject/object pair.

subj_statements([], _, _) -->
    [].
subj_statements([Subj|Subjects], P, O) -->
    (   { set(O, Objects) }
    ->  obj_statements(Objects, Subj, P)
    ;   [ rdf(Subj, P, O) ]
    ),
    subj_statements(Subjects, P, O).

obj_statements([], _, _) -->
    [].
obj_statements([Obj|Objects], S, P) -->
    [ rdf(S, P, Obj) ],
    obj_statements(Objects, S, P).


%   set(+Node, -Members)
%
%   True if Node is a term set(Members).  Fails on unbound nodes.

set(Node, Members) :-
    nonvar(Node),
    Node = set(Members).


                 /*******************************
                 *       SELECT FUNCTIONS       *
                 *******************************/

%   projection_functions(+Row0, -Row, -Map)
%
%   Row is a copy of the projection Row0 in which every non-variable
%   argument A0 is replaced by a fresh variable A.  Map is the
%   conjunction of serql_eval(A0, A) goals computing these values, or
%   `true` if all arguments are variables.

projection_functions(Row0, Row, Map) :-
    functor(Row0, Name, Arity),
    functor(Row, Name, Arity),
    projection_functions(0, Arity, Row0, Row, true, Map).

projection_functions(Arity, Arity, _, _, Map, Map) :-
    !.
projection_functions(Done, Arity, Row0, Row, Map0, Map) :-
    Index is Done + 1,
    arg(Index, Row0, In),
    (   var(In)
    ->  arg(Index, Row, In),
        projection_functions(Index, Arity, Row0, Row, Map0, Map)
    ;   arg(Index, Row, Out),
        add_conj(Map0, serql_eval(In, Out), Map1),
        projection_functions(Index, Arity, Row0, Row, Map1, Map)
    ).

add_conj(true, Goal, Goal) :-
    !.
add_conj(Conj, Goal, (Conj,Goal)).
349 350 351 /******************************* 352 * WHERE CONSTRAINTS * 353 *******************************/ 354 355/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 356The idea of this step is to turn where clauses into constraints on 357variables. 358 359Supported annotations (in standard order of terms): 360 361 any 362 literal 363 resource 364 eq(Value) 365 like(Pattern) 366- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
%   where_constraints(+Goal, -Annotations)
%
%   Annotate the variables of the where-clause Goal.  Annotations is
%   the list of variables that received a `where` attribute.  The
%   attribute value is either a single annotation or
%   or(ListOfAlternatives).  The latter is used if different paths
%   through the control-structure yield different annotations.

where_constraints(Goal, Annotated) :-
    bagof(Alternative,
          where_constraint_list(Goal, Alternative),
          Alternatives0),
    sort_lol(Alternatives0, Alternatives),
    join_alt_annots(Alternatives, Annotated).
%   where_constraint_list(+Goal, -Annotations)
%
%   Collect the annotations for one path through Goal.  Annotations
%   is a list of Var=Annotation pairs.  The predicate is
%   non-deterministic because a disjunction in Goal is explored one
%   branch at a time; the `where` attributes set while walking Goal
%   are removed again by attrs_to_terms/2.

where_constraint_list(Goal, Annotations) :-
    phrase(where_constraints(Goal), AttrVars),
    attrs_to_terms(AttrVars, Annotations).


%   where_constraints(+Goal)//
%
%   Walk Goal and constrain its variables.  The DCG list holds each
%   variable that received its first annotation.

where_constraints((Left,Right)) -->
    !,
    where_constraints(Left),
    where_constraints(Right).
where_constraints((Left;Right)) -->
    !,
    (   where_constraints(Left)
    ;   where_constraints(Right)
    ).
where_constraints(serql_compare(like, Var, Pattern)) -->
    !,
    constrain(Var, like(Pattern)).
where_constraints(serql_compare(=, Var, Value)) -->
    !,
    constrain(Var, eq(Value)).
where_constraints(rdf_is_literal(Var)) -->
    !,
    constrain(Var, literal).
where_constraints(rdf_is_resource(Var)) -->
    !,
    constrain(Var, resource).
where_constraints(rdf(S,P,_)) -->
    !,
    constrain(S, resource),
    constrain(P, resource).
where_constraints(_) -->
    [].

%   constrain(+Term, +Condition)//
%
%   Add Condition to the `where` attribute of the variable in Term.
%   The label/1, lang/1 and datatype/1 wrappers imply that the
%   wrapped variable is a literal.  Anything else is ignored.

constrain(Var, Cond) -->
    { var(Var) },
    !,
    (   { get_attr(Var, where, Old) }
    ->  { put_attr(Var, where, (Cond, Old)) },
        []
    ;   { put_attr(Var, where, Cond)
        },
        [ Var ]
    ).
constrain(label(Var), Cond) -->
    !,
    constrain(Var, (literal, Cond)).
constrain(lang(Var), Cond) -->
    !,
    constrain(Var, (literal, Cond)).
constrain(datatype(Var), Cond) -->
    !,
    constrain(Var, (literal, Cond)).
constrain(_, _) -->
    [].
%   join_alt_annots(+ListOfLists, -AnnotatedVars)
%
%   Merge the alternative annotation lists.  Repeatedly takes the
%   smallest variable (standard order) at the head of the lists,
%   collects its annotation from every alternative and, unless one of
%   them is `any`, stores the result as its `where` attribute.
%   AnnotatedVars are the variables that got an attribute.

join_alt_annots(Alternatives, Annotated) :-
    smallest_var(Alternatives, Var),
    !,
    var_annotations(Var, Alternatives, Remaining, Collected0),
    sort(Collected0, Collected),        % remove duplicates
    (   empty_annotations(Collected)
    ->  join_alt_annots(Remaining, Annotated)
    ;   put_annotations(Collected, Var),
        Annotated = [Var|Tail],
        join_alt_annots(Remaining, Tail)
    ).
join_alt_annots(Alternatives, []) :-
    assertion(maplist(=([]), Alternatives)).
%   normalise_annotation(+Annotation0, -Annotation)
%
%   Flatten the conjunction Annotation0, sort its members in standard
%   order of terms (which also removes duplicates) and rebuild a
%   right-nested conjunction.

normalise_annotation(Annot0, Annot) :-
    conj_to_list(Annot0, Members0, []),
    sort(Members0, Members),
    list_do_conj(Members, Annot).

%   conj_to_list(+Conj, -List, ?Tail): difference-list flattening.

conj_to_list((Left,Right), List, Tail) :-
    !,
    conj_to_list(Left, List, Middle),
    conj_to_list(Right, Middle, Tail).
conj_to_list(Single, [Single|Tail], Tail).

%   list_do_conj(+List, -Conj): the empty list maps to `any`.

list_do_conj([], any).
list_do_conj([Only], Only) :-
    !.
list_do_conj([First|Rest0], (First,Rest)) :-
    list_do_conj(Rest0, Rest).
A,(C1;C2) into (A,C1);(A,C2)
And apply optimisation on both branches.
%   empty_annotations(+Annotations)
%
%   True if the collected annotations impose no restriction: the list
%   is empty or one of the alternatives is `any`.

empty_annotations([]) :-
    !.
empty_annotations(Alternatives) :-
    memberchk(any, Alternatives).

%   put_annotations(+Annotations, +Var)
%
%   Store Annotations as the `where` attribute of Var.  A single
%   annotation is stored as is, multiple ones as or(Annotations).

put_annotations([], _).
put_annotations([Single], Var) :-
    !,
    put_attr(Var, where, Single).
put_annotations(Several, Var) :-
    put_attr(Var, where, or(Several)).
%   smallest_var(+ListOfLists, -Smallest)
%
%   Smallest is the smallest key (standard order of terms) found at
%   the head of one of the Key=Value lists.  Fails if all lists are
%   empty.

smallest_var([[Key=_|_]|Lists], Smallest) :-
    smallest_var(Lists, Key, Smallest).
smallest_var([[]|Lists], Smallest) :-
    smallest_var(Lists, Smallest).

smallest_var([], Smallest, Smallest).
smallest_var([[Key=_|_]|Lists], Best0, Smallest) :-
    !,
    smallest(Key, Best0, Best),
    smallest_var(Lists, Best, Smallest).
smallest_var([[]|Lists], Best, Smallest) :-
    smallest_var(Lists, Best, Smallest).

%   smallest(+A, +B, -Smallest): B wins unless A is strictly smaller.

smallest(A, B, Smallest) :-
    compare(Order, A, B),
    (   Order == (<)
    ->  Smallest = A
    ;   Smallest = B
    ).
%   var_annotations(+Var, +ListOfLists0, -ListOfLists, -Annotations)
%
%   Extract the annotation for Var from each alternative.  An
%   alternative whose first pair has Var as key contributes its
%   annotation and loses that pair (the alternative is dropped if it
%   becomes empty); any other alternative is kept unchanged and
%   contributes `any`.
%
%   The keys are unbound query variables, so they must be compared
%   with ==/2.  Matching them by head unification would silently
%   unify Var with the first variable of every alternative.

var_annotations(_, [], [], []) :-
    !.
var_annotations(Var, [[Var0=A|TA0]|TL0], LoL, [A|TA]) :-
    Var == Var0,
    !,
    (   TA0 == []
    ->  LoL = TL
    ;   LoL = [TA0|TL]
    ),
    var_annotations(Var, TL0, TL, TA).
var_annotations(Var, [A0|TL0], [A0|TL], [any|A]) :-
    var_annotations(Var, TL0, TL, A).


% Attributed-variable hooks for the `where` attribute.  The hooks
% must be defined in the module named after the attribute.  Unifying
% an annotated variable always fails; portraying prints the value.

where:attr_unify_hook(_,_) :- fail.
where:attr_portray_hook(Val, _Var) :-
    print(Val).
%   attrs_to_terms(+AttrVars, -Pairs)
%
%   Turn each variable carrying a `where` attribute into a term
%   Var=Annotation, where Annotation is the normalised attribute
%   value.  The attribute is deleted from the variable.

attrs_to_terms([], []).
attrs_to_terms([Var|Vars], [Var=Annot|Pairs]) :-
    get_attr(Var, where, Raw),
    del_attr(Var, where),
    normalise_annotation(Raw, Annot),
    attrs_to_terms(Vars, Pairs).
%   sort_lol(+ListOfLists0, -ListOfLists)
%
%   Sort each of the inner lists in standard order of terms, removing
%   duplicates.  The order of the outer list is unchanged.

sort_lol(Lists0, Lists) :-
    maplist(sort, Lists0, Lists).
%   remove_annotations(+Vars, +AttrName)
%
%   Delete the attribute AttrName from every variable in Vars.

remove_annotations([], _).
remove_annotations([Var|Vars], Attr) :-
    del_attr(Var, Attr),
    remove_annotations(Vars, Attr).
%   object_annotations(+Object0, -Object, -Goal)
%
%   Use the `where` attribute of Object0 to restrict the object of an
%   rdf/3 call.  For the annotation (literal, like(Pattern)) Object
%   becomes literal(like(Pattern), L) and Goal unifies Object0 with
%   literal(L).  In all other cases Object is Object0 and Goal is
%   `true`.

object_annotations(Object0, Object, Goal) :-
    (   get_attr(Object0, where, Annot),
        object_annot(Annot, Object0, Object, Goal)
    ->  true
    ;   Object = Object0,
        Goal = true
    ).

object_annot((literal, like(Pattern)), Object0,
             literal(like(Pattern), Lit),
             Object0 = literal(Lit)).
%   resource_annotations(+Resource, -Goal)
%
%   Use the `where` attribute of Resource.  An eq/1 annotation binds
%   Resource at compile time (Goal is `true`); an or/1 annotation of
%   eq(query(R)) alternatives yields Goal = member(Resource, List).
%   Without a usable annotation Goal is `true`.

resource_annotations(Resource, Goal) :-
    (   get_attr(Resource, where, Annot),
        resource_annot(Annot, Resource, Goal)
    ->  true
    ;   Goal = true
    ).

resource_annot(eq(Other), Resource, true) :-   % where A = B
    var(Other),
    !,
    del_attr(Resource, where),
    Resource = Other.
resource_annot(eq(query(String)), Resource, true) :-
    !,
    del_attr(Resource, where),
    Resource = String.
resource_annot(or(Alternatives), Resource, Goal) :-
    eq_list(Alternatives, Resources),
    !,
    Goal = member(Resource, Resources).

%   eq_list(+Annotations, -Resources)
%
%   True if all Annotations are of the form eq(query(R)).

eq_list([], []).
eq_list([eq(query(R))|Annots], [R|Rs]) :-
    eq_list(Annots, Rs).
%   clean_conj(+Conj0, -Conj)
%
%   Strip `true` from the left of a conjunction and from the right of
%   a two-element conjunction.  A `true` that is nested deeper in the
%   right-hand side is left alone.

clean_conj(Conj0, Conj) :-
    (   Conj0 = (true, Rest)
    ->  clean_conj(Rest, Conj)
    ;   Conj0 = (Rest, true)
    ->  clean_conj(Rest, Conj)
    ;   Conj = Conj0
    ).

                 /*******************************
                 *            PARSER            *
                 *******************************/
%   serql_parse(+Text, -Query)
%
%   Parse Text, given as a list of character codes or as an atomic
%   term, into a query term.  The text is tokenised, parsed, its
%   variables are replaced by Prolog variables and its URIs are
%   expanded.  Throws syntax_error(unknown) if any of these steps
%   fails and a type_error if Text is neither a list nor atomic.

serql_parse(Codes, Query) :-
    is_list(Codes),
    !,
    (   phrase(tokens(Tokens), Codes),
        phrase(query(Parsed, NameSpaces), Tokens),
        expand_vars(Parsed, WithVars),
        expand_uris(WithVars, NameSpaces, Query)
    ->  true
    ;   syntax_error(unknown)
    ).
serql_parse(Atomic, Query) :-
    atomic(Atomic),
    !,
    atom_codes(Atomic, Codes),
    serql_parse(Codes, Query).
serql_parse(Input, _) :-
    throw(error(type_error(text, Input), _)).


                 /*******************************
                 *            ERRORS            *
                 *******************************/

syntax_error(What) :-
    throw(error(syntax_error(What),
                context(_, 'in SeRQL query'))).


                 /*******************************
                 *          NAMESPACES          *
                 *******************************/

%   expand_uris(+Term0, +Map, -Term)
%
%   Replace the uri/1, uri/2 and old_uri/2 terms in Term0 by atoms.
%   Map is the list of NS=Prefix declarations of the query; ns/2
%   provides the defaults.  An unknown namespace raises an
%   existence_error for uri/2, while old_uri/2 falls back to the
%   plain NS:Local text.

expand_uris(Var, _, Var) :-
    var(Var),
    !.
expand_uris(uri(URI), _, URI) :-                % <!foo:bar>
    !.
expand_uris(uri(NS, Local), Map, URI) :-        % foo:bar
    !,
    (   memberchk(NS=Prefix, Map)
    ->  true
    ;   ns(NS, Prefix)
    ->  true
    ;   throw(error(existence_error(namespace, NS), _))
    ),
    atom_concat(Prefix, Local, URI).
expand_uris(old_uri(NS, Local), Map, URI) :-    % <foo:bar>
    !,
    (   (   memberchk(NS=Prefix, Map)
        ;   ns(NS, Prefix)
        )
    ->  atom_concat(Prefix, Local, URI)
    ;   concat_atom([NS, :, Local], URI)
    ).
expand_uris(Term0, Map, Term) :-
    compound(Term0),
    !,
    functor(Term0, Name, Arity),
    functor(Term, Name, Arity),
    expand_uris(0, Arity, Term0, Map, Term).
expand_uris(Term, _, Term).

expand_uris(Arity, Arity, _, _, _) :-
    !.
expand_uris(Done, Arity, Term0, Map, Term) :-
    Index is Done + 1,
    arg(Index, Term0, Arg0),
    arg(Index, Term, Arg),
    expand_uris(Arg0, Map, Arg),
    expand_uris(Index, Arity, Term0, Map, Term).
%   ns(?NS, ?URI)
%
%   Default namespace declarations.  If the setting
%   cliopatria:rdf_db_namespaces is `true` the namespaces known to
%   rdf_db are used, otherwise the table serql_ns/2.

ns(NS, URI) :-
    setting(cliopatria:rdf_db_namespaces, true),
    !,
    rdf_db:ns(NS, URI).
ns(NS, URI) :-
    serql_ns(NS, URI).

% Built-in SeRQL prefixes.  The owl entry uses the W3C namespace
% http://www.w3.org/2002/07/owl# (the month is zero-padded).

serql_ns(rdf,  'http://www.w3.org/1999/02/22-rdf-syntax-ns#').
serql_ns(rdfs, 'http://www.w3.org/2000/01/rdf-schema#').
serql_ns(owl,  'http://www.w3.org/2002/07/owl#').
serql_ns(xsd,  'http://www.w3.org/2001/XMLSchema#'). % Wrong in SeRQL docs!
serql_ns(serql,'http://rdf4j.org/schema/serql#').


                 /*******************************
                 *          VARIABLES           *
                 *******************************/

% TBD: Check that projection variables actually appear in the
% query!

%   expand_vars(+Query0, -Query)
%
%   Replace the var(Name) terms of the parsed query by Prolog
%   variables.  A select/6 term becomes select/7: Row is a row/N term
%   holding the projected values and VNames a names/N term holding
%   the column names.  For `SELECT *` these are all variables of the
%   path and where clause.  A construct/6 term with `*` uses the path
%   itself as result path.

expand_vars(select(*, Path0, Where0, Distinct, Limit, Offset),
            select(Row, VNames, Path, Where, Distinct, Limit, Offset)) :-
    !,
    var_names(Path0-Where0, Path-Where, VarNames),
    vars(VarNames, Vars, Names),
    Row =.. [row | Vars],
    VNames =.. [names|Names].
expand_vars(select(Projection, Path0, Where0, Distinct, Limit, Offset),
            select(Row, VNames, Path, Where, Distinct, Limit, Offset)) :-
    !,
    var_names(x(Projection,Path0,Where0), x(Vars,Path,Where), _VarNames),
    Row =.. [row | Vars],
    proj_names(Projection, Names),
    VNames =.. [names|Names].
expand_vars(construct(*, Path0, Where0, Distinct, Limit, Offset),
            construct(Path, Path, Where, Distinct, Limit, Offset)) :-
    !,
    var_names(x(Path0,Where0), x(Path,Where), _VarNames).
expand_vars(construct(Ret0, Path0, Where0, Distinct, Limit, Offset),
            construct(Ret, Path, Where, Distinct, Limit, Offset)) :-
    !,
    var_names(x(Ret0,Path0,Where0), x(Ret,Path,Where), _VarNames).


%   var_names(+Term0, -Term, ?Map)
%
%   Copy Term0, replacing var(Name) by the variable associated with
%   Name in Map.  Map is an open-ended list of Name=Var pairs that is
%   extended by member/2 when a new name is seen.

var_names(var(-(V)), V, _) :-                   % bnodes, mapped from {}
    !.
var_names(var(Name), Var, Map) :-
    member(Name=Var, Map),
    !.
var_names(Q0, Q, Map) :-
    compound(Q0),
    !,
    functor(Q0, Name, Arity),
    functor(Q, Name, Arity),
    var_names(0, Arity, Q0, Q, Map).
var_names(Q, Q, _).

var_names(Arity, Arity, _, _, _) :-
    !.
var_names(I0, Arity, Q0, Q, Map) :-
    I is I0 + 1,
    arg(I, Q0, A0),
    arg(I, Q, A),
    var_names(A0, A, Map),
    var_names(I, Arity, Q0, Q, Map).

%   vars(?Map, -Vars, -Names)
%
%   Split the Name=Var map built by var_names/3.  The first clause
%   also binds the open tail of the map to [].

vars([], [], []) :-                             % also closes list!
    !.
vars([Name=Var|T0], [Var|TV], [Name|TN]) :-
    vars(T0, TV, TN).

%   proj_names(+Projection, -Names)
%
%   Column names of an explicit projection: the variable name for
%   var(Name) and `-` for anything else.

proj_names([], []).
proj_names([var(Var)|T0], [Var|T]) :-
    !,
    proj_names(T0, T).
proj_names([_|T0], [-|T]) :-
    proj_names(T0, T).


                 /*******************************
                 *       ERROR LOCATIONS        *
                 *******************************/

%   syntax_error(+What)//
%
%   Throw a syntax error that records the remaining tokens, for use
%   by add_error_location/2.

syntax_error(What, In, []) :-
    throw(error(syntax_error(What),
                context(_, left(In)))).

%   add_error_location(+Error, +Tokens)
%
%   Re-throw a syntax error raised by syntax_error//1 with a readable
%   context: up to five tokens before and after the error position,
%   separated by '**here**'.  Any other exception is re-thrown
%   unchanged; without the second clause this predicate would fail
%   and catch/3 in query/4 would turn the exception into failure.

add_error_location(error(syntax_error(What),
                         context(_, left(After))),
                   Tokens) :-
    append(Before, After, Tokens),
    length(Before, BL),
    (   BL =< 5
    ->  BC = Before
    ;   length(BC0, 5),
        append(_, BC0, Before),
        BC = ['...'|BC0]
    ),
    length(After, AL),
    (   AL =< 5
    ->  AC = After
    ;   length(AC0, 5),
        append(AC0, _, After),
        append(AC0, ['...'], AC)
    ),
    append(BC, ['**here**'|AC], ContextTokens0),
    maplist(token_to_atom, ContextTokens0, ContextTokens),
    concat_atom(ContextTokens, ' ', Context),
    throw(error(syntax_error(What),
                context(serql_parse/2, Context))).
add_error_location(Error, _) :-
    throw(Error).

%   token_to_atom(+Token, -Atom): printable form of a token.

token_to_atom(Token, Token) :-
    atom(Token),
    !.
token_to_atom(id(X), X) :- !.
token_to_atom(string(X), X) :- !.
token_to_atom(uri(URI), X) :-
    !,
    concat_atom([<, URI, >], X).
token_to_atom(uri(NS,Local), X) :-
    !,
    concat_atom([NS, Local], :, X).
token_to_atom(old_uri(NS,Local), X) :-
    !,
    concat_atom([<, NS, :, Local, >], X).
token_to_atom(cmp(X), X) :- !.
token_to_atom(rest(X), X) :- !.
token_to_atom(Token, Atom) :-
    term_to_atom(Token, Atom).

%   query(-Query, -NameSpaces)//
%
%   Parse a complete query, adding location information to syntax
%   errors.

query(Query, NameSpaces, In, Out) :-
    catch(compilation_unit(Query, NameSpaces, In, Out),
          E,
          add_error_location(E, In)).

must_see(Token) -->
    [Token],
    !.
must_see(Expected) -->
    syntax_error(expected(Expected)).

%   must_see(+Token, +UserName)//
%
%   As must_see//1, but report UserName instead of the token itself
%   in the syntax error.

must_see(Expected, _) -->
    [Expected],
    !.
must_see(_, Label) -->
    syntax_error(expected(Label)).


                 /*******************************
                 *       HIGH LEVEL PARSER      *
                 *******************************/

%   compilation_unit(-Query, -NameSpaces)//
%
%   A query followed by an optional list of namespace declarations.

compilation_unit(Parsed, Declarations) -->
    query(Parsed),
    namespace_list(Declarations).
%   namespace_list(-Declarations)//
%
%   Parse the optional `USING NAMESPACE ns = <uri>, ...` clause into
%   a list of NS=URI terms.

namespace_list([First|More]) -->
    [ using ],
    !,
    must_see(namespace),
    !,
    must_see_namespace(First),
    namespaces(More).
namespace_list([]) -->
    [].

must_see_namespace(Decl) -->
    namespace(Decl),
    !.
must_see_namespace(_) -->
    syntax_error(expected(namespace_declaration)).

namespace(Prefix=URI) -->
    must_see(id(Prefix), identifier),
    must_see(cmp(=), =),
    namespace_uri(URI).

namespace_uri(URI) -->
    [ uri(URI) ],
    !.
namespace_uri(URI) -->
    [ old_uri(Protocol, Local) ],
    !,                                  % New style <foo:bar>
    { concat_atom([Protocol, :, Local], URI)
    }.
namespace_uri(_) -->
    syntax_error(expected(absolute_uri)).

namespaces([First|More]) -->
    [ ',' ],
    !,
    must_see_namespace(First),
    namespaces(More).
namespaces([]) -->
    [].

%   query(-Query)//
%
%   Parse a SELECT or CONSTRUCT query into a select/6 or construct/6
%   term.

query(select(Projection, Path, Where, Distinct, Limit, Offset)) -->
    [ select ],
    !,
    distinct(Distinct),
    projection(Projection),
    must_see(from),
    path_expr_list(Path),
    query_tail(Where, Limit, Offset).
query(construct(Construct, Path, Where, Distinct, Limit, Offset)) -->
    [ construct ],
    !,
    distinct(Distinct),
    construct_clause(Construct),
    must_see(from),
    path_expr_list(Path),
    query_tail(Where, Limit, Offset).
query(_) -->
    syntax_error(no_select_or_construct).

distinct(distinct) -->
    [ distinct ],
    !.
distinct(false) -->
    [].

%   query_tail(-Where, -Limit, -Offset)//
%
%   Parse the optional WHERE, LIMIT and OFFSET clauses.  The defaults
%   are `true`, `inf` and 0.

query_tail(Where, Limit, Offset) -->
    (   [ where ]
    ->  (   boolean_query(Where)
        ->  []
        ;   syntax_error(illegal_where_clause)
        )
    ;   { Where = true }
    ),
    (   [ limit ]
    ->  (   pos_int(Limit)
        ->  []
        ;   syntax_error(illegal_limit)
        )
    ;   { Limit = inf }
    ),
    (   [ offset ]
    ->  (   pos_int(Offset)
        ->  []
        ;   syntax_error(illegal_offset)
        )
    ;   { Offset = 0 }
    ).

projection(*) -->
    [ * ],
    !.
projection([First|More]) -->
    var_or_value(First),
    !,
    var_or_value_list(More).
projection(_) -->
    syntax_error(expected(projection)).
construct_clause(*) -->
    [ * ],
    !.
construct_clause(Path) -->
    path_expr_list(Path),
    !.
construct_clause(_) -->
    syntax_error(expected(construct_clause)).

%   path_expr_list(-Expr)//
%
%   One or more comma-separated path expressions, returned as a
%   conjunction.

path_expr_list(Expr) -->
    must_see_path_expr(First),
    (   [ ',' ]
    ->  path_expr_list(Rest),
        { Expr = (First, Rest) }
    ;   { Expr = First }
    ).

must_see_path_expr(Expr) -->
    path_expr(Expr),
    !.
must_see_path_expr(_) -->
    syntax_error(expected(path_expression)).

path_expr(optional(_, Path)) -->
    [ '[' ],
    !,
    path_expr_list(Path),
    must_see(']').
path_expr(Expr) -->
    path_expr0(Expr).

%   path_expr0(-Expr)//
%
%   A head statement optionally followed by a tail.  After `;` the
%   tail continues from the subject of the head, otherwise from its
%   object.

path_expr0(Expr) -->
    path_expr_head(Head),
    (   (   [ ';' ]
        ->  { arg(1, Head, From) }
        ;   { arg(3, Head, From) }
        ),
        path_expr_tail(From, Tail)
    ->  { Expr = (Head, Tail)
        }
    ;   { Expr = Head }
    ).


path_expr_head(rdf(S, P, O)) -->
    must_see_node(S),
    must_see_edge(P),
    must_see_node(O).

path_expr_tail(S, Expr) -->
    [ '[' ],
    path_expr_tail0(S, Inner),
    [ ']' ],
    { Optional = optional(_, Inner) },
    (   [ ';' ]
    ->  path_expr_tail(S, Tail),
        { Expr = (Optional, Tail) }
    ;   { Expr = Optional }
    ).
path_expr_tail(S, Expr) -->
    path_expr_tail0(S, Expr).

% path_expr_tail0 <=> Edge Node ((";")? Path_expr_tail)?

path_expr_tail0(S, Expr) -->
    edge(P),
    must_see_node(O),
    { Statement = rdf(S, P, O) },
    (   (   [ ';' ]
        ->  path_expr_tail(S, Tail)
        ;   path_expr_tail(O, Tail)
        )
    ->  { Expr = (Statement, Tail) }
    ;   { Expr = Statement }
    ).

must_see_edge(Edge) -->
    edge(Edge),
    !.
must_see_edge(_) -->
    syntax_error(expected(edge)).

edge(var(Var)) -->
    [ id(Var) ],
    !.
edge(uri(URI)) -->
    [ uri(URI) ],                       % <!foo:bar>
    !.
edge(uri(NS, URI)) -->
    [ uri(NS, URI) ],                   % foo:bar
    !.
edge(old_uri(NS, URI)) -->
    [ old_uri(NS, URI) ],               % <foo:bar>
    !.

must_see_node(Node) -->
    node(Node),
    !.
must_see_node(_) -->
    syntax_error(expected(node)).
%   node(-Node)//
%
%   A node between curly brackets.  A single element is returned as
%   is, multiple elements as set(Elements).  The empty node {} is
%   returned as var(-(_)).

node(Node) -->
    [ '{' ],
    node_elem(First),
    !,
    node_elem_list(More),
    [ '}' ],
    (   { More == [] }
    ->  { Node = First }
    ;   { Node = set([First|More]) }
    ).
node(var(-(_))) -->                     % the _ is the variable that will
    [ '{', '}' ].                       % be shared

node_elem_list([First|More]) -->
    [ ',' ],
    !,
    must_see_node_elem(First),
    node_elem_list(More).
node_elem_list([]) -->
    [].

must_see_node_elem(Elem) -->
    node_elem(Elem),
    !.
must_see_node_elem(_) -->
    syntax_error(expected(node_element)).

node_elem(Elem) -->
    (   var(Elem)
    ;   uri(Elem)
    ;   literal(Elem)
    ;   reified_stat(Elem)
    ),
    !.

reified_stat(rdf(S,P,O)) -->
    node(S),
    must_see_edge(P),
    must_see_node(O).


                 /*******************************
                 *          WHERE ...           *
                 *******************************/

%   boolean_query(-Query)//
%
%   Parse the where clause into a Prolog control structure: OR maps
%   to ;/2, AND to ,/2 and NOT to \+/1.

boolean_query(Query) -->
    and_expr(Left),
    (   [ or ],
        boolean_query(Right)
    ->  { Query = (Left ; Right) }
    ;   { Query = Left }
    ).

and_expr(Query) -->
    boolean_query0(Left),
    (   [ and ],
        and_expr(Right)
    ->  { Query = (Left, Right) }
    ;   { Query = Left }
    ).

boolean_query0(Query) -->
    [ '(' ],
    !,
    boolean_query(Query),
    must_see(')').
boolean_query0(true) -->
    [ true ],
    !.
boolean_query0(fail) -->
    [ false ],
    !.
boolean_query0(\+(Q)) -->
    [ not ],
    !,
    boolean_query0(Q).
boolean_query0(serql_compare(Cmp, Left, Right)) -->
    var_or_query_value(Left),
    [ cmp(Cmp) ],
    !,
    var_or_query_value(Right).
boolean_query0(serql_compare(like, Var, String)) -->
    var_or_value(Var),                  % must be var?
    [ like ],
    !,
    must_see_string(String).
boolean_query0(rdf_is_literal(V)) -->
    [ isliteral, '(' ],
    !,
    var(V),
    must_see(')').
boolean_query0(rdf_is_resource(V)) -->
    [ isresource, '(' ],
    !,
    var(V),
    must_see(')').
boolean_query0(_) -->
    syntax_error(expected(boolean_test)).

must_see_string(String) -->
    [ string(String) ],
    !.
must_see_string(_) -->
    syntax_error(expected(string)).

var_or_value_list([First|More]) -->
    [ ',' ],
    !,
    must_see_var_or_value(First),
    var_or_value_list(More).
var_or_value_list([]) -->
    [].

must_see_var_or_value(X) -->
    var_or_value(X),
    !.
must_see_var_or_value(_) -->
    syntax_error(expected(var_or_value)).

var_or_value(X) -->
    var(X),
    !.
var_or_value(X) -->
    value(X).

%   var_or_query_value(-X)//
%
%   As var_or_value//1, but a literal value is returned as
%   query(Value) instead of literal(Value).

var_or_query_value(X) -->
    (   literal_value(Value)
    ->  { X = query(Value)
        }
    ;   var_or_value(X)
    ).

var(var(Name)) -->
    [ id(Name) ],
    !.

value(URI) -->
    uri(URI).
value('$null$') -->
    [ null ].
value(Literal) -->
    literal(Literal),
    !.
value(datatype(var(Name))) -->
    [ datatype, '(', id(Name), ')' ].
value(lang(var(Name))) -->
    [ lang, '(', id(Name), ')' ].
value(label(var(Name))) -->
    [ label, '(', id(Name), ')' ].

uri(uri(URI)) -->
    [ uri(URI) ].
uri(uri(NS, URI)) -->
    [ uri(NS, URI) ].
uri(old_uri(NS, URI)) -->
    [ old_uri(NS, URI) ].

literal(Literal) -->
    literal_value(Value),
    { Literal = literal(Value) }.

%   literal_value(-Value)//
%
%   A string, optionally qualified: "text"@lang gives
%   lang(Lang, Text) and "text"^^Type gives type(Type, Text).

literal_value(Lit) -->
    [ string(Text) ],
    (   [ @, id(Lang) ]
    ->  { Lit = lang(Lang, Text) }
    ;   [ ^^, Type ]
    ->  { Lit = type(Type, Text) }
    ;   { Lit = Text }
    ).

pos_int(I) -->
    [ int(I) ],
    { I >= 0 }.                         % bit weird not to have >0, but this
                                        % is the Sesame spec


                 /*******************************
                 *          TOKENISER           *
                 *******************************/

%   tokens(-Tokens)//
%
%   Split the character codes of a query into a list of tokens,
%   skipping blank space.

tokens([Token|Tokens]) -->
    blank,
    token(Token),
    !,
    tokens(Tokens).
tokens([]) -->
    blank.

token(uri(URI)) -->                     % Old style absolute URI
    "<!", uri_codes(Codes), ">",
    { atom_codes(URI, Codes)
    }.
token(old_uri(NS, Local)) -->           % Old style local, new style absolute
    "<", identifier(NS), ":", uri_codes(Codes), ">",
    { atom_codes(Local, Codes)
    }.
token(string(String)) -->
    "\"", string_codes(Codes), "\"",
    { atom_codes(String, Codes)
    }.
token(Token) -->
    identifier(Id),
    !,
    (   ":", identifier(Local)          % new style URI
    ->  { Token = uri(Id, Local)
        }
    ;   {   downcase_atom(Id, Keyword),
            serql_keyword(Keyword)
        ->  Token = Keyword
        ;   Token = id(Id)
        }
    ).
token(int(Int)) -->
    digit(First),
    !,
    digits(More),
    { number_codes(Int, [First|More])
    }.
token(cmp(Cmp)) -->
    cmp(Cmp),
    !.
token(^^) -->
    "^^",
    !.
token(Char) -->
    [Code],
    { single(Code),
      char_code(Char, Code)
    }.
token(rest(Rest), In, []) :-            % catch syntax errors.
    In \== [],
    atom_codes(Rest, In).


% Characters that form a token by themselves.

single(0'*).
single(0'=).
single(0'().
single(0')).
single(0'{).
single(0'}).
single(0'[).
single(0']).
single(0'@).
single(0',).
single(0';).

% cmp//1
%
% Returns Prolog comparison operators from the SeRQL ones.

cmp(=<) --> "<=".
cmp(\=) --> "!=".
cmp(>=) --> ">=".
cmp(=)  --> "=".
cmp(<)  --> "<".
cmp(>)  --> ">".
% library(url).

%   uri_codes(-Codes)//
%
%   Greedily collect the longest prefix of the input that consists of
%   character codes accepted by uri_code/1.  May return the empty
%   list.

uri_codes([Code|Codes]) -->
    [Code],
    { uri_code(Code)
    },
    !,
    uri_codes(Codes).
uri_codes([]) -->
    [].

%   uri_code(+Code)
%
%   True if Code may appear in a URI: an alphanumerical character or
%   underscore (code_type csym) or one of the listed punctuation
%   characters.

uri_code(Code) :-
    code_type(Code, csym),
    !.
uri_code(0'$).
uri_code(0'-).
uri_code(0'@).
uri_code(0'&).
uri_code(0'+).
uri_code(0'.).
uri_code(0'/).
uri_code(0'?).
uri_code(0'#).
uri_code(0'=).
uri_code(0':).
uri_code(0'~).                          % officially not
uri_code(0';).
uri_code(0'{).
uri_code(0'}).
%   string_codes(-Codes)//
%
%   Collect the codes of a string up to, but not including, the
%   closing double quote.  A backslash followed by a backslash or a
%   double quote is an escape sequence that contributes the second
%   character.  The escape clause must start with the backslash: the
%   mangled text matched a double quote there, so an escaped quote
%   could never be recognised and ended the string instead.

string_codes([C0|Cs]) -->
    "\\", [C0],
    { C0 == 0'\\ ; C0 == 0'" },
    !,
    string_codes(Cs).
string_codes([]) -->
    peek(0'").
string_codes([C0|Cs]) -->
    [C0],
    string_codes(Cs).
%   identifier(-Id)//
%
%   An identifier starts with a letter or underscore (code_type
%   csymf), followed by alphanumerical characters, underscores, dots
%   and dashes.  Id is returned as an atom.

identifier(Id) -->
    [First],
    { code_type(First, csymf) },
    id_chars(More),
    { atom_codes(Id, [First|More])
    }.

id_chars([Code|Codes]) -->
    [Code],
    {   code_type(Code, csym)
    ;   Code == 0'.
    ;   Code == 0'-
    },
    !,
    id_chars(Codes).
id_chars([]) -->
    [].

digit(Code) -->
    [Code],
    { code_type(Code, digit) }.

digits([Code|Codes]) -->
    digit(Code),
    !,
    digits(Codes).
digits([]) -->
    [].

%   blank//
%
%   Skip zero or more white space characters.

blank -->
    [Code],
    { code_type(Code, space) },
    !,
    blank.
blank -->
    [].
%   serql_keyword(?Keyword)
%
%   Reserved words of the language.  The tokeniser maps an identifier
%   whose lowercase form is listed here to the plain keyword atom.

serql_keyword(select).
serql_keyword(construct).
serql_keyword(from).
serql_keyword(where).
serql_keyword(using).
serql_keyword(namespace).
serql_keyword(true).
serql_keyword(false).
serql_keyword(not).
serql_keyword(and).
serql_keyword(or).
serql_keyword(like).
serql_keyword(label).
serql_keyword(lang).
serql_keyword(datatype).
serql_keyword(null).
serql_keyword(isresource).
serql_keyword(isliteral).
serql_keyword(sort).
serql_keyword(in).
serql_keyword(union).
serql_keyword(intersect).
serql_keyword(minus).
serql_keyword(exists).
serql_keyword(forall).
serql_keyword(distinct).                % SPEC: not in grammar
serql_keyword(limit).                   % SPEC: not in grammar
serql_keyword(offset).                  % SPEC: not in grammar

                 /*******************************
                 *          DCG BASICS          *
                 *******************************/

%   peek(?Code)//
%
%   True if Code is the next element of the input.  Nothing is
%   consumed.

peek(C, L, L) :-
    L = [C|_].


                 /*******************************
                 *    HUMAN READABLE MESSAGES   *
                 *******************************/

:- multifile
    prolog:message/3.

% Message hook for syntax errors located by add_error_location/2.
% The clause head must be module-qualified to extend the multifile
% predicate prolog:message//1 declared above; an unqualified
% prologmessage//1 is never called by the message system.

prolog:message(error(syntax_error(What),
                     context(serql_parse/2, Location))) -->
    [ 'Syntax error in SeRQL query: ' ],
    explain(What), [ ' at **here** in', nl, nl],
    ['~w'-[Location] ].

explain(expected(X)) -->
    [ '"~w" expected'-[X] ]