@prefix ex:<https://purl.expasy.org/sparql-examples/neXtProt/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix schema: <https://schema.org/> .
@prefix sh: <http://www.w3.org/ns/shacl#> .

ex:NXQ_00001 a sh:SPARQLExecutable,
        sh:SPARQLSelectExecutable ;
    rdfs:comment "Proteins phosphorylated and located in the cytoplasm"@en ;
    sh:prefixes _:sparql_examples_prefixes ;
    sh:select """PREFIX : <http://nextprot.org/rdf/>
PREFIX nextprot_cv: <http://nextprot.org/rdf/terminology/>

SELECT DISTINCT ?entry WHERE {
 values ?cytoloc {nextprot_cv:SL-0086 nextprot_cv:GO_0005737} # SL and GO values for cytoplasm
 ?entry :isoform ?iso.
 ?iso :uniprotKeyword / :term nextprot_cv:KW-0597. # Phosphorylated
 ?iso :cellularComponent ?loc .
 ?loc :term /:childOf ?cytoloc.
 filter not exists {?loc :negativeEvidence ?negev} # No negative localization evidence
}

# Assign values to the variable ?cytoloc
# The values correspond to the two controlled vocabulary (cv) terms for cytoplasm:
# SL-0086 is the UniProt subcellular location term
# GO_0005737 is the Gene Ontology cellular component term
#
# Reuse the same variable names (?iso and ?loc) across several statements.
# Sharing a variable name between statements is what enforces the constraints.
#
# Phosphorylated proteins are retrieved using a keyword:
# KW-0597 is the UniProt keyword for phosphorylated
#
# Use :childOf to include children of a term.
# Cytosol (SL-0091), the child term of cytoplasm, will thus be included.
#
# Exclude negative localization evidence.
# This enforces that the protein is located in the cytoplasm.""" ;
    schema:keywords "PTM",
        "phosphorylation",
        "subcellular location",
        "tutorial" ;
    schema:target <https://sparql.nextprot.org/sparql> .

