@prefix ex: <https://sparql.uniprot.org/.well-known/sparql-examples/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix schema: <https://schema.org/> .
@prefix sh: <http://www.w3.org/ns/shacl#> .

# Example 47: SELECT query over UniProtKB transmembrane annotations that keeps
# entries whose sequence has an alanine in a 15-residue window around the
# start of the transmembrane region. The query text is the sh:select literal.
ex:47 a sh:SPARQLExecutable,
        sh:SPARQLSelectExecutable ;
    rdfs:comment "Find UniProtKB entries with a transmembrane region, with an alanine in the 15 amino acid region preceding the transmembrane"@en ;
    sh:prefixes _:sparql_examples_prefixes ;
    sh:select """PREFIX faldo: <http://biohackathon.org/resource/faldo#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX up: <http://purl.uniprot.org/core/>


# Result columns:
#   ?protein            the entry carrying the transmembrane annotation
#   ?from               1-based position where the inspected window starts
#   ?interestingRegion  the amino acids inside the inspected window
SELECT ?protein ?from ?interestingRegion
WHERE
{
  ?protein up:annotation ?annotation .
  ?annotation a up:Transmembrane_Annotation .
  # Get the coordinates of the transmembrane region
  ?annotation up:range ?range .
  ?range faldo:begin ?beginI .
  ?beginI faldo:position ?begin .
  ?beginI faldo:reference ?sequence .
  # ?aas holds the sequence as a string of IUPAC amino acid codes
  ?sequence rdf:value ?aas .
  # The window nominally starts 10 residues before the transmembrane begins
  BIND(?begin - 10 AS ?tenBeforeBegin)
  # SUBSTR positions are 1-based, so the earliest valid start is 1 (not 0).
  # Clamp there when the transmembrane begins within the first 10 residues.
  BIND(IF(?tenBeforeBegin < 1, 1, ?tenBeforeBegin) AS ?from)
  # The window always ends at ?begin + 4. Its length is 15 in the normal case
  # and shrinks when the start was clamped, so the window never slides
  # further into the transmembrane region.
  # NOTE(review): the window therefore covers up to 10 residues before the
  # transmembrane start plus its first 5 residues, whereas the rdfs:comment
  # speaks of the 15 residues preceding it - confirm which one is intended.
  BIND(SUBSTR(?aas, ?from, ?begin + 5 - ?from) AS ?interestingRegion)
  # The interesting region needs to contain an alanine (IUPAC code 'A')
  FILTER(CONTAINS(?interestingRegion, 'A'))
}""" ;
    schema:keywords "transmembrane" , "search within sequence" ;
    schema:target <https://sparql.uniprot.org/sparql/> .

