@prefix ex: <https://sparql.uniprot.org/.well-known/sparql-examples/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix schema: <https://schema.org/> .
@prefix sh: <http://www.w3.org/ns/shacl#> .

# Example query: human UniProtKB entries with their sequences, flagging which
# sequence is the canonical one for the entry.
# The misspelling "cannonical" in the IRI local name below is kept on purpose:
# the IRI is this example's identifier, so correcting it would rename the resource.
ex:107_uniprot_sequences_and_mark_which_is_cannonical_for_human a sh:SPARQLExecutable,
        sh:SPARQLSelectExecutable ;
    rdfs:comment "List all human UniProtKB entries and their sequences, marking if the sequence listed is the canonical sequence of the matching entry."^^rdf:HTML ;
    sh:prefixes _:sparql_examples_prefixes ;
    sh:select """PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?entry ?sequence ?isCanonical
WHERE {
  # Restrict matching to the UniProtKB graph; looking into the UniParc
  # graph as well would confuse matters.
  GRAPH <http://sparql.uniprot.org/uniprot> {
    # We need the UniProt entries that are human (taxon 9606),
    # together with every sequence attached to them.
    ?entry a up:Protein ;
           up:organism taxon:9606 ;
           up:sequence ?sequence .
    # If the sequence is a "Simple_Sequence" it is likely to be the
    # canonical sequence.
    OPTIONAL {
      ?sequence a up:Simple_Sequence .
      BIND(true AS ?likelyIsCanonical)
    }
    # Unless we are dealing with an external isoform,
    # see https://www.uniprot.org/help/canonical_and_isoforms
    # The FILTER refers to ?likelyIsCanonical from the pattern above, so
    # this OPTIONAL only applies to sequences already marked as likely
    # canonical.
    OPTIONAL {
      FILTER(?likelyIsCanonical)
      ?sequence a up:External_Sequence .
      BIND(true AS ?isComplicated)
    }
    # If it is an external isoform, its id does not match the primary
    # accession of the entry. BOUND(?isComplicated) is tested first because
    # using an unbound variable directly as the condition raises an error.
    # SUBSTR(..., 34) drops the leading 33 characters of the sequence IRI;
    # this assumes sequence IRIs look like
    # http://purl.uniprot.org/isoforms/<accession>-<n> (TODO confirm).
    # For sequences that are not a Simple_Sequence, ?likelyIsCanonical is
    # unbound, so ?isCanonical is left unbound as well (not false).
    BIND(IF(BOUND(?isComplicated), STRENDS(STR(?entry), STRBEFORE(SUBSTR(STR(?sequence), 34),'-')),?likelyIsCanonical) AS ?isCanonical)
  }
}""" ;
    schema:keywords "isoform" , "sequence" ;
    schema:target <https://sparql.uniprot.org/sparql/> .

