# Intro to Biomedical Ontologies: Owlready2

Biomedical ontologies are generally a tough field to approach, starting with the question "what is an ontology?".

I often reply to that with "it's a hairball of knowledge." 

Imagine that a person or a group decided: "let's represent knowledge as something close to a network of neuronal connections".

I am not an ontologist (someone who creates new ontologies). I consider myself one of the few people who can figure out how to leverage ontologies to achieve very specific biomedical and clinical tasks. 

In [1]:
import owlready2

# Load each ontology from a local OWL file under data/.
hpo = owlready2.get_ontology("data/hp.owl").load()
# MONDO is currently disabled; uncomment to load it as well.
# mondo = owlready2.get_ontology("data/mondo.owl").load()
efo = owlready2.get_ontology("data/efo.owl").load()
mfo = owlready2.get_ontology("data/MFOMD.owl").load()

In [2]:

import re

# Crude searcher
def obo_searcher(ontology,
                 search_term,
                 second_pass_regex=None):
    """Search an ontology by label and return one metadata dict per hit.

    Args:
        ontology: Object exposing ``search(label=..., _case_sensitive=...)``
            (e.g. an owlready2 ontology).
        search_term: Text to look for; it is wrapped in ``*`` wildcards and
            matched case-insensitively against concept labels.
        second_pass_regex: Optional regular expression used as a second
            filter. A hit is kept only if at least one of its labels matches
            it case-insensitively. ``re.match`` is used, so the pattern is
            anchored at the START of the label; prefix it with ``.*`` to
            match anywhere. ``None`` or an empty string disables the filter.

    Returns:
        A list of dicts with the keys ``concept``, ``label``, ``iri``,
        ``synonyms``, ``name``, ``subclasses`` and ``xrefs``. Hits whose
        ``name`` is empty are dropped.
    """
    hits = ontology.search(label=f"*{search_term}*", _case_sensitive=False)

    # Compile once up front instead of once per label of every hit.
    pattern = re.compile(str(second_pass_regex), re.I) if second_pass_regex else None

    def _keep(concept):
        # Concepts without a name are never reported.
        if str(concept.name) == "":
            return False
        if pattern is None:
            return True
        return any(pattern.match(label) for label in concept.label)

    return [
        {
            "concept": concept,
            "label": concept.label,
            "iri": concept.iri,
            "synonyms": concept.hasExactSynonym,
            "name": concept.name,
            "subclasses": list(concept.subclasses()),
            "xrefs": concept.hasDbXref,
        }
        for concept in hits
        if _keep(concept)
    ]


In [3]:
search_term = "moyamoya"

results = obo_searcher(mfo, search_term)

# `results` is a list of dicts, one per matching ontology concept, each holding
# the concept object plus its metadata.
# NOTE(review): the hits shown below carry HP/MONDO/EFO/ORDO identifiers even
# though `mfo` is the ontology being searched — presumably the search spans
# every loaded ontology; confirm against the owlready2 documentation.

In [4]:
results

[{'concept': obo.HP_0011834,
  'label': ['Moyamoya phenomenon'],
  'iri': 'http://purl.obolibrary.org/obo/HP_0011834',
  'synonyms': [],
  'name': 'HP_0011834',
  'subclasses': [],
  'xrefs': ['UMLS:C4023169']},
 {'concept': obo.MONDO_0010448,
  'label': ['moyamoya angiopathy-short stature-facial dysmorphism-hypergonadotropic hypogonadism syndrome'],
  'iri': 'http://purl.obolibrary.org/obo/MONDO_0010448',
  'synonyms': ['Moyamoya disease-short stature-facial dysmorphism-hypergonadotropic hypogonadism',
   'moyamoya disease 4, X-linked recessive'],
  'name': 'MONDO_0010448',
  'subclasses': [],
  'xrefs': ['OMIM:300845', 'Orphanet:280679', 'UMLS:C3151857']},
 {'concept': obo.MONDO_0016820,
  'label': ['Moyamoya disease'],
  'iri': 'http://purl.obolibrary.org/obo/MONDO_0016820',
  'synonyms': ['idiopathic Moyamoya disease',
   'progressive intracranial arterial occlusion'],
  'name': 'MONDO_0016820',
  'subclasses': [obo.MONDO_0010448, obo.MONDO_0014331],
  'xrefs': ['DOID:13099',
   'I

## Check one concept

In [5]:
results

[{'concept': obo.HP_0011834,
  'label': ['Moyamoya phenomenon'],
  'iri': 'http://purl.obolibrary.org/obo/HP_0011834',
  'synonyms': [],
  'name': 'HP_0011834',
  'subclasses': [],
  'xrefs': ['UMLS:C4023169']},
 {'concept': obo.MONDO_0010448,
  'label': ['moyamoya angiopathy-short stature-facial dysmorphism-hypergonadotropic hypogonadism syndrome'],
  'iri': 'http://purl.obolibrary.org/obo/MONDO_0010448',
  'synonyms': ['Moyamoya disease-short stature-facial dysmorphism-hypergonadotropic hypogonadism',
   'moyamoya disease 4, X-linked recessive'],
  'name': 'MONDO_0010448',
  'subclasses': [],
  'xrefs': ['OMIM:300845', 'Orphanet:280679', 'UMLS:C3151857']},
 {'concept': obo.MONDO_0016820,
  'label': ['Moyamoya disease'],
  'iri': 'http://purl.obolibrary.org/obo/MONDO_0016820',
  'synonyms': ['idiopathic Moyamoya disease',
   'progressive intracranial arterial occlusion'],
  'name': 'MONDO_0016820',
  'subclasses': [obo.MONDO_0010448, obo.MONDO_0014331],
  'xrefs': ['DOID:13099',
   'I

In [6]:
results[0]['concept'].hasExactSynonym

[]

## Get `is_a` concepts

In [7]:
is_a = results[0]['concept'].is_a

# `is_a` is a list, since a concept (here "Moyamoya phenomenon") can have more than one parent
[x.label for x in is_a]

[['Abnormal cerebral artery morphology']]

## Get all ancestors

In [8]:
ancestors = results[0]['concept'].ancestors()

# Collect the labels of every ancestor; note in the output that the concept itself is included
[x.label for x in ancestors]

[['Moyamoya phenomenon'],
 [locstr('continuant', 'en'),
  locstr('continuant', 'en'),
  locstr('continuant', 'en')],
 [locstr('quality', 'en'),
  'quality',
  locstr('quality', 'en'),
  locstr('quality', 'en')],
 ['Abnormality of brain morphology', 'Abnormality of brain morphology'],
 ['Abnormal systemic arterial morphology'],
 ['Phenotypic abnormality', 'Phenotypic abnormality'],
 [locstr('entity', 'en'), locstr('entity', 'en')],
 ['Abnormal nervous system morphology'],
 ['All'],
 [],
 [locstr('Abnormal vascular morphology', 'en')],
 ['Abnormality of the vasculature', 'Abnormality of the vasculature'],
 ['Abnormal cerebral vascular morphology',
  'Abnormal cerebral vascular morphology'],
 ['experimental factor'],
 ['Abnormal cerebral artery morphology'],
 ['Abnormality of the cardiovascular system',
  'Abnormality of the cardiovascular system'],
 [locstr('specifically dependent continuant', 'en'),
  'material property',
  locstr('specifically dependent continuant', 'en'),
  locstr('sp

## Get all descendants

In [9]:
descendants = results[0]['concept'].descendants()

# descendants() includes the concept itself, so the result is not empty even when there are no children
[(x.label, x.name) for x in descendants]

[(['Moyamoya phenomenon'], 'HP_0011834')]

## Get all Subclasses

In [10]:
subclasses = results[0]['concept'].subclasses()

# Unlike descendants(), subclasses() does not include the concept itself; it is empty for this concept
[(x.label, x.name) for x in subclasses]

[]

## Things to Note:

- `label`: actually returns a list (a concept can carry several labels), not a single string
- `iri`: unique ID for this concept
- `name`: the concept ID. Even though this is an HPO term, ontologies sometimes reference concepts from external ontologies as part of the "semantic web".
- `xrefs`: Generally, `owlready2` has poor documentation, but it is a single-person(?) effort (one I have never personally contributed to) in a field that is not the most approachable, so cut him some slack. The oddly named `.hasDbXref` returns a list of external cross-walks, which is one of the more useful things to know.

# TODO Visual Interface

In [11]:
results[0]

{'concept': obo.HP_0011834,
 'label': ['Moyamoya phenomenon'],
 'iri': 'http://purl.obolibrary.org/obo/HP_0011834',
 'synonyms': [],
 'name': 'HP_0011834',
 'subclasses': [],
 'xrefs': ['UMLS:C4023169']}

# It's hard to work with python objects

For my mission - I want to try to create various plots using Altair. 

Plotting libraries don't know what to do with python objects.

They generally need more primitive data types, so in our case we convert everything to strings.

In [12]:
results

[{'concept': obo.HP_0011834,
  'label': ['Moyamoya phenomenon'],
  'iri': 'http://purl.obolibrary.org/obo/HP_0011834',
  'synonyms': [],
  'name': 'HP_0011834',
  'subclasses': [],
  'xrefs': ['UMLS:C4023169']},
 {'concept': obo.MONDO_0010448,
  'label': ['moyamoya angiopathy-short stature-facial dysmorphism-hypergonadotropic hypogonadism syndrome'],
  'iri': 'http://purl.obolibrary.org/obo/MONDO_0010448',
  'synonyms': ['Moyamoya disease-short stature-facial dysmorphism-hypergonadotropic hypogonadism',
   'moyamoya disease 4, X-linked recessive'],
  'name': 'MONDO_0010448',
  'subclasses': [],
  'xrefs': ['OMIM:300845', 'Orphanet:280679', 'UMLS:C3151857']},
 {'concept': obo.MONDO_0016820,
  'label': ['Moyamoya disease'],
  'iri': 'http://purl.obolibrary.org/obo/MONDO_0016820',
  'synonyms': ['idiopathic Moyamoya disease',
   'progressive intracranial arterial occlusion'],
  'name': 'MONDO_0016820',
  'subclasses': [obo.MONDO_0010448, obo.MONDO_0014331],
  'xrefs': ['DOID:13099',
   'I

In [13]:

def sanitize(v):
    """Convert a value into plain strings so plotting/dataframe code can use it.

    Args:
        v: Any value. Lists and dicts are handled element-wise; everything
            else is passed through ``str``.

    Returns:
        * for a list: a list with ``str`` applied to every item;
        * for a dict: a dict with the same keys and sanitized values;
        * otherwise: ``str(v)``.
    """
    if isinstance(v, list):
        return [str(x) for x in v]
    if isinstance(v, dict):
        # Recurse into the VALUES. Calling sanitize(v) on the dict itself
        # (as the previous version did) never terminates.
        return {key: sanitize(value) for key, value in v.items()}
    return str(v)

def sanitize_results(data):
    """Sanitize every field of every record in ``data``.

    Each dict in ``data`` is modified IN PLACE: every value is replaced by
    ``sanitize(value)``. The same ``data`` object is returned for convenience.
    """
    for record in data:
        # Snapshot the keys first; only values are reassigned, keys never change.
        for field in list(record):
            record[field] = sanitize(record[field])
    return data



In [14]:
import polars as pl
# NOTE: sanitize_results rewrites the dicts inside `results` in place, so after
# this line `results` and `sanitized_results` refer to the same sanitized records.
sanitized_results = sanitize_results(results)
pl.from_dicts(sanitized_results)

concept,label,iri,synonyms,name,subclasses,xrefs
str,list[str],str,list[str],str,list[str],list[str]
"""obo.HP_0011834…","[""Moyamoya phenomenon""]","""http://purl.ob…",[],"""HP_0011834""",[],"[""UMLS:C4023169""]"
"""obo.MONDO_0010…","[""moyamoya angiopathy-short stature-facial dysmorphism-hypergonadotropic hypogonadism syndrome""]","""http://purl.ob…","[""Moyamoya disease-short stature-facial dysmorphism-hypergonadotropic hypogonadism"", ""moyamoya disease 4, X-linked recessive""]","""MONDO_0010448""",[],"[""OMIM:300845"", ""Orphanet:280679"", ""UMLS:C3151857""]"
"""obo.MONDO_0016…","[""Moyamoya disease""]","""http://purl.ob…","[""idiopathic Moyamoya disease"", ""progressive intracranial arterial occlusion""]","""MONDO_0016820""","[""obo.MONDO_0010448"", ""obo.MONDO_0014331""]","[""DOID:13099"", ""ICD9:437.5"", … ""UMLS:C0026654""]"
"""obo.MONDO_0014…","[""Moyamoya disease with early-onset achalasia""]","""http://purl.ob…","[""moyamoya 6 with achalasia""]","""MONDO_0014331""",[],"[""OMIM:615750"", ""Orphanet:401945"", … ""UMLS:C3810403""]"
"""efo.EFO_000425…","[""obsolete_Moyamoya disease""]","""http://www.ebi…","[""Moya-Moya disease"", ""Moyamoya syndrome"", … ""progressive intracranial occlusive arteropathy (Moyamoya)""]","""EFO_0004250""",[],"[""MeSH:D009072""]"
"""ORDO.Orphanet_…","[""obsolete_Moyamoya disease""]","""http://www.orp…",[],"""Orphanet_2573""",[],"[""ICD10:I67.5"", ""MeSH:C536991"", … ""UMLS:C2931384""]"
"""ORDO.Orphanet_…","[""Moyamoya disease - short stature - facial dysmorphism - hypergonadotropic hypogonadism""]","""http://www.orp…",[],"""Orphanet_28067…",[],"[""OMIM:300845""]"
"""ORDO.Orphanet_…","[""obsolete_Moyamoya disease with early-onset achalasia""]","""http://www.orp…",[],"""Orphanet_40194…",[],"[""ICD10:I67.5"", ""OMIM:615750""]"


In [15]:
from pprint import pprint

pprint(sanitized_results)

[{'concept': 'obo.HP_0011834',
  'iri': 'http://purl.obolibrary.org/obo/HP_0011834',
  'label': ['Moyamoya phenomenon'],
  'name': 'HP_0011834',
  'subclasses': [],
  'synonyms': [],
  'xrefs': ['UMLS:C4023169']},
 {'concept': 'obo.MONDO_0010448',
  'iri': 'http://purl.obolibrary.org/obo/MONDO_0010448',
  'label': ['moyamoya angiopathy-short stature-facial '
            'dysmorphism-hypergonadotropic hypogonadism syndrome'],
  'name': 'MONDO_0010448',
  'subclasses': [],
  'synonyms': ['Moyamoya disease-short stature-facial '
               'dysmorphism-hypergonadotropic hypogonadism',
               'moyamoya disease 4, X-linked recessive'],
  'xrefs': ['OMIM:300845', 'Orphanet:280679', 'UMLS:C3151857']},
 {'concept': 'obo.MONDO_0016820',
  'iri': 'http://purl.obolibrary.org/obo/MONDO_0016820',
  'label': ['Moyamoya disease'],
  'name': 'MONDO_0016820',
  'subclasses': ['obo.MONDO_0010448', 'obo.MONDO_0014331'],
  'synonyms': ['idiopathic Moyamoya disease',
               'progressive

# Test drive it with a bigger use case

In [16]:
obo_searcher(mfo, "diabetes")

[{'concept': obo.HP_0000819,
  'label': ['Diabetes mellitus'],
  'iri': 'http://purl.obolibrary.org/obo/HP_0000819',
  'synonyms': [],
  'name': 'HP_0000819',
  'subclasses': [obo.HP_0000831,
   obo.HP_0001953,
   obo.HP_0004904,
   obo.HP_0005978,
   obo.HP_0009800,
   obo.HP_0100651],
  'xrefs': ['MSH:D003920', 'SNOMEDCT_US:73211009', 'UMLS:C0011849']},
 {'concept': obo.HP_0000831,
  'label': ['Insulin-resistant diabetes mellitus'],
  'iri': 'http://purl.obolibrary.org/obo/HP_0000831',
  'synonyms': ['Insulin resistant diabetes',
   'Insulin resistant diabetes mellitus',
   'Insulin-resistant diabetes'],
  'name': 'HP_0000831',
  'subclasses': [obo.HP_0000857, obo.HP_0000877],
  'xrefs': ['UMLS:C0854110']},
 {'concept': obo.HP_0000857,
  'label': ['Neonatal insulin-dependent diabetes mellitus',
   'Neonatal insulin-dependent diabetes mellitus'],
  'iri': 'http://purl.obolibrary.org/obo/HP_0000857',
  'synonyms': [],
  'name': 'HP_0000857',
  'subclasses': [obo.HP_0008255],
  'xrefs':

# Let's try plotting

In [17]:
import polars as pl
mfo_df = pl.from_dicts(sanitize_results(obo_searcher(mfo, "diabetes")))

In [18]:
mfo_df

concept,label,iri,synonyms,name,subclasses,xrefs
str,list[str],str,list[str],str,list[str],list[str]
"""obo.HP_0000819…","[""Diabetes mellitus""]","""http://purl.ob…",[],"""HP_0000819""","[""obo.HP_0000831"", ""obo.HP_0001953"", … ""obo.HP_0100651""]","[""MSH:D003920"", ""SNOMEDCT_US:73211009"", ""UMLS:C0011849""]"
"""obo.HP_0000831…","[""Insulin-resistant diabetes mellitus""]","""http://purl.ob…","[""Insulin resistant diabetes"", ""Insulin resistant diabetes mellitus"", ""Insulin-resistant diabetes""]","""HP_0000831""","[""obo.HP_0000857"", ""obo.HP_0000877""]","[""UMLS:C0854110""]"
"""obo.HP_0000857…","[""Neonatal insulin-dependent diabetes mellitus"", ""Neonatal insulin-dependent diabetes mellitus""]","""http://purl.ob…",[],"""HP_0000857""","[""obo.HP_0008255""]","[""UMLS:C3278636"", ""UMLS:C3278636""]"
"""obo.HP_0000863…","[""Central diabetes insipidus"", ""Central diabetes insipidus""]","""http://purl.ob…","[""Neurohypophyseal diabetes insipidus"", ""Neurohypophyseal diabetes insipidus""]","""HP_0000863""",[],"[""MSH:D020790"", ""SNOMEDCT_US:45369008"", … ""UMLS:C0687720""]"
"""obo.HP_0000873…","[""Diabetes insipidus""]","""http://purl.ob…",[],"""HP_0000873""","[""obo.HP_0000863"", ""obo.HP_0009806""]","[""MSH:D003919"", ""SNOMEDCT_US:15771004"", ""UMLS:C0011848""]"
"""obo.HP_0000877…","[""Insulin-resistant diabetes mellitus at puberty""]","""http://purl.ob…","[""Insulin-resistant diabetes mellitus at puberty""]","""HP_0000877""",[],"[""UMLS:C1837792""]"
"""obo.HP_0004904…","[""Maturity-onset diabetes of the young""]","""http://purl.ob…","[""Maturity onset diabetes of the young""]","""HP_0004904""",[],"[""MSH:C562772"", ""SNOMEDCT_US:609561005"", ""UMLS:C0342276""]"
"""obo.HP_0005978…","[""Type II diabetes mellitus""]","""http://purl.ob…","[""Diabetes mellitus Type II"", ""Diabetes mellitus, noninsulin-dependent"", … ""Type II diabetes""]","""HP_0005978""","[""obo.HP_0008205""]","[""MSH:D003924"", ""SNOMEDCT_US:44054006"", ""UMLS:C0011860""]"
"""obo.HP_0008205…","[""Insulin-dependent but ketosis-resistant diabetes""]","""http://purl.ob…",[],"""HP_0008205""",[],"[""UMLS:C1842404""]"
"""obo.HP_0008255…","[""Transient neonatal diabetes mellitus""]","""http://purl.ob…",[],"""HP_0008255""",[],"[""SNOMEDCT_US:237603002"", ""UMLS:C0342273""]"


# Diabetes Example

In [19]:

sanitized_results = sanitize_results(obo_searcher(hpo, "diabetes"))
df = pl.from_dicts(sanitized_results)
df.head()

concept,label,iri,synonyms,name,subclasses,xrefs
str,list[str],str,list[str],str,list[str],list[str]
"""obo.HP_0000819…","[""Diabetes mellitus""]","""http://purl.ob…",[],"""HP_0000819""","[""obo.HP_0000831"", ""obo.HP_0001953"", … ""obo.HP_0100651""]","[""MSH:D003920"", ""SNOMEDCT_US:73211009"", ""UMLS:C0011849""]"
"""obo.HP_0000831…","[""Insulin-resistant diabetes mellitus""]","""http://purl.ob…","[""Insulin resistant diabetes"", ""Insulin resistant diabetes mellitus"", ""Insulin-resistant diabetes""]","""HP_0000831""","[""obo.HP_0000857"", ""obo.HP_0000877""]","[""UMLS:C0854110""]"
"""obo.HP_0000857…","[""Neonatal insulin-dependent diabetes mellitus"", ""Neonatal insulin-dependent diabetes mellitus""]","""http://purl.ob…",[],"""HP_0000857""","[""obo.HP_0008255""]","[""UMLS:C3278636"", ""UMLS:C3278636""]"
"""obo.HP_0000863…","[""Central diabetes insipidus"", ""Central diabetes insipidus""]","""http://purl.ob…","[""Neurohypophyseal diabetes insipidus"", ""Neurohypophyseal diabetes insipidus""]","""HP_0000863""",[],"[""MSH:D020790"", ""SNOMEDCT_US:45369008"", … ""UMLS:C0687720""]"
"""obo.HP_0000873…","[""Diabetes insipidus""]","""http://purl.ob…",[],"""HP_0000873""","[""obo.HP_0000863"", ""obo.HP_0009806""]","[""MSH:D003919"", ""SNOMEDCT_US:15771004"", ""UMLS:C0011848""]"


In [20]:
import polars as pl
import altair as alt

# Search box parameter attached to the charts.
# NOTE: nothing reads `search_input` yet — the opacity condition that would
# use it is still commented out in `name_chart` below.
search_input = alt.param(
    value="",
    bind=alt.binding(
        input="search",
        placeholder="Diseases/symptoms",
        name="Search",
    ),
)

# Point selection on the concept ID (`name`). `selection_point` is the
# Altair 5 replacement for the deprecated `selection_multi`.
selection = alt.selection_point(fields=["name"])

# Shared base: one colour per concept; the list-valued `synonyms` and `label`
# columns are flattened so each list element becomes its own row.
base = (
    alt.Chart(df.to_pandas())
    .encode(
        tooltip=["name:N", "label:N", "xrefs:N", "synonyms:N"],
        color=alt.Color("name:N").legend(None),
    )
    .transform_flatten(["synonyms"])
    .transform_flatten(["label"])
    .add_params(search_input)
)


# Left column: one rounded rectangle per label, ordered by concept ID.
name_chart = base.mark_rect(height=18, cornerRadius=5, limit=100).encode(
    x=alt.value(185),
    y=alt.Y("label:N", sort=alt.EncodingSortField("name"))
    .axis(labels=False, ticks=False, grid=False, domainWidth=0)
    .title(None),
    # opacity=alt.condition(
    #     alt.expr.test(alt.expr.regexp(search_input, 'i'), alt.datum.synonyms),
    #     alt.Opacity("name:Q").legend(None),
    #     alt.value(0.05)
    # ),
)

# White label text drawn on top of each rectangle; also carries the selection.
name_text = (
    name_chart.mark_text(align="left", dx=0, dy=0, limit=170)
    .encode(
        x=alt.value(1),
        text=alt.Text("label:N").title(None),
        color=alt.value("white"),
    )
    .add_params(search_input, selection)
)


# Right column: one rectangle per cross-reference. `ontology` and `concept_id`
# are the parts of the xref before/after the ":"; they are computed here but
# not used by any encoding yet.
xref_chart = (
    base.mark_rect(height=18, cornerRadius=5)
    .encode(
        x=alt.value(150),
        y=alt.Y("xrefs:N", sort=alt.EncodingSortField("name"))
        .axis(labels=False, ticks=False, grid=False, domainWidth=0)
        .title(None),
    )
    .transform_flatten(["xrefs"])
    .transform_calculate(
        ontology=alt.expr.split(alt.datum.xrefs, ":")[0],
        concept_id=alt.expr.split(alt.datum.xrefs, ":")[1],
    )
)

# White xref text drawn on top of each rectangle.
xref_text = (
    xref_chart.mark_text(align="left", dx=0, dy=0)
    .encode(
        x=alt.value(1),
        text=alt.Text("xrefs").title(None),
        color=alt.value("white"),
    )
    .add_params(search_input, selection)
)


# Put the two columns side by side and filter both by the current selection.
(
    (name_chart + name_text).properties(title="Concept Name", width=100)
    | (xref_chart + xref_text).properties(title="External References", width=100)
).configure_concat(spacing=0).transform_filter(selection).configure_view(
    strokeOpacity=0, strokeWidth=0
).properties(
    title=alt.Title("Ontology Explorer", subtitle=[""])
)



# Rare Disease Example

In [21]:
import polars as pl
import altair as alt


# Search HPO for the rare-disease term and convert the hits to a DataFrame.
sanitized_results = sanitize_results(obo_searcher(hpo, "steven"))
df = pl.from_dicts(sanitized_results)
# NOTE(review): this value is discarded — presumably only the last expression
# of a notebook cell is displayed, so this preview never shows.
df.head()

# Search box parameter attached to the charts.
# NOTE: nothing reads `search_input` yet — the opacity condition that would
# use it is still commented out in `name_chart` below.
search_input = alt.param(
    value="",
    bind=alt.binding(
        input="search",
        placeholder="Diseases/symptoms",
        name="Search",
    ),
)

# Point selection on the concept ID (`name`). `selection_point` is the
# Altair 5 replacement for the deprecated `selection_multi`.
selection = alt.selection_point(fields=["name"])

# Shared base: one colour per concept; the list-valued `synonyms` and `label`
# columns are flattened so each list element becomes its own row.
base = (
    alt.Chart(df.to_pandas())
    .encode(
        tooltip=["name:N", "label:N", "xrefs:N", "synonyms:N"],
        color=alt.Color("name:N").legend(None),
    )
    .transform_flatten(["synonyms"])
    .transform_flatten(["label"])
    .add_params(search_input)
)


# Left column: one rounded rectangle per label, ordered by concept ID.
name_chart = base.mark_rect(height=18, cornerRadius=5, limit=100).encode(
    x=alt.value(185),
    y=alt.Y("label:N", sort=alt.EncodingSortField("name"))
    .axis(labels=False, ticks=False, grid=False, domainWidth=0)
    .title(None),
    # opacity=alt.condition(
    #     alt.expr.test(alt.expr.regexp(search_input, 'i'), alt.datum.synonyms),
    #     alt.Opacity("name:Q").legend(None),
    #     alt.value(0.05)
    # ),
)

# White label text drawn on top of each rectangle; also carries the selection.
name_text = (
    name_chart.mark_text(align="left", dx=0, dy=0, limit=170)
    .encode(
        x=alt.value(1),
        text=alt.Text("label:N").title(None),
        color=alt.value("white"),
    )
    .add_params(search_input, selection)
)


# Right column: one rectangle per cross-reference. `ontology` and `concept_id`
# are the parts of the xref before/after the ":"; they are computed here but
# not used by any encoding yet.
xref_chart = (
    base.mark_rect(height=18, cornerRadius=5)
    .encode(
        x=alt.value(150),
        y=alt.Y("xrefs:N", sort=alt.EncodingSortField("name"))
        .axis(labels=False, ticks=False, grid=False, domainWidth=0)
        .title(None),
    )
    .transform_flatten(["xrefs"])
    .transform_calculate(
        ontology=alt.expr.split(alt.datum.xrefs, ":")[0],
        concept_id=alt.expr.split(alt.datum.xrefs, ":")[1],
    )
)

# White xref text drawn on top of each rectangle.
xref_text = (
    xref_chart.mark_text(align="left", dx=0, dy=0)
    .encode(
        x=alt.value(1),
        text=alt.Text("xrefs").title(None),
        color=alt.value("white"),
    )
    .add_params(search_input, selection)
)


# Put the two columns side by side and filter both by the current selection.
(
    (name_chart + name_text).properties(title="Concept Name", width=100)
    | (xref_chart + xref_text).properties(title="External References", width=100)
).configure_concat(spacing=0).transform_filter(selection).configure_view(
    strokeOpacity=0, strokeWidth=0
).properties(
    title=alt.Title("Ontology Explorer", subtitle=[""])
)

