Skip to content

WORMS utilities

WORMS_tree_to_childparent_tree(worms_trees)

Converts one or more WORMS classification trees into a {child: parent} dict.

This function processes a list of nested tree structures (as returned by get_WORMS_tree) and flattens them into a single dictionary that maps each child AphiaID to its immediate parent AphiaID.

Parameters:

Name Type Description Default
worms_trees list[dict]

A list of nested tree structures from the WORMS API.

required

Returns:

Type Description
dict[int, int]

A single dictionary representing the hierarchy in {child_AphiaID: parent_AphiaID} format.

Examples:

>>> tree1 = {
...   "AphiaID": 1, "scientificname": "Biota", "child": {
...     "AphiaID": 2, "scientificname": "Animalia", "child": {
...       "AphiaID": 1821, "scientificname": "Chordata", "child": None
...     }
...   }
... }
>>> tree2 = {
...     "AphiaID": 1,
...     "rank": "Superdomain",
...     "scientificname": "Biota",
...     "child": {
...         "AphiaID": 2,
...         "rank": "Kingdom",
...         "scientificname": "Animalia",
...         "child": {
...             "AphiaID": 1065,
...             "rank": "Phylum",
...             "scientificname": "Arthropoda",
...             "child": {
...                 "AphiaID": 1274,
...                 "rank": "Subphylum",
...                 "scientificname": "Chelicerata",
...                 "child": {
...                     "AphiaID": 1300,
...                     "rank": "Class",
...                     "scientificname": "Arachnida",
...                     "child": None
...                 }
...             }
...         }
...     }
... }
>>> WORMS_tree_to_childparent_tree([tree1, tree2])
{2: 1, 1821: 2, 1065: 2, 1274: 1065, 1300: 1274}
Source code in hierarchical_loss/worms_utils.py
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
def WORMS_tree_to_childparent_tree(worms_trees: list[dict]) -> dict[int, int]:
    """Flatten WORMS classification chains into one {child: parent} dict.

    Every element of ``worms_trees`` is a nested dictionary (the shape
    produced by `get_WORMS_tree`) in which each node holds an ``'AphiaID'``
    and, optionally, a single nested ``'child'`` node. Each chain is walked
    from its top node downwards, and every node below the top is recorded
    against the AphiaID of the node directly above it. All chains are merged
    into the same dictionary; when a child AphiaID occurs in several chains,
    the entry written last wins.

    Parameters
    ----------
    worms_trees : list[dict]
        A list of nested tree structures from the WORMS API.

    Returns
    -------
    dict[int, int]
        The merged hierarchy in {child_AphiaID: parent_AphiaID} format.
        The top node of a chain never appears as a key.

    Raises
    ------
    Exception
        Whatever the ``'AphiaID'`` lookup raises (``KeyError`` when a node
        dict lacks the key). Before the error propagates, the message
        ``could not find id`` and the offending node are printed.

    Examples
    --------
    >>> tree1 = {
    ...   "AphiaID": 1, "scientificname": "Biota", "child": {
    ...     "AphiaID": 2, "scientificname": "Animalia", "child": {
    ...       "AphiaID": 1821, "scientificname": "Chordata", "child": None
    ...     }
    ...   }
    ... }
    >>> tree2 = {
    ...     "AphiaID": 1,
    ...     "rank": "Superdomain",
    ...     "scientificname": "Biota",
    ...     "child": {
    ...         "AphiaID": 2,
    ...         "rank": "Kingdom",
    ...         "scientificname": "Animalia",
    ...         "child": {
    ...             "AphiaID": 1065,
    ...             "rank": "Phylum",
    ...             "scientificname": "Arthropoda",
    ...             "child": {
    ...                 "AphiaID": 1274,
    ...                 "rank": "Subphylum",
    ...                 "scientificname": "Chelicerata",
    ...                 "child": {
    ...                     "AphiaID": 1300,
    ...                     "rank": "Class",
    ...                     "scientificname": "Arachnida",
    ...                     "child": None
    ...                 }
    ...             }
    ...         }
    ...     }
    ... }
    >>> WORMS_tree_to_childparent_tree([tree1, tree2])
    {2: 1, 1821: 2, 1065: 2, 1274: 1065, 1300: 1274}
    """

    def aphia_id_of(node):
        # Dump the offending node to stdout before letting the lookup
        # failure propagate, so the bad record can be identified.
        try:
            return node['AphiaID']
        except Exception as err:
            print("could not find id")
            print(node)
            raise err

    parent_of = {}
    for root in worms_trees:
        node = root
        upper_id = aphia_id_of(node)
        while True:
            # A missing, None or otherwise falsy 'child' ends the chain.
            descendant = node['child'] if 'child' in node else None
            if not descendant:
                break
            node = descendant
            lower_id = aphia_id_of(node)
            parent_of[lower_id] = upper_id
            # The node just recorded is the parent of the next level down.
            upper_id = lower_id
    return parent_of

get_WORMS_id(name)

Fetches the AphiaID from WORMS for a given scientific name.

Parameters:

Name Type Description Default
name str

The scientific name of the organism to look up.

required

Returns:

Type Description
int

The corresponding AphiaID from the WORMS database.

Examples:

>>> get_WORMS_id('Gnathostomata')
1828
Source code in hierarchical_loss/worms_utils.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
def get_WORMS_id(name: str, *, timeout: float = 30.0) -> int:
    """Fetches the AphiaID from WORMS for a given scientific name.

    Parameters
    ----------
    name : str
        The scientific name of the organism to look up. It is substituted
        into ``WORMS_ID_URL`` as-is.
    timeout : float, optional
        Timeout in seconds passed to ``requests.get``. Defaults to 30.

    Returns
    -------
    int
        The corresponding AphiaID from the WORMS database.

    Raises
    ------
    requests.exceptions.Timeout
        If the server does not answer within ``timeout``.
    requests.exceptions.HTTPError
        If the server answers with a 4xx or 5xx status code.
    ValueError
        If the response body is not an integer literal (for example,
        an empty body).

    Examples
    --------
    >>> get_WORMS_id('Gnathostomata')
    1828
    """
    # requests waits indefinitely unless a timeout is given, so a stalled
    # server would otherwise hang the caller.
    response = requests.get(WORMS_ID_URL.format(name), timeout=timeout)
    # Report HTTP failures as such rather than feeding an error body to int().
    response.raise_for_status()
    return int(response.content)

get_WORMS_name(WORMS_id)

Fetches the scientific name from WORMS for a given AphiaID.

The returned name is stripped of the surrounding double quotes that the API returns.

Parameters:

Name Type Description Default
WORMS_id int

The AphiaID of the organism to look up.

required

Returns:

Type Description
str

The corresponding scientific name.

Examples:

>>> get_WORMS_name(1828)
'Gnathostomata'
Source code in hierarchical_loss/worms_utils.py
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
def get_WORMS_name(WORMS_id: int, *, timeout: float = 30.0) -> str:
    """Fetches the scientific name from WORMS for a given AphiaID.

    The response body is decoded as UTF-8 and its first and last
    characters are dropped, which removes the surrounding double quotes
    that the API returns.

    Parameters
    ----------
    WORMS_id : int
        The AphiaID of the organism to look up.
    timeout : float, optional
        Timeout in seconds passed to ``requests.get``. Defaults to 30.

    Returns
    -------
    str
        The corresponding scientific name. An empty response body yields
        an empty string.

    Raises
    ------
    requests.exceptions.Timeout
        If the server does not answer within ``timeout``.
    requests.exceptions.HTTPError
        If the server answers with a 4xx or 5xx status code.

    Examples
    --------
    >>> get_WORMS_name(1828)
    'Gnathostomata'
    """
    # requests waits indefinitely unless a timeout is given, so a stalled
    # server would otherwise hang the caller.
    response = requests.get(WORMS_NAME_URL.format(WORMS_id), timeout=timeout)
    # Without this check an HTTP error body would be sliced and returned
    # as though it were a scientific name.
    response.raise_for_status()
    quoted_name = response.content.decode("utf-8")
    return quoted_name[1:-1]

get_WORMS_tree(organism_id)

Fetches the full hierarchical classification tree from WORMS.

Given an AphiaID or scientific name, retrieves the classification hierarchy from the root ("Biota") down to the specified organism.

Parameters:

Name Type Description Default
organism_id int | str

The AphiaID or scientific name of the organism.

required

Returns:

Type Description
dict

A nested dictionary representing the classification tree.

Examples:

>>> import json
>>> print(json.dumps(get_WORMS_tree(get_WORMS_id('Gnathostomata')), indent=4))
{
    "AphiaID": 1,
    "rank": "Superdomain",
    "scientificname": "Biota",
    "child": {
        "AphiaID": 2,
        "rank": "Kingdom",
        "scientificname": "Animalia",
        "child": {
            "AphiaID": 1821,
            "rank": "Phylum",
            "scientificname": "Chordata",
            "child": {
                "AphiaID": 146419,
                "rank": "Subphylum",
                "scientificname": "Vertebrata",
                "child": {
                    "AphiaID": 1828,
                    "rank": "Infraphylum",
                    "scientificname": "Gnathostomata",
                    "child": null
                }
            }
        }
    }
}
Source code in hierarchical_loss/worms_utils.py
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
def get_WORMS_tree(organism_id: int | str, *, timeout: float = 30.0) -> dict:
    """Fetches the full hierarchical classification tree from WORMS.

    Given an AphiaID or scientific name, retrieves the classification
    hierarchy from the root ("Biota") down to the specified organism.

    Parameters
    ----------
    organism_id : int | str
        The AphiaID or scientific name of the organism. It is substituted
        into ``WORMS_TREE_URL`` as-is.
    timeout : float, optional
        Timeout in seconds passed to ``requests.get``. Defaults to 30.

    Returns
    -------
    dict
        A nested dictionary representing the classification tree, parsed
        from the JSON response body.

    Raises
    ------
    requests.exceptions.Timeout
        If the server does not answer within ``timeout``.
    requests.exceptions.HTTPError
        If the server answers with a 4xx or 5xx status code.
    ValueError
        If the response body is not valid JSON (for example, an empty
        body); ``requests`` reports this as a JSON decode error.

    Examples
    --------
    >>> import json
    >>> print(json.dumps(get_WORMS_tree(get_WORMS_id('Gnathostomata')), indent=4))
    {
        "AphiaID": 1,
        "rank": "Superdomain",
        "scientificname": "Biota",
        "child": {
            "AphiaID": 2,
            "rank": "Kingdom",
            "scientificname": "Animalia",
            "child": {
                "AphiaID": 1821,
                "rank": "Phylum",
                "scientificname": "Chordata",
                "child": {
                    "AphiaID": 146419,
                    "rank": "Subphylum",
                    "scientificname": "Vertebrata",
                    "child": {
                        "AphiaID": 1828,
                        "rank": "Infraphylum",
                        "scientificname": "Gnathostomata",
                        "child": null
                    }
                }
            }
        }
    }
    """
    # requests waits indefinitely unless a timeout is given, so a stalled
    # server would otherwise hang the caller.
    response = requests.get(WORMS_TREE_URL.format(organism_id), timeout=timeout)
    # Fail on HTTP errors instead of handing an error payload back to the
    # caller as if it were a classification tree.
    response.raise_for_status()
    return response.json()