Hi there community,
I have the following function which was working perfectly. Lately, I realized the size is set to 3000 at max. But I want to get all the proteins for a disease, let’s say 5000. The trick would be to use the cursor to re-run the code twice, however, I do not see the option to retrieve the cursor in the documentation for associatedTargets. Please help me with it.
Thanks in advance,
R
def GetDiseaseAssociatedProteins(disease_id):
efo_id = str(disease_id)
query_string = """
query associatedTargets{
disease(efoId: $efo_id){
id
name
associatedTargets(page:{size:15000,index:0}){
count
rows {
target {
id
approvedSymbol
proteinIds {
id
source
}
}
score
}
}
}
}
"""
#replace $efo_id with value from efo_id
query_string = query_string.replace("$efo_id",f'"{efo_id}"')
#variables = {"$efo_id":efo_id}
# Set base URL of GraphQL API endpoint
base_url = "https://api.platform.opentargets.org/api/v4/graphql"
# Perform POST request and check status code of response
r = requests.post(base_url, json={"query": query_string})
#print(r.status_code)
# Transform API response from JSON into Python dictionary and print in console
api_response = json.loads(r.text)
temp_list = []
for item in api_response['data']['disease']['associatedTargets']['rows']:
#print(item['target'])
#break
for obj in item['target']['proteinIds']:
if obj['source'] == 'uniprot_swissprot':
#print(obj)
uprot = obj['id']
source = obj['source']
score = item['score']
ensg = item['target']['id']
name = item['target']['approvedSymbol']
temp = {'Protein':name,'ENSG':ensg,'UniProt':uprot,'Source':source,'Score':score}
temp_list.append(temp)
df = pd.DataFrame(temp_list)
df['disease_id'] = efo_id
return(df)