In [70]:
import os
from copy import copy as cp

import requests


## Authorize with the endpoint

In [2]:
# Endpoint URL and access token are read from the environment when available,
# so real credentials never have to be typed into (and saved with) the notebook.
# The placeholders below are only a fallback: either export P3GPT_API_URL and
# HF_TOKEN before starting the kernel, or replace the placeholder strings.
API_URL = os.environ.get("P3GPT_API_URL", "https://YOUR.ENDPOINT.aws.endpoints.huggingface.cloud")
HF_TOKEN = os.environ.get("HF_TOKEN", "hf_YOUR_TOKEN")
headers = {
    "Accept" : "application/json",
    "Authorization": "Bearer " + HF_TOKEN,
    "Content-Type": "application/json"
}

def query(payload, timeout=300):
    """Send a request to the configured endpoint and return the decoded reply.

    Parameters
    ----------
    payload : dict
        JSON-serializable request body; it is sent as the JSON body of a POST
        request to ``API_URL`` with the module-level ``headers``.
    timeout : float or tuple, optional
        Seconds to wait for the server, forwarded to ``requests.post``.
        Without a timeout a stalled endpoint would block the notebook forever.

    Returns
    -------
    The JSON-decoded response body.

    Raises
    ------
    requests.exceptions.Timeout
        If the endpoint does not answer within ``timeout`` seconds.
    """
    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    return response.json()

## Construct the query
Instructions define what type of experiment you are trying to simulate with P3GPT.<br>
Key instructions enabled at this endpoint include:
- <font size="4">**`disease2diff2disease`**</font>: For tasks that are equivalent to case-control cross-sectional settings. E.g. the generation of DEGs for a medical condition;
- <font size="4">**`compound2diff2compound`**</font>: For compound screening tasks. E.g. propose a compound that can selectively methylate certain gene promoters;
- <font size="4">**`age_group2diff2age_group`**</font>: For tasks on aging-related omics dynamics. E.g. identify genes that are up-/down-regulated in older vs younger adults.


In [139]:
# A chemical screening experiment carried out in a particular age group,
# which is why two instructions are combined in a single prompt.
prompt = {
    "instruction": ["age_group2diff2age_group", "compound2diff2compound"],
    "tissue": "lung",
    "age": 70,
    "cell": "",
    "efo": "EFO_0000768",                # pulmonary fibrosis
    "datatype": "expression",            # we want to get DEGs
    "drug": "curcumin",
    "dose": "",
    "time": "",
    "case": ["70.0-80.0", "80.0-90.0"],  # the age groups of interest
    "control": "",                       # blank: no healthy controls take part in this experiment
    "dataset_type": "",
    "gender": "m",
    "species": "human",
    "up": [],                            # blank: to be filled in by P3GPT
    "down": [],
}



## Execution modes
- <font size="4">**`meta2diff`**</font>: `compound2diff2compound` can be executed either way. This mode tells P3GPT to return differentially expressed genes and not compounds;
- <font size="4">**`diff2compound`**</font>: The reverse of the `meta2diff` mode. Make sure to fill in 'up' and 'down' in the prompt first!
- <font size="4">**`meta2diff2compound`**</font>: Runs `meta2diff` first and applies `diff2compound` to its output. This is mostly for utility reasons — you get to run P3GPT twice with one call.

As an LLM, P3GPT is trained to fill in the blanks in its prompt pointed at by the instructions. Its native output has the same structure as the input prompt.<br>
Modes do not belong in the prompt and are used for parsing P3GPT's output so that only the expected part of the completed prompt is presented to the user.

In [140]:
# Request body: the prompt, the execution mode and the generation parameters.
config_sample = dict(
    inputs=prompt,
    mode='meta2diff',  # ask for differentially expressed genes rather than compounds
    parameters=dict(
        temperature=0.4,
        top_p=0.8,
        top_k=3550,
        n_next_tokens=20,
    ),
)
# Send the request to Hugging Face
output = query(config_sample)

In [141]:
# list the top-level fields of the endpoint's response
print(output.keys())

dict_keys(['output', 'mode', 'message', 'input'])


In [142]:
# status message returned with the response; 'Done!' marks a successful generation
output['message']

'Done!'

In [143]:
# this is what the actual P3GPT input looks like: the prompt fields rendered as a tagged string
# NB: there is no 'mode' in the prompt.
output['input']

'[BOS]<age_group2diff2age_group><compound2diff2compound><tissue>lung </tissue><age_individ>70 </age_individ><cell></cell><efo>EFO_0000768 </efo><datatype>expression </datatype><drug>curcumin </drug><dose></dose><time></time><case>70.0-80.0 80.0-90.0 </case><control></control><dataset_type></dataset_type><gender>m </gender><species>human </species>'

In [144]:
# Take the first entry of each returned gene list and preview its top 20 symbols
genes_up = output['output']['up'][0]
genes_dn = output['output']['down'][0]
for header, symbols in (("Up-regulated genes:", genes_up),
                        ("Down-regulated genes:", genes_dn)):
    print(header)
    print(*symbols[:20], sep="; ", end='\n\n')


Up-regulated genes:
MUC5B; AHSP; ALAS2; SLC4A1; CDHR5; NXF2B; CYP4F3; LGALS7B; FBN3; NTS; CYSTM1; ORM2; ASL; CD177; GLRX5; H4C3; NDUFA3; TUBA4B; EPB42; GCHFR

Down-regulated genes:
KRT6A; KRT5; KRT15; KRT14; KRT6B; DSG3; CALML3; S100A7; SERPINB5; SPRR2A; SPRR3; LY6D; TMEM45A; KRT16; S100A9; GOLGA8A; SPINK6; CXCL10; CXCL9; CSTA



In [145]:
# Now do the opposite and get compounds based on these DEG lists.
# Only a few fields of the original prompt change: 'drug' is blanked
# and the 'up'/'down' gene lists are filled in.
prompt2 = {**prompt, 'drug': '', 'up': genes_up, 'down': genes_dn}
# Remember to reverse meta2diff and to point the request at the new prompt!
config_sample['mode'] = 'diff2compound'
config_sample['inputs'] = prompt2

In [146]:
# run the request with the updated mode and prompt held in config_sample
output = query(config_sample) # send request to Hugging Face

In [127]:
# inspect which fields this response carries
output.keys()

dict_keys(['output', 'compounds', 'raw_output', 'mode', 'message', 'input'])

In [147]:
# Show the first returned compound list on a single line
print('; '.join(map(str, output['compounds'][0])))

artemisinin; todralazine; dyphylline; esmolol; formestane; z160; netupitant; brd-k89304341; isoprenaline


In [175]:
# Alternatively, use meta2diff2compound to get straight to compounds in one call.
# Only the compound instruction is kept, and 'drug' is blanked explicitly so that
# there is an empty slot to fill in: a plain copy of `prompt` would still carry
# 'curcumin' and the cell would not be reproducible on a fresh kernel.
prompt3 = cp(prompt)
prompt3.update({'instruction': ['compound2diff2compound'],
                'drug': ''})
config_sample.update({'mode':'meta2diff2compound',
                      'inputs':prompt3})

In [176]:
# send the request currently held in config_sample
output = query(config_sample)

In [178]:
# display the prompt used for this request
prompt3

{'instruction': ['compound2diff2compound'],
 'tissue': 'lung',
 'age': 70,
 'cell': '',
 'efo': 'EFO_0000768',
 'datatype': 'expression',
 'drug': '',
 'dose': '',
 'time': '',
 'case': ['70.0-80.0', '80.0-90.0'],
 'control': '',
 'dataset_type': '',
 'gender': 'm',
 'species': 'human',
 'up': [],
 'down': []}

In [177]:
# display the full response, including its 'message' field
output

{'output': [None],
 'mode': 'meta2diff2compound',
 'message': '62149 is not in list',
 'input': '[BOS]<compound2diff2compound><tissue>lung </tissue><age_individ>70 </age_individ><cell></cell><efo>EFO_0000768 </efo><datatype>expression </datatype><drug></drug><dose></dose><time></time><case>70.0-80.0 80.0-90.0 </case><control></control><dataset_type></dataset_type><gender>m </gender><species>human </species>'}

In [167]:

# A failed generation carries no gene lists (its 'output' is not a dict and the
# reason is reported in 'message'), so check the payload before indexing into it.
generation = output['output']
if isinstance(generation, dict):
    print("Up-regulated genes:")
    print(*generation['up'][0], sep='; ', end="\n\n")
    print("Down-regulated genes:")
    print(*generation['down'][0], sep='; ', end="\n\n")
else:
    print("Generation failed:", output['message'])

Up-regulated genes:
MUC5B; AHSP; ALAS2; SLC4A1; CDHR5; NXF2B; CYP4F3; LGALS7B; FBN3; NTS; CYSTM1; ORM2; ASL; CD177; GLRX5; H4C3; NDUFA3; TUBA4B; EPB42; GCHFR; KLF1; CFAP119; TRAPPC2L; DMTN; PDZK1IP1; SEM1; PCYT2; SERF2; CDC20; DAD1; MPC2; EMC3; BOLA1; CMTM5; PGD; EBP; GUK1; NDUFB7; UQCR11; LGALS9C; KEL; HBQ1; TUBB2A; RBX1; TMEM141; F8A1; COX7B; TMEM258; NDUFA7; MYL6; UQCRQ; MRPS24; HPGD; BOLA2B; KRTAP19-4; ATP5MF; RPL29; RPP25L; WDR83OS; FAU; UXT; ZNHIT1; SLC6A8

Down-regulated genes:
KRT6A; KRT5; KRT15; KRT14; KRT6B; DSG3; CALML3; S100A7; SERPINB5; SPRR2A; SPRR3; LY6D; TMEM45A; KRT16; S100A9; GOLGA8A; SPINK6; CXCL10; CXCL9; CSTA; DSC3; APOL1; CXCL8; PKIA; MYBL1; CYP26B1; POSTN; THBS1; ARL14; UPK1B; CXCL13; CXCL6; C1R; COL14A1; TNFAIP2; TIMP1; VEGFC; C1QB; COL15A1; MGP; BICC1; S100A2; XIST; MARCKS; TLR2; TYMP; RPS4Y1; COL1A1; KLF6; KRT17; FBN1; STK32B; KDM5D; SPP1; APOD; THBS2; EIF1AY; CD163; CCL8; SYNM; CD44; HSPA9; CD14; SOCS3; HSPA6; MCL1; ALOX5AP; PBX3; DDX21; IRF8; HMGA1; MAFB; RG