Spaces:

JoseEliel
/

generate-lagrangians

Running

App Files Files Community

José Eliel Camargo Molina commited on Jan 14

Commit

302961a

1 Parent(s): ce3d862

app added

Browse files

Files changed (1) hide show

app.py +419 -0

app.py ADDED Viewed

	@@ -0,0 +1,419 @@

+import streamlit as st
+from transformers import PreTrainedTokenizerFast, BartForConditionalGeneration
+import torch
+from pathlib import Path
+import urllib.request
+# To latex stuff
+####################################
+import itertools
+import re
+rep_tex_dict = {
+    "SU3":{"-3":r"\bar{\textbf{3}}","3":r"\textbf{3}"},
+    "SU2":{"-2":r"\textbf{2}","2":r"\textbf{2}","-3":r"\textbf{3}","3":r"\textbf{3}"},
+}
+def fieldobj_to_tex(obj,lor_index,pos):
+    su3 = None
+    su2 = None
+    u1  =  None
+    hel = None
+    sp  = None
+    #print(obj)
+    obj_mod = obj.copy()
+    for tok in obj:
+      if "SU3" in tok:
+        su3 = tok.split("=")[-1]
+        obj_mod.remove(tok)
+      if "SU2" in tok:
+        su2 = tok.split("=")[-1]
+        obj_mod.remove(tok)
+      if "U1"  in tok:
+        u1 = tok.split("=")[-1]
+        obj_mod.remove(tok)
+      if "HEL" in tok:
+          hel = tok.split("=")[-1]
+          if hel == "1" : hel = "+1"
+      if "SP"  in tok: sp = tok.split("=")[-1]
+    #print(obj)
+    assert sp is not None
+    outtex= ""
+    if sp == "0"    : outtex += "\phi"
+    if sp == "1"    : outtex += "A"+pos+lor_index
+    if sp == "1/2"  : outtex += "\psi"
+    outtex += r"_{("
+    if su3 is not None:
+      outtex += rep_tex_dict["SU3"][su3]+" ,"
+    else:
+      outtex +=  r"\textbf{1}"+" ,"
+    if su2 is not None:
+      outtex += rep_tex_dict["SU2"][su2]+" ,"
+    else:
+      outtex +=  r"\textbf{1}"+" ,"
+    if u1  is not None:
+      outtex += u1+" ,"
+    else:
+      outtex += r"\textbf{0}"+" ,"
+    if hel  is not None: outtex += "h:"+ hel + " ,"
+    if outtex[-1] == ",": outtex = outtex[:-1]+")}"
+    return outtex
+def derobj_to_tex(obj,lor_index,pos):
+    if pos == "^":
+      outtex = "D^{"+lor_index+"}_{("
+    elif pos == "_":
+      outtex = "D_{"+lor_index+"("
+    else:
+      raise ValueError("pos must be ^ or _")
+    if "G" not in obj and "W" not in obj and "B" not in obj:
+       if pos == "^":
+        return "\partial^{"+lor_index+"}"
+       elif pos == "_":
+        return "\partial_{"+lor_index+"}"
+    if "G" in obj: outtex += "SU3,"
+    if "W" in obj: outtex += "SU2,"
+    if "B" in obj: outtex += "U1,"
+    if outtex[-1] == ",": outtex = outtex[:-1]+")}"
+    return outtex
+def gamobj_to_tex(obj,lor_index,pos):
+    outtex = "\gamma"+pos+lor_index
+    return outtex
+def obj_to_tex(obj,lor_index="\mu",pos="^"):
+    if isinstance(obj,tuple): obj = list(obj)
+    if isinstance(obj,str): obj = [i for i in obj.split(" ") if i != ""]
+    # remove any space char in the first element of the list
+    if obj[0] == "+"  :
+        return "\quad\quad+"
+    if obj[0] == "-" :
+        return "\quad\quad-"
+    if obj[0] == "i" :
+        return "i"
+    if obj[0] == "QF" :
+	    return fieldobj_to_tex(obj,lor_index,pos)
+    if obj[0] == "D":
+      return derobj_to_tex(obj,lor_index,pos)
+    if obj[0] == "GM":
+      return gamobj_to_tex(obj,lor_index,pos)
+    if obj[0] == "CMAD":
+      return "[ "+derobj_to_tex(obj,lor_index,pos)
+    if obj[0] == "CMBD":
+      return ", "+derobj_to_tex(obj,lor_index,pos)+' ]'
+def split_with_delimiter_preserved(string, delimiters,ignore_dots=False):
+    if "." in string and ignore_dots == False:
+        #print(string)
+        raise  ValueError("Unexpected ending to the generated Lagrangian")
+    pattern = '(' + '|'.join(map(re.escape, delimiters)) + ')'
+    pattern = re.split(pattern, string)
+    pattern = [" + " if i == "+ " else i for i in pattern  ]
+    pattern = [i for i in pattern if i != ""]
+    return pattern
+def split_with_delimiter_preserved(string, delimiters,ignore_dots=False):
+    if "." in string and ignore_dots == False:
+        #print(string)
+        raise  ValueError("Unexpected ending to the generated Lagrangian")
+    pattern = '(' + '|'.join(map(re.escape, delimiters)) + ')'
+    pattern = re.split(pattern, string)
+    pattern = [" + " if i == "+ " else i for i in pattern  ]
+    pattern = [i for i in pattern if i != ""]
+    return pattern
+def clean_split(inlist, delimiters):
+    i = 0
+    merged_list = []
+    while i < len(inlist):
+        if inlist[i] in delimiters:
+            if i < len(inlist) - 1:
+                merged_list.append(inlist[i] + inlist[i+1])
+                i += 1  # Skip the next element as it has been merged
+            else:
+                merged_list.append(inlist[i])  # If it's the last element, append it without merging
+        else:
+            merged_list.append(inlist[i])
+        i += 1
+    return merged_list
+def get_obj_dict(inlist):
+    outdict = {}
+    for iitem in inlist:
+        idict = {"ID":None,"LATEX":None}
+        id = [i for i in iitem.split() if "ID" in i]
+        if len(id) == 1:
+            idict["ID"] = id[0]
+            if "QF" in iitem:
+                idict["LATEX"] = obj_to_tex(iitem,"\\mu","^")
+        if iitem == "+" or iitem == "-" or iitem == "i":
+            idict["LATEX"] = obj_to_tex(iitem    )
+        outdict[iitem] = idict
+    return outdict
+def get_con_dict(inlist):
+    outdict = {}
+    for iitem in inlist:
+        iitem = iitem.split()
+        iitem = [i for i in iitem if i != ""]
+        sym = [i for i in iitem if ("SU" in i or "LZ" in i)]
+        assert len(sym) == 1, "More than one symmetry in contraction"
+        ids = [i for i in iitem if ("SU" not in i and "LZ" not in i)]
+        if sym[0] not in outdict.keys():
+            outdict[sym[0]] = [ids]
+        else:
+            outdict[sym[0]].append(ids)
+    return outdict
+def term_to_tex(term,verbose=False):
+    # Clean term
+    term = term.replace(".","").replace(" = ", "=").replace(" =- ", "=-").replace(" / ", "/").replace("CMA D", "CMAD").replace("CMB D", "CMBD")
+    term = split_with_delimiter_preserved(term,[" QF "," D "," GM "," CMAD "," CMBD "," CON "])
+    term = clean_split(term, [" QF "," D "," GM "," CMAD "," CMBD "," CON "])
+    if verbose: print(term)
+    if term == [" + "] or term == [" - "] or term == [" i "]:
+        return term[0]
+    # Get Dictionary of objects
+    objdict = get_obj_dict([i for i in term if " CON " not in i])
+    if verbose:
+        for i,j in objdict.items():
+            print(i,"\t\t",j)
+    # Do contractions
+    contractions = [i for i in term if " CON " in i]
+    assert len(contractions) < 2, "More than one contraction in term"
+    if (len(contractions) == 1) and contractions != [" CON "]:
+        contractions = contractions[0]
+        contractions = split_with_delimiter_preserved(contractions,[" LZ "," SU2 "," SU3 "])
+        contractions = clean_split(contractions, [" LZ "," SU2 "," SU3 "])
+        contractions = [i for i in contractions if i != " CON"]
+        condict      = get_con_dict(contractions)
+        if verbose: print(condict)
+        if "LZ" in condict.keys():
+            firstlz = True
+            cma = True
+            for con in condict["LZ"]:
+                for kobj , iobj in objdict.items():
+                    if iobj["ID"] is None   : continue
+                    if iobj["ID"] in con:
+                        if cma: lsymb = "\\mu"
+                        else: lsymb = "\\nu"
+                        if firstlz:
+                            iobj["LATEX"] = obj_to_tex(kobj,lsymb,"^")
+                            firstlz = False
+                        else:
+                            iobj["LATEX"] = obj_to_tex(kobj,lsymb,"_")
+                            cma = False
+                            firstlz = True
+    outstr = " ".join([objdict[i]["LATEX"] for i in term if " CON " not in i])
+    return outstr
+def display_in_latex(instring,verbose=False):
+  #latex_string = r"$\overgroup{\Large{" + instring + "}}$"
+  latex_string = r"$\Large{" + instring + "}$"
+  if verbose: print(latex_string)
+  display(Latex(latex_string))
+  return instring
+def str_tex(instr,num=0):
+    #print("INPUT:",iinstr)
+    #print("TERM:")
+    #outstr = ""
+    #instr = split_with_delimiter_preserved(iinstr,[" + ","+ "," - "])
+    if num != 0:
+        instr = instr[:num]
+    inlist = [term.replace(".","") for term in instr]
+    outstr = ""
+    coup = 0
+    mass = 0
+    outstr = "\\begin{aligned}"
+    for i, iterm in enumerate(inlist):
+        if i ==0:
+            outstr += " \mathcal{L}= \quad \\\\ & "
+        else:
+            nqf = iterm.count("QF SP = 0")
+            nD  = iterm.count(" D ")
+            if nqf != 0 and nqf != 2 and nD == 0:
+                coup += 1
+                outstr += " \lambda_{"+str(coup)+"} \,"
+            if nqf == 2 and nD == 0:
+                mass += 1
+                outstr += " m^2_{"+str(mass)+"} \,"
+            outstr += term_to_tex(iterm,False) + " \quad "
+            if i%4 == 0: outstr += " \\\\ \\\\ & "
+    return outstr
+def master_str_tex(iinstr):
+    instr = split_with_delimiter_preserved(iinstr,[" + ","+ "," - "])
+    try:
+        outstr = str_tex(instr)
+    except Exception as e:
+        outstr = str_tex(instr,-1)
+        outstr += "  \cdots"
+        print(e)
+    outstr += "\\end{aligned}"
+    return outstr#########
+device = 'cpu'
+model_name = "JoseEliel/BART-Lagrangian"
+@st.cache_resource
+def load_model():
+    model = BartForConditionalGeneration.from_pretrained(model_name).to(device)
+    return model
+model = load_model()
+@st.cache_resource
+def load_tokenizer():
+    return PreTrainedTokenizerFast.from_pretrained(model_name)
+hf_tokenizer = load_tokenizer()
+def process_input(input_text):
+    input_text = input_text.replace("[SOS]", "").replace("[EOS]", "").replace("FIELD", "SPLITFIELD")
+    fields = input_text.split('SPLIT')[1:]
+    fields = [x.strip().split(' ') for x in fields]
+    fields = sorted(fields)
+    fields = "[SOS] " + " ".join([" ".join(x) for x in fields]) + " [EOS]"
+    return fields
+def process_output(output_text):
+    return output_text.replace("[SOS]", "").replace("[EOS]", "").replace(".","")
+def process_output_pretty_print(output_text):
+    pretty_output = output_text.replace(" / ", "/")
+    pretty_output = pretty_output.replace("=- ", "= -")
+    pretty_output = pretty_output.replace("+", "\n+")
+    return pretty_output
+def generate_lagrangian(input_text):
+    input_text = process_input(input_text)
+    inputs = hf_tokenizer([input_text], return_tensors='pt').to(device)
+    with st.spinner(text="Generating Lagrangian..."):
+        lagrangian_ids = model.generate(inputs['input_ids'], max_length=1024)
+        lagrangian = hf_tokenizer.decode(lagrangian_ids[0].tolist(), skip_special_tokens=False)
+        lagrangian = process_output(lagrangian)
+    return lagrangian
+def generate_field(sp, su2, su3, u1):
+    # Initialize components list
+    components = [f"FIELD SPIN={sp}"]
+    # Conditionally add each component
+    if su2 != "$1$":
+        components.append(f"SU2={su2}")
+    if su3 == "$\\bar{3}$":
+        components.append("SU3=-3")
+    if su3 != "$1$" and su3 != "$\\bar{3}$":
+        components.append(f"SU3={su3}")
+    if u1 != "0":
+        components.append(f"U1={u1}")
+    # Join components into final string
+    return " ".join(components).replace("$","")
+def main():
+    # Streamlit UI (Adjusted without 'className')
+    st.title("$\\mathscr{L}$agrangian Generator")
+    st.markdown(" ### For a set of chosen fields, this model generates the corresponding Lagrangian which encodes all interactions and dynamics of the fields.")
+    st.markdown(" #### This is a simple demo of our smaller [BART](https://arxiv.org/abs/1910.13461)-based model with 110M parameters")
+    st.markdown(" ##### :violet[Due to computational resources, we limit the number of fields to 2 and spin = 0]")
+    st.markdown(" ##### Choose up to two different fields:")
+    su2_options = ["$1$", "$2$", "$3$"]
+    su3_options = ["$1$", "$3$", "$\\bar{3}$"]
+    u1_options = ["-1","-2/3", "-1/2", "-1/3", "0","1/3" ,"1/2", "2/3", "1"]
+    # Initialize or update session state variables
+    if 'count' not in st.session_state:
+        st.session_state.count = 0  # Keeps track of button presses
+    if 'field_strings' not in st.session_state:
+        st.session_state.field_strings = []  # Stores the generated field strings
+    with st.form("field_selection"):
+        su2_selection = st.radio("Select $\\mathrm{SU}(2)$ value:", su2_options)
+        su3_selection = st.radio("Select $\\mathrm{SU}(3)$ value:", su3_options)
+        u1_selection = st.radio("Select $\\mathrm{U}(1)$ value:", u1_options)
+        submitted = st.form_submit_button("Add field")
+        if submitted:
+            if st.session_state.count < 4:
+                sp_value = 0  # Assume SP is always 0
+                field_string = generate_field(sp_value, su2_selection, su3_selection, u1_selection)
+                st.session_state.field_strings.append(field_string)  # Save generated field string
+                st.session_state.count += 1  # Increment button press count
+            elif st.session_state.count >= 2:
+                st.write("You have reached the maximum number of fields we allow in this demo.")
+    clear_fields = st.button("Clear fields")
+    if clear_fields:
+        st.session_state.field_strings = []
+        st.session_state.count = 0
+    # Button to generate field text, allows up to 2 button presses
+    st.write(f"Input Fields:")
+    for i, fs in enumerate(st.session_state.field_strings, 1):
+        texfield = obj_to_tex(fs)
+        fieldname = f"Field {i}:"
+        st.latex("\\text{" + fieldname + "} \quad" + texfield)
+    if st.button("Generate Lagrangian"):
+        input_fields = " ".join(st.session_state.field_strings)
+        if input_fields == "":
+            st.write("Please add fields before generating the Lagrangian.")
+            return
+        else:
+            print(input_fields)
+            print("\n")
+            # append input fields into csv file, create if not exist
+            #with open('usesdata.csv', 'a') as f:
+            #    f.write(input_fields + "\n")
+            # append and prepend input fields with SOS and EOS tokens
+            input_fields = "[SOS] " + input_fields + " [EOS]"
+            generated_lagrangian = generate_lagrangian(input_fields)
+            print(generated_lagrangian)
+            print("\n")
+            # Save generated lagrangian into same csv file, create if not exist
+            #with open('usesdata.csv', 'a') as f:
+            #    f.write(generated_lagrangian + "\n")
+            #latex_output = master_str_tex(generated_lagrangian[1:])
+            #print(latex_output)
+            #print("\n\n")
+            # save latex output in file
+            #with open('usesdata.csv', 'a') as f:
+            #    f.write(latex_output + "\n")
+            #st.text_area("Generated Lagrangian", pretty_output, height=300)
+            st.markdown("### Generated Lagrangian")
+            st.text(generated_lagrangian)
+    # write my contact info
+    st.markdown("### Contact")
+    st.markdown("If you have any questions or suggestions, please feel free to Email us. [Eliel](mailto:[email protected]) or [Yong Sheng](mailto:[email protected]).")
+if __name__ == "__main__":
+    main()