Commit e4b3b79 (verified) by emrys-hong · Parent: d575b00

Update README.md

Files changed (1): README.md (+7 −6)
README.md CHANGED
````diff
@@ -64,6 +64,9 @@ from PIL import Image
 
 import torch
 
+task_label = "put carrot in pot" # Change your desired task label
+image: Image.Image = get_from_camera(...)
+
 # Load Emma-X
 vla = AutoModelForVision2Seq.from_pretrained(
     "declare-lab/Emma-X",
@@ -74,14 +77,12 @@ vla = AutoModelForVision2Seq.from_pretrained(
 ).to("cuda:0")
 processor = AutoProcessor.from_pretrained("declare-lab/Emma-X", trust_remote_code=True)
 
-image: Image.Image = get_from_camera(...)
-prompt = "In: What action should the robot take to achieve the instruction\nINSTRUCTION: \n{<Instruction here>}\n\nOut: "
-
-# Predict Action (action is a 7 dimensional vector to control the robot)
+prompt, image = processor.get_prompt(task_label, image)
 inputs = processor(prompt, image).to("cuda:0", dtype=torch.bfloat16)
-action, _ = vla.generate_actions(inputs, do_sample=False, max_new_tokens=512)
-
+# Predict Action (action is a 7 dimensional vector to control the robot)
+action, reasoning = vla.generate_actions(inputs, processor.tokenizer, do_sample=False, max_new_tokens=512)
 print("action", action)
+
 # Execute...
 robot.act(action, ...)
 ```
````
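For reference, the full inference snippet after this commit reads roughly as below. This is a minimal sketch assembled from the diff hunks: the `from_pretrained` keyword arguments between the repo id and `.to("cuda:0")` are elided by the hunk, so `torch_dtype` and `trust_remote_code` here are assumptions inferred from the visible bfloat16 cast and the processor load; `get_from_camera` and `robot` are the README's own placeholders for the user's camera and robot interfaces.

```python
# Minimal sketch of the post-commit usage, assembled from the diff hunks above.
from transformers import AutoModelForVision2Seq, AutoProcessor
from PIL import Image
import torch

task_label = "put carrot in pot"           # Change your desired task label
image: Image.Image = get_from_camera(...)  # placeholder: your camera interface

# Load Emma-X
vla = AutoModelForVision2Seq.from_pretrained(
    "declare-lab/Emma-X",
    torch_dtype=torch.bfloat16,  # assumption: elided in the hunk, inferred from the bfloat16 inputs below
    trust_remote_code=True,      # assumption: matches the processor load below
).to("cuda:0")
processor = AutoProcessor.from_pretrained("declare-lab/Emma-X", trust_remote_code=True)

# New in this commit: the processor builds the instruction prompt from the task label
prompt, image = processor.get_prompt(task_label, image)
inputs = processor(prompt, image).to("cuda:0", dtype=torch.bfloat16)

# Predict action (a 7-dimensional vector to control the robot);
# generate_actions now takes the tokenizer and also returns the reasoning text
action, reasoning = vla.generate_actions(inputs, processor.tokenizer, do_sample=False, max_new_tokens=512)
print("action", action)

# Execute... (placeholder: your robot control interface)
robot.act(action, ...)
```

The key API change in this commit is that the hand-written prompt string is replaced by `processor.get_prompt(task_label, image)`, and `generate_actions` now receives `processor.tokenizer` and returns the model's reasoning alongside the 7-dimensional action.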