Xenova (HF Staff) committed
Commit e88a44e · verified · 1 Parent(s): 8b27d7f

Update code snippet (#10)


- Update code snippet (353633a1e0a9caa00f839ac15980f07f2fb2cf81)
- set num_image_tokens (e8afa24c4581c5a7c2fd1f086d253f1cc3a89f83)

Files changed (2)
  1. README.md +7 -9
  2. config.json +1 -0
README.md CHANGED
@@ -24,35 +24,33 @@ npm i @huggingface/transformers
 import {
   Florence2ForConditionalGeneration,
   AutoProcessor,
-  AutoTokenizer,
-  RawImage,
+  load_image,
 } from '@huggingface/transformers';

 // Load model, processor, and tokenizer
 const model_id = 'onnx-community/Florence-2-base-ft';
 const model = await Florence2ForConditionalGeneration.from_pretrained(model_id, { dtype: 'fp32' });
 const processor = await AutoProcessor.from_pretrained(model_id);
-const tokenizer = await AutoTokenizer.from_pretrained(model_id);

 // Load image and prepare vision inputs
 const url = 'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg';
-const image = await RawImage.fromURL(url);
-const vision_inputs = await processor(image);
+const image = await load_image(url);

 // Specify task and prepare text inputs
 const task = '<MORE_DETAILED_CAPTION>';
 const prompts = processor.construct_prompts(task);
-const text_inputs = tokenizer(prompts);
+
+// Pre-process the image and text inputs
+const inputs = await processor(image, prompts);

 // Generate text
 const generated_ids = await model.generate({
-  ...text_inputs,
-  ...vision_inputs,
+  ...inputs,
   max_new_tokens: 100,
 });

 // Decode generated text
-const generated_text = tokenizer.batch_decode(generated_ids, { skip_special_tokens: false })[0];
+const generated_text = processor.batch_decode(generated_ids, { skip_special_tokens: false })[0];

 // Post-process the generated text
 const result = processor.post_process_generation(generated_text, task, image.size);
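For reference, this is the README snippet as it reads with the hunk above applied. The trailing console.log is an illustrative addition (not part of the committed file) showing how the post-processed result could be inspected:

```js
import {
  Florence2ForConditionalGeneration,
  AutoProcessor,
  load_image,
} from '@huggingface/transformers';

// Load model, processor, and tokenizer
const model_id = 'onnx-community/Florence-2-base-ft';
const model = await Florence2ForConditionalGeneration.from_pretrained(model_id, { dtype: 'fp32' });
const processor = await AutoProcessor.from_pretrained(model_id);

// Load image and prepare vision inputs
const url = 'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg';
const image = await load_image(url);

// Specify task and prepare text inputs
const task = '<MORE_DETAILED_CAPTION>';
const prompts = processor.construct_prompts(task);

// Pre-process the image and text inputs
const inputs = await processor(image, prompts);

// Generate text
const generated_ids = await model.generate({
  ...inputs,
  max_new_tokens: 100,
});

// Decode generated text
const generated_text = processor.batch_decode(generated_ids, { skip_special_tokens: false })[0];

// Post-process the generated text
const result = processor.post_process_generation(generated_text, task, image.size);

// Inspect the result (illustrative addition, not part of the committed snippet)
console.log(result);
```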
config.json CHANGED
@@ -7,6 +7,7 @@
   "eos_token_id": 2,
   "ignore_index": -100,
   "is_encoder_decoder": true,
+  "num_image_tokens": 577,
   "model_type": "florence2",
   "pad_token_id": 1,
   "projection_dim": 768,