egrace479 committed on
Commit
1e93828
·
verified ·
1 Parent(s): b2cb442

update column names from metadata file

Browse files
Files changed (2) hide show
  1. app.py +1 -1
  2. components/query.py +6 -6
app.py CHANGED
@@ -31,7 +31,7 @@ hf_hub_download(repo_id="imageomics/demo-data",
31
  METADATA_PATH = "components/bioclip-2/metadata.parquet"
32
  # Read page IDs as int
33
  metadata_df = pl.read_parquet(METADATA_PATH, low_memory = False)
34
- metadata_df = metadata_df.with_columns(pl.col(["eol_page_id", "taxonID"]).cast(pl.Int64))
35
 
36
  MODEL_STR = "hf-hub:imageomics/bioclip-2"
37
  TOKENIZER_STR = "ViT-L-14"
 
31
  METADATA_PATH = "components/bioclip-2/metadata.parquet"
32
  # Read page IDs as int
33
  metadata_df = pl.read_parquet(METADATA_PATH, low_memory = False)
34
+ metadata_df = metadata_df.with_columns(pl.col(["eol_page_id", "gbif_id"]).cast(pl.Int64))
35
 
36
  MODEL_STR = "hf-hub:imageomics/bioclip-2"
37
  TOKENIZER_STR = "ViT-L-14"
components/query.py CHANGED
@@ -42,7 +42,7 @@ def get_sample(df, pred_taxon, rank):
42
  '''
43
  logger.info(f"Getting sample for taxon: {pred_taxon} at rank: {rank}")
44
  try:
45
- filepath, taxonID, eol_page_id, full_name, is_exact = get_sample_data(df, pred_taxon, rank)
46
  except Exception as e:
47
  logger.error(f"Error retrieving sample data: {e}")
48
  return None, f"We encountered the following error trying to retrieve a sample image: {e}."
@@ -58,8 +58,8 @@ def get_sample(df, pred_taxon, rank):
58
  )
59
  img_resp = requests.get(img_src)
60
  img = Image.open(io.BytesIO(img_resp.content))
61
- if taxonID:
62
- gbif_url = GBIF_URL + taxonID
63
  if eol_page_id:
64
  eol_url = EOL_URL + eol_page_id
65
  if is_exact:
@@ -100,7 +100,7 @@ def get_sample_data(df, pred_taxon, rank):
100
  --------
101
  filepath : str
102
  Filepath of selected sample image for predicted taxon.
103
- taxonID: str
104
  GBIF page ID associated with predicted taxon for more information.
105
  eol_page_id : str
106
  EOL page ID associated with predicted taxon for more information.
@@ -125,9 +125,9 @@ def get_sample_data(df, pred_taxon, rank):
125
  if exact_df.shape[0] > 0:
126
  df_filtered = exact_df.sample()
127
  full_name = " ".join(df_filtered.select(RANKS[:rank+1]).row(0))
128
- return df_filtered["file_path"][0], df_filtered["taxonID"].cast(pl.String)[0], df_filtered["eol_page_id"].cast(pl.String)[0], full_name, True
129
 
130
  # If no exact matches, return any entry with the specified rank
131
  df_filtered = df.sample()
132
  full_name = " ".join(df_filtered.select(RANKS[:rank+1]).row(0)) + " " + " ".join(df_filtered.select(RANKS[rank+1:]).row(0))
133
- return df_filtered["file_path"][0], df_filtered["taxonID"].cast(pl.String)[0], df_filtered["eol_page_id"].cast(pl.String)[0], full_name, False
 
42
  '''
43
  logger.info(f"Getting sample for taxon: {pred_taxon} at rank: {rank}")
44
  try:
45
+ filepath, gbif_id, eol_page_id, full_name, is_exact = get_sample_data(df, pred_taxon, rank)
46
  except Exception as e:
47
  logger.error(f"Error retrieving sample data: {e}")
48
  return None, f"We encountered the following error trying to retrieve a sample image: {e}."
 
58
  )
59
  img_resp = requests.get(img_src)
60
  img = Image.open(io.BytesIO(img_resp.content))
61
+ if gbif_id:
62
+ gbif_url = GBIF_URL + gbif_id
63
  if eol_page_id:
64
  eol_url = EOL_URL + eol_page_id
65
  if is_exact:
 
100
  --------
101
  filepath : str
102
  Filepath of selected sample image for predicted taxon.
103
+ gbif_id: str
104
  GBIF page ID associated with predicted taxon for more information.
105
  eol_page_id : str
106
  EOL page ID associated with predicted taxon for more information.
 
125
  if exact_df.shape[0] > 0:
126
  df_filtered = exact_df.sample()
127
  full_name = " ".join(df_filtered.select(RANKS[:rank+1]).row(0))
128
+ return df_filtered["file_path"][0], df_filtered["gbif_id"].cast(pl.String)[0], df_filtered["eol_page_id"].cast(pl.String)[0], full_name, True
129
 
130
  # If no exact matches, return any entry with the specified rank
131
  df_filtered = df.sample()
132
  full_name = " ".join(df_filtered.select(RANKS[:rank+1]).row(0)) + " " + " ".join(df_filtered.select(RANKS[rank+1:]).row(0))
133
+ return df_filtered["file_path"][0], df_filtered["gbif_id"].cast(pl.String)[0], df_filtered["eol_page_id"].cast(pl.String)[0], full_name, False