Spaces:
Running
Running
update column names from metadata file
Browse files
- app.py +1 -1
- components/query.py +6 -6
app.py
CHANGED
@@ -31,7 +31,7 @@ hf_hub_download(repo_id="imageomics/demo-data",
|
|
31 |
METADATA_PATH = "components/bioclip-2/metadata.parquet"
|
32 |
# Read page IDs as int
|
33 |
metadata_df = pl.read_parquet(METADATA_PATH, low_memory = False)
|
34 |
-
metadata_df = metadata_df.with_columns(pl.col(["eol_page_id", "
|
35 |
|
36 |
MODEL_STR = "hf-hub:imageomics/bioclip-2"
|
37 |
TOKENIZER_STR = "ViT-L-14"
|
|
|
31 |
METADATA_PATH = "components/bioclip-2/metadata.parquet"
|
32 |
# Read page IDs as int
|
33 |
metadata_df = pl.read_parquet(METADATA_PATH, low_memory = False)
|
34 |
+
metadata_df = metadata_df.with_columns(pl.col(["eol_page_id", "gbif_id"]).cast(pl.Int64))
|
35 |
|
36 |
MODEL_STR = "hf-hub:imageomics/bioclip-2"
|
37 |
TOKENIZER_STR = "ViT-L-14"
|
components/query.py
CHANGED
@@ -42,7 +42,7 @@ def get_sample(df, pred_taxon, rank):
|
|
42 |
'''
|
43 |
logger.info(f"Getting sample for taxon: {pred_taxon} at rank: {rank}")
|
44 |
try:
|
45 |
-
filepath,
|
46 |
except Exception as e:
|
47 |
logger.error(f"Error retrieving sample data: {e}")
|
48 |
return None, f"We encountered the following error trying to retrieve a sample image: {e}."
|
@@ -58,8 +58,8 @@ def get_sample(df, pred_taxon, rank):
|
|
58 |
)
|
59 |
img_resp = requests.get(img_src)
|
60 |
img = Image.open(io.BytesIO(img_resp.content))
|
61 |
-
if
|
62 |
-
gbif_url = GBIF_URL +
|
63 |
if eol_page_id:
|
64 |
eol_url = EOL_URL + eol_page_id
|
65 |
if is_exact:
|
@@ -100,7 +100,7 @@ def get_sample_data(df, pred_taxon, rank):
|
|
100 |
--------
|
101 |
filepath : str
|
102 |
Filepath of selected sample image for predicted taxon.
|
103 |
-
|
104 |
GBIF page ID associated with predicted taxon for more information.
|
105 |
eol_page_id : str
|
106 |
EOL page ID associated with predicted taxon for more information.
|
@@ -125,9 +125,9 @@ def get_sample_data(df, pred_taxon, rank):
|
|
125 |
if exact_df.shape[0] > 0:
|
126 |
df_filtered = exact_df.sample()
|
127 |
full_name = " ".join(df_filtered.select(RANKS[:rank+1]).row(0))
|
128 |
-
return df_filtered["file_path"][0], df_filtered["
|
129 |
|
130 |
# If no exact matches, return any entry with the specified rank
|
131 |
df_filtered = df.sample()
|
132 |
full_name = " ".join(df_filtered.select(RANKS[:rank+1]).row(0)) + " " + " ".join(df_filtered.select(RANKS[rank+1:]).row(0))
|
133 |
-
return df_filtered["file_path"][0], df_filtered["
|
|
|
42 |
'''
|
43 |
logger.info(f"Getting sample for taxon: {pred_taxon} at rank: {rank}")
|
44 |
try:
|
45 |
+
filepath, gbif_id, eol_page_id, full_name, is_exact = get_sample_data(df, pred_taxon, rank)
|
46 |
except Exception as e:
|
47 |
logger.error(f"Error retrieving sample data: {e}")
|
48 |
return None, f"We encountered the following error trying to retrieve a sample image: {e}."
|
|
|
58 |
)
|
59 |
img_resp = requests.get(img_src)
|
60 |
img = Image.open(io.BytesIO(img_resp.content))
|
61 |
+
if gbif_id:
|
62 |
+
gbif_url = GBIF_URL + gbif_id
|
63 |
if eol_page_id:
|
64 |
eol_url = EOL_URL + eol_page_id
|
65 |
if is_exact:
|
|
|
100 |
--------
|
101 |
filepath : str
|
102 |
Filepath of selected sample image for predicted taxon.
|
103 |
+
gbif_id: str
|
104 |
GBIF page ID associated with predicted taxon for more information.
|
105 |
eol_page_id : str
|
106 |
EOL page ID associated with predicted taxon for more information.
|
|
|
125 |
if exact_df.shape[0] > 0:
|
126 |
df_filtered = exact_df.sample()
|
127 |
full_name = " ".join(df_filtered.select(RANKS[:rank+1]).row(0))
|
128 |
+
return df_filtered["file_path"][0], df_filtered["gbif_id"].cast(pl.String)[0], df_filtered["eol_page_id"].cast(pl.String)[0], full_name, True
|
129 |
|
130 |
# If no exact matches, return any entry with the specified rank
|
131 |
df_filtered = df.sample()
|
132 |
full_name = " ".join(df_filtered.select(RANKS[:rank+1]).row(0)) + " " + " ".join(df_filtered.select(RANKS[rank+1:]).row(0))
|
133 |
+
return df_filtered["file_path"][0], df_filtered["gbif_id"].cast(pl.String)[0], df_filtered["eol_page_id"].cast(pl.String)[0], full_name, False
|