# Page-extraction residue kept as a comment: "Spaces: Running", "File size: 3,996 Bytes".
# Revision markers seen in the page gutter: 3aabbc1, b5523d2 (alternating).
# The line-number gutter (1-121) that followed has been dropped.
import json
import pickle
import pandas as pd
import panel as pn
import geopandas as gpd
from shapely import Point
import folium
from branca import colormap as cm
import model2vec
from model2vec import StaticModel
from vicinity import Vicinity, Backend, Metric
from sklearn.decomposition import TruncatedSVD
# Canton geometries read from the GeoJSON file; the "nom" column is
# exposed under the name "canton" used by the rest of the script.
gcantons = gpd.read_file("cantons-normandie.geojson")
gcantons = gcantons.rename(columns={"nom": "canton"})

# Spatial index over the canton geometries (used for nearest-canton lookups).
si = gcantons.sindex
def assigne_canton(row):
    """Return the name of the canton nearest to a row's coordinates.

    Args:
        row: Object exposing ``longitude`` and ``latitude`` attributes,
            e.g. a row handed over by ``DataFrame.apply(..., axis=1)``.

    Returns:
        The ``canton`` value of the nearest entry of ``gcantons``, or
        ``None`` when either coordinate is missing.
    """
    lon, lat = row.longitude, row.latitude
    if pd.isna(lon) or pd.isna(lat):
        return None
    # ``[1][0]`` picks, from the second row of the result, the first entry:
    # the position in ``gcantons`` of the geometry nearest to the point.
    proche = si.nearest(Point(lon, lat))[1][0]
    return gcantons.iloc[proche]['canton']
# Job-offer skills: rename the label column, then keep only the canton and
# the skill label.
dfcomp = pd.read_csv("offres_emploi_competences_cantons.csv")
dfcomp = dfcomp.rename(columns={"competencelibelle": "Compétence"})
dfcomp = dfcomp[["canton", "Compétence"]]

# Training catalogue, with its coordinate columns renamed to
# "latitude" / "longitude".
dfform = pd.read_csv("formations_en_normandie.csv", index_col=0)
dfform = dfform.rename(
    columns={"y_latitude": "latitude", "x_longitude": "longitude"}
)

# Column holding the certification title, and its distinct values.
certcol = "intitule_certification"
items_form = list(set(dfform[certcol]))

# Table read from "cantons_dist.csv", cast to integers.
dfdist = pd.read_csv("cantons_dist.csv", index_col=0)
dfdist = dfdist.astype(int)
# Identifier of the pretrained static embedding model.
potion128 = "minishlab/potion-multilingual-128M"
model = StaticModel.from_pretrained(potion128)

# Projection applied to the embeddings (see ``encode_and_project``).
# NOTE(review): presumably a fitted sklearn TruncatedSVD, given the import
# at the top of the file — confirm.
# NOTE: unpickling runs arbitrary code; "sklearn_svd.pkl" must only ever be
# a trusted file shipped with the application.
with open("sklearn_svd.pkl", "rb") as f:
    svd = pickle.load(f)
def encode_and_project(query: str):
    """Embed text with ``model`` and project the result with ``svd``.

    Args:
        query: A single string, which is wrapped into a one-element list.
            Despite the annotation, lists of strings are also passed in by
            this script and are forwarded to the model unchanged.

    Returns:
        The output of ``svd.transform`` applied to ``model.encode(query)``.
    """
    if isinstance(query, str):
        query = [query]
    q = model.encode(query)
    q_reduced = svd.transform(q)
    return q_reduced
# ``certcol`` and ``items_form`` are already defined right after the
# training catalogue is loaded; they used to be recomputed identically here.
# Distinct skill labels found in the job offers.
items_emploi = dfcomp.Compétence.drop_duplicates().to_list()
def _construit_index(items):
    """Build a USEARCH-backed, cosine-metric index over the projected embeddings of *items*."""
    return Vicinity.from_vectors_and_items(
        vectors=encode_and_project(items),
        items=items,
        backend_type=Backend.USEARCH,
        metric=Metric.COSINE,
    )


# One index over the job-offer skills, one over the certification titles.
vice = _construit_index(items_emploi)
vicf = _construit_index(items_form)
# Free-text search box.
query = pn.widgets.TextInput(name="Rechercher une compétence")

# Results whose distance to the query is not below this value are discarded.
proximax = 0.5

# Numeric indicator, created hidden; ``carte`` shows it once a search is run.
score = pn.indicators.Number(
    name="Score d'adéquation",
    value=2,
    visible=False,
    format="{value}/10",
    title_size='10pt',
    font_size='30pt',
    colors=[(3, 'red'), (7, 'orange'), (10, 'green')],
)
def calcul_score(dft):
    """Compute a score between 0 and 10 from the cantons of the rows of *dft*.

    Every row is assigned its nearest canton, stored in a new ``canton``
    column (``dft`` is therefore modified in place). For each row of
    ``dfdist`` the smallest value among the columns of those cantons is
    taken; these minima are summed, and the sum integer-divided by 1000 is
    subtracted from 10.

    Args:
        dft: DataFrame with ``latitude`` and ``longitude`` columns.

    Returns:
        ``max(0, 10 - total // 1000)``.

    Raises:
        KeyError: If an assigned canton is not a column of ``dfdist``.
    """
    dft['canton'] = dft.apply(assigne_canton, axis=1)
    # Rows without coordinates get ``None`` as canton; selecting columns with
    # a missing label would raise KeyError, so those rows are left out.
    cantons = dft['canton'].dropna().unique().tolist()
    # NOTE(review): when no row has a canton the selection is empty — check
    # that the resulting score is the one wanted in that case.
    dist = dfdist[cantons].min(axis=1).sum()
    return max(0, 10 - dist // 1000)
def carte(col):
    """Build the Folium map pane for the current content of the search box.

    With an empty (or blank) query, a plain base map is returned and the
    score indicator is hidden. Otherwise the cantons are coloured by the
    number of job-offer rows whose skill is close to the query, one marker
    is added per distinct training location whose certification title is
    close to the query, and the score indicator is shown.

    Args:
        col: Value supplied by ``pn.bind`` for the ``query`` widget. It is
            not read here: the text is taken from ``query.value``.

    Returns:
        A ``pn.pane.plot.Folium`` pane of height 800 wrapping the map.
    """
    req = query.value
    # A whitespace-only query is handled like an empty one.
    if not req or not req.strip():
        m = folium.Map(location=[49.124854, -0.0730575], zoom_start=8, tiles="CartoDB positron")
        score.visible = False
    else:
        test_emb = encode_and_project(req)

        # Skills among the 200 nearest neighbours that are closer than ``proximax``.
        selcol = [nom for nom, dist in vice.query(test_emb, k=200)[0] if dist < proximax]
        dfselcol = dfcomp[dfcomp["Compétence"].isin(selcol)]

        # Per canton: number of matching rows, and an HTML breakdown per skill.
        dfa = dfselcol.groupby("canton").agg(
            total=("Compétence", "size"),
            compétence=(
                "Compétence",
                lambda x: x.value_counts().to_string(header=False).replace("\n", "<br>"),
            ),
        ).reset_index()

        # The right merge keeps every canton, including those without a match.
        gdet = gpd.GeoDataFrame(dfa.merge(gcantons, how='right'))
        m = gdet.explore(column="total", tooltip=["canton", "compétence", "total"],
                         cmap="viridis", vmax=10, tiles="CartoDB positron")

        # Certification titles close to the query, joined to their training rows.
        res_form = [nom for nom, dist in vicf.query(test_emb, k=200)[0] if dist < proximax]
        dft = (
            pd.DataFrame(res_form, columns=[certcol])
            .merge(dfform)
            .drop_duplicates(subset=["latitude", "longitude"])
            # folium refuses NaN coordinates: rows without a position are skipped.
            .dropna(subset=["latitude", "longitude"])
        )
        for _, row in dft.iterrows():
            folium.Marker(
                location=[row['latitude'], row['longitude']],
                tooltip=f"<b>{row['intitule_certification']}</b><p>{row['intitule_formation']}",
                icon=folium.Icon(color="red", prefix="fa", icon="fa-university")
            ).add_to(m)

        # NOTE(review): the indicator currently shows the number of matching
        # certification titles; the call to calcul_score(dft) was disabled here.
        score.value = len(res_form)
        score.visible = True
    return pn.pane.plot.Folium(m, height=800)
# Bind the map builder to the search box so it is called with the box's value.
lien = pn.bind(carte, col=query)

# Layout: a row with the search box and the score indicator, then the map.
entete = pn.Row(query, score)
app = pn.FlexBox(entete, lien, height=1000)
app.servable()
|