File size: 3,996 Bytes
3aabbc1
 
 
b5523d2
3aabbc1
 
 
 
 
 
 
 
b5523d2
 
3aabbc1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b5523d2
 
3aabbc1
 
 
 
 
b5523d2
 
3aabbc1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import json
import pickle
import pandas as pd
import panel as pn
import geopandas as gpd
from shapely import Point
import folium
from branca import colormap as cm
import model2vec
from model2vec import StaticModel
from vicinity import Vicinity, Backend, Metric
from sklearn.decomposition import TruncatedSVD


# Canton geometries loaded from the GeoJSON file. The "nom" column is renamed
# to "canton" so it carries the same name as the "canton" column kept from the
# job-offer table and can be used as a merge key.
gcantons = gpd.read_file("cantons-normandie.geojson")
gcantons = gcantons.rename(columns={"nom": "canton"})

# Spatial index over the canton geometries, queried by assigne_canton().
si = gcantons.sindex
def assigne_canton(row):
    """Return the name of the canton nearest to a row's coordinates.

    Args:
        row: Any object exposing ``longitude`` and ``latitude`` attributes
            (for example a row passed by ``DataFrame.apply(..., axis=1)``).

    Returns:
        The ``"canton"`` value of the nearest entry of ``gcantons``, or
        ``None`` when either coordinate is missing.
    """
    longitude = row.longitude
    latitude = row.latitude
    if pd.isna(longitude) or pd.isna(latitude):
        return None

    # nearest() yields a pair of index arrays; the second one is read here as
    # positions into gcantons, and its first entry is used for our one point.
    matches = si.nearest(Point(longitude, latitude))
    position = matches[1][0]
    return gcantons.iloc[position]["canton"]


# Job-offer skills table, reduced to the canton and skill-label columns (the
# skill label is renamed to "Compétence", which is also shown in map tooltips).
dfcomp = pd.read_csv("offres_emploi_competences_cantons.csv")
dfcomp = dfcomp.rename(columns={"competencelibelle": "Compétence"})
dfcomp = dfcomp[["canton", "Compétence"]]


# Training offers; coordinate columns are renamed to the "latitude" and
# "longitude" names that the rest of the file reads.
dfform = pd.read_csv("formations_en_normandie.csv", index_col=0)
dfform = dfform.rename(
    columns={"y_latitude": "latitude", "x_longitude": "longitude"}
)
certcol = "intitule_certification"

# Distinct certification titles.
items_form = list(set(dfform[certcol]))

# Distance table indexed by its first CSV column, cast to integers.
# calcul_score() selects its columns by canton name.
dfdist = pd.read_csv("cantons_dist.csv", index_col=0)
dfdist = dfdist.astype(int)

# Static embedding model used to encode both the catalogue and the queries.
potion128 = "minishlab/potion-multilingual-128M"
model = StaticModel.from_pretrained(potion128)

# Pre-fitted projection applied on top of the embeddings.
# NOTE: pickle.load can execute arbitrary code contained in the file; only
# ship/load sklearn_svd.pkl from a trusted source.
with open("sklearn_svd.pkl", "rb") as f:
    svd = pickle.load(f)

def encode_and_project(query: "str | list[str]"):
    """Embed text with the static model, then reduce it with the loaded SVD.

    Args:
        query: A single string or a list of strings. A bare string is wrapped
            in a one-element list so the model always receives a list; the
            index builds in this file pass whole lists of labels.

    Returns:
        Whatever ``svd.transform`` returns for the model embeddings
        (presumably one reduced vector per input text).
    """
    texts = [query] if isinstance(query, str) else query
    embeddings = model.encode(texts)
    return svd.transform(embeddings)

# NOTE: `certcol` and `items_form` used to be re-assigned here with expressions
# identical to their first definitions next to the `dfform` load earlier in
# this file. `dfform` is not modified in between, so the duplicate computation
# was dropped and the earlier values are reused below.

# Distinct skill labels found in the job offers.
items_emploi = dfcomp.Compétence.drop_duplicates().to_list()

# Similarity index over the job-offer skill labels.
vice = Vicinity.from_vectors_and_items(
    vectors=encode_and_project(items_emploi),
    items=items_emploi,
    backend_type=Backend.USEARCH,
    metric=Metric.COSINE,
)

# Similarity index over the certification titles.
vicf = Vicinity.from_vectors_and_items(
    vectors=encode_and_project(items_form),
    items=items_form,
    backend_type=Backend.USEARCH,
    metric=Metric.COSINE,
)

# Free-text search box; carte() reads its value.
query = pn.widgets.TextInput(name="Rechercher une compétence")
# Index hits are kept only when their distance is strictly below this value.
proximax = 0.5


# Numeric indicator shown next to the search box; hidden until a query is run.
score = pn.indicators.Number(
    name="Score d'adéquation",
    value=2,
    visible=False,
    format="{value}/10",
    title_size='10pt',
    font_size='30pt',
    colors=[(3, 'red'), (7, 'orange'), (10, 'green')],
)


def calcul_score(dft):
    """Score from 0 to 10 derived from distances to cantons with training.

    Every row of ``dft`` is assigned to its nearest canton; the result is
    stored in a ``canton`` column added to ``dft`` itself. For each row of the
    distance table ``dfdist``, the smallest value among the columns of those
    cantons is taken; these minima are summed and the score is
    ``10 - total // 1000``, floored at 0.

    Args:
        dft: DataFrame with ``latitude`` and ``longitude`` columns.

    Returns:
        An ``int`` between 0 and 10. Returns 0 when ``dft`` is empty or when no
        row could be assigned to a canton (nothing to measure a distance to;
        treated here as the worst score).
    """
    if dft.empty:
        return 0

    dft['canton'] = dft.apply(assigne_canton, axis=1)

    # assigne_canton() returns None for rows without coordinates; None is not
    # a column of dfdist and would raise KeyError, so drop it. Duplicates do
    # not change a minimum, so each canton column is selected only once.
    cantons = dft['canton'].dropna().unique().tolist()
    if not cantons:
        return 0

    dist = dfdist[cantons].min(axis=1).sum()
    return int(max(0, 10 - dist // 1000))


def carte(col):
    """Build the map pane for the current content of the search box.

    The function is bound to the ``query`` widget with ``pn.bind``; the bound
    value arrives as ``col``, but the text is read from ``query.value``.

    * Blank query: a plain base map is returned and the score indicator is
      hidden.
    * Otherwise: job-offer skills within ``proximax`` of the query are counted
      per canton and drawn as a choropleth, matching training offers are added
      as markers, and the score indicator is updated and shown.

    Args:
        col: Value supplied by ``pn.bind`` (not used directly).

    Returns:
        A ``pn.pane.plot.Folium`` pane of height 800 wrapping the map.
    """
    req = query.value
    # Treat whitespace-only input like an empty box instead of embedding it.
    if not req or not req.strip():
        m = folium.Map(location=[49.124854, -0.0730575], zoom_start=8,
                       tiles="CartoDB positron")
        score.visible = False
        return pn.pane.plot.Folium(m, height=800)

    test_emb = encode_and_project(req)

    # --- Job-offer skills close to the query, aggregated per canton ---------
    # query(...)[0] is taken as the hit list for our single query vector.
    selcol = [nom for (nom, dist) in vice.query(test_emb, k=200)[0]
              if dist < proximax]
    dfselcol = dfcomp[dfcomp["Compétence"].isin(selcol)]
    dfa = dfselcol.groupby("canton").agg(
        total=("Compétence", "size"),
        # One "<skill> <count>" line per skill, joined with <br> for the tooltip.
        compétence=(
            "Compétence",
            lambda x: x.value_counts().to_string(header=False).replace("\n", "<br>"),
        ),
    ).reset_index()
    # Right merge keeps every canton, including those without any match.
    gdet = gpd.GeoDataFrame(dfa.merge(gcantons, how='right'))
    m = gdet.explore(column="total", tooltip=["canton", "compétence", "total"],
                     cmap="viridis", vmax=10, tiles="CartoDB positron")

    # --- Training offers close to the query, one marker per location --------
    res_form = [nom for (nom, dist) in vicf.query(test_emb, k=200)[0]
                if dist < proximax]
    dft = (
        pd.DataFrame(res_form, columns=[certcol])
        .merge(dfform)
        # folium.Marker rejects NaN coordinates, and drop_duplicates alone
        # would still keep one such row, so remove them first.
        .dropna(subset=["latitude", "longitude"])
        .drop_duplicates(subset=["latitude", "longitude"])
    )
    for _, row in dft.iterrows():
        folium.Marker(
            location=[row['latitude'], row['longitude']],
            tooltip=f"<b>{row['intitule_certification']}</b><p>{row['intitule_formation']}",
            icon=folium.Icon(color="red", prefix="fa", icon="fa-university"),
        ).add_to(m)

    # NOTE(review): the indicator is formatted as "{value}/10", but this is the
    # raw number of matched certification titles (up to k=200). The original
    # line carried a commented-out `calcul_score(dft)` alternative; confirm
    # which value is wanted.
    score.value = len(res_form)
    score.visible = True

    return pn.pane.plot.Folium(m, height=800)

# Bind carte() to the search box so Panel passes the widget's value as `col`.
lien = pn.bind(carte, col=query)

# Page layout: a row holding the search box and the score indicator, followed
# by the bound map pane. servable() marks the layout for serving by Panel.
app = pn.FlexBox(
    pn.Row(query, score),
    lien,
    height=1000,
)
app.servable()