-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathui.py
79 lines (61 loc) · 2.43 KB
/
ui.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import time
from dotenv import load_dotenv
import streamlit as st
import numpy as np
from annotated_text import annotated_text
from src.anonymizer.AdvancedTokenAnonymizerModel import AdvancedTokenAnonymizerModel
from src.masking_utils import mask_by_char, pseudo
from src.spans_utils import convert_tokens_to_chars_mapping
# Read variables from a .env file (if any) before the page is built.
load_dotenv()

# Page metadata; this stays the first Streamlit call in the script.
st.set_page_config(page_icon="📝", page_title="Anonymize Text")

# Sidebar controls: the output mode, and the score threshold (0-100, default 80)
# that detected spans are compared against further down the script.
with st.sidebar:
    st.title("Settings")
    option = st.selectbox(
        "Choose an option",
        ["Anonymize", "Anonymize-light", "Pseudonimize"],
    )
    confidence = st.slider("Confidence", min_value=0, max_value=100, value=80, step=1)

# Main area: page heading and the free-text input to process.
st.title("📝 Anonymize Text")
text_to_anonymize = st.text_area("Input", placeholder="Anonymize this text")
@st.cache_resource
def _load_model(model_name="molise-ai/pii-detector"):
    """Build the anonymizer model once and reuse it on later reruns.

    Streamlit re-executes the whole script on every interaction; without
    caching, the model would be constructed again on each button click.

    Args:
        model_name: Identifier passed to ``AdvancedTokenAnonymizerModel``.

    Returns:
        The cached ``AdvancedTokenAnonymizerModel`` instance.
    """
    # NOTE(review): a cached resource is shared by all sessions; this assumes
    # model.predict is safe to call concurrently - confirm.
    return AdvancedTokenAnonymizerModel(model_name)


# Run detection when the button is pressed and keep the results in session
# state so they survive reruns triggered by other widgets (e.g. the slider).
if st.button("Esegui"):
    if text_to_anonymize:
        # perf_counter is monotonic, so the measured duration cannot be skewed
        # by system clock adjustments the way time.time() can.
        started = time.perf_counter()

        model = _load_model()
        token_spans = model.predict(text_to_anonymize)
        char_spans = convert_tokens_to_chars_mapping(
            text_to_anonymize, token_spans, model.tokenizer
        )

        # NOTE(review): the score stored here is a random Beta(8, 2) draw
        # scaled to 0-100, not a value produced by the model - looks like a
        # placeholder; confirm before relying on the confidence slider.
        for char_span in char_spans:
            char_span["score"] = np.random.beta(8, 2) * 100

        st.session_state["char_spans"] = char_spans
        st.session_state["text_to_anonymize"] = text_to_anonymize
        st.session_state["time_elapsed"] = time.perf_counter() - started
    else:
        st.warning("Please enter some text to anonymize.")
def _build_segments(text, spans):
    """Split ``text`` into the sequence shown by ``annotated_text``.

    Plain strings are the stretches between spans; each span becomes a
    ``(snippet, caption)`` tuple whose caption carries the label and score.
    Spans are consumed in the order given.
    """
    pieces = []
    cursor = 0
    for entry in spans:
        begin, stop = entry["start"], entry["end"]
        caption = f"{entry['label']} ({entry['score']:.2f} %)"
        pieces.append(text[cursor:begin])
        pieces.append((text[begin:stop], caption))
        cursor = stop
    # Whatever follows the last span (or the whole text when there are none).
    pieces.append(text[cursor:])
    return pieces


# Render the most recent results, if any, from session state.
if "char_spans" in st.session_state:
    source_text = st.session_state["text_to_anonymize"]
    stored_spans = st.session_state["char_spans"]
    elapsed = st.session_state["time_elapsed"]

    # Keep only the spans whose score reaches the slider threshold.
    kept_spans = [item for item in stored_spans if item["score"] >= confidence]

    segments = _build_segments(source_text, kept_spans)
    full_mask, light_mask = mask_by_char(source_text, kept_spans)

    st.write("## OUTPUT")
    # The first mask_by_char result backs "Anonymize", the second backs
    # "Anonymize-light"; any other option runs pseudo() on the first.
    if option == "Anonymize":
        rendered = full_mask
    elif option == "Anonymize-light":
        rendered = light_mask
    else:
        rendered = pseudo(full_mask)
    st.write(rendered)
    st.write("")

    st.write("## VISUAL")
    annotated_text(segments)
    for _ in range(2):
        st.write("")

    st.write(f"Time elapsed: {elapsed:.2f} seconds")