improve streamlit app

This commit is contained in:
Jason
2023-09-12 14:54:14 -04:00
parent 5d70d7924f
commit e025eaa64f
3 changed files with 290 additions and 3 deletions
+1 -3
View File
@@ -25,7 +25,7 @@ class StreamingAccumulatorManager:
try:
# Replace this line with your validation logic
obj = m.MultiSearch.model_validate(obj)
self.update(index, obj)
self.update(index, obj.model_dump())
self.accumulator[Status.IS_VALID.value].update(index, True)
except ValidationError as e:
self.accumulator[Status.IS_VALID.value].update(index, False)
@@ -59,8 +59,6 @@ class StreamingAccumulatorManager:
elif isinstance(data, Enum):
enum_path = f"{path}.enum"
self.accumulator[enum_path].update(index, data.value)
elif path != "$":
pass
else:
self.accumulator[path].update(index, data)
+224
View File
@@ -0,0 +1,224 @@
from collections import Counter
# Precomputed statistics keyed by entry name.
#
# Keys that start with "$" are JSONPath-style paths; keys wrapped in
# underscores ("_is_json_", "_is_valid_", "_validation_error_") are
# record-level entries.
#
# Every entry carries:
#   "_reverse_lookup": observed value -> list of integer indices
#       (presumably the indices of the input records that produced the value;
#       an index can repeat within one list -- TODO confirm with the producer)
#   "counter":         Counter of how often each value was observed
#   "missing_values" / "unique_count": integer summary fields
# Numeric entries add "min"/"max"/"mean"/"std"; string entries add the
# "str_*_length" fields; boolean entries add only "mean".
#
# NOTE(review): this looks like a captured snapshot of generated output rather
# than hand-maintained data -- confirm before editing values by hand.
stats_dict = {
"$.queries.length": {
"_reverse_lookup": {
1: [0, 1, 8, 9, 10, 13, 14, 15],
2: [7, 11, 16],
3: [12, 17],
},
"counter": Counter({1: 8, 2: 3, 3: 2}),
"max": 3,
"mean": 1.5384615384615385,
"min": 1,
"missing_values": 0,
"std": 0.7457969011409735,
"unique_count": 3,
},
"$.queries[*].is_priority": {
"_reverse_lookup": {False: [13], True: [1, 9, 14, 17]},
"counter": Counter({True: 4, False: 1}),
"mean": 0.8,
"missing_values": 15,
"unique_count": 2,
},
"$.queries[*].query": {
"_reverse_lookup": {
"customer churn": [1],
"customer feedback": [15],
"customer satisfaction": [11],
"email campaigns": [12],
"email open rates": [17],
"email outreach": [10],
"marketing strategies": [14],
"new products": [16],
"product sales": [11],
"revenue 2022": [9],
"revenue streams": [16],
"sales Q1": [0, 7, 8, 13],
"sales Q2": [7],
"social impact": [12],
"social trends": [17],
"web traffic": [12],
"website analytics": [17],
},
"counter": Counter(
{
"sales Q1": 4,
"customer churn": 1,
"sales Q2": 1,
"revenue 2022": 1,
"email outreach": 1,
"product sales": 1,
"customer satisfaction": 1,
"social impact": 1,
"email campaigns": 1,
"web traffic": 1,
"marketing strategies": 1,
"customer feedback": 1,
"revenue streams": 1,
"new products": 1,
"social trends": 1,
"email open rates": 1,
"website analytics": 1,
}
),
"missing_values": 0,
"str_max_length": 21,
"str_mean_length": 13.15,
"str_min_length": 8,
"str_std_length": 3.8376425054973518,
"unique_count": 17,
},
"$.queries[*].results_limit": {
"_reverse_lookup": {
5: [17],
10: [0, 1, 7, 7, 8, 9, 10, 11, 11, 12, 12, 12, 13, 15, 16, 16, 17, 17],
15: [14],
},
"counter": Counter({10: 18, 15: 1, 5: 1}),
"max": 15,
"mean": 10.0,
"min": 5,
"missing_values": 0,
"std": 1.5811388300841898,
"unique_count": 3,
},
"$.queries[*].source_type.enum": {
"_reverse_lookup": {
"CRM": [0, 7, 8, 11, 13, 16],
"EMAIL": [10, 11, 12, 15, 17],
"SOCIAL_MEDIA": [12, 17],
"WEB": [1, 7, 9, 12, 14, 16, 17],
},
"counter": Counter({"WEB": 7, "CRM": 6, "EMAIL": 5, "SOCIAL_MEDIA": 2}),
"missing_values": 0,
"str_max_length": 12,
"str_mean_length": 4.4,
"str_min_length": 3,
"str_std_length": 2.672077843177477,
"unique_count": 4,
},
"$.queries[*].tags": {
"_reverse_lookup": {},
"counter": Counter(),
"missing_values": 16,
"unique_count": 0,
},
"$.queries[*].tags.length": {
"_reverse_lookup": {1: [15, 17], 2: [10, 14]},
"counter": Counter({2: 2, 1: 2}),
"max": 2,
"mean": 1.5,
"min": 1,
"missing_values": 0,
"std": 0.5,
"unique_count": 2,
},
"$.queries[*].tags[*]": {
"_reverse_lookup": {
"2022": [10],
"2023": [14],
"analytics": [17],
"feedback": [15],
"outreach": [10],
"strategy": [14],
},
"counter": Counter(
{
"outreach": 1,
"2022": 1,
"strategy": 1,
"2023": 1,
"feedback": 1,
"analytics": 1,
}
),
"missing_values": 0,
"str_max_length": 9,
"str_mean_length": 6.833333333333333,
"str_min_length": 4,
"str_std_length": 2.034425935955618,
"unique_count": 6,
},
"$.user_id": {
"_reverse_lookup": {
"user_1": [0],
"user_10": [10],
"user_11": [11],
"user_12": [12],
"user_13": [13],
"user_14": [14],
"user_15": [15],
"user_16": [16],
"user_17": [17],
"user_2": [1],
"user_7": [7],
"user_8": [8],
"user_9": [9],
},
"counter": Counter(
{
"user_1": 1,
"user_2": 1,
"user_7": 1,
"user_8": 1,
"user_9": 1,
"user_10": 1,
"user_11": 1,
"user_12": 1,
"user_13": 1,
"user_14": 1,
"user_15": 1,
"user_16": 1,
"user_17": 1,
}
),
"missing_values": 0,
"str_max_length": 7,
"str_mean_length": 6.615384615384615,
"str_min_length": 6,
"str_std_length": 0.48650425541052295,
"unique_count": 13,
},
# Record-level entries follow. Judging by their names they describe whether
# each record parsed as JSON, whether it validated, and which validation
# error occurred -- TODO confirm against the code that produces them.
"_is_json_": {
"_reverse_lookup": {
False: [2, 4],
True: [0, 1, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17],
},
"counter": Counter({True: 16, False: 2}),
"mean": 0.8888888888888888,
"missing_values": 0,
"unique_count": 2,
},
"_is_valid_": {
"_reverse_lookup": {
False: [3, 5, 6],
True: [0, 1, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17],
},
"counter": Counter({True: 13, False: 3}),
"mean": 0.8125,
"missing_values": 0,
"unique_count": 2,
},
"_validation_error_": {
"_reverse_lookup": {
"$.queries.[*].is_priority.bool_parsing": [6],
"$.queries.[*].source_type.enum": [3],
"$.user_id.missing": [5],
},
"counter": Counter(
{
"$.queries.[*].source_type.enum": 1,
"$.user_id.missing": 1,
"$.queries.[*].is_priority.bool_parsing": 1,
}
),
"missing_values": 0,
"str_max_length": 38,
"str_mean_length": 28.333333333333332,
"str_min_length": 17,
"str_std_length": 8.653836657164781,
"unique_count": 3,
},
}
+65
View File
@@ -0,0 +1,65 @@
import streamlit as st
from stats_dict import stats_dict
import json
# Sample data: map each zero-based line index of test.jsonl to its raw line
# (trailing newline included). The context manager closes the file as soon as
# the lines are loaded instead of leaving the handle open until garbage
# collection.
with open("test.jsonl", "r") as jsonl_file:
    query_data = dict(enumerate(jsonl_file))
# Initialize selected keys: stats key -> values currently picked in the UI.
selected_keys = {}
# Function to get lines
def get_lines(stats_key, keys):
    """Return the input lines recorded for the given values of one stats entry.

    Args:
        stats_key: Key of ``stats_dict`` whose ``_reverse_lookup`` is used.
        keys: Values to look up in that ``_reverse_lookup`` table.

    Returns:
        The matching ``query_data`` lines joined with ``"\\n"``. Each line
        appears once, in first-seen order, without its own line ending.
        An empty ``keys`` yields ``""``.

    Raises:
        KeyError: If ``stats_key``, one of ``keys``, or a referenced line
            index is not present.
    """
    # A reverse-lookup list can repeat an index, and several selected keys can
    # point at the same line; dict.fromkeys de-duplicates while keeping order.
    indices = dict.fromkeys(
        index
        for key in keys
        for index in stats_dict[stats_key]["_reverse_lookup"][key]
    )
    # Lines read from the file still end in a newline; strip it so the join
    # does not insert a blank line between every pair of records.
    return "\n".join(query_data[index].rstrip("\r\n") for index in indices)
# Function to render dropdown and button
def render_dropdown_and_button(stats_key):
    """Render the stats, histogram and line picker for one ``stats_dict`` entry.

    Args:
        stats_key: Key of ``stats_dict`` to display; the entry must contain a
            ``"counter"`` mapping.

    The widgets are emitted in a fixed order: value counts, scalar numeric
    statistics, bar chart, multiselect, then a button that shows the selected
    lines via ``get_lines``.
    """
    st.subheader(f"Stats for `{stats_key}`")
    value_counts = stats_dict[stats_key]["counter"]
    st.json(value_counts)

    # Keep only the plain numeric fields (min/max/mean/...) for the summary.
    numeric_summary = {}
    for field_name, field_value in stats_dict[stats_key].items():
        if isinstance(field_value, (int, float)):
            numeric_summary[field_name] = field_value
    st.json(numeric_summary)

    st.subheader("Histogram")
    st.bar_chart(value_counts, use_container_width=True)

    st.subheader("Select keys to view lines")
    choices = list(value_counts)
    previously_chosen = selected_keys.get(stats_key, [])
    chosen = st.multiselect(
        f"View samples with {stats_key}",
        choices,
        default=previously_chosen,
    )
    selected_keys[stats_key] = chosen

    show_requested = st.button(f"Show Selected for {stats_key}")
    if show_requested:
        st.code(get_lines(stats_key, chosen))
# Sidebar for navigation: pick which of the two pages to render.
st.sidebar.title("Navigation")
page_names = ["Validation Stats", "Individual Path Views"]
page = st.sidebar.selectbox("Select a page:", page_names)

# Main Streamlit App
st.title("Query Data Visualizer")

if page == "Validation Stats":
    # Validation Stats: every underscore-prefixed entry, one after another.
    st.header("Validation Stats")
    for key in stats_dict:
        if key.startswith("_"):
            render_dropdown_and_button(key)
elif page == "Individual Path Views":
    # Individual Path Views: a single entry chosen from the remaining keys.
    st.header("Individual Path Views")
    path_options = [key for key in stats_dict if not key.startswith("_")]
    path = st.selectbox("Choose a path:", path_options)
    if "counter" in stats_dict[path]:
        render_dropdown_and_button(path)