mirror of
https://github.com/kennethreitz/instructor.git
synced 2026-06-05 22:50:18 +00:00
improve streamlit app
This commit is contained in:
@@ -25,7 +25,7 @@ class StreamingAccumulatorManager:
|
||||
try:
|
||||
# Replace this line with your validation logic
|
||||
obj = m.MultiSearch.model_validate(obj)
|
||||
self.update(index, obj)
|
||||
self.update(index, obj.model_dump())
|
||||
self.accumulator[Status.IS_VALID.value].update(index, True)
|
||||
except ValidationError as e:
|
||||
self.accumulator[Status.IS_VALID.value].update(index, False)
|
||||
@@ -59,8 +59,6 @@ class StreamingAccumulatorManager:
|
||||
elif isinstance(data, Enum):
|
||||
enum_path = f"{path}.enum"
|
||||
self.accumulator[enum_path].update(index, data.value)
|
||||
elif path != "$":
|
||||
pass
|
||||
else:
|
||||
self.accumulator[path].update(index, data)
|
||||
|
||||
|
||||
@@ -0,0 +1,224 @@
|
||||
from collections import Counter
|
||||
|
||||
stats_dict = {
|
||||
"$.queries.length": {
|
||||
"_reverse_lookup": {
|
||||
1: [0, 1, 8, 9, 10, 13, 14, 15],
|
||||
2: [7, 11, 16],
|
||||
3: [12, 17],
|
||||
},
|
||||
"counter": Counter({1: 8, 2: 3, 3: 2}),
|
||||
"max": 3,
|
||||
"mean": 1.5384615384615385,
|
||||
"min": 1,
|
||||
"missing_values": 0,
|
||||
"std": 0.7457969011409735,
|
||||
"unique_count": 3,
|
||||
},
|
||||
"$.queries[*].is_priority": {
|
||||
"_reverse_lookup": {False: [13], True: [1, 9, 14, 17]},
|
||||
"counter": Counter({True: 4, False: 1}),
|
||||
"mean": 0.8,
|
||||
"missing_values": 15,
|
||||
"unique_count": 2,
|
||||
},
|
||||
"$.queries[*].query": {
|
||||
"_reverse_lookup": {
|
||||
"customer churn": [1],
|
||||
"customer feedback": [15],
|
||||
"customer satisfaction": [11],
|
||||
"email campaigns": [12],
|
||||
"email open rates": [17],
|
||||
"email outreach": [10],
|
||||
"marketing strategies": [14],
|
||||
"new products": [16],
|
||||
"product sales": [11],
|
||||
"revenue 2022": [9],
|
||||
"revenue streams": [16],
|
||||
"sales Q1": [0, 7, 8, 13],
|
||||
"sales Q2": [7],
|
||||
"social impact": [12],
|
||||
"social trends": [17],
|
||||
"web traffic": [12],
|
||||
"website analytics": [17],
|
||||
},
|
||||
"counter": Counter(
|
||||
{
|
||||
"sales Q1": 4,
|
||||
"customer churn": 1,
|
||||
"sales Q2": 1,
|
||||
"revenue 2022": 1,
|
||||
"email outreach": 1,
|
||||
"product sales": 1,
|
||||
"customer satisfaction": 1,
|
||||
"social impact": 1,
|
||||
"email campaigns": 1,
|
||||
"web traffic": 1,
|
||||
"marketing strategies": 1,
|
||||
"customer feedback": 1,
|
||||
"revenue streams": 1,
|
||||
"new products": 1,
|
||||
"social trends": 1,
|
||||
"email open rates": 1,
|
||||
"website analytics": 1,
|
||||
}
|
||||
),
|
||||
"missing_values": 0,
|
||||
"str_max_length": 21,
|
||||
"str_mean_length": 13.15,
|
||||
"str_min_length": 8,
|
||||
"str_std_length": 3.8376425054973518,
|
||||
"unique_count": 17,
|
||||
},
|
||||
"$.queries[*].results_limit": {
|
||||
"_reverse_lookup": {
|
||||
5: [17],
|
||||
10: [0, 1, 7, 7, 8, 9, 10, 11, 11, 12, 12, 12, 13, 15, 16, 16, 17, 17],
|
||||
15: [14],
|
||||
},
|
||||
"counter": Counter({10: 18, 15: 1, 5: 1}),
|
||||
"max": 15,
|
||||
"mean": 10.0,
|
||||
"min": 5,
|
||||
"missing_values": 0,
|
||||
"std": 1.5811388300841898,
|
||||
"unique_count": 3,
|
||||
},
|
||||
"$.queries[*].source_type.enum": {
|
||||
"_reverse_lookup": {
|
||||
"CRM": [0, 7, 8, 11, 13, 16],
|
||||
"EMAIL": [10, 11, 12, 15, 17],
|
||||
"SOCIAL_MEDIA": [12, 17],
|
||||
"WEB": [1, 7, 9, 12, 14, 16, 17],
|
||||
},
|
||||
"counter": Counter({"WEB": 7, "CRM": 6, "EMAIL": 5, "SOCIAL_MEDIA": 2}),
|
||||
"missing_values": 0,
|
||||
"str_max_length": 12,
|
||||
"str_mean_length": 4.4,
|
||||
"str_min_length": 3,
|
||||
"str_std_length": 2.672077843177477,
|
||||
"unique_count": 4,
|
||||
},
|
||||
"$.queries[*].tags": {
|
||||
"_reverse_lookup": {},
|
||||
"counter": Counter(),
|
||||
"missing_values": 16,
|
||||
"unique_count": 0,
|
||||
},
|
||||
"$.queries[*].tags.length": {
|
||||
"_reverse_lookup": {1: [15, 17], 2: [10, 14]},
|
||||
"counter": Counter({2: 2, 1: 2}),
|
||||
"max": 2,
|
||||
"mean": 1.5,
|
||||
"min": 1,
|
||||
"missing_values": 0,
|
||||
"std": 0.5,
|
||||
"unique_count": 2,
|
||||
},
|
||||
"$.queries[*].tags[*]": {
|
||||
"_reverse_lookup": {
|
||||
"2022": [10],
|
||||
"2023": [14],
|
||||
"analytics": [17],
|
||||
"feedback": [15],
|
||||
"outreach": [10],
|
||||
"strategy": [14],
|
||||
},
|
||||
"counter": Counter(
|
||||
{
|
||||
"outreach": 1,
|
||||
"2022": 1,
|
||||
"strategy": 1,
|
||||
"2023": 1,
|
||||
"feedback": 1,
|
||||
"analytics": 1,
|
||||
}
|
||||
),
|
||||
"missing_values": 0,
|
||||
"str_max_length": 9,
|
||||
"str_mean_length": 6.833333333333333,
|
||||
"str_min_length": 4,
|
||||
"str_std_length": 2.034425935955618,
|
||||
"unique_count": 6,
|
||||
},
|
||||
"$.user_id": {
|
||||
"_reverse_lookup": {
|
||||
"user_1": [0],
|
||||
"user_10": [10],
|
||||
"user_11": [11],
|
||||
"user_12": [12],
|
||||
"user_13": [13],
|
||||
"user_14": [14],
|
||||
"user_15": [15],
|
||||
"user_16": [16],
|
||||
"user_17": [17],
|
||||
"user_2": [1],
|
||||
"user_7": [7],
|
||||
"user_8": [8],
|
||||
"user_9": [9],
|
||||
},
|
||||
"counter": Counter(
|
||||
{
|
||||
"user_1": 1,
|
||||
"user_2": 1,
|
||||
"user_7": 1,
|
||||
"user_8": 1,
|
||||
"user_9": 1,
|
||||
"user_10": 1,
|
||||
"user_11": 1,
|
||||
"user_12": 1,
|
||||
"user_13": 1,
|
||||
"user_14": 1,
|
||||
"user_15": 1,
|
||||
"user_16": 1,
|
||||
"user_17": 1,
|
||||
}
|
||||
),
|
||||
"missing_values": 0,
|
||||
"str_max_length": 7,
|
||||
"str_mean_length": 6.615384615384615,
|
||||
"str_min_length": 6,
|
||||
"str_std_length": 0.48650425541052295,
|
||||
"unique_count": 13,
|
||||
},
|
||||
"_is_json_": {
|
||||
"_reverse_lookup": {
|
||||
False: [2, 4],
|
||||
True: [0, 1, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17],
|
||||
},
|
||||
"counter": Counter({True: 16, False: 2}),
|
||||
"mean": 0.8888888888888888,
|
||||
"missing_values": 0,
|
||||
"unique_count": 2,
|
||||
},
|
||||
"_is_valid_": {
|
||||
"_reverse_lookup": {
|
||||
False: [3, 5, 6],
|
||||
True: [0, 1, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17],
|
||||
},
|
||||
"counter": Counter({True: 13, False: 3}),
|
||||
"mean": 0.8125,
|
||||
"missing_values": 0,
|
||||
"unique_count": 2,
|
||||
},
|
||||
"_validation_error_": {
|
||||
"_reverse_lookup": {
|
||||
"$.queries.[*].is_priority.bool_parsing": [6],
|
||||
"$.queries.[*].source_type.enum": [3],
|
||||
"$.user_id.missing": [5],
|
||||
},
|
||||
"counter": Counter(
|
||||
{
|
||||
"$.queries.[*].source_type.enum": 1,
|
||||
"$.user_id.missing": 1,
|
||||
"$.queries.[*].is_priority.bool_parsing": 1,
|
||||
}
|
||||
),
|
||||
"missing_values": 0,
|
||||
"str_max_length": 38,
|
||||
"str_mean_length": 28.333333333333332,
|
||||
"str_min_length": 17,
|
||||
"str_std_length": 8.653836657164781,
|
||||
"unique_count": 3,
|
||||
},
|
||||
}
|
||||
@@ -0,0 +1,65 @@
|
||||
import streamlit as st
|
||||
from stats_dict import stats_dict
|
||||
import json
|
||||
|
||||
# Sample data: every line of test.jsonl, keyed by its zero-based line number.
# Each value is the raw line exactly as read, including its trailing newline.
# The context manager closes the file as soon as the lines are loaded; a bare
# open() call would leave the handle open until garbage collection.
with open("test.jsonl", "r", encoding="utf-8") as sample_file:
    query_data = dict(enumerate(sample_file))

# Selections made in each multiselect, keyed by stats path.
selected_keys = {}
|
||||
|
||||
|
||||
# Function to get lines
|
||||
def get_lines(stats_key, keys, *, stats=None, lines=None):
    """Return the sample lines whose value at ``stats_key`` is one of ``keys``.

    Args:
        stats_key: Key of the stats entry to consult (e.g. a path such as
            ``"$.user_id"``).
        keys: Iterable of values to look up in that entry's
            ``"_reverse_lookup"`` mapping.
        stats: Optional stats mapping to use instead of the module-level
            ``stats_dict``.
        lines: Optional mapping (or sequence) from line index to line text
            to use instead of the module-level ``query_data``.

    Returns:
        The matching lines joined with ``"\\n"``. Each line appears once, in
        the order its index is first encountered; an empty ``keys`` yields
        an empty string.

    Raises:
        KeyError: If ``stats_key``, one of ``keys``, or a referenced line
            index is not present.
    """
    if stats is None:
        stats = stats_dict
    if lines is None:
        lines = query_data

    reverse_lookup = stats[stats_key]["_reverse_lookup"]

    # A record can be listed under several selected values, and a single
    # lookup list may repeat an index; dict.fromkeys drops the repeats while
    # keeping first-seen order so no line is shown twice.
    indices = dict.fromkeys(
        index for key in keys for index in reverse_lookup[key]
    )

    # Lines read from a file still end in "\n"; strip it so the join below
    # does not leave a blank line between records.
    return "\n".join(lines[index].rstrip("\n") for index in indices)
|
||||
|
||||
|
||||
# Function to render dropdown and button
|
||||
def render_dropdown_and_button(stats_key):
    """Render the stats, histogram and sample picker for one stats entry.

    Shows the entry's value counter and its numeric summary fields as JSON,
    plots the counter as a bar chart, then offers a multiselect over the
    counter's keys plus a button that prints the matching sample lines.

    Args:
        stats_key: Key into ``stats_dict``; the entry must contain a
            ``"counter"`` mapping.
    """
    st.subheader(f"Stats for `{stats_key}`")

    entry = stats_dict[stats_key]
    value_counts = entry["counter"]
    st.json(value_counts)

    # Keep only the scalar summary fields (min, max, mean, ...); the nested
    # counter and reverse-lookup mappings are not numbers and are left out.
    scalar_stats = {}
    for field, value in entry.items():
        if isinstance(value, (int, float)):
            scalar_stats[field] = value
    st.json(scalar_stats)

    st.subheader("Histogram")
    st.bar_chart(value_counts, use_container_width=True)

    st.subheader("Select keys to view lines")
    earlier_choice = selected_keys.get(stats_key, [])
    chosen = st.multiselect(
        f"View samples with {stats_key}",
        list(value_counts),
        default=earlier_choice,
    )
    selected_keys[stats_key] = chosen

    show_requested = st.button(f"Show Selected for {stats_key}")
    if show_requested:
        st.code(get_lines(stats_key, chosen))
|
||||
|
||||
|
||||
# Sidebar navigation: choose which of the two pages to render.
st.sidebar.title("Navigation")
page = st.sidebar.selectbox(
    "Select a page:",
    ["Validation Stats", "Individual Path Views"],
)

# Title shown above whichever page is selected.
st.title("Query Data Visualizer")

if page == "Individual Path Views":
    # One view at a time for the entries whose key does not start with "_".
    st.header("Individual Path Views")
    data_paths = [key for key in stats_dict if not key.startswith("_")]
    path = st.selectbox("Choose a path:", data_paths)
    if "counter" in stats_dict[path]:
        render_dropdown_and_button(path)
elif page == "Validation Stats":
    # Every entry whose key starts with "_" is rendered one after another.
    st.header("Validation Stats")
    for key in stats_dict:
        if key.startswith("_"):
            render_dropdown_and_button(key)
|
||||
Reference in New Issue
Block a user