diff --git a/examples/evals/eval.py b/examples/evals/eval.py index 3b8de28..fad7a7f 100644 --- a/examples/evals/eval.py +++ b/examples/evals/eval.py @@ -25,7 +25,7 @@ class StreamingAccumulatorManager: try: # Replace this line with your validation logic obj = m.MultiSearch.model_validate(obj) - self.update(index, obj) + self.update(index, obj.model_dump()) self.accumulator[Status.IS_VALID.value].update(index, True) except ValidationError as e: self.accumulator[Status.IS_VALID.value].update(index, False) @@ -59,8 +59,6 @@ class StreamingAccumulatorManager: elif isinstance(data, Enum): enum_path = f"{path}.enum" self.accumulator[enum_path].update(index, data.value) - elif path != "$": - pass else: self.accumulator[path].update(index, data) diff --git a/examples/evals/stats_dict.py b/examples/evals/stats_dict.py new file mode 100644 index 0000000..9ca8678 --- /dev/null +++ b/examples/evals/stats_dict.py @@ -0,0 +1,224 @@ +from collections import Counter + +stats_dict = { + "$.queries.length": { + "_reverse_lookup": { + 1: [0, 1, 8, 9, 10, 13, 14, 15], + 2: [7, 11, 16], + 3: [12, 17], + }, + "counter": Counter({1: 8, 2: 3, 3: 2}), + "max": 3, + "mean": 1.5384615384615385, + "min": 1, + "missing_values": 0, + "std": 0.7457969011409735, + "unique_count": 3, + }, + "$.queries[*].is_priority": { + "_reverse_lookup": {False: [13], True: [1, 9, 14, 17]}, + "counter": Counter({True: 4, False: 1}), + "mean": 0.8, + "missing_values": 15, + "unique_count": 2, + }, + "$.queries[*].query": { + "_reverse_lookup": { + "customer churn": [1], + "customer feedback": [15], + "customer satisfaction": [11], + "email campaigns": [12], + "email open rates": [17], + "email outreach": [10], + "marketing strategies": [14], + "new products": [16], + "product sales": [11], + "revenue 2022": [9], + "revenue streams": [16], + "sales Q1": [0, 7, 8, 13], + "sales Q2": [7], + "social impact": [12], + "social trends": [17], + "web traffic": [12], + "website analytics": [17], + }, + "counter": Counter( 
+ { + "sales Q1": 4, + "customer churn": 1, + "sales Q2": 1, + "revenue 2022": 1, + "email outreach": 1, + "product sales": 1, + "customer satisfaction": 1, + "social impact": 1, + "email campaigns": 1, + "web traffic": 1, + "marketing strategies": 1, + "customer feedback": 1, + "revenue streams": 1, + "new products": 1, + "social trends": 1, + "email open rates": 1, + "website analytics": 1, + } + ), + "missing_values": 0, + "str_max_length": 21, + "str_mean_length": 13.15, + "str_min_length": 8, + "str_std_length": 3.8376425054973518, + "unique_count": 17, + }, + "$.queries[*].results_limit": { + "_reverse_lookup": { + 5: [17], + 10: [0, 1, 7, 7, 8, 9, 10, 11, 11, 12, 12, 12, 13, 15, 16, 16, 17, 17], + 15: [14], + }, + "counter": Counter({10: 18, 15: 1, 5: 1}), + "max": 15, + "mean": 10.0, + "min": 5, + "missing_values": 0, + "std": 1.5811388300841898, + "unique_count": 3, + }, + "$.queries[*].source_type.enum": { + "_reverse_lookup": { + "CRM": [0, 7, 8, 11, 13, 16], + "EMAIL": [10, 11, 12, 15, 17], + "SOCIAL_MEDIA": [12, 17], + "WEB": [1, 7, 9, 12, 14, 16, 17], + }, + "counter": Counter({"WEB": 7, "CRM": 6, "EMAIL": 5, "SOCIAL_MEDIA": 2}), + "missing_values": 0, + "str_max_length": 12, + "str_mean_length": 4.4, + "str_min_length": 3, + "str_std_length": 2.672077843177477, + "unique_count": 4, + }, + "$.queries[*].tags": { + "_reverse_lookup": {}, + "counter": Counter(), + "missing_values": 16, + "unique_count": 0, + }, + "$.queries[*].tags.length": { + "_reverse_lookup": {1: [15, 17], 2: [10, 14]}, + "counter": Counter({2: 2, 1: 2}), + "max": 2, + "mean": 1.5, + "min": 1, + "missing_values": 0, + "std": 0.5, + "unique_count": 2, + }, + "$.queries[*].tags[*]": { + "_reverse_lookup": { + "2022": [10], + "2023": [14], + "analytics": [17], + "feedback": [15], + "outreach": [10], + "strategy": [14], + }, + "counter": Counter( + { + "outreach": 1, + "2022": 1, + "strategy": 1, + "2023": 1, + "feedback": 1, + "analytics": 1, + } + ), + "missing_values": 0, + 
"str_max_length": 9, + "str_mean_length": 6.833333333333333, + "str_min_length": 4, + "str_std_length": 2.034425935955618, + "unique_count": 6, + }, + "$.user_id": { + "_reverse_lookup": { + "user_1": [0], + "user_10": [10], + "user_11": [11], + "user_12": [12], + "user_13": [13], + "user_14": [14], + "user_15": [15], + "user_16": [16], + "user_17": [17], + "user_2": [1], + "user_7": [7], + "user_8": [8], + "user_9": [9], + }, + "counter": Counter( + { + "user_1": 1, + "user_2": 1, + "user_7": 1, + "user_8": 1, + "user_9": 1, + "user_10": 1, + "user_11": 1, + "user_12": 1, + "user_13": 1, + "user_14": 1, + "user_15": 1, + "user_16": 1, + "user_17": 1, + } + ), + "missing_values": 0, + "str_max_length": 7, + "str_mean_length": 6.615384615384615, + "str_min_length": 6, + "str_std_length": 0.48650425541052295, + "unique_count": 13, + }, + "_is_json_": { + "_reverse_lookup": { + False: [2, 4], + True: [0, 1, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17], + }, + "counter": Counter({True: 16, False: 2}), + "mean": 0.8888888888888888, + "missing_values": 0, + "unique_count": 2, + }, + "_is_valid_": { + "_reverse_lookup": { + False: [3, 5, 6], + True: [0, 1, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17], + }, + "counter": Counter({True: 13, False: 3}), + "mean": 0.8125, + "missing_values": 0, + "unique_count": 2, + }, + "_validation_error_": { + "_reverse_lookup": { + "$.queries.[*].is_priority.bool_parsing": [6], + "$.queries.[*].source_type.enum": [3], + "$.user_id.missing": [5], + }, + "counter": Counter( + { + "$.queries.[*].source_type.enum": 1, + "$.user_id.missing": 1, + "$.queries.[*].is_priority.bool_parsing": 1, + } + ), + "missing_values": 0, + "str_max_length": 38, + "str_mean_length": 28.333333333333332, + "str_min_length": 17, + "str_std_length": 8.653836657164781, + "unique_count": 3, + }, +} diff --git a/examples/evals/streamlit.py b/examples/evals/streamlit.py new file mode 100644 index 0000000..d98b69d --- /dev/null +++ b/examples/evals/streamlit.py @@ 
-0,0 +1,96 @@
+import streamlit as st
+from stats_dict import stats_dict
+import json
+
+# Sample data: zero-based line number of test.jsonl -> raw line text.
+# The with-block closes the file as soon as the lines have been read.
+with open("test.jsonl", "r", encoding="utf-8") as jsonl_file:
+    query_data = dict(enumerate(jsonl_file))
+
+# Selections made in the multiselect widgets, keyed by stats key.
+selected_keys = {}
+
+
+# Function to get lines
+def get_lines(stats_key, keys):
+    """Return the input lines behind the selected values of one statistic.
+
+    Args:
+        stats_key: Key into ``stats_dict``.
+        keys: Values of that statistic; each must be a key of its
+            ``_reverse_lookup`` mapping.
+
+    Returns:
+        The matching ``query_data`` lines joined with newlines, each sample
+        listed once, in first-seen order.
+
+    Raises:
+        KeyError: If ``stats_key``, one of ``keys``, or a looked-up line
+            index is unknown.
+    """
+    reverse_lookup = stats_dict[stats_key]["_reverse_lookup"]
+    indices = []
+    for key in keys:
+        indices.extend(reverse_lookup[key])
+    # The same sample index can appear several times (repeated inside one
+    # lookup list, or shared by several selected values); dict.fromkeys drops
+    # the repeats while keeping first-seen order.
+    unique_indices = dict.fromkeys(indices)
+    # Lines read from the file keep their trailing newline; strip it so the
+    # join does not leave a blank line after every sample.
+    return "\n".join(query_data[i].rstrip("\n") for i in unique_indices)
+
+
+# Function to render dropdown and button
+def render_dropdown_and_button(stats_key):
+    """Render the counts, numeric summary, histogram and sample picker.
+
+    Args:
+        stats_key: Key into ``stats_dict`` whose statistics are displayed.
+    """
+    stats = stats_dict[stats_key]
+    counter = stats["counter"]
+
+    st.subheader(f"Stats for `{stats_key}`")
+    st.json(counter)
+    # Scalar summary values only (this also drops the nested lookup/counter).
+    st.json({k: v for k, v in stats.items() if isinstance(v, (int, float))})
+    st.subheader("Histogram")
+    st.bar_chart(counter, use_container_width=True)
+
+    st.subheader("Select keys to view lines")
+    selected_keys[stats_key] = st.multiselect(
+        f"View samples with {stats_key}",
+        list(counter),
+        default=selected_keys.get(stats_key, []),
+    )
+    if st.button(f"Show Selected for {stats_key}"):
+        st.code(get_lines(stats_key, selected_keys[stats_key]))
+
+
+# Sidebar for navigation
+st.sidebar.title("Navigation")
+page = st.sidebar.selectbox(
+    "Select a page:",
+    ["Validation Stats", "Individual Path Views"],
+)
+
+# Main Streamlit App
+st.title("Query Data Visualizer")
+
+# Validation Stats
+if page == "Validation Stats":
+    st.header("Validation Stats")
+    for key in stats_dict:
+        if key.startswith("_"):
+            render_dropdown_and_button(key)
+
+# Individual Path Views
+elif page == "Individual Path Views":
+    st.header("Individual Path Views")
+    path = st.selectbox(
+        "Choose a path:",
+        [key for key in stats_dict if not key.startswith("_")],
+    )
+    if "counter" in stats_dict[path]:
+        render_dropdown_and_button(path)