test-notebook

nothing
Published

July 17, 2024

# pandas and glob are needed from this first cell on; the notebook's own
# import lines only appear much further down, so a fresh kernel would fail here.
import glob

import pandas as pd

# Export the distinct, non-null phone numbers from the user dump, one per line.
# NOTE(review): read_json infers dtypes by default; if phone_number is read as
# a number, the text written below is the numeric repr — confirm the column is
# loaded as strings.
df = (
    pd.read_json("users_export_phone.json")
    .drop_duplicates(subset=["phone_number"])
    .dropna(subset=["phone_number"])
)
txt_num = "\n".join(df["phone_number"].astype("str").tolist())
with open("phone_numbers.txt", "w") as file:
    file.write(txt_num)
# Gather every article word-count export, then concatenate once after the loop
# instead of re-copying the accumulated frame on every iteration.
article_frames = []
for path in glob.iglob("word_data/*.json"):
    if "article" not in path:
        continue
    df = pd.read_json(path)
    # Some exports label the word-count column "count"; align it with "words".
    # rename() is a no-op when the column is absent.
    df = df.rename(columns={"count": "words"})
    article_frames.append(df)

if not article_frames:
    raise FileNotFoundError("no article exports matched word_data/*.json")

# Last-read file first: the same row order as prepending each file in turn.
df_all = pd.concat(article_frames[::-1], ignore_index=True)
df_all["created"] = pd.to_datetime(df_all["created"])
# .copy() makes the filtered frame independent, so adding the "month" column
# below does not write into a slice (SettingWithCopyWarning).
df_all = df_all[df_all["created"] > "2023-07-23"].copy()
df_all["month"] = df_all["created"].dt.month
# Totals per calendar month number (the year is not part of the key).
df_all.groupby("month").agg({"complete": "sum", "words": "sum"})
complete words
month
1 1972 1944453
7 508 663098
8 1310 1688996
9 1479 1944561
10 1804 2373311
11 1935 1904306
12 2876 2759498
# Rows created after the cutoff, then the largest single "words" value.
# NOTE(review): "1-10-2023" is not ISO formatted; pandas is expected to read it
# month-first (10 January 2023) — confirm that 1 October was not intended.
recent_mask = df_all["created"] > "1-10-2023"
dd = df_all[recent_mask]
dd["words"].max()
8839
# Total words per "week" and per "day".
# NOTE(review): no visible cell creates "week" or "day" columns on df_all —
# confirm where they are defined, and that df_month (grouped by "week") is
# named as intended.
df_month = df_all.groupby("week")["words"].sum().reset_index()
df_day = df_all.groupby("day")["words"].sum().reset_index()
# Mean of the per-day mean word counts in dd.
daily_bins = pd.Grouper(key='created', freq='D')
a = dd.groupby(daily_bins)[["words"]].mean()
print(a["words"].mean())
614.6506297601618
# Mean of the per-week mean word counts in df_all.
weekly_bins = pd.Grouper(key='created', freq='W')
a = df_all.groupby(weekly_bins)[["words"]].mean()
print(a["words"].mean())
591.223619913266
def weekinmonth(dates):
    """Compute the 1-based week-of-month index for each date.

    The index is the zero-based day of the month, shifted by the weekday
    (``dt.weekday``) of that month's first day, integer-divided by 7, plus 1.

    Parameters:
        dates (pd.Series): Datetime-like series (must support ``.dt``).
    Returns:
        pd.Series: Week-of-month number for every element of ``dates``.
    """
    day_index = dates.dt.day - 1
    # Step back to the first day of each date's month.
    month_start = dates - pd.to_timedelta(day_index, unit='d')
    leading_days = month_start.dt.weekday
    return (day_index + leading_days) // 7 + 1
# df["created"] is not datetime-typed at this point (the .dt accessor raised
# AttributeError), so parse it before computing the week of the month.
weekinmonth(pd.to_datetime(df["created"]))
AttributeError: Can only use .dt accessor with datetimelike values
# Load the header-less date sheet and drop rows with missing cells.
df = pd.read_excel("Tarikh.xlsx", header=None).dropna()
# Use "-" instead of "/" as the separator in the first column.
df[0] = [str(value).replace("/", "-") for value in df[0]]
# Peek at the first remaining value.
df.iloc[0, 0]
'1401/01/01'
"1401/01/01".replace("/", "")
# Regex alternative (not needed here):
# import re; re.sub("/", "", "1401/01/01")
'14010101'
# Inspect the date column after the separator replacement.
df
0
0 1401-01-01
1 1401-01-02
2 1401-01-03
3 1401-01-04
4 1401-01-05
... ...
4573 1402-07-08
4574 1402-07-08
4575 1402-07-08
4576 1402-07-09
4577 1402-07-08

4412 rows × 1 columns

import jalali
# Sanity check: convert one Jalali date string to its Gregorian string form.
jalali.Persian("1401-04-24").gregorian_string()
'2022-7-15'
def to_georgian(row):
    """Convert a Jalali (Persian) date value to a Gregorian date string.

    Parameters:
        row: Value whose string form is handed to ``jalali.Persian``,
            e.g. "1401-04-24".
    Returns:
        str: The result of ``gregorian_string()``, or "BAD FORMAT" when the
            conversion raises an ordinary exception.
    """
    try:
        text = str(row)
        return jalali.Persian(text).gregorian_string()
    except Exception:
        # Only conversion errors are mapped to the sentinel; a bare except
        # would also swallow KeyboardInterrupt and SystemExit.
        return "BAD FORMAT"
# Convert every value of column 0; failures become "BAD FORMAT".
df["georgian"] = df[0].apply(to_georgian)
# Load the events export and look at the first "Time" value.
df = pd.read_csv("events-export-2920092-1697700606932.csv")
df["Time"].iat[0]
1693584039.558
import json
# Parse the JSON events export and show its first record.
with open("events-export-2920092-1697700859686.json") as events_file:
    j = json.loads(events_file.read())
j[0]
{'event': 'Create Purchase',
 'properties': {'time': 1693584039.558,
  'distinct_id': 'mehdiahmadvand123s@gmail.com',
  '$insert_id': 'd1e5322e31ef4eb69e5d34080537e7f1',
  '$lib_version': '4.10.0',
  '$mp_api_endpoint': 'api.mixpanel.com',
  '$mp_api_timestamp_ms': 1693584039627,
  'Discount': None,
  'Plan': 3112720570236792,
  'Price': 100000,
  'Status': 'paied',
  'Words after purchase': 5000,
  'Words before purchase': 5000,
  'mp_lib': 'python',
  'mp_processing_time_ms': 1693584039662},
 'isExpanded': True}
import json
import glob
# All JSON files in the working directory.
dirs = list(glob.iglob("*.json"))
# NOTE(review): index 3 depends on the order glob returns the files in, which
# is not fixed by this code — confirm which file is meant.
with open(dirs[3]) as file:
    j = json.load(file)
import pandas as pd
import re
# Read one discount code per line, removing the newline characters.
with open("discount_codes.txt") as file:
    codes = [line.replace("\n", "") for line in file]
# Write the codes to a single-column spreadsheet without the index.
df = pd.DataFrame({"codes": codes})
df.to_excel("discount_codes.xlsx", index=False)
# Sample text with "html" marker lines, wrapped in leading/trailing newlines.
a = """
html
asdad
html
"""
# Show the text with the surrounding whitespace removed.
a.strip()
'html\nasdad\nhtml'
# Drop the leading "html" marker. Slice the stripped text: slicing `a` itself
# counts the leading newline and leaves a stray "l" behind.
stripped = a.strip()
if stripped.startswith("html"):
    print(stripped[len("html"):])
l
asdad
html