asd - test-notebook

# Export every distinct, non-missing phone number to a text file, one per line.
# Clean-up order is unchanged: de-duplicate first, then drop missing values.
df = (
    pd.read_json("users_export_phone.json")
    .drop_duplicates(subset=["phone_number"])
    .dropna(subset=["phone_number"])
)

# NOTE(review): if the column was parsed as float, astype("str") produces values
# such as "123.0" — confirm the dtype of "phone_number" before using the file.
phone_column = df["phone_number"].astype("str")
txt_num = "\n".join(phone_column.tolist())
with open("phone_numbers.txt", "w") as file:
    file.write(txt_num)

# Load every "article" JSON export under word_data/ and stack them into one frame.
# The frames are collected in a list and concatenated once: calling pd.concat
# inside the loop re-copies all previously accumulated rows on every iteration.
frames = []
for i in glob.iglob("word_data/*.json"):
    # Only paths containing "article" are wanted; everything else is skipped.
    if "article" not in i:
        continue
    df = pd.read_json(i)
    # Some files carry a "count" column; rename it so every frame uses "words".
    if "count" in df.columns:
        df.rename(columns={"count": "words"}, inplace=True)
    frames.append(df)

# The loop used to prepend each new frame, so reverse to keep the same row order.
# With no matching files, fall back to an empty frame exactly as before.
df_all = pd.concat(frames[::-1], ignore_index=True) if frames else pd.DataFrame()

# Parse the creation timestamps so they can be compared and bucketed by date.
df_all["created"] = pd.to_datetime(df_all["created"])

# Keep only rows created after 2023-07-23. Take an explicit copy: assigning a
# new column onto a filtered selection otherwise raises SettingWithCopyWarning.
df_all = df_all.loc[df_all["created"] > "2023-07-23"].copy()

# NOTE(review): the bucket is the calendar month only, so the same month from
# different years would be merged — confirm the data spans less than 12 months.
df_all["month"] = df_all["created"].dt.month
df_all.groupby("month").agg({"complete": "sum", "words": "sum"})

	complete	words
month
1	1972	1944453
7	508	663098
8	1310	1688996
9	1479	1944561
10	1804	2373311
11	1935	1904306
12	2876	2759498

# NOTE(review): "1-10-2023" is an ambiguous date string; it is presumably parsed
# month-first (10 January 2023) — confirm whether 1 October 2023 was intended.
dd = df_all[df_all["created"] > "1-10-2023"]
dd["words"].max()

# NOTE(review): no "week" or "day" column is created in the visible code, and
# `df_month` is grouped by "week" — verify both against the rest of the notebook.
df_month = df_all.groupby("week").agg({"words": "sum"}).reset_index()
df_day = df_all.groupby("day").agg({"words": "sum"}).reset_index()

# Bucket `dd` into calendar days, take the mean of "words" per day, then print
# the mean of those daily means.
a= dd.groupby(pd.Grouper(key='created', freq='D')).agg({"words": "mean"})
print(a["words"].mean())

614.6506297601618

# Same statistic at weekly granularity over the whole frame: mean of "words"
# per week, then the mean of those weekly means.
weekly_bins = pd.Grouper(key="created", freq="W")
a = df_all.groupby(weekly_bins).agg({"words": "mean"})
print(a["words"].mean())

591.223619913266

def weekinmonth(dates):
    """Return the 1-based week-of-month number for each date.

    Weeks start on Monday; week 1 is the (possibly partial) week that
    contains the first day of the month.

    Parameters:
        dates (pd.Series): Series of datetimes, or of values that
            ``pd.to_datetime`` can parse (e.g. ISO date strings).
    Returns:
        pd.Series: Week number within the month, aligned with ``dates``.
    """
    # Coerce up front: the ``.dt`` accessor raises AttributeError on a Series
    # that is not datetime-like (for example a column of date strings).
    dates = pd.to_datetime(dates)
    day_offset = dates.dt.day - 1
    # Step back to the first day of each date's month to learn its weekday.
    firstday_in_month = dates - pd.to_timedelta(day_offset, unit="D")
    # Shift by the weekday of the 1st so week boundaries fall on Mondays.
    return (day_offset + firstday_in_month.dt.weekday) // 7 + 1
# The `.dt` accessor only works on datetime-like values, so parse the column
# before computing the week numbers.
weekinmonth(pd.to_datetime(df["created"]))

AttributeError: Can only use .dt accessor with datetimelike values

# Load the headerless sheet, discard rows with missing values, and rewrite the
# first column as text with "/" separators replaced by "-".
df = pd.read_excel("Tarikh.xlsx", header=None)
df.dropna(inplace=True)
df[0] = df[0].map(lambda cell: str(cell).replace("/", "-"))

# Peek at the first cell.
df.iloc[0, 0]

'1401/01/01'

"1401/01/01".replace("/", "")
# import re
# re.sub("\\", " ","1401/01/01")

'14010101'

df

	0
0	1401-01-01
1	1401-01-02
2	1401-01-03
3	1401-01-04
4	1401-01-05
...	...
4573	1402-07-08
4574	1402-07-08
4575	1402-07-08
4576	1402-07-09
4577	1402-07-08

4412 rows × 1 columns

import jalali

jalali.Persian("1401-04-24").gregorian_string()

'2022-7-15'

def to_georgian(row):
    """Convert a Jalali (Persian) date to a Gregorian date string.

    Parameters:
        row: The Jalali date; it is passed through ``str`` before parsing,
            e.g. "1401-04-24".
    Returns:
        str: The result of ``gregorian_string()`` for the parsed date, or the
            sentinel "BAD FORMAT" when the conversion raises.
    """
    try:
        return jalali.Persian(str(row)).gregorian_string()
    # Catch Exception rather than using a bare ``except`` so that
    # KeyboardInterrupt and SystemExit still propagate during a long apply().
    except Exception:
        return "BAD FORMAT"

# Convert every value of column 0 and store the results in a "georgian" column.
df["georgian"] = df[0].apply(to_georgian)

# Load the CSV events export and look at the first value of the "Time" column.
# NOTE(review): the value looks like a Unix timestamp in seconds — confirm the
# unit before converting it to a datetime.
df = pd.read_csv("events-export-2920092-1697700606932.csv")

df["Time"].iloc[0]

1693584039.558

import json

# Load the JSON events export. The encoding is passed explicitly because JSON
# text is UTF-8, whereas open() otherwise falls back to the platform default.
with open("events-export-2920092-1697700859686.json", encoding="utf-8") as f:
    j = json.load(f)

# Peek at the first record.
j[0]

{'event': 'Create Purchase',
 'properties': {'time': 1693584039.558,
  'distinct_id': 'mehdiahmadvand123s@gmail.com',
  '$insert_id': 'd1e5322e31ef4eb69e5d34080537e7f1',
  '$lib_version': '4.10.0',
  '$mp_api_endpoint': 'api.mixpanel.com',
  '$mp_api_timestamp_ms': 1693584039627,
  'Discount': None,
  'Plan': 3112720570236792,
  'Price': 100000,
  'Status': 'paied',
  'Words after purchase': 5000,
  'Words before purchase': 5000,
  'mp_lib': 'python',
  'mp_processing_time_ms': 1693584039662},
 'isExpanded': True}

import json
import glob

# Names of all JSON files in the current working directory, in glob order.
dirs = list(glob.iglob("*.json"))

# NOTE(review): glob yields names in arbitrary, OS-dependent order, so index 3
# may not select the same file on every machine — consider choosing it by name.
# The encoding is explicit because JSON text is UTF-8 and open() otherwise uses
# the platform default.
with open(dirs[3], encoding="utf-8") as file:
    j = json.load(file)

import pandas as pd
import re

# Read one discount code per line, dropping each line's trailing newline.
with open("discount_codes.txt") as file:
    codes = [line.rstrip("\n") for line in file]

# Write the codes to a single-column spreadsheet without the index column.
df = pd.DataFrame({"codes": codes})
df.to_excel("discount_codes.xlsx", index=False)

a = """
html
asdad
html
"""
a.strip()

'html\nasdad\nhtml'

# Test the prefix and take the slice on the same stripped text: slicing the raw
# string counts its leading newline and leaves the final "l" of "html" behind.
stripped = a.strip()
if stripped.startswith("html"):
    print(stripped[len("html"):])

l
asdad
html