# Export unique, non-null phone numbers from the user dump to a text file.
df = pd.read_json("users_export_phone.json")
df.drop_duplicates(subset=["phone_number"], inplace=True)
df.dropna(subset=["phone_number"], inplace=True)

# One phone number per line.
txt_num = "\n".join(df["phone_number"].astype("str").tolist())
with open("phone_numbers.txt", "w") as file:
    file.write(txt_num)

# Accumulator for the per-article word stats concatenated in the loop below.
df_all = pd.DataFrame()
# Load every word-count export whose filename mentions "article" and
# concatenate it into df_all, normalising the column name on the way in.
for path in glob.iglob("word_data/*.json"):
    if "article" not in path:
        continue
    df = pd.read_json(path)
    # Older exports call the word count "count"; unify on "words".
    if "count" in df.columns.tolist():
        df.rename(columns={"count": "words"}, inplace=True)
    df_all = pd.concat([df, df_all], ignore_index=True)

# Keep only records created after 2023-07-23 and tag each with its month.
df_all["created"] = pd.to_datetime(df_all["created"])
df_all = df_all[df_all["created"] > "2023-07-23"]
df_all["month"] = df_all["created"].dt.month
# Monthly totals of completed items and words written.
df_all.groupby("month").agg({"complete": "sum", "words": "sum"})
# Observed output:
#   month  complete    words
#   1          1972  1944453
#   7           508   663098
#   8          1310  1688996
#   9          1479  1944561
#   10         1804  2373311
#   11         1935  1904306
#   12         2876  2759498
# NOTE(review): "1-10-2023" is ambiguous — pandas parses it month-first as
# 2023-01-10; if October 1st 2023 was intended, use "2023-10-01". TODO confirm.
dd = df_all[df_all["created"] > "1-10-2023"]
dd["words"].max()  # observed: 8839

# NOTE(review): no "week" or "day" column is created anywhere visible in this
# file, so these two groupbys look like they would raise KeyError as written —
# verify against the upstream cells that are not shown here.
df_month = df_all.groupby("week").agg({"words": "sum"}).reset_index()
df_day = df_all.groupby("day").agg({"words": "sum"}).reset_index()

# Mean of the per-day word averages over the filtered range.
a = dd.groupby(pd.Grouper(key="created", freq="D")).agg({"words": "mean"})
print(a["words"].mean())  # observed: 614.6506297601618

# Mean of the per-week word averages over the whole dataset.
a = df_all.groupby(pd.Grouper(key="created", freq="W")).agg({"words": "mean"})
print(a["words"].mean())  # observed: 591.223619913266
def weekinmonth(dates):
    """Get week number in a month.
    Parameters:
        dates (pd.Series): Series of dates.
    Returns:
        pd.Series: Week number in a month.
    """
    # Shift each date back to the 1st of its month, then use that day's
    # weekday to offset the week grid so weeks break on Mondays.
    month_start = dates - pd.to_timedelta(dates.dt.day - 1, unit="d")
    offset = month_start.dt.weekday
    return (dates.dt.day + offset - 1) // 7 + 1
# weekinmonth(df["created"]) raised here:
#   AttributeError: Can only use .dt accessor with datetimelike values
# (df["created"] was not datetime-typed at this point in the session).

# Normalise Jalali date strings from the spreadsheet: "1401/01/01" -> "1401-01-01".
df = pd.read_excel("Tarikh.xlsx", header=None)
df.dropna(inplace=True)
df[0] = df[0].apply(lambda x: str(x).replace("/", "-"))

# Scratch experiments with the raw format (kept for reference):
#   df.iloc[0][0]                  -> '1401/01/01'
#   "1401/01/01".replace("/", "")  -> '14010101'
#   import re
#   re.sub("\\", " ", "1401/01/01")   # invalid pattern as written
#
# df after normalisation (4412 rows x 1 column):
#      0
#   0     1401-01-01
#   1     1401-01-02
#   2     1401-01-03
#   3     1401-01-04
#   4     1401-01-05
#   ...   ...
#   4573  1402-07-08
#   4574  1402-07-08
#   4575  1402-07-08
#   4576  1402-07-09
#   4577  1402-07-08
import jalali  # third-party Jalali<->Gregorian date converter

# Example: jalali.Persian("1401-04-24").gregorian_string() -> '2022-7-15'

def to_georgian(row):
    """Convert a Jalali date string (e.g. "1401-04-24") to a Gregorian
    date string via the `jalali` package.

    Returns "BAD FORMAT" for any value the converter cannot parse.
    NOTE(review): the name looks like a typo for "to_gregorian"; kept
    unchanged for compatibility with existing callers.
    """
    try:
        return jalali.Persian(str(row)).gregorian_string()
    except Exception:  # was a bare `except:` — don't swallow SystemExit/KeyboardInterrupt
        return "BAD FORMAT"

df["georgian"] = df[0].apply(lambda x: to_georgian(x))

# Switch to the Mixpanel events export.
df = pd.read_csv("events-export-2920092-1697700606932.csv")
df["Time"].iloc[0]  # observed: 1693584039.558
import json

# Load the raw Mixpanel events export.
with open("events-export-2920092-1697700859686.json") as f:
    j = json.load(f)

# j[0] sample record:
# {'event': 'Create Purchase',
#  'properties': {'time': 1693584039.558,
#                 'distinct_id': 'mehdiahmadvand123s@gmail.com',
#                 '$insert_id': 'd1e5322e31ef4eb69e5d34080537e7f1',
#                 '$lib_version': '4.10.0',
#                 '$mp_api_endpoint': 'api.mixpanel.com',
#                 '$mp_api_timestamp_ms': 1693584039627,
#                 'Discount': None,
#                 'Plan': 3112720570236792,
#                 'Price': 100000,
#                 'Status': 'paied',
#                 'Words after purchase': 5000,
#                 'Words before purchase': 5000,
#                 'mp_lib': 'python',
#                 'mp_processing_time_ms': 1693584039662},
#  'isExpanded': True}
import json
import glob

# Collect every JSON export in the working directory.
dirs = []
for i in glob.iglob("*.json"):
    dirs.append(i)

# NOTE(review): dirs[3] depends entirely on glob's filesystem ordering —
# fragile; confirm which export file is actually intended here.
with open(dirs[3]) as file:
    j = json.load(file)

import pandas as pd
import re

# Dump the discount codes (one per line in the txt) into an Excel sheet.
with open("discount_codes.txt") as file:
    codes = file.readlines()
codes = [x.replace("\n", "") for x in codes]
df = pd.DataFrame({"codes": codes})
df.to_excel("discount_codes.xlsx", index=False)

a = """
html
asdad
html
"""
# a.strip() -> 'html\nasdad\nhtml'
if a.strip().startswith("html"):
    # Slices the raw (unstripped) string, so the leading "\nhtm" is cut off:
    # observed output was "l\nasdad\nhtml".
    print(a[4:])