import operator

import numpy as np
import pandas as pd
# --- Creating a Series -------------------------------------------------------
# Results are printed explicitly: a bare expression only displays its value in
# a notebook/REPL and is silently discarded when this file runs as a script.

# Empty Series. The dtype is spelled out because the default dtype of an empty
# Series is not the same across pandas versions.
s = pd.Series(dtype='float64')
print(s)

# A scalar becomes a one-element Series.
print(type(5))
print(pd.Series(5))

# From a list: the index defaults to 0..n-1.
l = [1, 3, 5, np.nan, 45]
print(type(l))
s = pd.Series(l)
print(s)

# From a dict: the keys become the index labels.
d = {'b': 1, 'a': 0, 'c': 2}
print(type(d))
print(pd.Series(d))

d = {'a': 0., 'b': 1., 'c': 2.}
print(pd.Series(d))  # index order follows the dict order
# With an explicit index, the index order follows the given labels instead.
print(pd.Series(d, index=['b', 'c', 'd', 'a']))

# From an ndarray with explicit labels.
index = list('abcde')
s = pd.Series(np.random.randn(5), index=index)
print(type(np.random.randn(5)))
print(s)

# From a list with explicit labels.
l = [1, 3, 5, np.nan, 45]
index = list('ABCDE')
s = pd.Series(l, index=index)
print(s)
print(s.index)

# --- Naming a Series ---------------------------------------------------------
series = pd.Series([2, 43, 9, 27, np.nan], name='Jasper')
print(series.name)

# Note that series and series2 refer to different objects: rename() leaves
# the name of `series` untouched.
series2 = series.rename("Steven")
print(series.name)
print(series2.name)
# --- Element-wise comparisons with a fill value -------------------------------
# For every flexible comparison method (eq, ne, le, ge, lt, gt) show that
# `series1.<method>(series2, fill_value=v)` gives the same answer as filling
# the NaNs with `v` first and then applying the plain operator.
series1 = pd.Series([2, 43, 9, 27, np.nan])
series2 = pd.Series([np.nan, 23, 5, 27, 54])
replace_nan = 10
separator = '-' * 50

# (Series method name, equivalent binary operator), in demonstration order.
comparisons = [
    ('eq', operator.eq),
    ('ne', operator.ne),
    ('le', operator.le),
    ('ge', operator.ge),
    ('lt', operator.lt),
    ('gt', operator.gt),
]

for method_name, binary_op in comparisons:
    print(series1)
    print(separator)
    print(series2)
    print(separator)
    # Flexible method: NaNs are replaced by fill_value before comparing.
    result = getattr(series1, method_name)(series2, fill_value=replace_nan)
    print(result)
    print(separator)
    # Manual equivalent: fill first, then use the ordinary operator.
    result2 = binary_op(series1.fillna(replace_nan), series2.fillna(replace_nan))
    print(result2)
# --- between ------------------------------------------------------------------
# Results are printed explicitly: a bare expression is discarded when this
# file runs as a script.
series = pd.Series([5, 3, 1, 1, np.nan, 9, 21, 3, 8])
print(series.between(2, 8))

# --- combine_first ------------------------------------------------------------
# Values of series1 are kept; its NaNs are filled from series2 where available.
series1 = pd.Series([2, 43, 9, 27, np.nan])
print(series1)
print('-' * 50)
series2 = pd.Series([np.nan, 23, 5, 27, np.nan])
print(series2)
print('-' * 50)
result = series1.combine_first(series2)
print(result)

# --- combine ------------------------------------------------------------------
# combine() applies a binary function element by element; here it keeps the
# smaller of the two values (any other two-argument function, e.g. addition,
# could be passed instead).
first = [1, 2, 5, 6, 3, 7, 11, 0, 4]
second = [5, 3, 2, 1, 3, 9, 21, 3, 1]
first = pd.Series(first)
second = pd.Series(second)
result = first.combine(second, (lambda x1, x2: x1 if x1 < x2 else x2))
print(result)

# --- size vs. count -----------------------------------------------------------
# size is the number of elements including NaN; count() leaves NaN out.
first = [1, 2, 5, 6, 3, 7, 11, 0, 4]
second = [5, 3, 2, 1, 3, 9, 21, 3, np.nan]
first = pd.Series(first)
second = pd.Series(second)
print(first.size)
print(second.size)
print(first.count())
print(second.count())
# --- Addition -----------------------------------------------------------------
# Three spellings of the same element-wise sum: the + operator, add() and the
# reflected radd(). Each result is printed after a separator line.
index = ['a', 'b', 'c', 'd', 'e']
series = pd.Series(np.random.randn(5), index=index)
print(series)
for doubled in (series + series, series.add(series), series.radd(series)):
    print('-' * 50)
    print(doubled)
# Equivalent to `series - other`, but with support for substituting a fill_value for missing data in one of the inputs.
# Parameters:
# other : Series, DataFrame, or constant
# axis : {0, 1, 'index', 'columns'}
#     - For Series input, the axis to match the Series index on
# level : int or name
#     - Broadcast across a level, matching Index values on the passed MultiIndex level
# fill_value : None or float value, default None
#     - Fill existing missing (NaN) values, and any new element needed for successful DataFrame alignment, with this value before computation. If data in both corresponding DataFrame locations is missing, the result will be missing
# --- Subtraction --------------------------------------------------------------
# Results are printed explicitly: a bare expression is discarded when this
# file runs as a script.
index = pd.date_range('1/1/2000', periods=8)
series1 = pd.Series(np.random.randn(8), index=index)
print(series1)
print('-' * 50)
series2 = pd.Series(np.random.randn(8), index=index)
print(series2)
print('-' * 50)
print(series1.sub(series2))

# --- Multiplication -----------------------------------------------------------
# Squaring with ** and multiplying a Series by itself give the same values.
index = ['a', 'b', 'c', 'd', 'e']
series = pd.Series(np.random.randn(5), index=index)
print(series)
print('-' * 50)
print(series ** 2)
print(series.mul(series))

# --- Division -----------------------------------------------------------------
# div() accepts either a scalar or another Series.
index = ['a', 'b', 'c', 'd', 'e']
series = pd.Series(np.random.randn(5), index=index)
print(series)
print('-' * 50)
print(series.div(2))
print(series.div(series))
# --- Uniqueness ---------------------------------------------------------------
# Results are printed explicitly: a bare expression is discarded when this
# file runs as a script.
first = [1, 2, 3, 1, 2, 7, 3, 0, 4]
first = pd.Series(first)
print(type(first.unique()))
print(first.unique())
print(first.nunique())
print(first.is_unique)

second = [1, 2, 3, 4, 5, 6, 7]
second = pd.Series(second)
print(second.is_unique)

# --- Extremes and their positions ---------------------------------------------
series = pd.Series([5, 3, 2, 1, 3, 9, 21, 3, np.nan])
print(series.max())
print(series)
print('-' * 50)
print('The index of the highest value is ', series.idxmax())
print(series.min())

series = pd.Series([5, 3, 1, 1, 3, 9, 21, 3, np.nan])
print(series)
print('-' * 50)
print('The index of the lowest value is ', series.idxmin())

# --- Frequency of each value --------------------------------------------------
print(series.value_counts())

# --- Aggregations -------------------------------------------------------------
series = pd.Series([5, 3, 1, np.nan])
print(series.sum())
print(series.prod())

series = pd.Series([5, 3, 0, 0, np.nan])
print(series.mean())

# --- Power --------------------------------------------------------------------
# Without fill_value a NaN on either side propagates (except where the
# arithmetic itself defines a result); with fill_value=1 a NaN on one side is
# replaced by 1 before the computation.
series1 = pd.Series([2, 3, 4, np.nan, 3, 1])
series2 = pd.Series([1, 2, 3, 2, np.nan, np.nan])
print(series1.pow(series2))
print(series1.pow(series2, fill_value=1))

# --- Absolute value -----------------------------------------------------------
series = pd.Series([-2, -3, 4, np.nan])
print(series.abs())
# --- Floor division and remainder ----------------------------------------------
series = pd.Series(np.arange(10))
print(series.tolist())
print('-'*50)

# Built-in divmod() returns the quotient and the remainder as two Series.
print('use divmod')
div, rem = divmod(series, 3)
for part in (div, rem):
    print(part.tolist())
print('-'*50)

# The // and % operators produce the same two results separately.
print('use // & %')
div2, rem2 = series // 3, series % 3
for part in (div2, rem2):
    print(part.tolist())
print('-'*50)

# divmod() against a list of the same length works element by element.
print('use divmod and a list')
divisors = [2, 2, 3, 3, 4, 4, 5, 5, 6, 6]
div4, rem4 = divmod(series, divisors)
for part in (div4, rem4):
    print(part.tolist())
# --- Sorting ------------------------------------------------------------------
# Results are printed explicitly: a bare expression is discarded when this
# file runs as a script.
series = pd.Series([5, 3, 1, 1, 3, 9, 21, 3, np.nan])
print(series.sort_values())
print(series.sort_values(ascending=False))
# Sorting by value and then by index brings back the original order.
print(series.sort_values().sort_index())

# --- Selecting elements -------------------------------------------------------
print(series.get(1))
print(series.head())
print(series.tail())

# --- Clipping -----------------------------------------------------------------
series = pd.Series([5, 3, 1, 1, 3, 9, 21, 3, np.nan])
print(series)
series.clip(lower=2, upper=4, inplace=True)  # inplace=True modifies the original Series
print(series)

# Series.clip_lower / Series.clip_upper no longer exist in current pandas;
# one-sided clipping is done with the `lower` / `upper` keywords of clip().
series = pd.Series([5, 3, 1, 1, 3, 9, 21, 3, np.nan])
print(series)
print(series.clip(lower=2))

series = pd.Series([5, 3, 1, 1, 3, 9, 21, 3, np.nan])
print(series)
print(series.clip(upper=4))
# --- Type conversion ----------------------------------------------------------
# Results are printed explicitly: a bare expression is discarded when this
# file runs as a script.
series = pd.Series([5, 3, 1, 1, 3, 9, 21, 3, np.nan])
print('dtype is', series.dtypes)
# The Series still holds a NaN here; errors='ignore' suppresses a failed cast.
print('dtype is', series.astype(int, errors='ignore').dtypes)
print('dtype is', series.fillna(0).astype(int).dtypes)
print('dtype is', series.fillna('A').astype(str).dtypes)
print('dtype is', series.dropna().astype(str).dtypes)
print(series.fillna(0).astype(int))

# --- Converting to a list -----------------------------------------------------
print(type(series))
print(type(series.tolist()))
print(series.tolist())

# --- factorize ----------------------------------------------------------------
series = pd.Series([5, 3, 1, 1, np.nan, 9, 21, 3, np.nan])
print(series.factorize())
# Equivalent to locating every element of `series` by its position in the
# Series printed below; NaN is encoded as -1.
print(pd.Series(series.dropna().unique()))


# --- map / apply --------------------------------------------------------------
def rescale(x):
    """Return *x* rescaled by magnitude: x*10 below 5, x*2 below 10, 0 for NaN, else x/10.

    NaN fails both ``<`` comparisons, so it reaches the explicit NaN check.
    """
    if x < 5:
        return x * 10
    if x < 10:
        return x * 2
    if np.isnan(x):
        return 0
    return x / 10


# map() and apply() both call the function once per element.
print(series.map(rescale))
print(series.apply(rescale))
# --- shift --------------------------------------------------------------------
# Results are printed explicitly: a bare expression is discarded when this
# file runs as a script.
series = pd.Series([1, 3, 5, np.nan, 6, 8])
print(series)
series2 = series.shift(2)
print(series2)

# Adding a Series to its shifted copy: any position where either side is NaN
# yields NaN.
series1 = pd.Series([1, 3, 5, np.nan, 6, 8])
print(series1)
series2 = series1.shift(2)
print(series2)
print(series1 + series2)

# --- Indexing -----------------------------------------------------------------
# Positional access on a label-indexed Series goes through .iloc: using plain
# integers with [] on a non-integer index is deprecated in pandas.
index = ['a', 'b', 'c', 'd', 'e']
series = pd.Series(np.random.randn(5), index=index)
print(series)
print('-' * 50)
print('series.iloc[0] is\n', series.iloc[0])
print('-' * 50)
print('series[:3] is \n', series[:3])
print('-' * 50)
print('series.iloc[[4, 3, 1]] is \n', series.iloc[[4, 3, 1]])  # ndarray-like
print('-' * 50)
print('series["a"] is \n', series['a'])  # dict-like
print('-' * 50)
print('series["e"] is \n', series['e'])

# --- Boolean filtering --------------------------------------------------------
index = ['a', 'b', 'c', 'd', 'e']
series = pd.Series(np.random.randn(5), index=index)
print(series)
print(series[series > series.median()])
# --- Percentage change --------------------------------------------------------
# pct_change() is rebuilt by hand: (current - previous) / previous.
# Results are printed explicitly: a bare expression is discarded when this
# file runs as a script.
s1 = pd.Series([90, 91, 85])
s2 = s1.shift(periods=1)  # previous value
print(s2)
print('-' * 50)
s3 = s1.diff(periods=1)  # current - previous
print(s3)
print('-' * 50)
s4 = s3.div(s2)  # relative change
print(s4)
print('-' * 50)
print(s1.pct_change(periods=1))

# --- Detecting missing values -------------------------------------------------
# np.nan is used throughout; the np.NaN alias is not available in NumPy 2.
ser = pd.Series([5, 6, np.nan])
print(ser)
print(ser.isna())

ser = pd.Series([5, 6, np.nan])
print(ser)
print(ser.notna())

# --- Filling gaps by interpolation --------------------------------------------
ser = pd.Series([0, 1, np.nan, 9, np.nan, 5])
print(ser.interpolate())