Module imodels.skrules.datasets.credit_data
default of credit card clients dataset.
The original database is available from UCI Machine Learning Repository:
<https://archive.ics.uci.edu/ml/datasets/default+of+credit+card+clients>
The data contains 30000 observations on 24 variables.
References
- Lichman, M. (2013). UCI Machine Learning Repository
- [http://archive.ics.uci.edu/ml].
Irvine
,CA
:University
ofCalifornia
,School
ofInformation
and
Computer
Science.
Expand source code
"""default of credit card clients dataset.
The original database is available from UCI Machine Learning Repository:
https://archive.ics.uci.edu/ml/datasets/default+of+credit+card+clients
The data contains 30000 observations on 24 variables.
References
----------
Lichman, M. (2013). UCI Machine Learning Repository
[http://archive.ics.uci.edu/ml].
Irvine, CA: University of California, School of Information and Computer
Science.
"""
import pandas as pd
import numpy as np
from sklearn.datasets.base import get_data_home, Bunch
from sklearn.datasets.base import _fetch_remote, RemoteFileMetadata
from os.path import exists, join
def load_credit_data():
sk_data_dir = get_data_home()
archive = RemoteFileMetadata(
filename='default of credit card clients.xls',
url='https://archive.ics.uci.edu/ml/machine-learning-databases/'
'00350/default%20of%20credit%20card%20clients.xls',
checksum=('30c6be3abd8dcfd3e6096c828bad8c2f'
'011238620f5369220bd60cfc82700933'))
if not exists(join(sk_data_dir, archive.filename)):
_fetch_remote(archive, dirname=sk_data_dir)
data = pd.read_excel(join(sk_data_dir, archive.filename),
sheet_name='Data', header=1)
dataset = Bunch(
data=(data.drop('default payment next month', axis=1)),
target=np.array(data['default payment next month'])
)
return dataset
Functions
def load_credit_data()
-
Expand source code
def load_credit_data(): sk_data_dir = get_data_home() archive = RemoteFileMetadata( filename='default of credit card clients.xls', url='https://archive.ics.uci.edu/ml/machine-learning-databases/' '00350/default%20of%20credit%20card%20clients.xls', checksum=('30c6be3abd8dcfd3e6096c828bad8c2f' '011238620f5369220bd60cfc82700933')) if not exists(join(sk_data_dir, archive.filename)): _fetch_remote(archive, dirname=sk_data_dir) data = pd.read_excel(join(sk_data_dir, archive.filename), sheet_name='Data', header=1) dataset = Bunch( data=(data.drop('default payment next month', axis=1)), target=np.array(data['default payment next month']) ) return dataset