Introduction

import pandas as pd
pd.options.display.max_columns = 999
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold

from sklearn.metrics import mean_squared_error
from sklearn import linear_model
from sklearn.model_selection import KFold

df = pd.read_csv("AmesHousing.tsv", delimiter="\t")

def transform_features(df):
    return df

def select_features(df):
    return df[["Gr Liv Area", "SalePrice"]]

def train_and_test(df):  
    train = df[:1460]
    test = df[1460:]
    
    ## You can use `pd.DataFrame.select_dtypes()` to specify column types
    ## and return only those columns as a data frame.
    numeric_train = train.select_dtypes(include=['integer', 'float'])
    numeric_test = test.select_dtypes(include=['integer', 'float'])
    
    ## You can use `pd.Series.drop()` to drop a value.
    features = numeric_train.columns.drop("SalePrice")
    lr = linear_model.LinearRegression()
    lr.fit(train[features], train["SalePrice"])
    predictions = lr.predict(test[features])
    mse = mean_squared_error(test["SalePrice"], predictions)
    rmse = np.sqrt(mse)
    
    return rmse

transform_df = transform_features(df)
filtered_df = select_features(transform_df)
rmse = train_and_test(filtered_df)

rmse

57088.251612639091

Feature Engineering

Handle missing values:
All columns:
Drop any with 5% or more missing values for now.
Text columns:
Drop any with 1 or more missing values for now.
Numerical columns:
For columns with missing values, fill in with the most common value in that column

1: All columns: Drop any with 5% or more missing values for now.

## Series object: column name -> number of missing values
num_missing = df.isnull().sum()

# Filter Series to columns containing >5% missing values
drop_missing_cols = num_missing[(num_missing > len(df)/20)].sort_values()

# Drop those columns from the data frame. Note the use of the .index accessor
df = df.drop(drop_missing_cols.index, axis=1)

## Series object: column name -> number of missing values
text_mv_counts = df.select_dtypes(include=['object']).isnull().sum().sort_values(ascending=False)

## Filter Series to columns containing *any* missing values
drop_missing_cols_2 = text_mv_counts[text_mv_counts > 0]

df = df.drop(drop_missing_cols_2.index, axis=1)

## Compute column-wise missing value counts
num_missing = df.select_dtypes(include=['int', 'float']).isnull().sum()
fixable_numeric_cols = num_missing[(num_missing < len(df)/20) & (num_missing > 0)].sort_values()
fixable_numeric_cols

BsmtFin SF 1       1
BsmtFin SF 2       1
Bsmt Unf SF        1
Total Bsmt SF      1
Garage Cars        1
Garage Area        1
Bsmt Full Bath     2
Bsmt Half Bath     2
Mas Vnr Area      23
dtype: int64

## Compute the most common value for each column in `fixable_nmeric_missing_cols`.
replacement_values_dict = df[fixable_numeric_cols.index].mode().to_dict(orient='records')[0]
replacement_values_dict

{'Bsmt Full Bath': 0.0,
 'Bsmt Half Bath': 0.0,
 'Bsmt Unf SF': 0.0,
 'BsmtFin SF 1': 0.0,
 'BsmtFin SF 2': 0.0,
 'Garage Area': 0.0,
 'Garage Cars': 2.0,
 'Mas Vnr Area': 0.0,
 'Total Bsmt SF': 0.0}

## Use `pd.DataFrame.fillna()` to replace missing values.
df = df.fillna(replacement_values_dict)

## Verify that every column has 0 missing values
df.isnull().sum().value_counts()

0    64
dtype: int64

years_sold = df['Yr Sold'] - df['Year Built']
years_sold[years_sold < 0]

2180   -1
dtype: int64

years_since_remod = df['Yr Sold'] - df['Year Remod/Add']
years_since_remod[years_since_remod < 0]

1702   -1
2180   -2
2181   -1
dtype: int64

## Create new columns
df['Years Before Sale'] = years_sold
df['Years Since Remod'] = years_since_remod

## Drop rows with negative values for both of these new features
df = df.drop([1702, 2180, 2181], axis=0)

## No longer need original year columns
df = df.drop(["Year Built", "Year Remod/Add"], axis = 1)

Drop columns that:
a. that aren't useful for ML
b. leak data about the final sale

## Drop columns that aren't useful for ML
df = df.drop(["PID", "Order"], axis=1)

## Drop columns that leak info about the final sale
df = df.drop(["Mo Sold", "Sale Condition", "Sale Type", "Yr Sold"], axis=1)

Let's update transform_features()

def transform_features(df):
    num_missing = df.isnull().sum()
    drop_missing_cols = num_missing[(num_missing > len(df)/20)].sort_values()
    df = df.drop(drop_missing_cols.index, axis=1)
    
    text_mv_counts = df.select_dtypes(include=['object']).isnull().sum().sort_values(ascending=False)
    drop_missing_cols_2 = text_mv_counts[text_mv_counts > 0]
    df = df.drop(drop_missing_cols_2.index, axis=1)
    
    num_missing = df.select_dtypes(include=['int', 'float']).isnull().sum()
    fixable_numeric_cols = num_missing[(num_missing < len(df)/20) & (num_missing > 0)].sort_values()
    replacement_values_dict = df[fixable_numeric_cols.index].mode().to_dict(orient='records')[0]
    df = df.fillna(replacement_values_dict)
    
    years_sold = df['Yr Sold'] - df['Year Built']
    years_since_remod = df['Yr Sold'] - df['Year Remod/Add']
    df['Years Before Sale'] = years_sold
    df['Years Since Remod'] = years_since_remod
    df = df.drop([1702, 2180, 2181], axis=0)

    df = df.drop(["PID", "Order", "Mo Sold", "Sale Condition", "Sale Type", "Year Built", "Year Remod/Add"], axis=1)
    return df

def select_features(df):
    return df[["Gr Liv Area", "SalePrice"]]

def train_and_test(df):  
    train = df[:1460]
    test = df[1460:]
    
    ## You can use `pd.DataFrame.select_dtypes()` to specify column types
    ## and return only those columns as a data frame.
    numeric_train = train.select_dtypes(include=['integer', 'float'])
    numeric_test = test.select_dtypes(include=['integer', 'float'])
    
    ## You can use `pd.Series.drop()` to drop a value.
    features = numeric_train.columns.drop("SalePrice")
    lr = linear_model.LinearRegression()
    lr.fit(train[features], train["SalePrice"])
    predictions = lr.predict(test[features])
    mse = mean_squared_error(test["SalePrice"], predictions)
    rmse = np.sqrt(mse)
    
    return rmse

df = pd.read_csv("AmesHousing.tsv", delimiter="\t")
transform_df = transform_features(df)
filtered_df = select_features(transform_df)
rmse = train_and_test(filtered_df)

rmse

55275.367312413066

Feature Selection

numerical_df = transform_df.select_dtypes(include=['int', 'float'])
numerical_df

abs_corr_coeffs = numerical_df.corr()['SalePrice'].abs().sort_values()
abs_corr_coeffs

BsmtFin SF 2         0.006127
Misc Val             0.019273
Yr Sold              0.030358
3Ssn Porch           0.032268
Bsmt Half Bath       0.035875
Low Qual Fin SF      0.037629
Pool Area            0.068438
MS SubClass          0.085128
Overall Cond         0.101540
Screen Porch         0.112280
Kitchen AbvGr        0.119760
Enclosed Porch       0.128685
Bedroom AbvGr        0.143916
Bsmt Unf SF          0.182751
Lot Area             0.267520
2nd Flr SF           0.269601
Bsmt Full Bath       0.276258
Half Bath            0.284871
Open Porch SF        0.316262
Wood Deck SF         0.328183
BsmtFin SF 1         0.439284
Fireplaces           0.474831
TotRms AbvGrd        0.498574
Mas Vnr Area         0.506983
Years Since Remod    0.534985
Full Bath            0.546118
Years Before Sale    0.558979
1st Flr SF           0.635185
Garage Area          0.641425
Total Bsmt SF        0.644012
Garage Cars          0.648361
Gr Liv Area          0.717596
Overall Qual         0.801206
SalePrice            1.000000
Name: SalePrice, dtype: float64

## Let's only keep columns with a correlation coefficient of larger than 0.4 (arbitrary, worth experimenting later!)
abs_corr_coeffs[abs_corr_coeffs > 0.4]

BsmtFin SF 1         0.439284
Fireplaces           0.474831
TotRms AbvGrd        0.498574
Mas Vnr Area         0.506983
Years Since Remod    0.534985
Full Bath            0.546118
Years Before Sale    0.558979
1st Flr SF           0.635185
Garage Area          0.641425
Total Bsmt SF        0.644012
Garage Cars          0.648361
Gr Liv Area          0.717596
Overall Qual         0.801206
SalePrice            1.000000
Name: SalePrice, dtype: float64

## Drop columns with less than 0.4 correlation with SalePrice
transform_df = transform_df.drop(abs_corr_coeffs[abs_corr_coeffs < 0.4].index, axis=1)

Which categorical columns should we keep?

## Create a list of column names from documentation that are *meant* to be categorical
nominal_features = ["PID", "MS SubClass", "MS Zoning", "Street", "Alley", "Land Contour", "Lot Config", "Neighborhood", 
                    "Condition 1", "Condition 2", "Bldg Type", "House Style", "Roof Style", "Roof Matl", "Exterior 1st", 
                    "Exterior 2nd", "Mas Vnr Type", "Foundation", "Heating", "Central Air", "Garage Type", 
                    "Misc Feature", "Sale Type", "Sale Condition"]

Which columns are currently numerical but need to be encoded as categorical instead (because the numbers don't have any semantic meaning)?
If a categorical column has hundreds of unique values (or categories), should we keep it? When we dummy code this column, hundreds of columns will need to be added back to the data frame.

## Which categorical columns have we still carried with us? We'll test tehse 
transform_cat_cols = []
for col in nominal_features:
    if col in transform_df.columns:
        transform_cat_cols.append(col)

## How many unique values in each categorical column?
uniqueness_counts = transform_df[transform_cat_cols].apply(lambda col: len(col.value_counts())).sort_values()
## Aribtrary cutoff of 10 unique values (worth experimenting)
drop_nonuniq_cols = uniqueness_counts[uniqueness_counts > 10].index
transform_df = transform_df.drop(drop_nonuniq_cols, axis=1)

## Select just the remaining text columns and convert to categorical
text_cols = transform_df.select_dtypes(include=['object'])
for col in text_cols:
    transform_df[col] = transform_df[col].astype('category')
    
## Create dummy columns and add back to the dataframe!
transform_df = pd.concat([
    transform_df, 
    pd.get_dummies(transform_df.select_dtypes(include=['category']))
], axis=1)

Update select_features()

def transform_features(df):
    num_missing = df.isnull().sum()
    drop_missing_cols = num_missing[(num_missing > len(df)/20)].sort_values()
    df = df.drop(drop_missing_cols.index, axis=1)
    
    text_mv_counts = df.select_dtypes(include=['object']).isnull().sum().sort_values(ascending=False)
    drop_missing_cols_2 = text_mv_counts[text_mv_counts > 0]
    df = df.drop(drop_missing_cols_2.index, axis=1)
    
    num_missing = df.select_dtypes(include=['int', 'float']).isnull().sum()
    fixable_numeric_cols = num_missing[(num_missing < len(df)/20) & (num_missing > 0)].sort_values()
    replacement_values_dict = df[fixable_numeric_cols.index].mode().to_dict(orient='records')[0]
    df = df.fillna(replacement_values_dict)
    
    years_sold = df['Yr Sold'] - df['Year Built']
    years_since_remod = df['Yr Sold'] - df['Year Remod/Add']
    df['Years Before Sale'] = years_sold
    df['Years Since Remod'] = years_since_remod
    df = df.drop([1702, 2180, 2181], axis=0)

    df = df.drop(["PID", "Order", "Mo Sold", "Sale Condition", "Sale Type", "Year Built", "Year Remod/Add"], axis=1)
    return df

def select_features(df, coeff_threshold=0.4, uniq_threshold=10):
    numerical_df = df.select_dtypes(include=['int', 'float'])
    abs_corr_coeffs = numerical_df.corr()['SalePrice'].abs().sort_values()
    df = df.drop(abs_corr_coeffs[abs_corr_coeffs < coeff_threshold].index, axis=1)
    
    nominal_features = ["PID", "MS SubClass", "MS Zoning", "Street", "Alley", "Land Contour", "Lot Config", "Neighborhood", 
                    "Condition 1", "Condition 2", "Bldg Type", "House Style", "Roof Style", "Roof Matl", "Exterior 1st", 
                    "Exterior 2nd", "Mas Vnr Type", "Foundation", "Heating", "Central Air", "Garage Type", 
                    "Misc Feature", "Sale Type", "Sale Condition"]
    
    transform_cat_cols = []
    for col in nominal_features:
        if col in df.columns:
            transform_cat_cols.append(col)

    uniqueness_counts = df[transform_cat_cols].apply(lambda col: len(col.value_counts())).sort_values()
    drop_nonuniq_cols = uniqueness_counts[uniqueness_counts > 10].index
    df = df.drop(drop_nonuniq_cols, axis=1)
    
    text_cols = df.select_dtypes(include=['object'])
    for col in text_cols:
        df[col] = df[col].astype('category')
    df = pd.concat([df, pd.get_dummies(df.select_dtypes(include=['category']))], axis=1)
    
    return df

def train_and_test(df, k=0):
    numeric_df = df.select_dtypes(include=['integer', 'float'])
    features = numeric_df.columns.drop("SalePrice")
    lr = linear_model.LinearRegression()
    
    if k == 0:
        train = df[:1460]
        test = df[1460:]

        lr.fit(train[features], train["SalePrice"])
        predictions = lr.predict(test[features])
        mse = mean_squared_error(test["SalePrice"], predictions)
        rmse = np.sqrt(mse)

        return rmse
    
    if k == 1:
        # Randomize *all* rows (frac=1) from `df` and return
        shuffled_df = df.sample(frac=1, )
        train = df[:1460]
        test = df[1460:]
        
        lr.fit(train[features], train["SalePrice"])
        predictions_one = lr.predict(test[features])        
        
        mse_one = mean_squared_error(test["SalePrice"], predictions_one)
        rmse_one = np.sqrt(mse_one)
        
        lr.fit(test[features], test["SalePrice"])
        predictions_two = lr.predict(train[features])        
       
        mse_two = mean_squared_error(train["SalePrice"], predictions_two)
        rmse_two = np.sqrt(mse_two)
        
        avg_rmse = np.mean([rmse_one, rmse_two])
        print(rmse_one)
        print(rmse_two)
        return avg_rmse
    else:
        kf = KFold(n_splits=k, shuffle=True)
        rmse_values = []
        for train_index, test_index, in kf.split(df):
            train = df.iloc[train_index]
            test = df.iloc[test_index]
            lr.fit(train[features], train["SalePrice"])
            predictions = lr.predict(test[features])
            mse = mean_squared_error(test["SalePrice"], predictions)
            rmse = np.sqrt(mse)
            rmse_values.append(rmse)
        print(rmse_values)
        avg_rmse = np.mean(rmse_values)
        return avg_rmse

df = pd.read_csv("AmesHousing.tsv", delimiter="\t")
transform_df = transform_features(df)
filtered_df = select_features(transform_df)
rmse = train_and_test(filtered_df, k=4)

rmse

[25761.875549560471, 36527.812968130842, 24956.485193881424, 28486.738135675929]

28933.227961812168

	MS SubClass	Lot Area	Overall Qual	Overall Cond	Mas Vnr Area	BsmtFin SF 1	BsmtFin SF 2	Bsmt Unf SF	Total Bsmt SF	1st Flr SF	2nd Flr SF	Low Qual Fin SF	Gr Liv Area	Bsmt Full Bath	Bsmt Half Bath	Full Bath	Half Bath	Bedroom AbvGr	Kitchen AbvGr	TotRms AbvGrd	Fireplaces	Garage Cars	Garage Area	Wood Deck SF	Open Porch SF	Enclosed Porch	3Ssn Porch	Screen Porch	Pool Area	Misc Val	Yr Sold	SalePrice	Years Before Sale	Years Since Remod
0	20	31770	6	5	112.0	639.0	0.0	441.0	1080.0	1656	0	0	1656	1.0	0.0	1	0	3	1	7	2	2.0	528.0	210	62	0	0	0	0	0	2010	215000	50	50
1	20	11622	5	6	0.0	468.0	144.0	270.0	882.0	896	0	0	896	0.0	0.0	1	0	2	1	5	0	1.0	730.0	140	0	0	0	120	0	0	2010	105000	49	49
2	20	14267	6	6	108.0	923.0	0.0	406.0	1329.0	1329	0	0	1329	0.0	0.0	1	1	3	1	6	0	1.0	312.0	393	36	0	0	0	0	12500	2010	172000	52	52
3	20	11160	7	5	0.0	1065.0	0.0	1045.0	2110.0	2110	0	0	2110	1.0	0.0	2	1	3	1	8	2	2.0	522.0	0	0	0	0	0	0	0	2010	244000	42	42
4	60	13830	5	5	0.0	791.0	0.0	137.0	928.0	928	701	0	1629	0.0	0.0	2	1	3	1	6	1	2.0	482.0	212	34	0	0	0	0	0	2010	189900	13	12
5	60	9978	6	6	20.0	602.0	0.0	324.0	926.0	926	678	0	1604	0.0	0.0	2	1	3	1	7	1	2.0	470.0	360	36	0	0	0	0	0	2010	195500	12	12
6	120	4920	8	5	0.0	616.0	0.0	722.0	1338.0	1338	0	0	1338	1.0	0.0	2	0	2	1	6	0	2.0	582.0	0	0	170	0	0	0	0	2010	213500	9	9
7	120	5005	8	5	0.0	263.0	0.0	1017.0	1280.0	1280	0	0	1280	0.0	0.0	2	0	2	1	5	0	2.0	506.0	0	82	0	0	144	0	0	2010	191500	18	18
8	120	5389	8	5	0.0	1180.0	0.0	415.0	1595.0	1616	0	0	1616	1.0	0.0	2	0	2	1	5	1	2.0	608.0	237	152	0	0	0	0	0	2010	236500	15	14
9	60	7500	7	5	0.0	0.0	0.0	994.0	994.0	1028	776	0	1804	0.0	0.0	2	1	3	1	7	1	2.0	442.0	140	60	0	0	0	0	0	2010	189000	11	11
10	60	10000	6	5	0.0	0.0	0.0	763.0	763.0	763	892	0	1655	0.0	0.0	2	1	3	1	7	1	2.0	440.0	157	84	0	0	0	0	0	2010	175900	17	16
11	20	7980	6	7	0.0	935.0	0.0	233.0	1168.0	1187	0	0	1187	1.0	0.0	2	0	3	1	6	0	2.0	420.0	483	21	0	0	0	0	500	2010	185000	18	3
12	60	8402	6	5	0.0	0.0	0.0	789.0	789.0	789	676	0	1465	0.0	0.0	2	1	3	1	7	1	2.0	393.0	0	75	0	0	0	0	0	2010	180400	12	12
13	20	10176	7	5	0.0	637.0	0.0	663.0	1300.0	1341	0	0	1341	1.0	0.0	1	1	2	1	5	1	2.0	506.0	192	0	0	0	0	0	0	2010	171500	20	20
14	120	6820	8	5	0.0	368.0	1120.0	0.0	1488.0	1502	0	0	1502	1.0	0.0	1	1	1	1	4	0	2.0	528.0	0	54	0	0	140	0	0	2010	212000	25	25
15	60	53504	8	5	603.0	1416.0	0.0	234.0	1650.0	1690	1589	0	3279	1.0	0.0	3	1	4	1	12	1	3.0	841.0	503	36	0	0	210	0	0	2010	538000	7	7
16	50	12134	8	7	0.0	427.0	0.0	132.0	559.0	1080	672	0	1752	0.0	0.0	2	0	4	1	8	0	2.0	492.0	325	12	0	0	0	0	0	2010	164000	22	5
17	20	11394	9	2	350.0	1445.0	0.0	411.0	1856.0	1856	0	0	1856	1.0	0.0	1	1	1	1	8	1	3.0	834.0	113	0	0	0	0	0	0	2010	394432	0	0
18	20	19138	4	5	0.0	120.0	0.0	744.0	864.0	864	0	0	864	0.0	0.0	1	0	2	1	4	0	2.0	400.0	0	0	0	0	0	0	0	2010	141000	59	59
19	20	13175	6	6	119.0	790.0	163.0	589.0	1542.0	2073	0	0	2073	1.0	0.0	2	0	3	1	7	2	2.0	500.0	349	0	0	0	0	0	0	2010	210000	32	22
20	20	11751	6	6	480.0	705.0	0.0	1139.0	1844.0	1844	0	0	1844	0.0	0.0	2	0	3	1	7	1	2.0	546.0	0	122	0	0	0	0	0	2010	190000	33	33
21	85	10625	7	6	81.0	885.0	168.0	0.0	1053.0	1173	0	0	1173	1.0	0.0	2	0	3	1	6	2	2.0	528.0	0	120	0	0	0	0	0	2010	170000	36	36
22	60	7500	7	5	0.0	533.0	0.0	281.0	814.0	814	860	0	1674	1.0	0.0	2	1	3	1	7	0	2.0	663.0	0	96	0	0	0	0	0	2010	216000	10	10
23	20	11241	6	7	180.0	578.0	0.0	426.0	1004.0	1004	0	0	1004	1.0	0.0	1	0	2	1	5	1	2.0	480.0	0	0	0	0	0	0	700	2010	149000	40	40
24	20	12537	5	6	0.0	734.0	0.0	344.0	1078.0	1078	0	0	1078	1.0	0.0	1	1	3	1	6	1	2.0	500.0	0	0	0	0	0	0	0	2010	149900	39	2
25	20	8450	5	6	0.0	775.0	0.0	281.0	1056.0	1056	0	0	1056	1.0	0.0	1	0	3	1	6	1	1.0	304.0	0	85	184	0	0	0	0	2010	142000	42	42
26	20	8400	4	5	0.0	804.0	78.0	0.0	882.0	882	0	0	882	1.0	0.0	1	0	2	1	4	0	2.0	525.0	240	0	0	0	0	0	0	2010	126000	40	40
27	20	10500	4	5	0.0	432.0	0.0	432.0	864.0	864	0	0	864	0.0	0.0	1	0	3	1	5	1	0.0	0.0	0	0	0	0	0	0	0	2010	115000	39	39
28	120	5858	7	5	0.0	1051.0	0.0	354.0	1405.0	1337	0	0	1337	1.0	0.0	2	0	2	1	5	1	2.0	511.0	203	68	0	0	0	0	0	2010	184000	11	11
29	160	1680	6	5	504.0	156.0	0.0	327.0	483.0	483	504	0	987	0.0	0.0	1	1	2	1	5	0	1.0	264.0	275	0	0	0	0	0	0	2010	96000	39	39
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
2900	20	13618	8	5	198.0	1350.0	0.0	378.0	1728.0	1960	0	0	1960	1.0	0.0	2	0	3	1	8	2	3.0	714.0	172	38	0	0	0	0	0	2006	320000	1	0
2901	20	11443	8	5	208.0	1460.0	0.0	408.0	1868.0	2028	0	0	2028	1.0	0.0	2	0	2	1	7	2	3.0	880.0	326	66	0	0	0	0	0	2006	369900	1	0
2902	20	11577	9	5	382.0	1455.0	0.0	383.0	1838.0	1838	0	0	1838	1.0	0.0	2	0	3	1	9	1	3.0	682.0	161	225	0	0	0	0	0	2006	359900	1	0
2903	20	31250	1	3	0.0	0.0	0.0	0.0	0.0	1600	0	0	1600	0.0	0.0	1	1	3	1	6	0	1.0	270.0	0	0	135	0	0	0	0	2006	81500	55	55
2904	90	7020	7	5	200.0	1243.0	0.0	45.0	1288.0	1368	0	0	1368	2.0	0.0	2	0	2	2	8	0	4.0	784.0	0	48	0	0	0	0	0	2006	215000	9	9
2905	120	4500	6	5	116.0	897.0	0.0	319.0	1216.0	1216	0	0	1216	1.0	0.0	2	0	2	1	5	0	2.0	402.0	0	125	0	0	0	0	0	2006	164000	8	8
2906	120	4500	6	5	443.0	1201.0	0.0	36.0	1237.0	1337	0	0	1337	1.0	0.0	2	0	2	1	5	0	2.0	405.0	0	199	0	0	0	0	0	2006	153500	8	8
2907	20	17217	5	5	0.0	0.0	0.0	1140.0	1140.0	1140	0	0	1140	0.0	0.0	1	0	3	1	6	0	0.0	0.0	36	56	0	0	0	0	0	2006	84500	0	0
2908	160	2665	5	6	0.0	0.0	0.0	264.0	264.0	616	688	0	1304	0.0	0.0	1	1	3	1	5	1	1.0	336.0	141	0	0	0	0	0	0	2006	104500	29	29
2909	160	2665	5	6	0.0	548.0	173.0	36.0	757.0	925	550	0	1475	0.0	0.0	2	0	4	1	6	1	1.0	336.0	104	26	0	0	0	0	0	2006	127000	29	29
2910	160	3964	6	4	0.0	837.0	0.0	105.0	942.0	1291	1230	0	2521	1.0	0.0	2	1	5	1	10	1	2.0	576.0	728	20	0	0	0	0	0	2006	151400	33	33
2911	20	10172	5	7	0.0	441.0	0.0	423.0	864.0	874	0	0	874	1.0	0.0	1	0	3	1	5	0	1.0	288.0	0	120	0	0	0	0	0	2006	126500	38	3
2912	90	11836	5	5	0.0	149.0	0.0	1503.0	1652.0	1652	0	0	1652	0.0	0.0	2	0	4	2	8	0	3.0	928.0	0	0	0	0	0	0	0	2006	146500	36	36
2913	180	1470	4	6	0.0	522.0	0.0	108.0	630.0	630	0	0	630	1.0	0.0	1	0	1	1	3	0	0.0	0.0	0	0	0	0	0	0	0	2006	73000	36	36
2914	160	1484	4	4	0.0	252.0	0.0	294.0	546.0	546	546	0	1092	0.0	0.0	1	1	3	1	5	0	1.0	253.0	0	0	0	0	0	0	0	2006	79400	34	34
2915	20	13384	5	5	194.0	119.0	344.0	641.0	1104.0	1360	0	0	1360	1.0	0.0	1	0	3	1	8	1	1.0	336.0	160	0	0	0	0	0	0	2006	140000	37	27
2916	180	1533	5	7	0.0	553.0	0.0	77.0	630.0	630	0	0	630	1.0	0.0	1	0	1	1	3	0	0.0	0.0	0	0	0	0	0	0	0	2006	92000	36	36
2917	160	1533	4	5	0.0	408.0	0.0	138.0	546.0	546	546	0	1092	0.0	0.0	1	1	3	1	5	0	1.0	286.0	0	0	0	0	0	0	0	2006	87550	36	36
2918	160	1526	4	5	0.0	0.0	0.0	546.0	546.0	546	546	0	1092	0.0	0.0	1	1	3	1	5	0	0.0	0.0	0	34	0	0	0	0	0	2006	79500	36	36
2919	160	1936	4	7	0.0	0.0	0.0	546.0	546.0	546	546	0	1092	0.0	0.0	1	1	3	1	5	0	0.0	0.0	0	0	0	0	0	0	0	2006	90500	36	36
2920	160	1894	4	5	0.0	252.0	0.0	294.0	546.0	546	546	0	1092	0.0	0.0	1	1	3	1	6	0	1.0	286.0	0	24	0	0	0	0	0	2006	71000	36	36
2921	90	12640	6	5	0.0	936.0	396.0	396.0	1728.0	1728	0	0	1728	0.0	0.0	2	0	4	2	8	0	2.0	574.0	40	0	0	0	0	0	0	2006	150900	30	30
2922	90	9297	5	5	0.0	1606.0	0.0	122.0	1728.0	1728	0	0	1728	2.0	0.0	2	0	4	2	8	0	2.0	560.0	0	0	0	0	0	0	0	2006	188000	30	30
2923	20	17400	5	5	0.0	936.0	0.0	190.0	1126.0	1126	0	0	1126	1.0	0.0	2	0	3	1	5	1	2.0	484.0	295	41	0	0	0	0	0	2006	160000	29	29
2924	20	20000	5	7	0.0	1224.0	0.0	0.0	1224.0	1224	0	0	1224	1.0	0.0	1	0	4	1	7	1	2.0	576.0	474	0	0	0	0	0	0	2006	131000	46	10
2925	80	7937	6	6	0.0	819.0	0.0	184.0	1003.0	1003	0	0	1003	1.0	0.0	1	0	3	1	6	0	2.0	588.0	120	0	0	0	0	0	0	2006	142500	22	22
2926	20	8885	5	5	0.0	301.0	324.0	239.0	864.0	902	0	0	902	1.0	0.0	1	0	2	1	5	0	2.0	484.0	164	0	0	0	0	0	0	2006	131000	23	23
2927	85	10441	5	5	0.0	337.0	0.0	575.0	912.0	970	0	0	970	0.0	1.0	1	0	3	1	6	0	0.0	0.0	80	32	0	0	0	0	700	2006	132000	14	14
2928	20	10010	5	5	0.0	1071.0	123.0	195.0	1389.0	1389	0	0	1389	1.0	0.0	1	0	2	1	6	1	2.0	418.0	240	38	0	0	0	0	0	2006	170000	32	31
2929	60	9627	7	5	94.0	758.0	0.0	238.0	996.0	996	1004	0	2000	0.0	0.0	2	1	3	1	9	1	3.0	650.0	190	48	0	0	0	0	0	2006	188000	13	12