-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathse_cvxopt_example.py
127 lines (96 loc) · 3.65 KB
/
se_cvxopt_example.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# https://stats.stackexchange.com/questions/384909/formulating-quantile-regression-as-linear-programming-problem/407478#407478
#
# NOTE: This is quantile regression for linear regression problem. It turns into a linear programming problem.
# NOTE: 2023-01-16 I got this working. See do plot.
#
import io
import numpy as np
import pandas as pd
import requests
import os
from scipy.optimize import linprog
from cvxopt import matrix, solvers
filename = 'se_cvxopt_example.csv.gz'
if not os.path.exists(filename):
url = "http://freakonometrics.free.fr/rent98_00.txt"
s = requests.get(url).content
base = pd.read_csv(io.StringIO(s.decode('utf-8')), sep='\t')
base.to_csv(filename)
try:
base
except NameError:
print(f'reading {filename} ...', end='')
base = pd.read_csv(filename)
print(' done')
tau = 0.3
X = pd.DataFrame(columns=[0, 1])
X[1] = base["area"] # data points for independent variable area
X[2] = base["yearc"] # data points for independent variable year
X[0] = 1 # intercept
K = X.shape[1]
N = X.shape[0]
# equality constraints - left hand side
A1 = X.to_numpy() # intercepts & data points - positive weights
A2 = X.to_numpy() * -1 # intercept & data points - negative weights
A3 = np.identity(N) # error - positive
A4 = np.identity(N) * -1 # error - negative
A = np.concatenate((A1, A2, A3, A4), axis=1) # all the equality constraints
# equality constraints - right hand side
b = base["rent_euro"].to_numpy()
# goal function - intercept & data points have 0 weights
# positive error has tau weight, negative error has 1-tau weight
c = np.concatenate((np.repeat(0, 2 * K), tau * np.repeat(1, N), (1 - tau) * np.repeat(1, N)))
# converting from numpy types to cvxopt matrix
Am = matrix(A)
bm = matrix(b)
cm = matrix(c)
# all variables must be greater than zero
# adding inequality constraints - left hand side
n = Am.size[1]
G = matrix(0.0, (n, n))
G[:: n + 1] = -1.0
# adding inequality constraints - right hand side (all zeros)
h = matrix(0.0, (n, 1))
def solve_with_cvxopt():
# solving the model
sol = solvers.lp(cm, G, h, Am, bm, solver='glpk')
x = sol['x']
# both negative and positive components get values above zero, this gets fixed here
beta = x[0:K] - x[K : 2 * K]
print(beta)
return locals()
def solve_with_scipy():
sol = linprog(cm, G, h, Am, bm)
x = sol['x']
# both negative and positive components get values above zero, this gets fixed here
beta = x[0:K] - x[K : 2 * K]
print(beta)
return locals()
def plot_solution(beta, beta2=None):
import plotly.graph_objects as go
x = X.values[:, 1]
y = X.values[:, 2]
z = b
zp = X.values @ beta
traces = [
go.Scatter3d(z=z, x=x, y=y, marker_size=3, mode='markers', opacity=0.50, marker_color='red', name='data'),
go.Scatter3d(z=zp, x=x, y=y, opacity=0.50, mode='markers', marker_size=3, marker_color='blue', name='A'),
go.Mesh3d(z=zp, x=x, y=y, opacity=0.50, color='green', name='Surface A'),
]
if beta2 is not None:
zp2 = X.values @ np.array(beta2).squeeze()
traces.extend([
go.Scatter3d(z=zp2, x=x, y=y, opacity=0.50, mode='markers', marker_size=3, marker_color='pink', name='B'),
go.Mesh3d(z=zp2, x=x, y=y, opacity=0.50, color='purple', name='Surface B'),
])
print(f'len traces {len(traces)}')
fig = go.Figure(data=traces)
fig.update_layout(title='quantile regression example', autosize=False, width=500, height=500, margin=dict(l=65, r=50, b=65, t=90))
fig.show()
return locals()
def do():
a = solve_with_cvxopt()
b = solve_with_cvxopt()
plot_solution(a['beta'], b['beta'])
if __name__ == '__main__':
do()