Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
243 changes: 161 additions & 82 deletions lib/scholar/linear/logistic_regression.ex
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
defmodule Scholar.Linear.LogisticRegression do
@moduledoc """
Logistic regression in both binary and multinomial variants.
Multiclass logistic regression.

Time complexity is $O(N * K * I)$ where $N$ is the number of samples, $K$ is the number of features, and $I$ is the number of iterations.
"""
import Nx.Defn
import Scholar.Shared
alias Scholar.Linear.LinearHelpers

@derive {Nx.Container, containers: [:coefficients, :bias]}
defstruct [:coefficients, :bias]
Expand All @@ -15,35 +14,44 @@ defmodule Scholar.Linear.LogisticRegression do
num_classes: [
required: true,
type: :pos_integer,
doc: "number of classes contained in the input tensors."
doc: "Number of output classes."
],
iterations: [
max_iterations: [
type: :pos_integer,
default: 1000,
doc: "Maximum number of gradient descent iterations to perform."
],
optimizer: [
type: {:custom, Scholar.Options, :optimizer, []},
default: :sgd,
doc: """
number of iterations of gradient descent performed inside logistic
regression.
Optimizer name or {init, update} pair of functions (see `Polaris.Optimizers` for more details).
"""
],
learning_loop_unroll: [
type: :boolean,
default: false,
doc: ~S"""
If `true`, the learning loop is unrolled.
alpha: [
type: {:custom, Scholar.Options, :non_negative_number, []},
default: 1.0,
doc: """
Constant that multiplies the regularization term, controlling regularization strength.
If 0, no regularization is applied.
"""
],
optimizer: [
type: {:custom, Scholar.Options, :optimizer, []},
default: :sgd,
l1_ratio: [
type: {:custom, Scholar.Options, :non_negative_number, []},
default: 0.0,
doc: """
The optimizer name or {init, update} pair of functions (see `Polaris.Optimizers` for more details).
The Elastic-Net mixing parameter, with `0 <= l1_ratio <= 1`.
Setting `l1_ratio` to 0 gives pure L2 regularization, and setting it to 1 gives pure L1 regularization.
For values between 0 and 1, a penalty of the form `l1_ratio * L1 + (1 - l1_ratio) * L2` is used.
"""
],
eps: [
type: :float,
default: 1.0e-8,
doc:
"The convergence tolerance. If the `abs(loss) < size(x) * :eps`, the algorithm is considered to have converged."
tol: [
type: {:custom, Scholar.Options, :non_negative_number, []},
default: 1.0e-4,
doc: """
Convergence tolerance. If the infinity norm of the gradient is less than `:tol`,
the algorithm is considered to have converged.
"""
]
]

Expand All @@ -53,9 +61,6 @@ defmodule Scholar.Linear.LogisticRegression do
Fits a logistic regression model for sample inputs `x` and sample
targets `y`.

Depending on number of classes the function chooses either binary
or multinomial logistic regression.

## Options

#{NimbleOptions.docs(@opts_schema)}
Expand All @@ -68,10 +73,6 @@ defmodule Scholar.Linear.LogisticRegression do

* `:bias` - Bias added to the decision function.

* `:mode` - Indicates whether the problem is binary classification (`:num_classes` set to 2)
or multinomial (`:num_classes` is bigger than 2). For binary classification set to `:binary`, otherwise
set to `:multinomial`.

## Examples

iex> x = Nx.tensor([[1.0, 2.0], [3.0, 2.0], [4.0, 7.0]])
Expand All @@ -80,26 +81,41 @@ defmodule Scholar.Linear.LogisticRegression do
%Scholar.Linear.LogisticRegression{
coefficients: Nx.tensor(
[
[2.5531527996063232, -0.5531544089317322],
[-0.35652396082878113, 2.3565237522125244]
[0.09002052247524261, -0.09002052992582321],
[-0.1521512120962143, 0.1521512120962143]
]
),
bias: Nx.tensor(
[-0.28847914934158325, 0.28847917914390564]
)
bias: Nx.tensor([-0.05300388112664223, 0.053003907203674316])
}
"""
deftransform fit(x, y, opts \\ []) do
if Nx.rank(x) != 2 do
raise ArgumentError,
"expected x to have shape {n_samples, n_features}, got tensor with shape: #{inspect(Nx.shape(x))}"
"expected x to have shape {num_samples, num_features}, got tensor with shape: #{inspect(Nx.shape(x))}"
end

if Nx.rank(y) != 1 do
raise ArgumentError,
"expected y to have shape {num_samples}, got tensor with shape: #{inspect(Nx.shape(y))}"
end

{n_samples, _} = Nx.shape(x)
y = LinearHelpers.validate_y_shape(y, n_samples, __MODULE__)
{num_samples, num_features} = Nx.shape(x)

if Nx.axis_size(y, 0) != num_samples do
raise ArgumentError,
"expected x and y to have the same number of samples, got #{num_samples} and #{Nx.axis_size(y, 0)}"
end

opts = NimbleOptions.validate!(opts, @opts_schema)

{l1_ratio, opts} = Keyword.pop!(opts, :l1_ratio)

unless l1_ratio >= 0.0 and l1_ratio <= 1.0 do
raise ArgumentError,
"expected l1_ratio to be between 0 and 1, got: #{inspect(l1_ratio)}"
end

type = to_float_type(x)
{optimizer, opts} = Keyword.pop!(opts, :optimizer)

{optimizer_init_fn, optimizer_update_fn} =
Expand All @@ -108,23 +124,39 @@ defmodule Scholar.Linear.LogisticRegression do
{f1, f2} -> {f1, f2}
end

n = Nx.axis_size(x, -1)
num_classes = opts[:num_classes]

coef =
w =
Nx.broadcast(
Nx.tensor(1.0, type: to_float_type(x)),
{n, num_classes}
Nx.tensor(0.0, type: type),
{num_features, num_classes}
)

bias = Nx.broadcast(Nx.tensor(0, type: to_float_type(x)), {num_classes})
b = Nx.broadcast(Nx.tensor(0.0, type: type), {num_classes})

w_optimizer_state = optimizer_init_fn.(w) |> as_type(type)
b_optimizer_state = optimizer_init_fn.(b) |> as_type(type)

coef_optimizer_state = optimizer_init_fn.(coef) |> as_type(to_float_type(x))
bias_optimizer_state = optimizer_init_fn.(bias) |> as_type(to_float_type(x))
{alpha, opts} = Keyword.pop!(opts, :alpha)
{tol, opts} = Keyword.pop!(opts, :tol)
alpha = Nx.tensor(alpha, type: type)
l1_ratio = Nx.tensor(l1_ratio, type: type)
tol = Nx.tensor(tol, type: type)

opts = Keyword.put(opts, :optimizer_update_fn, optimizer_update_fn)

fit_n(x, y, coef, bias, coef_optimizer_state, bias_optimizer_state, opts)
fit_n(
x,
y,
w,
b,
alpha,
l1_ratio,
tol,
w_optimizer_state,
b_optimizer_state,
opts
)
end

deftransformp as_type(container, target_type) do
Expand All @@ -139,11 +171,20 @@ defmodule Scholar.Linear.LogisticRegression do
end)
end

# Logistic Regression training loop

defnp fit_n(x, y, coef, bias, coef_optimizer_state, bias_optimizer_state, opts) do
defnp fit_n(
x,
y,
w,
b,
alpha,
l1_ratio,
tol,
w_optimizer_state,
b_optimizer_state,
opts
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Btw, when you have too many arguments like this, you can wrap the arguments that don't change (are fixed in the loop) in a map and pass it down. Helps clean up while loops and Elixir signatures!

Copy link
Copy Markdown
Member Author

@krstopro krstopro Jan 11, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This works for tensors as well, right?
Apart from that, CI is failing. I am working on it.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I am not sure I understood. 😅

Would having a while loop as {result, state} = while ... where state is a map work? Is this something new in Nx?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it works. It has been supported for quite a while, I think. :)

) do
num_samples = Nx.axis_size(x, 0)
iterations = opts[:iterations]
max_iterations = opts[:max_iterations]
num_classes = opts[:num_classes]
optimizer_update_fn = opts[:optimizer_update_fn]

Expand All @@ -153,40 +194,76 @@ defmodule Scholar.Linear.LogisticRegression do
|> Nx.broadcast({num_samples, num_classes})
|> Nx.equal(Nx.iota({num_samples, num_classes}, axis: 1))

{{final_coef, final_bias}, _} =
while {{coef, bias},
{x, iterations, y_one_hot, coef_optimizer_state, bias_optimizer_state,
has_converged = Nx.u8(0), iter = 0}},
iter < iterations and not has_converged do
{loss, {coef_grad, bias_grad}} = loss_and_grad(coef, bias, x, y_one_hot)
{coef, bias, _} =
while {w, b,
{x, y_one_hot, max_iterations, alpha, l1_ratio, tol, w_optimizer_state,
b_optimizer_state, converged? = Nx.u8(0), iter = Nx.u32(0)}},
iter < max_iterations and not converged? do
{w_grad, b_grad} =
grad({w, b}, fn {w, b} ->
compute_loss(w, b, alpha, l1_ratio, x, y_one_hot)
end)

{coef_updates, coef_optimizer_state} =
optimizer_update_fn.(coef_grad, coef_optimizer_state, coef)
{w_updates, w_optimizer_state} =
optimizer_update_fn.(w_grad, w_optimizer_state, w)

coef = Polaris.Updates.apply_updates(coef, coef_updates)
w = Polaris.Updates.apply_updates(w, w_updates)

{bias_updates, bias_optimizer_state} =
optimizer_update_fn.(bias_grad, bias_optimizer_state, bias)
{b_updates, b_optimizer_state} =
optimizer_update_fn.(b_grad, b_optimizer_state, b)

bias = Polaris.Updates.apply_updates(bias, bias_updates)
b = Polaris.Updates.apply_updates(b, b_updates)

has_converged = Nx.sum(Nx.abs(loss)) < Nx.size(x) * opts[:eps]
converged? =
Nx.reduce_max(Nx.abs(w_grad)) < tol and Nx.reduce_max(Nx.abs(b_grad)) < tol

{{coef, bias},
{x, iterations, y_one_hot, coef_optimizer_state, bias_optimizer_state, has_converged,
iter + 1}}
{w, b,
{x, y_one_hot, max_iterations, alpha, l1_ratio, tol, w_optimizer_state,
b_optimizer_state, converged?, iter + 1}}
end

%__MODULE__{
coefficients: final_coef,
bias: final_bias
coefficients: coef,
bias: bias
}
end

defnp loss_and_grad(coeff, bias, xs, ys) do
value_and_grad({coeff, bias}, fn {coeff, bias} ->
-Nx.sum(ys * log_softmax(Nx.dot(xs, coeff) + bias), axes: [-1])
end)
  # Computes the regularization penalty `alpha * reg(w)` applied to the
  # coefficient matrix `w` (the bias is intentionally not penalized here).
  #
  # `l1_ratio` selects the penalty form:
  #   * 0.0 -> pure L2: sum(w^2)
  #   * 1.0 -> pure L1: sum(|w|)
  #   * otherwise -> Elastic-Net mix: l1_ratio * L1 + (1 - l1_ratio) * L2
  #
  # Returns a scalar; when `alpha == 0` no regularization is applied and the
  # result is 0.0. NOTE(review): unlike scikit-learn, the L2 term carries no
  # 1/2 factor — confirm this matches the intended convention.
  defnp compute_regularization(w, alpha, l1_ratio) do
    if alpha > 0.0 do
      reg =
        cond do
          l1_ratio == 0.0 ->
            # L2 regularization
            Nx.sum(w * w)

          l1_ratio == 1.0 ->
            # L1 regularization
            Nx.sum(Nx.abs(w))

          # Elastic-Net regularization
          true ->
            l1_ratio * Nx.sum(Nx.abs(w)) +
              (1 - l1_ratio) * Nx.sum(w * w)
        end

      alpha * reg
    else
      0.0
    end
  end

defnp compute_loss(w, b, alpha, l1_ratio, xs, ys) do
reg = compute_regularization(w, alpha, l1_ratio)

xs
|> Nx.dot(w)
|> Nx.add(b)
|> log_softmax()
|> Nx.multiply(ys)
|> Nx.sum(axes: [1])
|> Nx.negate()
|> Nx.mean()
|> Nx.add(reg)
end

defnp log_softmax(x) do
Expand Down Expand Up @@ -219,14 +296,16 @@ defmodule Scholar.Linear.LogisticRegression do
iex> y = Nx.tensor([1, 0, 1])
iex> model = Scholar.Linear.LogisticRegression.fit(x, y, num_classes: 2)
iex> Scholar.Linear.LogisticRegression.predict(model, Nx.tensor([[-3.0, 5.0]]))
#Nx.Tensor<
s32[1]
[1]
>
Nx.tensor([1])
"""
defn predict(%__MODULE__{coefficients: coeff, bias: bias} = _model, x) do
inter = Nx.dot(x, [1], coeff, [0]) + bias
Nx.argmax(inter, axis: 1)
if Nx.rank(x) != 2 do
raise ArgumentError,
"expected x to have shape {n_samples, n_features}, got tensor with shape: #{inspect(Nx.shape(x))}"
end

logits = Nx.dot(x, coeff) + bias
Nx.argmax(logits, axis: 1)
end

@doc """
Expand All @@ -238,14 +317,14 @@ defmodule Scholar.Linear.LogisticRegression do
iex> y = Nx.tensor([1, 0, 1])
iex> model = Scholar.Linear.LogisticRegression.fit(x, y, num_classes: 2)
iex> Scholar.Linear.LogisticRegression.predict_probability(model, Nx.tensor([[-3.0, 5.0]]))
#Nx.Tensor<
f32[1][2]
[
[6.470913388456623e-11, 1.0]
]
>
Nx.tensor([[0.10269401967525482, 0.8973060250282288]])
"""
defn predict_probability(%__MODULE__{coefficients: coeff, bias: bias} = _model, x) do
softmax(Nx.dot(x, [1], coeff, [0]) + bias)
if Nx.rank(x) != 2 do
raise ArgumentError,
"expected x to have shape {n_samples, n_features}, got tensor with shape: #{inspect(Nx.shape(x))}"
end

softmax(Nx.dot(x, coeff) + bias)
end
end
Loading
Loading