02 - Segment Analysis¶

Deep dive into segment properties and data export.

What You'll Learn¶

Understand all segment attributes
Export segments to pandas DataFrame
Analyze segment statistics
Interpret why segments were created

Setup¶

In [1]:

Copied!

import yfinance as yf

from trend_classifier import Segmenter

# Download data
df = yf.download("AAPL", start="2018-09-15", end="2022-09-05", interval="1d", progress=False)

# Create and calculate segments
seg = Segmenter(df=df, column="Close", n=20)
seg.calculate_segments()

print(f"Analyzing {len(seg.segments)} segments")
import yfinance as yf

from trend_classifier import Segmenter

# Download data
df = yf.download("AAPL", start="2018-09-15", end="2022-09-05", interval="1d", progress=False)

# Create and calculate segments
seg = Segmenter(df=df, column="Close", n=20)
seg.calculate_segments()

print(f"Analyzing {len(seg.segments)} segments")

Analyzing 49 segments

Segment Attributes¶

Each Segment object contains rich information about the detected trend:

Attribute	Description
`start`	Start index in the time series
`stop`	End index in the time series
`slope`	Overall slope of the linear trend
`offset`	Y-intercept of the linear trend
`std`	Standard deviation after detrending (volatility)
`span`	Normalized range of values
`slopes`	List of slopes from individual windows
`offsets`	List of offsets from individual windows
`slopes_std`	Std of slopes (trend consistency)
`offsets_std`	Std of offsets
`reason_for_new_segment`	Why this segment ended

In [2]:

Copied!





# Examine a single segment in detail
segment = seg.segments[2]

print("=== Segment Details ===")
print(f"Range: index {segment.start} to {segment.stop} ({segment.stop - segment.start} points)")
print(f"Slope: {segment.slope:.4f}")
print(f"Offset: {segment.offset:.2f}")
print(f"Volatility (std): {segment.std:.4f}")
print(f"Span: {segment.span:.1f}")
print(f"Trend consistency (slopes_std): {segment.slopes_std:.4f}")
print(f"Reason segment ended: '{segment.reason_for_new_segment}'")
# Examine a single segment in detail
segment = seg.segments[2]

print("=== Segment Details ===")
print(f"Range: index {segment.start} to {segment.stop} ({segment.stop - segment.start} points)")
print(f"Slope: {segment.slope:.4f}")
print(f"Offset: {segment.offset:.2f}")
print(f"Volatility (std): {segment.std:.4f}")
print(f"Span: {segment.span:.1f}")
print(f"Trend consistency (slopes_std): {segment.slopes_std:.4f}")
print(f"Reason segment ended: '{segment.reason_for_new_segment}'")

=== Segment Details ===
Range: index 76 to 81 (5 points)
Slope: 0.1050
Offset: 27.73
Volatility (std): 0.4467
Span: 30.9
Trend consistency (slopes_std): 0.0000
Reason segment ended: 'slope'

In [3]:

Copied!

# The full representation
print("\nFull repr:")
print(repr(segment))
# The full representation
print("\nFull repr:")
print(repr(segment))

Full repr:
Segment(start=76, stop=81, slope=0.10499692644392077, offset=27.7340259370347, std=0.44673911715410836, span=30.945143102918966, reason_for_new_segment=slope, slopes=[0.2653123439702775], offsets=[14.540656963147605], slopes_std=0.0, offsets_std=0.0)

Export to DataFrame¶

The to_dataframe() method converts all segments to a pandas DataFrame for easy analysis.

In [4]:

Copied!

# Convert segments to DataFrame
df_segments = seg.segments.to_dataframe()
df_segments
# Convert segments to DataFrame
df_segments = seg.segments.to_dataframe()
df_segments

Out[4]:

	start	stop	slope	offset	slopes_std	offsets_std	std	span	reason_for_new_segment	slopes	offsets
0	0	9	0.197502	51.373652	0.000000	0.000000	0.313283	17.926021	slope	[-0.1002435239634121]	[54.312332704013464]
1	10	75	-0.315274	58.936524	0.128520	4.743325	1.432513	152.753008	slope	[-0.15026299899682063, -0.15786464375660753, -...	[55.39250845944972, 55.46701279546978, 62.3780...
2	76	81	0.104997	27.734026	0.000000	0.000000	0.446739	30.945143	slope	[0.2653123439702775]	[14.540656963147605]
3	82	153	0.180014	21.923825	0.065423	7.017021	0.784706	94.854749	slope	[0.28343645827214536, 0.1573499894679941, 0.05...	[12.548412297184312, 24.649738174094253, 35.11...
4	154	171	-0.418104	114.731870	0.000000	0.000000	0.958458	83.338636	slope	[0.20273512359848547]	[8.268741292164817]
5	172	177	-0.252725	86.487991	0.000000	0.000000	0.220665	16.343903	offset	[0.3256940827333848]	[-14.319836067257278]
6	178	183	0.759340	-92.129464	0.000000	0.000000	0.143346	9.394813	offset	[0.16418316489771786]	[16.008887923391246]
7	184	243	0.052121	37.712443	0.069056	14.926482	0.921518	99.864373	offset	[0.10541228674408207, 0.09235581132702286, 0.0...	[27.264427543582734, 29.83007757717522, 29.861...
8	244	255	0.094036	29.266863	0.000000	0.000000	0.696379	41.776869	slope	[0.20368984767368656]	[0.727319608416445]
9	256	261	0.290731	-21.991700	0.000000	0.000000	0.356201	20.937572	offset	[0.29768106274138795]	[-24.30454409319677]
10	262	291	0.349733	-38.473924	0.032526	8.894525	0.598339	48.956367	offset	[0.2893737879014559, 0.3544255823121014]	[-22.044845073384636, -39.83389430225283]
11	292	309	0.022144	57.252010	0.000000	0.000000	0.635685	41.472750	offset	[0.4148262081289649]	[-63.515122125381765]
12	310	333	0.470871	-81.341134	0.000000	0.000000	0.609998	37.077467	offset	[0.45389397413210525]	[-75.97579879187023]
13	334	345	0.063475	54.885188	0.000000	0.000000	1.117712	48.474198	slope and offset	[-0.13614144002584588]	[124.56897648402624]
14	346	351	0.536635	-109.917587	0.000000	0.000000	0.866207	30.911479	slope	[-0.5661394219649489]	[277.07836881938823]
15	352	381	-0.789260	357.880087	0.001293	0.897931	2.795619	149.001721	slope and offset	[-0.7307296121927065, -0.7281436633346682]	[336.9613589752861, 335.16549767228923]
16	382	423	0.453120	-113.747268	0.089059	35.879060	1.711334	121.214741	offset	[0.5063411569236804, 0.30660507947878624, 0.46...	[-134.74731052656787, -55.07139889136243, -117...
17	424	459	0.508050	-139.004988	0.077954	34.173083	1.156028	69.226962	slope and offset	[0.5896744003869475, 0.40295758928571496, 0.46...	[-174.36450660533194, -92.50303976876422, -118...
18	460	489	1.137028	-434.631337	0.011891	5.977649	3.592332	116.964022	slope and offset	[1.2540462637306398, 1.2778273331491528]	[-490.22680138610605, -502.18209846897895]
19	490	507	-1.244808	735.206600	0.000000	0.000000	3.941715	137.593727	slope and offset	[0.650410002514835]	[-222.97327127671952]
20	508	519	0.542699	-168.379520	0.000000	0.000000	2.138564	55.675979	offset	[-0.23563809502393815]	[237.10188738600723]
21	520	531	-0.601584	431.092696	0.000000	0.000000	1.827518	68.039884	slope	[0.3130644948858911]	[-55.63623460468208]
22	532	549	0.500673	-157.644286	0.000000	0.000000	2.620798	85.081969	offset	[0.3456263205162643]	[-75.0487547107253]
23	550	555	0.490341	-157.663800	0.000000	0.000000	1.355362	34.328355	offset	[0.6507702361372153]	[-245.364759166974]
24	556	573	0.559880	-193.699279	0.000000	0.000000	1.760088	52.900783	offset	[-0.18182311093896558]	[232.67361025128878]
25	574	579	-1.274112	864.058092	0.000000	0.000000	0.849650	21.860277	slope	[0.55792021213619]	[-197.0884808762715]
26	580	591	0.605414	-227.102345	0.000000	0.000000	2.862259	72.166416	offset	[-0.04245336432205603]	[157.9319744310857]
27	592	597	-2.293729	1498.498366	0.000000	0.000000	1.800028	38.883082	slope	[-0.5593556396943261]	[467.5525597536457]
28	598	627	-0.626174	508.022457	0.108374	66.258657	2.916785	102.304223	slope	[-0.9246914282777318, -0.7079439263594823]	[689.8949669171135, 557.3776538648133]
29	628	633	-0.387734	363.291128	0.000000	0.000000	1.633302	32.609588	slope and offset	[0.8268386267181737]	[-405.7310210550709]
30	634	651	0.968227	-497.193711	0.000000	0.000000	1.638103	53.408664	slope and offset	[-0.4749670616666152]	[440.4340315223619]
31	652	675	-0.424315	407.539967	0.000000	0.000000	1.818585	60.497165	slope	[-0.5325873066608078]	[479.31027545784895]
32	676	681	-0.490511	455.367242	0.000000	0.000000	0.548065	11.590416	slope and offset	[0.5384893833246406]	[-244.8305416881562]
33	682	711	0.810743	-432.856493	0.114922	79.861526	1.462665	45.022395	offset	[0.6493853920384437, 0.8792302124482411]	[-321.56209564208825, -481.2851471176767]
34	712	717	0.645929	-318.979067	0.000000	0.000000	1.418780	31.525749	slope	[0.11064521961642272]	[63.52184388583852]
35	718	741	0.130685	48.786457	0.000000	0.000000	1.415251	34.115561	offset	[0.21880750154193834]	[-15.585733995937698]
36	742	747	0.859827	-491.153349	0.000000	0.000000	1.050089	23.900059	slope	[-0.6074867994265448]	[603.561564750958]
37	748	765	-0.714504	684.725351	0.000000	0.000000	1.985395	48.662846	slope	[0.3562589487635109]	[-134.05372709116585]
38	766	789	0.495140	-242.044947	0.000000	0.000000	1.536369	41.680240	slope and offset	[0.5387300878539012]	[-275.87474810377125]
39	790	825	0.909180	-573.847995	0.151179	121.756423	3.530730	93.289918	slope and offset	[0.9441855064908137, 1.1994848150955963, 0.839...	[-602.8754119069766, -807.1867172241173, -516....
40	826	843	-0.881215	905.486190	0.000000	0.000000	2.437321	45.508232	slope and offset	[0.562445343705938]	[-312.3649464313193]
41	844	855	1.478926	-1090.794540	0.000000	0.000000	3.818125	74.190487	slope	[-0.5809495968926105]	[665.8387338509134]
42	856	873	-0.664082	737.712966	0.000000	0.000000	2.681230	61.276158	slope and offset	[0.7979965668871951]	[-540.6812979906184]
43	874	891	1.455711	-1122.103696	0.000000	0.000000	3.774928	101.249277	slope and offset	[-0.8033062698249669]	[888.1132038890993]
44	892	927	-0.978035	1045.696285	0.153028	139.323646	3.499759	109.880280	slope and offset	[-0.8096245328286469, -0.8107930348331622, -1....	[893.3659070122446, 894.947132185155, 1189.703...
45	928	933	2.036399	-1752.156001	0.000000	0.000000	1.668818	34.938471	slope and offset	[-0.8605478215038463]	[947.1130684243236]
46	934	945	-1.931660	1952.364934	0.000000	0.000000	2.874555	72.674823	slope and offset	[0.754942453714241]	[-581.3665397988219]
47	946	981	0.923006	-741.493741	0.124756	120.417111	2.238339	65.082380	offset	[0.7999810469777827, 0.9226313884993126, 1.103...	[-623.910067508082, -741.9576232451127, -917.0...
48	982	999	-1.094704	1247.730395	0.000000	0.000000	2.877927	75.998117		[]	[]

In [5]:

Copied!





# Select most useful columns
df_summary = df_segments[["start", "stop", "slope", "std", "span", "reason_for_new_segment"]].copy()
df_summary["length"] = df_summary["stop"] - df_summary["start"]
df_summary
# Select most useful columns
df_summary = df_segments[["start", "stop", "slope", "std", "span", "reason_for_new_segment"]].copy()
df_summary["length"] = df_summary["stop"] - df_summary["start"]
df_summary

Out[5]:

	start	stop	slope	std	span	reason_for_new_segment	length
0	0	9	0.197502	0.313283	17.926021	slope	9
1	10	75	-0.315274	1.432513	152.753008	slope	65
2	76	81	0.104997	0.446739	30.945143	slope	5
3	82	153	0.180014	0.784706	94.854749	slope	71
4	154	171	-0.418104	0.958458	83.338636	slope	17
5	172	177	-0.252725	0.220665	16.343903	offset	5
6	178	183	0.759340	0.143346	9.394813	offset	5
7	184	243	0.052121	0.921518	99.864373	offset	59
8	244	255	0.094036	0.696379	41.776869	slope	11
9	256	261	0.290731	0.356201	20.937572	offset	5
10	262	291	0.349733	0.598339	48.956367	offset	29
11	292	309	0.022144	0.635685	41.472750	offset	17
12	310	333	0.470871	0.609998	37.077467	offset	23
13	334	345	0.063475	1.117712	48.474198	slope and offset	11
14	346	351	0.536635	0.866207	30.911479	slope	5
15	352	381	-0.789260	2.795619	149.001721	slope and offset	29
16	382	423	0.453120	1.711334	121.214741	offset	41
17	424	459	0.508050	1.156028	69.226962	slope and offset	35
18	460	489	1.137028	3.592332	116.964022	slope and offset	29
19	490	507	-1.244808	3.941715	137.593727	slope and offset	17
20	508	519	0.542699	2.138564	55.675979	offset	11
21	520	531	-0.601584	1.827518	68.039884	slope	11
22	532	549	0.500673	2.620798	85.081969	offset	17
23	550	555	0.490341	1.355362	34.328355	offset	5
24	556	573	0.559880	1.760088	52.900783	offset	17
25	574	579	-1.274112	0.849650	21.860277	slope	5
26	580	591	0.605414	2.862259	72.166416	offset	11
27	592	597	-2.293729	1.800028	38.883082	slope	5
28	598	627	-0.626174	2.916785	102.304223	slope	29
29	628	633	-0.387734	1.633302	32.609588	slope and offset	5
30	634	651	0.968227	1.638103	53.408664	slope and offset	17
31	652	675	-0.424315	1.818585	60.497165	slope	23
32	676	681	-0.490511	0.548065	11.590416	slope and offset	5
33	682	711	0.810743	1.462665	45.022395	offset	29
34	712	717	0.645929	1.418780	31.525749	slope	5
35	718	741	0.130685	1.415251	34.115561	offset	23
36	742	747	0.859827	1.050089	23.900059	slope	5
37	748	765	-0.714504	1.985395	48.662846	slope	17
38	766	789	0.495140	1.536369	41.680240	slope and offset	23
39	790	825	0.909180	3.530730	93.289918	slope and offset	35
40	826	843	-0.881215	2.437321	45.508232	slope and offset	17
41	844	855	1.478926	3.818125	74.190487	slope	11
42	856	873	-0.664082	2.681230	61.276158	slope and offset	17
43	874	891	1.455711	3.774928	101.249277	slope and offset	17
44	892	927	-0.978035	3.499759	109.880280	slope and offset	35
45	928	933	2.036399	1.668818	34.938471	slope and offset	5
46	934	945	-1.931660	2.874555	72.674823	slope and offset	11
47	946	981	0.923006	2.238339	65.082380	offset	35
48	982	999	-1.094704	2.877927	75.998117		17

Statistical Analysis¶

In [6]:

Copied!





# Descriptive statistics
print("=== Segment Statistics ===")
print(f"Number of segments: {len(df_segments)}")
print(f"Average segment length: {df_summary['length'].mean():.1f} points")
print(f"Shortest segment: {df_summary['length'].min()} points")
print(f"Longest segment: {df_summary['length'].max()} points")
# Descriptive statistics
print("=== Segment Statistics ===")
print(f"Number of segments: {len(df_segments)}")
print(f"Average segment length: {df_summary['length'].mean():.1f} points")
print(f"Shortest segment: {df_summary['length'].min()} points")
print(f"Longest segment: {df_summary['length'].max()} points")

=== Segment Statistics ===
Number of segments: 49
Average segment length: 19.4 points
Shortest segment: 5 points
Longest segment: 71 points

In [7]:

Copied!

# Slope distribution
print("\n=== Slope Distribution ===")
print(df_segments["slope"].describe())
# Slope distribution
print("\n=== Slope Distribution ===")
print(df_segments["slope"].describe())

=== Slope Distribution ===
count    49.000000
mean      0.066328
std       0.864553
min      -2.293729
25%      -0.490511
50%       0.180014
75%       0.559880
max       2.036399
Name: slope, dtype: float64

In [8]:

Copied!





# Count trends by direction
uptrends = (df_segments["slope"] > 0).sum()
downtrends = (df_segments["slope"] < 0).sum()

print(f"\nUptrends: {uptrends} ({100*uptrends/len(df_segments):.1f}%)")
print(f"Downtrends: {downtrends} ({100*downtrends/len(df_segments):.1f}%)")
# Count trends by direction
uptrends = (df_segments["slope"] > 0).sum()
downtrends = (df_segments["slope"] < 0).sum()

print(f"\nUptrends: {uptrends} ({100*uptrends/len(df_segments):.1f}%)")
print(f"Downtrends: {downtrends} ({100*downtrends/len(df_segments):.1f}%)")

Uptrends: 31 (63.3%)
Downtrends: 18 (36.7%)

Understanding `reason_for_new_segment`¶

This attribute explains why the algorithm decided to end a segment:

"slope": The slope changed significantly
"offset": The offset changed significantly
"slope and offset": Both changed

In [9]:

Copied!





# Count reasons
reason_counts = df_segments["reason_for_new_segment"].value_counts()
print("Why segments ended:")
print(reason_counts)
# Count reasons
reason_counts = df_segments["reason_for_new_segment"].value_counts()
print("Why segments ended:")
print(reason_counts)

Why segments ended:
reason_for_new_segment
slope               16
offset              16
slope and offset    16
                     1
Name: count, dtype: int64

Volatility Analysis¶

The std attribute measures volatility after removing the linear trend. Low std = clean trend, High std = noisy/choppy trend.

In [10]:

Copied!





# Find cleanest and noisiest trends
cleanest_idx = df_segments["std"].idxmin()
noisiest_idx = df_segments["std"].idxmax()

print(f"Cleanest trend: Segment {cleanest_idx} (std={df_segments.loc[cleanest_idx, 'std']:.4f})")
print(f"Noisiest trend: Segment {noisiest_idx} (std={df_segments.loc[noisiest_idx, 'std']:.4f})")
# Find cleanest and noisiest trends
cleanest_idx = df_segments["std"].idxmin()
noisiest_idx = df_segments["std"].idxmax()

print(f"Cleanest trend: Segment {cleanest_idx} (std={df_segments.loc[cleanest_idx, 'std']:.4f})")
print(f"Noisiest trend: Segment {noisiest_idx} (std={df_segments.loc[noisiest_idx, 'std']:.4f})")

Cleanest trend: Segment 6 (std=0.1433)
Noisiest trend: Segment 19 (std=3.9417)

In [11]:

Copied!





# Visualize cleanest vs noisiest
import matplotlib.pyplot as plt

fig, axes = plt.subplots(1, 2, figsize=(14, 4))

for ax, idx, title in [(axes[0], cleanest_idx, "Cleanest Trend"), 
                        (axes[1], noisiest_idx, "Noisiest Trend")]:
    s = seg.segments[idx]
    x_seg = seg.x[s.start:s.stop]
    y_seg = seg.y[s.start:s.stop]
    ax.plot(x_seg, y_seg)
    ax.set_title(f"{title} (std={s.std:.4f})")
    ax.set_xlabel("Index")
    ax.set_ylabel("Price")

plt.tight_layout()
plt.show()
# Visualize cleanest vs noisiest
import matplotlib.pyplot as plt

fig, axes = plt.subplots(1, 2, figsize=(14, 4))

for ax, idx, title in [(axes[0], cleanest_idx, "Cleanest Trend"), 
                        (axes[1], noisiest_idx, "Noisiest Trend")]:
    s = seg.segments[idx]
    x_seg = seg.x[s.start:s.stop]
    y_seg = seg.y[s.start:s.stop]
    ax.plot(x_seg, y_seg)
    ax.set_title(f"{title} (std={s.std:.4f})")
    ax.set_xlabel("Index")
    ax.set_ylabel("Price")

plt.tight_layout()
plt.show()

No description has been provided for this image

Next Steps¶

03_visualization.py - All plotting methods in detail
04_configuration.py - Tune parameters to get better segments