Setup¶
In [1]:
Copied!
import yfinance as yf
from trend_classifier import Segmenter
# Download data
df = yf.download("AAPL", start="2018-09-15", end="2022-09-05", interval="1d", progress=False)
# Create and calculate segments
seg = Segmenter(df=df, column="Close", n=20)
seg.calculate_segments()
print(f"Analyzing {len(seg.segments)} segments")
import yfinance as yf
from trend_classifier import Segmenter
# Download data
df = yf.download("AAPL", start="2018-09-15", end="2022-09-05", interval="1d", progress=False)
# Create and calculate segments
seg = Segmenter(df=df, column="Close", n=20)
seg.calculate_segments()
print(f"Analyzing {len(seg.segments)} segments")
Analyzing 49 segments
Segment Attributes¶
Each Segment object contains rich information about the detected trend:
| Attribute | Description |
|---|---|
start |
Start index in the time series |
stop |
End index in the time series |
slope |
Overall slope of the linear trend |
offset |
Y-intercept of the linear trend |
std |
Standard deviation after detrending (volatility) |
span |
Normalized range of values |
slopes |
List of slopes from individual windows |
offsets |
List of offsets from individual windows |
slopes_std |
Std of slopes (trend consistency) |
offsets_std |
Std of offsets |
reason_for_new_segment |
Why this segment ended |
In [2]:
Copied!
# Examine a single segment in detail
segment = seg.segments[2]
print("=== Segment Details ===")
print(f"Range: index {segment.start} to {segment.stop} ({segment.stop - segment.start} points)")
print(f"Slope: {segment.slope:.4f}")
print(f"Offset: {segment.offset:.2f}")
print(f"Volatility (std): {segment.std:.4f}")
print(f"Span: {segment.span:.1f}")
print(f"Trend consistency (slopes_std): {segment.slopes_std:.4f}")
print(f"Reason segment ended: '{segment.reason_for_new_segment}'")
# Examine a single segment in detail
segment = seg.segments[2]
print("=== Segment Details ===")
print(f"Range: index {segment.start} to {segment.stop} ({segment.stop - segment.start} points)")
print(f"Slope: {segment.slope:.4f}")
print(f"Offset: {segment.offset:.2f}")
print(f"Volatility (std): {segment.std:.4f}")
print(f"Span: {segment.span:.1f}")
print(f"Trend consistency (slopes_std): {segment.slopes_std:.4f}")
print(f"Reason segment ended: '{segment.reason_for_new_segment}'")
=== Segment Details === Range: index 76 to 81 (5 points) Slope: 0.1050 Offset: 27.73 Volatility (std): 0.4467 Span: 30.9 Trend consistency (slopes_std): 0.0000 Reason segment ended: 'slope'
In [3]:
Copied!
# The full representation
print("\nFull repr:")
print(repr(segment))
# The full representation
print("\nFull repr:")
print(repr(segment))
Full repr: Segment(start=76, stop=81, slope=0.10499692644392077, offset=27.7340259370347, std=0.44673911715410836, span=30.945143102918966, reason_for_new_segment=slope, slopes=[0.2653123439702775], offsets=[14.540656963147605], slopes_std=0.0, offsets_std=0.0)
Export to DataFrame¶
The to_dataframe() method converts all segments to a pandas DataFrame for easy analysis.
In [4]:
Copied!
# Convert segments to DataFrame
df_segments = seg.segments.to_dataframe()
df_segments
# Convert segments to DataFrame
df_segments = seg.segments.to_dataframe()
df_segments
Out[4]:
| start | stop | slope | offset | slopes_std | offsets_std | std | span | reason_for_new_segment | slopes | offsets | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 9 | 0.197502 | 51.373652 | 0.000000 | 0.000000 | 0.313283 | 17.926021 | slope | [-0.1002435239634121] | [54.312332704013464] |
| 1 | 10 | 75 | -0.315274 | 58.936524 | 0.128520 | 4.743325 | 1.432513 | 152.753008 | slope | [-0.15026299899682063, -0.15786464375660753, -... | [55.39250845944972, 55.46701279546978, 62.3780... |
| 2 | 76 | 81 | 0.104997 | 27.734026 | 0.000000 | 0.000000 | 0.446739 | 30.945143 | slope | [0.2653123439702775] | [14.540656963147605] |
| 3 | 82 | 153 | 0.180014 | 21.923825 | 0.065423 | 7.017021 | 0.784706 | 94.854749 | slope | [0.28343645827214536, 0.1573499894679941, 0.05... | [12.548412297184312, 24.649738174094253, 35.11... |
| 4 | 154 | 171 | -0.418104 | 114.731870 | 0.000000 | 0.000000 | 0.958458 | 83.338636 | slope | [0.20273512359848547] | [8.268741292164817] |
| 5 | 172 | 177 | -0.252725 | 86.487991 | 0.000000 | 0.000000 | 0.220665 | 16.343903 | offset | [0.3256940827333848] | [-14.319836067257278] |
| 6 | 178 | 183 | 0.759340 | -92.129464 | 0.000000 | 0.000000 | 0.143346 | 9.394813 | offset | [0.16418316489771786] | [16.008887923391246] |
| 7 | 184 | 243 | 0.052121 | 37.712443 | 0.069056 | 14.926482 | 0.921518 | 99.864373 | offset | [0.10541228674408207, 0.09235581132702286, 0.0... | [27.264427543582734, 29.83007757717522, 29.861... |
| 8 | 244 | 255 | 0.094036 | 29.266863 | 0.000000 | 0.000000 | 0.696379 | 41.776869 | slope | [0.20368984767368656] | [0.727319608416445] |
| 9 | 256 | 261 | 0.290731 | -21.991700 | 0.000000 | 0.000000 | 0.356201 | 20.937572 | offset | [0.29768106274138795] | [-24.30454409319677] |
| 10 | 262 | 291 | 0.349733 | -38.473924 | 0.032526 | 8.894525 | 0.598339 | 48.956367 | offset | [0.2893737879014559, 0.3544255823121014] | [-22.044845073384636, -39.83389430225283] |
| 11 | 292 | 309 | 0.022144 | 57.252010 | 0.000000 | 0.000000 | 0.635685 | 41.472750 | offset | [0.4148262081289649] | [-63.515122125381765] |
| 12 | 310 | 333 | 0.470871 | -81.341134 | 0.000000 | 0.000000 | 0.609998 | 37.077467 | offset | [0.45389397413210525] | [-75.97579879187023] |
| 13 | 334 | 345 | 0.063475 | 54.885188 | 0.000000 | 0.000000 | 1.117712 | 48.474198 | slope and offset | [-0.13614144002584588] | [124.56897648402624] |
| 14 | 346 | 351 | 0.536635 | -109.917587 | 0.000000 | 0.000000 | 0.866207 | 30.911479 | slope | [-0.5661394219649489] | [277.07836881938823] |
| 15 | 352 | 381 | -0.789260 | 357.880087 | 0.001293 | 0.897931 | 2.795619 | 149.001721 | slope and offset | [-0.7307296121927065, -0.7281436633346682] | [336.9613589752861, 335.16549767228923] |
| 16 | 382 | 423 | 0.453120 | -113.747268 | 0.089059 | 35.879060 | 1.711334 | 121.214741 | offset | [0.5063411569236804, 0.30660507947878624, 0.46... | [-134.74731052656787, -55.07139889136243, -117... |
| 17 | 424 | 459 | 0.508050 | -139.004988 | 0.077954 | 34.173083 | 1.156028 | 69.226962 | slope and offset | [0.5896744003869475, 0.40295758928571496, 0.46... | [-174.36450660533194, -92.50303976876422, -118... |
| 18 | 460 | 489 | 1.137028 | -434.631337 | 0.011891 | 5.977649 | 3.592332 | 116.964022 | slope and offset | [1.2540462637306398, 1.2778273331491528] | [-490.22680138610605, -502.18209846897895] |
| 19 | 490 | 507 | -1.244808 | 735.206600 | 0.000000 | 0.000000 | 3.941715 | 137.593727 | slope and offset | [0.650410002514835] | [-222.97327127671952] |
| 20 | 508 | 519 | 0.542699 | -168.379520 | 0.000000 | 0.000000 | 2.138564 | 55.675979 | offset | [-0.23563809502393815] | [237.10188738600723] |
| 21 | 520 | 531 | -0.601584 | 431.092696 | 0.000000 | 0.000000 | 1.827518 | 68.039884 | slope | [0.3130644948858911] | [-55.63623460468208] |
| 22 | 532 | 549 | 0.500673 | -157.644286 | 0.000000 | 0.000000 | 2.620798 | 85.081969 | offset | [0.3456263205162643] | [-75.0487547107253] |
| 23 | 550 | 555 | 0.490341 | -157.663800 | 0.000000 | 0.000000 | 1.355362 | 34.328355 | offset | [0.6507702361372153] | [-245.364759166974] |
| 24 | 556 | 573 | 0.559880 | -193.699279 | 0.000000 | 0.000000 | 1.760088 | 52.900783 | offset | [-0.18182311093896558] | [232.67361025128878] |
| 25 | 574 | 579 | -1.274112 | 864.058092 | 0.000000 | 0.000000 | 0.849650 | 21.860277 | slope | [0.55792021213619] | [-197.0884808762715] |
| 26 | 580 | 591 | 0.605414 | -227.102345 | 0.000000 | 0.000000 | 2.862259 | 72.166416 | offset | [-0.04245336432205603] | [157.9319744310857] |
| 27 | 592 | 597 | -2.293729 | 1498.498366 | 0.000000 | 0.000000 | 1.800028 | 38.883082 | slope | [-0.5593556396943261] | [467.5525597536457] |
| 28 | 598 | 627 | -0.626174 | 508.022457 | 0.108374 | 66.258657 | 2.916785 | 102.304223 | slope | [-0.9246914282777318, -0.7079439263594823] | [689.8949669171135, 557.3776538648133] |
| 29 | 628 | 633 | -0.387734 | 363.291128 | 0.000000 | 0.000000 | 1.633302 | 32.609588 | slope and offset | [0.8268386267181737] | [-405.7310210550709] |
| 30 | 634 | 651 | 0.968227 | -497.193711 | 0.000000 | 0.000000 | 1.638103 | 53.408664 | slope and offset | [-0.4749670616666152] | [440.4340315223619] |
| 31 | 652 | 675 | -0.424315 | 407.539967 | 0.000000 | 0.000000 | 1.818585 | 60.497165 | slope | [-0.5325873066608078] | [479.31027545784895] |
| 32 | 676 | 681 | -0.490511 | 455.367242 | 0.000000 | 0.000000 | 0.548065 | 11.590416 | slope and offset | [0.5384893833246406] | [-244.8305416881562] |
| 33 | 682 | 711 | 0.810743 | -432.856493 | 0.114922 | 79.861526 | 1.462665 | 45.022395 | offset | [0.6493853920384437, 0.8792302124482411] | [-321.56209564208825, -481.2851471176767] |
| 34 | 712 | 717 | 0.645929 | -318.979067 | 0.000000 | 0.000000 | 1.418780 | 31.525749 | slope | [0.11064521961642272] | [63.52184388583852] |
| 35 | 718 | 741 | 0.130685 | 48.786457 | 0.000000 | 0.000000 | 1.415251 | 34.115561 | offset | [0.21880750154193834] | [-15.585733995937698] |
| 36 | 742 | 747 | 0.859827 | -491.153349 | 0.000000 | 0.000000 | 1.050089 | 23.900059 | slope | [-0.6074867994265448] | [603.561564750958] |
| 37 | 748 | 765 | -0.714504 | 684.725351 | 0.000000 | 0.000000 | 1.985395 | 48.662846 | slope | [0.3562589487635109] | [-134.05372709116585] |
| 38 | 766 | 789 | 0.495140 | -242.044947 | 0.000000 | 0.000000 | 1.536369 | 41.680240 | slope and offset | [0.5387300878539012] | [-275.87474810377125] |
| 39 | 790 | 825 | 0.909180 | -573.847995 | 0.151179 | 121.756423 | 3.530730 | 93.289918 | slope and offset | [0.9441855064908137, 1.1994848150955963, 0.839... | [-602.8754119069766, -807.1867172241173, -516.... |
| 40 | 826 | 843 | -0.881215 | 905.486190 | 0.000000 | 0.000000 | 2.437321 | 45.508232 | slope and offset | [0.562445343705938] | [-312.3649464313193] |
| 41 | 844 | 855 | 1.478926 | -1090.794540 | 0.000000 | 0.000000 | 3.818125 | 74.190487 | slope | [-0.5809495968926105] | [665.8387338509134] |
| 42 | 856 | 873 | -0.664082 | 737.712966 | 0.000000 | 0.000000 | 2.681230 | 61.276158 | slope and offset | [0.7979965668871951] | [-540.6812979906184] |
| 43 | 874 | 891 | 1.455711 | -1122.103696 | 0.000000 | 0.000000 | 3.774928 | 101.249277 | slope and offset | [-0.8033062698249669] | [888.1132038890993] |
| 44 | 892 | 927 | -0.978035 | 1045.696285 | 0.153028 | 139.323646 | 3.499759 | 109.880280 | slope and offset | [-0.8096245328286469, -0.8107930348331622, -1.... | [893.3659070122446, 894.947132185155, 1189.703... |
| 45 | 928 | 933 | 2.036399 | -1752.156001 | 0.000000 | 0.000000 | 1.668818 | 34.938471 | slope and offset | [-0.8605478215038463] | [947.1130684243236] |
| 46 | 934 | 945 | -1.931660 | 1952.364934 | 0.000000 | 0.000000 | 2.874555 | 72.674823 | slope and offset | [0.754942453714241] | [-581.3665397988219] |
| 47 | 946 | 981 | 0.923006 | -741.493741 | 0.124756 | 120.417111 | 2.238339 | 65.082380 | offset | [0.7999810469777827, 0.9226313884993126, 1.103... | [-623.910067508082, -741.9576232451127, -917.0... |
| 48 | 982 | 999 | -1.094704 | 1247.730395 | 0.000000 | 0.000000 | 2.877927 | 75.998117 | [] | [] |
In [5]:
Copied!
# Select most useful columns
df_summary = df_segments[["start", "stop", "slope", "std", "span", "reason_for_new_segment"]].copy()
df_summary["length"] = df_summary["stop"] - df_summary["start"]
df_summary
# Select most useful columns
df_summary = df_segments[["start", "stop", "slope", "std", "span", "reason_for_new_segment"]].copy()
df_summary["length"] = df_summary["stop"] - df_summary["start"]
df_summary
Out[5]:
| start | stop | slope | std | span | reason_for_new_segment | length | |
|---|---|---|---|---|---|---|---|
| 0 | 0 | 9 | 0.197502 | 0.313283 | 17.926021 | slope | 9 |
| 1 | 10 | 75 | -0.315274 | 1.432513 | 152.753008 | slope | 65 |
| 2 | 76 | 81 | 0.104997 | 0.446739 | 30.945143 | slope | 5 |
| 3 | 82 | 153 | 0.180014 | 0.784706 | 94.854749 | slope | 71 |
| 4 | 154 | 171 | -0.418104 | 0.958458 | 83.338636 | slope | 17 |
| 5 | 172 | 177 | -0.252725 | 0.220665 | 16.343903 | offset | 5 |
| 6 | 178 | 183 | 0.759340 | 0.143346 | 9.394813 | offset | 5 |
| 7 | 184 | 243 | 0.052121 | 0.921518 | 99.864373 | offset | 59 |
| 8 | 244 | 255 | 0.094036 | 0.696379 | 41.776869 | slope | 11 |
| 9 | 256 | 261 | 0.290731 | 0.356201 | 20.937572 | offset | 5 |
| 10 | 262 | 291 | 0.349733 | 0.598339 | 48.956367 | offset | 29 |
| 11 | 292 | 309 | 0.022144 | 0.635685 | 41.472750 | offset | 17 |
| 12 | 310 | 333 | 0.470871 | 0.609998 | 37.077467 | offset | 23 |
| 13 | 334 | 345 | 0.063475 | 1.117712 | 48.474198 | slope and offset | 11 |
| 14 | 346 | 351 | 0.536635 | 0.866207 | 30.911479 | slope | 5 |
| 15 | 352 | 381 | -0.789260 | 2.795619 | 149.001721 | slope and offset | 29 |
| 16 | 382 | 423 | 0.453120 | 1.711334 | 121.214741 | offset | 41 |
| 17 | 424 | 459 | 0.508050 | 1.156028 | 69.226962 | slope and offset | 35 |
| 18 | 460 | 489 | 1.137028 | 3.592332 | 116.964022 | slope and offset | 29 |
| 19 | 490 | 507 | -1.244808 | 3.941715 | 137.593727 | slope and offset | 17 |
| 20 | 508 | 519 | 0.542699 | 2.138564 | 55.675979 | offset | 11 |
| 21 | 520 | 531 | -0.601584 | 1.827518 | 68.039884 | slope | 11 |
| 22 | 532 | 549 | 0.500673 | 2.620798 | 85.081969 | offset | 17 |
| 23 | 550 | 555 | 0.490341 | 1.355362 | 34.328355 | offset | 5 |
| 24 | 556 | 573 | 0.559880 | 1.760088 | 52.900783 | offset | 17 |
| 25 | 574 | 579 | -1.274112 | 0.849650 | 21.860277 | slope | 5 |
| 26 | 580 | 591 | 0.605414 | 2.862259 | 72.166416 | offset | 11 |
| 27 | 592 | 597 | -2.293729 | 1.800028 | 38.883082 | slope | 5 |
| 28 | 598 | 627 | -0.626174 | 2.916785 | 102.304223 | slope | 29 |
| 29 | 628 | 633 | -0.387734 | 1.633302 | 32.609588 | slope and offset | 5 |
| 30 | 634 | 651 | 0.968227 | 1.638103 | 53.408664 | slope and offset | 17 |
| 31 | 652 | 675 | -0.424315 | 1.818585 | 60.497165 | slope | 23 |
| 32 | 676 | 681 | -0.490511 | 0.548065 | 11.590416 | slope and offset | 5 |
| 33 | 682 | 711 | 0.810743 | 1.462665 | 45.022395 | offset | 29 |
| 34 | 712 | 717 | 0.645929 | 1.418780 | 31.525749 | slope | 5 |
| 35 | 718 | 741 | 0.130685 | 1.415251 | 34.115561 | offset | 23 |
| 36 | 742 | 747 | 0.859827 | 1.050089 | 23.900059 | slope | 5 |
| 37 | 748 | 765 | -0.714504 | 1.985395 | 48.662846 | slope | 17 |
| 38 | 766 | 789 | 0.495140 | 1.536369 | 41.680240 | slope and offset | 23 |
| 39 | 790 | 825 | 0.909180 | 3.530730 | 93.289918 | slope and offset | 35 |
| 40 | 826 | 843 | -0.881215 | 2.437321 | 45.508232 | slope and offset | 17 |
| 41 | 844 | 855 | 1.478926 | 3.818125 | 74.190487 | slope | 11 |
| 42 | 856 | 873 | -0.664082 | 2.681230 | 61.276158 | slope and offset | 17 |
| 43 | 874 | 891 | 1.455711 | 3.774928 | 101.249277 | slope and offset | 17 |
| 44 | 892 | 927 | -0.978035 | 3.499759 | 109.880280 | slope and offset | 35 |
| 45 | 928 | 933 | 2.036399 | 1.668818 | 34.938471 | slope and offset | 5 |
| 46 | 934 | 945 | -1.931660 | 2.874555 | 72.674823 | slope and offset | 11 |
| 47 | 946 | 981 | 0.923006 | 2.238339 | 65.082380 | offset | 35 |
| 48 | 982 | 999 | -1.094704 | 2.877927 | 75.998117 | 17 |
Statistical Analysis¶
In [6]:
Copied!
# Descriptive statistics
print("=== Segment Statistics ===")
print(f"Number of segments: {len(df_segments)}")
print(f"Average segment length: {df_summary['length'].mean():.1f} points")
print(f"Shortest segment: {df_summary['length'].min()} points")
print(f"Longest segment: {df_summary['length'].max()} points")
# Descriptive statistics
print("=== Segment Statistics ===")
print(f"Number of segments: {len(df_segments)}")
print(f"Average segment length: {df_summary['length'].mean():.1f} points")
print(f"Shortest segment: {df_summary['length'].min()} points")
print(f"Longest segment: {df_summary['length'].max()} points")
=== Segment Statistics === Number of segments: 49 Average segment length: 19.4 points Shortest segment: 5 points Longest segment: 71 points
In [7]:
Copied!
# Slope distribution
print("\n=== Slope Distribution ===")
print(df_segments["slope"].describe())
# Slope distribution
print("\n=== Slope Distribution ===")
print(df_segments["slope"].describe())
=== Slope Distribution === count 49.000000 mean 0.066328 std 0.864553 min -2.293729 25% -0.490511 50% 0.180014 75% 0.559880 max 2.036399 Name: slope, dtype: float64
In [8]:
Copied!
# Count trends by direction
uptrends = (df_segments["slope"] > 0).sum()
downtrends = (df_segments["slope"] < 0).sum()
print(f"\nUptrends: {uptrends} ({100*uptrends/len(df_segments):.1f}%)")
print(f"Downtrends: {downtrends} ({100*downtrends/len(df_segments):.1f}%)")
# Count trends by direction
uptrends = (df_segments["slope"] > 0).sum()
downtrends = (df_segments["slope"] < 0).sum()
print(f"\nUptrends: {uptrends} ({100*uptrends/len(df_segments):.1f}%)")
print(f"Downtrends: {downtrends} ({100*downtrends/len(df_segments):.1f}%)")
Uptrends: 31 (63.3%) Downtrends: 18 (36.7%)
Understanding reason_for_new_segment¶
This attribute explains why the algorithm decided to end a segment:
- "slope": The slope changed significantly
- "offset": The offset changed significantly
- "slope and offset": Both changed
In [9]:
Copied!
# Count reasons
reason_counts = df_segments["reason_for_new_segment"].value_counts()
print("Why segments ended:")
print(reason_counts)
# Count reasons
reason_counts = df_segments["reason_for_new_segment"].value_counts()
print("Why segments ended:")
print(reason_counts)
Why segments ended:
reason_for_new_segment
slope 16
offset 16
slope and offset 16
1
Name: count, dtype: int64
Volatility Analysis¶
The std attribute measures volatility after removing the linear trend.
Low std = clean trend, High std = noisy/choppy trend.
In [10]:
Copied!
# Find cleanest and noisiest trends
cleanest_idx = df_segments["std"].idxmin()
noisiest_idx = df_segments["std"].idxmax()
print(f"Cleanest trend: Segment {cleanest_idx} (std={df_segments.loc[cleanest_idx, 'std']:.4f})")
print(f"Noisiest trend: Segment {noisiest_idx} (std={df_segments.loc[noisiest_idx, 'std']:.4f})")
# Find cleanest and noisiest trends
cleanest_idx = df_segments["std"].idxmin()
noisiest_idx = df_segments["std"].idxmax()
print(f"Cleanest trend: Segment {cleanest_idx} (std={df_segments.loc[cleanest_idx, 'std']:.4f})")
print(f"Noisiest trend: Segment {noisiest_idx} (std={df_segments.loc[noisiest_idx, 'std']:.4f})")
Cleanest trend: Segment 6 (std=0.1433) Noisiest trend: Segment 19 (std=3.9417)
In [11]:
Copied!
# Visualize cleanest vs noisiest
import matplotlib.pyplot as plt
fig, axes = plt.subplots(1, 2, figsize=(14, 4))
for ax, idx, title in [(axes[0], cleanest_idx, "Cleanest Trend"),
(axes[1], noisiest_idx, "Noisiest Trend")]:
s = seg.segments[idx]
x_seg = seg.x[s.start:s.stop]
y_seg = seg.y[s.start:s.stop]
ax.plot(x_seg, y_seg)
ax.set_title(f"{title} (std={s.std:.4f})")
ax.set_xlabel("Index")
ax.set_ylabel("Price")
plt.tight_layout()
plt.show()
# Visualize cleanest vs noisiest
import matplotlib.pyplot as plt
fig, axes = plt.subplots(1, 2, figsize=(14, 4))
for ax, idx, title in [(axes[0], cleanest_idx, "Cleanest Trend"),
(axes[1], noisiest_idx, "Noisiest Trend")]:
s = seg.segments[idx]
x_seg = seg.x[s.start:s.stop]
y_seg = seg.y[s.start:s.stop]
ax.plot(x_seg, y_seg)
ax.set_title(f"{title} (std={s.std:.4f})")
ax.set_xlabel("Index")
ax.set_ylabel("Price")
plt.tight_layout()
plt.show()
Next Steps¶
- 03_visualization.py - All plotting methods in detail
- 04_configuration.py - Tune parameters to get better segments