# Dati in tempo reale e storici assieme

In [1]:
import pandas as pd
import numpy as np
import time
from datetime import datetime, timedelta, timezone
import tpqoa

In [2]:
# Current UTC instant and the instant exactly 24 hours before it.
now = datetime.now(tz=timezone.utc)
yesterday = now - timedelta(hours=24)

In [3]:
# Show both timestamps; note that they carry a microseconds component.
now, yesterday

(datetime.datetime(2024, 11, 15, 10, 2, 26, 361607, tzinfo=datetime.timezone.utc),
 datetime.datetime(2024, 11, 14, 10, 2, 26, 361607, tzinfo=datetime.timezone.utc))

OANDA purtroppo non riesce a gestire le date con i microsecondi

In [4]:
# OANDA cannot handle timestamps with microseconds, so truncate to whole seconds.
# Note the singular keyword: datetime.replace takes `microsecond`, not `microseconds`.
now = datetime.now(timezone.utc).replace(microsecond=0)
yesterday = now - timedelta(days=1)

In [5]:
# Show the timestamps again: the microseconds component is now zero.
now, yesterday

(datetime.datetime(2024, 11, 15, 10, 2, 26, tzinfo=datetime.timezone.utc),
 datetime.datetime(2024, 11, 14, 10, 2, 26, tzinfo=datetime.timezone.utc))

In [6]:
# Instantiate the tpqoa client from the local configuration file.
api = tpqoa.tpqoa("oandaMY.cfg")

In [7]:
# Download the last 24 hours of 5-second candles (price="M") and keep only the close ("c").
# The UTC offset is removed with replace(tzinfo=None) rather than by slicing the last six
# characters off str(datetime): for these UTC values the resulting strings are identical,
# but the intent is explicit and a naive datetime can no longer be truncated by mistake.
df = api.get_history(
    instrument="EUR_USD",
    start=str(yesterday.replace(tzinfo=None)),
    end=str(now.replace(tzinfo=None)),
    granularity="S5",
    price="M",
    localize=False,
)["c"].to_frame()

  dr = pd.date_range(start, end, freq=freq)


In [8]:
# Inspect the downloaded 5-second closes.
df

Unnamed: 0_level_0,c
time,Unnamed: 1_level_1
2024-11-14 10:02:25+00:00,1.05199
2024-11-14 10:02:30+00:00,1.05198
2024-11-14 10:02:35+00:00,1.05197
2024-11-14 10:02:40+00:00,1.05197
2024-11-14 10:02:45+00:00,1.05192
...,...
2024-11-15 10:02:10+00:00,1.05616
2024-11-15 10:02:15+00:00,1.05616
2024-11-15 10:02:20+00:00,1.05623
2024-11-15 10:02:25+00:00,1.05624


In [9]:
# Name the close column after the instrument.
df = df.rename(columns={"c": "EUR_USD"})

In [10]:
# Same frame, with the column now named after the instrument.
df

Unnamed: 0_level_0,EUR_USD
time,Unnamed: 1_level_1
2024-11-14 10:02:25+00:00,1.05199
2024-11-14 10:02:30+00:00,1.05198
2024-11-14 10:02:35+00:00,1.05197
2024-11-14 10:02:40+00:00,1.05197
2024-11-14 10:02:45+00:00,1.05192
...,...
2024-11-15 10:02:10+00:00,1.05616
2024-11-15 10:02:15+00:00,1.05616
2024-11-15 10:02:20+00:00,1.05623
2024-11-15 10:02:25+00:00,1.05624


In [11]:
# Resample to 1-minute bars labelled by their right edge, keeping the last close in each bin,
# then discard the final bar (the minute that was still in progress at download time).
# dropna() is used here instead of ffill() because:
# 1. with 5-second data there should always be at least one price per minute
# 2. the range may include a weekend, and Friday evening's price must NOT be carried
#    forward to Monday morning
minute_close = df.resample("1min", label="right").last()
df = minute_close.dropna().head(-1)

In [12]:
# One row per minute, labelled with the right edge of the bin.
df

Unnamed: 0_level_0,EUR_USD
time,Unnamed: 1_level_1
2024-11-14 10:03:00+00:00,1.05188
2024-11-14 10:04:00+00:00,1.05172
2024-11-14 10:05:00+00:00,1.05174
2024-11-14 10:06:00+00:00,1.05172
2024-11-14 10:07:00+00:00,1.05158
...,...
2024-11-15 09:58:00+00:00,1.05614
2024-11-15 09:59:00+00:00,1.05617
2024-11-15 10:00:00+00:00,1.05622
2024-11-15 10:01:00+00:00,1.05622


# Trader Class

In [13]:
class Trader(tpqoa.tpqoa):
    """tpqoa client that joins downloaded history and streamed ticks into one bar series.

    getMostRecent() must be called before stream_data(): on_success() compares each
    tick against ultimoPeriodo, which is only assigned by getMostRecent().
    """

    def __init__(self, config_file, instrument, period):
        """Initialise the parent client and the empty data buffers.

        Args:
            config_file: configuration file forwarded to tpqoa.tpqoa.
            instrument: instrument name (e.g. "EUR_USD"); also used as the column name.
            period: bar length as a pandas frequency string (e.g. "1min").
        """
        super().__init__(config_file)
        self.instrument = instrument
        self.period = period
        self.tickData = pd.DataFrame()  # ticks buffered since the last join
        self.rawData = None  # completed bars; assigned in getMostRecent
        self.ultimoPeriodo = None  # label of the last completed bar; assigned in getMostRecent

    def getMostRecent(self, days=10):
        """Download the history UP TO now and store it as completed bars in rawData.

        Args:
            days: length of the window to download, counted back from now.

        Raises:
            IndexError: if no completed bar is left after resampling.
        """
        # OANDA cannot handle microseconds, so work with whole seconds only.
        now = datetime.now(timezone.utc).replace(microsecond=0)
        start = now - timedelta(days=days)
        # The request is sent with offset-free timestamps because OANDA stopped accepting
        # the offset-aware format (11/2024). replace(tzinfo=None) yields the same strings
        # the former str(...)[:-6] slicing produced, without depending on the offset width.
        # Data is downloaded at 5-second granularity, so period cannot be shorter than that.
        df = self.get_history(
            instrument=self.instrument,
            start=str(start.replace(tzinfo=None)),
            end=str(now.replace(tzinfo=None)),
            granularity="S5",
            price="M",
            localize=False,
        )["c"].to_frame()
        df = df.rename(columns={"c": self.instrument})
        # dropna() removes bins without prices (e.g. weekends) instead of forward-filling
        # them; iloc[:-1] discards the last bar, which is still in progress.
        bars = df.resample(self.period, label="right").last().dropna().iloc[:-1]
        if bars.empty:
            raise IndexError(
                f"no completed {self.period} bar available for {self.instrument}"
            )
        self.rawData = bars
        self.ultimoPeriodo = bars.index[-1]

    def on_success(self, time, bid, ask):
        """Per-tick stream callback: buffer the mid price and join bars once one has closed.

        Args:
            time: tick timestamp as delivered by the stream; parsed with pd.to_datetime.
            bid: bid price of the tick.
            ask: ask price of the tick.
        """
        print(self.ticks, end=" ")
        tickCorrente = pd.to_datetime(time)
        df = pd.DataFrame({self.instrument: (ask + bid) / 2}, index=[tickCorrente])
        # Avoid concatenating with the initial empty frame: the first tick simply
        # becomes the buffer, so the index keeps its datetime dtype.
        if self.tickData.empty:
            self.tickData = df
        else:
            self.tickData = pd.concat((self.tickData, df), axis=0)
        # More than one full period has elapsed since the last completed bar.
        if tickCorrente - self.ultimoPeriodo > pd.to_timedelta(self.period):
            self.resampleJoin()

    def resampleJoin(self):
        """Resample the buffered ticks and append the completed bars to rawData."""
        # The last resampled bar is still in progress, so it is not joined yet.
        completed = (
            self.tickData.resample(self.period, label="right").last().ffill().iloc[:-1]
        )
        if not completed.empty:
            # Carry over the index name of the history so it survives the concat.
            completed = completed.rename_axis(self.rawData.index.name)
            self.rawData = pd.concat((self.rawData, completed), axis=0)
            self.ultimoPeriodo = self.rawData.index[-1]
        # Keep only the latest tick: it belongs to the bar that is still open.
        self.tickData = self.tickData.iloc[-1:]

In [14]:
# Trader for EUR_USD with 1-minute bars.
t = Trader("oandaMY.cfg","EUR_USD","1min")

In [15]:
# Print the wall-clock time just before the download, to compare it with the last bar.
print(datetime.now(timezone.utc))
t.getMostRecent()

2024-11-15 10:02:27.913887+00:00


  dr = pd.date_range(start, end, freq=freq)


In [16]:
# Bars obtained from the historical download.
t.rawData

Unnamed: 0_level_0,EUR_USD
time,Unnamed: 1_level_1
2024-11-05 10:03:00+00:00,1.08928
2024-11-05 10:04:00+00:00,1.08917
2024-11-05 10:05:00+00:00,1.08918
2024-11-05 10:06:00+00:00,1.08926
2024-11-05 10:07:00+00:00,1.08922
...,...
2024-11-15 09:58:00+00:00,1.05614
2024-11-15 09:59:00+00:00,1.05617
2024-11-15 10:00:00+00:00,1.05622
2024-11-15 10:01:00+00:00,1.05622


In [17]:
# Download the history, then stream live ticks (stop=50).
# The two prints bracket the download so its duration can be read from the output.
print(datetime.now(timezone.utc))
t.getMostRecent()
print(datetime.now(timezone.utc))
t.stream_data(t.instrument,stop=50)

2024-11-15 10:02:39.394650+00:00


  dr = pd.date_range(start, end, freq=freq)


2024-11-15 10:02:50.470521+00:00
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 

In [18]:
# Bars after streaming: history plus any bars completed from live ticks.
t.rawData

Unnamed: 0,EUR_USD
2024-11-05 10:03:00+00:00,1.08928
2024-11-05 10:04:00+00:00,1.08917
2024-11-05 10:05:00+00:00,1.08918
2024-11-05 10:06:00+00:00,1.08926
2024-11-05 10:07:00+00:00,1.08922
...,...
2024-11-15 09:59:00+00:00,1.05617
2024-11-15 10:00:00+00:00,1.05622
2024-11-15 10:01:00+00:00,1.05622
2024-11-15 10:02:00+00:00,1.05622


In [19]:
# Ticks still buffered, i.e. not yet joined into rawData.
t.tickData

Unnamed: 0,EUR_USD
2024-11-15 10:03:00.049983561+00:00,1.05649
2024-11-15 10:03:00.141970656+00:00,1.056495
2024-11-15 10:03:00.424449445+00:00,1.05649
2024-11-15 10:03:01.582103948+00:00,1.05649
2024-11-15 10:03:02.097482218+00:00,1.056495
2024-11-15 10:03:05.355296003+00:00,1.05651
2024-11-15 10:03:05.559650226+00:00,1.05653
2024-11-15 10:03:05.870179965+00:00,1.056525
2024-11-15 10:03:06.165972323+00:00,1.056525
2024-11-15 10:03:06.271267980+00:00,1.05653


**Problema**: può succedere di scaricare i dati alle 10:27:55: l'ultima barra, ancora incompleta, viene scartata e quindi buttiamo via tutti i dati dalle 10:27:00 in poi. I dati real-time partiranno un po' dopo, ad esempio alle 10:28:13, e quindi non avrò nessun dato per il minuto 10:27.

**Soluzione**: ripeto il download dei dati finché non capita una situazione fortunata in cui non perdo nulla. Ad esempio: scarichiamo i dati alle 10:27:10, buttiamo via tutti i dati dalle 10:27:00 in poi, i dati real-time partono dalle 10:27:45 e quindi ho comunque un dato per il minuto 10:27.

In [20]:
class Trader(tpqoa.tpqoa):
    """tpqoa client that joins downloaded history and streamed ticks into one bar series.

    getMostRecent() must be called before stream_data(): on_success() compares each
    tick against ultimoPeriodo, which is only assigned by getMostRecent().
    """

    def __init__(self, config_file, instrument, period):
        """Initialise the parent client and the empty data buffers.

        Args:
            config_file: configuration file forwarded to tpqoa.tpqoa.
            instrument: instrument name (e.g. "EUR_USD"); also used as the column name.
            period: bar length as a pandas frequency string (e.g. "1min").
        """
        super().__init__(config_file)
        self.instrument = instrument
        self.period = period
        self.tickData = pd.DataFrame()  # ticks buffered since the last join
        self.rawData = None  # completed bars; assigned in getMostRecent
        self.ultimoPeriodo = None  # label of the last completed bar; assigned in getMostRecent

    def _downloadBars(self, days):
        """Download `days` days of 5-second closes ending now; return the completed bars."""
        # OANDA cannot handle microseconds, so work with whole seconds only.
        now = datetime.now(timezone.utc).replace(microsecond=0)
        start = now - timedelta(days=days)
        # replace(tzinfo=None) yields the same offset-free strings the former
        # str(...)[:-6] slicing produced, without depending on the offset width.
        # Data is downloaded at 5-second granularity, so period cannot be shorter than that.
        df = self.get_history(
            instrument=self.instrument,
            start=str(start.replace(tzinfo=None)),
            end=str(now.replace(tzinfo=None)),
            granularity="S5",
            price="M",
            localize=False,
        )["c"].to_frame()
        df = df.rename(columns={"c": self.instrument})
        # dropna() removes bins without prices (e.g. weekends) instead of forward-filling
        # them; iloc[:-1] discards the last bar, which is still in progress.
        return df.resample(self.period, label="right").last().dropna().iloc[:-1]

    def getMostRecent(self, days=10, max_attempts=10):
        """Download the history UP TO now, retrying until no bar can be lost.

        The download is accepted only if, once it has finished, less than one period
        has elapsed since the last completed bar: in that case the live stream can
        still deliver the closing price of the bar currently in progress.

        Args:
            days: length of the window to download, counted back from now.
            max_attempts: maximum number of downloads before giving up; None retries
                forever (the previous behaviour). A bound is needed because the
                condition can never be met while the market is closed.

        Raises:
            IndexError: if no completed bar is left after resampling.
            RuntimeError: if max_attempts downloads did not yield a fresh enough bar.
        """
        attempt = 0
        while True:
            attempt += 1
            bars = self._downloadBars(days)
            if bars.empty:
                raise IndexError(
                    f"no completed {self.period} bar available for {self.instrument}"
                )
            self.rawData = bars
            self.ultimoPeriodo = bars.index[-1]
            # Stop as soon as (now - end of last valid bar) is shorter than the period.
            if pd.to_datetime(datetime.now(timezone.utc)) - self.ultimoPeriodo < pd.to_timedelta(self.period):
                return
            if max_attempts is not None and attempt >= max_attempts:
                raise RuntimeError(
                    f"last completed bar ({self.ultimoPeriodo}) is still older than one "
                    f"period after {attempt} downloads; is the market closed?"
                )
            print("Spiacente ma devo riscaricare i dati, sigh")
            time.sleep(2)

    def on_success(self, time, bid, ask):
        """Per-tick stream callback: buffer the mid price and join bars once one has closed.

        Args:
            time: tick timestamp as delivered by the stream; parsed with pd.to_datetime.
                (Shadows the `time` module inside this method only.)
            bid: bid price of the tick.
            ask: ask price of the tick.
        """
        print(self.ticks, end=" ")
        tickCorrente = pd.to_datetime(time)
        df = pd.DataFrame({self.instrument: (ask + bid) / 2}, index=[tickCorrente])
        # Avoid concatenating with the initial empty frame: the first tick simply
        # becomes the buffer, so the index keeps its datetime dtype.
        if self.tickData.empty:
            self.tickData = df
        else:
            self.tickData = pd.concat((self.tickData, df), axis=0)
        # The period string is converted to a Timedelta for the comparison.
        if tickCorrente - self.ultimoPeriodo > pd.to_timedelta(self.period):
            self.resampleJoin()

    def resampleJoin(self):
        """Resample the buffered ticks and append the completed bars to rawData."""
        # The last resampled bar is still in progress, so it is not joined yet.
        completed = (
            self.tickData.resample(self.period, label="right").last().ffill().iloc[:-1]
        )
        if not completed.empty:
            # Carry over the index name of the history so it survives the concat.
            completed = completed.rename_axis(self.rawData.index.name)
            self.rawData = pd.concat((self.rawData, completed), axis=0)
            self.ultimoPeriodo = self.rawData.index[-1]
        # Keep only the latest tick: it belongs to the bar that is still open.
        self.tickData = self.tickData.iloc[-1:]

In [32]:
# Rebuild the Trader with the retrying getMostRecent, download the history and
# stream live ticks (stop=50). The two prints bracket the download, retries included.
t = Trader("oandaMY.cfg","EUR_USD","1min")
print(datetime.now(timezone.utc))
t.getMostRecent()
print(datetime.now(timezone.utc))
t.stream_data(t.instrument,stop=50)

2024-11-15 10:05:55.211175+00:00


  dr = pd.date_range(start, end, freq=freq)


Spiacente ma devo riscaricare i dati, sigh


  dr = pd.date_range(start, end, freq=freq)


2024-11-15 10:06:19.596098+00:00
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 

In [22]:
# Show the most recent bars only.
t.rawData.tail(20)

Unnamed: 0,EUR_USD
2024-11-15 09:45:00+00:00,1.05698
2024-11-15 09:46:00+00:00,1.05668
2024-11-15 09:47:00+00:00,1.05684
2024-11-15 09:48:00+00:00,1.05678
2024-11-15 09:49:00+00:00,1.05668
2024-11-15 09:50:00+00:00,1.05678
2024-11-15 09:51:00+00:00,1.05664
2024-11-15 09:52:00+00:00,1.05652
2024-11-15 09:53:00+00:00,1.05647
2024-11-15 09:54:00+00:00,1.05642


In [31]:
# Current UTC wall-clock time, for comparison with the timestamps above.
print(datetime.now(timezone.utc))

2024-11-15 10:05:51.821801+00:00
