[ntk/apt.git] / methods / http.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: http.cc,v 1.59 2004/05/08 19:42:35 mdz Exp $
4 /* ######################################################################
6 HTTP Acquire Method - This is the HTTP acquire method for APT.
8 It uses HTTP/1.1 and many of the fancy options there-in, such as
9 pipelining, range, if-range and so on.
It is based on a doubly buffered select loop. A group of requests is
12 fed into a single output buffer that is constantly fed out the
13 socket. This provides ideal pipelining as in many cases all of the
14 requests will fit into a single packet. The input socket is buffered
15 the same way and fed into the fd for the file (may be a pipe in future).
17 This double buffering provides fairly substantial transfer rates,
18 compared to wget the http method is about 4% faster. Most importantly,
19 when HTTP is compared with FTP as a protocol the speed difference is
20 huge. In tests over the internet from two sites to llug (via ATM) this
21 program got 230k/s sustained http transfer rates. FTP on the other
22 hand topped out at 170k/s. That combined with the time to setup the
23 FTP connection makes HTTP a vastly superior protocol.
25 ##################################################################### */
26 /*}}}*/
27 // Include Files /*{{{*/
28 #include <config.h>
30 #include <apt-pkg/fileutl.h>
31 #include <apt-pkg/acquire-method.h>
32 #include <apt-pkg/configuration.h>
33 #include <apt-pkg/error.h>
34 #include <apt-pkg/hashes.h>
35 #include <apt-pkg/netrc.h>
36 #include <apt-pkg/strutl.h>
37 #include <apt-pkg/proxy.h>
39 #include <stddef.h>
40 #include <stdlib.h>
41 #include <sys/select.h>
42 #include <cstring>
43 #include <sys/stat.h>
44 #include <sys/time.h>
45 #include <unistd.h>
46 #include <stdio.h>
47 #include <errno.h>
48 #include <iostream>
49 #include <sstream>
51 #include "config.h"
52 #include "connect.h"
53 #include "http.h"
55 #include <apti18n.h>
56 /*}}}*/
57 using namespace std;
59 unsigned long long CircleBuf::BwReadLimit=0;
60 unsigned long long CircleBuf::BwTickReadData=0;
61 struct timeval CircleBuf::BwReadTick={0,0};
62 const unsigned int CircleBuf::BW_HZ=10;
64 // CircleBuf::CircleBuf - Circular input buffer /*{{{*/
65 // ---------------------------------------------------------------------
66 /* */
67 CircleBuf::CircleBuf(unsigned long long Size)
68 : Size(Size), Hash(0), TotalWriten(0)
69 {
70 Buf = new unsigned char[Size];
71 Reset();
73 CircleBuf::BwReadLimit = _config->FindI("Acquire::http::Dl-Limit",0)*1024;
74 }
75 /*}}}*/
76 // CircleBuf::Reset - Reset to the default state /*{{{*/
77 // ---------------------------------------------------------------------
78 /* */
79 void CircleBuf::Reset()
80 {
81 InP = 0;
82 OutP = 0;
83 StrPos = 0;
84 TotalWriten = 0;
85 MaxGet = (unsigned long long)-1;
86 OutQueue = string();
87 if (Hash != 0)
88 {
89 delete Hash;
90 Hash = new Hashes;
91 }
92 }
93 /*}}}*/
94 // CircleBuf::Read - Read from a FD into the circular buffer /*{{{*/
95 // ---------------------------------------------------------------------
96 /* This fills up the buffer with as much data as is in the FD, assuming it
97 is non-blocking.. */
98 bool CircleBuf::Read(int Fd)
99 {
100 while (1)
101 {
102 // Woops, buffer is full
103 if (InP - OutP == Size)
104 return true;
106 // what's left to read in this tick
107 unsigned long long const BwReadMax = CircleBuf::BwReadLimit/BW_HZ;
109 if(CircleBuf::BwReadLimit) {
110 struct timeval now;
111 gettimeofday(&now,0);
113 unsigned long long d = (now.tv_sec-CircleBuf::BwReadTick.tv_sec)*1000000 +
114 now.tv_usec-CircleBuf::BwReadTick.tv_usec;
115 if(d > 1000000/BW_HZ) {
116 CircleBuf::BwReadTick = now;
117 CircleBuf::BwTickReadData = 0;
118 }
120 if(CircleBuf::BwTickReadData >= BwReadMax) {
121 usleep(1000000/BW_HZ);
122 return true;
123 }
124 }
126 // Write the buffer segment
127 ssize_t Res;
128 if(CircleBuf::BwReadLimit) {
129 Res = read(Fd,Buf + (InP%Size),
130 BwReadMax > LeftRead() ? LeftRead() : BwReadMax);
131 } else
132 Res = read(Fd,Buf + (InP%Size),LeftRead());
134 if(Res > 0 && BwReadLimit > 0)
135 CircleBuf::BwTickReadData += Res;
137 if (Res == 0)
138 return false;
139 if (Res < 0)
140 {
141 if (errno == EAGAIN)
142 return true;
143 return false;
144 }
146 if (InP == 0)
147 gettimeofday(&Start,0);
148 InP += Res;
149 }
150 }
151 /*}}}*/
152 // CircleBuf::Read - Put the string into the buffer /*{{{*/
153 // ---------------------------------------------------------------------
154 /* This will hold the string in and fill the buffer with it as it empties */
155 bool CircleBuf::Read(string Data)
156 {
157 OutQueue += Data;
158 FillOut();
159 return true;
160 }
161 /*}}}*/
162 // CircleBuf::FillOut - Fill the buffer from the output queue /*{{{*/
163 // ---------------------------------------------------------------------
164 /* */
165 void CircleBuf::FillOut()
166 {
167 if (OutQueue.empty() == true)
168 return;
169 while (1)
170 {
171 // Woops, buffer is full
172 if (InP - OutP == Size)
173 return;
175 // Write the buffer segment
176 unsigned long long Sz = LeftRead();
177 if (OutQueue.length() - StrPos < Sz)
178 Sz = OutQueue.length() - StrPos;
179 memcpy(Buf + (InP%Size),OutQueue.c_str() + StrPos,Sz);
181 // Advance
182 StrPos += Sz;
183 InP += Sz;
184 if (OutQueue.length() == StrPos)
185 {
186 StrPos = 0;
187 OutQueue = "";
188 return;
189 }
190 }
191 }
192 /*}}}*/
193 // CircleBuf::Write - Write from the buffer into a FD /*{{{*/
194 // ---------------------------------------------------------------------
195 /* This empties the buffer into the FD. */
196 bool CircleBuf::Write(int Fd)
197 {
198 while (1)
199 {
200 FillOut();
202 // Woops, buffer is empty
203 if (OutP == InP)
204 return true;
206 if (OutP == MaxGet)
207 return true;
209 // Write the buffer segment
210 ssize_t Res;
211 Res = write(Fd,Buf + (OutP%Size),LeftWrite());
213 if (Res == 0)
214 return false;
215 if (Res < 0)
216 {
217 if (errno == EAGAIN)
218 return true;
220 return false;
221 }
223 TotalWriten += Res;
225 if (Hash != 0)
226 Hash->Add(Buf + (OutP%Size),Res);
228 OutP += Res;
229 }
230 }
231 /*}}}*/
232 // CircleBuf::WriteTillEl - Write from the buffer to a string /*{{{*/
233 // ---------------------------------------------------------------------
234 /* This copies till the first empty line */
235 bool CircleBuf::WriteTillEl(string &Data,bool Single)
236 {
237 // We cheat and assume it is unneeded to have more than one buffer load
238 for (unsigned long long I = OutP; I < InP; I++)
239 {
240 if (Buf[I%Size] != '\n')
241 continue;
242 ++I;
244 if (Single == false)
245 {
246 if (I < InP && Buf[I%Size] == '\r')
247 ++I;
248 if (I >= InP || Buf[I%Size] != '\n')
249 continue;
250 ++I;
251 }
253 Data = "";
254 while (OutP < I)
255 {
256 unsigned long long Sz = LeftWrite();
257 if (Sz == 0)
258 return false;
259 if (I - OutP < Sz)
260 Sz = I - OutP;
261 Data += string((char *)(Buf + (OutP%Size)),Sz);
262 OutP += Sz;
263 }
264 return true;
265 }
266 return false;
267 }
268 /*}}}*/
269 // CircleBuf::Stats - Print out stats information /*{{{*/
270 // ---------------------------------------------------------------------
271 /* */
272 void CircleBuf::Stats()
273 {
274 if (InP == 0)
275 return;
277 struct timeval Stop;
278 gettimeofday(&Stop,0);
279 /* float Diff = Stop.tv_sec - Start.tv_sec +
280 (float)(Stop.tv_usec - Start.tv_usec)/1000000;
281 clog << "Got " << InP << " in " << Diff << " at " << InP/Diff << endl;*/
282 }
283 /*}}}*/
284 CircleBuf::~CircleBuf()
285 {
286 delete [] Buf;
287 delete Hash;
288 }
290 // HttpServerState::HttpServerState - Constructor /*{{{*/
291 HttpServerState::HttpServerState(URI Srv,HttpMethod *Owner) : ServerState(Srv, Owner), In(64*1024), Out(4*1024)
292 {
293 TimeOut = _config->FindI("Acquire::http::Timeout",TimeOut);
294 Reset();
295 }
296 /*}}}*/
297 // HttpServerState::Open - Open a connection to the server /*{{{*/
298 // ---------------------------------------------------------------------
299 /* This opens a connection to the server. */
300 bool HttpServerState::Open()
301 {
302 // Use the already open connection if possible.
303 if (ServerFd != -1)
304 return true;
306 Close();
307 In.Reset();
308 Out.Reset();
309 Persistent = true;
311 // Determine the proxy setting
312 AutoDetectProxy(ServerName);
313 string SpecificProxy = _config->Find("Acquire::http::Proxy::" + ServerName.Host);
314 if (!SpecificProxy.empty())
315 {
316 if (SpecificProxy == "DIRECT")
317 Proxy = "";
318 else
319 Proxy = SpecificProxy;
320 }
321 else
322 {
323 string DefProxy = _config->Find("Acquire::http::Proxy");
324 if (!DefProxy.empty())
325 {
326 Proxy = DefProxy;
327 }
328 else
329 {
330 char* result = getenv("http_proxy");
331 Proxy = result ? result : "";
332 }
333 }
335 // Parse no_proxy, a , separated list of domains
336 if (getenv("no_proxy") != 0)
337 {
338 if (CheckDomainList(ServerName.Host,getenv("no_proxy")) == true)
339 Proxy = "";
340 }
342 // Determine what host and port to use based on the proxy settings
343 int Port = 0;
344 string Host;
345 if (Proxy.empty() == true || Proxy.Host.empty() == true)
346 {
347 if (ServerName.Port != 0)
348 Port = ServerName.Port;
349 Host = ServerName.Host;
350 }
351 else
352 {
353 if (Proxy.Port != 0)
354 Port = Proxy.Port;
355 Host = Proxy.Host;
356 }
358 // Connect to the remote server
359 if (Connect(Host,Port,"http",80,ServerFd,TimeOut,Owner) == false)
360 return false;
362 return true;
363 }
364 /*}}}*/
365 // HttpServerState::Close - Close a connection to the server /*{{{*/
366 // ---------------------------------------------------------------------
367 /* */
368 bool HttpServerState::Close()
369 {
370 close(ServerFd);
371 ServerFd = -1;
372 return true;
373 }
374 /*}}}*/
375 // HttpServerState::RunData - Transfer the data from the socket /*{{{*/
376 bool HttpServerState::RunData(FileFd * const File)
377 {
378 State = Data;
380 // Chunked transfer encoding is fun..
381 if (Encoding == Chunked)
382 {
383 while (1)
384 {
385 // Grab the block size
386 bool Last = true;
387 string Data;
388 In.Limit(-1);
389 do
390 {
391 if (In.WriteTillEl(Data,true) == true)
392 break;
393 }
394 while ((Last = Go(false, File)) == true);
396 if (Last == false)
397 return false;
399 // See if we are done
400 unsigned long long Len = strtoull(Data.c_str(),0,16);
401 if (Len == 0)
402 {
403 In.Limit(-1);
405 // We have to remove the entity trailer
406 Last = true;
407 do
408 {
409 if (In.WriteTillEl(Data,true) == true && Data.length() <= 2)
410 break;
411 }
412 while ((Last = Go(false, File)) == true);
413 if (Last == false)
414 return false;
415 return !_error->PendingError();
416 }
418 // Transfer the block
419 In.Limit(Len);
420 while (Go(true, File) == true)
421 if (In.IsLimit() == true)
422 break;
424 // Error
425 if (In.IsLimit() == false)
426 return false;
428 // The server sends an extra new line before the next block specifier..
429 In.Limit(-1);
430 Last = true;
431 do
432 {
433 if (In.WriteTillEl(Data,true) == true)
434 break;
435 }
436 while ((Last = Go(false, File)) == true);
437 if (Last == false)
438 return false;
439 }
440 }
441 else
442 {
443 /* Closes encoding is used when the server did not specify a size, the
444 loss of the connection means we are done */
445 if (Encoding == Closes)
446 In.Limit(-1);
447 else
448 In.Limit(Size - StartPos);
450 // Just transfer the whole block.
451 do
452 {
453 if (In.IsLimit() == false)
454 continue;
456 In.Limit(-1);
457 return !_error->PendingError();
458 }
459 while (Go(true, File) == true);
460 }
462 return Owner->Flush() && !_error->PendingError();
463 }
464 /*}}}*/
465 bool HttpServerState::ReadHeaderLines(std::string &Data) /*{{{*/
466 {
467 return In.WriteTillEl(Data);
468 }
469 /*}}}*/
470 bool HttpServerState::LoadNextResponse(bool const ToFile, FileFd * const File)/*{{{*/
471 {
472 return Go(ToFile, File);
473 }
474 /*}}}*/
475 bool HttpServerState::WriteResponse(const std::string &Data) /*{{{*/
476 {
477 return Out.Read(Data);
478 }
479 /*}}}*/
480 APT_PURE bool HttpServerState::IsOpen() /*{{{*/
481 {
482 return (ServerFd != -1);
483 }
484 /*}}}*/
485 bool HttpServerState::InitHashes(FileFd &File) /*{{{*/
486 {
487 delete In.Hash;
488 In.Hash = new Hashes;
490 // Set the expected size and read file for the hashes
491 File.Truncate(StartPos);
492 return In.Hash->AddFD(File, StartPos);
493 }
494 /*}}}*/
495 APT_PURE Hashes * HttpServerState::GetHashes() /*{{{*/
496 {
497 return In.Hash;
498 }
499 /*}}}*/
500 // HttpServerState::Die - The server has closed the connection. /*{{{*/
501 bool HttpServerState::Die(FileFd &File)
502 {
503 unsigned int LErrno = errno;
505 // Dump the buffer to the file
506 if (State == ServerState::Data)
507 {
508 // on GNU/kFreeBSD, apt dies on /dev/null because non-blocking
509 // can't be set
510 if (File.Name() != "/dev/null")
511 SetNonBlock(File.Fd(),false);
512 while (In.WriteSpace() == true)
513 {
514 if (In.Write(File.Fd()) == false)
515 return _error->Errno("write",_("Error writing to the file"));
517 // Done
518 if (In.IsLimit() == true)
519 return true;
520 }
521 }
523 // See if this is because the server finished the data stream
524 if (In.IsLimit() == false && State != HttpServerState::Header &&
525 Encoding != HttpServerState::Closes)
526 {
527 Close();
528 if (LErrno == 0)
529 return _error->Error(_("Error reading from server. Remote end closed connection"));
530 errno = LErrno;
531 return _error->Errno("read",_("Error reading from server"));
532 }
533 else
534 {
535 In.Limit(-1);
537 // Nothing left in the buffer
538 if (In.WriteSpace() == false)
539 return false;
541 // We may have got multiple responses back in one packet..
542 Close();
543 return true;
544 }
546 return false;
547 }
548 /*}}}*/
549 // HttpServerState::Flush - Dump the buffer into the file /*{{{*/
550 // ---------------------------------------------------------------------
551 /* This takes the current input buffer from the Server FD and writes it
552 into the file */
553 bool HttpServerState::Flush(FileFd * const File)
554 {
555 if (File != NULL)
556 {
557 // on GNU/kFreeBSD, apt dies on /dev/null because non-blocking
558 // can't be set
559 if (File->Name() != "/dev/null")
560 SetNonBlock(File->Fd(),false);
561 if (In.WriteSpace() == false)
562 return true;
564 while (In.WriteSpace() == true)
565 {
566 if (In.Write(File->Fd()) == false)
567 return _error->Errno("write",_("Error writing to file"));
568 if (In.IsLimit() == true)
569 return true;
570 }
572 if (In.IsLimit() == true || Encoding == ServerState::Closes)
573 return true;
574 }
575 return false;
576 }
577 /*}}}*/
578 // HttpServerState::Go - Run a single loop /*{{{*/
579 // ---------------------------------------------------------------------
580 /* This runs the select loop over the server FDs, Output file FDs and
581 stdin. */
582 bool HttpServerState::Go(bool ToFile, FileFd * const File)
583 {
584 // Server has closed the connection
585 if (ServerFd == -1 && (In.WriteSpace() == false ||
586 ToFile == false))
587 return false;
589 fd_set rfds,wfds;
590 FD_ZERO(&rfds);
591 FD_ZERO(&wfds);
593 /* Add the server. We only send more requests if the connection will
594 be persisting */
595 if (Out.WriteSpace() == true && ServerFd != -1
596 && Persistent == true)
597 FD_SET(ServerFd,&wfds);
598 if (In.ReadSpace() == true && ServerFd != -1)
599 FD_SET(ServerFd,&rfds);
601 // Add the file
602 int FileFD = -1;
603 if (File != NULL)
604 FileFD = File->Fd();
606 if (In.WriteSpace() == true && ToFile == true && FileFD != -1)
607 FD_SET(FileFD,&wfds);
609 // Add stdin
610 if (_config->FindB("Acquire::http::DependOnSTDIN", true) == true)
613 // Figure out the max fd
614 int MaxFd = FileFD;
615 if (MaxFd < ServerFd)
616 MaxFd = ServerFd;
618 // Select
619 struct timeval tv;
620 tv.tv_sec = TimeOut;
621 tv.tv_usec = 0;
622 int Res = 0;
623 if ((Res = select(MaxFd+1,&rfds,&wfds,0,&tv)) < 0)
624 {
625 if (errno == EINTR)
626 return true;
627 return _error->Errno("select",_("Select failed"));
628 }
630 if (Res == 0)
631 {
632 _error->Error(_("Connection timed out"));
633 return Die(*File);
634 }
636 // Handle server IO
637 if (ServerFd != -1 && FD_ISSET(ServerFd,&rfds))
638 {
639 errno = 0;
640 if (In.Read(ServerFd) == false)
641 return Die(*File);
642 }
644 if (ServerFd != -1 && FD_ISSET(ServerFd,&wfds))
645 {
646 errno = 0;
647 if (Out.Write(ServerFd) == false)
648 return Die(*File);
649 }
651 // Send data to the file
652 if (FileFD != -1 && FD_ISSET(FileFD,&wfds))
653 {
654 if (In.Write(FileFD) == false)
655 return _error->Errno("write",_("Error writing to output file"));
656 }
658 if (ExpectedSize > 0 && File && File->Tell() > ExpectedSize)
659 {
660 return _error->Error("Writing more data than expected (%llu > %llu)",
661 File->Tell(), ExpectedSize);
662 }
664 // Handle commands from APT
665 if (FD_ISSET(STDIN_FILENO,&rfds))
666 {
667 if (Owner->Run(true) != -1)
668 exit(100);
669 }
671 return true;
672 }
673 /*}}}*/
675 // HttpMethod::SendReq - Send the HTTP request /*{{{*/
676 // ---------------------------------------------------------------------
677 /* This places the http request in the outbound buffer */
678 void HttpMethod::SendReq(FetchItem *Itm)
679 {
680 URI Uri = Itm->Uri;
682 // The HTTP server expects a hostname with a trailing :port
683 std::stringstream Req;
684 string ProperHost;
686 if (Uri.Host.find(':') != string::npos)
687 ProperHost = '[' + Uri.Host + ']';
688 else
689 ProperHost = Uri.Host;
691 /* RFC 2616 ยง5.1.2 requires absolute URIs for requests to proxies,
692 but while its a must for all servers to accept absolute URIs,
693 it is assumed clients will sent an absolute path for non-proxies */
694 std::string requesturi;
695 if (Server->Proxy.empty() == true || Server->Proxy.Host.empty())
696 requesturi = Uri.Path;
697 else
698 requesturi = Itm->Uri;
700 // The "+" is encoded as a workaround for a amazon S3 bug
701 // see LP bugs #1003633 and #1086997.
702 requesturi = QuoteString(requesturi, "+~ ");
704 /* Build the request. No keep-alive is included as it is the default
705 in 1.1, can cause problems with proxies, and we are an HTTP/1.1
706 client anyway.
707 C.f. https://tools.ietf.org/wg/httpbis/trac/ticket/158 */
708 Req << "GET " << requesturi << " HTTP/1.1\r\n";
709 if (Uri.Port != 0)
710 Req << "Host: " << ProperHost << ":" << Uri.Port << "\r\n";
711 else
712 Req << "Host: " << ProperHost << "\r\n";
714 // generate a cache control header (if needed)
715 if (_config->FindB("Acquire::http::No-Cache",false) == true)
716 Req << "Cache-Control: no-cache\r\n"
717 << "Pragma: no-cache\r\n";
718 else if (Itm->IndexFile == true)
719 Req << "Cache-Control: max-age=" << _config->FindI("Acquire::http::Max-Age",0) << "\r\n";
720 else if (_config->FindB("Acquire::http::No-Store",false) == true)
721 Req << "Cache-Control: no-store\r\n";
723 // If we ask for uncompressed files servers might respond with content-
724 // negotiation which lets us end up with compressed files we do not support,
725 // see 657029, 657560 and co, so if we have no extension on the request
726 // ask for text only. As a sidenote: If there is nothing to negotate servers
727 // seem to be nice and ignore it.
728 if (_config->FindB("Acquire::http::SendAccept", true) == true)
729 {
730 size_t const filepos = Itm->Uri.find_last_of('/');
731 string const file = Itm->Uri.substr(filepos + 1);
732 if (flExtension(file) == file)
733 Req << "Accept: text/*\r\n";
734 }
736 // Check for a partial file and send if-queries accordingly
737 struct stat SBuf;
738 if (stat(Itm->DestFile.c_str(),&SBuf) >= 0 && SBuf.st_size > 0)
739 Req << "Range: bytes=" << SBuf.st_size << "-\r\n"
740 << "If-Range: " << TimeRFC1123(SBuf.st_mtime) << "\r\n";
741 else if (Itm->LastModified != 0)
742 Req << "If-Modified-Since: " << TimeRFC1123(Itm->LastModified).c_str() << "\r\n";
744 if (Server->Proxy.User.empty() == false || Server->Proxy.Password.empty() == false)
745 Req << "Proxy-Authorization: Basic "
746 << Base64Encode(Server->Proxy.User + ":" + Server->Proxy.Password) << "\r\n";
748 maybe_add_auth (Uri, _config->FindFile("Dir::Etc::netrc"));
749 if (Uri.User.empty() == false || Uri.Password.empty() == false)
750 Req << "Authorization: Basic "
751 << Base64Encode(Uri.User + ":" + Uri.Password) << "\r\n";
753 Req << "User-Agent: " << _config->Find("Acquire::http::User-Agent",
754 "Debian APT-HTTP/1.3 (" PACKAGE_VERSION ")") << "\r\n";
756 Req << "\r\n";
758 if (Debug == true)
759 cerr << Req.str() << endl;
761 Server->WriteResponse(Req.str());
762 }
763 /*}}}*/
764 // HttpMethod::Configuration - Handle a configuration message /*{{{*/
765 // ---------------------------------------------------------------------
766 /* We stash the desired pipeline depth */
767 bool HttpMethod::Configuration(string Message)
768 {
769 if (ServerMethod::Configuration(Message) == false)
770 return false;
772 AllowRedirect = _config->FindB("Acquire::http::AllowRedirect",true);
773 PipelineDepth = _config->FindI("Acquire::http::Pipeline-Depth",
774 PipelineDepth);
775 Debug = _config->FindB("Debug::Acquire::http",false);
777 return true;
778 }
779 /*}}}*/
780 ServerState * HttpMethod::CreateServerState(URI uri) /*{{{*/
781 {
782 return new HttpServerState(uri, this);
783 }
784 /*}}}*/
785 void HttpMethod::RotateDNS() /*{{{*/
786 {
787 ::RotateDNS();
788 }
789 /*}}}*/